2020-09-02 09:10发布
spark-submit几种提交模式的 区别是什么?
代码
package com.imooc.spark.Test

import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

/**
 * SparkContext test case (local mode): builds a DataFrame from a
 * tab-separated employee file two ways — case-class reflection and a
 * programmatic schema — then writes the result out as partitioned JSON.
 */
object TestOfSparkContext2 {

  def main(args: Array[String]): Unit = {
    //System.setProperty("hadoop.home.dir", "E:\\soft\\winutils\\hadoop-common-2.2.0-bin-master")
    val spark = SparkSession.builder()
      .appName("TestOfSparkContext2")
      .master("local[2]").getOrCreate()

    // RDD => DF via case-class reflection
    // reflection(spark)

    // RDD => DF via an explicit, programmatic schema
    program(spark)

    spark.stop()
  }

  /** RDD => DataFrame using reflection over the EMP case class. */
  private def reflection(spark: SparkSession) = {
    val rdd = spark.sparkContext
      // BUG FIX: original URI was "file///..." (missing ':'); a local-file
      // URI must be "file:///..." or the scheme cannot be resolved.
      .textFile("file:///home/hadoop/data/test-data-spark/emp-forSpark.txt")
      .map(w => w.split("\t"))
    import spark.implicits._
    val empDF = rdd.map(line => EMP(line(0), line(1), line(2), line(3),
      line(4), line(5), line(6), line(7))).toDF()
    empDF.printSchema()
    empDF.show()
  }

  /** RDD => DataFrame using an explicitly constructed StructType schema. */
  def program(spark: SparkSession) = {
    // Parse each tab-separated line into a Row of eight string columns.
    val infoRdd = spark.sparkContext
      .textFile("/home/hadoop/data/test-data-spark/emp-forSpark.txt")
      .map(w => w.split("\t")).map(line => Row(line(0), line(1), line(2),
        line(3), line(4), line(5), line(6), line(7)))

    // Explicit schema: every column kept as a nullable string.
    val filedName = Array(StructField("empNo", StringType, true)
      , StructField("ename", StringType, true), StructField("job", StringType, true)
      , StructField("mgr", StringType, true), StructField("hireDate", StringType, true)
      , StructField("sal", StringType, true), StructField("comm", StringType, true)
      , StructField("deptNo", StringType, true))
    val schema = StructType(filedName)
    val empDf = spark.createDataFrame(infoRdd, schema)
    empDf.printSchema()
    empDf.show(false)

    // Register a temporary view so the data can be queried with SQL.
    empDf.createOrReplaceTempView("emp")
    val sqlDF = spark.sql("select ename,job,comm from emp")
    // sqlDF.printSchema()
    //sqlDF.show()

    /*empDf.write.format("json")
      .mode(SaveMode.Overwrite).save("E:///testData/empTable3")*/
    // coalesce(1): one JSON file per partition directory.
    // NOTE(review): "deptno" resolves against the "deptNo" column because
    // Spark's analyzer is case-insensitive by default.
    empDf.coalesce(1).write.format("json").mode(SaveMode.Overwrite)
      .partitionBy("deptno").save("/home/hadoop/app/spark-2.1.1-bin-2.6.0-cdh5.7.0/testData/emp-jsonTable")
  }

  case class EMP(empNo: String, empName: String, job: String, mgr: String,
                 hiredate: String, sal: String, comm: String, deptNo: String)
}
打包
提交
在$SPARK_HOME 下执行
./bin/spark-submit \
  --class com.imooc.spark.Test.TestOfSparkContext2 \
  --master local[2] \
  /home/hadoop/data/test-jar/sql-1.0.jar
参数解释
对empDf.write.format(“json”).mode(SaveMode.Overwrite).save(“E:///testData/empTable3”)*/empDf.coalesce(1).write.format(“json”).mode(SaveMode.Overwrite).partitionBy(“deptno”).save("/home/hadoop/app/spark-2.1.1-bin-2.6.0-cdh5.7.0/testData/emp-jsonTable")中的参数解释
package com.imooc.spark.Test

import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}

/**
 * SparkContext test case (YARN variant): reads an employee file from HDFS,
 * converts the RDD to a DataFrame (reflection or programmatic schema), and
 * writes the result back to HDFS as partitioned JSON.
 */
object TestOfSparkContext2OnYarn {

  def main(args: Array[String]): Unit = {
    //System.setProperty("hadoop.home.dir", "E:\\soft\\winutils\\hadoop-common-2.2.0-bin-master")
    /*val spark = SparkSession.builder()
      .appName("TestOfSparkContext2OnYarn")
      .master("local[2]").getOrCreate()*/
    // On YARN the master and deploy mode come from spark-submit, so no
    // master is hard-coded when building the session here.
    val path = "/user/hadoop/data/test-data-forSpark/emp-forSpark.txt"
    val spark = SparkSession.builder().getOrCreate()

    // RDD => DF via case-class reflection
    // reflection(spark, path)

    // RDD => DF via an explicit, programmatic schema
    program(spark, path)

    spark.stop()
  }

  /** RDD => DataFrame using reflection over the EMP case class. */
  private def reflection(spark: SparkSession, path: String) = {
    import spark.implicits._
    val tokens = spark.sparkContext
      .textFile(path)
      .map(_.split("\t"))
    val employees = tokens
      .map(f => EMP(f(0), f(1), f(2), f(3), f(4), f(5), f(6), f(7)))
      .toDF()
    employees.printSchema()
    employees.show()
  }

  /** RDD => DataFrame using an explicitly constructed StructType schema. */
  def program(spark: SparkSession, path: String) = {
    // Each tab-separated line becomes a Row of its first eight fields.
    val rowRdd = spark.sparkContext
      .textFile(path)
      .map(_.split("\t"))
      .map(parts => Row.fromSeq((0 to 7).map(parts(_))))

    // All eight columns are modelled as nullable strings.
    val columnNames =
      Seq("empNo", "ename", "job", "mgr", "hireDate", "sal", "comm", "deptNo")
    val schema = StructType(columnNames.map(StructField(_, StringType, true)))

    val empDf = spark.createDataFrame(rowRdd, schema)
    empDf.printSchema()
    empDf.show(false)

    // Expose the DataFrame to SQL through a temporary view.
    empDf.createOrReplaceTempView("emp")
    val sqlDF = spark.sql("select ename,job,comm from emp")
    // sqlDF.printSchema()
    //sqlDF.show()

    /*empDf.write.format("json")
      .mode(SaveMode.Overwrite).save("E:///testData/empTable3")*/
    // coalesce(1) keeps a single JSON file per partition directory; the
    // output path must live on HDFS when the job runs on YARN.
    empDf.coalesce(1)
      .write
      .format("json")
      .mode(SaveMode.Overwrite)
      .partitionBy("deptno")
      .save("hdfs://hadoop001:9000/user/hadoop/emp-spark-test/emp-jsonOnYarnTable")
  }

  case class EMP(empNo: String, empName: String, job: String, mgr: String,
                 hiredate: String, sal: String, comm: String, deptNo: String)
}
./bin/spark-submit \
  --class com.imooc.spark.Test.TestOfSparkContext2OnYarn \
  --master yarn \
  /home/hadoop/data/jar-test/sql-3.0-onYarn.jar

./bin/spark-submit \
  --class com.imooc.spark.Test.TestOfSparkContext2OnYarn \
  --master yarn \
  --deploy-mode client \
  /home/hadoop/data/jar-test/sql-3.0-onYarn.jar
spark-submit 提交jar包到yarn上的时候,数据输入路径,数据输出路径都必须是HDFS的路径,否则报错 :Input path does not exist
查看
日志直接在控制台输出
结果
[hadoop@hadoop001 ~]$ hadoop fs -ls /user/hadoop/emp-spark-test/emp-jsonOnYarnTable
18/11/20 17:42:27 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
-rw-r--r--   1 hadoop supergroup          0 2018-11-20 17:33 /user/hadoop/emp-spark-test/emp-jsonOnYarnTable/_SUCCESS
drwxr-xr-x   - hadoop supergroup          0 2018-11-20 17:33 /user/hadoop/emp-spark-test/emp-jsonOnYarnTable/deptno=10
drwxr-xr-x   - hadoop supergroup          0 2018-11-20 17:33 /user/hadoop/emp-spark-test/emp-jsonOnYarnTable/deptno=20
drwxr-xr-x   - hadoop supergroup          0 2018-11-20 17:33 /user/hadoop/emp-spark-test/emp-jsonOnYarnTable/deptno=30
drwxr-xr-x   - hadoop supergroup          0 2018-11-20 17:33 /user/hadoop/emp-spark-test/emp-jsonOnYarnTable/deptno=44
[hadoop@hadoop001 ~]$
本地提交:输入输出文件在本地写法:file:///输入输出文件在HDFS写法:hdfs://ip:port/其中提交模式是yarn模式的时候,输入输出只能是在hdfs上操作
本地提交:输入输出文件在本地写法:file:///
输入输出文件在HDFS写法:hdfs://ip:port/
其中提交模式是yarn模式的时候,输入输出只能是在hdfs上操作
提交命令
./bin/spark-submit \
  --class com.imooc.spark.Test.TestOfSparkContext2OnYarn \
  --master yarn \
  --deploy-mode cluster \
  /home/hadoop/data/jar-test/sql-3.0-onYarn.jar
查看输出日志需要在 yarn的日志上查看
Spark on YARN 之 client 与cluster 模式区别
和local模式一样
./bin/spark-submit \
  --class com.imooc.spark.Test.TestOfSparkContext2 \
  --master spark://192.168.52.130:7077 \
  --executor-memory 4G \
  --total-executor-cores 6 \
  /home/hadoop/data/jar-test/sql-1.0-yarnCluster.jar
https://blog.csdn.net/huonan_123/article/details/84282843?ops_request_misc={"request_id":"159901385219724836763813","scm":"20140713.130102334.."}&request_id=159901385219724836763813&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~first_rank_ecpm_v3~pc_rank_v2-1-84282843.first_rank_ecpm_v3_pc_rank_v2&utm_term=spark-submit几种提交模式的+区别是什么&spm=1018.2118.3001.4187
最多设置5个标签!
local 模式
代码
打包
提交
参数解释
yarn client模式提交
代码
提交
注意:
查看
日志直接在控制台输出
结果
注意
yarn cluster 模式
提交命令
查看
查看输出日志需要在 yarn的日志上查看
Spark on YARN 之 client 与cluster 模式区别
standalone 模式
代码
提交
三种模式比较
提交参数详解
https://blog.csdn.net/huonan_123/article/details/84282843?ops_request_misc={"request_id":"159901385219724836763813","scm":"20140713.130102334.."}&request_id=159901385219724836763813&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~first_rank_ecpm_v3~pc_rank_v2-1-84282843.first_rank_ecpm_v3_pc_rank_v2&utm_term=spark-submit几种提交模式的+区别是什么&spm=1018.2118.3001.4187
一周热门 更多>