AnsweredAssumed Answered

Can not run my spark job on Mapr sandbox and on yarn mode

Question asked by mahdi62b on Jan 1, 2017
Latest reply on Mar 1, 2017 by prakhar

Hi, I have downloaded the MapR sandbox and tried a very simple application, shown below, on it

 

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
import org.apache.hadoop.conf._
import org.apache.hadoop.fs._
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat

object SimpleStream {
  def main(args: Array[String]) {

    // Configure the app: two 1g/1-core executors, RDD compression, and an
    // effectively unlimited file-stream remember window so files already
    // present in the monitored directory are still picked up.
    val conf = new SparkConf()
      .setAppName("Simple Application")
      .set("spark.executor.instances", "2")
      .set("spark.rdd.compress", "true")
      .set("spark.driver.memory", "1g")
      .set("spark.driver.cores", "1")
      .set("spark.executor.memory", "1g")
      .set("spark.executor.cores", "1")
      .set("spark.streaming.fileStream.minRememberDuration", "2000000h")

    // Streaming context with a 2-second batch interval.
    val ssc = new StreamingContext(conf, Seconds(2))

    // Path filter that accepts every file in the watched directory.
    val acceptAll: Path => Boolean = _ => true

    // Monitor the directory (newFilesOnly = false, so pre-existing files
    // are also processed) and pull out the text value of each record.
    val lines = ssc
      .fileStream[LongWritable, Text, TextInputFormat](
        "/user/mapr/SparkStreaming/src/main/CCN", acceptAll, false)
      .map(_._2.toString())

    // Classic word count over each micro-batch.
    val wordCounts = lines
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // Print every (word, count) pair; runs on the executors, so output
    // appears in executor logs when deployed on a cluster.
    wordCounts.foreachRDD(rdd => rdd.foreach(println))

    ssc.start()
    ssc.awaitTerminationOrTimeout(1000000000)
  }
}


I can run the above in local[3] mode, but when I try to submit it with spark-submit

/opt/mapr/spark/spark-1.6.1/bin/spark-submit --class SimpleStream --master yarn-cluster /user/mapr/SparkStreaming/target/scala-2.10/sparkhbase_2.10-1.0.2.jar

I get the exception below:
Exception in thread "main" org.apache.spark.SparkException: Application application_1483271616344_0007 finished with failed status
at org.apache.spark.deploy.yarn.Client.run(Client.scala:1034)
at org.apache.spark.deploy.yarn.Client$.main(Client.scala:1081)
at org.apache.spark.deploy.yarn.Client.main(Client.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:752)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)


Why can't I run the above on YARN?

Outcomes