
Spark not reading file from executor working directory

Question asked by sniper99 on Jul 2, 2018
Latest reply on Jul 5, 2018 by sniper99

I'm getting the error below when trying to read a file from the executor's working directory. I can see that the file is present at that path.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;   // FileSystem, FSDataInputStream, Path
import org.apache.spark.SparkFiles;

String config = System.getProperty("config.properties");   // set via -Dconfig.properties
String fileName = SparkFiles.get(config);                   // local path on the executor
Configuration hdfsConf = new Configuration();
FileSystem fs = FileSystem.get(hdfsConf);
FSDataInputStream is = fs.open(new Path(fileName));
Properties configFile = new Properties();
configFile.load(is);
String hbaseTableName = configFile.getProperty("hbase.table");
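
One thing I'm wondering: SparkFiles.get returns a path on the executor's local disk, but FileSystem.get(hdfsConf) resolves to the cluster's default filesystem (MapR-FS here), so fs.open may be looking for that local path on the distributed filesystem. A sketch of what I could try instead, reading the shipped copy with plain java.io (assuming the file keeps the name config.properties after localization):

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Properties;
import org.apache.spark.SparkFiles;

// Sketch: read the --files copy straight from the executor's local disk.
String localPath = SparkFiles.get("config.properties");
Properties props = new Properties();
try (InputStream in = new FileInputStream(localPath)) {
    props.load(in);
}
String hbaseTableName = props.getProperty("hbase.table");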

spark-submit --class ****************** \
  --master yarn \
  --executor-memory 10g \
  --driver-memory 9g \
  --num-executors 2 \
  --executor-cores 2 \
  --jars /users/*********-0.0.3-SNAPSHOT.jar \
  --files /hdfs/******/config.properties \
  --conf "spark.executor.extraJavaOptions=-Dconfig.properties=config.properties" \
  --conf "spark.driver.extraJavaOptions=-Dconfig.properties=config.properties" \
  /users/******************-0.1-SNAPSHOT.jar
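
If the Hadoop FileSystem API has to stay (for code shared with real HDFS/MapR-FS paths), another sketch under the same assumptions would be to ask for the local filesystem explicitly instead of the cluster default:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkFiles;

// Sketch: keep the Hadoop API but target the executor's local filesystem
// (file://), since SparkFiles.get hands back a container-local path.
String fileName = SparkFiles.get(System.getProperty("config.properties"));
Configuration conf = new Configuration();
FileSystem localFs = FileSystem.getLocal(conf);
FSDataInputStream in = localFs.open(new Path(fileName));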

java.io.FileNotFoundException: /hdfs/app/local.******.logs/usercache/*****/appcache/application_******_10679/container_e102_*********_10679_01_000003/config.properties
        at com.mapr.fs.MapRClientImpl.open(MapRClientImpl.java:327)
        at com.mapr.fs.MapRFileSystem.open(MapRFileSystem.java:1012)
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:807)
        at com.*********.<init>(**.java:45)
        at com.*********$2$1.call(**.java:105)
        at com.*********$2$1.call(**.java:1)
        at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:219)
        at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:219)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
        at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
End of LogType:stderr
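
The trace shows com.mapr.fs.MapRFileSystem.open handling the call, which makes me think FileSystem.get(hdfsConf) resolved to MapR-FS and then looked for the container-local path there. A quick check I could drop into the task to confirm which filesystem is actually being hit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

// Diagnostic sketch: print the URI of the default filesystem the task sees.
FileSystem fs = FileSystem.get(new Configuration());
System.out.println("default FS: " + fs.getUri());   // expecting maprfs:///, not file:///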
