val spark = SparkSession.builder
  .appName(appName)
  .config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore")
  .config("hive.exec.dynamic.partition", "true")
  .config("hive.exec.dynamic.partition.mode", "nonstrict")
  .config("hive.exec.max.dynamic.partitions", 5000)
  .config("hive.exec.max.dynamic.partitions.pernode", 5000)
  .enableHiveSupport()
  .master("local[2]")
  .getOrCreate()

spark
  .sparkContext
  .hadoopConfiguration
  .set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")

spark.read.json("s3a:///bucketname/foldername/").inputFiles
This raises the following exception:
Exception in thread "main" java.lang.NullPointerException: null uri host.
at java.util.Objects.requireNonNull(Objects.java:228)
at org.apache.hadoop.fs.s3native.S3xLoginHelper.buildFSURI(S3xLoginHelper.java:73)
at org.apache.hadoop.fs.s3a.S3AFileSystem.setUri(S3AFileSystem.java:470)
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:235)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3303)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3320)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:547)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:545)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.immutable.List.flatMap(List.scala:355)
at org.apache.spark.sql.execution.datasources.DataSource.org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary(DataSource.scala:545)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:359)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:391)
at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:325)
I have verified that I am able to read from the bucket and have the correct permissions.
Apparently I was missing the bucket name from the path. I also needed s3a:// (with the bucket as the URI host) instead of s3a:///, which leaves the host empty and causes the "null uri host" error.
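For reference, a minimal sketch of the corrected call (bucket and prefix are placeholders):

// The bucket name must appear as the URI host right after "s3a://";
// "s3a:///..." leaves the host empty and triggers the "null uri host" NPE.
val files = spark.read
  .json("s3a://my-bucket/foldername/") // hypothetical bucket name
  .inputFiles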
I'm using Spark 3.1.2, Scala 2.12, Hadoop 3.1.1.3.1.2-50, Elasticsearch 7.10.1 (due to license issues), and CentOS 7
to try to ingest JSON data from gzip files located on HDFS into Elasticsearch using Spark Streaming.
I get the following error:
Logical Plan:
FileStreamSource[hdfs://pct/user/papago-mlops-datalake/raw/mt-log/engine=n2mt/year=2022/date=0430/hour=00]
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:356)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:244)
Caused by: java.lang.NoSuchMethodError: org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(Lorg/apache/spark/sql/SparkSession;Lorg/apache/spark/sql/execution/QueryExecution;Lscala/Function0;)Ljava/lang/Object;
at org.elasticsearch.spark.sql.streaming.EsSparkSqlStreamingSink.addBatch(EsSparkSqlStreamingSink.scala:62)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:586)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$15(MicroBatchExecution.scala:584)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:357)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:355)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:68)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:584)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:226)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:357)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:355)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:68)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:194)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:188)
at org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:334)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:317)
... 1 more
ApplicationMaster host: ac3m8x2183.bdp.bdata.ai
ApplicationMaster RPC port: 39673
queue: batch
start time: 1654588583366
final status: FAILED
tracking URL: https://gemini-rm2.bdp.bdata.ai:9090/proxy/application_1654575947385_29572/
user: papago-mlops-datalake
Exception in thread "main" org.apache.spark.SparkException: Application application_1654575947385_29572 finished with failed status
at org.apache.spark.deploy.yarn.Client.run(Client.scala:1269)
at org.apache.spark.deploy.yarn.YarnClusterApplication.start(Client.scala:1627)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:904)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
using the following dependencies:
implementation("org.elasticsearch:elasticsearch-hadoop:8.2.2")
implementation("com.typesafe:config:1.4.2")
implementation("org.apache.spark:spark-sql_2.12:3.1.2")
testImplementation("org.scalatest:scalatest_2.12:3.2.12")
testRuntimeOnly("com.vladsch.flexmark:flexmark-all:0.61.0")
compileOnly("org.apache.spark:spark-sql_2.12:3.1.2")
compileOnly("org.apache.spark:spark-core_2.12:3.1.2")
compileOnly("org.apache.spark:spark-launcher_2.12:3.1.2")
compileOnly("org.apache.spark:spark-streaming_2.12:3.1.2")
compileOnly("org.elasticsearch:elasticsearch-spark-30_2.12:8.2.2")
I also tried ES-Hadoop version 7.10.1, but ES-Spark only supports Spark 3.0 from version 7.12.0 onwards, and I still get the same error.
My code is pretty simple
def main(args: Array[String]): Unit = {
  // Set the log level to only print errors
  Logger.getLogger("org").setLevel(Level.ERROR)

  val spark = SparkSession
    .builder()
    .config(ConfigurationOptions.ES_NET_HTTP_AUTH_USER, elasticsearchUser)
    .config(ConfigurationOptions.ES_NET_HTTP_AUTH_PASS, elasticsearchPass)
    .config(ConfigurationOptions.ES_NODES, elasticsearchHost)
    .config(ConfigurationOptions.ES_PORT, elasticsearchPort)
    .appName(appName)
    .master(master)
    .getOrCreate()

  // Read the gzipped JSON files from HDFS as a streaming DataFrame
  val streamingDF: DataFrame = spark.readStream
    .schema(jsonSchema)
    .format("org.apache.spark.sql.execution.datasources.json.JsonFileFormat")
    .load(pathToJSONResource)

  // Write the stream to Elasticsearch
  streamingDF.writeStream
    .outputMode(outputMode)
    .format(destination)
    .option("checkpointLocation", checkpointLocation)
    .start(indexAndDocType)
    .awaitTermination()

  // Stop the session
  spark.stop()
}
} // closes the enclosing application object (declaration not shown in this snippet)
If I can't use the ES-Hadoop libraries is there another way I can go about ingesting JSON into ES from HDFS?
I am trying to execute this writeStream:
def _write_stream(data_frame, checkpoint_path, write_stream_path):
    data_frame.writeStream.format("delta") \
        .option("checkpointLocation", checkpoint_path) \
        .trigger(processingTime="1 second") \
        .option("mergeSchema", "true") \
        .outputMode("append") \
        .table(write_stream_path)
but I get this error
at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:428)
at org.apache.spark.util.ThreadUtils$.parallelMap(ThreadUtils.scala:399)
at com.databricks.sql.streaming.state.RocksDBFileManager.loadImmutableFilesFromDbfs(RocksDBFileManager.scala:433)
at com.databricks.sql.streaming.state.RocksDBFileManager.loadCheckpointFromDbfs(RocksDBFileManager.scala:202)
at com.databricks.sql.rocksdb.CloudRocksDB.$anonfun$open$5(CloudRocksDB.scala:437)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.Utils$.timeTakenMs(Utils.scala:627)
at com.databricks.sql.rocksdb.CloudRocksDB.timeTakenMs(CloudRocksDB.scala:523)
at com.databricks.sql.rocksdb.CloudRocksDB.$anonfun$open$2(CloudRocksDB.scala:435)
at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:369)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:457)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:477)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:240)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:235)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:232)
at com.databricks.spark.util.PublicDBLogging.withAttributionContext(DatabricksSparkUsageLogger.scala:20)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:279)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:271)
at com.databricks.spark.util.PublicDBLogging.withAttributionTags(DatabricksSparkUsageLogger.scala:20)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:452)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:378)
at com.databricks.spark.util.PublicDBLogging.recordOperationWithResultTags(DatabricksSparkUsageLogger.scala:20)
at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:369)
at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:341)
at com.databricks.spark.util.PublicDBLogging.recordOperation(DatabricksSparkUsageLogger.scala:20)
at com.databricks.spark.util.PublicDBLogging.recordOperation0(DatabricksSparkUsageLogger.scala:57)
at com.databricks.spark.util.DatabricksSparkUsageLogger.recordOperation(DatabricksSparkUsageLogger.scala:125)
at com.databricks.spark.util.UsageLogger.recordOperation(UsageLogger.scala:70)
at com.databricks.spark.util.UsageLogger.recordOperation$(UsageLogger.scala:57)
at com.databricks.spark.util.DatabricksSparkUsageLogger.recordOperation(DatabricksSparkUsageLogger.scala:86)
at com.databricks.spark.util.UsageLogging.recordOperation(UsageLogger.scala:402)
at com.databricks.spark.util.UsageLogging.recordOperation$(UsageLogger.scala:381)
at com.databricks.sql.rocksdb.CloudRocksDB.recordOperation(CloudRocksDB.scala:52)
at com.databricks.sql.rocksdb.CloudRocksDB.recordRocksDBOperation(CloudRocksDB.scala:542)
at com.databricks.sql.rocksdb.CloudRocksDB.$anonfun$open$1(CloudRocksDB.scala:427)
at com.databricks.backend.daemon.driver.ProgressReporter$.withStatusCode(ProgressReporter.scala:377)
at com.databricks.backend.daemon.driver.ProgressReporter$.withStatusCode(ProgressReporter.scala:363)
at com.databricks.spark.util.SparkDatabricksProgressReporter$.withStatusCode(ProgressReporter.scala:34)
at com.databricks.sql.rocksdb.CloudRocksDB.open(CloudRocksDB.scala:427)
at com.databricks.sql.rocksdb.CloudRocksDB.<init>(CloudRocksDB.scala:80)
at com.databricks.sql.rocksdb.CloudRocksDB$.open(CloudRocksDB.scala:595)
at com.databricks.sql.fileNotification.autoIngest.CloudFilesSource.<init>(CloudFilesSource.scala:82)
at com.databricks.sql.fileNotification.autoIngest.CloudFilesNotificationSource.<init>(CloudFilesNotificationSource.scala:44)
at com.databricks.sql.fileNotification.autoIngest.CloudFilesSourceProvider.createSource(CloudFilesSourceProvider.scala:172)
at org.apache.spark.sql.execution.datasources.DataSource.createSource(DataSource.scala:326)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$1.$anonfun$applyOrElse$1(MicroBatchExecution.scala:100)
at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$1.applyOrElse(MicroBatchExecution.scala:97)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$1.applyOrElse(MicroBatchExecution.scala:95)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:484)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:86)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:484)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:262)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:258)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:460)
at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:428)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.planQuery(MicroBatchExecution.scala:95)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.logicalPlan$lzycompute(MicroBatchExecution.scala:165)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.logicalPlan(MicroBatchExecution.scala:165)
at org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:349)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:852)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:341)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:268)
Caused by: java.io.FileNotFoundException: No such file or directory: s3:///**/*/checkpoint/sources/0/rocksdb/SSTs/.sst
at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:3254)
at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.innerGetFileStatus(S3AFileSystem.java:3137)
at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:3076)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:337)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:289)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2034)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2003)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:1979)
at com.databricks.sql.streaming.state.RocksDBFileManager.$anonfun$loadImmutableFilesFromDbfs$6(RocksDBFileManager.scala:442)
at com.databricks.sql.streaming.state.RocksDBFileManager.$anonfun$loadImmutableFilesFromDbfs$6$adapted(RocksDBFileManager.scala:433)
at org.apache.spark.util.ThreadUtils$.$anonfun$parallelMap$2(ThreadUtils.scala:397)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:68)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:54)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:101)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Please check whether the checkpoint_path location exists. The error log clearly says that the path does not exist:
Caused by: java.io.FileNotFoundException: No such file or directory: s3:///**/*/checkpoint/sources/0/rocksdb/SSTs/.sst
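For illustration, a minimal sketch (Scala here, with hypothetical bucket and table names; the same idea applies to the Python API) of passing a fully qualified checkpoint location. Note that the error above shows s3:/// with no bucket in the URI authority:

// Hypothetical names; df is assumed to be an existing streaming DataFrame.
val checkpointPath = "s3://my-bucket/streams/events/checkpoint" // bucket must appear as the URI host

df.writeStream
  .format("delta")
  .option("checkpointLocation", checkpointPath)
  .outputMode("append")
  .toTable("events") // Spark 3.1+: start the stream writing into a table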
Hi all, I'm running Spark (2.4.4) in a Kerberos environment and have written code to query a Hive table via Spark. I am also doing kinit in the spark-submit command, but I still get:
java.io.IOException: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS];
Here is my code:
@transient lazy val spark: SparkSession = getSparkSession()

def getSparkSession(): SparkSession = {
  log.info("Creating spark session")
  val sparkBuilder: SparkSession.Builder = SparkSession.builder()
    .master("local[*]")
    .appName("Query Hive Via Spark")
    .config("hive.exec.scratchdir", "/tmp/hive")
    .enableHiveSupport()
    .config("hive.exec.dynamic.partition", "true")
    .config("hive.exec.dynamic.partition.mode", "nonstrict")
    .config("hive.exec.max.dynamic.partitions", "1000")

  val spark: SparkSession = sparkBuilder.getOrCreate()
  registerUdfs(spark)
  spark.sparkContext.setLogLevel(logLevel)
  spark
}
Code to access Hive tables via Spark SQL:
val resultDF= spark.sql(s"SELECT count(*) AS cnt FROM brl_in_cash.cash_in_incoming_data WHERE insert_date='20200821'")
resultDF.printSchema()
resultDF.show(false)
I am executing a shell script for spark-submit where I do kinit and also pass --principal $KERBEROS_PRINCIPAL --keytab $KERBEROS_KEYTAB.
spark-submit command:
spark-submit --master yarn --deploy-mode cluster \
--verbose \
--name ${appName} \
--principal $KERBEROS_PRINCIPAL \
--keytab $KERBEROS_KEYTAB \
--driver-memory 4g \
--executor-memory 4g \
--executor-cores 2 \
--files ${hiveSite.xml} \
--conf spark.hadoop.yarn.timeline-service.enabled=false \
--conf spark.hadoop.yarn.client.failover-proxy-provider=org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider \
--conf spark.security.credentials.EsServiceCredentialProvider.enabled=false \
--class com.dpk.hive.HiveViaSpark "${jarPath}"
Error log:
20/08/26 13:34:17 INFO TezClient: Failed to retrieve AM Status via proxy
com.google.protobuf.ServiceException: java.io.IOException: Failed on local exception: java.io.IOException: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS]; Host Details : local host is: "dfghcv012.global.xyz.com/10.7.1.52"; destination host is: "dfghcv013.global.xyz.com":43890;
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:243)
at com.sun.proxy.$Proxy36.getAMStatus(Unknown Source)
at org.apache.tez.client.TezClient.getAppMasterStatus(TezClient.java:618)
at org.apache.tez.client.TezClient.waitTillReady(TezClient.java:697)
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState.open(TezSessionState.java:205)
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState.open(TezSessionState.java:116)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:532)
at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:183)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:117)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:271)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:384)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:286)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:215)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:214)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager$lzycompute(SharedState.scala:141)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager(SharedState.scala:136)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$2.apply(HiveSessionStateBuilder.scala:55)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$2.apply(HiveSessionStateBuilder.scala:55)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager$lzycompute(SessionCatalog.scala:91)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager(SessionCatalog.scala:91)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.isTemporaryTable(SessionCatalog.scala:736)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.isRunningDirectlyOnFiles(Analyzer.scala:747)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.resolveRelation(Analyzer.scala:681)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:713)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:706)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:89)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:329)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:327)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:329)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:327)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:706)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:652)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:127)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:121)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:106)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at com.sc.sdm.rt.oa.recon.TestConnection$.main(TestConnection.scala:34)
at com.sc.sdm.rt.oa.recon.TestConnection.main(TestConnection.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:684)
Caused by: java.io.IOException: Failed on local exception: java.io.IOException: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS]; Host Details : local host is: "dfghcv012.global.xyz.com/10.7.1.52"; destination host is: "dfghcv013.global.xyz.com":43890;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:776)
at org.apache.hadoop.ipc.Client.call(Client.java:1479)
at org.apache.hadoop.ipc.Client.call(Client.java:1412)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
... 91 more
Any help is appreciated!
I was able to resolve this.
I removed
--files hive-site.xml
and added the Spark configuration
--conf spark.security.credentials.hadoopfs.enabled=true
These changes worked for me.
I am running a Spark streaming application where each batch writes its final output to S3 in parquet format using SQLContext.
I am able to get this application to run successfully in EMR.
However, after running for a couple of hours, the Spark job suddenly halts on a FileNotFoundException.
I am not sure what to do next here.
Any pointers on how to debug/fix this issue would be useful.
I use Spark 2.2.1, EMR 5.1.1, and Java 8 for my application.
My streaming application code:
public class StreamingApp {

    JavaStreamingContext initDAG() {
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        // new context
        JavaStreamingContext jssc = new JavaStreamingContext(sc, batchInterval);
        SQLContext sqlContext = new SQLContext(sc);
        ...
        // Converting to Dataset's Row type
        JavaDStream<Row> rowStream = inputStream.map(new ObjectToRowMapperFunction());
        // Writing to Disk
        rowStream.foreachRDD(new RddToParquetFunction(sqlContext));
        return jssc;
    }
    ...
}

public class RddToParquetFunction implements VoidFunction<JavaRDD<Row>> {

    private final StructType userStructType;
    private final SQLContext sqlContext;

    public RddToParquetFunction(SQLContext sqlContext) {
        userStructType = ProtobufSparkStructMapper.schemaFor(UserMessage.class);
        this.sqlContext = sqlContext;
    }

    @Override
    public void call(JavaRDD<Row> rowRDD) throws Exception {
        Dataset<Row> userDataFrame = sqlContext.createDataFrame(rowRDD, userStructType);
        userDataFrame.write().mode(SaveMode.Append).parquet("s3://XXXXXXX/XXXXX/");
    }
}
Relevant Spark driver logs:
18/02/15 22:47:57 ERROR ApplicationMaster: User class threw exception: org.apache.spark.SparkException: Job aborted.
org.apache.spark.SparkException: Job aborted.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:213)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:166)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:166)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:166)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:435)
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:471)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:50)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:609)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:508)
at app.functions.RddToParquetFunction.call(RddToParquetFunction.java:37)
at app.functions.RddToParquetFunction.call(RddToParquetFunction.java:17)
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:416)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:257)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:256)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.FileNotFoundException: File s3://XXXXXXX/XXXXX/output/_temporary/0/task_20180215224653_0267_m_000032 does not exist.
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.listStatus(S3NativeFileSystem.java:996)
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.listStatus(S3NativeFileSystem.java:937)
at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.listStatus(EmrFileSystem.java:337)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:426)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJobInternal(FileOutputCommitter.java:362)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJob(FileOutputCommitter.java:334)
at org.apache.parquet.hadoop.ParquetOutputCommitter.commitJob(ParquetOutputCommitter.java:47)
at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitJob(HadoopMapReduceCommitProtocol.scala:142)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:207)
... 57 more
Unless you pay the premium for Amazon's consistent EMRFS, you can't reliably use S3 as a destination for your work.
ASF Hadoop + Spark have fixed this on Hadoop 3.1+ with the S3A committers. Without that, and on Amazon EMR, you need to write to HDFS and then use distcp to copy the results up to S3 if needed. If you are chaining work together, leave it on HDFS.
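As a rough sketch of that recommendation (Scala here; paths and names are hypothetical), each batch writes to HDFS and the finished output is copied to S3 out of band:

import org.apache.spark.sql.{DataFrame, SaveMode}

// Hypothetical helper: write a micro-batch to HDFS instead of directly to S3.
def writeBatchToHdfs(batch: DataFrame): Unit = {
  batch.write
    .mode(SaveMode.Append)
    .parquet("hdfs:///data/streaming-output/") // hypothetical HDFS path
}

// Afterwards, copy the completed output up to S3 out of band, e.g.:
//   hadoop distcp hdfs:///data/streaming-output s3a://my-bucket/streaming-output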
I am trying to use SparkSession to read data from Hive.
My code:
val warehouseLocation = "/user/xx/warehouse"
val spark = SparkSession
  .builder()
  .master("local[*]")
  .appName("HiveReceiver")
  .config("spark.sql.warehouse.dir", warehouseLocation)
  .enableHiveSupport()
  .getOrCreate()

import spark.sql

sql("select * from sparktest.test").show()

spark.stop()
My versions:
Spark: 2.1.1
Hive: 1.2.1
Hadoop: 2.7.1
But I get this exception when it runs in IDEA:
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.hive.metastore.api.Table.setTableName(Ljava/lang/String;)V
at org.apache.spark.sql.hive.MetastoreRelation.<init>(MetastoreRelation.scala:76)
at org.apache.spark.sql.hive.HiveMetastoreCatalog.lookupRelation(HiveMetastoreCatalog.scala:142)
at org.apache.spark.sql.hive.HiveSessionCatalog.lookupRelation(HiveSessionCatalog.scala:70)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:457)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:479)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:464)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$1.apply(LogicalPlan.scala:58)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$1.apply(LogicalPlan.scala:58)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:307)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:305)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:58)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:464)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:454)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:69)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:67)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:50)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:63)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
at com.bdp.steaming.HiveReceiver$.main(HiveReceiver.scala:24)
at com.bdp.steaming.HiveReceiver.main(HiveReceiver.scala)
Can someone tell me where the bug is?
I have solved this. In my case, there were two hive-metastore dependencies in my project; I excluded one of them and it worked.
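For reference, if the build is sbt-based, an exclusion along these lines is one way to drop the duplicate artifact (the coordinates below are purely illustrative; point it at whichever dependency pulls in the extra hive-metastore):

// build.sbt (sketch): exclude the transitively-pulled hive-metastore from a
// hypothetical dependency so only one copy remains on the classpath.
libraryDependencies += ("some.vendor" % "their-hive-client" % "1.0.0")
  .exclude("org.apache.hive", "hive-metastore")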