Answered — Assumed Answered

SparkSQL Job Fails on Hive - Doesn't get Hive Settings

Question asked by bookbinder on Feb 2, 2017
Latest reply on Feb 17, 2017 by maprcommunity

Hi Everyone,

 

I am unable to run SparkSQL jobs on Hive. This is in a MapR 5.1 secure cluster running hive 1.2 and spark 1.6.1. I've followed all the guides to setup spark on yarn, in a secure cluster, for Spark-SQL.

 

Command/Results

[mapr@SERVER ~]$ MASTER=yarn-cluster /opt/mapr/spark/spark-1.6.1/bin/run-example sql.hive.HiveFromSpark
17/02/02 17:16:34 WARN HiveConf: HiveConf of name hive.sentry.subject.name does not exist
17/02/02 17:16:34 WARN HiveConf: HiveConf of name hive.sentry.conf.url does not exist
17/02/02 17:16:34 WARN HiveConf: HiveConf of name hive.internal.ss.authz.settings.applied.marker does not exist
17/02/02 17:16:35 WARN HiveConf: HiveConf of name hive.sentry.subject.name does not exist
17/02/02 17:16:35 WARN HiveConf: HiveConf of name hive.sentry.conf.url does not exist
17/02/02 17:16:35 WARN HiveConf: HiveConf of name hive.internal.ss.authz.settings.applied.marker does not exist
17/02/02 17:16:35 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1238)

 

I am attaching my configuration files since that may help.

 

spark-defaults.conf

spark.executor.memory 2g
spark.logConf true
spark.eventLog.dir maprfs:///apps/spark
spark.eventLog.enabled true
spark.ssl.keyPassword ***
spark.ssl.keyStore /opt/mapr/conf/ssl_keystore
spark.ssl.keyStorePassword ***
spark.ssl.trustStore /opt/mapr/conf/ssl_truststore
spark.ssl.trustStorePassword ***
spark.ssl.protocol TLSv1.2
spark.ssl.enabledAlgorithms TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
spark.executor.extraClassPath

spark.sql.hive.metastore.sharedPrefixes com.mysql.jdbc,org.postgresql,com.microsoft.sqlserver,oracle.jdbc,com.mapr.fs.shim.LibraryLoader,com.mapr.security.JNISecurity,com.mapr.fs.jni
spark.executor.extraClassPath

spark.yarn.jar maprfs:///apps/spark/spark-assembly-1.6.1-mapr-1611-hadoop2.7.0-mapr-1602.jar

#Security Settings
spark.ssl.akka.enabled true
spark.ssl.fs.enabled true
spark.authenticate true
#Spark History Server
spark.yarn.historyServer.address historyserver:18080
#Spark JARS
spark.yarn.dist.files /opt/mapr/hive/hive-1.2/conf/hive-site.xml,/opt/mapr/hive/hive-1.2/lib/datanucleus-api-jdo-4.2.1.jar,/opt/mapr/hive/hive-1.2/lib/datanucleus-core-4.1.6.jar,/opt/mapr/hive/hive-1.2/lib/datanucleus-rdbms-4.1.7.jar
spark.sql.hive.metastore.version 1.2.1
spark.sql.hive.metastore.jars /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop:/opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/common/lib/*:/opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/common/*:/opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/mapreduce/*:/opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/mapreduce/lib/*:/opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/yarn/*:/opt/mapr/hadoop/hadoop-2.7.0/share/hadoop/yarn/lib/*:/opt/mapr/hive/hive-1.2/lib/accumulo-core-1.6.0.jar:/opt/mapr/hive/hive-1.2/lib/hive-contrib-1.2.0-mapr-1611.jar:/opt/mapr/hive/hive-1.2/lib/*:/opt/mapr/sentry/sentry-1.6.0/lib/*

 

hive-site.xml

 

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
<description>Creates the necessary schema on startup if one doesn't exist. Set
this to false after the schema has been created once.</description>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://DBSERVER:3306/METADB?createDatabaseIfNotExist=false</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>

<!--extra sentry
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>

<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
<description>Set this property to enable impersonation in Hive Server 2</description>
</property>
-->

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>

<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>USER</value>
<description>username to use against metastore database</description>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>PASS</value>
<description>password to use against metastore database</description>
</property>

<property>
<name>hive.metastore.uris</name>
<value>thrift://SERVERNAME:9083</value>
</property>
<!--sentry-->
<property>
<name>hive.server2.session.hook</name>
<value>org.apache.sentry.binding.hive.HiveAuthzBindingSessionHook</value>
</property>

<property>
<name>hive.sentry.conf.url</name>
<value>file:///opt/mapr/sentry/sentry-1.6.0/conf/sentry-site.xml</value>
<description>sentry-site.xml file location</description>
</property>

<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.sentry.binding.metastore.AuthorizingObjectStore</value>
</property>

<property>
<name>hive.metastore.filter.hook</name>
<value>org.apache.sentry.binding.metastore.SentryMetaStoreFilterHook</value>
</property>

<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
<description>Controls impersonation in Hive Server 2; false runs queries as the HiveServer2 service user instead of the submitting user</description>
</property>

<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>

<property>
<name>hive.internal.ss.authz.settings.applied.marker</name>
<value>true</value>
</property>

<property>
<name>hive.sentry.subject.name</name>
<value>mapr</value>
</property>

<property>
<name>hive.support.concurrency</name>
<description>Enable Hive's Table Lock Manager Service</description>
<value>true</value>
</property>

<property>
<name>hive.security.authorization.task.factory</name>
<value>org.apache.sentry.binding.hive.SentryHiveAuthorizationTaskFactoryImpl</value>
</property>

<!-- NOTE: duplicate of hive.metastore.rawstore.impl already defined above; the later definition wins -->
<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.sentry.binding.metastore.AuthorizingObjectStore</value>
</property>

<property>
<name>hive.metastore.pre.event.listeners</name>
<value>org.apache.sentry.binding.metastore.MetastoreAuthzBinding</value>
<description>list of comma separated listeners for metastore events.</description>
</property>

<!--end sentry-->

<property>
<name>hive.zookeeper.quorum</name>
<value>SERVER1:5181,SERVER2:5181,SERVER3:5181</value>
</property>

<!--Start of Kerberos Configuration-->

<property>
<name>hive.metastore.sasl.enabled</name>
<value>true</value>
</property>

<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value>/opt/mapr/conf/mapr.keytab</value>
</property>

<property>
<name>hive.metastore.kerberos.principal</name>
<value>mapr/user@DOMAIN.NET</value>
</property>

<property>
<name>hive.server2.authentication</name>
<value>KERBEROS</value>
</property>

<property>
<name>hive.server2.authentication.kerberos.principal</name>
<value>mapr/user@DOMAIN.NET</value>
</property>

<property>
<name>hive.server2.authentication.kerberos.keytab</name>
<value>/opt/mapr/conf/mapr.keytab</value>
</property>

<!--insert properties-->
<!-- NOTE: duplicate of hive.support.concurrency already defined above; the later definition wins -->
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>

<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>

<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>

<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>

<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>

<property>
<name>hive.compactor.worker.threads</name>
<value>1</value>
</property>

<!--end insert properties-->

<!--Performance Tuning-->
<property>
<name>hive.server2.thrift.max.worker.threads</name> <!--default:100 (500 from Hive 0.12)-->
<value>1000</value>
<description>Maximum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.min.worker.threads</name> <!--default:5-->
<value>50</value>
<description>Minimum number of Thrift worker threads</description>
</property>
<!--Hiveserver2 client connections-->
<!--Hive dynamic partitions-->
<property>
<name>hive.exec.max.dynamic.partitions</name> <!--default:1000-->
<value>10000</value>
<description>Maximum number of dynamic partitions allowed to be created in total</description>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name> <!--default:100-->
<value>1000</value>
<description>Maximum number of dynamic partitions allowed to be created in each mapper/reducer node</description>
</property>
<!--Hive dynamic partitions-->

<!--Session Time Out Setting-->
<property>
<name>hive.server2.thrift.worker.keepalive.time</name> <!--default:60-->
<value>600</value>
<description>Keepalive time (in seconds) for an idle worker thread. When number of workers > min workers, excess threads are killed after this time interval</description>
</property>

<!--Performance Tuning-->

</configuration>

Any help is much appreciated!

Outcomes