Posted to user@spark.apache.org by ๏̯͡๏ <ÐΞ€ρ@Ҝ>, de...@gmail.com on 2015/03/27 03:45:42 UTC

spark-sql throws org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException

I am unable to run spark-sql from the command line. I attempted the following:

1)

export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
export SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar
cd $SPARK_HOME

./bin/spark-sql

2)

export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
export SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar
cd $SPARK_HOME

./bin/spark-sql --jars /home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar


3)

export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
export SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar:/home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar
export HADOOP_CONF_DIR=/apache/hadoop/conf
cd $SPARK_HOME
./bin/spark-sql



Each time I get the exception below:


Spark assembly has been built with Hive, including Datanucleus jars on classpath
15/03/26 19:43:49 WARN conf.HiveConf: DEPRECATED: Configuration property hive.metastore.local no longer has any effect. Make sure to provide a valid value for hive.metastore.uris if you are connecting to a remote metastore.
15/03/26 19:43:49 WARN conf.HiveConf: DEPRECATED: hive.metastore.ds.retry.* no longer has any effect.  Use hive.hmshandler.retry.* instead
15/03/26 19:43:49 INFO metastore.HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore
15/03/26 19:43:49 INFO metastore.ObjectStore: ObjectStore, initialize called
15/03/26 19:43:50 INFO DataNucleus.Persistence: Property datanucleus.cache.level2 unknown - will be ignored
15/03/26 19:43:50 INFO DataNucleus.Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored
Exception in thread "main" java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.metastore.HiveMetaStoreClient
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:346)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:101)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.metastore.HiveMetaStoreClient
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1412)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:62)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:72)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:2453)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:2465)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:340)
... 11 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1410)
... 16 more
Caused by: javax.jdo.JDOFatalInternalException: Error creating transactional connection factory
NestedThrowables:
java.lang.reflect.InvocationTargetException
at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:587)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:788)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)
at java.security.AccessController.doPrivileged(Native Method)
at javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)
at javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)
at javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)
at org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:310)
at org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:339)
at org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:248)
at org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:223)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:73)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:133)
at org.apache.hadoop.hive.metastore.RawStoreProxy.<init>(RawStoreProxy.java:58)
at org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:67)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:497)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:475)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:523)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:397)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.<init>(HiveMetaStore.java:356)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:54)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:59)
at org.apache.hadoop.hive.metastore.HiveMetaStore.newHMSHandler(HiveMetaStore.java:4944)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:171)
... 21 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:325)
at org.datanucleus.store.AbstractStoreManager.registerConnectionFactory(AbstractStoreManager.java:282)
at org.datanucleus.store.AbstractStoreManager.<init>(AbstractStoreManager.java:240)
at org.datanucleus.store.rdbms.RDBMSStoreManager.<init>(RDBMSStoreManager.java:286)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)
at org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)
at org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)
at org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)
at org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)
... 50 more
Caused by: org.datanucleus.exceptions.NucleusException: Attempt to invoke the "BONECP" plugin to create a ConnectionPool gave an error : The specified datastore driver ("com.mysql.jdbc.Driver") was not found in the CLASSPATH. Please check your CLASSPATH specification, and the name of the driver.
at org.datanucleus.store.rdbms.ConnectionFactoryImpl.generateDataSources(ConnectionFactoryImpl.java:259)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl.initialiseDataSources(ConnectionFactoryImpl.java:131)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl.<init>(ConnectionFactoryImpl.java:85)
... 68 more
Caused by: org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException: The specified datastore driver ("com.mysql.jdbc.Driver") was not found in the CLASSPATH. Please check your CLASSPATH specification, and the name of the driver.
at org.datanucleus.store.rdbms.connectionpool.AbstractConnectionPoolFactory.loadDriver(AbstractConnectionPoolFactory.java:58)
at org.datanucleus.store.rdbms.connectionpool.BoneCPConnectionPoolFactory.createConnectionPool(BoneCPConnectionPoolFactory.java:54)
at org.datanucleus.store.rdbms.ConnectionFactoryImpl.generateDataSources(ConnectionFactoryImpl.java:238)
... 70 more
-- 
Deepak

Re: spark-sql throws org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException

Posted by Denny Lee <de...@gmail.com>.
If you're not using MySQL as your metastore for Hive, out of curiosity what
are you using?

The error you are seeing is common when the correct driver that allows
Spark to connect to the Hive metastore isn't on the classpath.

As well, I noticed that you're using SPARK_CLASSPATH, which has been
deprecated.  Depending on your scenario, you may want to use --jars,
--driver-class-path, or the spark.driver.extraClassPath /
spark.executor.extraClassPath configuration properties.  A good thread on
this topic can be found at
http://mail-archives.us.apache.org/mod_mbox/spark-user/201503.mbox/%3C01a901d0547c$a23ba480$e6b2ed80$@innowireless.com%3E
.
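
For instance, a sketch of the extraClassPath approach (the jar path below
is a placeholder for wherever your MySQL driver jar actually lives):

./bin/spark-sql --conf spark.driver.extraClassPath=/path/to/mysql-connector-java-5.1.34.jar

The metastore connection is made by the driver process, so the JDBC driver
jar has to be visible to the driver's classloader at startup; that is why
--driver-class-path (or spark.driver.extraClassPath) tends to work here
where --jars alone may not.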

For example, when I connect to my own Hive metastore via Spark 1.3 (in my
case the metastore is backed by MySQL), I pass the driver jar via
--driver-class-path:

./bin/spark-sql --master spark://$standalone$:7077 --driver-class-path mysql-connector-$version$.jar

HTH!


On Thu, Mar 26, 2015 at 8:09 PM ÐΞ€ρ@Ҝ (๏̯͡๏) <de...@gmail.com> wrote:

> I do not use MySQL; I want to read Hive tables from Spark SQL and
> transform them in Spark SQL. Why do I need a MySQL driver? If I still
> need it, which version should I use?
>
> Assuming I need it, I downloaded the latest version from
> http://mvnrepository.com/artifact/mysql/mysql-connector-java/5.1.34 and
> ran the following commands. I no longer see the above exception; however,
> I see a new one.
>
> export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
> export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
> export SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar:/home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar:/home/dvasthimal/spark1.3/mysql-connector-java-5.1.34.jar
> export HADOOP_CONF_DIR=/apache/hadoop/conf
> cd $SPARK_HOME
> ./bin/spark-sql
> Spark assembly has been built with Hive, including Datanucleus jars on classpath
> ...
> ...
>
> spark-sql>
>
> spark-sql>
>
> spark-sql>
>
>
> show tables;
>
> 15/03/26 20:03:57 INFO metastore.HiveMetaStore: 0: get_tables: db=default pat=.*
> 15/03/26 20:03:57 INFO HiveMetaStore.audit: ugi=dvasthimal@CORP.EBAY.COM ip=unknown-ip-addr cmd=get_tables: db=default pat=.*
> 15/03/26 20:03:58 INFO spark.SparkContext: Starting job: collect at SparkPlan.scala:83
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Got job 1 (collect at SparkPlan.scala:83) with 1 output partitions (allowLocal=false)
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Final stage: Stage 1(collect at SparkPlan.scala:83)
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Parents of final stage: List()
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Missing parents: List()
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Submitting Stage 1 (MapPartitionsRDD[3] at map at SparkPlan.scala:83), which has no missing parents
> 15/03/26 20:03:58 INFO scheduler.TaskSchedulerImpl: Cancelling stage 1
> 15/03/26 20:03:58 INFO scheduler.StatsReportListener: Finished stage: org.apache.spark.scheduler.StageInfo@2bfd9c4d
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Job 1 failed: collect at SparkPlan.scala:83, took 0.005163 s
> 15/03/26 20:03:58 ERROR thriftserver.SparkSQLDriver: Failed in [show tables]
> org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.reflect.InvocationTargetException
> sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
> org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
> at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.reflect.InvocationTargetException
> sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
> org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
> at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
> 15/03/26 20:03:58 ERROR CliDriver: org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.reflect.InvocationTargetException
> sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
> org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
> at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
>
> Regards,
>
> Deepak
>
>
>
> On Fri, Mar 27, 2015 at 8:33 AM, Cheng Lian <li...@gmail.com> wrote:
>
>>  As the exception suggests, you don't have the MySQL JDBC driver on your
>> classpath.
>
>
> --
> Deepak
>
>

Re: spark-sql throws org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException

Posted by ๏̯͡๏ <ÐΞ€ρ@Ҝ>, de...@gmail.com.
OK, I modified the command as per your suggestions:

export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
export HADOOP_CONF_DIR=/apache/hadoop/conf

cd $SPARK_HOME
./bin/spark-sql -v --driver-class-path /apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar:/home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar:/home/dvasthimal/spark1.3/mysql-connector-java-5.1.34.jar


This brings up the spark-sql prompt. I ran show tables and desc dw_bid;
each throws the exception below.

spark-sql> desc dw_bid;
15/03/26 23:10:14 WARN conf.HiveConf: DEPRECATED: Configuration property hive.metastore.local no longer has any effect. Make sure to provide a valid value for hive.metastore.uris if you are connecting to a remote metastore.
15/03/26 23:10:14 WARN conf.HiveConf: DEPRECATED: hive.metastore.ds.retry.* no longer has any effect.  Use hive.hmshandler.retry.* instead
15/03/26 23:10:14 INFO parse.ParseDriver: Parsing command: desc dw_bid
15/03/26 23:10:14 INFO parse.ParseDriver: Parse Completed
15/03/26 23:10:15 INFO metastore.HiveMetaStore: 0: get_table : db=default tbl=dw_bid
15/03/26 23:10:15 INFO HiveMetaStore.audit: ugi=dvasthimal@CORP.EBAY.COM ip=unknown-ip-addr cmd=get_table : db=default tbl=dw_bid
15/03/26 23:10:15 INFO spark.SparkContext: Starting job: collect at SparkPlan.scala:83
15/03/26 23:10:15 INFO scheduler.DAGScheduler: Got job 0 (collect at SparkPlan.scala:83) with 1 output partitions (allowLocal=false)
15/03/26 23:10:15 INFO scheduler.DAGScheduler: Final stage: Stage 0(collect at SparkPlan.scala:83)
15/03/26 23:10:15 INFO scheduler.DAGScheduler: Parents of final stage: List()
15/03/26 23:10:15 INFO scheduler.DAGScheduler: Missing parents: List()
15/03/26 23:10:15 INFO scheduler.DAGScheduler: Submitting Stage 0 (MapPartitionsRDD[1] at map at SparkPlan.scala:83), which has no missing parents
15/03/26 23:10:16 INFO scheduler.TaskSchedulerImpl: Cancelling stage 0
15/03/26 23:10:16 INFO scheduler.DAGScheduler: Job 0 failed: collect at SparkPlan.scala:83, took 0.078101 s
15/03/26 23:10:16 ERROR thriftserver.SparkSQLDriver: Failed in [desc dw_bid]
org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.reflect.InvocationTargetException
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:526)
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.reflect.InvocationTargetException
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:526)
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

15/03/26 23:10:16 ERROR CliDriver: org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.reflect.InvocationTargetException
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:526)
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

spark-sql> 15/03/26 23:10:16 INFO scheduler.StatsReportListener: Finished stage: org.apache.spark.scheduler.StageInfo@2c3fa686


Also, how do I know from the log messages that Spark SQL has connected to Hive?

On Fri, Mar 27, 2015 at 9:36 AM, Cheng Lian <li...@gmail.com> wrote:

>  Hey Deepak,
>
> It seems that your hive-site.xml says your Hive metastore setup is using
> MySQL. If that's not the case, you need to adjust your hive-site.xml
> configurations. As for the version of the MySQL driver, it should match
> the MySQL server.
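>
> You can check what the metastore is pointed at with something like this
> (the path is a placeholder; use the hive-site.xml that is actually on
> Spark's classpath, e.g. under $SPARK_HOME/conf or your HADOOP_CONF_DIR):
>
>   grep -A 1 'javax.jdo.option.Connection' /path/to/hive-site.xml
>
> A MySQL-backed setup will show javax.jdo.option.ConnectionURL starting
> with jdbc:mysql:// and javax.jdo.option.ConnectionDriverName set to
> com.mysql.jdbc.Driver. With no such entries, Hive falls back to an
> embedded Derby metastore and no MySQL driver is needed.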
>
> Cheng
>
> On 3/27/15 11:07 AM, ÐΞ€ρ@Ҝ (๏̯͡๏) wrote:
>
> I do not use MySQL, i want to read Hive tables from Spark SQL and
> transform them in Spark SQL. Why do i need a MySQL driver ? If i still need
> it which version should i use.
>
>  Assuming i need it, i downloaded the latest version of it from
> http://mvnrepository.com/artifact/mysql/mysql-connector-java/5.1.34 and
> ran the following commands, i do not see above exception , however i see a
> new one.
>
>
>
>
>
>  export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
> export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
> export
> SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar:/home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar:
> */home/dvasthimal/spark1.3/mysql-connector-java-5.1.34.jar*
> export HADOOP_CONF_DIR=/apache/hadoop/conf
> cd $SPARK_HOME
> ./bin/spark-sql
> Spark assembly has been built with Hive, including Datanucleus jars on
> classpath
> ...
> ...
>
> spark-sql>
>
>  spark-sql>
>
>  spark-sql>
>
>
>  show tables;
>
> 15/03/26 20:03:57 INFO metastore.HiveMetaStore: 0: get_tables: db=default
> pat=.*
>
> 15/03/26 20:03:57 INFO HiveMetaStore.audit: ugi=dvasthimal@CORP.EBAY.COM
> ip=unknown-ip-addr cmd=get_tables: db=default pat=.*
>
> 15/03/26 20:03:58 INFO spark.SparkContext: Starting job: collect at
> SparkPlan.scala:83
>
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Got job 1 (collect at
> SparkPlan.scala:83) with 1 output partitions (allowLocal=false)
>
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Final stage: Stage
> 1(collect at SparkPlan.scala:83)
>
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Parents of final stage:
> List()
>
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Missing parents: List()
>
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Submitting Stage 1
> (MapPartitionsRDD[3] at map at SparkPlan.scala:83), which has no missing
> parents
>
> 15/03/26 20:03:58 INFO scheduler.TaskSchedulerImpl: Cancelling stage 1
>
> 15/03/26 20:03:58 INFO scheduler.StatsReportListener: Finished stage:
> org.apache.spark.scheduler.StageInfo@2bfd9c4d
>
> 15/03/26 20:03:58 INFO scheduler.DAGScheduler: Job 1 failed: collect at
> SparkPlan.scala:83, took 0.005163 s
>
> 15/03/26 20:03:58 ERROR thriftserver.SparkSQLDriver: Failed in [show
> tables]
>
> org.apache.spark.SparkException: Job aborted due to stage failure: Task
> serialization failed: java.lang.reflect.InvocationTargetException
>
> sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
>
>
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
>
>
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)
>
>
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
>
>
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
>
> org.apache.spark.broadcast.TorrentBroadcast.org
> $apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
>
>
> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
>
>
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
>
>
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
>
>
> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
>
> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
>
> org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
>
> org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>
>
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
>
>  at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
>
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
>
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
>
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
>
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
>
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>
> at
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
> org.apache.spark.SparkException: Job aborted due to stage failure: Task
> serialization failed: java.lang.reflect.InvocationTargetException
>
> sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
>
>
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
>
>
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)
>
>
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
>
>
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
>
> org.apache.spark.broadcast.TorrentBroadcast.org
> $apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
>
>
> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
>
>
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
>
>
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
>
>
> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
>
> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
>
> org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
>
> org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>
>
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
>
>  at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
>
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
>
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
>
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
>
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
>
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>
> at
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
>
>  15/03/26 20:03:58 ERROR CliDriver: org.apache.spark.SparkException: Job
> aborted due to stage failure: Task serialization failed:
> java.lang.reflect.InvocationTargetException
>
> sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
>
>
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
>
>
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)
>
>
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)
>
>
> org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)
>
> org.apache.spark.broadcast.TorrentBroadcast.org
> $apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)
>
>
> org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)
>
>
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
>
>
> org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
>
>
> org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
>
> org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
>
> org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
>
> org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>
>
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>
>
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>
> org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
>
>  at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
>
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
>
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
>
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
>
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)
>
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>
> at
> org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
>
>
>  Regards,
>
> Deepak
>
>
>
> On Fri, Mar 27, 2015 at 8:33 AM, Cheng Lian <li...@gmail.com> wrote:
>
>>  As the exception suggests, you don't have the MySQL JDBC driver on your
>> classpath.


-- 
Deepak

Re: spark-sql throws org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException

Posted by Cheng Lian <li...@gmail.com>.
Hey Deepak,

It seems that your hive-site.xml says your Hive metastore setup is using
MySQL. If that's not the case, you need to adjust your hive-site.xml
configuration. As for the version of the MySQL driver, it should match the
MySQL server.

Cheng
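
For reference, a minimal sketch of the metastore section of hive-site.xml
that would lead to this behavior; the property names are Hive's standard JDO
connection settings, while the host, database name, and credentials below
are placeholders:

<configuration>
  <!-- Where the metastore database lives (placeholder host and db name) -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://metastore-host:3306/hive_metastore</value>
  </property>
  <!-- The class DataNucleus tries to load at startup; this is the driver
       the exception above reports as missing from the CLASSPATH -->
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <!-- Placeholder credentials for the metastore database -->
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>secret</value>
  </property>
</configuration>

With settings like these, the metastore client loads com.mysql.jdbc.Driver
at startup, which is why the driver jar must be on the classpath even though
no query touches MySQL directly; running SELECT VERSION(); on the MySQL
server shows the server version the driver should match.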

On 3/27/15 11:07 AM, ÐΞ€ρ@Ҝ (๏̯͡๏) wrote:
> I do not use MySQL; I want to read Hive tables from Spark SQL and
> transform them in Spark SQL. Why do I need a MySQL driver? If I still
> need it, which version should I use?
>
> Assuming I need it, I downloaded the latest version from
> http://mvnrepository.com/artifact/mysql/mysql-connector-java/5.1.34
> and ran the following commands. I no longer see the above exception;
> however, I see a new one.


Re: spark-sql throws org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException

Posted by ๏̯͡๏ <ÐΞ€ρ@Ҝ>, de...@gmail.com.
I do not use MySQL; I want to read Hive tables from Spark SQL and transform
them in Spark SQL. Why do I need a MySQL driver? If I still need it, which
version should I use?

Assuming I need it, I downloaded the latest version from
http://mvnrepository.com/artifact/mysql/mysql-connector-java/5.1.34 and ran
the following commands. I no longer see the above exception; however, I see
a new one.

export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
export
SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar:/home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar:/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar:/home/dvasthimal/spark1.3/mysql-connector-java-5.1.34.jar
export HADOOP_CONF_DIR=/apache/hadoop/conf
cd $SPARK_HOME
./bin/spark-sql
Spark assembly has been built with Hive, including Datanucleus jars on
classpath
...
...

spark-sql> show tables;

15/03/26 20:03:57 INFO metastore.HiveMetaStore: 0: get_tables: db=default
pat=.*

15/03/26 20:03:57 INFO HiveMetaStore.audit: ugi=dvasthimal@CORP.EBAY.COM
ip=unknown-ip-addr cmd=get_tables: db=default pat=.*

15/03/26 20:03:58 INFO spark.SparkContext: Starting job: collect at
SparkPlan.scala:83

15/03/26 20:03:58 INFO scheduler.DAGScheduler: Got job 1 (collect at
SparkPlan.scala:83) with 1 output partitions (allowLocal=false)

15/03/26 20:03:58 INFO scheduler.DAGScheduler: Final stage: Stage 1(collect
at SparkPlan.scala:83)

15/03/26 20:03:58 INFO scheduler.DAGScheduler: Parents of final stage:
List()

15/03/26 20:03:58 INFO scheduler.DAGScheduler: Missing parents: List()

15/03/26 20:03:58 INFO scheduler.DAGScheduler: Submitting Stage 1
(MapPartitionsRDD[3] at map at SparkPlan.scala:83), which has no missing
parents

15/03/26 20:03:58 INFO scheduler.TaskSchedulerImpl: Cancelling stage 1

15/03/26 20:03:58 INFO scheduler.StatsReportListener: Finished stage:
org.apache.spark.scheduler.StageInfo@2bfd9c4d

15/03/26 20:03:58 INFO scheduler.DAGScheduler: Job 1 failed: collect at
SparkPlan.scala:83, took 0.005163 s

15/03/26 20:03:58 ERROR thriftserver.SparkSQLDriver: Failed in [show tables]

org.apache.spark.SparkException: Job aborted due to stage failure: Task
serialization failed: java.lang.reflect.InvocationTargetException

sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)

sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)

sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)

java.lang.reflect.Constructor.newInstance(Constructor.java:526)

org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)

org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)

org.apache.spark.broadcast.TorrentBroadcast.org
$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)

org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)

org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)

org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)

org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)

org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)

org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)

org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)

org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)

org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)


at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)

at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)

at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)

at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)

at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)

at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)

at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)

at
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)

at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)

at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)

at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

org.apache.spark.SparkException: Job aborted due to stage failure: Task
serialization failed: java.lang.reflect.InvocationTargetException

sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)

sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)

sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)

java.lang.reflect.Constructor.newInstance(Constructor.java:526)

org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)

org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)

org.apache.spark.broadcast.TorrentBroadcast.org
$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)

org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)

org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)

org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)

org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)

org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)

org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)

org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)

org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)

org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)


at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)

at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)

at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)

at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)

at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)

at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)

at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)

at
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)

at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)

at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)

at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)


15/03/26 20:03:58 ERROR CliDriver: org.apache.spark.SparkException: Job
aborted due to stage failure: Task serialization failed:
java.lang.reflect.InvocationTargetException

sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)

sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)

sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)

java.lang.reflect.Constructor.newInstance(Constructor.java:526)

org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:68)

org.apache.spark.io.CompressionCodec$.createCodec(CompressionCodec.scala:60)

org.apache.spark.broadcast.TorrentBroadcast.org
$apache$spark$broadcast$TorrentBroadcast$$setConf(TorrentBroadcast.scala:73)

org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:79)

org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)

org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)

org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)

org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)

org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)

org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)

org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)

org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)


at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)

at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)

at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)

at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)

at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)

at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:847)

at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)

at
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)

at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)

at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)

at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)


Regards,

Deepak
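
A note on the mechanics here, since the connector jar was appended to
SPARK_CLASSPATH: that variable has been deprecated since Spark 1.0, and
spark-submit's --driver-class-path option (which ./bin/spark-sql forwards)
is the supported way to put a jar on the driver's classpath. A minimal
sketch, reusing the paths from the commands above:

# Sanity check: confirm the connector jar really contains the class named
# in the earlier exception (com.mysql.jdbc.Driver).
unzip -l /home/dvasthimal/spark1.3/mysql-connector-java-5.1.34.jar | grep com/mysql/jdbc/Driver.class

# Hand the jar to the driver JVM explicitly instead of via SPARK_CLASSPATH.
cd /home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
./bin/spark-sql --driver-class-path /home/dvasthimal/spark1.3/mysql-connector-java-5.1.34.jar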



On Fri, Mar 27, 2015 at 8:33 AM, Cheng Lian <li...@gmail.com> wrote:

>  As the exception suggests, you don't have the MySQL JDBC driver on your
> classpath.
>
>
>
> On 3/27/15 10:45 AM, ÐΞ€ρ@Ҝ (๏̯͡๏) wrote:
>
>  I am unable to run spark-sql form command line.  I attempted the
> following
>
>  1)
>
>  export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
> export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
> export
> SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar
> cd $SPARK_HOME
>
>  ./bin/spark-sql
>
>  ./bin/spark-sql
> 2)
>
>   export SPARK_HOME=/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4
> export SPARK_JAR=$SPARK_HOME/lib/spark-assembly-1.3.0-hadoop2.4.0.jar
> export
> SPARK_CLASSPATH=/apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop/lib/hadoop-lzo-0.6.0.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar
> cd $SPARK_HOME
>
>   ./bin/spark-sql --jars
> /home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar
> [snip]


-- 
Deepak

Re: spark-sql throws org.datanucleus.store.rdbms.connectionpool.DatastoreDriverNotFoundException

Posted by Cheng Lian <li...@gmail.com>.
As the exception suggests, you don't have the MySQL JDBC driver on your 
classpath. Your Hive metastore is configured to use MySQL (the stack trace 
shows DataNucleus trying to load com.mysql.jdbc.Driver), so that driver jar 
has to be visible to the JVM that spark-sql starts.
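
For example (a minimal sketch; the connector jar path and version below are 
placeholders, not taken from your environment), you could point spark-sql 
at the MySQL connector explicitly:

# NOTE: placeholder path and version; use your actual MySQL connector jar.
export MYSQL_JDBC=/path/to/mysql-connector-java-5.1.34.jar

# Sanity check: the jar should contain the driver class the metastore wants.
jar tf $MYSQL_JDBC | grep com/mysql/jdbc/Driver

# Either pass it to the driver JVM directly...
./bin/spark-sql --driver-class-path $MYSQL_JDBC

# ...or append it to the classpath you are already exporting.
export SPARK_CLASSPATH=$SPARK_CLASSPATH:$MYSQL_JDBC
./bin/spark-sql

The datanucleus-* jars you added are only the JDO layer; they still need a 
JDBC driver for whatever database hive-site.xml points at (presumably via 
javax.jdo.option.ConnectionDriverName=com.mysql.jdbc.Driver, given the 
stack trace).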


On 3/27/15 10:45 AM, ÐΞ€ρ@Ҝ (๏̯͡๏) wrote:
> [snip]