You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by Mich Talebzadeh <mi...@gmail.com> on 2018/08/26 22:05:59 UTC
Trying to create an extrenal table in Hive for MongoDBthrows error
Hi,
Trying to create an external table in Hive to be accessed by MongoDB.
This is the code in Hive
ADD JAR /home/hduser/jars/mongo-hadoop-core-2.0.2.jar;
ADD JAR /home/hduser/jars/mongo-hadoop-hive-2.0.2.jar;
ADD JAR /home/hduser/jars/mongo-java-driver-3.8.1.jar;
use accounts;
DROP TABLE IF EXISTS ll_18740868_mongo;
CREATE EXTERNAL TABLE ll_18740868_mongo (
TransactionDate DATE
,TransactionType String
,SortCode String
,AccountNumber String
,TransactionDescription String
,DebitAmount Double
,CreditAmount Double
,Balance Double
)
COMMENT 'Mongo external table'
STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
WITH SERDEPROPERTIES('mongo.columns.mapping'=
'{
"TransactionDate":"TransactionDate",
"TransactionType":"TransactionType",
"SortCode":"SortCode",
"AccountNumber":"AccountNumber",
"TransactionDescription":"TransactionDescription",
"DebitAmount":"DebitAmount",
"CreditAmount":"CreditAmount",
"Balance":"Balance"
}'
)
TBLPROPERTIES ('mongo.uri'='mongodb://account_user_RO:mongodb@rhes75
:60100/accounts.ll_18740868_mongo')
;
In debug mode it throws this error
CREATE EXTERNAL TABLE ll_18740868_mongo (
TransactionDate DATE
,TransactionType String
,SortCode String
,AccountNumber String
,TransactionDescription String
,DebitAmount Double
,CreditAmount Double
,Balance Double
)
COMMENT 'Mongo external table'
STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
WITH SERDEPROPERTIES('mongo.columns.mapping'=
'{
"TransactionDate":"TransactionDate",
"TransactionType":"TransactionType",
"SortCode":"SortCode",
"AccountNumber":"AccountNumber",
"TransactionDescription":"TransactionDescription",
"DebitAmount":"DebitAmount",
"CreditAmount":"CreditAmount",
"Balance":"Balance"
}'
)
TBLPROPERTIES ('mongo.uri'='mongodb://account_user_RO:mongodb@rhes75
:60100/accounts.ll_18740868_mongo')
2018-08-26 23:01:32,424 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
ql.Driver: Starting task [Stage-0:DDL] in serial mode
2018-08-26 23:01:32,426 ERROR [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
exec.DDLTask: Failed
java.lang.NoClassDefFoundError: org/apache/hadoop/hive/serde2/SerDe
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass
(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at
com.mongodb.hadoop.hive.MongoStorageHandler.getSerDeClass(MongoStorageHandler.java:88)
at
org.apache.hadoop.hive.ql.plan.CreateTableDesc.toTable(CreateTableDesc.java:744)
at
org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4882)
at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:428)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:205)
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:97)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2479)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:2150)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1826)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1567)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1561)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:157)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:218)
at
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239)
at
org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188)
at
org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402)
at
org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335)
at
org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:471)
at
org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:487)
at
org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:793)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassNotFoundException:
org.apache.hadoop.hive.serde2.SerDe
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 39 more
2018-08-26 23:01:32,426 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
reexec.ReOptimizePlugin: ReOptimization: retryPossible: false
FAILED: Execution Error, return code 1 from
org.apache.hadoop.hive.ql.exec.DDLTask. org/apache/hadoop/hive/serde2/SerDe
2018-08-26 23:01:32,426 ERROR [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
ql.Driver: FAILED: Execution Error, return code 1 from
org.apache.hadoop.hive.ql.exec.DDLTask. org/apache/hadoop/hive/serde2/SerDe
2018-08-26 23:01:32,426 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
ql.Driver: Completed executing
command(queryId=hduser_20180826230132_53320afb-5265-407f-82a3-a1b22e584e20);
Time taken: 0.005 seconds
2018-08-26 23:01:32,426 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
lockmgr.DbTxnManager: Stopped heartbeat for query:
hduser_20180826230132_53320afb-5265-407f-82a3-a1b22e584e20
2018-08-26 23:01:32,437 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
conf.HiveConf: Using the default value passed in for log id:
a928186b-244d-4b81-bda9-dd3a83e306b2
2018-08-26 23:01:32,437 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
session.SessionState: Resetting thread name to main
2018-08-26 23:01:32,437 INFO [main] conf.HiveConf: Using the default value
passed in for log id: a928186b-244d-4b81-bda9-dd3a83e306b2
2018-08-26 23:01:32,447 INFO [main] session.SessionState: Deleted
directory: /tmp/hive/hduser/a928186b-244d-4b81-bda9-dd3a83e306b2 on fs with
scheme hdfs
2018-08-26 23:01:32,447 INFO [main] session.SessionState: Deleted
directory: /tmp/hive/a928186b-244d-4b81-bda9-dd3a83e306b2 on fs with scheme
file
2018-08-26 23:01:32,447 INFO [main] metastore.HiveMetaStoreClient: Closed
a connection to metastore, current connections: 0
May be I am using incompatible jars here?
Dr Mich Talebzadeh
LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
<https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*
http://talebzadehmich.wordpress.com
*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.
Re: Trying to create an extrenal table in Hive for MongoDBthrows error
Posted by Mich Talebzadeh <mi...@gmail.com>.
I sorted it out this by spending a fair bit of time with jar files needed.
These are my versions
Hive 3
Hadoop 3.1
MongoDB 4.0.1
You need the following jar file added to Hive
ADD JAR hdfs://rhes75:9000/jars/mongo-hadoop-core-2.0.2.jar;
ADD JAR hdfs://rhes75:9000/jars/mongo-hadoop-hive-2.0.2.jar;
ADD JAR hdfs://rhes75:9000/jars/mongo-java-driver-3.8.1.jar;
--ADD JAR hdfs://rhes75:9000/jars/hive-serde-3.1.0.jar;
*ADD JAR hdfs://rhes75:9000/jars/hive-serde-0.11.0.jar;*
The jar file hive-serde-0.11.0.jar is the one you need to use!
This code works for mapping a Hive external table (here
accounts.ll_18740868_mongo) to an existing Mongo collection
ADD JAR hdfs://rhes75:9000/jars/mongo-hadoop-core-2.0.2.jar;
ADD JAR hdfs://rhes75:9000/jars/mongo-hadoop-hive-2.0.2.jar;
ADD JAR hdfs://rhes75:9000/jars/mongo-java-driver-3.8.1.jar;
--ADD JAR hdfs://rhes75:9000/jars/hive-serde-3.1.0.jar;
ADD JAR hdfs://rhes75:9000/jars/hive-serde-0.11.0.jar;
DROP TABLE IF EXISTS accounts.ll_18740868_mongo;
CREATE EXTERNAL TABLE accounts.ll_18740868_mongo (
id Int
, TRANSACTIONDATE Date
, TRANSACTIONTYPE String
, SORTCODE String
, ACCOUNTNUMBER String
, TRANSACTIONDESCRIPTION String
, DEBITAMOUNT Double
, CREDITAMOUNT Double
, BALANCE Double
)
COMMENT 'Mongo external table'
STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
WITH SERDEPROPERTIES('mongo.columns.mapping'=
'{
"id":"_id"
, "TRANSACTIONDATE":"TRANSACTIONDATE"
, "TRANSACTIONTYPE":"TRANSACTIONTYPE"
, "SORTCODE":"SORTCODE"
, "ACCOUNTNUMBER":"ACCOUNTNUMBER"
, "TRANSACTIONDESCRIPTION":"TRANSACTIONDESCRIPTION"
, "DEBITAMOUNT":"DEBITAMOUNT"
, "CREDITAMOUNT":"CREDITAMOUNT"
, "BALANCE":"BALANCE"
}'
)
TBLPROPERTIES
('mongo.uri'='mongodb://account_user_RO:<password>@rhes75:60100/accounts.ll_18740868')
;
desc accounts.ll_18740868_mongo;
You will need to load the jar files whenever you need to access this table
In MongoDB I have
> use accounts;
switched to db accounts
> db.ll_18740868.count()
3623
And confirmed in Hive
0: jdbc:hive2://rhes75:10099/default> select count(1) from
accounts.ll_18740868_mongo;
+-------+
| _c0 |
+-------+
| 3623 |
+-------+
1 row selected (1.426 seconds)
HTH
Dr Mich Talebzadeh
LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
<https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*
http://talebzadehmich.wordpress.com
*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.
On Sun, 26 Aug 2018 at 23:05, Mich Talebzadeh <mi...@gmail.com>
wrote:
> Hi,
>
> Trying to create an external table in Hive to be accessed by MongoDB.
>
> This is the code in Hive
>
> ADD JAR /home/hduser/jars/mongo-hadoop-core-2.0.2.jar;
> ADD JAR /home/hduser/jars/mongo-hadoop-hive-2.0.2.jar;
> ADD JAR /home/hduser/jars/mongo-java-driver-3.8.1.jar;
> use accounts;
> DROP TABLE IF EXISTS ll_18740868_mongo;
> CREATE EXTERNAL TABLE ll_18740868_mongo (
> TransactionDate DATE
> ,TransactionType String
> ,SortCode String
> ,AccountNumber String
> ,TransactionDescription String
> ,DebitAmount Double
> ,CreditAmount Double
> ,Balance Double
> )
> COMMENT 'Mongo external table'
> STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
> WITH SERDEPROPERTIES('mongo.columns.mapping'=
> '{
> "TransactionDate":"TransactionDate",
> "TransactionType":"TransactionType",
> "SortCode":"SortCode",
> "AccountNumber":"AccountNumber",
> "TransactionDescription":"TransactionDescription",
> "DebitAmount":"DebitAmount",
> "CreditAmount":"CreditAmount",
> "Balance":"Balance"
> }'
> )
> TBLPROPERTIES ('mongo.uri'='mongodb://account_user_RO:mongodb@rhes75
> :60100/accounts.ll_18740868_mongo')
> ;
>
> In debug mode it throws this error
>
> CREATE EXTERNAL TABLE ll_18740868_mongo (
> TransactionDate DATE
> ,TransactionType String
> ,SortCode String
> ,AccountNumber String
> ,TransactionDescription String
> ,DebitAmount Double
> ,CreditAmount Double
> ,Balance Double
> )
> COMMENT 'Mongo external table'
> STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
> WITH SERDEPROPERTIES('mongo.columns.mapping'=
> '{
> "TransactionDate":"TransactionDate",
> "TransactionType":"TransactionType",
> "SortCode":"SortCode",
> "AccountNumber":"AccountNumber",
> "TransactionDescription":"TransactionDescription",
> "DebitAmount":"DebitAmount",
> "CreditAmount":"CreditAmount",
> "Balance":"Balance"
> }'
> )
> TBLPROPERTIES ('mongo.uri'='mongodb://account_user_RO:mongodb@rhes75
> :60100/accounts.ll_18740868_mongo')
> 2018-08-26 23:01:32,424 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> ql.Driver: Starting task [Stage-0:DDL] in serial mode
> 2018-08-26 23:01:32,426 ERROR [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> exec.DDLTask: Failed
> java.lang.NoClassDefFoundError: org/apache/hadoop/hive/serde2/SerDe
> at java.lang.ClassLoader.defineClass1(Native Method)
> at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
> at java.security.SecureClassLoader.defineClass
> (SecureClassLoader.java:142)
> at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
> at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at
> com.mongodb.hadoop.hive.MongoStorageHandler.getSerDeClass(MongoStorageHandler.java:88)
> at
> org.apache.hadoop.hive.ql.plan.CreateTableDesc.toTable(CreateTableDesc.java:744)
> at
> org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4882)
> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:428)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:205)
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:97)
> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2479)
> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:2150)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1826)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1567)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1561)
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:157)
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:218)
> at
> org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239)
> at
> org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188)
> at
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402)
> at
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335)
> at
> org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:471)
> at
> org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:487)
> at
> org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:793)
> at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759)
> at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
> at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
> Caused by: java.lang.ClassNotFoundException:
> org.apache.hadoop.hive.serde2.SerDe
> at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> ... 39 more
> 2018-08-26 23:01:32,426 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> reexec.ReOptimizePlugin: ReOptimization: retryPossible: false
> FAILED: Execution Error, return code 1 from
> org.apache.hadoop.hive.ql.exec.DDLTask. org/apache/hadoop/hive/serde2/SerDe
> 2018-08-26 23:01:32,426 ERROR [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> ql.Driver: FAILED: Execution Error, return code 1 from
> org.apache.hadoop.hive.ql.exec.DDLTask. org/apache/hadoop/hive/serde2/SerDe
> 2018-08-26 23:01:32,426 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> ql.Driver: Completed executing
> command(queryId=hduser_20180826230132_53320afb-5265-407f-82a3-a1b22e584e20);
> Time taken: 0.005 seconds
> 2018-08-26 23:01:32,426 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> lockmgr.DbTxnManager: Stopped heartbeat for query:
> hduser_20180826230132_53320afb-5265-407f-82a3-a1b22e584e20
> 2018-08-26 23:01:32,437 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> conf.HiveConf: Using the default value passed in for log id:
> a928186b-244d-4b81-bda9-dd3a83e306b2
> 2018-08-26 23:01:32,437 INFO [a928186b-244d-4b81-bda9-dd3a83e306b2 main]
> session.SessionState: Resetting thread name to main
> 2018-08-26 23:01:32,437 INFO [main] conf.HiveConf: Using the default
> value passed in for log id: a928186b-244d-4b81-bda9-dd3a83e306b2
> 2018-08-26 23:01:32,447 INFO [main] session.SessionState: Deleted
> directory: /tmp/hive/hduser/a928186b-244d-4b81-bda9-dd3a83e306b2 on fs with
> scheme hdfs
> 2018-08-26 23:01:32,447 INFO [main] session.SessionState: Deleted
> directory: /tmp/hive/a928186b-244d-4b81-bda9-dd3a83e306b2 on fs with scheme
> file
> 2018-08-26 23:01:32,447 INFO [main] metastore.HiveMetaStoreClient: Closed
> a connection to metastore, current connections: 0
>
> May be I am using incompatible jars here?
>
>
> Dr Mich Talebzadeh
>
>
>
> LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
> <https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*
>
>
>
> http://talebzadehmich.wordpress.com
>
>
> *Disclaimer:* Use it at your own risk. Any and all responsibility for any
> loss, damage or destruction of data or any other property which may arise
> from relying on this email's technical content is explicitly disclaimed.
> The author will in no case be liable for any monetary damages arising from
> such loss, damage or destruction.
>
>
>