Posted to user@hive.apache.org by Brian Salazar <br...@gmail.com> on 2011/02/04 23:24:45 UTC

Bulk load into HBase

I have been using the Bulk Load example here:
http://wiki.apache.org/hadoop/Hive/HBaseBulkLoad

I am having an issue bulk loading 1 million records into HBase on a
6-node cluster using Hive.

Hive 0.6.0 (built from source to get UDFRowSequence)
Hadoop 0.20.2
HBase 0.20.6
Zookeeper 3.3.2

Here is my issue:

hive> set mapred.reduce.tasks=1;

hive> create temporary function row_sequence as
    > 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';
OK
Time taken: 0.0090 seconds

hive> create external table hb_range_keys(uid_range_start string)
    > row format serde
    > 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
    > stored as
    > inputformat
    > 'org.apache.hadoop.mapred.TextInputFormat'
    > outputformat
    > 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat'
    > location '/tmp/hb_range_keys';
OK
Time taken: 0.176 seconds

hive> insert overwrite table hb_range_keys
    > select uid from
    > (select uid
    > from cdata_dump
    > tablesample(bucket 1 out of 1000 on uid) s
    > order by uid
    > limit 1000) x
    > where (row_sequence() % 100000)=0
    > order by uid
    > limit 9;
Total MapReduce jobs = 3
Launching Job 1 out of 3
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0036, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0036
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0036
2011-02-04 22:12:25,921 Stage-1 map = 0%,  reduce = 0%
2011-02-04 22:12:28,943 Stage-1 map = 50%,  reduce = 0%
2011-02-04 22:12:31,961 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:12:37,993 Stage-1 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0036
Launching Job 2 out of 3
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0037, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0037
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0037
2011-02-04 22:12:46,645 Stage-2 map = 0%,  reduce = 0%
2011-02-04 22:12:49,658 Stage-2 map = 100%,  reduce = 0%
2011-02-04 22:12:58,695 Stage-2 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0037
Launching Job 3 out of 3
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0038, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0038
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0038
2011-02-04 22:13:06,542 Stage-3 map = 0%,  reduce = 0%
2011-02-04 22:13:09,555 Stage-3 map = 100%,  reduce = 0%
2011-02-04 22:13:18,592 Stage-3 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0038
Loading data to table hb_range_keys
Deleted hdfs://hadoop-1:54310/tmp/hb_range_keys
Table hb_range_keys stats: [num_partitions: 0, num_files: 0, num_rows:
0, total_size: 0]
OK
Time taken: 61.318 seconds

hive> dfs -cp /tmp/hb_range_keys/* /tmp/hb_range_key_list;

hive> dfs -mkdir /tmp/cdata;

hive> set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

hive> set total.order.partitioner.path=/tmp/hb_range_key_list;

hive> create table cdata(uid string, retail_cat_name1 string,
retail_cat_name2 string, retail_cat_name3 string, bread_crumb_csv
string)
    > stored as
    > INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
    > TBLPROPERTIES ('hfile.family.path' = '/tmp/cdata/cf');
OK
Time taken: 0.103 seconds

hive> desc cdata_dump;
OK
uid     string
retail_cat_name1        string
retail_cat_name2        string
retail_cat_name3        string
bread_crumb_csv string
Time taken: 4.194 seconds

hive> insert overwrite table cdata
    > select uid, retail_cat_name1, retail_cat_name2,
retail_cat_name3, bread_crumb_csv
    > from cdata_dump
    > cluster by uid;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Defaulting to jobconf value of: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0039, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0039
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0039
2011-02-04 22:14:29,724 Stage-1 map = 0%,  reduce = 0%
2011-02-04 22:14:38,754 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:14:47,787 Stage-1 map = 100%,  reduce = 33%
2011-02-04 22:14:50,799 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:14:59,832 Stage-1 map = 100%,  reduce = 33%
2011-02-04 22:15:02,845 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:15:11,880 Stage-1 map = 100%,  reduce = 17%
2011-02-04 22:15:14,893 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:15:34,965 Stage-1 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0039 with errors
FAILED: Execution Error, return code 2 from
org.apache.hadoop.hive.ql.exec.MapRedTask

Here is the task log:

2011-02-04 22:14:38,240 WARN org.apache.hadoop.conf.Configuration:
/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/job.xml:a
attempt to override final parameter: mapred.hosts.exclude;  Ignoring.
2011-02-04 22:14:38,300 INFO org.apache.hadoop.metrics.jvm.JvmMetrics:
Initializing JVM Metrics with processName=SHUFFLE, sessionId=
2011-02-04 22:14:38,406 WARN org.apache.hadoop.conf.Configuration:
/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/job.xml:a
attempt to override final parameter: mapred.hosts.exclude;  Ignoring.
2011-02-04 22:14:38,453 INFO org.apache.hadoop.mapred.ReduceTask:
ShuffleRamManager: MemoryLimit=130514944,
MaxSingleShuffleLimit=32628736
2011-02-04 22:14:38,461 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread started: Thread for
merging on-disk files
2011-02-04 22:14:38,461 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread started: Thread for
merging in memory files
2011-02-04 22:14:38,461 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-04 22:14:38,463 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Need another 2 map output(s)
where 0 is already in progress
2011-02-04 22:14:38,463 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread started: Thread for
polling Map Completion Events
2011-02-04 22:14:38,464 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Scheduled 0 outputs (0 slow hosts
and0 dup hosts)
2011-02-04 22:14:38,468 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0: Got 1 new map-outputs
2011-02-04 22:14:42,475 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0: Got 1 new map-outputs
2011-02-04 22:14:43,465 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Scheduled 2 outputs (0 slow hosts
and0 dup hosts)
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
header: attempt_201102040059_0039_m_000001_0, compressed len:
36393372, decompressed len: 36393368
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
Shuffling 36393368 bytes (36393372 raw bytes) into Local-FS from
attempt_201102040059_0039_m_000001_0
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
header: attempt_201102040059_0039_m_000000_0, compressed len:
35832828, decompressed len: 35832824
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
Shuffling 35832824 bytes (35832828 raw bytes) into Local-FS from
attempt_201102040059_0039_m_000000_0
2011-02-04 22:14:43,748 INFO org.apache.hadoop.mapred.ReduceTask: Read
36393372 bytes from map-output for
attempt_201102040059_0039_m_000001_0
2011-02-04 22:14:43,750 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-04 22:14:43,860 INFO org.apache.hadoop.mapred.ReduceTask: Read
35832828 bytes from map-output for
attempt_201102040059_0039_m_000000_0
2011-02-04 22:14:43,862 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-04 22:14:44,476 INFO org.apache.hadoop.mapred.ReduceTask:
GetMapEventsThread exiting
2011-02-04 22:14:44,476 INFO org.apache.hadoop.mapred.ReduceTask:
getMapsEventsThread joined.
2011-02-04 22:14:44,477 INFO org.apache.hadoop.mapred.ReduceTask:
Closed ram manager
2011-02-04 22:14:44,477 INFO org.apache.hadoop.mapred.ReduceTask:
Interleaved on-disk merge complete: 2 files left.
2011-02-04 22:14:44,477 INFO org.apache.hadoop.mapred.ReduceTask:
In-memory merge complete: 0 files left.
2011-02-04 22:14:44,484 INFO org.apache.hadoop.mapred.ReduceTask:
Merging 2 files, 72226200 bytes from disk
2011-02-04 22:14:44,485 INFO org.apache.hadoop.mapred.ReduceTask:
Merging 0 segments, 0 bytes from memory into reduce
2011-02-04 22:14:44,488 INFO org.apache.hadoop.mapred.Merger: Merging
2 sorted segments
2011-02-04 22:14:44,503 INFO org.apache.hadoop.mapred.Merger: Down to
the last merge-pass, with 2 segments left of total size: 72226192
bytes
2011-02-04 22:14:44,515 INFO ExecReducer: maximum memory = 186449920
2011-02-04 22:14:44,515 INFO ExecReducer: conf classpath =
[file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/classes,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/]
2011-02-04 22:14:44,516 INFO ExecReducer: thread classpath =
[file:/usr/local/hadoop-0.20.2/conf/,
file:/usr/java/jdk1.6.0_23/lib/tools.jar,
file:/usr/local/hadoop-0.20.2/,
file:/usr/local/hadoop-0.20.2/hadoop-0.20.2-core.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-cli-1.2.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-codec-1.3.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-el-1.0.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-httpclient-3.0.1.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-logging-1.0.4.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-logging-api-1.0.4.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-net-1.4.1.jar,
file:/usr/local/hadoop-0.20.2/lib/core-3.1.1.jar,
file:/usr/local/hadoop-0.20.2/lib/hbase-0.20.6.jar,
file:/usr/local/hadoop-0.20.2/lib/hsqldb-1.8.0.10.jar,
file:/usr/local/hadoop-0.20.2/lib/jasper-compiler-5.5.12.jar,
file:/usr/local/hadoop-0.20.2/lib/jasper-runtime-5.5.12.jar,
file:/usr/local/hadoop-0.20.2/lib/jets3t-0.6.1.jar,
file:/usr/local/hadoop-0.20.2/lib/jetty-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/jetty-util-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/junit-3.8.1.jar,
file:/usr/local/hadoop-0.20.2/lib/kfs-0.2.2.jar,
file:/usr/local/hadoop-0.20.2/lib/log4j-1.2.15.jar,
file:/usr/local/hadoop-0.20.2/lib/mockito-all-1.8.0.jar,
file:/usr/local/hadoop-0.20.2/lib/oro-2.0.8.jar,
file:/usr/local/hadoop-0.20.2/lib/servlet-api-2.5-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/slf4j-api-1.4.3.jar,
file:/usr/local/hadoop-0.20.2/lib/slf4j-log4j12-1.4.3.jar,
file:/usr/local/hadoop-0.20.2/lib/xmlenc-0.52.jar,
file:/usr/local/hadoop-0.20.2/lib/zookeeper-3.3.2.jar,
file:/usr/local/hadoop-0.20.2/lib/jsp-2.1/jsp-2.1.jar,
file:/usr/local/hadoop-0.20.2/lib/jsp-2.1/jsp-api-2.1.jar,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/work/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/classes,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/hive_hbase-handler.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/zookeeper-3.3.2.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/hbase-0.20.6.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/hive_contrib.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/work/]
2011-02-04 22:14:44,779 INFO ExecReducer:
<OP>Id =3
  <Children>
    <FS>Id =4
      <Parent>Id = 3 null<\Parent>
    <\FS>
  <\Children>
<\OP>
2011-02-04 22:14:44,779 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initializing Self 3 OP
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Operator 3 OP
initialized
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initializing children
of 3 OP
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initializing child 4
FS
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initializing Self 4
FS
2011-02-04 22:14:44,812 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Operator 4 FS
initialized
2011-02-04 22:14:44,812 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initialization Done 4
FS
2011-02-04 22:14:44,812 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initialization Done 3
OP
2011-02-04 22:14:44,820 INFO ExecReducer: ExecReducer: processing 1
rows: used memory = 7862736
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: 3 forwarding 1 rows
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Final Path: FS
hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/000000_0
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Writing to temp file:
FS hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/_tmp.000000_0
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: New Final Path: FS
hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/000000_0
2011-02-04 22:14:44,952 INFO org.mortbay.log: Logging to
org.slf4j.impl.Log4jLoggerAdapter(org.mortbay.log) via
org.mortbay.log.Slf4jLog
2011-02-04 22:14:44,952 INFO org.mortbay.log:
Writer=hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/_tmp.000000_0/_temporary/_attempt__0000_r_000000_0/cf/4554463474860488518
2011-02-04 22:14:44,960 FATAL ExecReducer:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":null,"_col3":"|","_col4":null},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
	at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException:
java.io.IOException: Added a key not lexically larger than previous
key=cfretail_cat_name1-�ۚ, lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:576)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744)
	at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: Added a key not lexically larger than
previous key=cfretail_cat_name1-�ۚ,
lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.checkKey(HFile.java:551)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:513)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:481)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:95)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:65)
	at org.apache.hadoop.hive.hbase.HiveHFileOutputFormat$2.write(HiveHFileOutputFormat.java:200)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:561)
	... 8 more

2011-02-04 22:14:44,962 WARN org.apache.hadoop.mapred.TaskTracker:
Error running child
java.lang.RuntimeException:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":null,"_col3":"|","_col4":null},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:268)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
	at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
Runtime Error while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":null,"_col3":"|","_col4":null},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	... 3 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException:
java.io.IOException: Added a key not lexically larger than previous
key=cfretail_cat_name1-�ۚ, lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:576)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744)
	at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: Added a key not lexically larger than
previous key=cfretail_cat_name1-�ۚ,
lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.checkKey(HFile.java:551)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:513)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:481)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:95)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:65)
	at org.apache.hadoop.hive.hbase.HiveHFileOutputFormat$2.write(HiveHFileOutputFormat.java:200)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:561)
	... 8 more
2011-02-04 22:14:44,966 INFO org.apache.hadoop.mapred.TaskRunner:
Runnning cleanup for the task


Any ideas?


Thanks in advance!

- Brian

Re: Bulk load into HBase

Posted by Brian Salazar <br...@gmail.com>.
Thanks again for your input, John.

I had to double my heap size to 2000 because I was getting OutOfMemory
errors. I have since changed my approach to the one in John's link
(http://svn.apache.org/repos/asf/hive/trunk/hbase-handler/src/test/queries/hbase_bulk.m),
but I am running into a very similar issue.
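
For reference, this is roughly how I raised the heap (a sketch only,
assuming the OutOfMemory was in the Hive CLI / client JVM; the task
child heap is a separate setting):

# conf/hadoop-env.sh -- heap in MB for the hadoop/hive launcher JVM
export HADOOP_HEAPSIZE=2000

# if the OOM were inside the reduce task itself, the child JVM heap
# would be the thing to raise instead:
hive> set mapred.child.java.opts=-Xmx2000m;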

hive> create table hbsort(uid string, retail_cat_name1 string,
retail_cat_name2 string, retail_cat_name3 string, bread_crumb_csv
string)
    > stored as
    > INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
    > TBLPROPERTIES ('hfile.family.path' = '/tmp/hbsort/cf');
OK
Time taken: 4.547 seconds

hive> create table hbpartition(part_break string)
    > row format serde
    > 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
    > stored as
    > inputformat
    > 'org.apache.hadoop.mapred.TextInputFormat'
    > outputformat
    > 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat';
OK
Time taken: 0.066 seconds

hive> insert overwrite table hbpartition
    > select distinct uid
    > from cdata_dump;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0057, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0057
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0057
2011-02-04 23:58:23,161 Stage-1 map = 0%,  reduce = 0%
2011-02-04 23:58:32,205 Stage-1 map = 100%,  reduce = 0%
2011-02-04 23:58:41,261 Stage-1 map = 100%,  reduce = 33%
2011-02-04 23:58:44,275 Stage-1 map = 100%,  reduce = 75%
2011-02-04 23:58:47,295 Stage-1 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0057
Loading data to table hbpartition
Deleted hdfs://hadoop-1:54310/user/hive/warehouse/hbpartition
Table hbpartition stats: [num_partitions: 0, num_files: 1, num_rows:
0, total_size: 23555186]
731399 Rows loaded to hbpartition
OK
Time taken: 33.11 seconds

hive> dfs -count /user/hive/warehouse/hbpartition;
           1            1           23555186
hdfs://hadoop-1:54310/user/hive/warehouse/hbpartition

hive> dfs -cp /user/hive/warehouse/hbpartition/* /tmp/hbpartition.lst;

hive> set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

hive> set total.order.partitioner.natural.order=false;

hive> set total.order.partitioner.path=/tmp/hbpartition.lst;

hive> insert overwrite table hbsort
    > select distinct uid, retail_cat_name1, retail_cat_name2,
retail_cat_name3, bread_crumb_csv
    > from cdata_dump
    > cluster by uid;
Total MapReduce jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0059, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0059
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0059
2011-02-05 00:01:50,220 Stage-1 map = 0%,  reduce = 0%
2011-02-05 00:01:59,264 Stage-1 map = 100%,  reduce = 0%
2011-02-05 00:02:08,319 Stage-1 map = 100%,  reduce = 17%
2011-02-05 00:02:11,334 Stage-1 map = 100%,  reduce = 67%
2011-02-05 00:02:14,349 Stage-1 map = 100%,  reduce = 83%
2011-02-05 00:02:17,366 Stage-1 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0059
Launching Job 2 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0060, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0060
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0060
2011-02-05 00:02:25,157 Stage-2 map = 0%,  reduce = 0%
2011-02-05 00:02:34,192 Stage-2 map = 100%,  reduce = 0%
2011-02-05 00:02:43,230 Stage-2 map = 100%,  reduce = 17%
2011-02-05 00:02:46,243 Stage-2 map = 100%,  reduce = 0%
2011-02-05 00:02:55,277 Stage-2 map = 100%,  reduce = 17%
2011-02-05 00:02:58,291 Stage-2 map = 100%,  reduce = 0%
2011-02-05 00:03:08,327 Stage-2 map = 100%,  reduce = 33%
2011-02-05 00:03:14,349 Stage-2 map = 100%,  reduce = 0%
2011-02-05 00:03:32,417 Stage-2 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0060 with errors
FAILED: Execution Error, return code 2 from
org.apache.hadoop.hive.ql.exec.MapRedTask

Here is the job log:
2011-02-05 00:02:33,672 WARN org.apache.hadoop.conf.Configuration:
/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/attempt_201102040059_0060_r_000000_0/job.xml:a
attempt to override final parameter: mapred.hosts.exclude;  Ignoring.
2011-02-05 00:02:33,715 INFO org.apache.hadoop.metrics.jvm.JvmMetrics:
Initializing JVM Metrics with processName=SHUFFLE, sessionId=
2011-02-05 00:02:33,831 WARN org.apache.hadoop.conf.Configuration:
/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/attempt_201102040059_0060_r_000000_0/job.xml:a
attempt to override final parameter: mapred.hosts.exclude;  Ignoring.
2011-02-05 00:02:33,879 INFO org.apache.hadoop.mapred.ReduceTask:
ShuffleRamManager: MemoryLimit=130514944,
MaxSingleShuffleLimit=32628736
2011-02-05 00:02:33,887 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Thread started: Thread for
merging on-disk files
2011-02-05 00:02:33,888 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-05 00:02:33,888 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Thread started: Thread for
merging in memory files
2011-02-05 00:02:33,890 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Need another 2 map output(s)
where 0 is already in progress
2011-02-05 00:02:33,890 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Thread started: Thread for
polling Map Completion Events
2011-02-05 00:02:33,890 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Scheduled 0 outputs (0 slow hosts
and0 dup hosts)
2011-02-05 00:02:33,895 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0: Got 1 new map-outputs
2011-02-05 00:02:37,901 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0: Got 1 new map-outputs
2011-02-05 00:02:38,891 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0060_r_000000_0 Scheduled 2 outputs (0 slow hosts
and0 dup hosts)
2011-02-05 00:02:38,924 INFO org.apache.hadoop.mapred.ReduceTask:
header: attempt_201102040059_0060_m_000000_0, compressed len:
28740383, decompressed len: 28740379
2011-02-05 00:02:38,924 INFO org.apache.hadoop.mapred.ReduceTask:
Shuffling 28740379 bytes (28740383 raw bytes) into RAM from
attempt_201102040059_0060_m_000000_0
2011-02-05 00:02:38,926 INFO org.apache.hadoop.mapred.ReduceTask:
header: attempt_201102040059_0060_m_000001_0, compressed len:
28735497, decompressed len: 28735493
2011-02-05 00:02:38,926 INFO org.apache.hadoop.mapred.ReduceTask:
Shuffling 28735493 bytes (28735497 raw bytes) into RAM from
attempt_201102040059_0060_m_000001_0
2011-02-05 00:02:39,265 INFO org.apache.hadoop.mapred.ReduceTask: Read
28735493 bytes from map-output for
attempt_201102040059_0060_m_000001_0
2011-02-05 00:02:39,271 INFO org.apache.hadoop.mapred.ReduceTask: Rec
#1 from attempt_201102040059_0060_m_000001_0 -> (7, 39) from Hadoop-3
2011-02-05 00:02:39,287 INFO org.apache.hadoop.mapred.ReduceTask: Read
28740379 bytes from map-output for
attempt_201102040059_0060_m_000000_0
2011-02-05 00:02:39,287 INFO org.apache.hadoop.mapred.ReduceTask: Rec
#1 from attempt_201102040059_0060_m_000000_0 -> (25, 58) from Hadoop-6
2011-02-05 00:02:39,903 INFO org.apache.hadoop.mapred.ReduceTask:
GetMapEventsThread exiting
2011-02-05 00:02:39,903 INFO org.apache.hadoop.mapred.ReduceTask:
getMapsEventsThread joined.
2011-02-05 00:02:39,904 INFO org.apache.hadoop.mapred.ReduceTask:
Closed ram manager
2011-02-05 00:02:39,904 INFO org.apache.hadoop.mapred.ReduceTask:
Interleaved on-disk merge complete: 0 files left.
2011-02-05 00:02:39,904 INFO org.apache.hadoop.mapred.ReduceTask:
In-memory merge complete: 2 files left.
2011-02-05 00:02:39,925 INFO org.apache.hadoop.mapred.Merger: Merging
2 sorted segments
2011-02-05 00:02:39,926 INFO org.apache.hadoop.mapred.Merger: Down to
the last merge-pass, with 2 segments left of total size: 57475872
bytes
2011-02-05 00:02:41,132 INFO org.apache.hadoop.mapred.ReduceTask:
Merged 2 segments, 57475872 bytes to disk to satisfy reduce memory
limit
2011-02-05 00:02:41,133 INFO org.apache.hadoop.mapred.ReduceTask:
Merging 1 files, 57475874 bytes from disk
2011-02-05 00:02:41,134 INFO org.apache.hadoop.mapred.ReduceTask:
Merging 0 segments, 0 bytes from memory into reduce
2011-02-05 00:02:41,134 INFO org.apache.hadoop.mapred.Merger: Merging
1 sorted segments
2011-02-05 00:02:41,138 INFO org.apache.hadoop.mapred.Merger: Down to
the last merge-pass, with 1 segments left of total size: 57475870
bytes
2011-02-05 00:02:41,150 INFO ExecReducer: maximum memory = 186449920
2011-02-05 00:02:41,150 INFO ExecReducer: conf classpath =
[file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/jars/classes,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/jars/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/attempt_201102040059_0060_r_000000_0/]
2011-02-05 00:02:41,151 INFO ExecReducer: thread classpath =
[file:/usr/local/hadoop-0.20.2/conf/,
file:/usr/java/jdk1.6.0_23/lib/tools.jar,
file:/usr/local/hadoop-0.20.2/,
file:/usr/local/hadoop-0.20.2/hadoop-0.20.2-core.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-cli-1.2.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-codec-1.3.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-el-1.0.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-httpclient-3.0.1.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-logging-1.0.4.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-logging-api-1.0.4.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-net-1.4.1.jar,
file:/usr/local/hadoop-0.20.2/lib/core-3.1.1.jar,
file:/usr/local/hadoop-0.20.2/lib/hbase-0.20.6.jar,
file:/usr/local/hadoop-0.20.2/lib/hsqldb-1.8.0.10.jar,
file:/usr/local/hadoop-0.20.2/lib/jasper-compiler-5.5.12.jar,
file:/usr/local/hadoop-0.20.2/lib/jasper-runtime-5.5.12.jar,
file:/usr/local/hadoop-0.20.2/lib/jets3t-0.6.1.jar,
file:/usr/local/hadoop-0.20.2/lib/jetty-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/jetty-util-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/junit-3.8.1.jar,
file:/usr/local/hadoop-0.20.2/lib/kfs-0.2.2.jar,
file:/usr/local/hadoop-0.20.2/lib/log4j-1.2.15.jar,
file:/usr/local/hadoop-0.20.2/lib/mockito-all-1.8.0.jar,
file:/usr/local/hadoop-0.20.2/lib/oro-2.0.8.jar,
file:/usr/local/hadoop-0.20.2/lib/servlet-api-2.5-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/slf4j-api-1.4.3.jar,
file:/usr/local/hadoop-0.20.2/lib/slf4j-log4j12-1.4.3.jar,
file:/usr/local/hadoop-0.20.2/lib/xmlenc-0.52.jar,
file:/usr/local/hadoop-0.20.2/lib/zookeeper-3.3.2.jar,
file:/usr/local/hadoop-0.20.2/lib/jsp-2.1/jsp-2.1.jar,
file:/usr/local/hadoop-0.20.2/lib/jsp-2.1/jsp-api-2.1.jar,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/attempt_201102040059_0060_r_000000_0/work/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/jars/classes,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/jars/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0060/libjars/hive_hbase-handler.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0060/libjars/zookeeper-3.3.2.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0060/libjars/hbase-0.20.6.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0060/libjars/hive_contrib.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0060/attempt_201102040059_0060_r_000000_0/work/]
2011-02-05 00:02:41,337 INFO ExecReducer:
<OP>Id =2
  <Children>
    <FS>Id =3
      <Parent>Id = 2 null<\Parent>
    <\FS>
  <\Children>
<\OP>
2011-02-05 00:02:41,337 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initializing Self 2 OP
2011-02-05 00:02:41,344 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Operator 2 OP
initialized
2011-02-05 00:02:41,344 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initializing children
of 2 OP
2011-02-05 00:02:41,344 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initializing child 3
FS
2011-02-05 00:02:41,344 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initializing Self 3
FS
2011-02-05 00:02:41,373 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Operator 3 FS
initialized
2011-02-05 00:02:41,373 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initialization Done 3
FS
2011-02-05 00:02:41,373 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initialization Done 2
OP
2011-02-05 00:02:41,378 INFO ExecReducer: ExecReducer: processing 1
rows: used memory = 63894624
2011-02-05 00:02:41,379 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: 2 forwarding 1 rows
2011-02-05 00:02:41,379 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Final Path: FS
hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-05_00-01-40_502_8610626405174469619/_tmp.-ext-10000/000000_0
2011-02-05 00:02:41,379 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Writing to temp file:
FS hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-05_00-01-40_502_8610626405174469619/_tmp.-ext-10000/_tmp.000000_0
2011-02-05 00:02:41,379 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: New Final Path: FS
hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-05_00-01-40_502_8610626405174469619/_tmp.-ext-10000/000000_0
2011-02-05 00:02:41,498 INFO org.mortbay.log: Logging to
org.slf4j.impl.Log4jLoggerAdapter(org.mortbay.log) via
org.mortbay.log.Slf4jLog
2011-02-05 00:02:41,498 INFO org.mortbay.log:
Writer=hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-05_00-01-40_502_8610626405174469619/_tmp.-ext-10000/_tmp.000000_0/_temporary/_attempt__0000_r_000000_0/cf/746579574795151342
2011-02-05 00:02:41,506 FATAL ExecReducer:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":"0,2636,1040579","_col3":"|","_col4":"1040579"},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
	at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException:
java.io.IOException: Added a key not lexically larger than previous
key=cfbread_crumb_csv-� ��, lastkey=cfretail_cat_name3-�
��
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:576)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744)
	at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: Added a key not lexically larger than
previous key=cfbread_crumb_csv-� ��,
lastkey=cfretail_cat_name3-� ��
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.checkKey(HFile.java:551)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:513)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:481)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:95)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:65)
	at org.apache.hadoop.hive.hbase.HiveHFileOutputFormat$2.write(HiveHFileOutputFormat.java:200)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:561)
	... 8 more

2011-02-05 00:02:41,508 WARN org.apache.hadoop.mapred.TaskTracker:
Error running child
java.lang.RuntimeException:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":"0,2636,1040579","_col3":"|","_col4":"1040579"},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:268)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
	at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
Runtime Error while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":"0,2636,1040579","_col3":"|","_col4":"1040579"},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	... 3 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException:
java.io.IOException: Added a key not lexically larger than previous
key=cfbread_crumb_csv-� ��, lastkey=cfretail_cat_name3-�
��
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:576)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744)
	at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: Added a key not lexically larger than
previous key=cfbread_crumb_csv-� ��,
lastkey=cfretail_cat_name3-� ��
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.checkKey(HFile.java:551)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:513)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:481)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:95)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:65)
	at org.apache.hadoop.hive.hbase.HiveHFileOutputFormat$2.write(HiveHFileOutputFormat.java:200)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:561)
	... 8 more
2011-02-05 00:02:41,512 INFO org.apache.hadoop.mapred.TaskRunner:
Runnning cleanup for the task

Any help would be greatly appreciated.

- Brian

Re: Bulk load into HBase

Posted by John Sichi <js...@fb.com>.
As I mentioned in my previous message, some changes in Hive required an update to the unit test, meaning the wiki is out of date as well.

This is the latest unit test script; the main change is that instead of using an external table for generating the partition list, it now uses a managed table:

http://svn.apache.org/repos/asf/hive/trunk/hbase-handler/src/test/queries/hbase_bulk.m
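
Roughly, the only DDL difference is dropping the EXTERNAL keyword and
the LOCATION clause, e.g. (a sketch only; the script above has the
exact statement):

hive> create table hbpartition(part_break string)
    > row format serde
    > 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
    > stored as
    > inputformat 'org.apache.hadoop.mapred.TextInputFormat'
    > outputformat
    > 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat';

and then copying the resulting file out of the warehouse directory
(dfs -cp) before setting total.order.partitioner.path.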

If you get it working, please update the wiki.

JVS
