You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Oleksiy Sayankin (JIRA)" <ji...@apache.org> on 2017/07/14 16:37:00 UTC
[jira] [Created] (HIVE-17098) Race condition in Hbase tables
Oleksiy Sayankin created HIVE-17098:
---------------------------------------
Summary: Race condition in Hbase tables
Key: HIVE-17098
URL: https://issues.apache.org/jira/browse/HIVE-17098
Project: Hive
Issue Type: Bug
Reporter: Oleksiy Sayankin
Assignee: Oleksiy Sayankin
These steps simulate our customer's production environment.
*STEP 1. Create test tables*
{code}
CREATE TABLE for_loading(
key int,
value string,
age int,
salary decimal (10,2)
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
{code}
{code}
CREATE TABLE test_1(
key int,
value string,
age int,
salary decimal (10,2)
)
ROW FORMAT SERDE
'org.apache.hadoop.hive.hbase.HBaseSerDe'
STORED BY
'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary',
'serialization.format'='1')
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',
'hbase.table.name'='test_1',
'numFiles'='0',
'numRows'='0',
'rawDataSize'='0',
'totalSize'='0',
'transient_lastDdlTime'='1495769316');
{code}
{code}
CREATE TABLE test_2(
key int,
value string,
age int,
salary decimal (10,2)
)
ROW FORMAT SERDE
'org.apache.hadoop.hive.hbase.HBaseSerDe'
STORED BY
'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary',
'serialization.format'='1')
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',
'hbase.table.name'='test_2',
'numFiles'='0',
'numRows'='0',
'rawDataSize'='0',
'totalSize'='0',
'transient_lastDdlTime'='1495769316');
{code}
*STEP 2. Create test data*
{code}
import static java.lang.String.format;

import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;
/**
 * Generates a comma-delimited test dataset of the form
 * {@code key,name,age,salary} with {@code rowNumber} rows, appending
 * the lines to the given file (creating it if absent).
 *
 * <p>Usage: {@code java Generator <rowCount> <outputFile>}
 */
public class Generator {
    /** Pool of first names sampled uniformly for the second CSV column. */
    private static final List<String> NAMES =
            Arrays.asList("Brian", "John", "Rodger", "Max", "Freddie", "Albert", "Fedor", "Lev", "Niccolo");

    public static void main(String[] args) {
        generateData(Integer.parseInt(args[0]), args[1]);
    }

    /**
     * Writes {@code rowNumber} random CSV rows to {@code file}.
     *
     * @param rowNumber number of rows to generate (keys are 1..rowNumber)
     * @param file      path of the output file; created if it does not exist,
     *                  appended to otherwise
     */
    public static void generateData(int rowNumber, String file) {
        double maxSalary = 20000.55;
        double minSalary = 1000.03;
        Random random = new Random();
        // Build all rows in memory, then write once.
        List<String> lines = new ArrayList<>(Math.max(rowNumber, 0));
        for (int i = 1; i <= rowNumber; i++) {
            lines.add(
                i + "," +
                NAMES.get(random.nextInt(NAMES.size())) + "," +
                // age uniformly in [18, 79]
                (random.nextInt(62) + 18) + "," +
                // Locale.ROOT forces a '.' decimal separator; the default
                // locale could emit ',' and corrupt the comma-delimited CSV.
                format(Locale.ROOT, "%.2f", minSalary + (maxSalary - minSalary) * random.nextDouble()));
        }
        Path path = Paths.get(file);
        try {
            // CREATE is required alongside APPEND: APPEND alone throws
            // NoSuchFileException when the output file does not exist yet.
            Files.write(path, lines, StandardCharsets.UTF_8,
                    StandardOpenOption.CREATE, StandardOpenOption.APPEND);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
{code}
{code}
javac Generator.java
java Generator 3000000 dataset.csv
hadoop fs -put dataset.csv /
{code}
*STEP 3. Upload test data*
{code}
load data local inpath '/home/myuser/dataset.csv' into table for_loading;
{code}
{code}
from for_loading
insert into table test_1
select key,value,age,salary;
{code}
{code}
from for_loading
insert into table test_2
select key,value,age,salary;
{code}
*STEP 4. Run test queries*
Run in 5 parallel terminals for table {{test_1}}
{code}
for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser1" -e "select * from test_1 limit 10;" 1>/dev/null; done
{code}
Run in 5 parallel terminals for table {{test_2}}
{code}
for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser2" -e "select * from test_2 limit 10;" 1>/dev/null; done
{code}
*EXPECTED RESULT:*
All queries complete successfully.
*ACTUAL RESULT:*
{code}
org.apache.hive.service.cli.HiveSQLException: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly ini
tialized. Ensure you call initializeTable either in your constructor or initialize method
at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:484)
at org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:308)
at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:847)
at sun.reflect.GeneratedMethodAccessor8.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
at com.sun.proxy.$Proxy25.fetchResults(Unknown Source)
at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:504)
at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:698)
at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717)
at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428)
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2099)
at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:479)
... 24 more
Caused by: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getRegionLocator(TableInputFormatBase.java:579)
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getStartEndKeys(TableInputFormatBase.java:225)
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:261)
at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplitsInternal(HiveHBaseTableInputFormat.java:525)
at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplits(HiveHBaseTableInputFormat.java:452)
at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372)
at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304)
at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459)
... 28 more
{code}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)