You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Oleksiy Sayankin (JIRA)" <ji...@apache.org> on 2017/07/14 16:37:00 UTC

[jira] [Created] (HIVE-17098) Race condition in HBase tables

Oleksiy Sayankin created HIVE-17098:
---------------------------------------

             Summary: Race condition in HBase tables
                 Key: HIVE-17098
                 URL: https://issues.apache.org/jira/browse/HIVE-17098
             Project: Hive
          Issue Type: Bug
            Reporter: Oleksiy Sayankin
            Assignee: Oleksiy Sayankin


These steps reproduce our customer's production environment.

*STEP 1. Create test tables*

{code}
CREATE TABLE for_loading(
  key int, 
  value string,
  age int,
  salary decimal (10,2)
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
{code}


{code}
CREATE TABLE test_1(
  key int, 
  value string,
  age int,
  salary decimal (10,2)
)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.hbase.HBaseSerDe' 
STORED BY 
  'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
WITH SERDEPROPERTIES ( 
  'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 
  'serialization.format'='1')
TBLPROPERTIES (
  'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 
  'hbase.table.name'='test_1', 
  'numFiles'='0', 
  'numRows'='0', 
  'rawDataSize'='0', 
  'totalSize'='0', 
  'transient_lastDdlTime'='1495769316');
{code}

{code}
CREATE TABLE test_2(
  key int, 
  value string,
  age int,
  salary decimal (10,2)
)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.hbase.HBaseSerDe' 
STORED BY 
  'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 
WITH SERDEPROPERTIES ( 
  'hbase.columns.mapping'=':key, cf1:value, cf1:age, cf1:salary', 
  'serialization.format'='1')
TBLPROPERTIES (
  'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}', 
  'hbase.table.name'='test_2', 
  'numFiles'='0', 
  'numRows'='0', 
  'rawDataSize'='0', 
  'totalSize'='0', 
  'transient_lastDdlTime'='1495769316');
{code}


*STEP 2. Create test data*

{code}
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;

import static java.lang.String.format;

/**
 * Generates a CSV test dataset with rows of the form {@code key,name,age,salary}
 * and appends them to the given file, matching the schema of the test tables
 * (int key, string value, int age, decimal(10,2) salary).
 */
public class Generator {
    /** Candidate values for the "value" (name) column. */
    private static final List<String> NAMES =
        Arrays.asList("Brian", "John", "Rodger", "Max", "Freddie", "Albert", "Fedor", "Lev", "Niccolo");

    /**
     * Usage: {@code java Generator <rowCount> <outputFile>}.
     *
     * @param args args[0] = number of rows to generate, args[1] = output file path
     */
    public static void main(String[] args) {
        generateData(Integer.parseInt(args[0]), args[1]);
    }

    /**
     * Generates {@code rowNumber} CSV rows and appends them to {@code file},
     * creating the file if it does not exist yet.
     *
     * @param rowNumber number of rows to generate (non-positive produces no rows)
     * @param file      path of the output CSV file
     */
    public static void generateData(int rowNumber, String file) {
        double maxValue = 20000.55;
        double minValue = 1000.03;

        // Local, presized buffer instead of a static field so repeated calls
        // do not re-append rows accumulated by earlier invocations.
        List<String> lines = new ArrayList<>(Math.max(rowNumber, 0));

        Random random = new Random();
        for (int i = 1; i <= rowNumber; i++) {
            lines.add(
                i + "," +
                    NAMES.get(random.nextInt(NAMES.size())) + "," +
                    (random.nextInt(62) + 18) + "," +  // age in [18, 79]
                    // Locale.ROOT forces '.' as the decimal separator; the default
                    // locale could emit ',' and corrupt the comma-delimited file.
                    String.format(Locale.ROOT, "%.2f",
                        minValue + (maxValue - minValue) * random.nextDouble()));
        }

        Path path = Paths.get(file);

        try {
            // CREATE is required alongside APPEND: APPEND alone throws
            // NoSuchFileException on the first run when the file is missing.
            Files.write(path, lines, StandardCharsets.UTF_8,
                StandardOpenOption.CREATE, StandardOpenOption.APPEND);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
{code}

{code}
javac Generator.java
java Generator 3000000 dataset.csv
hadoop fs -put dataset.csv /
{code}


*STEP 3. Upload test data*

{code}
load data local inpath '/home/myuser/dataset.csv' into table for_loading;
{code}

{code}
from for_loading
insert into table test_1
select key,value,age,salary;
{code}

{code}
from for_loading
insert into table test_2
select key,value,age,salary;
{code}

*STEP 4. Run test queries*

Run in 5 parallel terminals for table {{test_1}}

{code}
for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser1" -e "select * from test_1 limit 10;" 1>/dev/null; done
{code}


Run in 5 parallel terminals for table {{test_2}}

{code}
for i in {1..500}; do beeline -u "jdbc:hive2://localhost:10000/default testuser2" -e "select * from test_2 limit 10;" 1>/dev/null; done
{code}

*EXPECTED RESULT:*

All queries succeed.




*ACTUAL RESULT*


{code}
org.apache.hive.service.cli.HiveSQLException: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly ini
tialized. Ensure you call initializeTable either in your constructor or initialize method
        at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:484)
        at org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:308)
        at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:847)
        at sun.reflect.GeneratedMethodAccessor8.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
        at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
        at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
        at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
        at com.sun.proxy.$Proxy25.fetchResults(Unknown Source)
        at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:504)
        at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:698)
        at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717)
        at org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702)
        at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
        at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
        at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
        at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
        at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428)
        at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
        at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2099)
        at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:479)
        ... 24 more
Caused by: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
        at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getRegionLocator(TableInputFormatBase.java:579)
        at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getStartEndKeys(TableInputFormatBase.java:225)
        at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:261)
        at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplitsInternal(HiveHBaseTableInputFormat.java:525)
        at org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat.getSplits(HiveHBaseTableInputFormat.java:452)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304)
        at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459)
        ... 28 more
{code}




--
This message was sent by Atlassian JIRA
(v6.4.14#64029)