You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/09/14 20:02:29 UTC

[hive] branch master updated: HIVE-22055: select count gives incorrect result after loading data from text file (Attila Magyar, reviewed by Slim Bouguerra)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new df452d3  HIVE-22055: select count gives incorrect result after loading data from text file (Attila Magyar, reviewed by Slim Bouguerra)
df452d3 is described below

commit df452d3d774c0ee0f002fdc3a667febd4c42a77c
Author: Attila Magyar <am...@hortonworks.com>
AuthorDate: Sat Sep 14 13:02:06 2019 -0700

    HIVE-22055: select count gives incorrect result after loading data from text file (Attila Magyar, reviewed by Slim Bouguerra)
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../llap/io/encoded/SerDeEncodedDataReader.java    |   8 +-
 .../clientpositive/mm_loaddata_split_change.q      |  37 ++++
 .../llap/mm_loaddata_split_change.q.out            | 190 +++++++++++++++++++++
 4 files changed, 233 insertions(+), 3 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3a880c8..40f5de1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -634,6 +634,7 @@ minillaplocal.query.files=\
   mm_conversions.q,\
   mm_exim.q,\
   mm_loaddata.q,\
+  mm_loaddata_split_change.q,\
   mrr.q,\
   multiMapJoin1.q,\
   multiMapJoin2.q,\
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 462b25f..d414b14 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -820,7 +820,8 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
     List<StripeData> slices = cachedData.getData();
     if (slices.isEmpty()) return false;
     long uncachedPrefixEnd = slices.get(0).getKnownTornStart(),
-        uncachedSuffixStart = slices.get(slices.size() - 1).getLastEnd();
+        uncachedSuffixStart = slices.get(slices.size() - 1).getLastEnd(),
+        lastStripeLastStart = slices.get(slices.size() - 1).getLastStart();
     Ref<Integer> stripeIx = Ref.from(0);
     if (uncachedPrefixEnd > split.getStart()) {
       // TODO: can we merge neighboring splits? So we don't init so many readers.
@@ -856,8 +857,9 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
     if (uncachedSuffixStart < endOfSplit || isUnfortunate) {
       // Note: we assume 0-length split is correct given now LRR interprets offsets (reading an
       // extra row). Should we instead assume 1+ chars and add 1 for isUnfortunate?
-      FileSplit splitPart = new FileSplit(split.getPath(), uncachedSuffixStart,
-          endOfSplit - uncachedSuffixStart, hosts, inMemoryHosts);
+      // Do not read from uncachedSuffixStart, as LineRecordReader skips the first row.
+      FileSplit splitPart = new FileSplit(split.getPath(), lastStripeLastStart,
+          endOfSplit - lastStripeLastStart, hosts, inMemoryHosts);
       if (!processOneFileSplit(splitPart, startTime, stripeIx, null)) return null;
     }
     return true;
diff --git a/ql/src/test/queries/clientpositive/mm_loaddata_split_change.q b/ql/src/test/queries/clientpositive/mm_loaddata_split_change.q
new file mode 100644
index 0000000..67e4fd3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mm_loaddata_split_change.q
@@ -0,0 +1,37 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set tez.grouping.min-size=1;
+set tez.grouping.max-size=2;
+set mapreduce.map.memory.mb=1024;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.use.vector.serde.deserialize=true;
+set hive.vectorized.use.row.serde.deserialize=true;
+
+drop table load0_ss;
+create table load0_ss (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only");
+
+load data local inpath '../../data/files/kv1.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv2.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv1.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv2.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv1.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv2.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv1.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv2.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv1.txt' into table load0_ss;
+select count(1) from load0_ss;
+load data local inpath '../../data/files/kv2.txt' into table load0_ss;
+select count(1) from load0_ss;
+
+drop table load0_ss;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/mm_loaddata_split_change.q.out b/ql/src/test/results/clientpositive/llap/mm_loaddata_split_change.q.out
new file mode 100644
index 0000000..d7ecfd6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mm_loaddata_split_change.q.out
@@ -0,0 +1,190 @@
+PREHOOK: query: drop table load0_ss
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table load0_ss
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table load0_ss (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: create table load0_ss (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+500
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+1000
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+1500
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+2000
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+2500
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+3000
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+3500
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+4000
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+4500
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_ss
+PREHOOK: query: select count(1) from load0_ss
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_ss
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_ss
+#### A masked pattern was here ####
+5000
+PREHOOK: query: drop table load0_ss
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@load0_ss
+PREHOOK: Output: default@load0_ss
+POSTHOOK: query: drop table load0_ss
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@load0_ss
+POSTHOOK: Output: default@load0_ss