You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2018/07/13 04:03:32 UTC
[tajo] branch master updated: TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (#1055)
This is an automated email from the ASF dual-hosted git repository.
jhkim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tajo.git
The following commit(s) were added to refs/heads/master by this push:
new fb32619 TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (#1055)
fb32619 is described below
commit fb326195083959014c82c10187cb46de91ece33f
Author: Jinho Kim <jh...@apache.org>
AuthorDate: Fri Jul 13 13:03:30 2018 +0900
TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (#1055)
---
CHANGES | 2 +
.../storage/thirdparty/orc/TreeReaderFactory.java | 2 +-
.../java/org/apache/tajo/storage/TestStorages.java | 55 ++++++++++++++++++++++
3 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/CHANGES b/CHANGES
index 82b8489..37b7af2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -169,6 +169,8 @@ Release 0.12.0 - unreleased
BUG FIXES
+ TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (jinho)
+
TAJO-2188: Can't start tajo daemon on HDP,CDH (jinho)
TAJO-2184: Can not run Tajo with non-default $TAJO_CONF_DIR (Lee Dongjin via hyunsik)
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
index 3099779..2f06c48 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
@@ -1217,7 +1217,7 @@ public class TreeReaderFactory {
} else {
result.clear();
}
- return result.getBytes();
+ return result.copyBytes();
} else {
return null;
}
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
index deb758d..871bb2d 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -78,6 +78,16 @@ public class TestStorages {
" ]\n" +
"}\n";
+ private static String TEST_VARIABLE_LENGTH_AVRO_SCHEMA =
+ "{\n" +
+ " \"type\": \"record\",\n" +
+ " \"namespace\": \"org.apache.tajo\",\n" +
+ " \"name\": \"testVariableLength\",\n" +
+ " \"fields\": [\n" +
+ " { \"name\": \"col1\", \"type\": \"string\" }\n" +
+ " ]\n" +
+ "}\n";
+
private static String TEST_NULL_HANDLING_TYPES_AVRO_SCHEMA =
"{\n" +
" \"type\": \"record\",\n" +
@@ -480,6 +490,51 @@ public class TestStorages {
}
@Test
+ public void testVariableLength() throws IOException {
+ SchemaBuilder schemaBld = SchemaBuilder.builder()
+ .add("col1", Type.TEXT);
+
+ Schema schema = schemaBld.build();
+
+ TableMeta meta = CatalogUtil.newTableMeta(dataFormat, conf);
+ if (dataFormat.equalsIgnoreCase(BuiltinStorages.AVRO)) {
+ meta.putProperty(StorageConstants.AVRO_SCHEMA_LITERAL, TEST_VARIABLE_LENGTH_AVRO_SCHEMA);
+ }
+
+ FileTablespace sm = TablespaceManager.getLocalFs();
+ Path tablePath = new Path(testDir, "testVariableLength.data");
+ Appender appender = sm.getAppender(meta, schema, tablePath);
+ appender.init();
+
+ String testStr = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+ String testStr2 = "xxxxxxxxxxxxxxxx"; // test for dictionary encoding
+
+ for(int i = 100; i > 0; i--) {
+ VTuple tuple = new VTuple(1 );
+ tuple.put(new Datum[] {
+ DatumFactory.createText(i % 2 == 0 ? testStr + i : testStr2)
+ });
+
+ appender.addTuple(tuple);
+ }
+ appender.flush();
+ appender.close();
+
+ FileStatus status = fs.getFileStatus(tablePath);
+ FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());
+ Scanner scanner = sm.getScanner(meta, schema, fragment, null);
+ scanner.init();
+
+ Tuple retrieved;
+ int idx = 100;
+ while ((retrieved = scanner.next()) != null) {
+ assertEquals((idx % 2 == 0 ? testStr + idx : testStr2), retrieved.asDatum(0).toString());
+ idx--;
+ }
+ scanner.close();
+ }
+
+ @Test
public void testNullHandlingTypes() throws IOException {
SchemaBuilder schemaBld = SchemaBuilder.builder()
.add("col1", Type.BOOLEAN)