You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2018/07/13 04:03:32 UTC

[tajo] branch master updated: TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (#1055)

This is an automated email from the ASF dual-hosted git repository.

jhkim pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tajo.git


The following commit(s) were added to refs/heads/master by this push:
     new fb32619  TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (#1055)
fb32619 is described below

commit fb326195083959014c82c10187cb46de91ece33f
Author: Jinho Kim <jh...@apache.org>
AuthorDate: Fri Jul 13 13:03:30 2018 +0900

    TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (#1055)
---
 CHANGES                                            |  2 +
 .../storage/thirdparty/orc/TreeReaderFactory.java  |  2 +-
 .../java/org/apache/tajo/storage/TestStorages.java | 55 ++++++++++++++++++++++
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/CHANGES b/CHANGES
index 82b8489..37b7af2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -169,6 +169,8 @@ Release 0.12.0 - unreleased
 
   BUG FIXES
 
+    TAJO-2189: Dictionary encoded text in ORC scanner may cause incorrect result. (jinho)
+
     TAJO-2188: Can't start tajo daemon on HDP,CDH (jinho)
 
     TAJO-2184: Can not run Tajo with non-default $TAJO_CONF_DIR (Lee Dongjin via hyunsik)
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
index 3099779..2f06c48 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
@@ -1217,7 +1217,7 @@ public class TreeReaderFactory {
         } else {
           result.clear();
         }
-        return result.getBytes();
+        return result.copyBytes();
       } else {
         return null;
       }
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
index deb758d..871bb2d 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -78,6 +78,16 @@ public class TestStorages {
       "  ]\n" +
       "}\n";
 
+  private static String TEST_VARIABLE_LENGTH_AVRO_SCHEMA =
+      "{\n" +
+          "  \"type\": \"record\",\n" +
+          "  \"namespace\": \"org.apache.tajo\",\n" +
+          "  \"name\": \"testVariableLength\",\n" +
+          "  \"fields\": [\n" +
+          "    { \"name\": \"col1\", \"type\": \"string\" }\n" +
+          "  ]\n" +
+          "}\n";
+
   private static String TEST_NULL_HANDLING_TYPES_AVRO_SCHEMA =
       "{\n" +
       "  \"type\": \"record\",\n" +
@@ -480,6 +490,51 @@ public class TestStorages {
   }
 
   @Test
+  public void testVariableLength() throws IOException {
+    SchemaBuilder schemaBld = SchemaBuilder.builder()
+        .add("col1", Type.TEXT);
+
+    Schema schema = schemaBld.build();
+
+    TableMeta meta = CatalogUtil.newTableMeta(dataFormat, conf);
+    if (dataFormat.equalsIgnoreCase(BuiltinStorages.AVRO)) {
+      meta.putProperty(StorageConstants.AVRO_SCHEMA_LITERAL, TEST_VARIABLE_LENGTH_AVRO_SCHEMA);
+    }
+
+    FileTablespace sm = TablespaceManager.getLocalFs();
+    Path tablePath = new Path(testDir, "testVariableLength.data");
+    Appender appender = sm.getAppender(meta, schema, tablePath);
+    appender.init();
+
+    String testStr = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+    String testStr2 = "xxxxxxxxxxxxxxxx";  // test for dictionary encoding
+
+    for(int i = 100; i > 0; i--) {
+      VTuple tuple = new VTuple(1 );
+      tuple.put(new Datum[] {
+          DatumFactory.createText(i % 2 == 0 ? testStr + i : testStr2)
+      });
+
+      appender.addTuple(tuple);
+    }
+    appender.flush();
+    appender.close();
+
+    FileStatus status = fs.getFileStatus(tablePath);
+    FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());
+    Scanner scanner =  sm.getScanner(meta, schema, fragment, null);
+    scanner.init();
+
+    Tuple retrieved;
+    int idx = 100;
+    while ((retrieved = scanner.next()) != null) {
+      assertEquals((idx % 2 == 0 ? testStr + idx : testStr2), retrieved.asDatum(0).toString());
+      idx--;
+    }
+    scanner.close();
+  }
+
+  @Test
   public void testNullHandlingTypes() throws IOException {
     SchemaBuilder schemaBld = SchemaBuilder.builder()
         .add("col1", Type.BOOLEAN)