You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/11/20 02:02:16 UTC

orc git commit: ORC-104. Fix TypeDescription to properly handle quoted field names. (omalley)

Repository: orc
Updated Branches:
  refs/heads/master 137aa7221 -> fb8aec28d


ORC-104. Fix TypeDescription to properly handle quoted field names.
(omalley)

Fixes #63

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/fb8aec28
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/fb8aec28
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/fb8aec28

Branch: refs/heads/master
Commit: fb8aec28d834c3928617ba808acc8c4b606b91ee
Parents: 137aa72
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Oct 10 13:44:41 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Sat Nov 19 17:56:33 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/orc/TypeDescription.java    | 66 ++++++++++++++++----
 .../org/apache/orc/TestTypeDescription.java     | 48 ++++++++++++++
 2 files changed, 103 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/fb8aec28/java/core/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index bc6787d..69d46b0 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -34,6 +34,7 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.regex.Pattern;
 
 /**
  * This is the description of the types in an ORC file.
@@ -45,6 +46,7 @@ public class TypeDescription
   private static final int DEFAULT_PRECISION = 38;
   private static final int DEFAULT_SCALE = 10;
   private static final int DEFAULT_LENGTH = 256;
+  private static final Pattern UNQUOTED_NAMES = Pattern.compile("^\\w+$");
 
   @Override
   public int compareTo(TypeDescription other) {
@@ -239,18 +241,50 @@ public class TypeDescription
   }
 
   static String parseName(StringPosition source) {
-    int start = source.position;
-    while (source.position < source.length) {
-      char ch = source.value.charAt(source.position);
-      if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
-        break;
-      }
-      source.position += 1;
-    }
-    if (source.position == start) {
+    if (source.position == source.length) {
       throw new IllegalArgumentException("Missing name at " + source);
     }
-    return source.value.substring(start, source.position);
+    final int start = source.position;
+    if (source.value.charAt(source.position) == '`') {
+      source.position += 1;
+      StringBuilder buffer = new StringBuilder();
+      boolean closed = false;
+      while (source.position < source.length) {
+        char ch = source.value.charAt(source.position);
+        source.position += 1;
+        if (ch == '`') {
+          if (source.position < source.length &&
+              source.value.charAt(source.position) == '`') {
+            source.position += 1;
+            buffer.append('`');
+          } else {
+            closed = true;
+            break;
+          }
+        } else {
+          buffer.append(ch);
+        }
+      }
+      if (!closed) {
+        source.position = start;
+        throw new IllegalArgumentException("Unmatched quote at " + source);
+      } else if (buffer.length() == 0) {
+        throw new IllegalArgumentException("Empty quoted field name at " + source);
+      }
+      return buffer.toString();
+    } else {
+      while (source.position < source.length) {
+        char ch = source.value.charAt(source.position);
+        if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
+          break;
+        }
+        source.position += 1;
+      }
+      if (source.position == start) {
+        throw new IllegalArgumentException("Missing name at " + source);
+      }
+      return source.value.substring(start, source.position);
+    }
   }
 
   static void requireChar(StringPosition source, char required) {
@@ -731,6 +765,16 @@ public class TypeDescription
   private int precision = DEFAULT_PRECISION;
   private int scale = DEFAULT_SCALE;
 
+  static void printFieldName(StringBuilder buffer, String name) {
+    if (UNQUOTED_NAMES.matcher(name).matches()) {
+      buffer.append(name);
+    } else {
+      buffer.append('`');
+      buffer.append(name.replace("`", "``"));
+      buffer.append('`');
+    }
+  }
+
   public void printToBuffer(StringBuilder buffer) {
     buffer.append(category.name);
     switch (category) {
@@ -765,7 +809,7 @@ public class TypeDescription
           if (i != 0) {
             buffer.append(',');
           }
-          buffer.append(fieldNames.get(i));
+          printFieldName(buffer, fieldNames.get(i));
           buffer.append(':');
           children.get(i).printToBuffer(buffer);
         }

http://git-wip-us.apache.org/repos/asf/orc/blob/fb8aec28/java/core/src/test/org/apache/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestTypeDescription.java b/java/core/src/test/org/apache/orc/TestTypeDescription.java
index 1f6f254..c5944af 100644
--- a/java/core/src/test/org/apache/orc/TestTypeDescription.java
+++ b/java/core/src/test/org/apache/orc/TestTypeDescription.java
@@ -23,6 +23,8 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
 
+import java.util.List;
+
 public class TestTypeDescription {
   @Rule
   public ExpectedException thrown= ExpectedException.none();
@@ -70,6 +72,19 @@ public class TestTypeDescription {
   }
 
   @Test
+  public void testSpecialFieldNames() {
+    TypeDescription type = TypeDescription.createStruct()
+        .addField("foo bar", TypeDescription.createInt())
+        .addField("`some`thing`", TypeDescription.createInt())
+        .addField("\u00e8\u0153", TypeDescription.createInt())
+        .addField("1234567890_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", TypeDescription.createInt())
+        .addField("'!@#$%^&*()-=_+", TypeDescription.createInt());
+    assertEquals("struct<`foo bar`:int,```some``thing```:int,`\u00e8\u0153`:int," +
+        "1234567890_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:int," +
+        "`'!@#$%^&*()-=_+`:int>", type.toString());
+  }
+
+  @Test
   public void testParserSimple() {
     TypeDescription expected = TypeDescription.createStruct()
         .addField("b1", TypeDescription.createBinary())
@@ -117,6 +132,39 @@ public class TestTypeDescription {
   }
 
   @Test
+  public void testSpecialFieldNameParser() {
+    TypeDescription type = TypeDescription.fromString("struct<`foo bar`:int," +
+        "```quotes```:double,`abc``def````ghi`:float>");
+    assertEquals(TypeDescription.Category.STRUCT, type.getCategory());
+    List<String> fields = type.getFieldNames();
+    assertEquals(3, fields.size());
+    assertEquals("foo bar", fields.get(0));
+    assertEquals("`quotes`", fields.get(1));
+    assertEquals("abc`def``ghi", fields.get(2));
+  }
+
+  @Test
+  public void testMissingField() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Missing name at 'struct<^'");
+    TypeDescription.fromString("struct<");
+  }
+
+  @Test
+  public void testQuotedField1() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Unmatched quote at 'struct<^`abc'");
+    TypeDescription.fromString("struct<`abc");
+  }
+
+  @Test
+  public void testQuotedField2() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Empty quoted field name at 'struct<``^:int>'");
+    TypeDescription.fromString("struct<``:int>");
+  }
+
+  @Test
   public void testParserUnknownCategory() {
     thrown.expect(IllegalArgumentException.class);
     thrown.expectMessage("Can't parse category at 'FOOBAR^'");