You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/11/20 02:02:16 UTC
orc git commit: ORC-104. Fix TypeDescription to properly handle
quoted field names. (omalley)
Repository: orc
Updated Branches:
refs/heads/master 137aa7221 -> fb8aec28d
ORC-104. Fix TypeDescription to properly handle quoted field names.
(omalley)
Fixes #63
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/fb8aec28
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/fb8aec28
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/fb8aec28
Branch: refs/heads/master
Commit: fb8aec28d834c3928617ba808acc8c4b606b91ee
Parents: 137aa72
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Oct 10 13:44:41 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Sat Nov 19 17:56:33 2016 -0800
----------------------------------------------------------------------
.../java/org/apache/orc/TypeDescription.java | 66 ++++++++++++++++----
.../org/apache/orc/TestTypeDescription.java | 48 ++++++++++++++
2 files changed, 103 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/fb8aec28/java/core/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index bc6787d..69d46b0 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -34,6 +34,7 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.regex.Pattern;
/**
* This is the description of the types in an ORC file.
@@ -45,6 +46,7 @@ public class TypeDescription
private static final int DEFAULT_PRECISION = 38;
private static final int DEFAULT_SCALE = 10;
private static final int DEFAULT_LENGTH = 256;
+ private static final Pattern UNQUOTED_NAMES = Pattern.compile("^\\w+$");
@Override
public int compareTo(TypeDescription other) {
@@ -239,18 +241,50 @@ public class TypeDescription
}
static String parseName(StringPosition source) {
- int start = source.position;
- while (source.position < source.length) {
- char ch = source.value.charAt(source.position);
- if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
- break;
- }
- source.position += 1;
- }
- if (source.position == start) {
+ if (source.position == source.length) {
throw new IllegalArgumentException("Missing name at " + source);
}
- return source.value.substring(start, source.position);
+ final int start = source.position;
+ if (source.value.charAt(source.position) == '`') {
+ source.position += 1;
+ StringBuilder buffer = new StringBuilder();
+ boolean closed = false;
+ while (source.position < source.length) {
+ char ch = source.value.charAt(source.position);
+ source.position += 1;
+ if (ch == '`') {
+ if (source.position < source.length &&
+ source.value.charAt(source.position) == '`') {
+ source.position += 1;
+ buffer.append('`');
+ } else {
+ closed = true;
+ break;
+ }
+ } else {
+ buffer.append(ch);
+ }
+ }
+ if (!closed) {
+ source.position = start;
+ throw new IllegalArgumentException("Unmatched quote at " + source);
+ } else if (buffer.length() == 0) {
+ throw new IllegalArgumentException("Empty quoted field name at " + source);
+ }
+ return buffer.toString();
+ } else {
+ while (source.position < source.length) {
+ char ch = source.value.charAt(source.position);
+ if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
+ break;
+ }
+ source.position += 1;
+ }
+ if (source.position == start) {
+ throw new IllegalArgumentException("Missing name at " + source);
+ }
+ return source.value.substring(start, source.position);
+ }
}
static void requireChar(StringPosition source, char required) {
@@ -731,6 +765,16 @@ public class TypeDescription
private int precision = DEFAULT_PRECISION;
private int scale = DEFAULT_SCALE;
+ static void printFieldName(StringBuilder buffer, String name) {
+ if (UNQUOTED_NAMES.matcher(name).matches()) {
+ buffer.append(name);
+ } else {
+ buffer.append('`');
+ buffer.append(name.replace("`", "``"));
+ buffer.append('`');
+ }
+ }
+
public void printToBuffer(StringBuilder buffer) {
buffer.append(category.name);
switch (category) {
@@ -765,7 +809,7 @@ public class TypeDescription
if (i != 0) {
buffer.append(',');
}
- buffer.append(fieldNames.get(i));
+ printFieldName(buffer, fieldNames.get(i));
buffer.append(':');
children.get(i).printToBuffer(buffer);
}
http://git-wip-us.apache.org/repos/asf/orc/blob/fb8aec28/java/core/src/test/org/apache/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestTypeDescription.java b/java/core/src/test/org/apache/orc/TestTypeDescription.java
index 1f6f254..c5944af 100644
--- a/java/core/src/test/org/apache/orc/TestTypeDescription.java
+++ b/java/core/src/test/org/apache/orc/TestTypeDescription.java
@@ -23,6 +23,8 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
+import java.util.List;
+
public class TestTypeDescription {
@Rule
public ExpectedException thrown= ExpectedException.none();
@@ -70,6 +72,19 @@ public class TestTypeDescription {
}
@Test
+ public void testSpecialFieldNames() {
+ TypeDescription type = TypeDescription.createStruct()
+ .addField("foo bar", TypeDescription.createInt())
+ .addField("`some`thing`", TypeDescription.createInt())
+ .addField("�\u0153", TypeDescription.createInt())
+ .addField("1234567890_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", TypeDescription.createInt())
+ .addField("'!@#$%^&*()-=_+", TypeDescription.createInt());
+ assertEquals("struct<`foo bar`:int,```some``thing```:int,`�\u0153`:int," +
+ "1234567890_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:int," +
+ "`'!@#$%^&*()-=_+`:int>", type.toString());
+ }
+
+ @Test
public void testParserSimple() {
TypeDescription expected = TypeDescription.createStruct()
.addField("b1", TypeDescription.createBinary())
@@ -117,6 +132,39 @@ public class TestTypeDescription {
}
@Test
+ public void testSpecialFieldNameParser() {
+ TypeDescription type = TypeDescription.fromString("struct<`foo bar`:int," +
+ "```quotes```:double,`abc``def````ghi`:float>");
+ assertEquals(TypeDescription.Category.STRUCT, type.getCategory());
+ List<String> fields = type.getFieldNames();
+ assertEquals(3, fields.size());
+ assertEquals("foo bar", fields.get(0));
+ assertEquals("`quotes`", fields.get(1));
+ assertEquals("abc`def``ghi", fields.get(2));
+ }
+
+ @Test
+ public void testMissingField() {
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Missing name at 'struct<^'");
+ TypeDescription.fromString("struct<");
+ }
+
+ @Test
+ public void testQuotedField1() {
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Unmatched quote at 'struct<^`abc'");
+ TypeDescription.fromString("struct<`abc");
+ }
+
+ @Test
+ public void testQuotedField2() {
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Empty quoted field name at 'struct<``^:int>'");
+ TypeDescription.fromString("struct<``:int>");
+ }
+
+ @Test
public void testParserUnknownCategory() {
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Can't parse category at 'FOOBAR^'");