You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2018/03/02 18:39:34 UTC
orc git commit: ORC-308. Add function to get subtypes by name.
Repository: orc
Updated Branches:
refs/heads/master 411e633a2 -> 51b6b6ce3
ORC-308. Add function to get subtypes by name.
Fixes #221
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/51b6b6ce
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/51b6b6ce
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/51b6b6ce
Branch: refs/heads/master
Commit: 51b6b6ce3c63e3da326978fa016f72247b9f6c0d
Parents: 411e633
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Feb 27 16:02:16 2018 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri Mar 2 10:37:11 2018 -0800
----------------------------------------------------------------------
.../java/org/apache/orc/TypeDescription.java | 123 ++++++++++++++++++-
.../org/apache/orc/TestTypeDescription.java | 83 +++++++++++++
2 files changed, 205 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/51b6b6ce/java/core/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index 7f4d241..86d88ff 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -275,7 +275,7 @@ public class TypeDescription
} else {
while (source.position < source.length) {
char ch = source.value.charAt(source.position);
- if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
+ if (!Character.isLetterOrDigit(ch) && ch != '_') {
break;
}
source.position += 1;
@@ -925,4 +925,125 @@ public class TypeDescription
return prev.findSubtype(goal);
}
}
+
+  /**
+   * Break a dotted compound name into its simple names.
+   * At least one name is always parsed; every '.' that follows a name
+   * introduces another one.
+   * @param source the text being parsed, advanced past the compound name
+   * @return the simple names in the order they appeared
+   */
+  private static List<String> splitName(StringPosition source) {
+    List<String> parts = new ArrayList<>();
+    parts.add(parseName(source));
+    while (consumeChar(source, '.')) {
+      parts.add(parseName(source));
+    }
+    return parts;
+  }
+
+  /** Matches a name that consists solely of decimal digits. */
+  private static final Pattern INTEGER_PATTERN = Pattern.compile("^[0-9]+$");
+
+  /**
+   * Build the message reported when a simple name does not select a child.
+   * Keeping the text in one place guarantees every category reports it with
+   * the same wording and spacing.
+   * @param name the simple name that could not be resolved
+   * @param type the type that was being searched
+   * @return the error message
+   */
+  private static String fieldNotFoundMessage(String name,
+                                             TypeDescription type) {
+    return "Field " + name + " not found in " + type;
+  }
+
+  /**
+   * Parse a possibly compound name from the source and walk from this type
+   * down to the subtype it selects.
+   * A name made of a single all-digit part is handed to the integer
+   * overload of findSubtype instead of being looked up by name.
+   * @param source the text being parsed, advanced past the name
+   * @return the selected subtype
+   * @throws IllegalArgumentException if any part of the name does not select
+   *   a child of the type reached so far
+   */
+  private TypeDescription findSubtype(StringPosition source) {
+    List<String> names = splitName(source);
+    if (names.size() == 1 && INTEGER_PATTERN.matcher(names.get(0)).matches()) {
+      return findSubtype(Integer.parseInt(names.get(0)));
+    }
+    TypeDescription current = this;
+    for (String first : names) {
+      switch (current.category) {
+        case STRUCT: {
+          int posn = current.fieldNames.indexOf(first);
+          if (posn == -1) {
+            throw new IllegalArgumentException(
+                fieldNotFoundMessage(first, current));
+          }
+          current = current.children.get(posn);
+          break;
+        }
+        case LIST:
+          if (first.equals("_elem")) {
+            current = current.getChildren().get(0);
+          } else {
+            throw new IllegalArgumentException(
+                fieldNotFoundMessage(first, current));
+          }
+          break;
+        case MAP:
+          if (first.equals("_key")) {
+            current = current.getChildren().get(0);
+          } else if (first.equals("_value")) {
+            current = current.getChildren().get(1);
+          } else {
+            throw new IllegalArgumentException(
+                fieldNotFoundMessage(first, current));
+          }
+          break;
+        case UNION: {
+          try {
+            int posn = Integer.parseInt(first);
+            // An out of range index is reported through the same path as a
+            // name that is not a number at all.
+            if (posn < 0 || posn >= current.getChildren().size()) {
+              throw new NumberFormatException("off end of union");
+            }
+            current = current.getChildren().get(posn);
+          } catch (NumberFormatException e) {
+            throw new IllegalArgumentException(
+                fieldNotFoundMessage(first, current), e);
+          }
+          break;
+        }
+        default:
+          // Any other category has no children that can be selected by name.
+          throw new IllegalArgumentException(
+              fieldNotFoundMessage(first, current));
+      }
+    }
+    return current;
+  }
+
+  /**
+   * Look up a subtype of this schema from its name.
+   * A name that is just an integer is treated as a column number. Any other
+   * name is a sequence of simple names separated by '.', each of which
+   * selects a child of the type reached so far:
+   * <ul>
+   *   <li>a struct child is selected by its field name;</li>
+   *   <li>a list child is selected by "_elem";</li>
+   *   <li>a map child is selected by "_key" or "_value";</li>
+   *   <li>a union child is selected by its position, counting from 0.</li>
+   * </ul>
+   * @param columnName the name to search for
+   * @return the subtype
+   * @throws IllegalArgumentException if text remains after the name
+   */
+  public TypeDescription findSubtype(String columnName) {
+    StringPosition cursor = new StringPosition(columnName);
+    TypeDescription match = findSubtype(cursor);
+    if (cursor.position == cursor.length) {
+      return match;
+    }
+    throw new IllegalArgumentException("Remaining text in parsing field name "
+        + cursor);
+  }
+
+  /**
+   * Resolve a list of column names to the subtypes they select.
+   *
+   * The names are separated by ','. An empty string gives an empty list.
+   * @param columnNameList the list of column names
+   * @return the subtypes, in the same order as the names
+   * @throws IllegalArgumentException if two names are not separated by ','
+   */
+  public List<TypeDescription> findSubtypes(String columnNameList) {
+    StringPosition cursor = new StringPosition(columnNameList);
+    List<TypeDescription> found = new ArrayList<>();
+    if (cursor.position == cursor.length) {
+      return found;
+    }
+    // The first name has no separator in front of it.
+    found.add(findSubtype(cursor));
+    while (cursor.position != cursor.length) {
+      if (!consumeChar(cursor, ',')) {
+        throw new IllegalArgumentException("Comma expected in list of column"
+            + " names at " + cursor);
+      }
+      found.add(findSubtype(cursor));
+    }
+    return found;
+  }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/51b6b6ce/java/core/src/test/org/apache/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestTypeDescription.java b/java/core/src/test/org/apache/orc/TestTypeDescription.java
index 404c9d0..cb2e8d7 100644
--- a/java/core/src/test/org/apache/orc/TestTypeDescription.java
+++ b/java/core/src/test/org/apache/orc/TestTypeDescription.java
@@ -18,6 +18,7 @@
package org.apache.orc;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import org.junit.Rule;
import org.junit.Test;
@@ -220,4 +221,86 @@ public class TestTypeDescription {
assertEquals(0, type.getId());
assertEquals(2, leaf.getId());
}
+
+ @Test
+ public void testFindSubtype() {
+ TypeDescription type = TypeDescription.fromString(
+ "struct<a:int," +
+ "b:struct<c:array<int>,d:map<string,struct<e:string>>>," +
+ "f:string," +
+ "g:uniontype<string,int>>");
+ assertEquals(0, type.findSubtype("0").getId());
+ assertEquals(1, type.findSubtype("a").getId());
+ assertEquals(2, type.findSubtype("b").getId());
+ assertEquals(3, type.findSubtype("b.c").getId());
+ assertEquals(4, type.findSubtype("b.c._elem").getId());
+ assertEquals(5, type.findSubtype("b.d").getId());
+ assertEquals(6, type.findSubtype("b.d._key").getId());
+ assertEquals(7, type.findSubtype("b.d._value").getId());
+ assertEquals(8, type.findSubtype("b.d._value.e").getId());
+ assertEquals(9, type.findSubtype("f").getId());
+ assertEquals(10, type.findSubtype("g").getId());
+ assertEquals(11, type.findSubtype("g.0").getId());
+ assertEquals(12, type.findSubtype("g.1").getId());
+ }
+
+ @Test
+ public void testBadFindSubtype() {
+ TypeDescription type = TypeDescription.fromString(
+ "struct<a:int," +
+ "b:struct<c:array<int>,d:map<string,struct<e:string>>>," +
+ "f:string," +
+ "g:uniontype<string,int>>");
+ try {
+ type.findSubtype("13");
+ assertTrue(false);
+ } catch (IllegalArgumentException e) {
+ // PASS
+ }
+ try {
+ type.findSubtype("aa");
+ assertTrue(false);
+ } catch (IllegalArgumentException e) {
+ // PASS
+ }
+ try {
+ type.findSubtype("b.a");
+ assertTrue(false);
+ } catch (IllegalArgumentException e) {
+ // PASS
+ }
+ try {
+ type.findSubtype("g.2");
+ assertTrue(false);
+ } catch (IllegalArgumentException e) {
+ // PASS
+ }
+ try {
+ type.findSubtype("b.c.d");
+ assertTrue(false);
+ } catch (IllegalArgumentException e) {
+ // PASS
+ }
+ }
+
+ @Test
+ public void testFindSubtypes() {
+ TypeDescription type = TypeDescription.fromString(
+ "struct<a:int," +
+ "b:struct<c:array<int>,d:map<string,struct<e:string>>>," +
+ "f:string," +
+ "g:uniontype<string,int>>");
+ List<TypeDescription> results = type.findSubtypes("a");
+ assertEquals(1, results.size());
+ assertEquals(1, results.get(0).getId());
+
+ results = type.findSubtypes("b.d._value.e,3,g.0");
+ assertEquals(3, results.size());
+ assertEquals(8, results.get(0).getId());
+ assertEquals(3, results.get(1).getId());
+ assertEquals(11, results.get(2).getId());
+
+ results = type.findSubtypes("");
+ assertEquals(0, results.size());
+ }
}