You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vi...@apache.org on 2018/03/12 03:52:24 UTC
[3/5] hive git commit: HIVE-17580 : Remove dependency of
get_fields_with_environment_context API to serde (Vihang Karajgaonkar,
reviewed by Alan Gates)
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
index f3b19f0..f47aeb9 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
@@ -25,14 +25,11 @@ import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashMap;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -47,7 +44,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoParser.PrimitiveParts;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -240,15 +237,6 @@ public final class TypeInfoUtils {
}
}
- public static String getBaseName(String typeName) {
- int idx = typeName.indexOf('(');
- if (idx == -1) {
- return typeName;
- } else {
- return typeName.substring(0, idx);
- }
- }
-
/**
* returns true if both TypeInfos are of primitive type, and the primitive category matches.
* @param ti1
@@ -256,7 +244,8 @@ public final class TypeInfoUtils {
* @return
*/
public static boolean doPrimitiveCategoriesMatch(TypeInfo ti1, TypeInfo ti2) {
- if (ti1.getCategory() == Category.PRIMITIVE && ti2.getCategory() == Category.PRIMITIVE) {
+ if (ti1.getCategory() == ObjectInspector.Category.PRIMITIVE.toMetastoreTypeCategory()
+ && ti2.getCategory() == ObjectInspector.Category.PRIMITIVE.toMetastoreTypeCategory()) {
if (((PrimitiveTypeInfo)ti1).getPrimitiveCategory()
== ((PrimitiveTypeInfo)ti2).getPrimitiveCategory()) {
return true;
@@ -266,328 +255,10 @@ public final class TypeInfoUtils {
}
/**
- * Parse a recursive TypeInfo list String. For example, the following inputs
- * are valid inputs:
- * "int,string,map<string,int>,list<map<int,list<string>>>,list<struct<a:int,b:string>>"
- * The separators between TypeInfos can be ",", ":", or ";".
- *
- * In order to use this class: TypeInfoParser parser = new
- * TypeInfoParser("int,string"); ArrayList<TypeInfo> typeInfos =
- * parser.parseTypeInfos();
- */
- private static class TypeInfoParser {
-
- private static class Token {
- public int position;
- public String text;
- public boolean isType;
-
- @Override
- public String toString() {
- return "" + position + ":" + text;
- }
- };
-
- private static boolean isTypeChar(char c) {
- return Character.isLetterOrDigit(c) || c == '_' || c == '.' || c == ' ' || c == '$';
- }
-
- /**
- * Tokenize the typeInfoString. The rule is simple: all consecutive
- * alphadigits and '_', '.' are in one token, and all other characters are
- * one character per token.
- *
- * tokenize("map<int,string>") should return
- * ["map","<","int",",","string",">"]
- *
- * Note that we add '$' in new Calcite return path. As '$' will not appear
- * in any type in Hive, it is safe to do so.
- */
- private static ArrayList<Token> tokenize(String typeInfoString) {
- ArrayList<Token> tokens = new ArrayList<Token>(0);
- int begin = 0;
- int end = 1;
- while (end <= typeInfoString.length()) {
- // last character ends a token?
- // if there are quotes, all the text between the quotes
- // is considered a single token (this can happen for
- // timestamp with local time-zone)
- if (begin > 0 &&
- typeInfoString.charAt(begin - 1) == '(' &&
- typeInfoString.charAt(begin) == '\'') {
- // Ignore starting quote
- begin++;
- do {
- end++;
- } while (typeInfoString.charAt(end) != '\'');
- } else if (typeInfoString.charAt(begin) == '\'' &&
- typeInfoString.charAt(begin + 1) == ')') {
- // Ignore closing quote
- begin++;
- end++;
- }
- if (end == typeInfoString.length()
- || !isTypeChar(typeInfoString.charAt(end - 1))
- || !isTypeChar(typeInfoString.charAt(end))) {
- Token t = new Token();
- t.position = begin;
- t.text = typeInfoString.substring(begin, end);
- t.isType = isTypeChar(typeInfoString.charAt(begin));
- tokens.add(t);
- begin = end;
- }
- end++;
- }
- return tokens;
- }
-
- public TypeInfoParser(String typeInfoString) {
- this.typeInfoString = typeInfoString;
- typeInfoTokens = tokenize(typeInfoString);
- }
-
- private final String typeInfoString;
- private final ArrayList<Token> typeInfoTokens;
- private ArrayList<TypeInfo> typeInfos;
- private int iToken;
-
- public ArrayList<TypeInfo> parseTypeInfos() {
- typeInfos = new ArrayList<TypeInfo>();
- iToken = 0;
- while (iToken < typeInfoTokens.size()) {
- typeInfos.add(parseType());
- if (iToken < typeInfoTokens.size()) {
- Token separator = typeInfoTokens.get(iToken);
- if (",".equals(separator.text) || ";".equals(separator.text)
- || ":".equals(separator.text)) {
- iToken++;
- } else {
- throw new IllegalArgumentException(
- "Error: ',', ':', or ';' expected at position "
- + separator.position + " from '" + typeInfoString + "' "
- + typeInfoTokens);
- }
- }
- }
- return typeInfos;
- }
-
- private Token peek() {
- if (iToken < typeInfoTokens.size()) {
- return typeInfoTokens.get(iToken);
- } else {
- return null;
- }
- }
-
- private Token expect(String item) {
- return expect(item, null);
- }
-
- private Token expect(String item, String alternative) {
- if (iToken >= typeInfoTokens.size()) {
- throw new IllegalArgumentException("Error: " + item
- + " expected at the end of '" + typeInfoString + "'");
- }
- Token t = typeInfoTokens.get(iToken);
- if (item.equals("type")) {
- if (!serdeConstants.LIST_TYPE_NAME.equals(t.text)
- && !serdeConstants.MAP_TYPE_NAME.equals(t.text)
- && !serdeConstants.STRUCT_TYPE_NAME.equals(t.text)
- && !serdeConstants.UNION_TYPE_NAME.equals(t.text)
- && null == PrimitiveObjectInspectorUtils
- .getTypeEntryFromTypeName(t.text)
- && !t.text.equals(alternative)) {
- throw new IllegalArgumentException("Error: " + item
- + " expected at the position " + t.position + " of '"
- + typeInfoString + "' but '" + t.text + "' is found.");
- }
- } else if (item.equals("name")) {
- if (!t.isType && !t.text.equals(alternative)) {
- throw new IllegalArgumentException("Error: " + item
- + " expected at the position " + t.position + " of '"
- + typeInfoString + "' but '" + t.text + "' is found.");
- }
- } else {
- if (!item.equals(t.text) && !t.text.equals(alternative)) {
- throw new IllegalArgumentException("Error: " + item
- + " expected at the position " + t.position + " of '"
- + typeInfoString + "' but '" + t.text + "' is found.");
- }
- }
- iToken++;
- return t;
- }
-
- private String[] parseParams() {
- List<String> params = new LinkedList<String>();
-
- Token t = peek();
- if (t != null && t.text.equals("(")) {
- expect("(");
-
- // checking for null in the for-loop condition prevents null-ptr exception
- // and allows us to fail more gracefully with a parsing error.
- for(t = peek(); (t == null) || !t.text.equals(")"); t = expect(",",")")) {
- params.add(expect("name").text);
- }
- if (params.size() == 0) {
- throw new IllegalArgumentException(
- "type parameters expected for type string " + typeInfoString);
- }
- }
-
- return params.toArray(new String[params.size()]);
- }
-
- private TypeInfo parseType() {
-
- Token t = expect("type");
-
- // Is this a primitive type?
- PrimitiveTypeEntry typeEntry =
- PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(t.text);
- if (typeEntry != null && typeEntry.primitiveCategory != PrimitiveCategory.UNKNOWN ) {
- String[] params = parseParams();
- switch (typeEntry.primitiveCategory) {
- case CHAR:
- case VARCHAR:
- if (params == null || params.length == 0) {
- throw new IllegalArgumentException(typeEntry.typeName
- + " type is specified without length: " + typeInfoString);
- }
-
- int length = 1;
- if (params.length == 1) {
- length = Integer.parseInt(params[0]);
- if (typeEntry.primitiveCategory == PrimitiveCategory.VARCHAR) {
- BaseCharUtils.validateVarcharParameter(length);
- return TypeInfoFactory.getVarcharTypeInfo(length);
- } else {
- BaseCharUtils.validateCharParameter(length);
- return TypeInfoFactory.getCharTypeInfo(length);
- }
- } else if (params.length > 1) {
- throw new IllegalArgumentException(
- "Type " + typeEntry.typeName+ " only takes one parameter, but " +
- params.length + " is seen");
- }
-
- case DECIMAL:
- int precision = HiveDecimal.USER_DEFAULT_PRECISION;
- int scale = HiveDecimal.USER_DEFAULT_SCALE;
- if (params == null || params.length == 0) {
- // It's possible that old metadata still refers to "decimal" as a column type w/o
- // precision/scale. In this case, the default (10,0) is assumed. Thus, do nothing here.
- } else if (params.length == 1) {
- // only precision is specified
- precision = Integer.valueOf(params[0]);
- HiveDecimalUtils.validateParameter(precision, scale);
- } else if (params.length == 2) {
- // New metadata always have two parameters.
- precision = Integer.parseInt(params[0]);
- scale = Integer.parseInt(params[1]);
- HiveDecimalUtils.validateParameter(precision, scale);
- } else if (params.length > 2) {
- throw new IllegalArgumentException("Type decimal only takes two parameter, but " +
- params.length + " is seen");
- }
- return TypeInfoFactory.getDecimalTypeInfo(precision, scale);
-
- default:
- return TypeInfoFactory.getPrimitiveTypeInfo(typeEntry.typeName);
- }
- }
-
- // Is this a list type?
- if (serdeConstants.LIST_TYPE_NAME.equals(t.text)) {
- expect("<");
- TypeInfo listElementType = parseType();
- expect(">");
- return TypeInfoFactory.getListTypeInfo(listElementType);
- }
-
- // Is this a map type?
- if (serdeConstants.MAP_TYPE_NAME.equals(t.text)) {
- expect("<");
- TypeInfo mapKeyType = parseType();
- expect(",");
- TypeInfo mapValueType = parseType();
- expect(">");
- return TypeInfoFactory.getMapTypeInfo(mapKeyType, mapValueType);
- }
-
- // Is this a struct type?
- if (serdeConstants.STRUCT_TYPE_NAME.equals(t.text)) {
- ArrayList<String> fieldNames = new ArrayList<String>();
- ArrayList<TypeInfo> fieldTypeInfos = new ArrayList<TypeInfo>();
- boolean first = true;
- do {
- if (first) {
- expect("<");
- first = false;
- } else {
- Token separator = expect(">", ",");
- if (separator.text.equals(">")) {
- // end of struct
- break;
- }
- }
- Token name = expect("name",">");
- if (name.text.equals(">")) {
- break;
- }
- fieldNames.add(name.text);
- expect(":");
- fieldTypeInfos.add(parseType());
- } while (true);
-
- return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
- }
- // Is this a union type?
- if (serdeConstants.UNION_TYPE_NAME.equals(t.text)) {
- List<TypeInfo> objectTypeInfos = new ArrayList<TypeInfo>();
- boolean first = true;
- do {
- if (first) {
- expect("<");
- first = false;
- } else {
- Token separator = expect(">", ",");
- if (separator.text.equals(">")) {
- // end of union
- break;
- }
- }
- objectTypeInfos.add(parseType());
- } while (true);
-
- return TypeInfoFactory.getUnionTypeInfo(objectTypeInfos);
- }
-
- throw new RuntimeException("Internal error parsing position "
- + t.position + " of '" + typeInfoString + "'");
- }
-
- public PrimitiveParts parsePrimitiveParts() {
- PrimitiveParts parts = new PrimitiveParts();
- Token t = expect("type");
- parts.typeName = t.text;
- parts.typeParams = parseParams();
- return parts;
- }
- }
-
- public static class PrimitiveParts {
- public String typeName;
- public String[] typeParams;
- }
-
- /**
* Make some of the TypeInfo parsing available as a utility.
*/
public static PrimitiveParts parsePrimitiveParts(String typeInfoString) {
- TypeInfoParser parser = new TypeInfoParser(typeInfoString);
+ TypeInfoParser parser = new TypeInfoParser(typeInfoString, TypeInfoFactory.getInstance());
return parser.parsePrimitiveParts();
}
@@ -844,7 +515,7 @@ public final class TypeInfoUtils {
}
public static ArrayList<TypeInfo> getTypeInfosFromTypeString(String typeString) {
- TypeInfoParser parser = new TypeInfoParser(typeString);
+ TypeInfoParser parser = new TypeInfoParser(typeString, TypeInfoFactory.getInstance());
return parser.parseTypeInfos();
}
@@ -861,7 +532,7 @@ public final class TypeInfoUtils {
}
public static TypeInfo getTypeInfoFromTypeString(String typeString) {
- TypeInfoParser parser = new TypeInfoParser(typeString);
+ TypeInfoParser parser = new TypeInfoParser(typeString, TypeInfoFactory.getInstance());
return parser.parseTypeInfos().get(0);
}
@@ -972,7 +643,8 @@ public final class TypeInfoUtils {
// Reimplemented to use PrimitiveCategory rather than TypeInfo, because
// 2 TypeInfos from the same qualified type (varchar, decimal) should still be
// seen as equivalent.
- if (from.getCategory() == Category.PRIMITIVE && to.getCategory() == Category.PRIMITIVE) {
+ if (from.getCategory() == Category.PRIMITIVE.toMetastoreTypeCategory()
+ && to.getCategory() == Category.PRIMITIVE.toMetastoreTypeCategory()) {
return implicitConvertible(
((PrimitiveTypeInfo) from).getPrimitiveCategory(),
((PrimitiveTypeInfo) to).getPrimitiveCategory());
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java
deleted file mode 100644
index 842997c..0000000
--- a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.serde2.typeinfo;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hive.common.classification.InterfaceAudience;
-import org.apache.hadoop.hive.common.classification.InterfaceStability;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-
-/**
- * UnionTypeInfo represents the TypeInfo of an union. A union holds only one
- * field of the specified fields at any point of time. The fields, a Union can
- * hold, can have the same or different TypeInfo.
- *
- * Always use the TypeInfoFactory to create new TypeInfo objects, instead of
- * directly creating an instance of this class.
- */
-@InterfaceAudience.Public
-@InterfaceStability.Stable
-public class UnionTypeInfo extends TypeInfo implements Serializable {
-
- private static final long serialVersionUID = 1L;
-
- private List<TypeInfo> allUnionObjectTypeInfos;
-
- /**
- * For java serialization use only.
- */
- public UnionTypeInfo() {
- }
-
- @Override
- public String getTypeName() {
- StringBuilder sb = new StringBuilder();
- sb.append(serdeConstants.UNION_TYPE_NAME + "<");
- for (int i = 0; i < allUnionObjectTypeInfos.size(); i++) {
- if (i > 0) {
- sb.append(",");
- }
- sb.append(allUnionObjectTypeInfos.get(i).getTypeName());
- }
- sb.append(">");
- return sb.toString();
- }
-
- /**
- * For java serialization use only.
- */
- public void setAllUnionObjectTypeInfos(
- List<TypeInfo> allUnionObjectTypeInfos) {
- this.allUnionObjectTypeInfos = allUnionObjectTypeInfos;
- }
-
- /**
- * For TypeInfoFactory use only.
- */
- UnionTypeInfo(List<TypeInfo> typeInfos) {
- allUnionObjectTypeInfos = new ArrayList<TypeInfo>();
- allUnionObjectTypeInfos.addAll(typeInfos);
- }
-
- @Override
- public Category getCategory() {
- return Category.UNION;
- }
-
- public List<TypeInfo> getAllUnionObjectTypeInfos() {
- return allUnionObjectTypeInfos;
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- }
- if (!(other instanceof UnionTypeInfo)) {
- return false;
- }
- UnionTypeInfo o = (UnionTypeInfo) other;
-
- // Compare the field types
- return o.getAllUnionObjectTypeInfos().equals(getAllUnionObjectTypeInfos());
- }
-
- @Override
- public int hashCode() {
- return allUnionObjectTypeInfos.hashCode();
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
index edf12a2..a6c248a 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
@@ -51,7 +51,6 @@ public class VarcharTypeInfo extends BaseCharTypeInfo {
return this.getLength() == pti.getLength();
}
-
/**
* Generate the hashCode for this TypeInfo.
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java b/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
index 749d8ac..655d768 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/SerdeRandomRowSource.java
@@ -435,7 +435,7 @@ public class SerdeRandomRowSource {
}
typeInfos[c] = typeInfo;
- final Category category = typeInfo.getCategory();
+ final Category category = Category.fromMetastoreTypeCategory(typeInfo.getCategory());
categories[c] = category;
ObjectInspector objectInspector = getObjectInspector(typeInfo);
switch (category) {
@@ -640,7 +640,7 @@ public class SerdeRandomRowSource {
elementObjectInspector);
boolean isStringFamily = false;
PrimitiveCategory primitiveCategory = null;
- if (elementTypeInfo.getCategory() == Category.PRIMITIVE) {
+ if (elementTypeInfo.getCategory() == Category.PRIMITIVE.toMetastoreTypeCategory()) {
primitiveCategory = ((PrimitiveTypeInfo) elementTypeInfo).getPrimitiveCategory();
if (primitiveCategory == PrimitiveCategory.STRING ||
primitiveCategory == PrimitiveCategory.BINARY ||
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
index 3736a1f..6bcde38 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
@@ -27,6 +27,7 @@ import java.util.List;
import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
@@ -403,7 +404,7 @@ public class TestAvroObjectInspectorGenerator {
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertEquals(ObjectInspector.Category.MAP, typeInfo.getCategory());
+ assertEquals(ObjectInspector.Category.MAP, Category.fromMetastoreTypeCategory(typeInfo.getCategory()));
assertTrue(typeInfo instanceof MapTypeInfo);
MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo;
@@ -423,7 +424,7 @@ public class TestAvroObjectInspectorGenerator {
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertEquals(ObjectInspector.Category.LIST, typeInfo.getCategory());
+ assertEquals(ObjectInspector.Category.LIST, Category.fromMetastoreTypeCategory(typeInfo.getCategory()));
assertTrue(typeInfo instanceof ListTypeInfo);
ListTypeInfo listTypeInfo = (ListTypeInfo)typeInfo;
@@ -442,7 +443,7 @@ public class TestAvroObjectInspectorGenerator {
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertEquals(ObjectInspector.Category.STRUCT, typeInfo.getCategory());
+ assertEquals(ObjectInspector.Category.STRUCT, Category.fromMetastoreTypeCategory(typeInfo.getCategory()));
assertTrue(typeInfo instanceof StructTypeInfo);
StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo;
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestInstanceCache.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestInstanceCache.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestInstanceCache.java
deleted file mode 100644
index cb7c6ed..0000000
--- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestInstanceCache.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.serde2.avro;
-
-import java.util.Set;
-import org.junit.Test;
-
-import static org.junit.Assert.assertSame;
-
-public class TestInstanceCache {
- private static class Foo {
-
- private int value = 42;
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
-
- Foo foo = (Foo) o;
-
- return value == foo.value;
-
- }
-
- @Override
- public int hashCode() {
- return value;
- }
- }
-
- private static class Wrapper<T> {
- public final T wrapped;
-
- private Wrapper(T wrapped) {
- this.wrapped = wrapped;
- }
- }
-
- @Test
- public void instanceCachesOnlyCreateOneInstance() throws AvroSerdeException {
- InstanceCache<Foo, Wrapper<Foo>> ic = new InstanceCache<Foo, Wrapper<Foo>>() {
- @Override
- protected Wrapper makeInstance(Foo hv,
- Set<Foo> seenSchemas) {
- return new Wrapper(hv);
- }
- };
- Foo f1 = new Foo();
-
- Wrapper fc = ic.retrieve(f1, null);
- assertSame(f1, fc.wrapped); // Our original foo should be in the wrapper
-
- Foo f2 = new Foo(); // Different instance, same value
-
- Wrapper fc2 = ic.retrieve(f2, null);
- assertSame(fc2,fc); // Since equiv f, should get back first container
- assertSame(fc2.wrapped, f1);
- }
-
- @Test
- public void instanceCacheReturnsCorrectInstances() throws AvroSerdeException {
- InstanceCache<String, Wrapper<String>> ic = new InstanceCache<String, Wrapper<String>>() {
- @Override
- protected Wrapper<String> makeInstance(
- String hv, Set<String> seenSchemas) {
- return new Wrapper<String>(hv);
- }
- };
-
- Wrapper<String> one = ic.retrieve("one", null);
- Wrapper<String> two = ic.retrieve("two", null);
-
- Wrapper<String> anotherOne = ic.retrieve("one", null);
- assertSame(one, anotherOne);
-
- Wrapper<String> anotherTwo = ic.retrieve("two", null);
- assertSame(two, anotherTwo);
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaToTypeInfo.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaToTypeInfo.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaToTypeInfo.java
index af258c7..7a663c0 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaToTypeInfo.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaToTypeInfo.java
@@ -33,9 +33,9 @@ public class TestSchemaToTypeInfo {
@Test
public void testDisallowRecursiveSchema()
- throws AvroSerdeException {
+ throws Exception {
- expect.expect(AvroSerdeException.class);
+ expect.expect(Exception.class);
expect.expectMessage("Recursive schemas are not supported");
final String schemaString = "{\n"
@@ -49,6 +49,7 @@ public class TestSchemaToTypeInfo {
+ " } ]\n"
+ "}";
- List<TypeInfo> types = SchemaToTypeInfo.generateColumnTypes(new Schema.Parser().parse(schemaString));
+ List<TypeInfo> types = SchemaToHiveTypeInfo.getInstance()
+ .generateColumnTypes(new Schema.Parser().parse(schemaString));
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
index 62741d3..f1e3228 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
@@ -313,7 +313,7 @@ public class TestBinarySortableFast extends TestCase {
private void verifyRead(BinarySortableDeserializeRead binarySortableDeserializeRead,
TypeInfo typeInfo, Object expectedObject) throws IOException {
- if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+ if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE.toMetastoreTypeCategory()) {
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, typeInfo, expectedObject);
} else {
Object complexFieldObj = VerifyFast.deserializeReadComplexType(binarySortableDeserializeRead, typeInfo);
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
index fbb6040..97cf220 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
@@ -199,7 +199,7 @@ public class TestLazySimpleFast extends TestCase {
private void verifyReadNull(LazySimpleDeserializeRead lazySimpleDeserializeRead,
TypeInfo typeInfo) throws IOException {
- if (typeInfo.getCategory() == Category.PRIMITIVE) {
+ if (typeInfo.getCategory() == Category.PRIMITIVE.toMetastoreTypeCategory()) {
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, typeInfo, null);
} else {
Object complexFieldObj = VerifyFast.deserializeReadComplexType(lazySimpleDeserializeRead, typeInfo);
@@ -211,7 +211,7 @@ public class TestLazySimpleFast extends TestCase {
private void verifyRead(LazySimpleDeserializeRead lazySimpleDeserializeRead,
TypeInfo typeInfo, Object expectedObject) throws IOException {
- if (typeInfo.getCategory() == Category.PRIMITIVE) {
+ if (typeInfo.getCategory() == Category.PRIMITIVE.toMetastoreTypeCategory()) {
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, typeInfo, expectedObject);
} else {
Object complexFieldObj = VerifyFast.deserializeReadComplexType(lazySimpleDeserializeRead, typeInfo);
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
index e95c6eb..d42eda0 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
@@ -211,7 +211,7 @@ public class TestLazyBinaryFast extends TestCase {
private void verifyRead(LazyBinaryDeserializeRead lazyBinaryDeserializeRead,
TypeInfo typeInfo, Object expectedObject) throws IOException {
- if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+ if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE.toMetastoreTypeCategory()) {
VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, typeInfo, expectedObject);
} else {
Object complexFieldObj = VerifyFast.deserializeReadComplexType(lazyBinaryDeserializeRead, typeInfo);
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
index 17b844c..269eb98 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
@@ -458,7 +458,7 @@ public class TestStandardObjectInspectors extends TestCase {
assertEquals(unionTypeName.toString(), uoi1.getTypeName());
// TypeInfo
TypeInfo typeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi1);
- assertEquals(Category.UNION, typeInfo1.getCategory());
+ assertEquals(Category.UNION, Category.fromMetastoreTypeCategory(typeInfo1.getCategory()));
assertEquals(UnionTypeInfo.class.getName(), typeInfo1.getClass().getName());
assertEquals(typeInfo1.getTypeName(), uoi1.getTypeName());
assertEquals(typeInfo1,
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java
new file mode 100644
index 0000000..c6c3029
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * StorageSchemaReader for Avro tables. The table is turned into metadata properties and the
+ * field list is produced by AvroSchemaUtils.getFieldsFromAvroSchema.
+ */
+public class AvroStorageSchemaReader implements StorageSchemaReader {
+  private static final Logger LOG = LoggerFactory.getLogger(AvroStorageSchemaReader.class);
+
+  /**
+   * Reads the fields of an Avro table.
+   *
+   * @param tbl metastore table object
+   * @param envContext not used by this implementation
+   * @param conf configuration handed to AvroSchemaUtils
+   * @return the fields derived from the Avro schema of the table
+   * @throws MetaException carrying the message and cause of any failure while resolving the schema
+   */
+  @Override
+  public List<FieldSchema> readSchema(Table tbl, EnvironmentContext envContext,
+      Configuration conf) throws MetaException {
+    Properties tblMetadataProperties = MetaStoreUtils.getTableMetadata(tbl);
+    try {
+      return AvroSchemaUtils.getFieldsFromAvroSchema(conf, tblMetadataProperties);
+    } catch (Exception e) {
+      // Every exception type lands here, not only IOException, so the log must not claim one.
+      LOG.warn("Exception received while reading avro schema for table " + tbl.getTableName(), e);
+      MetaException metaException = new MetaException(e.getMessage());
+      // MetaException has no (message, cause) constructor in use here; attach the cause afterwards
+      // so server-side stack traces still show where the failure started.
+      metaException.initCause(e);
+      throw metaException;
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java
index d5dea4d..ab8590e 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java
@@ -72,6 +72,8 @@ public class ColumnType {
public static final String TIMESTAMPTZ_TYPE_NAME = "timestamp with time zone";
+ public static final String TIMESTAMPLOCALTZ_TYPE_NAME = "timestamp with local time zone";
+
public static final String LIST_TYPE_NAME = "array";
public static final String MAP_TYPE_NAME = "map";
@@ -105,7 +107,8 @@ public class ColumnType {
INTERVAL_DAY_TIME_TYPE_NAME,
DECIMAL_TYPE_NAME,
BINARY_TYPE_NAME,
- TIMESTAMPTZ_TYPE_NAME);
+ TIMESTAMPTZ_TYPE_NAME,
+ TIMESTAMPLOCALTZ_TYPE_NAME);
public static final Set<String> StringTypes = StringUtils.asSet(
STRING_TYPE_NAME,
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java
index 1dbfa42..65c2af6 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java
@@ -22,17 +22,121 @@ import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.utils.StorageSchemaUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
+import java.util.Properties;
+
+import static org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils.LIST_COLUMN_COMMENTS;
/**
* Default StorageSchemaReader. Reads the schema of an Avro table through AvroSchemaUtils and
* builds the schema of any other table from its column name, type and comment properties.
*/
public class DefaultStorageSchemaReader implements StorageSchemaReader {
+ private final static Logger LOG = LoggerFactory.getLogger(DefaultStorageSchemaReader.class);
+
+ private static final String AVRO_SERIALIZATION_LIB =
+ "org.apache.hadoop.hive.serde2.avro.AvroSerDe";
+
@Override
public List<FieldSchema> readSchema(Table tbl, EnvironmentContext envContext,
- Configuration conf) throws MetaException {
- throw new UnsupportedOperationException("Storage schema reading not supported");
+ Configuration conf) throws MetaException {
+ // The serde class name of the table decides which schema source is used below.
+ String serializationLib = tbl.getSd().getSerdeInfo().getSerializationLib();
+ // Refuse tables that have no serde, or whose serde is listed in
+ // SERDES_USING_METASTORE_FOR_SCHEMA.
+ if (null == serializationLib || MetastoreConf
+ .getStringCollection(conf, MetastoreConf.ConfVars.SERDES_USING_METASTORE_FOR_SCHEMA)
+ .contains(serializationLib)) {
+ //safety check to make sure we should be using storage schema reader for this table
+ // NOTE(review): the message says "storage descriptor" but the value appended is the
+ // serialization lib.
+ throw new MetaException(
+ "Invalid usage of default storage schema reader for table " + tbl.getTableName()
+ + " with storage descriptor " + tbl.getSd().getSerdeInfo().getSerializationLib());
+ }
+ Properties tblMetadataProperties = MetaStoreUtils.getTableMetadata(tbl);
+ if(AVRO_SERIALIZATION_LIB.equals(serializationLib)) {
+ //in case of avro table use AvroStorageSchemaReader utils
+ try {
+ return AvroSchemaUtils.getFieldsFromAvroSchema(conf, tblMetadataProperties);
+ } catch (Exception e) {
+ // The full exception is logged here; the MetaException carries only its message.
+ LOG.warn("Exception received while reading avro schema for table " + tbl.getTableName(), e);
+ throw new MetaException(e.getMessage());
+ }
+ } else {
+ // Any other serde: build the fields from the column name/type/comment properties.
+ return getFieldSchemasFromTableMetadata(tblMetadataProperties);
+ }
+ }
+
+  /**
+   * This method implements a generic way to get the FieldSchemas from the table metadata
+   * properties like column names and column types. Most of the serdes have the same
+   * implementation in their initialize method
+   * //TODO refactor the common code from the serdes and move it to serde-api so that there is no
+   * //duplicate code
+   *
+   * @param tblMetadataProperties table metadata properties; an absent or empty column names
+   *          property yields no column names, an absent or empty column types property yields
+   *          no column types
+   * @return list of FieldSchema objects
+   */
+  public static List<FieldSchema> getFieldSchemasFromTableMetadata(
+      Properties tblMetadataProperties) {
+    // Get column names and types
+    String columnNameProperty = tblMetadataProperties.getProperty(ColumnType.LIST_COLUMNS);
+    String columnTypeProperty = tblMetadataProperties.getProperty(ColumnType.LIST_COLUMN_TYPES);
+    // A comma separates the column names unless the table declares its own delimiter.
+    final String columnNameDelimiter = tblMetadataProperties
+        .containsKey(ColumnType.COLUMN_NAME_DELIMITER) ? tblMetadataProperties
+        .getProperty(ColumnType.COLUMN_NAME_DELIMITER) : String
+        .valueOf(StorageSchemaUtils.COMMA);
+
+    // getProperty returns null for an absent key; handle that like an empty value instead of
+    // failing with a NullPointerException.
+    // all table column names
+    List<String> columnNames;
+    if (columnNameProperty == null || columnNameProperty.isEmpty()) {
+      columnNames = Collections.emptyList();
+    } else {
+      columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
+    }
+
+    // all column types
+    List<TypeInfo> columnTypes;
+    if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
+      columnTypes = Collections.emptyList();
+    } else {
+      columnTypes = StorageSchemaUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    }
+
+    // The "" default means this value is never null.
+    final String columnCommentProperty =
+        tblMetadataProperties.getProperty(LIST_COLUMN_COMMENTS, "");
+    List<String> columnComments;
+    if (columnCommentProperty.isEmpty()) {
+      columnComments = new ArrayList<>(0);
+    } else {
+      columnComments = Arrays.asList(
+          columnCommentProperty.split(String.valueOf(ColumnType.COLUMN_COMMENTS_DELIMITER)));
+    }
+    LOG.debug("columns: {}, {}", columnNameProperty, columnNames);
+    LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes);
+    LOG.debug("comments: {} ", columnCommentProperty);
+    return getFieldSchemaFromColumnInfo(columnNames, columnTypes, columnComments);
+  }
+
+  /**
+   * Builds one FieldSchema per column name, in order.
+   *
+   * @param columnNames column names; determines the number of fields produced
+   * @param columnTypes column types, read at the same index as the names
+   * @param columnComments column comments; may hold fewer entries than columnNames
+   * @return the FieldSchemas, one for each entry of columnNames
+   */
+  private static List<FieldSchema> getFieldSchemaFromColumnInfo(List<String> columnNames,
+      List<TypeInfo> columnTypes, List<String> columnComments) {
+    int len = columnNames.size();
+    List<FieldSchema> fieldSchemas = new ArrayList<>(len);
+    for (int i = 0; i < len; i++) {
+      FieldSchema fieldSchema = new FieldSchema();
+      fieldSchema.setName(columnNames.get(i));
+      // The FieldSchema stores the type as the string reported by TypeInfo.getTypeName().
+      fieldSchema.setType(columnTypes.get(i).getTypeName());
+      // The comment list is empty when the table has no comments property, and String.split
+      // drops trailing empty entries, so it can be shorter than the column list. A missing
+      // entry is passed as null and receives the default comment.
+      String comment = i < columnComments.size() ? columnComments.get(i) : null;
+      fieldSchema.setComment(StorageSchemaUtils.determineFieldComment(comment));
+      // Without this the method always returned an empty list.
+      fieldSchemas.add(fieldSchema);
+    }
+    return fieldSchemas;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java
index 6251e23..009c929 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java
@@ -32,7 +32,7 @@ import java.util.List;
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
-interface StorageSchemaReader {
+public interface StorageSchemaReader {
/**
* Read the schema from the storage representation of the table.
* @param tbl metastore table object
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java
new file mode 100644
index 0000000..b1261d8
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.utils;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.serde2.avro.SchemaToMetastoreTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Produces metastore FieldSchemas (name, type name, comment) from the fields of an Avro
+ * record schema.
+ */
+public class AvroFieldSchemaGenerator {
+  final private List<String> columnNames;
+  final private List<TypeInfo> columnTypes;
+  final private List<String> columnComments;
+
+  /**
+   * Extracts the column names, types and comments from the given schema.
+   *
+   * @param schema Avro schema of the table
+   * @throws Exception if the schema is not of type RECORD
+   */
+  public AvroFieldSchemaGenerator(Schema schema) throws Exception {
+    verifySchemaIsARecord(schema);
+
+    this.columnNames = generateColumnNames(schema);
+    this.columnTypes = SchemaToMetastoreTypeInfo.getInstance().generateColumnTypes(schema);
+    this.columnComments = generateColumnComments(schema);
+    assert columnNames.size() == columnTypes.size();
+  }
+
+  /** Rejects every schema whose type is not RECORD. */
+  private static void verifySchemaIsARecord(Schema schema) throws Exception {
+    if (!schema.getType().equals(Schema.Type.RECORD)) {
+      throw new Exception("Schema for table must be of type RECORD. " +
+          "Received type: " + schema.getType());
+    }
+  }
+
+  /** Collects the name of every field of the schema, in field order. */
+  private static List<String> generateColumnNames(Schema schema) {
+    List<Schema.Field> fields = schema.getFields();
+    List<String> fieldsList = new ArrayList<String>(fields.size());
+
+    for (Schema.Field field : fields) {
+      fieldsList.add(field.name());
+    }
+
+    return fieldsList;
+  }
+
+  /** Collects the doc of every field of the schema, using "" where a field has no doc. */
+  private static List<String> generateColumnComments(Schema schema) {
+    List<Schema.Field> fields = schema.getFields();
+    List<String> fieldComments = new ArrayList<String>(fields.size());
+
+    for (Schema.Field field : fields) {
+      String fieldComment = field.doc() == null ? "" : field.doc();
+      fieldComments.add(fieldComment);
+    }
+
+    return fieldComments;
+  }
+
+  /**
+   * Builds one FieldSchema per column of the schema.
+   *
+   * @return the FieldSchemas in column order
+   * @throws Exception if the category of a column type is not supported
+   */
+  public List<FieldSchema> getFieldSchemas() throws Exception {
+    int len = columnNames.size();
+    List<FieldSchema> fieldSchemas = new ArrayList<>(len);
+    for (int i = 0; i < len; i++) {
+      FieldSchema fieldSchema = new FieldSchema();
+      fieldSchema.setName(columnNames.get(i));
+      TypeInfo columnType = columnTypes.get(i);
+      if (!AvroSchemaUtils.supportedCategories(columnType)) {
+        throw new Exception("Don't yet support this type: " + columnType);
+      }
+      // The FieldSchema stores the type as the string reported by TypeInfo.getTypeName().
+      fieldSchema.setType(columnType.getTypeName());
+      // Comment defaulting lives in StorageSchemaUtils; this class no longer keeps a private
+      // copy of that helper, which was never called.
+      fieldSchema.setComment(StorageSchemaUtils.determineFieldComment(columnComments.get(i)));
+      fieldSchemas.add(fieldSchema);
+    }
+    return fieldSchemas;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroSchemaUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroSchemaUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroSchemaUtils.java
new file mode 100644
index 0000000..d7bcd15
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroSchemaUtils.java
@@ -0,0 +1,366 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.utils;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.ColumnType;
+import org.apache.hadoop.hive.serde2.typeinfo.MetastoreTypeCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.MetastoreTypeInfoFactory;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.serde2.avro.AvroSerDeConstants;
+import org.apache.hadoop.hive.serde2.avro.SchemaResolutionProblem;
+import org.apache.hadoop.hive.serde2.avro.TypeInfoToSchema;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.*;
+
+/*
+ * Many of the util methods are copied from AvroSerDeUtils from Hive
+ */
+public class AvroSchemaUtils {
+ private static final Logger LOG = LoggerFactory.getLogger(AvroSchemaUtils.class);
+  /**
+   * All avro-specific table properties, kept in one enum so that they can be grouped and
+   * referenced programmatically instead of as separate strings. A new avro-specific table
+   * property belongs here.
+   */
+  public enum AvroTableProperties {
+    SCHEMA_LITERAL("avro.schema.literal"),
+    SCHEMA_URL("avro.schema.url"),
+    SCHEMA_NAMESPACE("avro.schema.namespace"),
+    SCHEMA_NAME("avro.schema.name"),
+    SCHEMA_DOC("avro.schema.doc"),
+    AVRO_SERDE_SCHEMA("avro.serde.schema"),
+    SCHEMA_RETRIEVER("avro.schema.retriever");
+
+    private final String propName;
+
+    AvroTableProperties(String name) {
+      propName = name;
+    }
+
+    /** @return the property name this constant stands for */
+    public String getPropName() {
+      return propName;
+    }
+  }
+
+ // Following parameters slated for removal, prefer usage of enum above, that allows programmatic access.
+ // The first five repeat the property names as literals; the last two read them from the enum.
+ @Deprecated public static final String SCHEMA_LITERAL = "avro.schema.literal";
+ @Deprecated public static final String SCHEMA_URL = "avro.schema.url";
+ @Deprecated public static final String SCHEMA_NAMESPACE = "avro.schema.namespace";
+ @Deprecated public static final String SCHEMA_NAME = "avro.schema.name";
+ @Deprecated public static final String SCHEMA_DOC = "avro.schema.doc";
+ @Deprecated public static final String AVRO_SERDE_SCHEMA = AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName();
+ @Deprecated public static final String SCHEMA_RETRIEVER = AvroTableProperties.SCHEMA_RETRIEVER.getPropName();
+
+ // Value of the schema literal or schema url property that is treated as "no schema given".
+ public static final String SCHEMA_NONE = "none";
+ // Message of the exception thrown when no schema source can be determined.
+ public static final String EXCEPTION_MESSAGE = "Neither "
+ + AvroTableProperties.SCHEMA_LITERAL.getPropName() + " nor "
+ + AvroTableProperties.SCHEMA_URL.getPropName() + " specified, can't determine table schema";
+
+ // Name of the table property that holds the column comments.
+ public static final String LIST_COLUMN_COMMENTS = "columns.comments";
+ // Column name delimiter used when the table properties do not declare one.
+ public static final char COMMA = ',';
+
+  /**
+   * Resolves the Avro schema of a table and converts its fields into FieldSchemas.
+   * The schema comes from determineSchemaOrThrowException when a schema literal or url is
+   * present or when the column name/type properties are missing or empty; otherwise it is
+   * generated from the column properties and written back into the schema literal property.
+   * A non-null configuration additionally receives the schema under the avro serde schema key.
+   *
+   * @param configuration may be null, in which case the schema is not stored in it
+   * @param properties table metadata properties; may be modified as described above
+   * @return the FieldSchemas of the resolved schema
+   * @throws Exception if the schema cannot be determined or converted
+   */
+  public static List<FieldSchema> getFieldsFromAvroSchema(Configuration configuration,
+      Properties properties) throws Exception {
+    final String names = properties.getProperty(ColumnType.LIST_COLUMNS);
+    final String types = properties.getProperty(ColumnType.LIST_COLUMN_TYPES);
+    final boolean columnsMissing =
+        names == null || names.isEmpty() || types == null || types.isEmpty();
+
+    final Schema schema;
+    if (hasExternalSchema(properties) || columnsMissing) {
+      schema = AvroSchemaUtils.determineSchemaOrThrowException(configuration, properties);
+    } else {
+      final String comments = properties.getProperty(LIST_COLUMN_COMMENTS,"");
+      final String delimiter = properties.containsKey(ColumnType.COLUMN_NAME_DELIMITER)
+          ? properties.getProperty(ColumnType.COLUMN_NAME_DELIMITER)
+          : String.valueOf(COMMA);
+      // Column names and types, both taken from the table properties
+      List<String> columnNames = StringUtils.intern(Arrays.asList(names.split(delimiter)));
+      List<TypeInfo> columnTypes =
+          new TypeInfoParser(types, MetastoreTypeInfoFactory.getInstance()).parseTypeInfos();
+
+      schema = getSchemaFromCols(properties, columnNames, columnTypes, comments);
+      properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
+    }
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Avro schema is " + schema);
+    }
+
+    if (configuration != null) {
+      configuration.set(
+          AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
+    } else {
+      LOG.debug("Configuration null, not inserting schema");
+    }
+    AvroFieldSchemaGenerator generator = new AvroFieldSchemaGenerator(schema);
+    return generator.getFieldSchemas();
+  }
+
+
+  /** Tells whether the properties contain a schema literal entry or a schema url entry. */
+  private static boolean hasExternalSchema(Properties properties) {
+    String literal = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName());
+    if (literal != null) {
+      return true;
+    }
+    String url = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName());
+    return url != null;
+  }
+
+  /**
+   * Tells whether the category of the given type is one of PRIMITIVE, MAP, LIST, STRUCT or
+   * UNION.
+   */
+  public static boolean supportedCategories(TypeInfo ti) {
+    final MetastoreTypeCategory category = ti.getCategory();
+    final MetastoreTypeCategory[] supported = {
+        MetastoreTypeCategory.PRIMITIVE,
+        MetastoreTypeCategory.MAP,
+        MetastoreTypeCategory.LIST,
+        MetastoreTypeCategory.STRUCT,
+        MetastoreTypeCategory.UNION
+    };
+    for (MetastoreTypeCategory candidate : supported) {
+      if (category.equals(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Same lookup as determineSchemaOrThrowException, except that a failure is not propagated:
+   * the exception is logged and the special signal schema is returned in its place.
+   */
+  public static Schema determineSchemaOrReturnErrorSchema(Configuration conf, Properties props) {
+    Schema result;
+    try {
+      result = AvroSchemaUtils.determineSchemaOrThrowException(conf, props);
+    } catch (Exception e) {
+      LOG.warn("Encountered exception determining schema. Returning signal " +
+          "schema to indicate problem", e);
+      result = SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
+    }
+    return result;
+  }
+
+ /**
+ * Determine the schema that has been provided for Avro serde work. The schema literal
+ * property is tried first, then the schema url property; when no url is set the schema is
+ * generated from the column name and column type properties.
+ * @param conf used when reading the schema url; when non-null and the schema is generated
+ * from the columns, the generated schema is also stored in it
+ * @param properties containing a key pointing to the schema, one way or another; receives
+ * the generated schema as schema literal when the schema is built from the columns
+ * @return schema to use while serdeing the avro file
+ * @throws Exception if no schema source is available or the schema url cannot be read
+ */
+ public static Schema determineSchemaOrThrowException(Configuration conf, Properties properties)
+ throws Exception {
+ String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName());
+ // A literal that is present and not "none" wins over every other source.
+ if(schemaString != null && !schemaString.equals(SCHEMA_NONE))
+ return AvroSchemaUtils.getSchemaFor(schemaString);
+
+ // Try pulling directly from URL
+ schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName());
+ if (schemaString == null) {
+ // No url either: fall back to the column properties.
+ final String columnNameProperty = properties.getProperty(ColumnType.LIST_COLUMNS);
+ final String columnTypeProperty = properties.getProperty(ColumnType.LIST_COLUMN_TYPES);
+ final String columnCommentProperty = properties.getProperty(LIST_COLUMN_COMMENTS);
+ if (columnNameProperty == null || columnNameProperty.isEmpty()
+ || columnTypeProperty == null || columnTypeProperty.isEmpty() ) {
+ throw new IOException(EXCEPTION_MESSAGE);
+ }
+ final String columnNameDelimiter = properties.containsKey(ColumnType.COLUMN_NAME_DELIMITER) ? properties
+ .getProperty(ColumnType.COLUMN_NAME_DELIMITER) : String.valueOf(COMMA);
+ // Get column names and types
+ List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
+ List<TypeInfo> columnTypes =
+ new TypeInfoParser(columnTypeProperty,
+ MetastoreTypeInfoFactory.getInstance()).parseTypeInfos();
+ //TODO Why can't we directly bypass this whole logic and use ColumnTypeInfo to use
+ //AvroFieldSchemaGenerator directly?
+ Schema schema = getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty);
+ // The generated schema is written back so that it is found as a literal afterwards.
+ properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
+ if (conf != null)
+ conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
+ return schema;
+ } else if(schemaString.equals(SCHEMA_NONE)) {
+ // "none" as url value is handled like a missing schema.
+ throw new Exception(EXCEPTION_MESSAGE);
+ }
+
+ try {
+ // getSchemaFromFS returns null when no file system could be opened for the url.
+ Schema s = getSchemaFromFS(schemaString, conf);
+ if (s == null) {
+ //in case schema is not a file system
+ return AvroSchemaUtils.getSchemaFor(new URL(schemaString));
+ }
+ return s;
+ } catch (IOException ioe) {
+ throw new Exception("Unable to read schema from given path: " + schemaString, ioe);
+ } catch (URISyntaxException urie) {
+ throw new Exception("Unable to read schema from given path: " + schemaString, urie);
+ }
+ }
+
+  // Protected for testing and so we can pass in a conf for testing.
+  /**
+   * Reads an Avro schema from a file system location.
+   *
+   * @param schemaFSUrl location of the schema
+   * @param conf configuration used to obtain the file system
+   * @return the parsed schema, or null when no file system could be obtained for the url
+   * @throws IOException if the schema file cannot be opened or closed
+   * @throws URISyntaxException if schemaFSUrl is not a valid URI
+   */
+  protected static Schema getSchemaFromFS(String schemaFSUrl,
+      Configuration conf) throws IOException, URISyntaxException {
+    final FileSystem fs;
+    try {
+      fs = FileSystem.get(new URI(schemaFSUrl), conf);
+    } catch (IOException ioe) {
+      //return null only if the file system in schema is not recognized
+      if (LOG.isDebugEnabled()) {
+        String msg = "Failed to open file system for uri " + schemaFSUrl + " assuming it is not a FileSystem url";
+        LOG.debug(msg, ioe);
+      }
+
+      return null;
+    }
+    // try-with-resources closes the stream on every path and keeps a parse failure as the
+    // primary exception instead of letting a failing close() replace it.
+    try (FSDataInputStream in = fs.open(new Path(schemaFSUrl))) {
+      return AvroSchemaUtils.getSchemaFor(in);
+    }
+  }
+
+  /**
+   * Parses the Avro schema stored in the given file.
+   *
+   * @throws RuntimeException wrapping the IOException raised while parsing
+   */
+  public static Schema getSchemaFor(File file) {
+    try {
+      return new Schema.Parser().parse(file);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to parse Avro schema from " + file.getName(), e);
+    }
+  }
+
+  /**
+   * Parses the Avro schema read from the given stream. The stream is not closed here.
+   *
+   * @throws RuntimeException wrapping the IOException raised while parsing
+   */
+  public static Schema getSchemaFor(InputStream stream) {
+    try {
+      return new Schema.Parser().parse(stream);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to parse Avro schema", e);
+    }
+  }
+
+  /** Parses the Avro schema given as a string, using a fresh parser. */
+  public static Schema getSchemaFor(String str) {
+    return new Schema.Parser().parse(str);
+  }
+
+  /**
+   * Opens the url and parses the Avro schema found there. The stream is always closed; a
+   * failure while closing is ignored.
+   *
+   * @throws RuntimeException wrapping any exception raised while opening or parsing
+   */
+  public static Schema getSchemaFor(URL url) {
+    InputStream stream = null;
+    try {
+      stream = url.openStream();
+      return getSchemaFor(stream);
+    } catch (Exception e) {
+      throw new RuntimeException("Failed to parse Avro schema", e);
+    } finally {
+      try {
+        if (stream != null) {
+          stream.close();
+        }
+      } catch (IOException ignored) {
+        // Ignore
+      }
+    }
+  }
+
+  /**
+   * Generates an Avro schema from column names, types and comments.
+   *
+   * @param properties supplies the schema namespace, name and doc; the table name and table
+   *          comment properties are the defaults for name and doc
+   * @param columnNames column names
+   * @param columnTypes column types; must have as many entries as columnNames
+   * @param columnCommentProperty column comments separated by "\0"; null or empty means no
+   *          comments
+   * @return the generated schema
+   * @throws IllegalArgumentException if the number of names and types differs
+   */
+  public static Schema getSchemaFromCols(Properties properties,
+      List<String> columnNames, List<TypeInfo> columnTypes, String columnCommentProperty) {
+    final boolean hasComments =
+        columnCommentProperty != null && !columnCommentProperty.isEmpty();
+    //Comments are separated by "\0" in columnCommentProperty, see method getSchema
+    //in MetaStoreUtils where this string columns.comments is generated
+    final List<String> columnComments = hasComments
+        ? Arrays.asList(columnCommentProperty.split("\0"))
+        : new ArrayList<String>();
+    if (hasComments && LOG.isDebugEnabled()) {
+      LOG.debug("columnComments is " + columnCommentProperty);
+    }
+
+    if (columnNames.size() != columnTypes.size()) {
+      throw new IllegalArgumentException("getSchemaFromCols initialization failed. Number of column " +
+          "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
+          columnTypes);
+    }
+
+    final String tableName = properties.getProperty(AvroSerDeConstants.TABLE_NAME);
+    final String tableComment = properties.getProperty(AvroSerDeConstants.TABLE_COMMENT);
+    final String namespace =
+        properties.getProperty(AvroTableProperties.SCHEMA_NAMESPACE.getPropName());
+    final String name =
+        properties.getProperty(AvroTableProperties.SCHEMA_NAME.getPropName(), tableName);
+    final String doc =
+        properties.getProperty(AvroTableProperties.SCHEMA_DOC.getPropName(), tableComment);
+
+    TypeInfoToSchema converter = new TypeInfoToSchema();
+    return converter.convert(columnNames, columnTypes, columnComments, namespace, name, doc);
+  }
+
+  /**
+   * Tells whether an Avro schema is a union that carries a NULL member. Avro expresses a
+   * nullable type as a union of a type T and null, which is a very common use case, so such a
+   * union is meant to be silently converted to just T with null values allowed.
+   *
+   * When a Hive union type is used with AVRO, the schema type becomes
+   * Union[NULL, T1, T2, ...]. The NULL in the union should be silently removed
+   *
+   * @return true if the schema is a union of at least two members, one of which is NULL;
+   *         false otherwise
+   */
+  public static boolean isNullableType(Schema schema) {
+    if (schema.getType().equals(Schema.Type.UNION)) {
+      List<Schema> members = schema.getTypes();
+      // [null, null] not allowed, so looking for a single NULL member is ok.
+      if (members.size() >= 2) {
+        for (Schema member : members) {
+          if (Schema.Type.NULL.equals(member.getType())) {
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Strips the NULL members from a nullable union. A single remaining member is returned as
+   * is; several remaining members are returned as a new union. This method does not check
+   * that the provided schema is nullable.
+   */
+  public static Schema getOtherTypeFromNullableType(Schema schema) {
+    List<Schema> nonNullMembers = new ArrayList<>();
+    for (Schema member : schema.getTypes()) {
+      if (Schema.Type.NULL.equals(member.getType())) {
+        continue;
+      }
+      nonNullMembers.add(member);
+    }
+
+    return nonNullMembers.size() > 1
+        ? Schema.createUnion(nonNullMembers)
+        : nonNullMembers.get(0);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java
new file mode 100644
index 0000000..5ec642f
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.utils;
+
+import org.apache.hadoop.hive.serde2.typeinfo.MetastoreTypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoParser;
+
+import java.util.List;
+
+public class StorageSchemaUtils {
+ public static final char COMMA = ',';
+ public static List<TypeInfo> getTypeInfosFromTypeString(String columnTypeProperty) {
+ return new TypeInfoParser(columnTypeProperty, MetastoreTypeInfoFactory.getInstance())
+ .parseTypeInfos();
+ }
+
+ private static final String FROM_STORAGE_SCHEMA_READER = "generated by storage schema reader";
+ public static String determineFieldComment(String comment) {
+ return (comment == null) ? FROM_STORAGE_SCHEMA_READER : comment;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java
new file mode 100644
index 0000000..42868ea
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+/**
+ * Constants that are specific to AvroSerDe. They were copied here as part of
+ * separating the metastore from Hive and should always match the constants
+ * defined in AvroSerDe.java in the Hive source code.
+ */
+public class AvroSerDeConstants {
+  // Metadata keys (by their names: table name, table comment, column comments).
+  public static final String TABLE_NAME = "name";
+  public static final String TABLE_COMMENT = "comment";
+  public static final String LIST_COLUMN_COMMENTS = "columns.comments";
+
+  // These values happen to coincide with the ones defined in ColumnType. They are
+  // kept as separate constants in case the two ever need to diverge.
+  public static final String DECIMAL_TYPE_NAME = "decimal";
+  public static final String CHAR_TYPE_NAME = "char";
+  public static final String VARCHAR_TYPE_NAME = "varchar";
+  public static final String DATE_TYPE_NAME = "date";
+
+  // Avro type-name strings.
+  public static final String AVRO_STRING_TYPE_NAME = "string";
+  public static final String AVRO_INT_TYPE_NAME = "int";
+  public static final String AVRO_LONG_TYPE_NAME = "long";
+  public static final String AVRO_TIMESTAMP_TYPE_NAME = "timestamp-millis";
+
+  // Avro property-name strings.
+  public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType";
+  public static final String AVRO_PROP_PRECISION = "precision";
+  public static final String AVRO_PROP_SCALE = "scale";
+  public static final String AVRO_PROP_MAX_LENGTH = "maxLength";
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java
new file mode 100644
index 0000000..12a8ff2
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Cache for objects whose creation only depends on some other set of objects and therefore can be
+ * used against other equivalent versions of those objects. Essentially memoizes instance creation.
+ *
+ * <p>The two-argument {@link #retrieve(Object, Set)} is synchronized on this cache, and the
+ * one-argument overload delegates to it, so lookups and creations through either method are
+ * serialized per cache instance.
+ *
+ * @param <SeedObject> Object that determines the instance. The cache uses this object as a key for
+ *          its hash which is why it is imperative to have appropriate equals and hashcode
+ *          implementation for this object for the cache to work properly
+ * @param <Instance> Instance that will be created from SeedObject.
+ */
+public abstract class InstanceCache<SeedObject, Instance> {
+  private static final Logger LOG = LoggerFactory.getLogger(InstanceCache.class);
+  Map<SeedObject, Instance> cache = new HashMap<SeedObject, Instance>();
+
+  public InstanceCache() {}
+
+  /**
+   * Retrieve (or create if it doesn't exist) the correct Instance for this
+   * SeedObject. Equivalent to calling {@link #retrieve(Object, Set)} with a
+   * null 'seenSchemas'.
+   *
+   * @param hv the seed object used as the cache key
+   * @return the cached or newly created instance
+   * @throws Exception whatever {@link #makeInstance(Object, Set)} throws
+   */
+  public Instance retrieve(SeedObject hv) throws Exception {
+    return retrieve(hv, null);
+  }
+
+  /**
+   * Retrieve (or create if it doesn't exist) the correct Instance for this
+   * SeedObject using 'seenSchemas' to resolve circular references.
+   *
+   * @param hv the seed object used as the cache key
+   * @param seenSchemas passed through unchanged to {@link #makeInstance(Object, Set)}
+   * @return the cached or newly created instance
+   * @throws Exception whatever {@link #makeInstance(Object, Set)} throws; in that case
+   *           nothing is stored in the cache
+   */
+  public synchronized Instance retrieve(SeedObject hv,
+      Set<SeedObject> seenSchemas) throws Exception {
+    // Parameterized logging: the seed is only formatted when debug is enabled, and a
+    // null seed no longer triggers a NullPointerException that depended on the log level.
+    LOG.debug("Checking for hv: {}", hv);
+
+    // containsKey + get (rather than a null check on get) so that a cached null
+    // instance is returned instead of being re-created.
+    if (cache.containsKey(hv)) {
+      LOG.debug("Returning cache result.");
+      return cache.get(hv);
+    }
+
+    LOG.debug("Creating new instance and storing in cache");
+
+    Instance instance = makeInstance(hv, seenSchemas);
+    cache.put(hv, instance);
+    return instance;
+  }
+
+  /**
+   * Creates the Instance for the given seed. Called by
+   * {@link #retrieve(Object, Set)} when the seed is not yet in the cache.
+   *
+   * @param hv the seed object to build the instance from
+   * @param seenSchemas the set handed to retrieve, possibly null
+   * @return the instance to cache for this seed
+   * @throws Exception if the instance cannot be created
+   */
+  protected abstract Instance makeInstance(SeedObject hv,
+      Set<SeedObject> seenSchemas) throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java
new file mode 100644
index 0000000..3470514
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
+import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils;
+
+/**
+ * Holds a sentinel Avro schema, {@link #SIGNAL_BAD_SCHEMA}, built from a fixed JSON
+ * record definition. The record is named "CannotDetermineSchemaSentinel" and its
+ * string fields are named so that, read in order, they spell out an error message
+ * (ERROR..., Cannot_determine_schema, check, schema, url, and, literal).
+ * NOTE(review): presumably this schema is substituted when the real schema cannot
+ * be resolved - confirm against the callers.
+ */
+@LimitedPrivate("Hive")
+public class SchemaResolutionProblem {
+ // JSON text of the sentinel record schema; every field is of Avro type "string".
+ static final String sentinelString = "{\n" +
+ " \"namespace\": \"org.apache.hadoop.hive\",\n" +
+ " \"name\": \"CannotDetermineSchemaSentinel\",\n" +
+ " \"type\": \"record\",\n" +
+ " \"fields\": [\n" +
+ " {\n" +
+ " \"name\":\"ERROR_ERROR_ERROR_ERROR_ERROR_ERROR_ERROR\",\n" +
+ " \"type\":\"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"Cannot_determine_schema\",\n" +
+ " \"type\":\"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"check\",\n" +
+ " \"type\":\"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"schema\",\n" +
+ " \"type\":\"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"url\",\n" +
+ " \"type\":\"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"and\",\n" +
+ " \"type\":\"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"literal\",\n" +
+ " \"type\":\"string\"\n" +
+ " }\n" +
+ " ]\n" +
+ "}";
+ // Schema object obtained by passing the sentinel JSON to AvroSchemaUtils.getSchemaFor.
+ // Initialized after sentinelString because static initializers run in textual order.
+ public final static Schema SIGNAL_BAD_SCHEMA = AvroSchemaUtils.getSchemaFor(sentinelString);
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/40ee74eb/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToMetastoreTypeInfo.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToMetastoreTypeInfo.java
new file mode 100644
index 0000000..b40a241
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToMetastoreTypeInfo.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.hadoop.hive.serde2.typeinfo.MetastoreTypeInfoFactory;
+
+/**
+ * SchemaToTypeInfo variant that is constructed with the MetastoreTypeInfoFactory
+ * instance. A single shared instance is created when the class is initialized and
+ * is handed out through {@link #getInstance()}.
+ */
+public class SchemaToMetastoreTypeInfo extends SchemaToTypeInfo {
+  // The one shared instance, created eagerly during class initialization.
+  private static final SchemaToMetastoreTypeInfo INSTANCE = new SchemaToMetastoreTypeInfo();
+
+  /** Private so that the shared instance is the only one created by this class. */
+  private SchemaToMetastoreTypeInfo() {
+    super(MetastoreTypeInfoFactory.getInstance());
+  }
+
+  /**
+   * @return the shared SchemaToMetastoreTypeInfo instance
+   */
+  public static final SchemaToMetastoreTypeInfo getInstance() {
+    return INSTANCE;
+  }
+}
\ No newline at end of file