You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/10 23:41:17 UTC
svn commit: r1624140 [1/3] - in /hive/branches/cbo: ./
common/src/java/org/apache/hadoop/hive/conf/ hbase-handler/
hbase-handler/if/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/
hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/ hbase-...
Author: gunther
Date: Wed Sep 10 21:41:16 2014
New Revision: 1624140
URL: http://svn.apache.org/r1624140
Log:
Merge latest trunk into cbo branch. (Gunther Hagleitner)
Added:
hive/branches/cbo/hbase-handler/if/
- copied from r1624134, hive/trunk/hbase-handler/if/
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
- copied unchanged from r1624134, hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
- copied from r1624134, hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
- copied unchanged from r1624134, hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/
- copied from r1624134, hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
- copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
- copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
- copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java
- copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
- copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
hive/branches/cbo/ql/src/test/queries/clientpositive/orc_ppd_boolean.q
- copied unchanged from r1624134, hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q
hive/branches/cbo/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q
- copied unchanged from r1624134, hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q
hive/branches/cbo/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out
- copied unchanged from r1624134, hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out
- copied unchanged from r1624134, hive/trunk/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
- copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
- copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
- copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
- copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
- copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
Removed:
hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/service/server/TestHiveServer2Concurrency.java
Modified:
hive/branches/cbo/ (props changed)
hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/branches/cbo/hbase-handler/pom.xml (contents, props changed)
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/AggregateDefinition.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
hive/branches/cbo/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q
hive/branches/cbo/ql/src/test/queries/clientpositive/input_lazyserde.q
hive/branches/cbo/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/input_lazyserde.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
hive/branches/cbo/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
Propchange: hive/branches/cbo/
------------------------------------------------------------------------------
Merged /hive/trunk:r1623597-1624134
Modified: hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Sep 10 21:41:16 2014
@@ -103,7 +103,6 @@ public class HiveConf extends Configurat
* be recreated so that the change will take effect.
*/
public static final HiveConf.ConfVars[] metaVars = {
- HiveConf.ConfVars.METASTOREDIRECTORY,
HiveConf.ConfVars.METASTOREWAREHOUSE,
HiveConf.ConfVars.METASTOREURIS,
HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES,
@@ -346,9 +345,7 @@ public class HiveConf extends Configurat
MAPREDSETUPCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false, "", true),
MAPREDTASKCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false, "", true),
- // Metastore stuff. Be sure to update HiveConf.metaVars when you add
- // something here!
- METASTOREDIRECTORY("hive.metastore.metadb.dir", "", ""),
+ // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here!
METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse",
"location of default database for the warehouse"),
METASTOREURIS("hive.metastore.uris", "",
@@ -1716,6 +1713,9 @@ public class HiveConf extends Configurat
HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false,
"This flag should be set to true to enable vectorized mode of query execution.\n" +
"The default value is false."),
+ HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true,
+ "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" +
+ "The default value is true."),
HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000,
"Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."),
HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000,
Modified: hive/branches/cbo/hbase-handler/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/pom.xml?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/pom.xml (original)
+++ hive/branches/cbo/hbase-handler/pom.xml Wed Sep 10 21:41:16 2014
@@ -57,6 +57,11 @@
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>1.7.6</version>
+ </dependency>
</dependencies>
<profiles>
@@ -101,6 +106,12 @@
<version>${hbase.hadoop1.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop-20S.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.hadoop1.version}</version>
@@ -134,11 +145,25 @@
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop-23.version}</version>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop-23.version}</version>
<optional>true</optional>
</dependency>
<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop-23.version}</version>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop2-compat</artifactId>
<version>${hbase.hadoop2.version}</version>
@@ -190,6 +215,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-servlet</artifactId>
+ <version>${jersey.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</profile>
</profiles>
@@ -209,6 +240,42 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-maven-plugin</artifactId>
+ <version>1.7.6</version>
+ <executions>
+ <execution>
+ <phase>generate-test-sources</phase>
+ <goals>
+ <goal>protocol</goal>
+ </goals>
+ <configuration>
+ <testSourceDirectory>${project.basedir}/if/test</testSourceDirectory>
+ <testOutputDirectory>${project.basedir}/src/test</testOutputDirectory>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>1.7</version>
+ <executions>
+ <execution>
+ <id>add-test-sources</id>
+ <phase>generate-test-sources</phase>
+ <goals>
+ <goal>add-test-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${project.basedir}/src/gen/avro/gen-java</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
Propchange: hive/branches/cbo/hbase-handler/pom.xml
------------------------------------------------------------------------------
Merged /hive/trunk/hbase-handler/pom.xml:r1623597-1624134
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java Wed Sep 10 21:41:16 2014
@@ -23,16 +23,21 @@
package org.apache.hadoop.hive.hbase;
-import com.google.common.collect.Iterators;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.common.collect.Iterators;
public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
@@ -53,24 +58,41 @@ public class ColumnMappings implements I
return columnsMapping.length;
}
- String toTypesString() {
+ String toNamesString(Properties tbl, String autogenerate) {
+ if (autogenerate != null && autogenerate.equals("true")) {
+ StringBuilder sb = new StringBuilder();
+ HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
+ return sb.toString();
+ }
+
+ return StringUtils.EMPTY; // return empty string
+ }
+
+ String toTypesString(Properties tbl, Configuration conf, String autogenerate)
+ throws SerDeException {
StringBuilder sb = new StringBuilder();
- for (ColumnMapping colMap : columnsMapping) {
- if (sb.length() > 0) {
- sb.append(":");
- }
- if (colMap.hbaseRowKey) {
- // the row key column becomes a STRING
- sb.append(serdeConstants.STRING_TYPE_NAME);
- } else if (colMap.qualifierName == null) {
- // a column family become a MAP
- sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
- + serdeConstants.STRING_TYPE_NAME + ">");
- } else {
- // an individual column becomes a STRING
- sb.append(serdeConstants.STRING_TYPE_NAME);
+
+ if (autogenerate != null && autogenerate.equals("true")) {
+ HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
+ } else {
+ for (ColumnMapping colMap : columnsMapping) {
+ if (sb.length() > 0) {
+ sb.append(":");
+ }
+ if (colMap.hbaseRowKey) {
+ // the row key column becomes a STRING
+ sb.append(serdeConstants.STRING_TYPE_NAME);
+ } else if (colMap.qualifierName == null) {
+ // a column family become a MAP
+ sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+ + serdeConstants.STRING_TYPE_NAME + ">");
+ } else {
+ // an individual column becomes a STRING
+ sb.append(serdeConstants.STRING_TYPE_NAME);
+ }
}
}
+
return sb.toString();
}
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java Wed Sep 10 21:41:16 2014
@@ -19,7 +19,9 @@
package org.apache.hadoop.hive.hbase;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
@@ -94,4 +96,14 @@ public class HBaseCompositeKey extends L
return lazyObject;
}
+
+ /**
+ * Return the different parts of the key. By default, this returns an empty map. Consumers can
+ * choose to override this to provide their own names and types of parts of the key.
+ *
+ * @return map of parts name to their type
+ * */
+ public Map<String, String> getParts() {
+ return Collections.emptyMap();
+ }
}
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java Wed Sep 10 21:41:16 2014
@@ -18,32 +18,31 @@
package org.apache.hadoop.hive.hbase;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.util.ArrayList;
-import java.util.List;
-
// Does same thing with LazyFactory#createLazyObjectInspector except that this replaces
// original keyOI with OI which is create by HBaseKeyFactory provided by serde property for hbase
public class HBaseLazyObjectFactory {
public static ObjectInspector createLazyHBaseStructInspector(
- SerDeParameters serdeParams, int index, HBaseKeyFactory factory) throws SerDeException {
+ SerDeParameters serdeParams, int index, HBaseKeyFactory keyFactory, List<HBaseValueFactory> valueFactories) throws SerDeException {
List<TypeInfo> columnTypes = serdeParams.getColumnTypes();
ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>(
columnTypes.size());
for (int i = 0; i < columnTypes.size(); i++) {
if (i == index) {
- columnObjectInspectors.add(factory.createKeyObjectInspector(columnTypes.get(i)));
+ columnObjectInspectors.add(keyFactory.createKeyObjectInspector(columnTypes.get(i)));
} else {
- columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
- columnTypes.get(i), serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
- serdeParams.isEscaped(), serdeParams.getEscapeChar()));
+ columnObjectInspectors.add(valueFactories.get(i).createValueObjectInspector(
+ columnTypes.get(i)));
}
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
@@ -51,4 +50,4 @@ public class HBaseLazyObjectFactory {
serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(),
serdeParams.isEscaped(), serdeParams.getEscapeChar());
}
-}
+}
\ No newline at end of file
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java Wed Sep 10 21:41:16 2014
@@ -18,6 +18,10 @@
package org.apache.hadoop.hive.hbase;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
import org.apache.hadoop.hive.serde2.ByteStream;
@@ -34,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
public class HBaseRowSerializer {
private final HBaseKeyFactory keyFactory;
@@ -279,6 +279,10 @@ public class HBaseRowSerializer {
}
}
return true;
+ case UNION: {
+ // union type currently not totally supported. See HIVE-2390
+ return false;
+ }
default:
throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java Wed Sep 10 21:41:16 2014
@@ -18,6 +18,10 @@
package org.apache.hadoop.hive.hbase;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -25,6 +29,7 @@ import org.apache.hadoop.hbase.util.Byte
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -33,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
/**
* HBaseSerDe can be used to serialize object into an HBase table and
* deserialize objects from an HBase table.
@@ -50,15 +51,21 @@ public class HBaseSerDe extends Abstract
public static final String HBASE_KEY_COL = ":key";
public static final String HBASE_PUT_TIMESTAMP = "hbase.put.timestamp";
public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class";
+ public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types";
public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory";
public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
+ public static final String HBASE_AUTOGENERATE_STRUCT = "hbase.struct.autogenerate";
/**
- * Determines whether a regex matching should be done on the columns or not. Defaults to true.
- * <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
+ * Determines whether a regex matching should be done on the columns or not. Defaults to true.
+ * <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
*/
public static final String HBASE_COLUMNS_REGEX_MATCHING = "hbase.columns.mapping.regex.matching";
+ /**
+ * Defines the type for a column.
+ **/
+ public static final String SERIALIZATION_TYPE = "serialization.type";
private ObjectInspector cachedObjectInspector;
private LazyHBaseRow cachedHBaseRow;
@@ -83,8 +90,11 @@ public class HBaseSerDe extends Abstract
throws SerDeException {
serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName());
- cachedObjectInspector = HBaseLazyObjectFactory.createLazyHBaseStructInspector(
- serdeParams.getSerdeParams(), serdeParams.getKeyIndex(), serdeParams.getKeyFactory());
+ cachedObjectInspector =
+ HBaseLazyObjectFactory
+ .createLazyHBaseStructInspector(serdeParams.getSerdeParams(),
+ serdeParams.getKeyIndex(), serdeParams.getKeyFactory(),
+ serdeParams.getValueFactories());
cachedHBaseRow = new LazyHBaseRow(
(LazySimpleStructObjectInspector) cachedObjectInspector,
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java Wed Sep 10 21:41:16 2014
@@ -18,13 +18,20 @@
package org.apache.hadoop.hive.hbase;
+import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -37,10 +44,12 @@ import org.apache.hadoop.util.Reflection
*/
public class HBaseSerDeParameters {
+ public static final String AVRO_SERIALIZATION_TYPE = "avro";
+ public static final String STRUCT_SERIALIZATION_TYPE = "struct";
+
private final SerDeParameters serdeParams;
private final Configuration job;
- private final Properties tbl;
private final String columnMappingString;
private final ColumnMappings columnMappings;
@@ -48,57 +57,50 @@ public class HBaseSerDeParameters {
private final long putTimestamp;
private final HBaseKeyFactory keyFactory;
+ private final List<HBaseValueFactory> valueFactories;
HBaseSerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException {
this.job = job;
- this.tbl = tbl;
- this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
- this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
// Read configuration parameters
columnMappingString = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
- doColumnRegexMatching = Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+ doColumnRegexMatching =
+ Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
// Parse and initialize the HBase columns mapping
columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching);
- columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), serdeParams.getColumnTypes());
-
- // Precondition: make sure this is done after the rest of the SerDe initialization is done.
- String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
- columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
// Build the type property string if not supplied
String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
- if (columnTypeProperty == null) {
- tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnMappings.toTypesString());
- }
+ String autogenerate = tbl.getProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT);
- this.keyFactory = initKeyFactory(job, tbl);
- }
+ if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
+ String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
+ if (columnNameProperty == null || columnNameProperty.isEmpty()) {
+ if (autogenerate == null || autogenerate.isEmpty()) {
+ throw new IllegalArgumentException("Either the columns must be specified or the "
+ + HBaseSerDe.HBASE_AUTOGENERATE_STRUCT + " property must be set to true.");
+ }
- private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
- try {
- HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl);
- if (keyFactory != null) {
- keyFactory.init(this, tbl);
+ tbl.setProperty(serdeConstants.LIST_COLUMNS,
+ columnMappings.toNamesString(tbl, autogenerate));
}
- return keyFactory;
- } catch (Exception e) {
- throw new SerDeException(e);
- }
- }
- private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl) throws Exception {
- String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
- if (factoryClassName != null) {
- Class<?> factoryClazz = Class.forName(factoryClassName);
- return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+ tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+ columnMappings.toTypesString(tbl, job, autogenerate));
}
- String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
- if (keyClassName != null) {
- Class<?> keyClass = Class.forName(keyClassName);
- return new CompositeHBaseKeyFactory(keyClass);
- }
- return new DefaultHBaseKeyFactory();
+
+ this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
+ this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
+
+ columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(),
+ serdeParams.getColumnTypes());
+
+ // Precondition: make sure this is done after the rest of the SerDe initialization is done.
+ String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
+ columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
+
+ this.keyFactory = initKeyFactory(job, tbl);
+ this.valueFactories = initValueFactories(job, tbl);
}
public List<String> getColumnNames() {
@@ -133,6 +135,10 @@ public class HBaseSerDeParameters {
return keyFactory;
}
+ public List<HBaseValueFactory> getValueFactories() {
+ return valueFactories;
+ }
+
public Configuration getBaseConfiguration() {
return job;
}
@@ -151,4 +157,190 @@ public class HBaseSerDeParameters {
public String toString() {
return "[" + columnMappingString + ":" + getColumnNames() + ":" + getColumnTypes() + "]";
}
-}
+
+ private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
+ try {
+ HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl);
+ if (keyFactory != null) {
+ keyFactory.init(this, tbl);
+ }
+ return keyFactory;
+ } catch (Exception e) {
+ throw new SerDeException(e);
+ }
+ }
+
+ private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl)
+ throws Exception {
+ String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
+ if (factoryClassName != null) {
+ Class<?> factoryClazz = Class.forName(factoryClassName);
+ return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+ }
+ String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+ if (keyClassName != null) {
+ Class<?> keyClass = Class.forName(keyClassName);
+ return new CompositeHBaseKeyFactory(keyClass);
+ }
+ return new DefaultHBaseKeyFactory();
+ }
+
+ private List<HBaseValueFactory> initValueFactories(Configuration conf, Properties tbl)
+ throws SerDeException {
+ List<HBaseValueFactory> valueFactories = createValueFactories(conf, tbl);
+
+ for (HBaseValueFactory valueFactory : valueFactories) {
+ valueFactory.init(this, conf, tbl);
+ }
+
+ return valueFactories;
+ }
+
+ private List<HBaseValueFactory> createValueFactories(Configuration conf, Properties tbl)
+ throws SerDeException {
+ List<HBaseValueFactory> valueFactories = new ArrayList<HBaseValueFactory>();
+
+ try {
+ for (int i = 0; i < columnMappings.size(); i++) {
+ String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]);
+
+ if (serType != null && serType.equals(AVRO_SERIALIZATION_TYPE)) {
+ Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]);
+ valueFactories.add(new AvroHBaseValueFactory(schema));
+ } else {
+ valueFactories.add(new DefaultHBaseValueFactory());
+ }
+ }
+ } catch (Exception e) {
+ throw new SerDeException(e);
+ }
+
+ return valueFactories;
+ }
+
+ /**
+ * Get the configured serialization type for the given {@link ColumnMapping colMap}, or null if unset
+ * */
+ private String getSerializationType(Configuration conf, Properties tbl,
+ ColumnMapping colMap) throws Exception {
+ String serType = null;
+
+ if (colMap.qualifierName == null) {
+ // only a column family
+
+ if (colMap.qualifierPrefix != null) {
+ serType = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + HBaseSerDe.SERIALIZATION_TYPE);
+ } else {
+ serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+ }
+ } else if (!colMap.hbaseRowKey) {
+ // not an hbase row key. This should either be a prefix or an individual qualifier
+ String qualifierName = colMap.qualifierName;
+
+ if (colMap.qualifierName.endsWith("*")) {
+ qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+ }
+
+ serType =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + HBaseSerDe.SERIALIZATION_TYPE);
+ }
+
+ return serType;
+ }
+
+ private Schema getSchema(Configuration conf, Properties tbl, ColumnMapping colMap)
+ throws Exception {
+ String serType = null;
+ String serClassName = null;
+ String schemaLiteral = null;
+ String schemaUrl = null;
+
+ if (colMap.qualifierName == null) {
+ // only a column family
+
+ if (colMap.qualifierPrefix != null) {
+ serType =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + HBaseSerDe.SERIALIZATION_TYPE);
+
+ serClassName =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + serdeConstants.SERIALIZATION_CLASS);
+
+ schemaLiteral =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + AvroSerdeUtils.SCHEMA_LITERAL);
+
+ schemaUrl =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + AvroSerdeUtils.SCHEMA_URL);
+ } else {
+ serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+
+ serClassName =
+ tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
+
+ schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL);
+
+ schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL);
+ }
+ } else if (!colMap.hbaseRowKey) {
+ // not an hbase row key. This should either be a prefix or an individual qualifier
+ String qualifierName = colMap.qualifierName;
+
+ if (colMap.qualifierName.endsWith("*")) {
+ qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+ }
+
+ serType =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + HBaseSerDe.SERIALIZATION_TYPE);
+
+ serClassName =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + serdeConstants.SERIALIZATION_CLASS);
+
+ schemaLiteral =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + AvroSerdeUtils.SCHEMA_LITERAL);
+
+ schemaUrl =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroSerdeUtils.SCHEMA_URL);
+ }
+
+ String avroSchemaRetClass = tbl.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER);
+
+ if (schemaLiteral == null && serClassName == null && schemaUrl == null
+ && avroSchemaRetClass == null) {
+ throw new IllegalArgumentException("serialization.type was set to [" + serType
+ + "] but neither " + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL
+ + ", serialization.class or " + AvroSerdeUtils.SCHEMA_RETRIEVER + " property was set");
+ }
+
+ Class<?> deserializerClass = null;
+
+ if (serClassName != null) {
+ deserializerClass = conf.getClassByName(serClassName);
+ }
+
+ Schema schema = null;
+
+ // only worry about getting schema if we are dealing with Avro
+ if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+ if (avroSchemaRetClass == null) {
+ // bother about generating a schema only if a schema retriever class wasn't provided
+ if (schemaLiteral != null) {
+ schema = Schema.parse(schemaLiteral);
+ } else if (schemaUrl != null) {
+ schema = HBaseSerDeHelper.getSchemaFromFS(schemaUrl, conf);
+ } else if (deserializerClass != null) {
+ schema = ReflectData.get().getSchema(deserializerClass);
+ }
+ }
+ }
+
+ return schema;
+ }
+}
\ No newline at end of file
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java Wed Sep 10 21:41:16 2014
@@ -149,8 +149,16 @@ public class LazyHBaseCellMap extends La
}
if (keyI.equals(key)) {
// Got a match, return the value
- LazyObject<?> v = (LazyObject<?>) entry.getValue();
- return v == null ? v : v.getObject();
+ Object _value = entry.getValue();
+
+ // Only unwrap the value via getObject() if it is actually a LazyObject.
+ // Otherwise return it as it is.
+ if (_value instanceof LazyObject) {
+ LazyObject<?> v = (LazyObject<?>) entry.getValue();
+ return v == null ? v : v.getObject();
+ } else {
+ return _value;
+ }
}
}
Modified: hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java (original)
+++ hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java Wed Sep 10 21:41:16 2014
@@ -22,23 +22,45 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import junit.framework.TestCase;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.hbase.avro.Address;
+import org.apache.hadoop.hive.hbase.avro.ContactInfo;
+import org.apache.hadoop.hive.hbase.avro.Employee;
+import org.apache.hadoop.hive.hbase.avro.Gender;
+import org.apache.hadoop.hive.hbase.avro.HomePhone;
+import org.apache.hadoop.hive.hbase.avro.OfficePhone;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
@@ -53,6 +75,66 @@ import org.apache.thrift.TException;
*/
public class TestHBaseSerDe extends TestCase {
+ static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test");
+
+ private static final String RECORD_SCHEMA = "{\n" +
+ " \"namespace\": \"testing.test.mctesty\",\n" +
+ " \"name\": \"oneRecord\",\n" +
+ " \"type\": \"record\",\n" +
+ " \"fields\": [\n" +
+ " {\n" +
+ " \"name\":\"aRecord\",\n" +
+ " \"type\":{\"type\":\"record\",\n" +
+ " \"name\":\"recordWithinARecord\",\n" +
+ " \"fields\": [\n" +
+ " {\n" +
+ " \"name\":\"int1\",\n" +
+ " \"type\":\"int\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"boolean1\",\n" +
+ " \"type\":\"boolean\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"long1\",\n" +
+ " \"type\":\"long\"\n" +
+ " }\n" +
+ " ]}\n" +
+ " }\n" +
+ " ]\n" +
+ "}";
+
+ private static final String RECORD_SCHEMA_EVOLVED = "{\n" +
+ " \"namespace\": \"testing.test.mctesty\",\n" +
+ " \"name\": \"oneRecord\",\n" +
+ " \"type\": \"record\",\n" +
+ " \"fields\": [\n" +
+ " {\n" +
+ " \"name\":\"aRecord\",\n" +
+ " \"type\":{\"type\":\"record\",\n" +
+ " \"name\":\"recordWithinARecord\",\n" +
+ " \"fields\": [\n" +
+ " {\n" +
+ " \"name\":\"int1\",\n" +
+ " \"type\":\"int\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"string1\",\n" +
+ " \"type\":\"string\", \"default\": \"test\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"boolean1\",\n" +
+ " \"type\":\"boolean\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\":\"long1\",\n" +
+ " \"type\":\"long\"\n" +
+ " }\n" +
+ " ]}\n" +
+ " }\n" +
+ " ]\n" +
+ "}";
+
/**
* Test the default behavior of the Lazy family of objects and object inspectors.
*/
@@ -551,7 +633,7 @@ public class TestHBaseSerDe extends Test
"key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
"string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>:"
- + "map<float,float>:map<double,double>:map<boolean,boolean>");
+ + "map<float,float>:map<double,double>:map<boolean,boolean>");
tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
":key#-,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b,cf-float:#b:b,cf-double:#b:b," +
"cf-bool:#b:b");
@@ -565,7 +647,7 @@ public class TestHBaseSerDe extends Test
"key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
"string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>:"
- + "map<float,float>:map<double,double>:map<boolean,boolean>");
+ + "map<float,float>:map<double,double>:map<boolean,boolean>");
tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
":key#-,cf-int:#-:-,cf-byte:#-:-,cf-short:#-:-,cf-long:#-:-,cf-float:#-:-,cf-double:#-:-," +
"cf-bool:#-:-");
@@ -636,7 +718,7 @@ public class TestHBaseSerDe extends Test
"key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
"string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:"
- + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
+ + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
":key#s,cf-byte:#-:s,cf-short:#s:-,cf-int:#s:s,cf-long:#-:-,cf-float:#s:-,cf-double:#-:s," +
"cf-string:#s:s,cf-bool:#-:-");
@@ -650,7 +732,7 @@ public class TestHBaseSerDe extends Test
"key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
"string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:"
- + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
+ + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
":key#s,cf-byte:#s:s,cf-short:#s:s,cf-int:#s:s,cf-long:#s:s,cf-float:#s:s,cf-double:#s:s," +
"cf-string:#s:s,cf-bool:#s:s");
@@ -934,6 +1016,592 @@ public class TestHBaseSerDe extends Test
assertEquals("Serialized put:", p.toString(), put.toString());
}
+ public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvro = "avro".getBytes();
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForHiveAvroSchemaInline();
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ }
+
+ private Properties createPropertiesForHiveAvroSchemaInline() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.avro.serialization.type", "avro");
+ tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+ return tbl;
+ }
+
+ public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvro = "avro".getBytes();
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Object[] expectedFieldsData = {new String("test-row1"),
+ new String("[[42, test, true, 42432234234]]")};
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema();
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ }
+
+ private Properties createPropertiesForHiveAvroForwardEvolvedSchema() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.avro.serialization.type", "avro");
+ tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA_EVOLVED);
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+ return tbl;
+ }
+
+ public void testHBaseSerDeWithBackwardEvolvedSchema() throws SerDeException, IOException {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvro = "avro".getBytes();
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA_EVOLVED);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema();
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ }
+
+ private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.avro.serialization.type", "avro");
+ tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+ return tbl;
+ }
+
+ public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvro = "avro".getBytes();
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroData = getTestAvroBytesFromClass1(1);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Object[] expectedFieldsData = {
+ new String("test-row1"),
+ new String(
+ "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], "
+ + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+ + "[999, 1234567890], [999, 1234455555]]]")};
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForHiveAvroSerClass();
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ }
+
+ private Properties createPropertiesForHiveAvroSerClass() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.avro.serialization.type", "avro");
+ tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
+ "org.apache.hadoop.hive.hbase.avro.Employee");
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+ return tbl;
+ }
+
+ public void testHBaseSerDeWithAvroSchemaUrl() throws SerDeException, IOException {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvro = "avro".getBytes();
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+ MiniDFSCluster miniDfs = null;
+
+ try {
+ // MiniDFSCluster litters files and folders all over the place.
+ miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);
+
+ miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
+ FSDataOutputStream out = miniDfs.getFileSystem().create(
+ new Path("/path/to/schema/schema.avsc"));
+ out.writeBytes(RECORD_SCHEMA);
+ out.close();
+ String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ } finally {
+ // Teardown the cluster
+ if (miniDfs != null) {
+ miniDfs.shutdown();
+ }
+ }
+ }
+
+ private Properties createPropertiesForHiveAvroSchemaUrl(String schemaUrl) {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.avro.serialization.type", "avro");
+ tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_URL, schemaUrl);
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+ return tbl;
+ }
+
+ public void testHBaseSerDeWithAvroExternalSchema() throws SerDeException, IOException {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvro = "avro".getBytes();
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroData = getTestAvroBytesFromClass2(1);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+ Object[] expectedFieldsData = {
+ new String("test-row1"),
+ new String(
+ "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+ + "[999, 1234567890], [999, 1234455555]]]")};
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+
+ Properties tbl = createPropertiesForHiveAvroExternalSchema();
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ }
+
+ private Properties createPropertiesForHiveAvroExternalSchema() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.avro.serialization.type", "avro");
+ tbl.setProperty(AvroSerdeUtils.SCHEMA_RETRIEVER,
+ "org.apache.hadoop.hive.hbase.HBaseTestAvroSchemaRetriever");
+ tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
+ "org.apache.hadoop.hive.hbase.avro.Employee");
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+ return tbl;
+ }
+
+ public void testHBaseSerDeWithHiveMapToHBaseAvroColumnFamily() throws Exception {
+ byte[] cfa = "cola".getBytes();
+
+ byte[] qualAvroA = "prefixA_avro1".getBytes();
+ byte[] qualAvroB = "prefixB_avro2".getBytes();
+ byte[] qualAvroC = "prefixB_avro3".getBytes();
+
+ List<Object> qualifiers = new ArrayList<Object>();
+ qualifiers.add(new Text("prefixA_avro1"));
+ qualifiers.add(new Text("prefixB_avro2"));
+ qualifiers.add(new Text("prefixB_avro3"));
+
+ List<Object> expectedQualifiers = new ArrayList<Object>();
+ expectedQualifiers.add(new Text("prefixB_avro2"));
+ expectedQualifiers.add(new Text("prefixB_avro3"));
+
+ byte[] rowKey = Bytes.toBytes("test-row1");
+
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] avroDataA = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+ byte[] avroDataB = getTestAvroBytesFromClass1(1);
+ byte[] avroDataC = getTestAvroBytesFromClass1(2);
+
+ kvs.add(new KeyValue(rowKey, cfa, qualAvroA, avroDataA));
+ kvs.add(new KeyValue(rowKey, cfa, qualAvroB, avroDataB));
+ kvs.add(new KeyValue(rowKey, cfa, qualAvroC, avroDataC));
+
+ Result r = new Result(kvs);
+
+ Put p = new Put(rowKey);
+
+ // Post serialization, separators are automatically inserted between different fields in the
+ // struct. Currently there is no way to disable that. So the workaround here is to pad the
+ // data with the separator bytes before creating a "Put" object
+ p.add(new KeyValue(rowKey, cfa, qualAvroB, Bytes.padTail(avroDataB, 11)));
+ p.add(new KeyValue(rowKey, cfa, qualAvroC, Bytes.padTail(avroDataC, 11)));
+
+ Object[] expectedFieldsData = {
+ new Text("test-row1"),
+ new String(
+ "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+ + "[999, 1234567890], [999, 1234455555]]]"),
+ new String(
+ "[Avro Employee2, 11111, 25, FEMALE, [[[Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+ + "[999, 1234567890], [999, 1234455555]]]")};
+
+ int[] expectedMapSize = new int[] {2};
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForHiveAvroColumnFamilyMap();
+ serDe.initialize(conf, tbl);
+
+ Object notPresentKey = new Text("prefixA_avro1");
+
+ deserializeAndSerializeHiveStructColumnFamily(serDe, r, p, expectedFieldsData, expectedMapSize,
+ expectedQualifiers,
+ notPresentKey);
+ }
+
+ private Properties createPropertiesForHiveAvroColumnFamilyMap() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.prefixB_.serialization.type", "avro");
+ tbl.setProperty("cola.prefixB_." + serdeConstants.SERIALIZATION_CLASS,
+ "org.apache.hadoop.hive.hbase.avro.Employee");
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, "cola:prefixB_.*");
+ tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+ tbl.setProperty(LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true");
+
+ return tbl;
+ }
+
+ private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p,
+ Object[] expectedFieldsData)
+ throws SerDeException, IOException {
+ StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+ List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+ Object row = serDe.deserialize(new ResultWritable(r));
+
+ for (int j = 0; j < fieldRefs.size(); j++) {
+ Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
+ assertNotNull(fieldData);
+ assertEquals(expectedFieldsData[j], fieldData.toString().trim());
+ }
+
+ // Now serialize
+ Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
+
+ assertNotNull(put);
+ assertEquals(p.getFamilyCellMap(), put.getFamilyCellMap());
+ }
+
+ private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p,
+ Object[] expectedFieldsData,
+ int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey)
+ throws SerDeException, IOException {
+ StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+ List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+ Object row = serDe.deserialize(new ResultWritable(r));
+
+ int k = 0;
+
+ for (int i = 0; i < fieldRefs.size(); i++) {
+ Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
+ assertNotNull(fieldData);
+
+ if (fieldData instanceof LazyPrimitive<?, ?>) {
+ assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
+ } else if (fieldData instanceof LazyHBaseCellMap) {
+
+ for (int j = 0; j < ((LazyHBaseCellMap) fieldData).getMapSize(); j++) {
+ assertEquals(expectedFieldsData[k + 1],
+ ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k))
+ .toString().trim());
+ k++;
+ }
+
+ assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize());
+
+ // Make sure that the unwanted key is not present in the map
+ assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));
+
+ } else {
+ fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
+ }
+ }
+
+ SerDeUtils.getJSONString(row, soi);
+
+ // Now serialize
+ Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
+
+ assertNotNull(put);
+ }
+
+ private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
+ Schema s = Schema.parse(schemaToUse);
+ GenericData.Record record = new GenericData.Record(s);
+ GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
+ innerRecord.put("int1", 42);
+ innerRecord.put("boolean1", true);
+ innerRecord.put("long1", 42432234234l);
+
+ if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
+ innerRecord.put("string1", "new value");
+ }
+
+ record.put("aRecord", innerRecord);
+
+ DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+ DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
+ dataFileWriter.create(s, out);
+ dataFileWriter.append(record);
+ dataFileWriter.close();
+
+ byte[] data = out.toByteArray();
+
+ out.close();
+ return data;
+ }
+
+ private byte[] getTestAvroBytesFromClass1(int i) throws IOException {
+ Employee employee = new Employee();
+
+ employee.setEmployeeName("Avro Employee" + i);
+ employee.setEmployeeID(11111L);
+ employee.setGender(Gender.FEMALE);
+ employee.setAge(25L);
+
+ Address address = new Address();
+
+ address.setAddress1("Avro First Address" + i);
+ address.setAddress2("Avro Second Address" + i);
+ address.setCity("Avro City" + i);
+ address.setZipcode(123456L);
+
+ Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();
+
+ metadata.put("testkey", "testvalue");
+
+ address.setMetadata(metadata);
+
+ HomePhone hPhone = new HomePhone();
+
+ hPhone.setAreaCode(999L);
+ hPhone.setNumber(1234567890L);
+
+ OfficePhone oPhone = new OfficePhone();
+
+ oPhone.setAreaCode(999L);
+ oPhone.setNumber(1234455555L);
+
+ ContactInfo contact = new ContactInfo();
+
+ List<Address> addresses = new ArrayList<Address>();
+ address.setCounty(hPhone); // set value for the union type
+ addresses.add(address);
+ addresses.add(address);
+
+ contact.setAddress(addresses);
+
+ contact.setHomePhone(hPhone);
+ contact.setOfficePhone(oPhone);
+
+ employee.setContactInfo(contact);
+
+ DatumWriter<Employee> datumWriter = new SpecificDatumWriter<Employee>(Employee.class);
+ DataFileWriter<Employee> dataFileWriter = new DataFileWriter<Employee>(datumWriter);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+ dataFileWriter.create(employee.getSchema(), out);
+ dataFileWriter.append(employee);
+ dataFileWriter.close();
+
+ return out.toByteArray();
+ }
+
+ private byte[] getTestAvroBytesFromClass2(int i) throws IOException {
+ Employee employee = new Employee();
+
+ employee.setEmployeeName("Avro Employee" + i);
+ employee.setEmployeeID(11111L);
+ employee.setGender(Gender.FEMALE);
+ employee.setAge(25L);
+
+ Address address = new Address();
+
+ address.setAddress1("Avro First Address" + i);
+ address.setAddress2("Avro Second Address" + i);
+ address.setCity("Avro City" + i);
+ address.setZipcode(123456L);
+
+ Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();
+
+ metadata.put("testkey", "testvalue");
+
+ address.setMetadata(metadata);
+
+ HomePhone hPhone = new HomePhone();
+
+ hPhone.setAreaCode(999L);
+ hPhone.setNumber(1234567890L);
+
+ OfficePhone oPhone = new OfficePhone();
+
+ oPhone.setAreaCode(999L);
+ oPhone.setNumber(1234455555L);
+
+ ContactInfo contact = new ContactInfo();
+
+ List<Address> addresses = new ArrayList<Address>();
+ address.setCounty(hPhone); // set value for the union type
+ addresses.add(address);
+ addresses.add(address);
+
+ contact.setAddress(addresses);
+
+ contact.setHomePhone(hPhone);
+ contact.setOfficePhone(oPhone);
+
+ employee.setContactInfo(contact);
+
+ DatumWriter<Employee> employeeWriter = new SpecificDatumWriter<Employee>(Employee.class);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+ Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
+
+ // write out a header for the payload
+ out.write(TEST_BYTE_ARRAY);
+
+ employeeWriter.write(employee, encoder);
+
+ encoder.flush();
+
+ return out.toByteArray();
+ }
+
class TestStruct {
String f1;
String f2;
Modified: hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (original)
+++ hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java Wed Sep 10 21:41:16 2014
@@ -46,6 +46,7 @@ import org.junit.Test;
public class TestJdbcWithMiniHS2 {
private static MiniHS2 miniHS2 = null;
private static Path dataFilePath;
+ private static final String tmpDir = System.getProperty("test.tmp.dir");
private Connection hs2Conn = null;
@@ -303,7 +304,7 @@ public class TestJdbcWithMiniHS2 {
* @throws Exception
*/
@Test
- public void testScratchDirs() throws Exception {
+ public void testSessionScratchDirs() throws Exception {
// Stop HiveServer2
if (miniHS2.isStarted()) {
miniHS2.stop();
@@ -314,7 +315,7 @@ public class TestJdbcWithMiniHS2 {
// 1. Test with doAs=false
conf.setBoolean("hive.server2.enable.doAs", false);
// Set a custom prefix for hdfs scratch dir path
- conf.set("hive.exec.scratchdir", "/tmp/hs2");
+ conf.set("hive.exec.scratchdir", tmpDir + "/hs2");
// Set a scratch dir permission
String fsPermissionStr = "700";
conf.set("hive.scratch.dir.permission", fsPermissionStr);
@@ -326,19 +327,21 @@ public class TestJdbcWithMiniHS2 {
hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
// FS
FileSystem fs = miniHS2.getLocalFS();
+ FsPermission expectedFSPermission = new FsPermission(HiveConf.getVar(conf,
+ HiveConf.ConfVars.SCRATCHDIRPERMISSION));
// Verify scratch dir paths and permission
// HDFS scratch dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + "/" + userName);
- verifyScratchDir(conf, fs, scratchDirPath, userName, false);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
// Local scratch dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
- verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
// Downloaded resources dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
- verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
// 2. Test with doAs=true
// Restart HiveServer2 with doAs=true
@@ -356,15 +359,15 @@ public class TestJdbcWithMiniHS2 {
// Verify scratch dir paths and permission
// HDFS scratch dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + "/" + userName);
- verifyScratchDir(conf, fs, scratchDirPath, userName, false);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
// Local scratch dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
- verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
// Downloaded resources dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
- verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
// Test for user "trinity"
userName = "trinity";
@@ -373,22 +376,61 @@ public class TestJdbcWithMiniHS2 {
// Verify scratch dir paths and permission
// HDFS scratch dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + "/" + userName);
- verifyScratchDir(conf, fs, scratchDirPath, userName, false);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
// Local scratch dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
- verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
// Downloaded resources dir
scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
- verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
+ }
+
+ /**
+ * Tests the creation of the root hdfs scratch dir, which should be writable by all (777).
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testRootScratchDir() throws Exception {
+ // Stop HiveServer2
+ if (miniHS2.isStarted()) {
+ miniHS2.stop();
+ }
+ HiveConf conf = new HiveConf();
+ String userName;
+ Path scratchDirPath;
+ conf.set("hive.exec.scratchdir", tmpDir + "/hs2");
+ // Start an instance of HiveServer2 which uses miniMR
+ miniHS2 = new MiniHS2(conf);
+ Map<String, String> confOverlay = new HashMap<String, String>();
+ miniHS2.start(confOverlay);
+ userName = System.getProperty("user.name");
+ hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
+ // FS
+ FileSystem fs = miniHS2.getLocalFS();
+ FsPermission expectedFSPermission = new FsPermission("777");
+ // Verify scratch dir paths and permission
+ // HDFS scratch dir
+ scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
+ // Test with multi-level scratch dir path
+ // Stop HiveServer2
+ if (miniHS2.isStarted()) {
+ miniHS2.stop();
+ }
+ conf.set("hive.exec.scratchdir", tmpDir + "/level1/level2/level3");
+ miniHS2 = new MiniHS2(conf);
+ miniHS2.start(confOverlay);
+ hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
+ scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));
+ verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
}
private void verifyScratchDir(HiveConf conf, FileSystem fs, Path scratchDirPath,
- String userName, boolean isLocal) throws Exception {
+ FsPermission expectedFSPermission, String userName, boolean isLocal) throws Exception {
String dirType = isLocal ? "Local" : "DFS";
- FsPermission expectedFSPermission = new FsPermission(HiveConf.getVar(conf,
- HiveConf.ConfVars.SCRATCHDIRPERMISSION));
assertTrue("The expected " + dirType + " scratch dir does not exist for the user: " +
userName, fs.exists(scratchDirPath));
if (fs.exists(scratchDirPath) && !isLocal) {
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Wed Sep 10 21:41:16 2014
@@ -210,14 +210,6 @@ public class HiveMetaStoreClient impleme
} catch (Exception e) {
MetaStoreUtils.logAndThrowMetaException(e);
}
- } else if (conf.getVar(HiveConf.ConfVars.METASTOREDIRECTORY) != null) {
- metastoreUris = new URI[1];
- try {
- metastoreUris[0] = new URI(conf
- .getVar(HiveConf.ConfVars.METASTOREDIRECTORY));
- } catch (URISyntaxException e) {
- MetaStoreUtils.logAndThrowMetaException(e);
- }
} else {
LOG.error("NOT getting uris from conf");
throw new MetaException("MetaStoreURIs not found in conf file");
@@ -1289,7 +1281,7 @@ public class HiveMetaStoreClient impleme
InvalidInputException{
return client.update_partition_column_statistics(statsObj);
}
-
+
/** {@inheritDoc} */
public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request)
throws NoSuchObjectException, InvalidObjectException, MetaException, TException,