You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/09/09 17:18:30 UTC
svn commit: r1623845 [1/3] - in /hive/trunk: hbase-handler/
hbase-handler/if/ hbase-handler/if/test/
hbase-handler/src/java/org/apache/hadoop/hive/hbase/
hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
hbase-handler/src/test/org/apache/hado...
Author: brock
Date: Tue Sep 9 15:18:29 2014
New Revision: 1623845
URL: http://svn.apache.org/r1623845
Log:
HIVE-6147 - Support avro data stored in HBase columns (Swarnim Kulkarni via Brock)
Added:
hive/trunk/hbase-handler/if/
hive/trunk/hbase-handler/if/test/
hive/trunk/hbase-handler/if/test/avro_test.avpr
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Address.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/ContactInfo.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Employee.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/EmployeeAvro.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Gender.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/HomePhone.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
Modified:
hive/trunk/hbase-handler/pom.xml
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
Added: hive/trunk/hbase-handler/if/test/avro_test.avpr
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/if/test/avro_test.avpr?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/if/test/avro_test.avpr (added)
+++ hive/trunk/hbase-handler/if/test/avro_test.avpr Tue Sep 9 15:18:29 2014
@@ -0,0 +1,144 @@
+{
+"protocol": "EmployeeAvro",
+"namespace": "org.apache.hadoop.hive.hbase.avro",
+"types": [
+{
+"type": "enum",
+"name": "Gender",
+"symbols": [
+"MALE",
+"FEMALE"
+]
+},
+{
+"type": "record",
+"name": "HomePhone",
+"fields": [
+{
+"name": "areaCode",
+"type": "long"
+},
+{
+"name": "number",
+"type": "long"
+}
+]
+},
+{
+"type": "record",
+"name": "OfficePhone",
+"fields": [
+{
+"name": "areaCode",
+"type": "long"
+},
+{
+"name": "number",
+"type": "long"
+}
+]
+},
+{
+"type": "record",
+"name": "Address",
+"fields": [
+{
+"name": "address1",
+"type": "string"
+},
+{
+"name": "address2",
+"type": "string"
+},
+{
+"name": "city",
+"type": "string"
+},
+{
+"name": "zipcode",
+"type": "long"
+},
+{
+"name": "county",
+"type": [
+"HomePhone",
+"OfficePhone",
+"string",
+"null"
+]
+},
+{
+"name": "aliases",
+"type": [
+{
+"type": "array",
+"items": "string"
+},
+"null"
+]
+},
+{
+"name": "metadata",
+"type": [
+"null",
+{
+"type": "map",
+"values": "string"
+}
+]
+}
+]
+},
+{
+"type": "record",
+"name": "ContactInfo",
+"fields": [
+{
+"name": "address",
+"type": [
+{
+"type": "array",
+"items": "Address"
+},
+"null"
+]
+},
+{
+"name": "homePhone",
+"type": "HomePhone"
+},
+{
+"name": "officePhone",
+"type": "OfficePhone"
+}
+]
+},
+{
+"type": "record",
+"name": "Employee",
+"fields": [
+{
+"name": "employeeName",
+"type": "string"
+},
+{
+"name": "employeeID",
+"type": "long"
+},
+{
+"name": "age",
+"type": "long"
+},
+{
+"name": "gender",
+"type": "Gender"
+},
+{
+"name": "contactInfo",
+"type": "ContactInfo"
+}
+]
+}
+],
+"messages": { }
+}
Modified: hive/trunk/hbase-handler/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/pom.xml?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/pom.xml (original)
+++ hive/trunk/hbase-handler/pom.xml Tue Sep 9 15:18:29 2014
@@ -57,6 +57,11 @@
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>1.7.6</version>
+ </dependency>
</dependencies>
<profiles>
@@ -101,6 +106,12 @@
<version>${hbase.hadoop1.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>${hadoop-20S.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.hadoop1.version}</version>
@@ -134,11 +145,25 @@
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop-23.version}</version>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop-23.version}</version>
<optional>true</optional>
</dependency>
<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop-23.version}</version>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop2-compat</artifactId>
<version>${hbase.hadoop2.version}</version>
@@ -190,6 +215,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-servlet</artifactId>
+ <version>${jersey.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</profile>
</profiles>
@@ -209,6 +240,42 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-maven-plugin</artifactId>
+ <version>1.7.6</version>
+ <executions>
+ <execution>
+ <phase>generate-test-sources</phase>
+ <goals>
+ <goal>protocol</goal>
+ </goals>
+ <configuration>
+ <testSourceDirectory>${project.basedir}/if/test</testSourceDirectory>
+ <testOutputDirectory>${project.basedir}/src/test</testOutputDirectory>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>1.7</version>
+ <executions>
+ <execution>
+ <id>add-test-sources</id>
+ <phase>generate-test-sources</phase>
+ <goals>
+ <goal>add-test-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${project.basedir}/src/gen/avro/gen-java</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java Tue Sep 9 15:18:29 2014
@@ -23,16 +23,21 @@
package org.apache.hadoop.hive.hbase;
-import com.google.common.collect.Iterators;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.common.collect.Iterators;
public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
@@ -53,24 +58,41 @@ public class ColumnMappings implements I
return columnsMapping.length;
}
- String toTypesString() {
+ String toNamesString(Properties tbl, String autogenerate) {
+ if (autogenerate != null && autogenerate.equals("true")) {
+ StringBuilder sb = new StringBuilder();
+ HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
+ return sb.toString();
+ }
+
+ return StringUtils.EMPTY; // return empty string
+ }
+
+ String toTypesString(Properties tbl, Configuration conf, String autogenerate)
+ throws SerDeException {
StringBuilder sb = new StringBuilder();
- for (ColumnMapping colMap : columnsMapping) {
- if (sb.length() > 0) {
- sb.append(":");
- }
- if (colMap.hbaseRowKey) {
- // the row key column becomes a STRING
- sb.append(serdeConstants.STRING_TYPE_NAME);
- } else if (colMap.qualifierName == null) {
- // a column family become a MAP
- sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
- + serdeConstants.STRING_TYPE_NAME + ">");
- } else {
- // an individual column becomes a STRING
- sb.append(serdeConstants.STRING_TYPE_NAME);
+
+ if (autogenerate != null && autogenerate.equals("true")) {
+ HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
+ } else {
+ for (ColumnMapping colMap : columnsMapping) {
+ if (sb.length() > 0) {
+ sb.append(":");
+ }
+ if (colMap.hbaseRowKey) {
+ // the row key column becomes a STRING
+ sb.append(serdeConstants.STRING_TYPE_NAME);
+ } else if (colMap.qualifierName == null) {
+        // a column family becomes a MAP
+ sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+ + serdeConstants.STRING_TYPE_NAME + ">");
+ } else {
+ // an individual column becomes a STRING
+ sb.append(serdeConstants.STRING_TYPE_NAME);
+ }
}
}
+
return sb.toString();
}
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java Tue Sep 9 15:18:29 2014
@@ -19,7 +19,9 @@
package org.apache.hadoop.hive.hbase;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
@@ -94,4 +96,14 @@ public class HBaseCompositeKey extends L
return lazyObject;
}
+
+ /**
+ * Return the different parts of the key. By default, this returns an empty map. Consumers can
+ * choose to override this to provide their own names and types of parts of the key.
+ *
+ * @return map of parts name to their type
+ * */
+ public Map<String, String> getParts() {
+ return Collections.emptyMap();
+ }
}
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java Tue Sep 9 15:18:29 2014
@@ -18,32 +18,31 @@
package org.apache.hadoop.hive.hbase;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.util.ArrayList;
-import java.util.List;
-
// Does same thing with LazyFactory#createLazyObjectInspector except that this replaces
// original keyOI with OI which is create by HBaseKeyFactory provided by serde property for hbase
public class HBaseLazyObjectFactory {
public static ObjectInspector createLazyHBaseStructInspector(
- SerDeParameters serdeParams, int index, HBaseKeyFactory factory) throws SerDeException {
+ SerDeParameters serdeParams, int index, HBaseKeyFactory keyFactory, List<HBaseValueFactory> valueFactories) throws SerDeException {
List<TypeInfo> columnTypes = serdeParams.getColumnTypes();
ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>(
columnTypes.size());
for (int i = 0; i < columnTypes.size(); i++) {
if (i == index) {
- columnObjectInspectors.add(factory.createKeyObjectInspector(columnTypes.get(i)));
+ columnObjectInspectors.add(keyFactory.createKeyObjectInspector(columnTypes.get(i)));
} else {
- columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
- columnTypes.get(i), serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
- serdeParams.isEscaped(), serdeParams.getEscapeChar()));
+ columnObjectInspectors.add(valueFactories.get(i).createValueObjectInspector(
+ columnTypes.get(i)));
}
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
@@ -51,4 +50,4 @@ public class HBaseLazyObjectFactory {
serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(),
serdeParams.isEscaped(), serdeParams.getEscapeChar());
}
-}
+}
\ No newline at end of file
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java Tue Sep 9 15:18:29 2014
@@ -18,6 +18,10 @@
package org.apache.hadoop.hive.hbase;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
import org.apache.hadoop.hive.serde2.ByteStream;
@@ -34,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
public class HBaseRowSerializer {
private final HBaseKeyFactory keyFactory;
@@ -279,6 +279,10 @@ public class HBaseRowSerializer {
}
}
return true;
+ case UNION: {
+ // union type currently not totally supported. See HIVE-2390
+ return false;
+ }
default:
throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java Tue Sep 9 15:18:29 2014
@@ -18,6 +18,10 @@
package org.apache.hadoop.hive.hbase;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -25,6 +29,7 @@ import org.apache.hadoop.hbase.util.Byte
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -33,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
/**
* HBaseSerDe can be used to serialize object into an HBase table and
* deserialize objects from an HBase table.
@@ -50,15 +51,21 @@ public class HBaseSerDe extends Abstract
public static final String HBASE_KEY_COL = ":key";
public static final String HBASE_PUT_TIMESTAMP = "hbase.put.timestamp";
public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class";
+ public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types";
public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory";
public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
+ public static final String HBASE_AUTOGENERATE_STRUCT = "hbase.struct.autogenerate";
/**
- * Determines whether a regex matching should be done on the columns or not. Defaults to true.
- * <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
+ * Determines whether a regex matching should be done on the columns or not. Defaults to true.
+ * <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
*/
public static final String HBASE_COLUMNS_REGEX_MATCHING = "hbase.columns.mapping.regex.matching";
+ /**
+ * Defines the type for a column.
+ **/
+ public static final String SERIALIZATION_TYPE = "serialization.type";
private ObjectInspector cachedObjectInspector;
private LazyHBaseRow cachedHBaseRow;
@@ -83,8 +90,11 @@ public class HBaseSerDe extends Abstract
throws SerDeException {
serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName());
- cachedObjectInspector = HBaseLazyObjectFactory.createLazyHBaseStructInspector(
- serdeParams.getSerdeParams(), serdeParams.getKeyIndex(), serdeParams.getKeyFactory());
+ cachedObjectInspector =
+ HBaseLazyObjectFactory
+ .createLazyHBaseStructInspector(serdeParams.getSerdeParams(),
+ serdeParams.getKeyIndex(), serdeParams.getKeyFactory(),
+ serdeParams.getValueFactories());
cachedHBaseRow = new LazyHBaseRow(
(LazySimpleStructObjectInspector) cachedObjectInspector,
Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,557 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase;
+
+import static org.apache.hadoop.hive.hbase.HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Helper class for {@link HBaseSerDe}
+ * */
+public class HBaseSerDeHelper {
+
+ /**
+ * Logger
+ * */
+ public static final Log LOG = LogFactory.getLog(HBaseSerDeHelper.class);
+
+ /**
+   * Autogenerates the column names from the given column mappings
+ *
+ * @param tbl the hive table properties
+ * @param columnsMapping the hbase columns mapping determining hbase column families and
+ * qualifiers
+ * @param sb StringBuilder to form the list of columns
+ * @throws IllegalArgumentException if any of the given arguments was null
+ * */
+ public static void generateColumns(Properties tbl, List<ColumnMapping> columnsMapping,
+ StringBuilder sb) {
+    // Generate the columns according to the column mapping provided.
+    // Note: The generated column names are the same as family_name.qualifier_name. If the
+    // qualifier name is null, each column is familyname_col[i] where i is the index of the
+    // column, ranging from 0 to n-1 where n is the size of the column mapping. The filter
+    // function removes any special characters other than alphabets and numbers from the
+    // column family and qualifier name, as the only special character allowed in a column
+    // name is "_", which is used as a separator between the column family and qualifier name.
+
+ if (columnsMapping == null) {
+ throw new IllegalArgumentException("columnsMapping cannot be null");
+ }
+
+ if (sb == null) {
+ throw new IllegalArgumentException("StringBuilder cannot be null");
+ }
+
+ for (int i = 0; i < columnsMapping.size(); i++) {
+ ColumnMapping colMap = columnsMapping.get(i);
+
+ if (colMap.hbaseRowKey) {
+ sb.append("key").append(StringUtils.COMMA_STR);
+ } else if (colMap.qualifierName == null) {
+ // this corresponds to a map<string,?>
+
+ if (colMap.qualifierPrefix != null) {
+ sb.append(filter(colMap.familyName)).append("_")
+ .append(filter(colMap.qualifierPrefix) + i).append(StringUtils.COMMA_STR);
+ } else {
+ sb.append(filter(colMap.familyName)).append("_").append("col" + i)
+ .append(StringUtils.COMMA_STR);
+ }
+ } else {
+ // just an individual column
+ sb.append(filter(colMap.familyName)).append("_").append(filter(colMap.qualifierName))
+ .append(StringUtils.COMMA_STR);
+ }
+ }
+
+ // trim off the ending ",", if any
+ trim(sb);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated columns: [" + sb.toString() + "]");
+ }
+ }
+
+ /**
+ * Autogenerates the column types from the given serialization class
+ *
+ * @param tbl the hive table properties
+ * @param columnsMapping the hbase columns mapping determining hbase column families and
+ * qualifiers
+ * @param sb StringBuilder to form the list of column types
+ * @param conf configuration
+ * @throws IllegalArgumentException if any of the given arguments was null
+ * @throws SerDeException if there was an error generating the column types
+ * */
+ public static void generateColumnTypes(Properties tbl, List<ColumnMapping> columnsMapping,
+ StringBuilder sb, Configuration conf) throws SerDeException {
+
+ if (tbl == null) {
+ throw new IllegalArgumentException("tbl cannot be null");
+ }
+
+ if (columnsMapping == null) {
+ throw new IllegalArgumentException("columnsMapping cannot be null");
+ }
+
+ if (sb == null) {
+ throw new IllegalArgumentException("StringBuilder cannot be null");
+ }
+
+ // Generate the columns according to the column mapping provided
+ for (int i = 0; i < columnsMapping.size(); i++) {
+ if (sb.length() > 0) {
+ sb.append(":");
+ }
+
+ ColumnMapping colMap = columnsMapping.get(i);
+
+ if (colMap.hbaseRowKey) {
+
+ Map<String, String> compositeKeyParts = getCompositeKeyParts(tbl);
+ StringBuilder keyStruct = new StringBuilder();
+
+ if (compositeKeyParts == null || compositeKeyParts.isEmpty()) {
+ String compKeyClass = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+ String compKeyTypes = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_TYPES);
+
+ if (compKeyTypes == null) {
+
+ if (compKeyClass != null) {
+                  // a composite key class was provided, but neither was the
+                  // hbase.composite.key.types property set nor was the
+                  // getParts() method of HBaseCompositeKey overridden in the
+                  // implementation. Flag exception.
+ throw new SerDeException(
+ "Either the hbase.composite.key.types property should be set or the getParts method must be overridden in "
+ + compKeyClass);
+ }
+
+ // the row key column becomes a STRING
+ sb.append(serdeConstants.STRING_TYPE_NAME);
+ } else {
+ generateKeyStruct(compKeyTypes, keyStruct);
+ }
+ } else {
+ generateKeyStruct(compositeKeyParts, keyStruct);
+ }
+ sb.append(keyStruct);
+ } else if (colMap.qualifierName == null) {
+
+ String serClassName = null;
+ String serType = null;
+ String schemaLiteral = null;
+ String schemaUrl = null;
+
+ if (colMap.qualifierPrefix != null) {
+
+ serType =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + HBaseSerDe.SERIALIZATION_TYPE);
+
+ if (serType == null) {
+ throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE
+ + " property not provided for column family [" + colMap.familyName
+ + "] and prefix [" + colMap.qualifierPrefix + "]");
+ }
+
+ // we are provided with a prefix
+ serClassName =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + serdeConstants.SERIALIZATION_CLASS);
+
+ if (serClassName == null) {
+ if (serType.equalsIgnoreCase(HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE)) {
+ // for avro type, the serialization class parameter is optional
+ schemaLiteral =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + AvroSerdeUtils.SCHEMA_LITERAL);
+ schemaUrl =
+ tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + AvroSerdeUtils.SCHEMA_URL);
+
+ if (schemaLiteral == null && schemaUrl == null) {
+ // either schema literal, schema url or serialization class must
+ // be provided
+ throw new SerDeException("For an avro schema, either "
+ + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + " or "
+ + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+ }
+
+ if (schemaUrl != null) {
+ schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
+ }
+
+ } else {
+ throw new SerDeException(serdeConstants.SERIALIZATION_CLASS
+ + " property not provided for column family [" + colMap.familyName
+ + "] and prefix [" + colMap.qualifierPrefix + "]");
+ }
+ }
+ } else {
+ serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+
+ if (serType == null) {
+ throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE
+ + " property not provided for column family [" + colMap.familyName + "]");
+ }
+
+ serClassName =
+ tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
+
+ if (serClassName == null) {
+
+ if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+ // for avro type, the serialization class parameter is optional
+ schemaLiteral =
+ tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL);
+ schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL);
+
+ if (schemaLiteral == null && schemaUrl == null) {
+              // either schema literal, schema url or serialization class must
+              // be provided
+ throw new SerDeException("For an avro schema, either "
+ + AvroSerdeUtils.SCHEMA_LITERAL + " property or "
+ + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+ }
+
+ if (schemaUrl != null) {
+ schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
+ }
+ } else {
+ throw new SerDeException(serdeConstants.SERIALIZATION_CLASS
+ + " property not provided for column family [" + colMap.familyName + "]");
+ }
+ }
+ }
+
+ StringBuilder generatedStruct = new StringBuilder();
+
+ // generate struct for each of the given prefixes
+ generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct);
+
+ // a column family becomes a MAP
+ sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+ + generatedStruct + ">");
+
+ } else {
+
+ String qualifierName = colMap.qualifierName;
+
+ if (colMap.qualifierName.endsWith("*")) {
+ // we are provided with a prefix
+ qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+ }
+
+ String serType =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + HBaseSerDe.SERIALIZATION_TYPE);
+
+ if (serType == null) {
+ throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE
+ + " property not provided for column family [" + colMap.familyName
+ + "] and qualifier [" + qualifierName + "]");
+ }
+
+ String serClassName =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + serdeConstants.SERIALIZATION_CLASS);
+
+ String schemaLiteral = null;
+ String schemaUrl = null;
+
+ if (serClassName == null) {
+
+ if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+ // for avro type, the serialization class parameter is optional
+ schemaLiteral =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + AvroSerdeUtils.SCHEMA_LITERAL);
+ schemaUrl =
+ tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+ + AvroSerdeUtils.SCHEMA_URL);
+
+ if (schemaLiteral == null && schemaUrl == null) {
+ // either schema literal, schema url or serialization class must
+ // be provided
+ throw new SerDeException("For an avro schema, either "
+ + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + " or "
+ + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+ }
+
+ if (schemaUrl != null) {
+ schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
+ }
+ } else {
+ throw new SerDeException(serdeConstants.SERIALIZATION_CLASS
+ + " property not provided for column family [" + colMap.familyName
+ + "] and qualifier [" + qualifierName + "]");
+ }
+ }
+
+ StringBuilder generatedStruct = new StringBuilder();
+
+ generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct);
+
+ sb.append(generatedStruct);
+ }
+ }
+
+ // trim off ending ",", if any
+ trim(sb);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated column types: [" + sb.toString() + "]");
+ }
+ }
+
+  /**
+   * Reads and parses an Avro schema file from the given filesystem URL.
+   *
+   * @param schemaFSUrl URL of the schema file on a Hadoop-supported filesystem (e.g. HDFS)
+   * @param conf configuration used to resolve the filesystem
+   * @return the parsed Avro {@link Schema}
+   * @throws SerDeException if the URL is malformed or the schema cannot be read
+   * */
+  public static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf)
+      throws SerDeException {
+    FSDataInputStream in = null;
+    try {
+      FileSystem fs = FileSystem.get(new URI(schemaFSUrl), conf);
+      in = fs.open(new Path(schemaFSUrl));
+      // Schema.parse(InputStream) is deprecated; Schema.Parser is the supported API
+      return new Schema.Parser().parse(in);
+    } catch (URISyntaxException e) {
+      throw new SerDeException("Failure reading schema from filesystem", e);
+    } catch (IOException e) {
+      throw new SerDeException("Failure reading schema from filesystem", e);
+    } finally {
+      // best-effort close; the FileSystem instance is cached by Hadoop and must not be
+      // closed here
+      IOUtils.closeQuietly(in);
+    }
+  }
+
+  /**
+   * Auto-generates the key struct for composite keys
+   *
+   * @param compositeKeyParts map of composite key part name to its type. Usually this would be
+   *          provided by the custom implementation of {@link HBaseCompositeKey composite key}
+   * @param sb StringBuilder object to construct the struct
+   * */
+  private static void generateKeyStruct(Map<String, String> compositeKeyParts, StringBuilder sb) {
+    sb.append("struct<");
+
+    // join the "name:type" pairs with commas, in the map's iteration order
+    String separator = "";
+    for (Entry<String, String> part : compositeKeyParts.entrySet()) {
+      sb.append(separator).append(part.getKey()).append(":").append(part.getValue());
+      separator = ",";
+    }
+
+    sb.append(">");
+  }
+
+  /**
+   * Auto-generates the key struct for composite keys
+   *
+   * @param compositeKeyTypes comma separated list of composite key types in order
+   * @param sb StringBuilder object to construct the struct
+   * */
+  private static void generateKeyStruct(String compositeKeyTypes, StringBuilder sb) {
+    sb.append("struct<");
+
+    // the property value lists the types of the composite key parts, in the order in which
+    // they appear within the key; part names are synthesized as col0, col1, ...
+    String[] keyTypes = compositeKeyTypes.split(",");
+
+    for (int i = 0; i < keyTypes.length; i++) {
+      if (i > 0) {
+        sb.append(StringUtils.COMMA_STR);
+      }
+      sb.append("col").append(i).append(":").append(keyTypes[i]);
+    }
+
+    sb.append(">");
+  }
+
+  /**
+   * Auto-generates the column struct
+   *
+   * @param serType serialization type
+   * @param serClassName serialization class name
+   * @param schemaLiteral schema string
+   * @param colMap hbase column mapping
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if something goes wrong while generating the struct
+   * */
+  private static void generateColumnStruct(String serType, String serClassName,
+      String schemaLiteral, ColumnMapping colMap, StringBuilder sb) throws SerDeException {
+
+    // only the avro serialization type is currently supported for struct generation
+    if (!serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+      throw new SerDeException("Unknown " + HBaseSerDe.SERIALIZATION_TYPE
+          + " found for column family [" + colMap.familyName + "]");
+    }
+
+    // a serialization class takes precedence over an explicit schema literal
+    if (serClassName != null) {
+      generateAvroStructFromClass(serClassName, sb);
+    } else {
+      generateAvroStructFromSchema(schemaLiteral, sb);
+    }
+  }
+
+ /**
+ * Auto-generate the avro struct from class
+ *
+ * @param serClassName serialization class for avro struct
+ * @param sb StringBuilder to hold the generated struct
+ * @throws SerDeException if something goes wrong while generating the struct
+ * */
+ private static void generateAvroStructFromClass(String serClassName, StringBuilder sb)
+ throws SerDeException {
+ Class<?> serClass;
+ try {
+ serClass = Class.forName(serClassName);
+ } catch (ClassNotFoundException e) {
+ throw new SerDeException("Error obtaining descriptor for " + serClassName, e);
+ }
+
+ Schema schema = ReflectData.get().getSchema(serClass);
+
+ generateAvroStructFromSchema(schema, sb);
+ }
+
+  /**
+   * Auto-generate the avro struct from schema
+   *
+   * @param schemaLiteral schema for the avro struct as string
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if something goes wrong while generating the struct
+   * */
+  private static void generateAvroStructFromSchema(String schemaLiteral, StringBuilder sb)
+      throws SerDeException {
+    // Schema.parse(String) is deprecated; Schema.Parser is the supported replacement
+    Schema schema = new Schema.Parser().parse(schemaLiteral);
+
+    generateAvroStructFromSchema(schema, sb);
+  }
+
+  /**
+   * Auto-generate the avro struct from schema
+   *
+   * @param schema schema for the avro struct
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if something goes wrong while generating the struct
+   * */
+  private static void generateAvroStructFromSchema(Schema schema, StringBuilder sb)
+      throws SerDeException {
+    AvroObjectInspectorGenerator avig = new AvroObjectInspectorGenerator(schema);
+
+    sb.append("struct<");
+
+    // Get the column names and their corresponding types
+    List<String> columnNames = avig.getColumnNames();
+    List<TypeInfo> columnTypes = avig.getColumnTypes();
+
+    if (columnNames.size() != columnTypes.size()) {
+      throw new AssertionError("The number of column names should be the same as column types");
+    }
+
+    // join "name:type" pairs with commas
+    for (int i = 0; i < columnNames.size(); i++) {
+      if (i > 0) {
+        sb.append(",");
+      }
+      sb.append(columnNames.get(i)).append(":").append(columnTypes.get(i).getTypeName());
+    }
+
+    sb.append(">");
+  }
+
+  /**
+   * Trims the given builder by removing the trailing "," if any
+   *
+   * @param sb StringBuilder to trim
+   * @return the same StringBuilder, with any trailing comma removed
+   * */
+  private static StringBuilder trim(StringBuilder sb) {
+    // guard against an empty builder: charAt(-1) would throw StringIndexOutOfBoundsException
+    int length = sb.length();
+    if (length > 0 && sb.charAt(length - 1) == StringUtils.COMMA) {
+      sb.deleteCharAt(length - 1);
+    }
+
+    return sb;
+  }
+
+  /**
+   * Filters the given name by removing any special character and converting it to lowercase
+   *
+   * @param name the name to filter
+   * @return the lowercase, alphanumeric-only form of the name
+   * */
+  private static String filter(String name) {
+    // use a fixed locale for the case mapping: with e.g. a Turkish default locale,
+    // "I".toLowerCase() yields a dotless 'ı', corrupting the generated column name
+    return name.replaceAll("[^a-zA-Z0-9]+", "").toLowerCase(java.util.Locale.ROOT);
+  }
+
+  /**
+   * Return the composite key parts, as declared by the configured composite key class.
+   *
+   * @param tbl Properties for the table
+   * @return a map from composite key part name to its type, or null when no composite key
+   *         class is configured
+   * @throws SerDeException if something goes wrong while getting the composite key parts
+   * */
+  @SuppressWarnings("unchecked")
+  private static Map<String, String> getCompositeKeyParts(Properties tbl) throws SerDeException {
+    String compKeyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+
+    if (compKeyClassName == null) {
+      // no custom composite key class provided. return null
+      return null;
+    }
+
+    CompositeHBaseKeyFactory<HBaseCompositeKey> keyFactory = null;
+
+    Class<?> keyClass;
+    try {
+      keyClass = Class.forName(compKeyClassName);
+      keyFactory = new CompositeHBaseKeyFactory(keyClass);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+
+    // a throw-away key instance is created purely to interrogate its declared parts;
+    // null is passed since no actual row data is involved here
+    HBaseCompositeKey compKey = keyFactory.createKey(null);
+    return compKey.getParts();
+  }
+}
\ No newline at end of file
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java Tue Sep 9 15:18:29 2014
@@ -18,13 +18,20 @@
package org.apache.hadoop.hive.hbase;
+import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -37,10 +44,12 @@ import org.apache.hadoop.util.Reflection
*/
public class HBaseSerDeParameters {
+ public static final String AVRO_SERIALIZATION_TYPE = "avro";
+ public static final String STRUCT_SERIALIZATION_TYPE = "struct";
+
private final SerDeParameters serdeParams;
private final Configuration job;
- private final Properties tbl;
private final String columnMappingString;
private final ColumnMappings columnMappings;
@@ -48,57 +57,50 @@ public class HBaseSerDeParameters {
private final long putTimestamp;
private final HBaseKeyFactory keyFactory;
+ private final List<HBaseValueFactory> valueFactories;
HBaseSerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException {
this.job = job;
- this.tbl = tbl;
- this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
- this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
// Read configuration parameters
columnMappingString = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
- doColumnRegexMatching = Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+ doColumnRegexMatching =
+ Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
// Parse and initialize the HBase columns mapping
columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching);
- columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), serdeParams.getColumnTypes());
-
- // Precondition: make sure this is done after the rest of the SerDe initialization is done.
- String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
- columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
// Build the type property string if not supplied
String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
- if (columnTypeProperty == null) {
- tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnMappings.toTypesString());
- }
+ String autogenerate = tbl.getProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT);
- this.keyFactory = initKeyFactory(job, tbl);
- }
+ if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
+ String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
+ if (columnNameProperty == null || columnNameProperty.isEmpty()) {
+ if (autogenerate == null || autogenerate.isEmpty()) {
+ throw new IllegalArgumentException("Either the columns must be specified or the "
+ + HBaseSerDe.HBASE_AUTOGENERATE_STRUCT + " property must be set to true.");
+ }
- private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
- try {
- HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl);
- if (keyFactory != null) {
- keyFactory.init(this, tbl);
+ tbl.setProperty(serdeConstants.LIST_COLUMNS,
+ columnMappings.toNamesString(tbl, autogenerate));
}
- return keyFactory;
- } catch (Exception e) {
- throw new SerDeException(e);
- }
- }
- private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl) throws Exception {
- String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
- if (factoryClassName != null) {
- Class<?> factoryClazz = Class.forName(factoryClassName);
- return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+ tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+ columnMappings.toTypesString(tbl, job, autogenerate));
}
- String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
- if (keyClassName != null) {
- Class<?> keyClass = Class.forName(keyClassName);
- return new CompositeHBaseKeyFactory(keyClass);
- }
- return new DefaultHBaseKeyFactory();
+
+ this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
+ this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
+
+ columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(),
+ serdeParams.getColumnTypes());
+
+ // Precondition: make sure this is done after the rest of the SerDe initialization is done.
+ String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
+ columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
+
+ this.keyFactory = initKeyFactory(job, tbl);
+ this.valueFactories = initValueFactories(job, tbl);
}
public List<String> getColumnNames() {
@@ -133,6 +135,10 @@ public class HBaseSerDeParameters {
return keyFactory;
}
+ public List<HBaseValueFactory> getValueFactories() {
+ return valueFactories;
+ }
+
public Configuration getBaseConfiguration() {
return job;
}
@@ -151,4 +157,190 @@ public class HBaseSerDeParameters {
public String toString() {
return "[" + columnMappingString + ":" + getColumnNames() + ":" + getColumnTypes() + "]";
}
-}
+
+  private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
+    try {
+      HBaseKeyFactory factory = createKeyFactory(conf, tbl);
+      if (factory != null) {
+        // give the factory a chance to inspect the serde/table configuration
+        factory.init(this, tbl);
+      }
+      return factory;
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /**
+   * Creates the key factory for the table.
+   *
+   * @param job the job configuration
+   * @param tbl table properties
+   * @return a custom factory when one is configured, a composite-key factory when a
+   *         composite key class is configured, otherwise the default factory
+   * @throws Exception if a configured class cannot be loaded or instantiated
+   * */
+  @SuppressWarnings("unchecked")
+  private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl)
+      throws Exception {
+    String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
+    if (factoryClassName != null) {
+      Class<?> factoryClazz = Class.forName(factoryClassName);
+      return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+    }
+    String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+    if (keyClassName != null) {
+      Class<?> keyClass = Class.forName(keyClassName);
+      // raw construction mirrors HBaseSerDeHelper.getCompositeKeyParts; suppressed there too
+      return new CompositeHBaseKeyFactory(keyClass);
+    }
+    return new DefaultHBaseKeyFactory();
+  }
+
+  private List<HBaseValueFactory> initValueFactories(Configuration conf, Properties tbl)
+      throws SerDeException {
+    // create one factory per column mapping, then let each one initialize itself
+    List<HBaseValueFactory> factories = createValueFactories(conf, tbl);
+
+    for (HBaseValueFactory factory : factories) {
+      factory.init(this, conf, tbl);
+    }
+
+    return factories;
+  }
+
+  /**
+   * Creates a value factory for every column mapping: an {@link AvroHBaseValueFactory} for
+   * columns whose serialization type is avro, the {@link DefaultHBaseValueFactory} otherwise.
+   *
+   * @param conf the job configuration
+   * @param tbl table properties
+   * @return one value factory per column mapping, in mapping order
+   * @throws SerDeException if schema resolution for an avro column fails
+   * */
+  private List<HBaseValueFactory> createValueFactories(Configuration conf, Properties tbl)
+      throws SerDeException {
+    List<HBaseValueFactory> valueFactories = new ArrayList<HBaseValueFactory>();
+
+    try {
+      for (ColumnMapping colMap : columnMappings.getColumnsMapping()) {
+        String serType = getSerializationType(conf, tbl, colMap);
+
+        // case-insensitive comparison, consistent with how the serialization type is matched
+        // everywhere else (HBaseSerDeHelper, getSchema); "AVRO" and "avro" are equivalent.
+        // Constant-first form is also null-safe.
+        if (AVRO_SERIALIZATION_TYPE.equalsIgnoreCase(serType)) {
+          Schema schema = getSchema(conf, tbl, colMap);
+          valueFactories.add(new AvroHBaseValueFactory(schema));
+        } else {
+          valueFactories.add(new DefaultHBaseValueFactory());
+        }
+      }
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+
+    return valueFactories;
+  }
+
+  /**
+   * Get the serialization type configured for the given {@link ColumnMapping colMap},
+   * or null when none is configured (e.g. for the row key mapping).
+   * */
+  private String getSerializationType(Configuration conf, Properties tbl,
+      ColumnMapping colMap) throws Exception {
+
+    if (colMap.qualifierName == null) {
+      // the mapping covers a whole column family, possibly narrowed by a qualifier prefix
+      String prefix = (colMap.qualifierPrefix != null) ? colMap.qualifierPrefix + "." : "";
+      return tbl.getProperty(colMap.familyName + "." + prefix + HBaseSerDe.SERIALIZATION_TYPE);
+    }
+
+    if (colMap.hbaseRowKey) {
+      // row keys carry no serialization type
+      return null;
+    }
+
+    // an individual qualifier, or a prefix pattern ending in "*"
+    String qualifierName = colMap.qualifierName;
+    if (qualifierName.endsWith("*")) {
+      qualifierName = qualifierName.substring(0, qualifierName.length() - 1);
+    }
+
+    return tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+        + HBaseSerDe.SERIALIZATION_TYPE);
+  }
+
+  /**
+   * Resolves the Avro reader {@link Schema} for the given column mapping from the table
+   * properties: from the schema literal, the schema url, or reflectively from the configured
+   * serialization class, in that order of precedence. Returns null when a schema retriever
+   * class is configured (the retriever supplies schemas at runtime) or when the column is
+   * not avro-serialized.
+   *
+   * @param conf the job configuration
+   * @param tbl table properties
+   * @param colMap the column mapping to resolve the schema for
+   * @return the resolved schema, or null as described above
+   * @throws Exception if the properties are inconsistent or the schema cannot be read
+   * */
+  private Schema getSchema(Configuration conf, Properties tbl, ColumnMapping colMap)
+      throws Exception {
+    String serType = null;
+    String serClassName = null;
+    String schemaLiteral = null;
+    String schemaUrl = null;
+
+    if (colMap.qualifierName == null) {
+      // only a column family
+
+      if (colMap.qualifierPrefix != null) {
+        serType =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + HBaseSerDe.SERIALIZATION_TYPE);
+
+        serClassName =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + serdeConstants.SERIALIZATION_CLASS);
+
+        schemaLiteral =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + AvroSerdeUtils.SCHEMA_LITERAL);
+
+        schemaUrl =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + AvroSerdeUtils.SCHEMA_URL);
+      } else {
+        serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+
+        serClassName =
+            tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
+
+        schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL);
+
+        schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL);
+      }
+    } else if (!colMap.hbaseRowKey) {
+      // not an hbase row key. This should either be a prefix or an individual qualifier
+      String qualifierName = colMap.qualifierName;
+
+      if (colMap.qualifierName.endsWith("*")) {
+        qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+      }
+
+      serType =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + HBaseSerDe.SERIALIZATION_TYPE);
+
+      serClassName =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + serdeConstants.SERIALIZATION_CLASS);
+
+      schemaLiteral =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + AvroSerdeUtils.SCHEMA_LITERAL);
+
+      schemaUrl =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroSerdeUtils.SCHEMA_URL);
+    }
+
+    String avroSchemaRetClass = tbl.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER);
+
+    if (schemaLiteral == null && serClassName == null && schemaUrl == null
+        && avroSchemaRetClass == null) {
+      throw new IllegalArgumentException("serialization.type was set to [" + serType
+          + "] but neither " + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL
+          + ", serialization.class or " + AvroSerdeUtils.SCHEMA_RETRIEVER + " property was set");
+    }
+
+    Class<?> deserializerClass = null;
+
+    if (serClassName != null) {
+      deserializerClass = conf.getClassByName(serClassName);
+    }
+
+    Schema schema = null;
+
+    // only worry about getting schema if we are dealing with Avro; constant-first so the
+    // check is null-safe (serType stays null for row-key mappings — appears unreachable
+    // from the current caller, but guard anyway)
+    if (AVRO_SERIALIZATION_TYPE.equalsIgnoreCase(serType)) {
+      if (avroSchemaRetClass == null) {
+        // bother about generating a schema only if a schema retriever class wasn't provided
+        if (schemaLiteral != null) {
+          // Schema.parse(String) is deprecated; Schema.Parser is the supported API
+          schema = new Schema.Parser().parse(schemaLiteral);
+        } else if (schemaUrl != null) {
+          schema = HBaseSerDeHelper.getSchemaFromFS(schemaUrl, conf);
+        } else if (deserializerClass != null) {
+          schema = ReflectData.get().getSchema(deserializerClass);
+        }
+      }
+    }
+
+    return schema;
+  }
+}
\ No newline at end of file
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java Tue Sep 9 15:18:29 2014
@@ -149,8 +149,16 @@ public class LazyHBaseCellMap extends La
}
if (keyI.equals(key)) {
// Got a match, return the value
- LazyObject<?> v = (LazyObject<?>) entry.getValue();
- return v == null ? v : v.getObject();
+ Object _value = entry.getValue();
+
+ // If the given value is a type of LazyObject, then only try and convert it to that form.
+ // Else return it as it is.
+ if (_value instanceof LazyObject) {
+ LazyObject<?> v = (LazyObject<?>) entry.getValue();
+ return v == null ? v : v.getObject();
+ } else {
+ return _value;
+ }
}
}
Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
+import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Avro specific implementation of the {@link HBaseValueFactory}
+ * */
+public class AvroHBaseValueFactory extends DefaultHBaseValueFactory {
+
+  // optional pluggable retriever that supplies schemas at runtime; stays null unless the
+  // schema retriever property is set in init()
+  private AvroSchemaRetriever avroSchemaRetriever;
+  // reader schema for this column; may be null when a schema retriever is used instead
+  private Schema schema;
+
+  /**
+   * Constructor
+   *
+   * @param schema the associated {@link Schema schema}
+   * */
+  public AvroHBaseValueFactory(Schema schema) {
+    this.schema = schema;
+  }
+
+  @Override
+  public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
+      throws SerDeException {
+    super.init(hbaseParams, conf, properties);
+    String avroSchemaRetClass = properties.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER);
+
+    if (avroSchemaRetClass != null) {
+      Class<?> avroSchemaRetrieverClass = null;
+      try {
+        avroSchemaRetrieverClass = conf.getClassByName(avroSchemaRetClass);
+      } catch (ClassNotFoundException e) {
+        throw new SerDeException(e);
+      }
+
+      initAvroSchemaRetriever(avroSchemaRetrieverClass, conf, properties);
+    }
+  }
+
+  @Override
+  public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException {
+    ObjectInspector oi =
+        LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1,
+            serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(),
+            ObjectInspectorOptions.AVRO);
+
+    // initialize the object inspectors with this factory's schema and retriever
+    initInternalObjectInspectors(oi);
+
+    return oi;
+  }
+
+  @Override
+  public byte[] serializeValue(Object object, StructField field) throws IOException {
+    // Explicit avro serialization not supported yet. Revert to default
+    return super.serializeValue(object, field);
+  }
+
+  /**
+   * Initialize the instance for {@link AvroSchemaRetriever}
+   *
+   * @param avroSchemaRetrieverClass the retriever implementation class; must declare a
+   *          (Configuration, Properties) constructor
+   * @param conf the configuration passed to the retriever's constructor
+   * @param tbl the table properties passed to the retriever's constructor
+   * @throws SerDeException if the retriever cannot be instantiated
+   * */
+  private void initAvroSchemaRetriever(Class<?> avroSchemaRetrieverClass, Configuration conf,
+      Properties tbl) throws SerDeException {
+
+    try {
+      avroSchemaRetriever = (AvroSchemaRetriever) avroSchemaRetrieverClass.getDeclaredConstructor(
+          Configuration.class, Properties.class).newInstance(
+          conf, tbl);
+    } catch (NoSuchMethodException e) {
+      // the constructor wasn't defined in the implementation class. Flag error
+      throw new SerDeException("Constructor not defined in schema retriever class [" + avroSchemaRetrieverClass.getName() + "]", e);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /**
+   * Initialize the internal object inspectors
+   *
+   * @param oi the top-level object inspector: either an {@link AvroLazyObjectInspector}
+   *          itself, or a map object inspector whose values are avro inspectors
+   * */
+  private void initInternalObjectInspectors(ObjectInspector oi) {
+    if (oi instanceof AvroLazyObjectInspector) {
+      initAvroObjectInspector(oi);
+    } else if (oi instanceof MapObjectInspector) {
+      // we found a map objectinspector. Grab the objectinspector for the value and initialize it
+      // aptly
+      ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector();
+
+      if (valueOI instanceof AvroLazyObjectInspector) {
+        initAvroObjectInspector(valueOI);
+      }
+    }
+  }
+
+  /**
+   * Recursively initialize the {@link AvroLazyObjectInspector} and all its nested ois
+   * with this factory's schema retriever and reader schema.
+   *
+   * @param oi ObjectInspector to be recursively initialized
+   * */
+  private void initAvroObjectInspector(ObjectInspector oi) {
+    // Check for a list. If found, recursively init its members
+    if (oi instanceof ListObjectInspector) {
+      ListObjectInspector loi = (ListObjectInspector) oi;
+
+      initAvroObjectInspector(loi.getListElementObjectInspector());
+      return;
+    }
+
+    // Check for a nested message. If found, set the schema, else return.
+    if (!(oi instanceof AvroLazyObjectInspector)) {
+      return;
+    }
+
+    AvroLazyObjectInspector aoi = (AvroLazyObjectInspector) oi;
+
+    aoi.setSchemaRetriever(avroSchemaRetriever);
+    aoi.setReaderSchema(schema);
+
+    // call the method recursively over all the internal fields of the given avro
+    // objectinspector
+    for (StructField field : aoi.getAllStructFieldRefs()) {
+      initAvroObjectInspector(field.getFieldObjectInspector());
+    }
+  }
+}
\ No newline at end of file
Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Default implementation of the {@link HBaseValueFactory}. Builds the standard lazy object
+ * inspector for a value and does not (yet) support explicit value serialization.
+ * */
+public class DefaultHBaseValueFactory implements HBaseValueFactory {
+
+  protected LazySimpleSerDe.SerDeParameters serdeParams;
+  protected HBaseSerDeParameters hbaseParams;
+  protected Properties properties;
+  protected Configuration conf;
+
+  @Override
+  public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
+      throws SerDeException {
+    // retain everything needed by createValueObjectInspector and by subclasses
+    this.conf = conf;
+    this.properties = properties;
+    this.hbaseParams = hbaseParams;
+    this.serdeParams = hbaseParams.getSerdeParams();
+  }
+
+  @Override
+  public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException {
+    return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1,
+        serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
+  }
+
+  @Override
+  public byte[] serializeValue(Object object, StructField field) throws IOException {
+    // TODO Add support for serialization of values here
+    return null;
+  }
+}
\ No newline at end of file
Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Provides capability to plugin custom implementations for querying of data stored in HBase.
+ * */
+public interface HBaseValueFactory {
+
+ /**
+ * Initialize factory with properties
+ *
+ * @param hbaseParam the {@link HBaseParameters hbase parameters}
+ * @param conf the hadoop {@link Configuration configuration}
+ * @param properties the custom {@link Properties}
+ * @throws SerDeException if there was an issue initializing the factory
+ */
+ void init(HBaseSerDeParameters hbaseParam, Configuration conf, Properties properties)
+ throws SerDeException;
+
+ /**
+ * create custom object inspector for the value
+ *
+ * @param type type information
+ * @throws SerDeException if there was an issue creating the {@link ObjectInspector object inspector}
+ */
+ ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException;
+
+ /**
+ * Serialize the given hive object
+ *
+ * @param object the object to be serialized
+ * @param field the {@link StructField}
+ * @return the serialized value
+ * @throws {@link IOException} if there was an issue serializing the value
+ */
+ byte[] serializeValue(Object object, StructField field) throws IOException;
+}
\ No newline at end of file
Added: hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java (added)
+++ hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase;
+
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever;
+
+/**
+ * Mock implementation
+ * */
+public class HBaseTestAvroSchemaRetriever extends AvroSchemaRetriever {
+
+ private static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test");
+
+ public HBaseTestAvroSchemaRetriever(Configuration conf, Properties tbl) {
+ }
+
+ @Override
+ public Schema retrieveWriterSchema(Object source) {
+ Class<?> clazz;
+ try {
+ clazz = Class.forName("org.apache.hadoop.hive.hbase.avro.Employee");
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+
+ return ReflectData.get().getSchema(clazz);
+ }
+
+ @Override
+ public Schema retrieveReaderSchema(Object source) {
+ Class<?> clazz;
+ try {
+ clazz = Class.forName("org.apache.hadoop.hive.hbase.avro.Employee");
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+
+ return ReflectData.get().getSchema(clazz);
+ }
+
+ @Override
+ public int getOffset() {
+ return TEST_BYTE_ARRAY.length;
+ }
+}
\ No newline at end of file