Posted to commits@hawq.apache.org by sh...@apache.org on 2016/09/12 22:47:19 UTC
incubator-hawq git commit: HAWQ-971. New HiveORC profile for ORC with support for PPD and Col Projection
Repository: incubator-hawq
Updated Branches:
refs/heads/master 67400868c -> ecf00d087
HAWQ-971. New HiveORC profile for ORC with support for PPD and Col Projection
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/ecf00d08
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/ecf00d08
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/ecf00d08
Branch: refs/heads/master
Commit: ecf00d087d09b93a9b033e2c6da6e39ac029c7f7
Parents: 6740086
Author: Shivram Mani <sh...@gmail.com>
Authored: Mon Sep 12 15:47:13 2016 -0700
Committer: Shivram Mani <sh...@gmail.com>
Committed: Mon Sep 12 15:47:13 2016 -0700
----------------------------------------------------------------------
.../pxf/api/utilities/ColumnDescriptor.java | 20 ++-
.../plugins/hdfs/utilities/HdfsUtilities.java | 2 +
.../plugins/hive/HiveColumnarSerdeResolver.java | 18 ++-
.../plugins/hive/HiveInputFormatFragmenter.java | 14 +-
.../hawq/pxf/plugins/hive/HiveORCAccessor.java | 146 +++++++++++++++++++
.../pxf/plugins/hive/HiveORCSerdeResolver.java | 135 +++++++++++++++++
.../hawq/pxf/plugins/hive/HiveResolver.java | 82 +++++++----
.../hive/utilities/EnumHiveToHawqType.java | 32 +++-
.../plugins/hive/utilities/HiveUtilities.java | 5 +-
.../hive/utilities/HiveUtilitiesTest.java | 60 ++++++--
.../hawq/pxf/service/rest/RestResource.java | 8 +-
.../pxf/service/utilities/ProtocolData.java | 30 +++-
.../src/main/resources/pxf-profiles-default.xml | 15 +-
src/backend/access/external/pxfheaders.c | 2 +-
14 files changed, 507 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
index 7ae15e4..87289a9 100644
--- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
+++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
@@ -31,6 +31,7 @@ public class ColumnDescriptor {
String dbColumnTypeName;
int dbColumnIndex;
Integer[] dbColumnTypeModifiers;
+ boolean isProjected;
/**
* Reserved word for a table record key.
@@ -46,13 +47,20 @@ public class ColumnDescriptor {
* @param index column index
* @param typename type name
* @param typemods type modifiers
+ * @param isProj whether the column should be projected
*/
+ public ColumnDescriptor(String name, int typecode, int index, String typename, Integer[] typemods, boolean isProj) {
+ this(name, typecode, index, typename, typemods);
+ isProjected = isProj;
+ }
+
public ColumnDescriptor(String name, int typecode, int index, String typename, Integer[] typemods) {
dbColumnTypeCode = typecode;
dbColumnTypeName = typename;
dbColumnName = name;
dbColumnIndex = index;
dbColumnTypeModifiers = typemods;
+ isProjected = true;
}
/**
@@ -72,6 +80,7 @@ public class ColumnDescriptor {
this.dbColumnTypeModifiers, 0,
copy.dbColumnTypeModifiers.length);
}
+ this.isProjected = copy.isProjected;
}
public String columnName() {
@@ -103,12 +112,21 @@ public class ColumnDescriptor {
return RECORD_KEY_NAME.equalsIgnoreCase(dbColumnName);
}
+ public boolean isProjected() {
+ return isProjected;
+ }
+
+ public void setProjected(boolean projected) {
+ isProjected = projected;
+ }
+
@Override
public String toString() {
return "ColumnDescriptor [dbColumnTypeCode=" + dbColumnTypeCode
+ ", dbColumnName=" + dbColumnName
+ ", dbColumnTypeName=" + dbColumnTypeName
+ ", dbColumnIndex=" + dbColumnIndex
- + ", dbColumnTypeModifiers=" + dbColumnTypeModifiers + "]";
+ + ", dbColumnTypeModifiers=" + dbColumnTypeModifiers
+ + ", isProjected=" + isProjected + "]";
}
}
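For illustration, a minimal sketch of the projection-aware ColumnDescriptor API added above (not part of this diff; the TEXT type code 25 is assumed):

    // column "name" at index 1 is excluded from projection via the new constructor
    ColumnDescriptor col = new ColumnDescriptor("name", 25, 1, "text", null, false);
    boolean projected = col.isProjected(); // false
    col.setProjected(true);                // projection can also be toggled later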
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hdfs/src/main/java/org/apache/hawq/pxf/plugins/hdfs/utilities/HdfsUtilities.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hdfs/src/main/java/org/apache/hawq/pxf/plugins/hdfs/utilities/HdfsUtilities.java b/pxf/pxf-hdfs/src/main/java/org/apache/hawq/pxf/plugins/hdfs/utilities/HdfsUtilities.java
index 59551a9..c99ccd6 100644
--- a/pxf/pxf-hdfs/src/main/java/org/apache/hawq/pxf/plugins/hdfs/utilities/HdfsUtilities.java
+++ b/pxf/pxf-hdfs/src/main/java/org/apache/hawq/pxf/plugins/hdfs/utilities/HdfsUtilities.java
@@ -236,6 +236,8 @@ public class HdfsUtilities {
public static String toString(List<OneField> complexRecord, String delimiter) {
StringBuilder buff = new StringBuilder();
String delim = ""; // first iteration has no delimiter
+ // defensive: a null record (e.g. from a null composite field) renders as an empty string
+ if(complexRecord == null)
+ return "";
for (OneField complex : complexRecord) {
if (complex.type == DataType.BYTEA.getOID()) {
/** Serialize byte array as string */
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
index 606ddc6..987fc0f 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
@@ -19,7 +19,6 @@ package org.apache.hawq.pxf.plugins.hive;
* under the License.
*/
-
import org.apache.hawq.pxf.api.BadRecordException;
import org.apache.hawq.pxf.api.OneField;
import org.apache.hawq.pxf.api.OneRow;
@@ -41,6 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
+
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
@@ -77,7 +77,8 @@ public class HiveColumnarSerdeResolver extends HiveResolver {
serdeType = HiveInputFormatFragmenter.PXF_HIVE_SERDES.COLUMNAR_SERDE;
} else if (serdeEnumStr.equals(HiveInputFormatFragmenter.PXF_HIVE_SERDES.LAZY_BINARY_COLUMNAR_SERDE.name())) {
serdeType = HiveInputFormatFragmenter.PXF_HIVE_SERDES.LAZY_BINARY_COLUMNAR_SERDE;
- } else {
+ }
+ else {
throw new UnsupportedTypeException("Unsupported Hive Serde: " + serdeEnumStr);
}
parts = new StringBuilder();
@@ -123,14 +124,17 @@ public class HiveColumnarSerdeResolver extends HiveResolver {
StringBuilder columnNames = new StringBuilder(numberOfDataColumns * 2); // column + delimiter
StringBuilder columnTypes = new StringBuilder(numberOfDataColumns * 2); // column + delimiter
- String delim = "";
+ String delim = ",";
for (int i = 0; i < numberOfDataColumns; i++) {
ColumnDescriptor column = input.getColumn(i);
String columnName = column.columnName();
- String columnType = HiveUtilities.toCompatibleHiveType(DataType.get(column.columnTypeCode()));
- columnNames.append(delim).append(columnName);
- columnTypes.append(delim).append(columnType);
- delim = ",";
+ String columnType = HiveUtilities.toCompatibleHiveType(DataType.get(column.columnTypeCode()),column.columnTypeModifiers());
+ if(i > 0) {
+ columnNames.append(delim);
+ columnTypes.append(delim);
+ }
+ columnNames.append(columnName);
+ columnTypes.append(columnType);
}
serdeProperties.put(serdeConstants.LIST_COLUMNS, columnNames.toString());
serdeProperties.put(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
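As a worked example of the loop above (column names and types illustrative): for a table (id int4, name varchar(10)), the comma-joined serde properties come out as:

    // LIST_COLUMNS      -> "id,name"
    // LIST_COLUMN_TYPES -> "int,varchar(10)"  (modifiers now included via toCompatibleHiveType)
    serdeProperties.put(serdeConstants.LIST_COLUMNS, "id,name");
    serdeProperties.put(serdeConstants.LIST_COLUMN_TYPES, "int,varchar(10)");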
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
index ccc8fa7..b6a6041 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
@@ -58,9 +58,11 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
static final String STR_RC_FILE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
static final String STR_TEXT_FILE_INPUT_FORMAT = "org.apache.hadoop.mapred.TextInputFormat";
+ static final String STR_ORC_FILE_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
static final String STR_COLUMNAR_SERDE = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe";
static final String STR_LAZY_BINARY_COLUMNAR_SERDE = "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe";
static final String STR_LAZY_SIMPLE_SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
+ static final String STR_ORC_SERDE = "org.apache.hadoop.hive.ql.io.orc.OrcSerde";
private static final int EXPECTED_NUM_OF_TOKS = 3;
public static final int TOK_SERDE = 0;
public static final int TOK_KEYS = 1;
@@ -69,14 +71,16 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
/** Defines the Hive input formats currently supported in pxf */
public enum PXF_HIVE_INPUT_FORMATS {
RC_FILE_INPUT_FORMAT,
- TEXT_FILE_INPUT_FORMAT
+ TEXT_FILE_INPUT_FORMAT,
+ ORC_FILE_INPUT_FORMAT
}
/** Defines the Hive serializers (serde classes) currently supported in pxf */
public enum PXF_HIVE_SERDES {
COLUMNAR_SERDE,
LAZY_BINARY_COLUMNAR_SERDE,
- LAZY_SIMPLE_SERDE
+ LAZY_SIMPLE_SERDE,
+ ORC_SERDE
}
/**
@@ -172,6 +176,8 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
return PXF_HIVE_INPUT_FORMATS.RC_FILE_INPUT_FORMAT.name();
case STR_TEXT_FILE_INPUT_FORMAT:
return PXF_HIVE_INPUT_FORMATS.TEXT_FILE_INPUT_FORMAT.name();
+ case STR_ORC_FILE_INPUT_FORMAT:
+ return PXF_HIVE_INPUT_FORMATS.ORC_FILE_INPUT_FORMAT.name();
default:
throw new IllegalArgumentException(
"HiveInputFormatFragmenter does not yet support "
@@ -197,6 +203,8 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
return PXF_HIVE_SERDES.LAZY_BINARY_COLUMNAR_SERDE.name();
case STR_LAZY_SIMPLE_SERDE:
return PXF_HIVE_SERDES.LAZY_SIMPLE_SERDE.name();
+ case STR_ORC_SERDE:
+ return PXF_HIVE_SERDES.ORC_SERDE.name();
default:
throw new UnsupportedTypeException(
"HiveInputFormatFragmenter does not yet support "
@@ -224,6 +232,6 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
*/
@Override
public FragmentsStats getFragmentsStats() throws Exception {
- throw new UnsupportedOperationException("ANALYZE for HiveRc and HiveText plugins is not supported");
+ throw new UnsupportedOperationException("ANALYZE for HiveRc, HiveText, and HiveOrc plugins is not supported");
}
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java
new file mode 100644
index 0000000..23fc66e
--- /dev/null
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCAccessor.java
@@ -0,0 +1,146 @@
+package org.apache.hawq.pxf.plugins.hive;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hawq.pxf.api.FilterParser;
+import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
+import org.apache.hawq.pxf.api.utilities.InputData;
+import org.apache.commons.lang.StringUtils;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.hawq.pxf.plugins.hive.HiveInputFormatFragmenter.PXF_HIVE_SERDES;
+
+/**
+ * Specialization of HiveAccessor for a Hive table stored entirely as ORC files.
+ * Use together with {@link HiveInputFormatFragmenter}/{@link HiveORCSerdeResolver}.
+ */
+public class HiveORCAccessor extends HiveAccessor {
+
+ private final String READ_COLUMN_IDS_CONF_STR = "hive.io.file.readcolumn.ids";
+ private final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns";
+ private final String READ_COLUMN_NAMES_CONF_STR = "hive.io.file.readcolumn.names";
+ private final String SARG_PUSHDOWN = "sarg.pushdown";
+
+ /**
+ * Constructs a HiveORCAccessor.
+ *
+ * @param input input containing user data
+ * @throws Exception if user data was wrong
+ */
+ public HiveORCAccessor(InputData input) throws Exception {
+ super(input, new OrcInputFormat());
+ String[] toks = HiveInputFormatFragmenter.parseToks(input, PXF_HIVE_SERDES.ORC_SERDE.name());
+ initPartitionFields(toks[HiveInputFormatFragmenter.TOK_KEYS]);
+ filterInFragmenter = Boolean.parseBoolean(toks[HiveInputFormatFragmenter.TOK_FILTER_DONE]);
+ }
+
+ @Override
+ public boolean openForRead() throws Exception {
+ addColumns();
+ addFilters();
+ return super.openForRead();
+ }
+
+ /**
+ * Adds the table tuple description to the JobConf object
+ * so only these columns will be returned.
+ */
+ private void addColumns() throws Exception {
+
+ List<Integer> colIds = new ArrayList<Integer>();
+ List<String> colNames = new ArrayList<String>();
+ for(ColumnDescriptor col: inputData.getTupleDescription()) {
+ if(col.isProjected()) {
+ colIds.add(col.columnIndex());
+ colNames.add(col.columnName());
+ }
+ }
+ jobConf.set(READ_ALL_COLUMNS, "false");
+ jobConf.set(READ_COLUMN_IDS_CONF_STR, StringUtils.join(colIds, ","));
+ jobConf.set(READ_COLUMN_NAMES_CONF_STR, StringUtils.join(colNames, ","));
+ }
+
+ /**
+ * Uses {@link HiveFilterBuilder} to translate a filter string into a
+ * Hive {@link SearchArgument} object. The result is added as a filter to
+ * JobConf object
+ */
+ private void addFilters() throws Exception {
+ if (!inputData.hasFilter()) {
+ return;
+ }
+
+ /* Predicate pushdown configuration */
+ String filterStr = inputData.getFilterString();
+ HiveFilterBuilder eval = new HiveFilterBuilder(inputData);
+ Object filter = eval.getFilterObject(filterStr);
+
+ SearchArgument.Builder filterBuilder = SearchArgumentFactory.newBuilder();
+ filterBuilder.startAnd();
+ if (filter instanceof List) {
+ for (Object f : (List<?>) filter) {
+ buildArgument(filterBuilder, f);
+ }
+ } else {
+ buildArgument(filterBuilder, filter);
+ }
+ filterBuilder.end();
+ SearchArgument sarg = filterBuilder.build();
+ jobConf.set(SARG_PUSHDOWN, sarg.toKryo());
+ }
+
+ private void buildArgument(SearchArgument.Builder builder, Object filterObj) {
+ /* The builder calls below are not compatible with the Hive 2.0 APIs and will require updating */
+ FilterParser.BasicFilter filter = (FilterParser.BasicFilter) filterObj;
+ int filterColumnIndex = filter.getColumn().index();
+ Object filterValue = filter.getConstant().constant();
+ ColumnDescriptor filterColumn = inputData.getColumn(filterColumnIndex);
+ String filterColumnName = filterColumn.columnName();
+
+ switch(filter.getOperation()) {
+ case HDOP_LT:
+ builder.lessThan(filterColumnName, filterValue);
+ break;
+ case HDOP_GT:
+ builder.startNot().lessThanEquals(filterColumnName, filterValue).end();
+ break;
+ case HDOP_LE:
+ builder.lessThanEquals(filterColumnName, filterValue);
+ break;
+ case HDOP_GE:
+ builder.startNot().lessThan(filterColumnName, filterValue).end();
+ break;
+ case HDOP_EQ:
+ builder.equals(filterColumnName, filterValue);
+ break;
+ case HDOP_NE:
+ builder.startNot().equals(filterColumnName, filterValue).end();
+ break;
+ }
+ }
+
+}
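To make the pushdown plumbing concrete, a minimal sketch of what addColumns()/addFilters() effectively put into the JobConf for a query projecting columns 0 and 2 with WHERE amt < 5 AND state = 'CA' (Hive 1.x SearchArgument API as used above; column names and values are illustrative):

    JobConf jobConf = new JobConf();
    // column projection: read only columns 0 and 2
    jobConf.set("hive.io.file.read.all.columns", "false");     // READ_ALL_COLUMNS
    jobConf.set("hive.io.file.readcolumn.ids", "0,2");         // READ_COLUMN_IDS_CONF_STR
    jobConf.set("hive.io.file.readcolumn.names", "amt,state"); // READ_COLUMN_NAMES_CONF_STR
    // predicate pushdown: amt < 5 AND state = 'CA'
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .lessThan("amt", 5)
            .equals("state", "CA")
            .end()
            .build();
    jobConf.set("sarg.pushdown", sarg.toKryo());               // SARG_PUSHDOWN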
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
new file mode 100644
index 0000000..9e32633
--- /dev/null
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
@@ -0,0 +1,135 @@
+package org.apache.hawq.pxf.plugins.hive;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hawq.pxf.api.OneField;
+import org.apache.hawq.pxf.api.OneRow;
+import org.apache.hawq.pxf.api.UnsupportedTypeException;
+import org.apache.hawq.pxf.api.io.DataType;
+import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
+import org.apache.hawq.pxf.api.utilities.InputData;
+import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities;
+
+import java.util.*;
+
+/**
+ * Specialized HiveResolver for a Hive table stored as ORC files.
+ * Use together with HiveInputFormatFragmenter/HiveORCAccessor.
+ */
+public class HiveORCSerdeResolver extends HiveResolver {
+ private static final Log LOG = LogFactory.getLog(HiveORCSerdeResolver.class);
+ private OrcSerde deserializer;
+ private StringBuilder parts;
+ private int numberOfPartitions;
+ private HiveInputFormatFragmenter.PXF_HIVE_SERDES serdeType;
+
+ public HiveORCSerdeResolver(InputData input) throws Exception {
+ super(input);
+ }
+
+ /* read the data supplied by the fragmenter: inputformat name, serde name, partition keys */
+ @Override
+ void parseUserData(InputData input) throws Exception {
+ String[] toks = HiveInputFormatFragmenter.parseToks(input);
+ String serdeEnumStr = toks[HiveInputFormatFragmenter.TOK_SERDE];
+ if (serdeEnumStr.equals(HiveInputFormatFragmenter.PXF_HIVE_SERDES.ORC_SERDE.name())) {
+ serdeType = HiveInputFormatFragmenter.PXF_HIVE_SERDES.ORC_SERDE;
+ } else {
+ throw new UnsupportedTypeException("Unsupported Hive Serde: " + serdeEnumStr);
+ }
+ partitionKeys = toks[HiveInputFormatFragmenter.TOK_KEYS];
+ parseDelimiterChar(input);
+ collectionDelim = input.getUserProperty("COLLECTION_DELIM") == null ? COLLECTION_DELIM
+ : input.getUserProperty("COLLECTION_DELIM");
+ mapkeyDelim = input.getUserProperty("MAPKEY_DELIM") == null ? MAPKEY_DELIM
+ : input.getUserProperty("MAPKEY_DELIM");
+ }
+
+ @Override
+ void initPartitionFields() {
+ parts = new StringBuilder();
+ numberOfPartitions = initPartitionFields(parts);
+ }
+
+ /**
+ * getFields returns a list of OneField items.
+ * Each OneField item contains two fields: an integer representing the field's type and a Java
+ * Object representing the field's value.
+ */
+ @Override
+ public List<OneField> getFields(OneRow onerow) throws Exception {
+
+ Object tuple = deserializer.deserialize((Writable) onerow.getData());
+ // Each Hive record is a Struct
+ StructObjectInspector soi = (StructObjectInspector) deserializer.getObjectInspector();
+ List<OneField> record = traverseStruct(tuple, soi, false);
+
+ return record;
+
+ }
+
+ /*
+ * Get and init the deserializer for the records of this Hive data fragment.
+ * SuppressWarnings added because deserializer.initialize is an abstract function that is deprecated
+ * but its implementation (OrcSerde) still uses the deprecated interface.
+ */
+ @SuppressWarnings("deprecation")
+ @Override
+ void initSerde(InputData input) throws Exception {
+ Properties serdeProperties = new Properties();
+ int numberOfDataColumns = input.getColumns() - numberOfPartitions;
+
+ LOG.debug("Serde number of columns is " + numberOfDataColumns);
+
+ StringBuilder columnNames = new StringBuilder(numberOfDataColumns * 2); // column + delimiter
+ StringBuilder columnTypes = new StringBuilder(numberOfDataColumns * 2); // column + delimiter
+ String delim = ",";
+ for (int i = 0; i < numberOfDataColumns; i++) {
+ ColumnDescriptor column = input.getColumn(i);
+ String columnName = column.columnName();
+ String columnType = HiveUtilities.toCompatibleHiveType(DataType.get(column.columnTypeCode()), column.columnTypeModifiers());
+ if(i > 0) {
+ columnNames.append(delim);
+ columnTypes.append(delim);
+ }
+ columnNames.append(columnName);
+ columnTypes.append(columnType);
+ }
+ serdeProperties.put(serdeConstants.LIST_COLUMNS, columnNames.toString());
+ serdeProperties.put(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
+
+ if (serdeType == HiveInputFormatFragmenter.PXF_HIVE_SERDES.ORC_SERDE) {
+ deserializer = new OrcSerde();
+ } else {
+ throw new UnsupportedTypeException("Unsupported Hive Serde: " + serdeType.name()); /* we should not get here */
+ }
+
+ deserializer.initialize(new JobConf(new Configuration(), HiveORCSerdeResolver.class), serdeProperties);
+ }
+
+}
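A sketch of the OrcSerde round trip that initSerde()/getFields() rely on (column names and types illustrative):

    Properties props = new Properties();
    props.put(serdeConstants.LIST_COLUMNS, "id,name");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    OrcSerde serde = new OrcSerde();
    serde.initialize(new JobConf(new Configuration()), props);
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    // for a Writable row handed over by OrcInputFormat:
    //   Object tuple = serde.deserialize(row);
    //   List<Object> fields = soi.getStructFieldsDataAsList(tuple);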
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java
index 2562d3d..93d5bdb 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveResolver.java
@@ -19,8 +19,10 @@ package org.apache.hawq.pxf.plugins.hive;
* under the License.
*/
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hawq.pxf.api.*;
import org.apache.hawq.pxf.api.io.DataType;
+import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
import org.apache.hawq.pxf.api.utilities.InputData;
import org.apache.hawq.pxf.api.utilities.Plugin;
import org.apache.hawq.pxf.api.utilities.Utilities;
@@ -34,9 +36,9 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.*;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
-import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
@@ -68,14 +70,14 @@ import static org.apache.hawq.pxf.api.io.DataType.*;
@SuppressWarnings("deprecation")
public class HiveResolver extends Plugin implements ReadResolver {
private static final Log LOG = LogFactory.getLog(HiveResolver.class);
- private static final String MAPKEY_DELIM = ":";
- private static final String COLLECTION_DELIM = ",";
+ protected static final String MAPKEY_DELIM = ":";
+ protected static final String COLLECTION_DELIM = ",";
+ protected String collectionDelim;
+ protected String mapkeyDelim;
private SerDe deserializer;
private List<OneField> partitionFields;
private String serdeName;
private String propsString;
- private String collectionDelim;
- private String mapkeyDelim;
String partitionKeys;
char delimiter;
String nullChar = "\\N";
@@ -346,42 +348,56 @@ public class HiveResolver extends Plugin implements ReadResolver {
* representing a composite sub-object (map, list,..) is null - then
* BadRecordException will be thrown. If a primitive field value is null,
* then a null will appear for the field in the record in the query result.
+ * toFlatten is true only when dealing with a non-primitive field
*/
private void traverseTuple(Object obj, ObjectInspector objInspector,
List<OneField> record, boolean toFlatten)
throws IOException, BadRecordException {
ObjectInspector.Category category = objInspector.getCategory();
- if ((obj == null) && (category != ObjectInspector.Category.PRIMITIVE)) {
- throw new BadRecordException("NULL Hive composite object");
- }
switch (category) {
case PRIMITIVE:
resolvePrimitive(obj, (PrimitiveObjectInspector) objInspector,
record, toFlatten);
break;
case LIST:
- List<OneField> listRecord = traverseList(obj,
- (ListObjectInspector) objInspector);
- addOneFieldToRecord(record, TEXT, String.format("[%s]",
- HdfsUtilities.toString(listRecord, collectionDelim)));
+ if(obj == null) {
+ addOneFieldToRecord(record, TEXT, null);
+ } else {
+ List<OneField> listRecord = traverseList(obj,
+ (ListObjectInspector) objInspector);
+ addOneFieldToRecord(record, TEXT, String.format("[%s]",
+ HdfsUtilities.toString(listRecord, collectionDelim)));
+ }
break;
case MAP:
- List<OneField> mapRecord = traverseMap(obj,
- (MapObjectInspector) objInspector);
- addOneFieldToRecord(record, TEXT, String.format("{%s}",
- HdfsUtilities.toString(mapRecord, collectionDelim)));
+ if(obj == null) {
+ addOneFieldToRecord(record, TEXT, null);
+ } else {
+ List<OneField> mapRecord = traverseMap(obj,
+ (MapObjectInspector) objInspector);
+ addOneFieldToRecord(record, TEXT, String.format("{%s}",
+ HdfsUtilities.toString(mapRecord, collectionDelim)));
+ }
break;
case STRUCT:
- List<OneField> structRecord = traverseStruct(obj,
- (StructObjectInspector) objInspector, true);
- addOneFieldToRecord(record, TEXT, String.format("{%s}",
- HdfsUtilities.toString(structRecord, collectionDelim)));
+ if(obj == null) {
+ addOneFieldToRecord(record, TEXT, null);
+ } else {
+ List<OneField> structRecord = traverseStruct(obj,
+ (StructObjectInspector) objInspector, true);
+ addOneFieldToRecord(record, TEXT, String.format("{%s}",
+ HdfsUtilities.toString(structRecord, collectionDelim)));
+ }
break;
case UNION:
- List<OneField> unionRecord = traverseUnion(obj,
- (UnionObjectInspector) objInspector);
- addOneFieldToRecord(record, TEXT, String.format("[%s]",
- HdfsUtilities.toString(unionRecord, collectionDelim)));
+ if(obj == null) {
+ addOneFieldToRecord(record, TEXT, null);
+ } else {
+ List<OneField> unionRecord = traverseUnion(obj,
+ (UnionObjectInspector) objInspector);
+ addOneFieldToRecord(record, TEXT, String.format("[%s]",
+ HdfsUtilities.toString(unionRecord, collectionDelim)));
+ }
break;
default:
throw new UnsupportedTypeException("Unknown category type: "
@@ -417,7 +433,7 @@ public class HiveResolver extends Plugin implements ReadResolver {
return listRecord;
}
- private List<OneField> traverseStruct(Object struct,
+ protected List<OneField> traverseStruct(Object struct,
StructObjectInspector soi,
boolean toFlatten)
throws BadRecordException, IOException {
@@ -429,10 +445,18 @@ public class HiveResolver extends Plugin implements ReadResolver {
}
List<OneField> structRecord = new LinkedList<>();
List<OneField> complexRecord = new LinkedList<>();
+ List<ColumnDescriptor> colData = inputData.getTupleDescription();
for (int i = 0; i < structFields.size(); i++) {
if (toFlatten) {
complexRecord.add(new OneField(TEXT.getOID(), String.format(
"\"%s\"", fields.get(i).getFieldName())));
+ } else if (!colData.get(i).isProjected()) {
+ // Non-projected fields will be sent as null values.
+ // This case is invoked only in the top level of fields and
+ // not when interpreting fields of type struct.
+ traverseTuple(null, fields.get(i).getFieldObjectInspector(),
+ complexRecord, toFlatten);
+ continue;
}
traverseTuple(structFields.get(i),
fields.get(i).getFieldObjectInspector(), complexRecord,
@@ -484,7 +508,13 @@ public class HiveResolver extends Plugin implements ReadResolver {
break;
}
case SHORT: {
- val = (o != null) ? ((ShortObjectInspector) oi).get(o) : null;
+ if(o == null) {
+ val = null;
+ } else if( o.getClass().getSimpleName().equals("ByteWritable") ) {
+ // the ORC reader may hand a tinyint back as a ByteWritable; widen it to Short
+ val = new Short(((ByteWritable) o).get());
+ } else {
+ val = ((ShortObjectInspector) oi).get(o);
+ }
addOneFieldToRecord(record, SMALLINT, val);
break;
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
index 0290f9c..cea52e0 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
@@ -19,7 +19,6 @@
package org.apache.hawq.pxf.plugins.hive.utilities;
-import java.util.Arrays;
import java.util.Comparator;
import java.util.SortedSet;
import java.util.TreeSet;
@@ -152,6 +151,37 @@ public enum EnumHiveToHawqType {
}
/**
+ * Returns the full Hive type name including modifiers, e.g. varchar(3).
+ * Used for data types that carry modifier information, such as varchar,
+ * char, and decimal.
+ *
+ * @param hiveToHawqType EnumHiveToHawqType enum
+ * @param modifiers array of type modifiers
+ * @return full Hive type name including modifiers
+ */
+ public static String getFullHiveTypeName(EnumHiveToHawqType hiveToHawqType, Integer[] modifiers) {
+ if(modifiers != null && modifiers.length > 0) {
+ String modExpression = hiveToHawqType.getSplitExpression();
+ StringBuilder fullType = new StringBuilder(hiveToHawqType.typeName);
+ Character start = modExpression.charAt(1);
+ Character separator = modExpression.charAt(2);
+ Character end = modExpression.charAt(modExpression.length()-2);
+ fullType.append(start);
+ int index = 0;
+ for (Integer modifier : modifiers) {
+ if(index++ > 0) {
+ fullType.append(separator);
+ }
+ fullType.append(modifier);
+ }
+ fullType.append(end);
+ return fullType.toString();
+ } else {
+ return hiveToHawqType.getTypeName();
+ }
+ }
+
+ /**
*
* @param hiveType full Hive data type, i.e. varchar(10) etc
* @return array of type modifiers
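For example (consistent with the tests below), the helper renders modifier-carrying types as their full Hive names; assuming DecimalType's split expression encodes a "(p,s)" suffix:

    String full = EnumHiveToHawqType.getFullHiveTypeName(
            EnumHiveToHawqType.DecimalType, new Integer[]{38, 18});
    // full == "decimal(38,18)"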
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
index 6abb5ba..806b041 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
@@ -37,7 +37,6 @@ import org.apache.hawq.pxf.api.Metadata;
import org.apache.hawq.pxf.api.UnsupportedTypeException;
import org.apache.hawq.pxf.api.utilities.EnumHawqType;
import org.apache.hawq.pxf.api.io.DataType;
-import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType;
/**
* Class containing helper functions connecting
@@ -267,10 +266,10 @@ public class HiveUtilities {
* @return Hive type
* @throws UnsupportedTypeException if type is not supported
*/
- public static String toCompatibleHiveType(DataType type) {
+ public static String toCompatibleHiveType(DataType type, Integer[] modifiers) {
EnumHiveToHawqType hiveToHawqType = EnumHiveToHawqType.getCompatibleHiveToHawqType(type);
- return hiveToHawqType.getTypeName();
+ return EnumHiveToHawqType.getFullHiveTypeName(hiveToHawqType, modifiers);
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilitiesTest.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilitiesTest.java b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilitiesTest.java
index 8c4d6b6..b736bba 100644
--- a/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilitiesTest.java
+++ b/pxf/pxf-hive/src/test/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilitiesTest.java
@@ -24,8 +24,8 @@ import static org.junit.Assert.*;
import java.util.Arrays;
+import com.google.common.base.Joiner;
import org.apache.hawq.pxf.api.io.DataType;
-import org.apache.hawq.pxf.api.utilities.EnumHawqType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.junit.Test;
import org.apache.hawq.pxf.api.Metadata;
@@ -128,47 +128,77 @@ public class HiveUtilitiesTest {
@Test
public void testCompatibleHiveType() {
- String compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BOOLEAN);
+ String compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BOOLEAN, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.BooleanType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BYTEA);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BYTEA, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.BinaryType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BPCHAR);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BPCHAR, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.CharType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BIGINT);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BIGINT, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.BigintType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.SMALLINT);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.SMALLINT, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.SmallintType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.INTEGER);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.INTEGER, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.IntType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.TEXT);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.TEXT, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.StringType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.REAL);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.REAL, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.FloatType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.FLOAT8);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.FLOAT8, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.DoubleType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.VARCHAR);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.VARCHAR, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.VarcharType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.DATE);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.DATE, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.DateType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.TIMESTAMP);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.TIMESTAMP, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.TimestampType.getTypeName());
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.NUMERIC);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.NUMERIC, null);
assertEquals(compatibleTypeName, EnumHiveToHawqType.DecimalType.getTypeName());
try {
- compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.UNSUPPORTED_TYPE);
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.UNSUPPORTED_TYPE, null);
+ fail("should fail because there is no mapped Hive type");
+ }
+ catch (UnsupportedTypeException e) {
+ String errorMsg = "Unable to find compatible Hive type for given HAWQ's type: " + DataType.UNSUPPORTED_TYPE;
+ assertEquals(errorMsg, e.getMessage());
+ }
+
+
+ }
+
+ @Test
+ public void testCompatibleHiveTypeWithModifiers() {
+
+ Integer[] hawqModifiers;
+ String compatibleTypeName;
+
+ hawqModifiers = new Integer[]{5};
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.BPCHAR, hawqModifiers);
+ assertEquals(compatibleTypeName, EnumHiveToHawqType.CharType.getTypeName() + "(" + Joiner.on(",").join(hawqModifiers) + ")");
+
+ hawqModifiers = new Integer[]{10};
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.VARCHAR, hawqModifiers);
+ assertEquals(compatibleTypeName, EnumHiveToHawqType.VarcharType.getTypeName() + "(" + Joiner.on(",").join(hawqModifiers) + ")");
+
+ hawqModifiers = new Integer[]{38, 18};
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.NUMERIC, hawqModifiers);
+ assertEquals(compatibleTypeName, EnumHiveToHawqType.DecimalType.getTypeName() + "(" + Joiner.on(",").join(hawqModifiers) + ")");
+
+ try {
+ compatibleTypeName = HiveUtilities.toCompatibleHiveType(DataType.UNSUPPORTED_TYPE, hawqModifiers);
fail("should fail because there is no mapped Hive type");
}
catch (UnsupportedTypeException e) {
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/rest/RestResource.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/rest/RestResource.java b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/rest/RestResource.java
index 60bb31e..633e78c 100644
--- a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/rest/RestResource.java
+++ b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/rest/RestResource.java
@@ -24,6 +24,7 @@ import javax.ws.rs.core.MultivaluedMap;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.commons.lang.StringUtils;
import java.io.UnsupportedEncodingException;
import java.util.List;
@@ -56,7 +57,12 @@ public abstract class RestResource {
String key = entry.getKey();
List<String> values = entry.getValue();
if (values != null) {
- String value = values.get(0);
+ String value;
+ if(values.size() > 1) {
+ value = StringUtils.join(values, ",");
+ } else {
+ value = values.get(0);
+ }
if (value != null) {
// converting to value UTF-8 encoding
value = new String(value.getBytes(CharEncoding.ISO_8859_1),
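A repeated HTTP header now folds back into a single comma-separated value instead of dropping everything after the first element. A small sketch (header name and values illustrative):

    // e.g. X-GP-ATTRS-PROJ-IDX arriving as two values: ["0,3", "5"]
    List<String> values = Arrays.asList("0,3", "5");
    String value = (values.size() > 1) ? StringUtils.join(values, ",") : values.get(0);
    // value == "0,3,5"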
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
index 1797b88..a250b18 100644
--- a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
+++ b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
@@ -382,17 +382,41 @@ public class ProtocolData extends InputData {
/*
* Sets the tuple description for the record
+ * Attribute Projection information is optional
*/
void parseTupleDescription() {
+
+ /* Process column projection info */
+ String columnProjStr = getOptionalProperty("ATTRS-PROJ");
+ List<Integer> columnProjList = new ArrayList<Integer>();
+ if(columnProjStr != null) {
+ int columnProj = Integer.parseInt(columnProjStr);
+ if(columnProj > 0) {
+ String columnProjIndexStr = getProperty("ATTRS-PROJ-IDX");
+ String columnProjIdx[] = columnProjIndexStr.split(",");
+ for(int i = 0; i < columnProj; i++) {
+ columnProjList.add(Integer.valueOf(columnProjIdx[i]));
+ }
+ } else {
+ /* This is a special case to handle aggregate queries not related to any specific column
+ * e.g. count(*) queries. */
+ columnProjList.add(0);
+ }
+ }
+
int columns = getIntProperty("ATTRS");
for (int i = 0; i < columns; ++i) {
String columnName = getProperty("ATTR-NAME" + i);
int columnTypeCode = getIntProperty("ATTR-TYPECODE" + i);
String columnTypeName = getProperty("ATTR-TYPENAME" + i);
Integer[] columnTypeMods = parseTypeMods(i);
-
- ColumnDescriptor column = new ColumnDescriptor(columnName,
- columnTypeCode, i, columnTypeName, columnTypeMods);
+ ColumnDescriptor column;
+ if(columnProjStr != null) {
+ column = new ColumnDescriptor(columnName, columnTypeCode, i, columnTypeName, columnTypeMods, columnProjList.contains(Integer.valueOf(i)));
+ } else {
+ /* For data formats that don't support column projection */
+ column = new ColumnDescriptor(columnName, columnTypeCode, i, columnTypeName, columnTypeMods);
+ }
tupleDescription.add(column);
if (columnName.equalsIgnoreCase(ColumnDescriptor.RECORD_KEY_NAME)) {
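A worked example of the new optional projection headers (values illustrative): ATTRS-PROJ = "2" with ATTRS-PROJ-IDX = "0,3" marks columns 0 and 3 as projected and all others as not; ATTRS-PROJ = "0" marks every column unprojected (the count(*) case); a missing ATTRS-PROJ leaves all columns projected.

    // sketch of the index parsing for ATTRS-PROJ="2", ATTRS-PROJ-IDX="0,3"
    List<Integer> columnProjList = new ArrayList<Integer>();
    for (String idx : "0,3".split(",")) {
        columnProjList.add(Integer.valueOf(idx));
    }
    // columnProjList.contains(i) decides each ColumnDescriptor's isProjected flag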
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/pxf/pxf-service/src/main/resources/pxf-profiles-default.xml
----------------------------------------------------------------------
diff --git a/pxf/pxf-service/src/main/resources/pxf-profiles-default.xml b/pxf/pxf-service/src/main/resources/pxf-profiles-default.xml
index d72df94..71e6845 100644
--- a/pxf/pxf-service/src/main/resources/pxf-profiles-default.xml
+++ b/pxf/pxf-service/src/main/resources/pxf-profiles-default.xml
@@ -28,7 +28,6 @@ under the License.
<plugins>
<plugin_A>...</plugin_A>
<plugin_B>...</plugin_B>
- ...
</plugins>
</profile>
-->
@@ -80,6 +79,20 @@ under the License.
</plugins>
</profile>
<profile>
+ <name>HiveORC</name>
+ <description>This profile is suitable only for Hive tables stored as ORC files
+ and serialized with the OrcSerde.
+ It is much faster than the general purpose Hive profile and supports
+ predicate pushdown and column projection.
+ DELIMITER parameter is mandatory.
+ </description>
+ <plugins>
+ <fragmenter>org.apache.hawq.pxf.plugins.hive.HiveInputFormatFragmenter</fragmenter>
+ <accessor>org.apache.hawq.pxf.plugins.hive.HiveORCAccessor</accessor>
+ <resolver>org.apache.hawq.pxf.plugins.hive.HiveORCSerdeResolver</resolver>
+ <metadata>org.apache.hawq.pxf.plugins.hive.HiveMetadataFetcher</metadata>
+ </plugins>
+ </profile>
+ <profile>
<name>HdfsTextSimple</name>
<description>This profile is suitable for using when reading delimited single line records from plain text files
on HDFS
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ecf00d08/src/backend/access/external/pxfheaders.c
----------------------------------------------------------------------
diff --git a/src/backend/access/external/pxfheaders.c b/src/backend/access/external/pxfheaders.c
index bafbae3..07c41c1 100644
--- a/src/backend/access/external/pxfheaders.c
+++ b/src/backend/access/external/pxfheaders.c
@@ -137,7 +137,7 @@ static void add_tuple_desc_httpheader(CHURL_HEADERS headers, Relation rel)
{
char long_number[sizeof(int32) * 8];
- StringInfoData formatter;
+ StringInfoData formatter;
TupleDesc tuple;
initStringInfo(&formatter);