You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by tr...@apache.org on 2012/08/15 22:26:40 UTC
svn commit: r1373657 - in /incubator/hcatalog/trunk: ./
hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/
hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/
src/java/org/apache/hcatalog/common/ src/java/org/apache/hcatalog/data/
src/...
Author: travis
Date: Wed Aug 15 22:26:40 2012
New Revision: 1373657
URL: http://svn.apache.org/viewvc?rev=1373657&view=rev
Log:
HCATALOG-460 Enable boolean to integer conversions
Added:
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java
Modified:
incubator/hcatalog/trunk/CHANGES.txt
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java
Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Wed Aug 15 22:26:40 2012
@@ -34,6 +34,8 @@ Trunk (unreleased changes)
HCAT-427 Document storage-based authorization (lefty via gates)
IMPROVEMENTS
+ HCAT-460 Enable boolean to integer conversions (traviscrawford)
+
HCAT-450 HCatalog should use transitive ivy dependencies (traviscrawford)
HCAT-350 Writing BINARY data to HCatRecord depends on a Hive class (thejas via traviscrawford)
Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java Wed Aug 15 22:26:40 2012
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.Input
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.Credentials;
import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.common.HCatContext;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.Pair;
import org.apache.hcatalog.data.schema.HCatSchema;
@@ -185,6 +186,8 @@ public class HCatLoader extends HCatBase
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
+ HCatContext.getInstance().mergeConf(job.getConfiguration());
+
Table table = phutil.getTable(location,
hcatServerUri!=null?hcatServerUri:PigHCatUtil.getHCatServerUri(job),
PigHCatUtil.getHCatServerPrincipal(job));
Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java Wed Aug 15 22:26:40 2012
@@ -26,6 +26,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Properties;
import junit.framework.TestCase;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hcatalog.HcatTestUtils;
+import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.data.Pair;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
@@ -396,4 +398,44 @@ public class TestHCatLoader extends Test
ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job);
assertEquals(2048, (long) statistics.getmBytes());
}
+
+ public void testConvertBooleanToInt() throws Exception {
+ String tbl = "test_convert_boolean_to_int";
+ String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt";
+ File inputDataDir = new File(inputFileName).getParentFile();
+ inputDataDir.mkdir();
+
+ String[] lines = new String[] {"llama\t1", "alpaca\t0"};
+ HcatTestUtils.createTestDataFile(inputFileName, lines);
+
+ assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode());
+ assertEquals(0, driver.run("create external table " + tbl +
+ " (a string, b boolean) row format delimited fields terminated by '\t'" +
+ " stored as textfile location 'file://" +
+ inputDataDir.getAbsolutePath() + "'").getResponseCode());
+
+ Properties properties = new Properties();
+ properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true");
+ PigServer server = new PigServer(ExecType.LOCAL, properties);
+ server.registerQuery(
+ "data = load 'test_convert_boolean_to_int' using org.apache.hcatalog.pig.HCatLoader();");
+ Schema schema = server.dumpSchema("data");
+ assertEquals(2, schema.getFields().size());
+
+ assertEquals("a", schema.getField(0).alias);
+ assertEquals(DataType.CHARARRAY, schema.getField(0).type);
+ assertEquals("b", schema.getField(1).alias);
+ assertEquals(DataType.INTEGER, schema.getField(1).type);
+
+ Iterator<Tuple> iterator = server.openIterator("data");
+ Tuple t = iterator.next();
+ assertEquals("llama", t.get(0));
+ // TODO: Figure out how to load a text file into Hive with boolean columns. This next assert
+ // passes because data was loaded as integers, not because it was converted.
+ assertEquals(1, t.get(1));
+ t = iterator.next();
+ assertEquals("alpaca", t.get(0));
+ assertEquals(0, t.get(1));
+ assertFalse(iterator.hasNext());
+ }
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java Wed Aug 15 22:26:40 2012
@@ -116,4 +116,17 @@ public final class HCatConstants {
// Hadoop Conf Var Names
public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary";
+ //***************************************************************************
+ // Data-related configuration properties.
+ //***************************************************************************
+
+ /**
+ * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}).
+ * Pig &lt; 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions
+ * will not expect boolean values when upgrading Pig. For integration the option is offered to
+ * convert boolean fields to integers by setting this Hadoop configuration key.
+ */
+ public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER =
+ "hcat.data.convert.boolean.to.integer";
+ public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false;
}
Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java?rev=1373657&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java Wed Aug 15 22:26:40 2012
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.common;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.Map;
+
+/**
+ * HCatContext provides global access to configuration data.
+ */
+public class HCatContext {
+
+ private static final HCatContext hCatContext = new HCatContext();
+
+ private final Configuration conf;
+
+ private HCatContext() {
+ conf = new Configuration();
+ }
+
+ public static HCatContext getInstance() {
+ return hCatContext;
+ }
+
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * Merge the given configuration into the HCatContext conf, overwriting any existing keys.
+ */
+ public void mergeConf(Configuration conf) {
+ for (Map.Entry<String, String> entry : conf) {
+ this.conf.set(entry.getKey(), entry.getValue());
+ }
+ }
+}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java Wed Aug 15 22:26:40 2012
@@ -41,6 +41,8 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.common.HCatContext;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -185,7 +187,15 @@ public class HCatRecordSerDe implements
ObjectInspector fieldObjectInspector) throws SerDeException {
Object res = null;
if (fieldObjectInspector.getCategory() == Category.PRIMITIVE){
- res = ((PrimitiveObjectInspector)fieldObjectInspector).getPrimitiveJavaObject(field);
+ if (field != null &&
+ HCatContext.getInstance().getConf().getBoolean(
+ HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
+ HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) &&
+ field instanceof Boolean) {
+ res = ((Boolean) field) ? 1 : 0;
+ } else {
+ res = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field);
+ }
} else if (fieldObjectInspector.getCategory() == Category.STRUCT){
res = serializeStruct(field,(StructObjectInspector)fieldObjectInspector);
} else if (fieldObjectInspector.getCategory() == Category.LIST){
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java Wed Aug 15 22:26:40 2012
@@ -30,6 +30,8 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.common.HCatContext;
import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
@@ -137,7 +139,10 @@ public class HCatSchemaUtils {
private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) {
switch(((PrimitiveTypeInfo)basePrimitiveTypeInfo).getPrimitiveCategory()) {
case BOOLEAN:
- return Type.BOOLEAN;
+ return HCatContext.getInstance().getConf().getBoolean(
+ HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
+ HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) ?
+ Type.INT : Type.BOOLEAN;
case BYTE:
return Type.TINYINT;
case DOUBLE: