You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by tr...@apache.org on 2012/08/15 22:26:40 UTC

svn commit: r1373657 - in /incubator/hcatalog/trunk: ./ hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/ hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/ src/java/org/apache/hcatalog/common/ src/java/org/apache/hcatalog/data/ src/...

Author: travis
Date: Wed Aug 15 22:26:40 2012
New Revision: 1373657

URL: http://svn.apache.org/viewvc?rev=1373657&view=rev
Log:
HCATALOG-460 Enable boolean to integer conversions

Added:
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java
Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java
    incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Wed Aug 15 22:26:40 2012
@@ -34,6 +34,8 @@ Trunk (unreleased changes)
   HCAT-427 Document storage-based authorization (lefty via gates)
 
   IMPROVEMENTS
+  HCAT-460 Enable boolean to integer conversions (traviscrawford)
+
   HCAT-450 HCatalog should use transitive ivy dependencies (traviscrawford)
 
   HCAT-350 Writing BINARY data to HCatRecord depends on a Hive class (thejas via traviscrawford)

Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java Wed Aug 15 22:26:40 2012
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.Input
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.common.HCatContext;
 import org.apache.hcatalog.common.HCatUtil;
 import org.apache.hcatalog.data.Pair;
 import org.apache.hcatalog.data.schema.HCatSchema;
@@ -185,6 +186,8 @@ public class HCatLoader extends HCatBase
 
   @Override
   public ResourceSchema getSchema(String location, Job job) throws IOException {
+    HCatContext.getInstance().mergeConf(job.getConfiguration());
+
     Table table = phutil.getTable(location,
         hcatServerUri!=null?hcatServerUri:PigHCatUtil.getHCatServerUri(job),
             PigHCatUtil.getHCatServerPrincipal(job));

Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java Wed Aug 15 22:26:40 2012
@@ -26,6 +26,7 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 
 import junit.framework.TestCase;
 
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hcatalog.HcatTestUtils;
+import org.apache.hcatalog.common.HCatConstants;
 import org.apache.hcatalog.data.Pair;
 import org.apache.pig.ExecType;
 import org.apache.pig.PigServer;
@@ -396,4 +398,44 @@ public class TestHCatLoader extends Test
     ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job);
     assertEquals(2048, (long) statistics.getmBytes());
   }
+
+  public void testConvertBooleanToInt() throws Exception { // checks the Pig schema and rows when the boolean-to-integer option is set
+    String tbl = "test_convert_boolean_to_int";
+    String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt";
+    File inputDataDir = new File(inputFileName).getParentFile();
+    inputDataDir.mkdir(); // the boolean result of mkdir() is not checked
+
+    String[] lines = new String[] {"llama\t1", "alpaca\t0"}; // two tab-separated rows: a name and a 1/0 value
+    HcatTestUtils.createTestDataFile(inputFileName, lines);
+
+    assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); // start from a clean table
+    assertEquals(0, driver.run("create external table " + tbl +
+        " (a string, b boolean) row format delimited fields terminated by '\t'" +
+        " stored as textfile location 'file://" +
+        inputDataDir.getAbsolutePath() + "'").getResponseCode());
+
+    Properties properties = new Properties();
+    properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); // turn the conversion on for this Pig server
+    PigServer server = new PigServer(ExecType.LOCAL, properties);
+    server.registerQuery( // the table name in the query is a literal that repeats the value of tbl
+        "data = load 'test_convert_boolean_to_int' using org.apache.hcatalog.pig.HCatLoader();");
+    Schema schema = server.dumpSchema("data");
+    assertEquals(2, schema.getFields().size());
+
+    assertEquals("a", schema.getField(0).alias);
+    assertEquals(DataType.CHARARRAY, schema.getField(0).type);
+    assertEquals("b", schema.getField(1).alias);
+    assertEquals(DataType.INTEGER, schema.getField(1).type); // the boolean column b must be reported to Pig as an integer
+
+    Iterator<Tuple> iterator = server.openIterator("data");
+    Tuple t = iterator.next();
+    assertEquals("llama", t.get(0));
+    // TODO: Figure out how to load a text file into Hive with boolean columns. This next assert
+    // passes because data was loaded as integers, not because it was converted.
+    assertEquals(1, t.get(1));
+    t = iterator.next();
+    assertEquals("alpaca", t.get(0));
+    assertEquals(0, t.get(1));
+    assertFalse(iterator.hasNext()); // exactly the two rows written above are expected
+  }
 }

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java Wed Aug 15 22:26:40 2012
@@ -116,4 +116,17 @@ public final class HCatConstants {
   // Hadoop Conf Var Names
   public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary";
 
+  //***************************************************************************
+  // Data-related configuration properties.
+  //***************************************************************************
+
+  /**
+   * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}).
+   * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions
+   * will not expect boolean values when upgrading Pig. For integration the option is offered to
+   * convert boolean fields to integers by setting this Hadoop configuration key.
+   */
+  public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER =
+      "hcat.data.convert.boolean.to.integer";
+  public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false;
 }

Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java?rev=1373657&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatContext.java Wed Aug 15 22:26:40 2012
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.common;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.Map;
+
+/**
+ * HCatContext provides global access to configuration data through one static instance; its Configuration is created in the private constructor and updated by {@link #mergeConf}.
+ */
+public class HCatContext {
+
+  private static final HCatContext hCatContext = new HCatContext(); // the only instance, created at class initialization
+
+  private final Configuration conf; // backing configuration that mergeConf writes into
+
+  private HCatContext() { // private: callers obtain the instance through getInstance()
+    conf = new Configuration();
+  }
+
+  public static HCatContext getInstance() { // always returns the same shared instance
+    return hCatContext;
+  }
+
+  public Configuration getConf() { // returns the backing Configuration object itself, not a copy
+    return conf;
+  }
+
+  /**
+   * Merge the given configuration into the HCatContext conf, overwriting any existing keys. Keys that exist only in the HCatContext conf are left untouched; nothing is removed.
+   */
+  public void mergeConf(Configuration conf) {
+    for (Map.Entry<String, String> entry : conf) { // walks every key/value entry of the argument
+      this.conf.set(entry.getKey(), entry.getValue()); // the parameter shadows the field, hence this.conf
+    }
+  }
+}

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java Wed Aug 15 22:26:40 2012
@@ -41,6 +41,8 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Writable;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.common.HCatContext;
 import org.apache.hcatalog.data.schema.HCatSchema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -185,7 +187,15 @@ public class HCatRecordSerDe implements 
       ObjectInspector fieldObjectInspector) throws SerDeException {
     Object res = null;
     if (fieldObjectInspector.getCategory() == Category.PRIMITIVE){
-      res = ((PrimitiveObjectInspector)fieldObjectInspector).getPrimitiveJavaObject(field);
+      if (field != null &&
+          HCatContext.getInstance().getConf().getBoolean(
+              HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
+              HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) &&
+          field instanceof Boolean) {
+        res = ((Boolean) field) ? 1 : 0;
+      } else {
+        res = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field);
+      }
     } else if (fieldObjectInspector.getCategory() == Category.STRUCT){
       res = serializeStruct(field,(StructObjectInspector)fieldObjectInspector);
     } else if (fieldObjectInspector.getCategory() == Category.LIST){

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java?rev=1373657&r1=1373656&r2=1373657&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java Wed Aug 15 22:26:40 2012
@@ -30,6 +30,8 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.common.HCatContext;
 import org.apache.hcatalog.common.HCatException;
 import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
 
@@ -137,7 +139,10 @@ public class HCatSchemaUtils {
     private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) {
         switch(((PrimitiveTypeInfo)basePrimitiveTypeInfo).getPrimitiveCategory()) {
         case BOOLEAN:
-            return Type.BOOLEAN;
+            return HCatContext.getInstance().getConf().getBoolean(
+                HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER,
+                HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT) ?
+                Type.INT : Type.BOOLEAN;
         case BYTE:
             return Type.TINYINT;
         case DOUBLE: