You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by tr...@apache.org on 2012/07/27 01:15:18 UTC
svn commit: r1366256 - in /incubator/hcatalog/trunk: ./
hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/
hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/
src/java/org/apache/hcatalog/data/ src/test/org/apache/hcatalog/data/
src/te...
Author: travis
Date: Fri Jul 27 01:15:17 2012
New Revision: 1366256
URL: http://svn.apache.org/viewvc?rev=1366256&view=rev
Log:
HCATALOG-350 : Writing BINARY data to HCatRecord depends on a Hive class
Modified:
incubator/hcatalog/trunk/CHANGES.txt
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DataType.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java
incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestDefaultHCatRecord.java
incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Fri Jul 27 01:15:17 2012
@@ -28,6 +28,8 @@ Trunk (unreleased changes)
HCAT-328 HCatLoader should report its input size so pig can estimate the number of reducers (traviscrawford via gates)
IMPROVEMENTS
+ HCAT-350 Writing BINARY data to HCatRecord depends on a Hive class (thejas via traviscrawford)
+
HCAT-440 pig field names for arrays should be configurable (traviscrawford)
HCAT-434 Package HCatalog pig support as a separate jar (traviscrawford)
Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java Fri Jul 27 01:15:17 2012
@@ -27,7 +27,6 @@ import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
@@ -42,7 +41,6 @@ import org.apache.pig.ResourceStatistics
import org.apache.pig.StoreFunc;
import org.apache.pig.StoreMetadata;
import org.apache.pig.backend.BackendException;
-import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
@@ -247,10 +245,10 @@ public abstract class HCatBaseStorer ext
switch(type){
case BINARY:
- ByteArrayRef ba = new ByteArrayRef();
- byte[] bytes = (null == pigObj) ? new byte[0] : ((DataByteArray)pigObj).get();
- ba.setData(bytes);
- return ba;
+ if (pigObj == null) {
+ return null;
+ }
+ return ((DataByteArray)pigObj).get();
case STRUCT:
if (pigObj == null) {
Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java Fri Jul 27 01:15:17 2012
@@ -33,7 +33,6 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatException;
@@ -320,7 +319,7 @@ public class PigHCatUtil {
Type itemType = hfs.getType();
switch (itemType){
case BINARY:
- result = (o == null) ? null : new DataByteArray(((ByteArrayRef)o).getData());
+ result = (o == null) ? null : new DataByteArray((byte[])o);
break;
case STRUCT:
result = transformToTuple((List<Object>)o,hfs);
Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java Fri Jul 27 01:15:17 2012
@@ -473,7 +473,7 @@ public class TestHCatStorer extends Test
driver.getResults(res);
Iterator<String> itr = res.iterator();
- assertEquals( "0\tNULL\tNULL\tNULL\tNULL\t\tnull" ,itr.next());
+ assertEquals( "0\tNULL\tNULL\tNULL\tNULL\tnull\tnull" ,itr.next());
assertEquals( "NULL\t4.2\t2.2\t4\tlets hcat\tbinary-data\tnull" ,itr.next());
assertEquals( "3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tbinary-data\tnull",itr.next());
assertFalse(itr.hasNext());
@@ -481,13 +481,19 @@ public class TestHCatStorer extends Test
server.registerQuery("B = load 'junit_unparted' using "+HCatLoader.class.getName()+";");
Iterator<Tuple> iter = server.openIterator("B");
int count = 0;
+ int num5nulls = 0;
while(iter.hasNext()){
Tuple t = iter.next();
- assertTrue(t.get(5) instanceof DataByteArray);
+ if(t.get(5) == null){
+ num5nulls++;
+ }else {
+ assertTrue(t.get(5) instanceof DataByteArray);
+ }
assertNull(t.get(6));
count++;
}
assertEquals(3, count);
+ assertEquals(1, num5nulls);
driver.run("drop table junit_unparted");
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DataType.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DataType.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DataType.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DataType.java Fri Jul 27 01:15:17 2012
@@ -36,6 +36,7 @@ public abstract class DataType {
public static final byte FLOAT = 20;
public static final byte DOUBLE = 25;
public static final byte STRING = 55;
+ public static final byte BINARY = 60;
public static final byte MAP = 100;
public static final byte STRUCT = 110;
@@ -75,6 +76,8 @@ public abstract class DataType {
return LIST;
} else if (o instanceof Map<?,?>) {
return MAP;
+ }else if (o instanceof byte[]) {
+ return BINARY;
} else {return ERROR;}
}
@@ -112,6 +115,9 @@ public abstract class DataType {
case SHORT:
return ((Short)o1).compareTo((Short)o2);
+
+ case BINARY:
+ return compareByteArray((byte[])o1, (byte[])o2);
case LIST:
List<?> l1 = (List<?>)o1;
@@ -170,4 +176,30 @@ public abstract class DataType {
return dt1 < dt2 ? -1 : 1;
}
}
+
+ private static int compareByteArray(byte[] o1, byte[] o2) {
+
+ for(int i = 0; i < o1.length; i++){
+ if(i == o2.length){
+ return 1;
+ }
+ if(o1[i] == o2[i]){
+ continue;
+ }
+ if(o1[i] > o1[i]){
+ return 1;
+ }
+ else{
+ return -1;
+ }
+ }
+
+ //bytes in o1 are same as o2
+ //in case o2 was longer
+ if(o2.length > o1.length){
+ return -1;
+ }
+ return 0; //equals
+ }
+
}
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java Fri Jul 27 01:15:17 2012
@@ -50,6 +50,14 @@ public abstract class HCatRecord impleme
public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException {
set(fieldName,recordSchema,value);
}
+
+ public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException {
+ return (byte[]) get(fieldName, recordSchema, byte[].class);
+ }
+
+ public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException {
+ set(fieldName,recordSchema,value);
+ }
public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException {
//TINYINT
Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/ReaderWriter.java Fri Jul 27 01:15:17 2012
@@ -73,7 +73,13 @@ public abstract class ReaderWriter {
case DataType.NULL:
return null;
-
+
+ case DataType.BINARY:
+ int len = in.readInt();
+ byte[] ba = new byte[len];
+ in.readFully(ba);
+ return ba;
+
case DataType.MAP:
int size = in.readInt();
Map<Object,Object> m = new HashMap<Object, Object>(size);
@@ -166,7 +172,13 @@ public abstract class ReaderWriter {
out.write(utfBytes);
return;
-
+ case DataType.BINARY:
+ byte[] ba = (byte[])val;
+ out.writeByte(DataType.BINARY);
+ out.writeInt(ba.length);
+ out.write(ba);
+ return;
+
case DataType.NULL:
out.writeByte(DataType.NULL);
return;
Modified: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestDefaultHCatRecord.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestDefaultHCatRecord.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestDefaultHCatRecord.java (original)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestDefaultHCatRecord.java Fri Jul 27 01:15:17 2012
@@ -32,9 +32,12 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.data.schema.HCatSchemaUtils;
import junit.framework.Assert;
import junit.framework.TestCase;
@@ -75,12 +78,82 @@ public class TestDefaultHCatRecord exten
public void testCompareTo() {
HCatRecord[] recs = getHCatRecords();
Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0],recs[1]) == 0);
+ Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4],recs[5]) == 0);
}
public void testEqualsObject() {
HCatRecord[] recs = getHCatRecords();
Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0],recs[1]));
+ Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4],recs[5]));
+ }
+
+ /**
+ * Test get and set calls with type
+ * @throws HCatException
+ */
+ public void testGetSetByType1() throws HCatException{
+ HCatRecord inpRec = getHCatRecords()[0];
+ HCatRecord newRec = new DefaultHCatRecord(inpRec.size());
+ HCatSchema hsch =
+ HCatSchemaUtils.getHCatSchema(
+ "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string");
+
+
+ newRec.setByte("a", hsch, inpRec.getByte("a", hsch) );
+ newRec.setShort("b", hsch, inpRec.getShort("b", hsch) );
+ newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch) );
+ newRec.setLong("d", hsch, inpRec.getLong("d", hsch) );
+ newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch) );
+ newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch) );
+ newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch) );
+ newRec.setString("h", hsch, inpRec.getString("h", hsch) );
+ newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch) );
+ newRec.setString("j", hsch, inpRec.getString("j", hsch) );
+
+ Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec,inpRec));
+
+
+ }
+
+ /**
+ * Test get and set calls with type
+ * @throws HCatException
+ */
+ public void testGetSetByType2() throws HCatException{
+ HCatRecord inpRec = getGetSet2InpRec();
+
+ HCatRecord newRec = new DefaultHCatRecord(inpRec.size());
+ HCatSchema hsch =
+ HCatSchemaUtils.getHCatSchema("a:binary,b:map<string,string>,c:array<int>,d:struct<i:int>");
+
+
+ newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch) );
+ newRec.setMap("b", hsch, inpRec.getMap("b", hsch) );
+ newRec.setList("c", hsch, inpRec.getList("c", hsch) );
+ newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch) );
+
+ Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec,inpRec));
+ }
+
+
+ private HCatRecord getGetSet2InpRec() {
+ List<Object> rlist = new ArrayList<Object>();
+
+ rlist.add(new byte[]{1,2,3});
+
+ Map<Short, String> mapcol = new HashMap<Short, String>(3);
+ mapcol.put(new Short("2"), "hcat is cool");
+ mapcol.put(new Short("3"), "is it?");
+ mapcol.put(new Short("4"), "or is it not?");
+ rlist.add(mapcol);
+
+ List<Integer> listcol = new ArrayList<Integer>();
+ listcol.add(314);
+ listcol.add(007);
+ rlist.add( listcol);//list
+ rlist.add( listcol);//struct
+ return new DefaultHCatRecord(rlist);
}
private HCatRecord[] getHCatRecords(){
@@ -90,7 +163,9 @@ public class TestDefaultHCatRecord exten
rec_1.add(new Short("456"));
rec_1.add( new Integer(789));
rec_1.add( new Long(1000L));
+ rec_1.add( new Float(5.3F));
rec_1.add( new Double(5.3D));
+ rec_1.add( new Boolean(true));
rec_1.add( new String("hcat and hadoop"));
rec_1.add( null);
rec_1.add( "null");
@@ -102,7 +177,9 @@ public class TestDefaultHCatRecord exten
rec_2.add( new Short("456"));
rec_2.add( new Integer(789));
rec_2.add( new Long(1000L));
+ rec_2.add( new Float(5.3F));
rec_2.add( new Double(5.3D));
+ rec_2.add( new Boolean(true));
rec_2.add( new String("hcat and hadoop"));
rec_2.add( null);
rec_2.add( "null");
@@ -149,7 +226,37 @@ public class TestDefaultHCatRecord exten
rec_4.add( innerList2);
HCatRecord tup_4 = new DefaultHCatRecord(rec_4);
- return new HCatRecord[]{tup_1,tup_2,tup_3,tup_4};
+
+ List<Object> rec_5 = new ArrayList<Object>(3);
+ rec_5.add( getByteArray());
+ rec_5.add( getStruct());
+ rec_5.add( getList());
+ HCatRecord tup_5 = new DefaultHCatRecord(rec_5);
+
+
+ List<Object> rec_6 = new ArrayList<Object>(3);
+ rec_6.add( getByteArray());
+ rec_6.add( getStruct());
+ rec_6.add( getList());
+ HCatRecord tup_6 = new DefaultHCatRecord(rec_6);
+
+
+ return new HCatRecord[]{tup_1,tup_2,tup_3,tup_4,tup_5,tup_6};
+
+ }
+
+ private Object getList() {
+ return getStruct();
+ }
+
+ private Object getByteArray() {
+ return new byte[]{1,2,3,4};
+ }
+ private List<?> getStruct() {
+ List<Object> struct = new ArrayList<Object>();
+ struct.add(new Integer(1));
+ struct.add(new String("x"));
+ return struct;
}
}
Modified: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java?rev=1366256&r1=1366255&r2=1366256&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java (original)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java Fri Jul 27 01:15:17 2012
@@ -376,7 +376,7 @@ public class TestHCatMultiOutputFormat {
partLocs.add(part.getLocation());
partDesc.add(Utilities.getPartitionDesc(part));
}
- work = new FetchWork(partLocs, partDesc);
+ work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
work.setLimit(100);
} else {
work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));