Posted to commits@pig.apache.org by ga...@apache.org on 2009/11/05 22:03:00 UTC
svn commit: r833166 [3/5] - in /hadoop/pig/trunk/contrib/zebra: ./
src/java/org/apache/hadoop/zebra/ src/java/org/apache/hadoop/zebra/io/
src/java/org/apache/hadoop/zebra/mapred/
src/java/org/apache/hadoop/zebra/pig/ src/java/org/apache/hadoop/zebra/pi...
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableProjections.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableProjections.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableProjections.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableProjections.java Thu Nov 5 21:02:57 2009
@@ -62,7 +62,7 @@
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, "a,b,c,d,e,f,g",
- "[a,b,c];[d,e,f,g]", false, conf);
+ "[a,b,c];[d,e,f,g]", conf);
writer.finish();
Schema schema = writer.getSchema();
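
The change above recurs throughout this commit: the boolean "sorted" flag is
dropped from the BasicTable.Writer constructor used for unsorted tables. A
minimal sketch of the new call shape, reusing the schema and storage hint from
this test (the table name below is illustrative, not part of the commit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.zebra.io.BasicTable;

    public class WriterSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("WriterSketchTable");  // illustrative table path
        BasicTable.drop(path, conf);                // drop any previous table
        // Before: new BasicTable.Writer(path, schema, storageHint, false, conf)
        // After:  the boolean argument is gone.
        BasicTable.Writer writer = new BasicTable.Writer(path,
            "a,b,c,d,e,f,g",       // logical schema
            "[a,b,c];[d,e,f,g]",   // storage hint: two column groups
            conf);
        writer.finish();
      }
    }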
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableSplits.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableSplits.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableSplits.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTableSplits.java Thu Nov 5 21:02:57 2009
@@ -72,7 +72,7 @@
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java Thu Nov 5 21:02:57 2009
@@ -80,7 +80,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
@@ -987,7 +987,7 @@
BasicTable.drop(path, conf);
try {
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
Assert.fail("should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -1012,7 +1012,7 @@
BasicTable.drop(path, conf);
try {
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
Assert.fail("should throw exception");
} catch (Exception e) {
System.out.println(e);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java Thu Nov 5 21:02:57 2009
@@ -67,7 +67,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java Thu Nov 5 21:02:57 2009
@@ -62,7 +62,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java Thu Nov 5 21:02:57 2009
@@ -62,7 +62,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java Thu Nov 5 21:02:57 2009
@@ -62,7 +62,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName5.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName5.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName5.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName5.java Thu Nov 5 21:02:57 2009
@@ -62,7 +62,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName6.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName6.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName6.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName6.java Thu Nov 5 21:02:57 2009
@@ -62,7 +62,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupProjections.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupProjections.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupProjections.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupProjections.java Thu Nov 5 21:02:57 2009
@@ -59,6 +59,10 @@
fs = new LocalFileSystem(rawLFS);
path = new Path(fs.getWorkingDirectory(), outputFile);
System.out.println("output file: " + path);
+
+ if (fs.exists(path)) {
+ ColumnGroup.drop(path, conf);
+ }
schema = new Schema("a,b,c,d,e,f,g");
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupSplits.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupSplits.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupSplits.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupSplits.java Thu Nov 5 21:02:57 2009
@@ -70,6 +70,10 @@
fs = new LocalFileSystem(rawLFS);
path = new Path(fs.getWorkingDirectory(), outputFile);
System.out.println("output file: " + path);
+
+ if (fs.exists(path)) {
+ ColumnGroup.drop(path, conf);
+ }
schema = new Schema(STR_SCHEMA);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java Thu Nov 5 21:02:57 2009
@@ -121,7 +121,8 @@
}
int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a, b]; [c, d]", path, true, false);
+ "[a, b]; [c, d]", null,
+ path, true);
int rowsToRead = Math.min(10, numRows);
@@ -163,8 +164,9 @@
*/
// 5 splits and 50 rows
- numRows = TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f",
- "[a, b]; [c, d]; [e] as myCG", path, true, false);
+ numRows = TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f",
+ "[a, b]; [c, d]; [e] as myCG",
+ null, path, true);
BasicTable.dropColumnGroup(path, conf, "myCG");
@@ -208,7 +210,7 @@
// first write the table :
BasicTable.Writer writer = new BasicTable.Writer(path, mixedSchema,
- mixedStorageHint, false, conf);
+ mixedStorageHint, conf);
writer.finish();
Schema schema = writer.getSchema();
@@ -413,7 +415,7 @@
"[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" +
"[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" +
"[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]",
- path, true, false);
+ null, path, true);
System.out.println("First dump:");
BasicTable.dumpInfo(path.toString(), System.out, conf);
@@ -476,7 +478,7 @@
"[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" +
"[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" +
"[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]",
- path, true, false);
+ null, path, true);
System.out.println("First dump:");
BasicTable.dumpInfo(path.toString(), System.out, conf);
@@ -533,7 +535,7 @@
}
int numRows = TestBasicTable.createBasicTable(1, 100000,
- "a, b, c, d, e, f, g, h, i, j, k, l, m, n", "[a, b]; [c, d]; [e]; [f]; [g]; [h]; [i]; [j]; [k]; [l]; [m]; [n]", path, true, false);
+ "a, b, c, d, e, f, g, h, i, j, k, l, m, n", "[a, b]; [c, d]; [e]; [f]; [g]; [h]; [i]; [j]; [k]; [l]; [m]; [n]", null, path, true);
System.out.println("in test5 , dump infor 1");
BasicTable.dumpInfo(path.toString(), System.out, conf);
@@ -624,7 +626,7 @@
}
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path, true);
Path wrongPath = new Path(path.toString() + "non-existing");
try {
BasicTable.dropColumnGroup(wrongPath, conf, "CG0");
@@ -643,7 +645,7 @@
Path path1 = new Path(path.toString() + "13");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path1, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path1, true);
try {
BasicTable.dropColumnGroup(path1, null, "CG0");
Assert.fail("should throw excepiton");
@@ -661,7 +663,7 @@
Path path1 = new Path(path.toString() + "14");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path1, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path1, true);
try {
BasicTable.dropColumnGroup(path1, conf, "");
Assert.fail("should throw excepiton");
@@ -680,7 +682,7 @@
Path path1 = new Path(path.toString() + "15");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path1, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path1, true);
try {
BasicTable.dropColumnGroup(path1, conf, null);
Assert.fail("should throw excepiton");
@@ -699,7 +701,7 @@
Path path1 = new Path(path.toString() + "16");
int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a, b]; [c, d]", path1, true, false);
+ "[a, b]; [c, d]", null, path1, true);
int rowsToRead = Math.min(10, numRows);
@@ -731,7 +733,7 @@
Path path1 = new Path(path.toString() + "17");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a,b,c,d,e,f]",
- path1, true, false);
+ null, path1, true);
BasicTable.dropColumnGroup(path1, conf, "CG0");
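
The TestBasicTable.createBasicTable helper used throughout this file picks up
a matching signature change: the trailing boolean goes away and a new argument
(null in every call shown here; presumably sort information, though the helper
itself is not part of this patch) moves in front of the path. A sketch of the
new call shape, assuming a test in the same org.apache.hadoop.zebra.io package:

    // The argument list mirrors the diffs above; the first two arguments are
    // splits and rows, per the "5 splits and 50 rows" note earlier in this file.
    int numRows = TestBasicTable.createBasicTable(
        1, 10,                // splits, rows
        "a, b, c, d, e, f",   // schema
        "[a, b]; [c, d]",     // storage hint
        null,                 // new argument; null here, meaning not shown in this patch
        path, true);          // output path; the boolean's meaning is likewise not shown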
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java Thu Nov 5 21:02:57 2009
@@ -77,7 +77,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java Thu Nov 5 21:02:57 2009
@@ -87,7 +87,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java Thu Nov 5 21:02:57 2009
@@ -74,7 +74,7 @@
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Thu Nov 5 21:02:57 2009
@@ -87,7 +87,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -114,7 +114,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -141,7 +141,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -168,7 +168,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -195,7 +195,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -222,7 +222,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -249,7 +249,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -276,7 +276,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -303,7 +303,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -330,7 +330,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -357,7 +357,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -384,7 +384,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -412,7 +412,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
System.out.println("HERE HERE");
Assert.fail("Should throw exception");
} catch (Exception e) {
@@ -441,7 +441,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
} catch (Exception e) {
e.printStackTrace();
Assert.fail("Should Not throw exception");
@@ -525,7 +525,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
} catch (Exception e) {
System.out.println(e);
Assert.fail("Should throw exception");
@@ -552,7 +552,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -580,7 +580,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
} catch (Exception e) {
e.printStackTrace();
Assert.fail("Should Not throw exception");
@@ -609,7 +609,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java Thu Nov 5 21:02:57 2009
@@ -66,7 +66,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -279,4 +279,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java Thu Nov 5 21:02:57 2009
@@ -75,7 +75,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java Thu Nov 5 21:02:57 2009
@@ -78,7 +78,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java Thu Nov 5 21:02:57 2009
@@ -77,7 +77,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java Thu Nov 5 21:02:57 2009
@@ -78,7 +78,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java Thu Nov 5 21:02:57 2009
@@ -114,7 +114,7 @@
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -210,7 +210,7 @@
System.out.println("in testRecord, get path: " + path.toString());
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -411,8 +411,9 @@
// String STR_STORAGE = "[m1#{a}]";
// Build Table and column groups
path = new Path(getCurrentMethodName());
+ BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -534,4 +535,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java Thu Nov 5 21:02:57 2009
@@ -82,7 +82,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java?rev=833166&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java Thu Nov 5 21:02:57 2009
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test projections on complicated column types.
+ *
+ */
+public class TestSortedBasicTableSplits {
+ final static String STR_SCHEMA = "f1:bool, r:record(f11:int, f12:long), m:map(string), c:collection(f13:double, f14:float, f15:bytes)";
+ // TODO: try map hash split later
+ final static String STR_STORAGE = "[r.f12, f1]; [m]";
+ private static Configuration conf;
+ private static Path path;
+ private static FileSystem fs;
+
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+ conf = new Configuration();
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ fs = new LocalFileSystem(rawLFS);
+ path = new Path(fs.getWorkingDirectory(), "TestBasicTableSplits");
+ fs = path.getFileSystem(conf);
+ // drop any previous tables
+ BasicTable.drop(path, conf);
+
+ BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+ STR_STORAGE, "r, c", null, conf);
+ writer.finish();
+
+ Schema schema = writer.getSchema();
+ Tuple tuple = TypesUtils.createTuple(schema);
+
+ BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+ int part = 0;
+ TableInserter inserter = writer1.getInserter("part" + part, true);
+ TypesUtils.resetTuple(tuple);
+
+ tuple.set(0, true);
+
+ Tuple tupRecord;
+ try {
+ tupRecord = TypesUtils.createTuple(schema.getColumnSchema("r")
+ .getSchema());
+ } catch (ParseException e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+
+ // row 1
+ tupRecord.set(0, 1);
+ tupRecord.set(1, 1001L);
+ tuple.set(1, tupRecord);
+
+ Map<String, String> map = new HashMap<String, String>();
+ map.put("a", "x");
+ map.put("b", "y");
+ map.put("c", "z");
+ tuple.set(2, map);
+
+ DataBag bagColl = TypesUtils.createBag();
+ Schema schColl = schema.getColumn(3).getSchema();
+ Tuple tupColl1 = TypesUtils.createTuple(schColl);
+ Tuple tupColl2 = TypesUtils.createTuple(schColl);
+ byte[] abs1 = new byte[3];
+ byte[] abs2 = new byte[4];
+ tupColl1.set(0, 3.1415926);
+ tupColl1.set(1, 1.6);
+ abs1[0] = 11;
+ abs1[1] = 12;
+ abs1[2] = 13;
+ tupColl1.set(2, new DataByteArray(abs1));
+ bagColl.add(tupColl1);
+ tupColl2.set(0, 123.456789);
+ tupColl2.set(1, 100);
+ abs2[0] = 21;
+ abs2[1] = 22;
+ abs2[2] = 23;
+ abs2[3] = 24;
+ tupColl2.set(2, new DataByteArray(abs2));
+ bagColl.add(tupColl2);
+ tuple.set(3, bagColl);
+
+ int row = 0;
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+
+ // row 2
+ row++;
+ TypesUtils.resetTuple(tuple);
+ TypesUtils.resetTuple(tupRecord);
+ map.clear();
+ tuple.set(0, false);
+ tupRecord.set(0, 2);
+ tupRecord.set(1, 1002L);
+ tuple.set(1, tupRecord);
+ map.put("boy", "girl");
+ map.put("adam", "amy");
+ map.put("bob", "becky");
+ map.put("carl", "cathy");
+ tuple.set(2, map);
+ bagColl.clear();
+ TypesUtils.resetTuple(tupColl1);
+ TypesUtils.resetTuple(tupColl2);
+ tupColl1.set(0, 7654.321);
+ tupColl1.set(1, 0.0001);
+ abs1[0] = 31;
+ abs1[1] = 32;
+ abs1[2] = 33;
+ tupColl1.set(2, new DataByteArray(abs1));
+ bagColl.add(tupColl1);
+ tupColl2.set(0, 0.123456789);
+ tupColl2.set(1, 0.3333);
+ abs2[0] = 41;
+ abs2[1] = 42;
+ abs2[2] = 43;
+ abs2[3] = 44;
+ tupColl2.set(2, new DataByteArray(abs2));
+ bagColl.add(tupColl2);
+ tuple.set(3, bagColl);
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+
+ inserter.close();
+ writer1.finish();
+
+ writer.close();
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+ }
+
+ @Test
+ public void test1() throws IOException, ParseException {
+ String projection = new String("r.f12, f1");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ // long totalBytes = reader.getStatus().getSize();
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ reader.close();
+ reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple value = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(value);
+
+ Assert.assertEquals(1001L, value.get(0));
+ Assert.assertEquals(true, value.get(1));
+
+ scanner.advance();
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+ scanner.getValue(value);
+ Assert.assertEquals(1002L, value.get(0));
+ Assert.assertEquals(false, value.get(1));
+
+ reader.close();
+ }
+
+ @Test
+ public void testStitch() throws IOException, ParseException {
+ String projection = new String("f1, r");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ // long totalBytes = reader.getStatus().getSize();
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ reader.close();
+ reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple value = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(value);
+
+ Tuple recordTuple = (Tuple) value.get(1);
+ Assert.assertEquals(1, recordTuple.get(0));
+ Assert.assertEquals(1001L, recordTuple.get(1));
+ Assert.assertEquals(true, value.get(0));
+ reader.close();
+ }
+}
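
The new test exercises a six-argument BasicTable.Writer constructor for sorted
tables. A minimal sketch of just the creation step, assuming "r, c" names the
sort columns (matching the comma-separated sort-column strings passed to
BasicTableOutputFormat.setSortInfo elsewhere in this commit) and leaving the
null slot as the test does, since this patch does not reveal its type:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.zebra.io.BasicTable;

    public class SortedWriterSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("SortedWriterSketchTable");  // illustrative path
        BasicTable.drop(path, conf);                      // drop any previous table
        // Sorted variant: schema, storage hint, sort columns, and a null slot
        // whose meaning is not shown in this patch.
        BasicTable.Writer writer = new BasicTable.Writer(path,
            "f1:bool, r:record(f11:int, f12:long), m:map(string), "
                + "c:collection(f13:double, f14:float, f15:bytes)",
            "[r.f12, f1]; [m]",
            "r, c", null, conf);
        writer.finish();
      }
    }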
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java Thu Nov 5 21:02:57 2009
@@ -74,7 +74,7 @@
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java?rev=833166&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java Thu Nov 5 21:02:57 2009
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
+import org.apache.hadoop.zebra.mapred.TestBasicTableIOFormatLocalFS.InvIndex;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.types.SortInfo;
+import org.apache.hadoop.zebra.schema.*;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.hadoop.zebra.types.*;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a complete sample MR program for Table. It doesn't contain the
+ * 'read' part, but that should be similar and easier to write. Refer to the
+ * test cases in the same directory.
+ *
+ * Assume the input files contain rows of word and count, separated by a space:
+ *
+ * <pre>
+ * this 2
+ * is 1
+ * a 4
+ * test 2
+ * hello 1
+ * world 3
+ * </pre>
+ *
+ */
+public class TableMRSampleSortedTable {
+ static class MapClass implements
+ Mapper<LongWritable, Text, BytesWritable, Tuple> {
+ private BytesWritable bytesKey;
+ private Tuple tupleRow;
+ private Object javaObj;
+ private byte[] types;
+ private ColumnType[] sortColTypes;
+
+ @Override
+ public void map(LongWritable key, Text value,
+ OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+ throws IOException {
+ // value should contain "word count"
+ String[] wdct = value.toString().split(" ");
+ if (wdct.length != 2) {
+ // LOG the error
+ return;
+ }
+
+ byte[] word = wdct[0].getBytes();
+
+ // This key has to be created by user
+ bytesKey.set(word, 0, word.length);
+
+ // This tuple has to be created by user
+ tupleRow.set(0, new String(word));
+ tupleRow.set(1, Integer.parseInt(wdct[1]));
+
+ output.collect(bytesKey, tupleRow);
+ }
+
+ @Override
+ public void configure(JobConf job) {
+ bytesKey = new BytesWritable();
+ try {
+
+ /* New M/R Interface to get sort information */
+ SortInfo sortInfo = BasicTableOutputFormat.getSortInfo(job);
+
+ /* New M/R Interface SortInfo is exposed to user
+ * To get types of sort columns.
+ * Similar interface to get names and indices
+ */
+ sortColTypes = sortInfo.getSortColumnTypes();
+ types = new byte[sortColTypes.length];
+ for(int i =0 ; i < sortColTypes.length; ++i){
+ types[i] = sortColTypes[i].pigDataType();
+ }
+ Schema outSchema = BasicTableOutputFormat.getSchema(job);
+ tupleRow = TypesUtils.createTuple(outSchema);
+
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ // no-op
+ }
+ }
+
+ static class ReduceClass implements
+ Reducer<BytesWritable, Tuple, BytesWritable, Tuple> {
+ Tuple outRow;
+
+
+ @Override
+ public void configure(JobConf job) {
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+ public void reduce(BytesWritable key, Iterator<Tuple> values,
+ OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+ throws IOException {
+ try {
+ for(; values.hasNext();) {
+ output.collect(key, values.next());
+ }
+ } catch (ExecException e) {
+ e.printStackTrace();
+ }
+ }
+
+ }
+
+ public static void main(String[] args) throws ParseException, IOException, Exception {
+ JobConf jobConf = new JobConf();
+ jobConf.setJobName("tableMRSample");
+ jobConf.set("table.output.tfile.compression", "gz");
+
+ // input settings
+ jobConf.setInputFormat(TextInputFormat.class);
+ jobConf.setMapperClass(TableMRSampleSortedTable.MapClass.class);
+ jobConf.setReducerClass(TableMRSampleSortedTable.ReduceClass.class);
+ jobConf.setMapOutputKeyClass(BytesWritable.class);
+ jobConf.setMapOutputValueClass(DefaultTuple.class);
+ FileInputFormat.setInputPaths(jobConf, new Path(
+ "/home/gauravj/work/grid/myTesting/input.txt"));
+ jobConf.setNumMapTasks(1);
+
+ // output settings
+ Path outPath = new Path("/home/gauravj/work/grid/myTesting/tableOuts");
+ jobConf.setOutputFormat(BasicTableOutputFormat.class);
+ BasicTableOutputFormat.setOutputPath(jobConf, outPath);
+ // set the logical schema with 2 columns
+ BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
+ // for demo purposes, create 2 physical column groups
+ BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
+
+ /* New M/R Interface */
+ /* Set sort columns in a comma separated string */
+ /* Each sort column should belong to schema columns */
+ BasicTableOutputFormat.setSortInfo(jobConf, "word, count");
+
+ // use a single reduce task.
+ jobConf.setNumReduceTasks(1);
+ JobClient.runJob(jobConf);
+ }
+}
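
The configure() method above shows the new sort-information accessors. A
condensed sketch of that lookup on its own, using only calls that appear in
the sample:

    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
    import org.apache.hadoop.zebra.schema.ColumnType;
    import org.apache.hadoop.zebra.types.SortInfo;

    public class SortInfoSketch {
      // Reads back the sort columns recorded by setSortInfo(job, "word, count")
      // and maps each sort column to its Pig data type.
      static byte[] sortKeyPigTypes(JobConf job) throws Exception {
        SortInfo sortInfo = BasicTableOutputFormat.getSortInfo(job);
        ColumnType[] sortColTypes = sortInfo.getSortColumnTypes();
        byte[] types = new byte[sortColTypes.length];
        for (int i = 0; i < sortColTypes.length; ++i) {
          types[i] = sortColTypes[i].pigDataType();
        }
        return types;
      }
    }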
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java?rev=833166&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java Thu Nov 5 21:02:57 2009
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
+import org.apache.hadoop.zebra.mapred.TestBasicTableIOFormatLocalFS.InvIndex;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a complete sample MR program for Table. It doesn't contain the
+ * 'read' part, but that should be similar and easier to write. Refer to the
+ * test cases in the same directory.
+ *
+ * Assume the input files contain rows of word and count, separated by a space:
+ *
+ * <pre>
+ * this 2
+ * is 1
+ * a 4
+ * test 2
+ * hello 1
+ * world 3
+ * </pre>
+ *
+ */
+public class TableMRSortedTableZebraKeyGenerator {
+ static class MapClass implements
+ Mapper<LongWritable, Text, BytesWritable, Tuple> {
+ private BytesWritable bytesKey;
+ private Tuple tupleRow;
+ private Object javaObj;
+
+ @Override
+ public void map(LongWritable key, Text value,
+ OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+ throws IOException {
+ // value should contain "word count"
+ String[] wdct = value.toString().split(" ");
+ if (wdct.length != 2) {
+ // LOG the error
+ return;
+ }
+
+ byte[] word = wdct[0].getBytes();
+ bytesKey.set(word, 0, word.length);
+ tupleRow.set(0, new String(word));
+ tupleRow.set(1, Integer.parseInt(wdct[1]));
+
+ // This key has to be created by user
+ Tuple userKey = new DefaultTuple();
+ userKey.append(new String(word));
+ userKey.append(Integer.parseInt(wdct[1]));
+ try {
+
+ /* New M/R Interface */
+ /* Converts user key to zebra BytesWritable key */
+ /* using sort key expr tree */
+ /* Returns a java base object */
+ /* Done for each user key */
+
+ bytesKey = BasicTableOutputFormat.getSortKey(javaObj, userKey);
+ } catch(Exception e) {
+
+ }
+
+ output.collect(bytesKey, tupleRow);
+ }
+
+ @Override
+ public void configure(JobConf job) {
+ bytesKey = new BytesWritable();
+ try {
+ Schema outSchema = BasicTableOutputFormat.getSchema(job);
+ tupleRow = TypesUtils.createTuple(outSchema);
+
+ /* New M/R Interface */
+ /* returns an expression tree for sort keys */
+ /* Returns a java base object */
+ /* Done once per table */
+ javaObj = BasicTableOutputFormat.getSortKeyGenerator(job);
+
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ // no-op
+ }
+ }
+
+ static class ReduceClass implements
+ Reducer<BytesWritable, Tuple, BytesWritable, Tuple> {
+ Tuple outRow;
+
+
+ @Override
+ public void configure(JobConf job) {
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+ public void reduce(BytesWritable key, Iterator<Tuple> values,
+ OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+ throws IOException {
+ try {
+ for(; values.hasNext();) {
+ output.collect(key, values.next());
+ }
+ } catch (ExecException e) {
+ e.printStackTrace();
+ }
+ }
+
+ }
+
+ public static void main(String[] args) throws ParseException, IOException, Exception {
+ JobConf jobConf = new JobConf();
+ jobConf.setJobName("tableMRSample");
+ jobConf.set("table.output.tfile.compression", "gz");
+
+ // input settings
+ jobConf.setInputFormat(TextInputFormat.class);
+ jobConf.setMapperClass(TableMRSortedTableZebraKeyGenerator.MapClass.class);
+ jobConf.setReducerClass(TableMRSortedTableZebraKeyGenerator.ReduceClass.class);
+ jobConf.setMapOutputKeyClass(BytesWritable.class);
+ jobConf.setMapOutputValueClass(DefaultTuple.class);
+ FileInputFormat.setInputPaths(jobConf, new Path(
+ "/home/gauravj/work/grid/myTesting/input.txt"));
+ jobConf.setNumMapTasks(1);
+
+ // output settings
+ Path outPath = new Path("/home/gauravj/work/grid/myTesting/tableOuts");
+ jobConf.setOutputFormat(BasicTableOutputFormat.class);
+ BasicTableOutputFormat.setOutputPath(jobConf, outPath);
+ // set the logical schema with 2 columns
+ BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
+ // for demo purposes, create 2 physical column groups
+ BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
+
+ /* New M/R Interface */
+ /* Set sort columns in a comma separated string */
+ /* Each sort column should belong to schema columns */
+ BasicTableOutputFormat.setSortInfo(jobConf, "word, count");
+
+ // use a single reduce task.
+ jobConf.setNumReduceTasks(1);
+ JobClient.runJob(jobConf);
+ }
+}
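
The mapper above builds the key generator once in configure() and converts
each user key per record. A condensed sketch of that flow, using only the
calls that appear in the sample:

    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
    import org.apache.pig.data.DefaultTuple;
    import org.apache.pig.data.Tuple;

    public class SortKeySketch {
      // Converts a (word, count) user key into zebra's BytesWritable sort key.
      // In a real mapper the generator would be created once in configure(),
      // not per record.
      static BytesWritable toZebraKey(JobConf job, String word, int count)
          throws Exception {
        Object keyGen = BasicTableOutputFormat.getSortKeyGenerator(job);
        Tuple userKey = new DefaultTuple();
        userKey.append(word);
        userKey.append(count);
        return BasicTableOutputFormat.getSortKey(keyGen, userKey);
      }
    }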
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicTableUnionLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicTableUnionLoader.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicTableUnionLoader.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicTableUnionLoader.java Thu Nov 5 21:02:57 2009
@@ -81,7 +81,7 @@
System.out.println("pathTable1 =" + pathTable1);
BasicTable.Writer writer = new BasicTable.Writer(pathTable1,
- "a:string,b,c:string", "[a,b];[c]", false, conf);
+ "a:string,b,c:string", "[a,b];[c]", conf);
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -116,7 +116,7 @@
System.out.println("pathTable2 =" + pathTable2);
writer = new BasicTable.Writer(pathTable2, "a:string,b,d:string",
- "[a,b];[d]", false, conf);
+ "[a,b];[d]", conf);
schema = writer.getSchema();
tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicUnion.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicUnion.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicUnion.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestBasicUnion.java Thu Nov 5 21:02:57 2009
@@ -99,7 +99,7 @@
System.out.println("pathTable1 =" + pathTable1);
BasicTable.Writer writer = new BasicTable.Writer(pathTable1, STR_SCHEMA1,
- STR_STORAGE1, false, conf);
+ STR_STORAGE1, conf);
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -134,7 +134,7 @@
System.out.println("pathTable2 =" + pathTable2);
writer = new BasicTable.Writer(pathTable2, STR_SCHEMA2, STR_STORAGE2,
- false, conf);
+ conf);
schema = writer.getSchema();
tuple = TypesUtils.createTuple(schema);
@@ -167,7 +167,7 @@
System.out.println("pathTable3 =" + pathTable3);
writer = new BasicTable.Writer(pathTable3, STR_SCHEMA3, STR_STORAGE3,
- false, conf);
+ conf);
schema = writer.getSchema();
tuple = TypesUtils.createTuple(schema);
@@ -199,7 +199,7 @@
System.out.println("pathTable4 =" + pathTable4);
writer = new BasicTable.Writer(pathTable4, STR_SCHEMA4, STR_STORAGE4,
- false, conf);
+ conf);
schema = writer.getSchema();
tuple = TypesUtils.createTuple(schema);
@@ -231,7 +231,7 @@
System.out.println("pathTable5 =" + pathTable5);
writer = new BasicTable.Writer(pathTable5, STR_SCHEMA5, STR_STORAGE5,
- false, conf);
+ conf);
schema = writer.getSchema();
tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java Thu Nov 5 21:02:57 2009
@@ -99,7 +99,7 @@
System.out.println("path =" + path);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
/*
* conf = new Configuration();
* conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java Thu Nov 5 21:02:57 2009
@@ -74,7 +74,7 @@
BasicTable.drop(pathTable, conf);
BasicTable.Writer writer = new BasicTable.Writer(pathTable,
- "c:collection(a:double, b:float, c:bytes)", "[c]", false, conf);
+ "c:collection(a:double, b:float, c:bytes)", "[c]", conf);
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java Thu Nov 5 21:02:57 2009
@@ -76,7 +76,7 @@
System.out.println("table path=" + pathTable);
BasicTable.Writer writer = new BasicTable.Writer(pathTable,
- "c:collection(a:double, b:float, c:bytes)", "[c]", false, conf);
+ "c:collection(a:double, b:float, c:bytes)", "[c]", conf);
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java Thu Nov 5 21:02:57 2009
@@ -84,7 +84,7 @@
pathTable = new Path(pathWorking, "TestMapTableLoader");
BasicTable.Writer writer = new BasicTable.Writer(pathTable,
- "m1:map(string)", "[m1#{a}]", false, conf);
+ "m1:map(string)", "[m1#{a}]", conf);
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java Thu Nov 5 21:02:57 2009
@@ -81,7 +81,7 @@
pathTable = new Path(pathWorking, "TestMapTableStorer");
System.out.println("table path=" + pathTable);
BasicTable.Writer writer = new BasicTable.Writer(pathTable,
- "m:map(string)", "[m#{a}]", false, conf);
+ "m:map(string)", "[m#{a}]", conf);
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java?rev=833166&r1=833165&r2=833166&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java Thu Nov 5 21:02:57 2009
@@ -97,7 +97,7 @@
System.out.println("path =" + path);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMergeJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMergeJoin.java?rev=833166&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMergeJoin.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMergeJoin.java Thu Nov 5 21:02:57 2009
@@ -0,0 +1,603 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.pig.TableStorer;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestMergeJoin {
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ private static MiniCluster cluster;
+ protected static PigServer pigServer;
+ private static Configuration conf;
+ private static FileSystem fs;
+ final static int numsBatch = 4;
+ final static int numsInserters = 1;
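+ // numsBatch rows are written per inserter; row 0 is built as the largest
+ // row and row numsBatch/2 as the smallest (see createFirstTable below).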
+ static Path pathWorking;
+ static Path pathTable1;
+ static Path pathTable2;
+ final static String STR_SCHEMA1 = "a:int,b:float,c:long,d:double,e:string,f:bytes,r1:record(f1:string, f2:string),m1:map(string)";
+ final static String STR_SCHEMA2 = "m1:map(string),r1:record(f1:string, f2:string),f:bytes,e:string,d:double,c:long,b:float,a:int";
+
+ final static String STR_STORAGE1 = "[a, b, c]; [e, f]; [r1.f1]; [m1#{a}]";
+ final static String STR_STORAGE2 = "[a];[b]; [c]; [e]; [f]; [r1.f1]; [m1#{a}]";
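+ // STR_SCHEMA2 declares the same columns as STR_SCHEMA1 in reverse order,
+ // and STR_STORAGE2 places them in finer-grained column groups, so the two
+ // tables share a column set but differ in physical layout.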
+ static int t1 = 0;
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ if (System.getProperty("hadoop.log.dir") == null) {
+ String base = new File(".").getPath(); // getAbsolutePath();
+ System.setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+ }
+
+ if (execType == ExecType.MAPREDUCE) {
+ cluster = MiniCluster.buildCluster();
+ pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+ } else {
+ pigServer = new PigServer(ExecType.LOCAL);
+ }
+
+ fs = cluster.getFileSystem();
+
+ conf = new Configuration();
+
+ pathWorking = fs.getWorkingDirectory();
+ pathTable1 = new Path(pathWorking, "table1");
+ pathTable2 = new Path(pathWorking, "table2");
+ System.out.println("pathTable1 =" + pathTable1);
+ createFirstTable();
+ createSecondTable();
+ }
+ public static void createFirstTable() throws IOException, ParseException {
+ BasicTable.Writer writer = new BasicTable.Writer(pathTable1, STR_SCHEMA1,
+ STR_STORAGE1, conf);
+ Schema schema = writer.getSchema();
+ //System.out.println("typeName" + schema.getColumn("a").type.pigDataType());
+ Tuple tuple = TypesUtils.createTuple(schema);
+
+ TableInserter[] inserters = new TableInserter[numsInserters];
+ for (int i = 0; i < numsInserters; i++) {
+ inserters[i] = writer.getInserter("ins" + i, false);
+ }
+ Tuple tupRecord1;
+ try {
+ tupRecord1 = TypesUtils.createTuple(schema.getColumnSchema("r1")
+ .getSchema());
+ } catch (ParseException e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+ Map<String, String> m1 = new HashMap<String, String>();
+ for (int b = 0; b < numsBatch; b++) {
+ for (int i = 0; i < numsInserters; i++) {
+ TypesUtils.resetTuple(tupRecord1);
+ TypesUtils.resetTuple(tuple);
+ m1.clear();
+
+ try {
+ // first row of the table, the biggest row
+ if (i == 0 && b == 0) {
+ tuple.set(0, 100);
+ tuple.set(1, 100.1f);
+ tuple.set(2, 100L);
+ tuple.set(3, 50e+2);
+ tuple.set(4, "something");
+ tuple.set(5, new DataByteArray("something"));
+
+ }
+ // the middle + 1 row of the table, the smallest row
+ else if (i == 0 && b == (numsBatch / 2)) {
+ tuple.set(0, -100);
+ tuple.set(1, -100.1f);
+ tuple.set(2, -100L);
+ tuple.set(3, -50e+2);
+ tuple.set(4, "so");
+ tuple.set(5, new DataByteArray("so"));
+
+ }
+
+ else {
+ Float f = 1.1f;
+ long l = 11;
+ double d = 1.1;
+ tuple.set(0, b);
+ tuple.set(1, f);
+ tuple.set(2, l);
+ tuple.set(3, d);
+ tuple.set(4, "some");
+ tuple.set(5, new DataByteArray("some"));
+ }
+
+ // insert record
+ tupRecord1.set(0, "" + b);
+ tupRecord1.set(1, "" + b);
+ tuple.set(6, tupRecord1);
+
+ // insert map
+ m1.put("a", "" + b);
+ m1.put("b", "" + b);
+ m1.put("c", "" + b);
+ tuple.set(7, m1);
+
+ } catch (ExecException e) {
+ e.printStackTrace();
+ }
+
+ inserters[i].insert(new BytesWritable(("key_" + b).getBytes()), tuple);
+
+ }
+ }
+ for (int i = 0; i < numsInserters; i++) {
+ inserters[i].close();
+ }
+ writer.close();
+
+ // check the table is set up correctly
+ String projection = "a,b,c,d,e,f,r1,m1";
+
+ BasicTable.Reader reader = new BasicTable.Reader(pathTable1, conf);
+ reader.setProjection(projection);
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getValue(RowValue);
+ System.out.println("rowvalue size: " + RowValue.size());
+ System.out.println("read a: " + RowValue.get(0).toString());
+ System.out.println("read b: " + RowValue.get(1).toString());
+
+ scanner.advance();
+ scanner.getValue(RowValue);
+ System.out.println("read float in 2nd row: " + RowValue.get(1).toString());
+ System.out.println("done insert table");
+
+ reader.close();
+
+ }
+ public static void createSecondTable() throws IOException, ParseException {
+ BasicTable.Writer writer = new BasicTable.Writer(pathTable2, STR_SCHEMA2,
+ STR_STORAGE2, conf);
+ Schema schema = writer.getSchema();
+ //System.out.println("typeName" + schema.getColumn("a").type.pigDataType());
+ Tuple tuple = TypesUtils.createTuple(schema);
+
+ TableInserter[] inserters = new TableInserter[numsInserters];
+ for (int i = 0; i < numsInserters; i++) {
+ inserters[i] = writer.getInserter("ins" + i, false);
+ }
+ Tuple tupRecord1;
+ try {
+ tupRecord1 = TypesUtils.createTuple(schema.getColumnSchema("r1")
+ .getSchema());
+ } catch (ParseException e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+ Map<String, String> m1 = new HashMap<String, String>();
+ for (int b = 0; b < numsBatch; b++) {
+ for (int i = 0; i < numsInserters; i++) {
+ TypesUtils.resetTuple(tupRecord1);
+ TypesUtils.resetTuple(tuple);
+ m1.clear();
+
+ try {
+ // first row of the table, the biggest row
+ if (i == 0 && b == 0) {
+ tuple.set(7, 100);
+ tuple.set(6, 100.1f);
+ tuple.set(5, 100L);
+ tuple.set(4, 50e+2);
+ tuple.set(3, "something");
+ tuple.set(2, new DataByteArray("something"));
+
+ }
+ // the middle + 1 row of the table, the smallest row
+ else if (i == 0 && b == (numsBatch / 2)) {
+ tuple.set(7, -100);
+ tuple.set(6, -100.1f);
+ tuple.set(5, -100L);
+ tuple.set(4, -50e+2);
+ tuple.set(3, "so");
+ tuple.set(2, new DataByteArray("so"));
+
+ }
+
+ else {
+ Float f = 2.1f;
+ long l = 12;
+ double d = 2.1;
+ tuple.set(7, b*2);
+ tuple.set(6, f);
+ tuple.set(5, l);
+ tuple.set(4, d);
+ tuple.set(3, "somee");
+ tuple.set(2, new DataByteArray("somee"));
+ }
+
+ // insert record
+ tupRecord1.set(0, "" + b);
+ tupRecord1.set(1, "" + b);
+ tuple.set(1, tupRecord1);
+
+ // insert map
+
+ m1.put("a", "" + b);
+ m1.put("b", "" + b);
+ m1.put("c", "" + b);
+ tuple.set(0, m1);
+
+ } catch (ExecException e) {
+ e.printStackTrace();
+ }
+
+ inserters[i].insert(new BytesWritable(("key" + b).getBytes()), tuple);
+ }
+ }
+ for (int i = 0; i < numsInserters; i++) {
+ inserters[i].close();
+ }
+ writer.close();
+
+ // check the table is set up correctly
+ String projection = "a,b,c,d,e,f,r1,m1";
+
+ BasicTable.Reader reader = new BasicTable.Reader(pathTable2, conf);
+ reader.setProjection(projection);
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getValue(RowValue);
+ System.out.println("rowvalue size: " + RowValue.size());
+ System.out.println("read a: " + RowValue.get(7).toString());
+ System.out.println("read b: " + RowValue.get(6).toString());
+
+ scanner.advance();
+ scanner.getValue(RowValue);
+ System.out.println("read float in 2nd row: " + RowValue.get(6).toString());
+ System.out.println("done insert table");
+
+ reader.close();
+
+ }
+
+ public static void sortTable(Path tablePath, String sortkey){
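+ // unused stub: sorting is instead done through Pig ORDER BY in joinTable()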
+
+ }
+ @AfterClass
+ public static void tearDown() throws Exception {
+ pigServer.shutdown();
+ }
+
+
+ public void verify(Iterator<Tuple> it3) throws ExecException {
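+ // A joined row has 16 fields: positions 0-7 are table1's columns in schema
+ // order (a,b,c,d,e,f,r1,m1); positions 15 down to 8 are the same columns
+ // from table2, whose schema is declared in reverse order.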
+ int row = 0;
+ Tuple RowValue3 = null;
+ while (it3.hasNext()) {
+ RowValue3 = it3.next();
+ Assert.assertEquals(16, RowValue3.size());
+ row++;
+
+ if (row == 1) {
+ // smallest row, the middle row of original table
+ Assert.assertEquals(-100, RowValue3.get(0));// a
+ Assert.assertEquals(-100.1f, RowValue3.get(1)); // b
+ Assert.assertEquals(-100L, RowValue3.get(2)); // c
+ Assert.assertEquals(-5000.0, RowValue3.get(3)); // d
+ Assert.assertEquals("so", RowValue3.get(4)); // e
+ Assert.assertEquals("so", RowValue3.get(5).toString());// f
+ Assert.assertEquals("" + numsBatch / 2, ((Tuple) RowValue3.get(6))
+ .get(0));// r
+ Assert.assertEquals("" + numsBatch / 2, ((Tuple) RowValue3.get(6))
+ .get(1));// r
+ Assert.assertEquals("" + numsBatch / 2, ((Map) RowValue3.get(7))
+ .get("a"));// m
+ Assert.assertEquals("" + numsBatch / 2, ((Map) RowValue3.get(7))
+ .get("b"));// m
+ Assert.assertEquals("" + numsBatch / 2, ((Map) RowValue3.get(7))
+ .get("c"));// m
+ Assert.assertEquals(-100, RowValue3.get(15)); // a
+ Assert.assertEquals(-100.1f, RowValue3.get(14)); // b
+ Assert.assertEquals(-100L, RowValue3.get(13)); // c
+ Assert.assertEquals(-5000.0, RowValue3.get(12)); // d
+ Assert.assertEquals("so", RowValue3.get(11)); // e
+ Assert.assertEquals("so", RowValue3.get(10).toString());// f
+ Assert.assertEquals("" + numsBatch / 2, ((Tuple) RowValue3.get(9))
+ .get(0));// r
+ Assert.assertEquals("" + numsBatch / 2, ((Tuple) RowValue3.get(9))
+ .get(1));// r
+ Assert.assertEquals("" + numsBatch / 2, ((Map) RowValue3.get(8))
+ .get("a"));// m
+ Assert.assertEquals("" + numsBatch / 2, ((Map) RowValue3.get(8))
+ .get("b"));// m
+ Assert.assertEquals("" + numsBatch / 2, ((Map) RowValue3.get(8))
+ .get("c"));// m
+ }
+
+ // largest row, the first row of the original table
+ if (row == 2) {
+ Assert.assertEquals(100, RowValue3.get(0));// a
+ Assert.assertEquals(100.1f, RowValue3.get(1)); // b
+ Assert.assertEquals(100L, RowValue3.get(2)); // c
+ Assert.assertEquals(5000.0, RowValue3.get(3)); // d
+ Assert.assertEquals("something", RowValue3.get(4)); // e
+ Assert.assertEquals("something", RowValue3.get(5).toString());// f
+ Assert.assertEquals("" + 0, ((Tuple) RowValue3.get(6))
+ .get(0));// r
+ Assert.assertEquals("" + 0, ((Tuple) RowValue3.get(6))
+ .get(1));// r
+ Assert.assertEquals("" + 0, ((Map) RowValue3.get(7))
+ .get("a"));// m
+ Assert.assertEquals("" + 0, ((Map) RowValue3.get(7))
+ .get("b"));// m
+ Assert.assertEquals("" + 0, ((Map) RowValue3.get(7))
+ .get("c"));// m
+ Assert.assertEquals(100, RowValue3.get(15)); // a
+ Assert.assertEquals(100.1f, RowValue3.get(14)); // b
+ Assert.assertEquals(100L, RowValue3.get(13)); // c
+ Assert.assertEquals(5000.0, RowValue3.get(12)); // d
+ Assert.assertEquals("something", RowValue3.get(11)); // e
+ Assert.assertEquals("something", RowValue3.get(10).toString());// f
+ Assert.assertEquals("" + 0, ((Tuple) RowValue3.get(9)).get(0));// r
+ Assert.assertEquals("" + 0, ((Tuple) RowValue3.get(9)).get(1));// r
+ Assert.assertEquals("" + 0, ((Map) RowValue3.get(8)).get("a"));// m
+ Assert.assertEquals("" + 0, ((Map) RowValue3.get(8)).get("b"));// m
+ Assert.assertEquals("" + 0, ((Map) RowValue3.get(8)).get("c"));// m
+ }
+ }
+ Assert.assertEquals(2, row);
+ }
+
+ public Iterator<Tuple> joinTable(String table1, String table2, String sortkey1, String sortkey2) throws IOException {
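+ // The table1/table2 parameters are currently unused; the LOAD statements
+ // below read from the static pathTable1/pathTable2 fields directly.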
+ String query1 = "records1 = LOAD '" + this.pathTable1.toString()
+ + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+ System.out.println("query1:" + query1);
+ pigServer.registerQuery(query1);
+
+ String query2 = "records2 = LOAD '" + this.pathTable2.toString()
+ + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+ System.out.println("query2:" + query2);
+ pigServer.registerQuery(query2);
+
+ /* Iterator<Tuple> it_before_order = pigServer.openIterator("records");
+ int row_before_order = 0;
+ Tuple RowValue_before_order = null;
+ while (it_before_order.hasNext()) {
+ RowValue_before_order = it_before_order.next();
+ row_before_order++;
+ System.out.println("row : " + row_before_order + " field f value: "
+ + RowValue_before_order.get(5));
+ }
+ System.out.println("total row for orig table before ordered:"
+ + row_before_order);*/
+ String orderby1 = "sort1 = ORDER records1 BY " + sortkey1 + " ;";
+ String orderby2 = "sort2 = ORDER records2 BY " + sortkey2 + " ;";
+ pigServer.registerQuery(orderby1);
+ pigServer.registerQuery(orderby2);
+
+ /*Iterator<Tuple> it_after_order = pigServer.openIterator("srecs");
+ int row_after_order = 0;
+ Tuple RowValue_after_order = null;
+ while (it_after_order.hasNext()) {
+ RowValue_after_order = it_after_order.next();
+ row_after_order++;
+ System.out.println("row : " + row_after_order + " field b value: "
+ + RowValue_after_order.get(1));
+ }
+ System.out.println("total row for orig table after ordered:"
+ + row_after_order);*/
+ // Path newPath = new Path(getCurrentMethodName());
+
+ /*
+ * Table1 creation
+ */
+ this.t1++;
+
+ String table1path = this.pathTable1.toString() + Integer.toString(this.t1);
+ pigServer.store("sort1", table1path, TableStorer.class.getCanonicalName()
+ + "('[a, b, c]; [d, e, f, r1, m1]')");
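+ // The ordered relation is materialized through TableStorer and reloaded
+ // below with the 'sorted' option so TableLoader treats the table as sorted,
+ // which the merge join at the end of this method requires.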
+
+ String query3 = "records1 = LOAD '"
+ + table1path
+ + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, r1, m1', 'sorted');";
+
+ System.out.println("query3:" + query3);
+ pigServer.registerQuery(query3);
+
+ String foreach = "records11 = foreach records1 generate a as a, b as b, c as c, d as d, e as e, f as f, r1 as r1, m1 as m1;";
+ pigServer.registerQuery(foreach);
+ /* Iterator<Tuple> it_ordered = pigServer.openIterator("records1");
+ int row_ordered = 0;
+ Tuple RowValue_ordered = null;
+ while (it_ordered.hasNext()) {
+ RowValue_ordered = it_ordered.next();
+ row_ordered++;
+ System.out.println("row : " + row_ordered + " field a value: "
+ + RowValue_ordered.get(0));
+ }
+ System.out.println("total row for table 1 after ordered:" + row_ordered);*/
+
+ /*
+ * Table2 creation
+ */
+ this.t1++;
+ String table2path = this.pathTable2.toString() + Integer.toString(this.t1);
+ pigServer.store("sort2", table2path, TableStorer.class.getCanonicalName()
+ + "('[a, b, c]; [d,e,f,r1,m1]')");
+
+ String query4 = "records2 = LOAD '" + table2path
+ + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+ pigServer.registerQuery(query4);
+
+
+ String filter = "records22 = FILTER records2 BY a == '1.9';";
+ pigServer.registerQuery(filter);
+ /*Iterator<Tuple> it_ordered2 = pigServer.openIterator("records2");
+ int row_ordered2 = 0;
+ Tuple RowValue_ordered2 = null;
+ while (it_ordered2.hasNext()) {
+ RowValue_ordered2 = it_ordered2.next();
+ row_ordered2++;
+ System.out.println("row for table 2 after ordereed: " + row_ordered2
+ + " field a value: " + RowValue_ordered2.get(0));
+ }
+
+ System.out.println("total row for table 2:" + row_ordered2);
+ */
+ String join = "joinRecords = JOIN records11 BY " + "(" + sortkey1 + ")"
+ + " , records2 BY " + "("+ sortkey2 + ")"+" USING \"merge\";";
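+ // Pig's merge join requires both inputs to be sorted on the join keys;
+ // the parenthesized key lists allow the multi-column keys used by test7
+ // and test8 below.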
+ // TODO: cannot use records22
+ pigServer.registerQuery(join);
+
+ // check JOIN content
+ Iterator<Tuple> it3 = pigServer.openIterator("joinRecords");
+ return it3;
+ }
+
+ //@Test
+ public void test1() throws ExecException, IOException {
+ /*
+ * join key: single integer column
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "a", "a");
+ verify(it3);
+ }
+
+ //@Test
+ public void test2() throws ExecException, IOException {
+ /*
+ * join key: single float column
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "b", "b");
+ verify(it3);
+ }
+
+ //@Test
+ public void test3() throws ExecException, IOException {
+ /*
+ * join key: single string column
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "e", "e");
+ verify(it3);
+ }
+
+ @Test
+ public void test4() throws ExecException, IOException {
+ /*
+ * join key: single byte column
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "f", "f");
+ verify(it3);
+ }
+
+ //@Test
+ public void test5() throws ExecException, IOException {
+ /*
+ * join key: single double column
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "d", "d");
+ verify(it3);
+ }
+
+ //@Test
+ public void test6() throws ExecException, IOException {
+ /*
+ * join key: single long column
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "c", "c");
+ verify(it3);
+ }
+
+ //@Test
+ public void test7() throws ExecException, IOException {
+ /*
+ * 2 join keys: integer and float
+ */
+ System.out.println("helloo");
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "a,b", "a,b");
+ verify(it3);
+ }
+
+ //@Test
+ public void test8() throws ExecException, IOException {
+ /*
+ * multiple join keys: integer, float, long, double, string, bytes
+ */
+
+ // Failing with bytes (known bug)
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "a,b,c,d,e,f", "a,b,c,d,e,f");
+ verify(it3);
+ }
+
+ //@Test(expected = IOException.class)
+ public void test9a() throws ExecException, IOException {
+ /*
+ * Negative test case, one join key is not primitive type which is a record
+ * 2 join keys: integer and record
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "a,r1,e", "a,r1,e");
+ }
+
+ //@Test(expected = IOException.class)
+ public void test9b() throws ExecException, IOException {
+ /*
+ * Negative test case, one join key is not primitive type which is a record
+ * 2 join keys: integer and map
+ */
+ Iterator<Tuple> it3 = joinTable(this.pathTable1.toString(), this.pathTable2.toString(), "a,m1,e", "a,m1,e");
+ }
+
+}