You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/11/24 20:54:34 UTC
svn commit: r883836 [8/23] - in /hadoop/pig/branches/load-store-redesign: ./
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/
contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/
contrib/zebra/ contrib/zebra...
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java Tue Nov 24 19:54:19 2009
@@ -27,6 +27,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.parser.ParseException;
@@ -48,6 +49,7 @@
Log LOG = LogFactory.getLog(TestDropColumnGroup.class);
private static Path path;
private static Configuration conf;
+ private static FileSystem fs;
@BeforeClass
public static void setUpOnce() throws IOException {
@@ -55,7 +57,7 @@
path = new Path(TestBasicTable.rootPath, "DropCGTest");
conf = TestBasicTable.conf;
Log LOG = LogFactory.getLog(TestDropColumnGroup.class);
-
+ fs = path.getFileSystem(conf);
}
@AfterClass
@@ -114,10 +116,13 @@
* columns can be read the value returned is null.
*/
- BasicTable.drop(path, conf);
-
+ if (fs.exists(path)) {
+ BasicTable.drop(path, conf);
+ }
+
int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a, b]; [c, d]", path, true, false);
+ "[a, b]; [c, d]", null,
+ path, true);
int rowsToRead = Math.min(10, numRows);
@@ -159,8 +164,9 @@
*/
// 5 splits and 50 rows
- numRows = TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f",
- "[a, b]; [c, d]; [e] as myCG", path, true, false);
+ numRows = TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f",
+ "[a, b]; [c, d]; [e] as myCG",
+ null, path, true);
BasicTable.dropColumnGroup(path, conf, "myCG");
@@ -180,7 +186,8 @@
}
@Test
- public void testDropColumnGroupsMixedTypes() throws IOException, ParseException {
+ public void testDropColumnGroupsMixedTypes() throws IOException,
+ ParseException {
String mixedSchema = /* roughly borrowed from testMixedType1.java */
"s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes, "
@@ -195,15 +202,16 @@
+ "[c] as collectionCG; "
+ "[r1.f2, m1#{b}, m2#{z}] as mapRecordCG; ";
- Path path = new Path(TestBasicTable.rootPath, "DropCGTest");
+ //Path path = new Path(TestBasicTable.rootPath, "DropCGTest");
Configuration conf = TestBasicTable.conf;
conf.set("fs.default.name", "file:///");
-
- BasicTable.drop(path, conf);
-
+ if (fs.exists(path)) {
+ BasicTable.drop(path, conf);
+ }
+
// first write the table :
BasicTable.Writer writer = new BasicTable.Writer(path, mixedSchema,
- mixedStorageHint, false, conf);
+ mixedStorageHint, conf);
writer.finish();
Schema schema = writer.getSchema();
@@ -394,25 +402,35 @@
/*
* Tests concurrent drop CGs
*/
+ if (fs.exists(path)) {
+ BasicTable.drop(path, conf);
+ }
- BasicTable.drop(path, conf);
-
- int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path, true, false);
- System.out.println("Frist dump:");
+ int numRows = TestBasicTable.createBasicTable(1, 10, "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10," +
+ "f11,f12,f13,f14,f15,f16,f17,f18,f19,f20," +
+ "f21,f22,f23,f24,f25,f26,f27,f28,f29,f30," +
+ "f31,f32,f33,f34,f35,f36,f37,f38,f39,f40," +
+ "f41,f42,f43,f44,f45,f46,f47,f48,f49,f50",
+ "[f1];[f2];[f3];[f4];[f5];[f6];[f7];[f8];[f9];[f10];" +
+ "[f11];[f12];[f13];[f14];[f15];[f16];[f17];[f18];[f19];[f20];" +
+ "[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" +
+ "[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" +
+ "[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]",
+ null, path, true);
+
+ System.out.println("First dump:");
BasicTable.dumpInfo(path.toString(), System.out, conf);
int rowsToRead = Math.min(10, numRows);
// normal table.
- verifyScanner(path, conf, "a, c, x", new boolean[] { false, false, true },
+ verifyScanner(path, conf, "f1, f3, xx", new boolean[] { false, false, true },
rowsToRead);
// create a thread for each dropCG
- DropThread[] threads = new DropThread[6];
+ DropThread[] threads = new DropThread[50];
for (int i = 0; i < threads.length; i++) {
-
- threads[i] = new DropThread(i);
+ threads[i] = new DropThread(i, 50);
}
// start the threads
@@ -430,17 +448,16 @@
// check various read cases.
- verifyScanner(path, conf, "c, a, b, f, d, e", new boolean[] { true, true,
+ verifyScanner(path, conf, "f3, f1, f2, f6, f4, f5", new boolean[] { true, true,
true, true, true, true }, rowsToRead);
System.out.println("second dump");
BasicTable.dumpInfo(path.toString(), System.out, conf);
// Now make sure the reader reports zero rows.
- Assert.assertTrue(countRows(path, conf, "c, e, b") == 0);
+ Assert.assertTrue(countRows(path, conf, "f3, f5, f2") == 0);
// delete the table
BasicTable.drop(path, conf);
-
}
@Test
@@ -448,25 +465,35 @@
/*
* Tests concurrrent drop CGs while one fails
*/
+ if (fs.exists(path)) {
+ BasicTable.drop(path, conf);
+ }
- BasicTable.drop(path, conf);
-
- int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path, true, false);
- System.out.println("Frist dump:");
+ int numRows = TestBasicTable.createBasicTable(1, 10, "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10," +
+ "f11,f12,f13,f14,f15,f16,f17,f18,f19,f20," +
+ "f21,f22,f23,f24,f25,f26,f27,f28,f29,f30," +
+ "f31,f32,f33,f34,f35,f36,f37,f38,f39,f40," +
+ "f41,f42,f43,f44,f45,f46,f47,f48,f49,f50",
+ "[f1];[f2];[f3];[f4];[f5];[f6];[f7];[f8];[f9];[f10];" +
+ "[f11];[f12];[f13];[f14];[f15];[f16];[f17];[f18];[f19];[f20];" +
+ "[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" +
+ "[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" +
+ "[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]",
+ null, path, true);
+
+ System.out.println("First dump:");
BasicTable.dumpInfo(path.toString(), System.out, conf);
int rowsToRead = Math.min(10, numRows);
// normal table.
- verifyScanner(path, conf, "a, c, x", new boolean[] { false, false, true },
+ verifyScanner(path, conf, "f1, f3, xx", new boolean[] { false, false, true },
rowsToRead);
// create a thread for each dropCG
- DropThread[] threads = new DropThread[7];
+ DropThread[] threads = new DropThread[60];
for (int i = 0; i < threads.length; i++) {
-
- threads[i] = new DropThread(i);
+ threads[i] = new DropThread(i, 50);
}
// start the threads
@@ -478,18 +505,19 @@
try {
thr.join();
} catch (InterruptedException e) {
+ e.printStackTrace();
}
}
// check various read cases.
- verifyScanner(path, conf, "c, a, b, f, d, e", new boolean[] { true, true,
+ verifyScanner(path, conf, "f3, f1, f2, f6, f4, f5", new boolean[] { true, true,
true, true, true, true }, rowsToRead);
System.out.println("second dump");
BasicTable.dumpInfo(path.toString(), System.out, conf);
// Now make sure the reader reports zero rows.
- Assert.assertTrue(countRows(path, conf, "c, e, b") == 0);
+ Assert.assertTrue(countRows(path, conf, "f3, f5, f2") == 0);
// delete the table
BasicTable.drop(path, conf);
@@ -502,10 +530,13 @@
*/
System.out.println("######int test 5");
- BasicTable.drop(path, conf);
+
+ if (fs.exists(path)) {
+ BasicTable.drop(path, conf);
+ }
int numRows = TestBasicTable.createBasicTable(1, 100000,
- "a, b, c, d, e, f", "[a, b]; [c, d]", path, true, false);
+ "a, b, c, d, e, f, g, h, i, j, k, l, m, n", "[a, b]; [c, d]; [e]; [f]; [g]; [h]; [i]; [j]; [k]; [l]; [m]; [n]", null, path, true);
System.out.println("in test5 , dump infor 1");
BasicTable.dumpInfo(path.toString(), System.out, conf);
@@ -519,23 +550,21 @@
rowsToRead);
// create a thread for each dropCG
- DropThread[] dropThreads = new DropThread[3];
+ DropThread[] dropThreads = new DropThread[12];
for (int i = 0; i < dropThreads.length; i++) {
-
- dropThreads[i] = new DropThread(i);
+ dropThreads[i] = new DropThread(i, 12);
}
// start the threads
for (int j = 0; j < dropThreads.length; j++) {
dropThreads[j].start();
}
-
+
// create read threads
ReadThread[] readThreads = new ReadThread[numOfReadThreads];
for (int i = 0; i < readThreads.length; i++) {
-
readThreads[i] = new ReadThread(i, "a, b, c, d, e, f", 1000);
}
@@ -551,6 +580,7 @@
e.printStackTrace();
}
}
+
for (Thread thr : readThreads) {
try {
thr.join();
@@ -569,7 +599,6 @@
// delete the table
BasicTable.drop(path, conf);
-
}
@Test
@@ -593,11 +622,12 @@
/*
* Tests API, path is wrong
*/
-
- BasicTable.drop(path, conf);
-
+ if (fs.exists(path)) {
+ BasicTable.drop(path, conf);
+ }
+
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path, true);
Path wrongPath = new Path(path.toString() + "non-existing");
try {
BasicTable.dropColumnGroup(wrongPath, conf, "CG0");
@@ -616,7 +646,7 @@
Path path1 = new Path(path.toString() + "13");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path1, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path1, true);
try {
BasicTable.dropColumnGroup(path1, null, "CG0");
Assert.fail("should throw excepiton");
@@ -634,7 +664,7 @@
Path path1 = new Path(path.toString() + "14");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path1, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path1, true);
try {
BasicTable.dropColumnGroup(path1, conf, "");
Assert.fail("should throw excepiton");
@@ -653,7 +683,7 @@
Path path1 = new Path(path.toString() + "15");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a];[b];[c];[d];[e];[f]", path1, true, false);
+ "[a];[b];[c];[d];[e];[f]", null, path1, true);
try {
BasicTable.dropColumnGroup(path1, conf, null);
Assert.fail("should throw excepiton");
@@ -672,7 +702,7 @@
Path path1 = new Path(path.toString() + "16");
int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
- "[a, b]; [c, d]", path1, true, false);
+ "[a, b]; [c, d]", null, path1, true);
int rowsToRead = Math.min(10, numRows);
@@ -704,7 +734,7 @@
Path path1 = new Path(path.toString() + "17");
TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a,b,c,d,e,f]",
- path1, true, false);
+ null, path1, true);
BasicTable.dropColumnGroup(path1, conf, "CG0");
@@ -730,17 +760,17 @@
BasicTable.drop(path1, conf);
}
- /**
+/**
* A thread that performs a DropColumnGroup.
*/
class DropThread extends Thread {
private int id;
+ private int cntCGs;
- public DropThread(int id) {
-
+ public DropThread(int id, int cntCGs) {
this.id = id;
-
+ this.cntCGs = cntCGs;
}
/**
@@ -748,13 +778,22 @@
*/
public void run() {
try {
- System.out.println("Droping CG: " + id);
- BasicTable.dropColumnGroup(path, conf, "CG" + id);
+ int total = cntCGs;
+ int digits = 1;
+ while (total >= 10) {
+ ++ digits;
+ total /= 10;
+ }
+ String formatString = "%0" + digits + "d";
+ String str = "CG" + String.format(formatString, id);
+
+ System.out.println(id + ": Droping CG: " + str);
+ BasicTable.dropColumnGroup(path, conf, str);
} catch (Exception e) {
System.out.println(id + " - error: " + e);
+ e.printStackTrace();
}
}
-
}
/**
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Verifies that a storage hint listing the same map key in more than one column group is rejected when the table writer is created.
+ *
+ */
+public class TestDuplicateMapKeyInDifferentCGs {
+
+ final static String STR_SCHEMA = "m1:map(string),m2:map(map(int))";
+ final static String STR_STORAGE = "[m1#{a}, m2#{x}];[m2#{x|y}]; [m1#{b}, m2#{z}];[m1,m2]";
+ private static Configuration conf;
+ private static Path path;
+ private static FileSystem fs;
+
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+ conf = new Configuration();
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ fs = new LocalFileSystem(rawLFS);
+ path = new Path(fs.getWorkingDirectory(), "TestMap");
+ fs = path.getFileSystem(conf);
+ // drop any previous tables
+ BasicTable.drop(path, conf);
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+ }
+
+
+ @Test
+ public void testRead1() throws IOException, ParseException {
+ try {
+ BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+ STR_STORAGE, conf);
+ Assert.fail("duplicate keys in different column groups should throw an exception!");
+ } catch (IOException e) {
+ return;
+ }
+ }
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java Tue Nov 24 19:54:19 2009
@@ -77,7 +77,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -269,15 +269,15 @@
Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
scanner.getValue(RowValue);
System.out.println("read1 : " + RowValue.toString());
- Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+ Assert.assertEquals("{}", RowValue.get(0).toString());
scanner.advance();
scanner.getKey(key);
Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
scanner.getValue(RowValue);
System.out.println(RowValue.get(0).toString());
- Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+ Assert.assertEquals("{}", RowValue.get(0).toString());
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java Tue Nov 24 19:54:19 2009
@@ -87,7 +87,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -388,4 +388,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,321 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test projections on complicated column types: Jira 1026
+ *
+ */
+public class TestMapSplitSchemaStorageColumnOutOfOrder {
+ final static String STR_SCHEMA = "column1:bytes,column2:bytes, column3:bytes,column4:bytes,column5:map(String),column6:map(String),column7:map(String),column8:collection(f1:map(String))";
+ final static String STR_STORAGE = "[column1,column2,column3,column4];[column5#{key51|key52|key53|key54|key55|key56},column7#{key71|key72|key73|key74|key75}];[column5,column7,column6];[column8]";
+ private static Configuration conf;
+ private static Path path;
+ private static FileSystem fs;
+
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+ conf = new Configuration();
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ fs = new LocalFileSystem(rawLFS);
+ path = new Path(fs.getWorkingDirectory(), "TestMapSplitSchemaStorageColumnOutOfOrder");
+ fs = path.getFileSystem(conf);
+ // drop any previous tables
+ BasicTable.drop(path, conf);
+ BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+ STR_STORAGE, conf);
+ writer.finish();
+ Schema schema = writer.getSchema();
+ Tuple tuple = TypesUtils.createTuple(schema);
+ BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+ int part = 0;
+ TableInserter inserter = writer1.getInserter("part" + part, true);
+ TypesUtils.resetTuple(tuple);
+
+ DataBag bag2 = TypesUtils.createBag();
+ Schema schColl2 = schema.getColumn(7).getSchema();
+ Tuple tupColl2_1 = TypesUtils.createTuple(schColl2);
+ Tuple tupColl2_2 = TypesUtils.createTuple(schColl2);
+ // add data to row 1
+ tuple.set(0, new DataByteArray("column1 row 1 ")); // byte
+ tuple.set(1,new DataByteArray("column2 row 1"));
+ tuple.set(2, new DataByteArray("column3 row 1"));
+ tuple.set(3, new DataByteArray("column4 row 1"));
+
+ // column5
+ Map<String, String> column5 = new HashMap<String, String>();
+ column5.put("key51", "key511");
+ column5.put("key52", "key521");
+ column5.put("key53", "key531");
+ column5.put("key54", "key541");
+ column5.put("key55", "key551");
+ column5.put("key56", "key561");
+ column5.put("key57", "key571");
+
+ tuple.set(4, column5);
+
+ //column5:map(String),column6:map(String),column7:map(String),column8:collection(f1:map(String))
+ //column7:map(String), column6:map(String)
+ HashMap<String, String> column7 = new HashMap<String, String>();
+ HashMap<String, String> column6 = new HashMap<String, String>();
+ column6.put("column61", "column61");
+ column7.put("key71", "key711");
+ column7.put("key72", "key721");
+ column7.put("key73", "key731");
+ column7.put("key74", "key741");
+ column7.put("key75", "key751");
+ tuple.set(6, column7);
+ tuple.set(5, column6);
+
+ //column8:collection(f1:map(bytes))
+ HashMap<String, String> mapInCollection = new HashMap<String, String>();
+ mapInCollection.put("mc", "mc1");
+ tupColl2_1.set(0, mapInCollection);
+ bag2.add(tupColl2_1);
+
+ tupColl2_2.set(0, mapInCollection);
+ bag2.add(tupColl2_2);
+ tuple.set(7, bag2);
+
+ int row = 0;
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+
+ // row 2
+ row++;
+ TypesUtils.resetTuple(tuple);
+ column5.clear();
+ column7.clear();
+ column6.clear();
+ mapInCollection.clear();
+ bag2.clear();
+ TypesUtils.resetTuple(tupColl2_1);
+ TypesUtils.resetTuple(tupColl2_2);
+
+ tuple.set(0, new DataByteArray("column1 row 2 ")); // byte
+ tuple.set(1,new DataByteArray("column2 row 2"));
+ tuple.set(2, new DataByteArray("column3 row 2"));
+ tuple.set(3, new DataByteArray("column4 row 2"));
+
+ // column5
+ column5.put("key51", "key512");
+ column5.put("key52", "key522");
+ column5.put("key53", "key532");
+ column5.put("key54", "key542");
+ column5.put("key55", "key552");
+ column5.put("key56", "key562");
+ column5.put("key57", "key572");
+ tuple.set(4, column5);
+
+ // column6
+
+ column6.put("column6", "column62");
+ column7.put("key71", "key712");
+ column7.put("key72", "key722");
+ column7.put("key73", "key732");
+ column7.put("key74", "key742");
+ column7.put("key75", "key752");
+ tuple.set(6, column7);
+ tuple.set(5, column6);
+
+
+ //column8
+ //column8:collection(f1:map(bytes))
+ mapInCollection.put("mc", "mc2");
+ tupColl2_1.set(0, mapInCollection);
+ bag2.add(tupColl2_1);
+
+ tupColl2_2.set(0, mapInCollection);
+ bag2.add(tupColl2_2);
+ tuple.set(7, bag2);
+
+
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+
+ // finish building table, closing out the inserter, writer, writer1
+ inserter.close();
+ writer1.finish();
+ writer.close();
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+ BasicTable.drop(path, conf);
+ }
+
+
+ @Test
+ public void testRead1() throws IOException, ParseException {
+ /*
+ * read one map
+ * column5.put("key51", "key511");
+ */
+ String projection = new String("column5#{key51}");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(RowValue);
+ System.out.println("read1 : " + RowValue.toString());
+ Assert.assertEquals("{key51=key511}", RowValue.get(0).toString());
+
+ scanner.advance();
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+ scanner.getValue(RowValue);
+ System.out.println(RowValue.get(0).toString());
+ Assert.assertEquals("{key51=key512}", RowValue.get(0).toString());
+
+ reader.close();
+ }
+
+
+ @Test
+ public void testRead2() throws IOException, ParseException {
+ /*
+ * read map , stitch
+ * [column5#{key51|key52|key53|key54|key55|key56},column7#{key71|key72|key73|key74|key75}];[column5,column7,column6]
+ */
+ String projection2 = new String("column5#{new}, column7#{key71|ytestid}");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+// reader.setProjection(projection2);
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(RowValue);
+ System.out.println("map of map: " + RowValue.toString());
+ Assert.assertEquals("key571", ((Map) RowValue.get(4)).get("key57"));
+ Assert.assertEquals("key711", ((Map) RowValue.get(6)).get("key71"));
+
+ Assert.assertEquals(null, (((Map) RowValue.get(6)).get("ytestid")));
+ Assert.assertEquals(null, ((Map) ((Map) RowValue.get(6)).get("x")));
+ System.out.println("rowValue.get)1): " + RowValue.get(6).toString());
+ // rowValue.get)1): {z=null, x={m311=311, m321=321, m331=331}}
+
+ scanner.advance();
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+ scanner.getValue(RowValue);
+ Assert.assertEquals("key572", ((Map) RowValue.get(4)).get("key57"));
+ Assert.assertEquals("key712", ((Map) RowValue.get(6)).get("key71"));
+
+ Assert.assertEquals("key722", ((Map) RowValue.get(6)).get("key72"));
+ Assert.assertEquals(null, ((Map) ((Map) RowValue.get(6)).get("x")));
+ reader.close();
+
+ }
+
+ @Test
+ public void testRead3() throws IOException, ParseException {
+ /*
+ * Negative case: project a column name (m5) that is not in the schema; the projected value should be null.
+ */
+ String projection = new String("m5");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(RowValue);
+ Assert.assertEquals(false, RowValue.isNull());
+ Assert.assertEquals(null, RowValue.get(0));
+ Assert.assertEquals(1, RowValue.size());
+
+ scanner.advance();
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+ scanner.getValue(RowValue);
+ Assert.assertEquals(false, RowValue.isNull());
+ Assert.assertEquals(null, RowValue.get(0));
+ Assert.assertEquals(1, RowValue.size());
+ reader.close();
+ }
+
+ @Test
+ public void testRead4() throws IOException, ParseException {
+ /*
+ * Project a map key that is not present in any row; an empty map is expected for every row.
+ */
+ String projection = new String("column5#{nonexist}");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(RowValue);
+ System.out.println("read1 : " + RowValue.toString());
+ Assert.assertEquals("{}", RowValue.get(0).toString());
+
+ scanner.advance();
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+ scanner.getValue(RowValue);
+ System.out.println(RowValue.get(0).toString());
+ Assert.assertEquals("{}", RowValue.get(0).toString());
+
+ reader.close();
+ }
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java Tue Nov 24 19:54:19 2009
@@ -74,7 +74,7 @@
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Tue Nov 24 19:54:19 2009
@@ -87,7 +87,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -114,7 +114,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -141,7 +141,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -168,7 +168,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -195,7 +195,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -222,7 +222,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -249,7 +249,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -276,7 +276,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -303,7 +303,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -330,7 +330,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -357,7 +357,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -384,7 +384,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -412,7 +412,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
System.out.println("HERE HERE");
Assert.fail("Should throw exception");
} catch (Exception e) {
@@ -441,7 +441,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
} catch (Exception e) {
e.printStackTrace();
Assert.fail("Should Not throw exception");
@@ -525,7 +525,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
} catch (Exception e) {
System.out.println(e);
Assert.fail("Should throw exception");
@@ -552,7 +552,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
@@ -580,7 +580,7 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
} catch (Exception e) {
e.printStackTrace();
Assert.fail("Should Not throw exception");
@@ -609,11 +609,11 @@
// Build Table and column groups
BasicTable.Writer writer = null;
try {
- writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+ writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
Assert.fail("Should throw exception");
} catch (Exception e) {
System.out.println(e);
}
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java Tue Nov 24 19:54:19 2009
@@ -47,6 +47,7 @@
public class TestNonDefaultWholeMapSplit {
final static String STR_SCHEMA = "m1:map(string),m2:map(map(int))";
+ //the comment STR_STORAGE is the one for jira 949
final static String STR_STORAGE = "[m1#{a}];[m2#{x|y}]; [m1#{b}, m2#{z}];[m1]";
private static Configuration conf;
private static Path path;
@@ -66,7 +67,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -268,15 +269,15 @@
Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
scanner.getValue(RowValue);
System.out.println("read1 : " + RowValue.toString());
- Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+ Assert.assertEquals("{}", RowValue.get(0).toString());
scanner.advance();
scanner.getKey(key);
Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
scanner.getValue(RowValue);
System.out.println(RowValue.get(0).toString());
- Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+ Assert.assertEquals("{}", RowValue.get(0).toString());
reader.close();
}
-}
\ No newline at end of file
+}
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests the map-key projection "f2#{a}" on a table whose map column f2 is
+ * stored whole in its own column group (STR_STORAGE has no per-key split).
+ * setUp() writes one row under key "k11"; test1() reads that row back.
+ */
+public class TestProjectionOnFullMap {
+ final static String STR_SCHEMA = "f1:string, f2:map";
+ final static String STR_STORAGE = "[f1]; [f2]";
+ private static Configuration conf;
+ private static Path path;
+ private static FileSystem fs;
+
+ @BeforeClass
+ public static void setUp() throws IOException { // builds the one-row table once for the whole class
+ conf = new Configuration();
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ fs = new LocalFileSystem(rawLFS); // used only to resolve the working directory on the next line
+ path = new Path(fs.getWorkingDirectory(), "TestBasicTableMapSplits"); // NOTE(review): directory name differs from this class name - confirm it cannot clash with another test's table
+ fs = path.getFileSystem(conf); // replaces the LocalFileSystem above with the file system conf resolves for path
+ // drop any table left at this path by an earlier run
+ BasicTable.drop(path, conf);
+
+ BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+ STR_STORAGE, conf);
+ writer.finish(); // presumably commits the schema/storage metadata so writer1 can open the table - confirm
+
+ Schema schema = writer.getSchema();
+ Tuple tuple = TypesUtils.createTuple(schema);
+
+ BasicTable.Writer writer1 = new BasicTable.Writer(path, conf); // second writer, opened without schema or storage arguments
+ int part = 0;
+ TableInserter inserter = writer1.getInserter("part" + part, true);
+ TypesUtils.resetTuple(tuple);
+
+ tuple.set(0, "Kitty"); // column 0 is f1
+
+
+ Map<String, String> map = new HashMap<String, String>();
+ map.put("a", "x");
+ map.put("b", "y");
+ map.put("c", "z");
+ tuple.set(1, map); // column 1 is f2
+
+ int row = 0; // with part = 0 and row = 0 the key below formats to "k11"
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+ inserter.close();
+ writer1.finish();
+
+ writer.close();
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws IOException { // intentionally empty: the table is left on disk
+ }
+
+ public void testDescribse() throws IOException { // NOTE(review): no @Test annotation, so a JUnit 4 runner will presumably skip this - confirm intent
+
+ BasicTable.dumpInfo(path.toString(), System.out, conf);
+
+ }
+
+ @Test
+ public void test1() throws IOException, ParseException {
+ String projection = new String("f2#{a}");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ // The first reader only supplies the split list; a fresh reader then scans splits.get(0).
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ reader.close();
+ reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple value = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes())); // NOTE(review): JUnit declares assertEquals(expected, actual); key is the actual value here
+ scanner.getValue(value);
+
+ Assert.assertEquals("x", ((Map)value.get(0)).get("a")); // raw Map cast; only the projected key "a" is checked
+ reader.close();
+ }
+
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java Tue Nov 24 19:54:19 2009
@@ -75,7 +75,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -412,4 +412,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java Tue Nov 24 19:54:19 2009
@@ -78,7 +78,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java Tue Nov 24 19:54:19 2009
@@ -77,7 +77,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java Tue Nov 24 19:54:19 2009
@@ -78,7 +78,7 @@
BasicTable.drop(path, conf);
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java Tue Nov 24 19:54:19 2009
@@ -114,7 +114,7 @@
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -210,7 +210,7 @@
System.out.println("in testRecord, get path: " + path.toString());
// Build Table and column groups
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -411,8 +411,9 @@
// String STR_STORAGE = "[m1#{a}]";
// Build Table and column groups
path = new Path(getCurrentMethodName());
+ BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -534,4 +535,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java Tue Nov 24 19:54:19 2009
@@ -82,7 +82,7 @@
// drop any previous tables
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Tuple tuple = TypesUtils.createTuple(schema);
@@ -184,4 +184,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests projections on a two-row BasicTable built by setUpOnce(): test1() reads
+ * "r.f12, f1" for both rows and testStitch() reads "f1, r" for the first row.
+ * NOTE(review): the Writer's "r, c" argument is presumably the sort-column list.
+ */
+public class TestSortedBasicTableSplits {
+ final static String STR_SCHEMA = "f1:bool, r:record(f11:int, f12:long), m:map(string), c:collection(f13:double, f14:float, f15:bytes)";
+ // TODO: try map hash split later
+ final static String STR_STORAGE = "[r.f12, f1]; [m]";
+ private static Configuration conf;
+ private static Path path;
+ private static FileSystem fs;
+
+ @BeforeClass
+ public static void setUpOnce() throws IOException { // builds the two-row table once for the whole class
+ conf = new Configuration();
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ fs = new LocalFileSystem(rawLFS); // used only to resolve the working directory on the next line
+ path = new Path(fs.getWorkingDirectory(), "TestBasicTableSplits"); // NOTE(review): directory name differs from this class name - confirm it cannot clash with another test's table
+ fs = path.getFileSystem(conf); // replaces the LocalFileSystem above with the file system conf resolves for path
+ // drop any table left at this path by an earlier run
+ BasicTable.drop(path, conf);
+
+ BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+ STR_STORAGE, "r, c", null, conf);
+ writer.finish();
+
+ Schema schema = writer.getSchema();
+ Tuple tuple = TypesUtils.createTuple(schema);
+
+ BasicTable.Writer writer1 = new BasicTable.Writer(path, conf); // second writer, opened without schema or storage arguments
+ int part = 0;
+ TableInserter inserter = writer1.getInserter("part" + part, true);
+ TypesUtils.resetTuple(tuple);
+
+ tuple.set(0, true); // column 0 is f1
+
+ Tuple tupRecord;
+ try {
+ tupRecord = TypesUtils.createTuple(schema.getColumnSchema("r")
+ .getSchema());
+ } catch (ParseException e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+
+ // row 1: record r = (f11 = 1, f12 = 1001L)
+ tupRecord.set(0, 1);
+ tupRecord.set(1, 1001L);
+ tuple.set(1, tupRecord);
+
+ Map<String, String> map = new HashMap<String, String>();
+ map.put("a", "x");
+ map.put("b", "y");
+ map.put("c", "z");
+ tuple.set(2, map);
+
+ DataBag bagColl = TypesUtils.createBag();
+ Schema schColl = schema.getColumn(3).getSchema(); // column 3 is the collection c
+ Tuple tupColl1 = TypesUtils.createTuple(schColl);
+ Tuple tupColl2 = TypesUtils.createTuple(schColl);
+ byte[] abs1 = new byte[3];
+ byte[] abs2 = new byte[4];
+ tupColl1.set(0, 3.1415926);
+ tupColl1.set(1, 1.6); // NOTE(review): schema declares f14:float but a double literal is stored here
+ abs1[0] = 11;
+ abs1[1] = 12;
+ abs1[2] = 13;
+ tupColl1.set(2, new DataByteArray(abs1));
+ bagColl.add(tupColl1);
+ tupColl2.set(0, 123.456789);
+ tupColl2.set(1, 100); // NOTE(review): schema declares f14:float but an int literal is stored here
+ abs2[0] = 21;
+ abs2[1] = 22;
+ abs2[2] = 23;
+ abs2[3] = 24;
+ tupColl2.set(2, new DataByteArray(abs2));
+ bagColl.add(tupColl2);
+ tuple.set(3, bagColl);
+
+ int row = 0; // with part = 0 and row = 0 the key below formats to "k11"
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+
+ // row 2: the same tuple, record, map, bag and byte arrays are cleared or overwritten and reused
+ row++;
+ TypesUtils.resetTuple(tuple);
+ TypesUtils.resetTuple(tupRecord);
+ map.clear();
+ tuple.set(0, false);
+ tupRecord.set(0, 2);
+ tupRecord.set(1, 1002L);
+ tuple.set(1, tupRecord);
+ map.put("boy", "girl");
+ map.put("adam", "amy");
+ map.put("bob", "becky");
+ map.put("carl", "cathy");
+ tuple.set(2, map);
+ bagColl.clear();
+ TypesUtils.resetTuple(tupColl1);
+ TypesUtils.resetTuple(tupColl2);
+ tupColl1.set(0, 7654.321);
+ tupColl1.set(1, 0.0001);
+ abs1[0] = 31;
+ abs1[1] = 32;
+ abs1[2] = 33;
+ tupColl1.set(2, new DataByteArray(abs1));
+ bagColl.add(tupColl1);
+ tupColl2.set(0, 0.123456789);
+ tupColl2.set(1, 0.3333);
+ abs2[0] = 41;
+ abs2[1] = 42;
+ abs2[2] = 43;
+ abs2[3] = 44;
+ tupColl2.set(2, new DataByteArray(abs2));
+ bagColl.add(tupColl2);
+ tuple.set(3, bagColl);
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple); // row is now 1, so this key formats to "k12"
+
+ inserter.close();
+ writer1.finish();
+
+ writer.close();
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws IOException { // intentionally empty: the table is left on disk
+ }
+
+ @Test
+ public void test1() throws IOException, ParseException {
+ String projection = new String("r.f12, f1");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ // The first reader only supplies the split list; a fresh reader then scans splits.get(0).
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ reader.close();
+ reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple value = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes())); // NOTE(review): JUnit declares assertEquals(expected, actual); key is the actual value here
+ scanner.getValue(value);
+
+ Assert.assertEquals(1001L, value.get(0)); // projected column 0 is r.f12
+ Assert.assertEquals(true, value.get(1)); // projected column 1 is f1
+
+ scanner.advance(); // move on to the second row, whose key is checked next
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+ scanner.getValue(value);
+ Assert.assertEquals(1002L, value.get(0));
+ Assert.assertEquals(false, value.get(1));
+
+ reader.close();
+ }
+
+ @Test
+ public void testStitch() throws IOException, ParseException {
+ String projection = new String("f1, r");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ // The first reader only supplies the split list; a fresh reader then scans splits.get(0).
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ reader.close();
+ reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple value = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes())); // only the first row is checked in this test
+ scanner.getValue(value);
+
+ Tuple recordTuple = (Tuple) value.get(1); // the projection is "f1, r", so the record r is column 1
+ Assert.assertEquals(1, recordTuple.get(0));
+ Assert.assertEquals(1001L, recordTuple.get(1));
+ Assert.assertEquals(true, value.get(0));
+ reader.close();
+ }
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java Tue Nov 24 19:54:19 2009
@@ -74,7 +74,7 @@
BasicTable.drop(path, conf);
BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
- STR_STORAGE, false, conf);
+ STR_STORAGE, conf);
writer.finish();
Schema schema = writer.getSchema();
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java Tue Nov 24 19:54:19 2009
@@ -26,8 +26,8 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Flat;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Flat;
/**
* Generate some input text files.
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java Tue Nov 24 19:54:19 2009
@@ -24,9 +24,9 @@
import java.util.Random;
import java.util.Set;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Binomial;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Zipf;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Binomial;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Zipf;
/**
* A dictionary that generates English words, whose frequency follows Zipf
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java Tue Nov 24 19:54:19 2009
@@ -113,4 +113,4 @@
JobClient.runJob(jobConf);
}
}
-}
\ No newline at end of file
+}
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
+import org.apache.hadoop.zebra.mapred.TestBasicTableIOFormatLocalFS.InvIndex;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.types.SortInfo;
+import org.apache.hadoop.zebra.schema.*;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.hadoop.zebra.types.*;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a complete sample of MR code for Table. It doesn't contain the
+ * 'read' part, but that should be similar and easier to write. Refer to the
+ * test cases in the same directory.
+ *
+ * Assume the input files contain rows of word and count, separated by a space:
+ *
+ * <pre>
+ * this 2
+ * is 1
+ * a 4
+ * test 2
+ * hello 1
+ * world 3
+ * </pre>
+ *
+ */
+public class TableMRSampleSortedTable {
+  /**
+   * Turns each "word count" text line into a (word, count) row keyed by the
+   * bytes of the word.
+   */
+  static class MapClass implements
+      Mapper<LongWritable, Text, BytesWritable, Tuple> {
+    private BytesWritable bytesKey;
+    private Tuple tupleRow;
+    private byte[] types;
+    private ColumnType[] sortColTypes;
+
+    /**
+     * Emits one (key, row) pair per well-formed input line; any line that does
+     * not split into exactly two space-separated fields is skipped.
+     *
+     * @param key input key; not used
+     * @param value one input line, expected to be "word count"
+     * @param output receives the word's bytes as key and the (word, count) row
+     * @param reporter not used
+     * @throws IOException if the row cannot be filled in or collected
+     * @throws NumberFormatException if the count field is not an integer
+     */
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+        throws IOException {
+      // value should contain "word count"
+      String[] wdct = value.toString().split(" ");
+      if (wdct.length != 2) {
+        // Malformed line: skip it rather than fail the task.
+        return;
+      }
+
+      // Name the charset explicitly so the key bytes do not depend on the
+      // platform default encoding of the machine running the task.
+      byte[] word = wdct[0].getBytes("UTF-8");
+
+      // This key has to be created by user
+      bytesKey.set(word, 0, word.length);
+
+      // This tuple has to be created by user. The word is stored as parsed;
+      // decoding it back from the key bytes could only lose characters.
+      tupleRow.set(0, wdct[0]);
+      tupleRow.set(1, Integer.parseInt(wdct[1]));
+
+      output.collect(bytesKey, tupleRow);
+    }
+
+    /**
+     * Allocates the reusable key and row, and reads the sort column types
+     * from the job.
+     *
+     * @param job configuration of the current job
+     * @throws RuntimeException wrapping an IOException or ParseException
+     *         raised while reading the sort information or the schema
+     */
+    @Override
+    public void configure(JobConf job) {
+      bytesKey = new BytesWritable();
+      try {
+        /* New M/R Interface to get sort information */
+        SortInfo sortInfo = BasicTableOutputFormat.getSortInfo(job);
+
+        /* New M/R Interface SortInfo is exposed to user
+         * To get types of sort columns.
+         * Similar interface to get names and indices
+         */
+        sortColTypes = sortInfo.getSortColumnTypes();
+        types = new byte[sortColTypes.length];
+        for (int i = 0; i < sortColTypes.length; ++i) {
+          types[i] = sortColTypes[i].pigDataType();
+        }
+        Schema outSchema = BasicTableOutputFormat.getSchema(job);
+        tupleRow = TypesUtils.createTuple(outSchema);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      } catch (ParseException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  /** Identity reducer: writes every incoming row unchanged under its key. */
+  static class ReduceClass implements
+      Reducer<BytesWritable, Tuple, BytesWritable, Tuple> {
+    Tuple outRow;
+
+    @Override
+    public void configure(JobConf job) {
+      // no-op
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+
+    /**
+     * Forwards each value of the key to the output.
+     *
+     * @param key key shared by all values in this call
+     * @param values rows collected for the key
+     * @param output receives every (key, row) pair
+     * @param reporter not used
+     * @throws IOException if a row cannot be collected. Nothing is caught
+     *         here, so a failed write fails the task instead of silently
+     *         dropping the remaining rows of the key.
+     */
+    @Override
+    public void reduce(BytesWritable key, Iterator<Tuple> values,
+        OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+        throws IOException {
+      while (values.hasNext()) {
+        output.collect(key, values.next());
+      }
+    }
+  }
+
+  /**
+   * Configures and runs the sample job.
+   *
+   * @param args optional; args[0] overrides the input text file and args[1]
+   *        the output table path. When absent, the sample's original paths
+   *        are used.
+   * @throws Exception if the job cannot be configured or fails
+   */
+  public static void main(String[] args) throws ParseException, IOException, Exception {
+    String inputPath = args.length > 0 ? args[0]
+        : "/home/gauravj/work/grid/myTesting/input.txt";
+    String outputPath = args.length > 1 ? args[1]
+        : "/home/gauravj/work/grid/myTesting/tableOuts";
+
+    JobConf jobConf = new JobConf();
+    jobConf.setJobName("tableMRSample");
+    jobConf.set("table.output.tfile.compression", "gz");
+
+    // input settings
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setMapperClass(TableMRSampleSortedTable.MapClass.class);
+    jobConf.setReducerClass(TableMRSampleSortedTable.ReduceClass.class);
+    jobConf.setMapOutputKeyClass(BytesWritable.class);
+    jobConf.setMapOutputValueClass(DefaultTuple.class);
+    FileInputFormat.setInputPaths(jobConf, new Path(inputPath));
+    jobConf.setNumMapTasks(1);
+
+    // output settings
+    Path outPath = new Path(outputPath);
+    jobConf.setOutputFormat(BasicTableOutputFormat.class);
+    BasicTableOutputFormat.setOutputPath(jobConf, outPath);
+    // set the logical schema with 2 columns
+    BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
+    // for demo purposes, create 2 physical column groups
+    BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
+
+    /* New M/R Interface */
+    /* Set sort columns in a comma separated string */
+    /* Each sort column should belong to schema columns */
+    BasicTableOutputFormat.setSortInfo(jobConf, "word, count");
+
+    // Use a single reduce task; this is not a map-only job.
+    jobConf.setNumReduceTasks(1);
+    JobClient.runJob(jobConf);
+  }
+}
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
+import org.apache.hadoop.zebra.mapred.TestBasicTableIOFormatLocalFS.InvIndex;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a complete sample of MR code for Table. It doesn't contain the
+ * 'read' part, but that should be similar and easier to write. Refer to the
+ * test cases in the same directory.
+ *
+ * Assume the input files contain rows of word and count, separated by a space:
+ *
+ * <pre>
+ * this 2
+ * is 1
+ * a 4
+ * test 2
+ * hello 1
+ * world 3
+ * </pre>
+ *
+ */
+public class TableMRSortedTableZebraKeyGenerator {
+  /**
+   * Turns each "word count" text line into a (word, count) row whose key is
+   * produced by BasicTableOutputFormat.getSortKey from a user key tuple.
+   */
+  static class MapClass implements
+      Mapper<LongWritable, Text, BytesWritable, Tuple> {
+    private BytesWritable bytesKey;
+    private Tuple tupleRow;
+    private Object javaObj;
+
+    /**
+     * Emits one (sort key, row) pair per well-formed input line; any line that
+     * does not split into exactly two space-separated fields is skipped.
+     *
+     * @param key input key; not used
+     * @param value one input line, expected to be "word count"
+     * @param output receives the generated sort key and the (word, count) row
+     * @param reporter not used
+     * @throws IOException if the row cannot be filled in or collected, or if
+     *         the sort key cannot be generated (the original failure is
+     *         attached as the cause)
+     * @throws NumberFormatException if the count field is not an integer
+     */
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+        throws IOException {
+      // value should contain "word count"
+      String[] wdct = value.toString().split(" ");
+      if (wdct.length != 2) {
+        // Malformed line: skip it rather than fail the task.
+        return;
+      }
+
+      // Use the parsed word directly; a round trip through platform-charset
+      // bytes could only lose characters.
+      String word = wdct[0];
+      int count = Integer.parseInt(wdct[1]);
+      tupleRow.set(0, word);
+      tupleRow.set(1, count);
+
+      // This key has to be created by user
+      Tuple userKey = new DefaultTuple();
+      userKey.append(word);
+      userKey.append(count);
+      try {
+        /* New M/R Interface */
+        /* Converts user key to zebra BytesWritable key */
+        /* using sort key expr tree */
+        /* Returns a java base object */
+        /* Done for each user key */
+        bytesKey = BasicTableOutputFormat.getSortKey(javaObj, userKey);
+      } catch (Exception e) {
+        // Never emit a row under a key that was not generated for it: fail
+        // the task instead of continuing silently.
+        IOException failure = new IOException(
+            "Cannot build Zebra sort key for input line: " + value);
+        failure.initCause(e);
+        throw failure;
+      }
+
+      output.collect(bytesKey, tupleRow);
+    }
+
+    /**
+     * Allocates the key and the reusable row, and obtains the sort key
+     * generator object from the job.
+     *
+     * @param job configuration of the current job
+     * @throws RuntimeException wrapping an IOException or ParseException
+     *         raised while reading the schema or the sort key generator
+     */
+    @Override
+    public void configure(JobConf job) {
+      bytesKey = new BytesWritable();
+      try {
+        Schema outSchema = BasicTableOutputFormat.getSchema(job);
+        tupleRow = TypesUtils.createTuple(outSchema);
+
+        /* New M/R Interface */
+        /* returns an expression tree for sort keys */
+        /* Returns a java base object */
+        /* Done once per table */
+        javaObj = BasicTableOutputFormat.getSortKeyGenerator(job);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      } catch (ParseException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  /** Identity reducer: writes every incoming row unchanged under its key. */
+  static class ReduceClass implements
+      Reducer<BytesWritable, Tuple, BytesWritable, Tuple> {
+    Tuple outRow;
+
+    @Override
+    public void configure(JobConf job) {
+      // no-op
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+
+    /**
+     * Forwards each value of the key to the output.
+     *
+     * @param key key shared by all values in this call
+     * @param values rows collected for the key
+     * @param output receives every (key, row) pair
+     * @param reporter not used
+     * @throws IOException if a row cannot be collected. Nothing is caught
+     *         here, so a failed write fails the task instead of silently
+     *         dropping the remaining rows of the key.
+     */
+    @Override
+    public void reduce(BytesWritable key, Iterator<Tuple> values,
+        OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+        throws IOException {
+      while (values.hasNext()) {
+        output.collect(key, values.next());
+      }
+    }
+  }
+
+  /**
+   * Configures and runs the sample job.
+   *
+   * @param args optional; args[0] overrides the input text file and args[1]
+   *        the output table path. When absent, the sample's original paths
+   *        are used.
+   * @throws Exception if the job cannot be configured or fails
+   */
+  public static void main(String[] args) throws ParseException, IOException, Exception {
+    String inputPath = args.length > 0 ? args[0]
+        : "/home/gauravj/work/grid/myTesting/input.txt";
+    String outputPath = args.length > 1 ? args[1]
+        : "/home/gauravj/work/grid/myTesting/tableOuts";
+
+    JobConf jobConf = new JobConf();
+    jobConf.setJobName("tableMRSample");
+    jobConf.set("table.output.tfile.compression", "gz");
+
+    // input settings
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setMapperClass(TableMRSortedTableZebraKeyGenerator.MapClass.class);
+    jobConf.setReducerClass(TableMRSortedTableZebraKeyGenerator.ReduceClass.class);
+    jobConf.setMapOutputKeyClass(BytesWritable.class);
+    jobConf.setMapOutputValueClass(DefaultTuple.class);
+    FileInputFormat.setInputPaths(jobConf, new Path(inputPath));
+    jobConf.setNumMapTasks(1);
+
+    // output settings
+    Path outPath = new Path(outputPath);
+    jobConf.setOutputFormat(BasicTableOutputFormat.class);
+    BasicTableOutputFormat.setOutputPath(jobConf, outPath);
+    // set the logical schema with 2 columns
+    BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
+    // for demo purposes, create 2 physical column groups
+    BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
+
+    /* New M/R Interface */
+    /* Set sort columns in a comma separated string */
+    /* Each sort column should belong to schema columns */
+    BasicTableOutputFormat.setSortInfo(jobConf, "word, count");
+
+    // Use a single reduce task; this is not a map-only job.
+    jobConf.setNumReduceTasks(1);
+    JobClient.runJob(jobConf);
+  }
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java Tue Nov 24 19:54:19 2009
@@ -225,4 +225,4 @@
args);
System.exit(res);
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java Tue Nov 24 19:54:19 2009
@@ -46,7 +46,7 @@
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.file.tfile.Utils;
+import org.apache.hadoop.zebra.tfile.Utils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobClient;
@@ -163,6 +163,9 @@
JobConf getJobConf(String name) {
JobConf jobConf = mr.createJobConf();
jobConf.setJobName(name);
+ jobConf.setInt("table.input.split.minSize", 1);
+ options.minTableSplitSize = 1; // force more splits
+ jobConf.setInt("dfs.block.size", 1024); // force multiple blocks
jobConf.set("table.output.tfile.compression", options.compression);
jobConf.setInt("mapred.app.freqWords.count", options.numFreqWords);
return jobConf;
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java Tue Nov 24 19:54:19 2009
@@ -24,7 +24,8 @@
@RunWith(Suite.class)
@Suite.SuiteClasses({
TestBasicTableIOFormatLocalFS.class,
- TestBasicTableIOFormatDFS.class
+ TestBasicTableIOFormatDFS.class,
+ TestTfileSplit.class
})
public class TestCheckin {