You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2009/08/12 00:27:47 UTC
svn commit: r803312 [14/16] - in /hadoop/pig/trunk: ./ contrib/zebra/
contrib/zebra/docs/ contrib/zebra/src/ contrib/zebra/src/java/
contrib/zebra/src/java/org/ contrib/zebra/src/java/org/apache/
contrib/zebra/src/java/org/apache/hadoop/ contrib/zebra/...
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollection.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,289 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.types.ParseException;
+import org.apache.hadoop.zebra.types.Projection;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test projections on complicated column types.
+ *
+ */
+public class TestCollection {
+
+ // Table schema: a single collection column "c" whose elements are
+ // (a:double, b:float, c:bytes) records.
+ final static String STR_SCHEMA = "c:collection(a:double, b:float, c:bytes)";
+ // Storage specification passed to BasicTable.Writer together with STR_SCHEMA.
+ final static String STR_STORAGE = "[c]";
+ // Configuration shared by setUpOnce, the IO-layer test and tearDownOnce.
+ private static Configuration conf;
+ // NOTE(review): not read by any test visible in this class.
+ private static FileSystem fs;
+
+ // Execution mode; setUpOnce builds a MiniCluster-backed PigServer for
+ // MAPREDUCE and a local PigServer otherwise.
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ // Only assigned by setUpOnce when execType is MAPREDUCE.
+ private static MiniCluster cluster;
+ // Pig server the Pig-level tests register their queries with.
+ protected static PigServer pigServer;
+ // Location of the table written by setUpOnce and dropped by tearDownOnce.
+ private static Path path;
+
+ /**
+  * One-time fixture: starts the Pig server and writes a two-row table with
+  * the single collection column described by STR_SCHEMA.
+  *
+  * Each row holds a bag of two (double, float, bytes) tuples. testRead1
+  * asserts on exactly these values, so keep the two in sync.
+  *
+  * @throws IOException if the Pig server or the table cannot be set up
+  */
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+   System.out.println("ONCE SETUP !! ---------");
+   if (System.getProperty("hadoop.log.dir") == null) {
+     String base = new File(".").getPath(); // getAbsolutePath();
+     System
+         .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+   }
+
+   if (execType == ExecType.MAPREDUCE) {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   } else {
+     pigServer = new PigServer(ExecType.LOCAL);
+   }
+
+   conf = new Configuration();
+   // NOTE(review): cluster is only assigned in MAPREDUCE mode, so this line
+   // fails with a NullPointerException if execType is switched to LOCAL.
+   FileSystem clusterFs = cluster.getFileSystem();
+   // Give the table its own directory. Writing it straight into the working
+   // directory means tearDownOnce drops the working directory itself.
+   path = new Path(clusterFs.getWorkingDirectory(),
+       TestCollection.class.getSimpleName());
+   System.out.println("path =" + path);
+   // drop any previous tables
+   BasicTable.drop(path, conf);
+
+   // Create the table, then reopen it by path to insert the rows.
+   BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+       STR_STORAGE, false, conf);
+   writer.finish();
+
+   Schema schema = writer.getSchema();
+   Tuple tuple = TypesUtils.createTuple(schema);
+
+   BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+   int part = 0;
+   TableInserter inserter = writer1.getInserter("part" + part, true);
+   TypesUtils.resetTuple(tuple);
+   DataBag bagColl = TypesUtils.createBag();
+   Schema schColl = schema.getColumn(0).getSchema();
+   Tuple tupColl1 = TypesUtils.createTuple(schColl);
+   Tuple tupColl2 = TypesUtils.createTuple(schColl);
+   byte[] abs1 = new byte[3];
+   byte[] abs2 = new byte[4];
+
+   // Row 1.
+   tupColl1.set(0, 3.1415926);
+   tupColl1.set(1, 1.6);
+   abs1[0] = 11;
+   abs1[1] = 12;
+   abs1[2] = 13;
+   tupColl1.set(2, new DataByteArray(abs1));
+   bagColl.add(tupColl1);
+   tupColl2.set(0, 123.456789);
+   tupColl2.set(1, 100);
+   abs2[0] = 21;
+   abs2[1] = 22;
+   abs2[2] = 23;
+   abs2[3] = 24;
+   tupColl2.set(2, new DataByteArray(abs2));
+   bagColl.add(tupColl2);
+   tuple.set(0, bagColl);
+
+   int row = 0;
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   // Row 2: the bag, tuples and byte arrays are reused after being cleared.
+   row++; // without this both rows were written under the same key "k11"
+   bagColl.clear();
+   TypesUtils.resetTuple(tupColl1);
+   TypesUtils.resetTuple(tupColl2);
+   tupColl1.set(0, 7654.321);
+   tupColl1.set(1, 0.0001);
+   abs1[0] = 31;
+   abs1[1] = 32;
+   abs1[2] = 33;
+   tupColl1.set(2, new DataByteArray(abs1));
+   bagColl.add(tupColl1);
+   tupColl2.set(0, 0.123456789);
+   tupColl2.set(1, 0.3333);
+   abs2[0] = 41;
+   abs2[1] = 42;
+   abs2[2] = 43;
+   abs2[3] = 44;
+   tupColl2.set(2, new DataByteArray(abs2));
+   bagColl.add(tupColl2);
+   tuple.set(0, bagColl);
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   inserter.close();
+   writer1.finish();
+
+   writer.close();
+ }
+
+ /**
+  * One-time cleanup: drops the table written by setUpOnce and shuts down the
+  * Pig server, as the sibling test classes do in their tearDown.
+  *
+  * Both steps are guarded because this method can run after a setUpOnce that
+  * failed before assigning the fixtures.
+  *
+  * @throws IOException if the table cannot be dropped
+  */
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+   try {
+     if (path != null) {
+       BasicTable.drop(path, conf);
+     }
+   } finally {
+     // Shut the server down even when dropping the table fails.
+     if (pigServer != null) {
+       pigServer.shutdown();
+     }
+   }
+ }
+
+ /**
+  * Loads column c through Pig and checks the first two fields of both tuples
+  * in the bag of each of the two rows written by setUpOnce.
+  *
+  * @throws IOException if the query cannot be registered or iterated
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void testRead1() throws IOException, ParseException {
+   // Same loader class name as the other loader queries in this class and
+   // its sibling tests (this test's own package).
+   String query = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('c');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple cur = it.next();
+     row++;
+     // Position inside the bag restarts for every row; a counter shared
+     // across rows never matches 1 or 2 again after the first row.
+     int inner = 0;
+     Iterator<Tuple> bag = ((DataBag) cur.get(0)).iterator();
+     while (bag.hasNext()) {
+       Tuple cur2 = bag.next();
+       inner++;
+       if (row == 1 && inner == 1) {
+         Assert.assertEquals(3.1415926, cur2.get(0));
+         Assert.assertEquals(1.6, cur2.get(1));
+       } else if (row == 1 && inner == 2) {
+         Assert.assertEquals(123.456789, cur2.get(0));
+         Assert.assertEquals(100, cur2.get(1));
+       } else if (row == 2 && inner == 1) {
+         Assert.assertEquals(7654.321, cur2.get(0));
+         Assert.assertEquals(0.0001, cur2.get(1));
+         System.out.println("cur : " + cur2.toString());
+         System.out.println("byte : " + cur2.get(2).toString());
+       } else if (row == 2 && inner == 2) {
+         Assert.assertEquals(0.123456789, cur2.get(0));
+         Assert.assertEquals(0.3333, cur2.get(1));
+         System.out.println("byte : " + cur2.get(2).toString());
+       }
+     } // bag loop
+     // setUpOnce puts exactly two tuples into every bag.
+     Assert.assertEquals(2, inner);
+   }
+   // Fail instead of passing silently when the load returns no rows.
+   Assert.assertEquals(2, row);
+ }
+
+ @Test
+ // Negative case: projecting a column that does not exist ("d"), using the
+ // IO-layer reader directly rather than Pig.
+ public void testRead2() throws IOException, ParseException {
+   String projection = "d";
+   BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+   try {
+     reader.setProjection(projection);
+     List<RangeSplit> splits = reader.rangeSplit(1);
+     TableScanner scanner = reader.getScanner(splits.get(0), true);
+     BytesWritable key = new BytesWritable();
+     Tuple rowValue = TypesUtils.createTuple(scanner.getSchema());
+
+     scanner.getKey(key);
+     // Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+     scanner.getValue(rowValue);
+     // The projected row is an empty record: a non-null tuple with a single
+     // null field.
+     Assert.assertEquals(false, rowValue.isNull());
+     Assert.assertEquals(null, rowValue.get(0));
+     Assert.assertEquals(1, rowValue.size());
+   } finally {
+     // Close the reader even when an assertion or the scan fails.
+     reader.close();
+   }
+ }
+
+ // Negative case: loading a non-existent column ("d") through Pig.
+ // Carries no @Test annotation; the original note says it failed with a
+ // null pointer (TODO).
+ public void testReadNeg2() throws IOException, ParseException {
+   final String tableLocation = path.toString();
+   final String loadStatement = "records = LOAD '" + tableLocation
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('d');";
+   System.out.println(loadStatement);
+   pigServer.registerQuery(loadStatement);
+   // TODO: verify it returns a tuple with null value
+ }
+
+}
\ No newline at end of file
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableLoader.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestCollectionTableLoader {
+ // Execution mode; setUp builds a MiniCluster-backed PigServer for MAPREDUCE
+ // and a local PigServer otherwise.
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ // Only assigned by setUp when execType is MAPREDUCE.
+ private static MiniCluster cluster;
+ // Pig server that testReader registers its query with.
+ protected static PigServer pigServer;
+ // Location of the table written by setUp.
+ private static Path pathTable;
+
+ /**
+  * One-time fixture: starts the Pig server and writes the table that
+  * testReader loads.
+  *
+  * The table has one collection column. Two inserters ("ins0" and "ins1")
+  * each write 10 rows, and every row carries the same two-tuple bag.
+  *
+  * @throws Exception if the Pig server or the table cannot be set up
+  */
+ @BeforeClass
+ public static void setUp() throws Exception {
+   if (System.getProperty("hadoop.log.dir") == null) {
+     String base = new File(".").getPath(); // getAbsolutePath();
+     System
+         .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+   }
+
+   if (execType == ExecType.MAPREDUCE) {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   } else {
+     pigServer = new PigServer(ExecType.LOCAL);
+   }
+
+   Configuration conf = new Configuration();
+   // NOTE(review): cluster is only assigned in MAPREDUCE mode, so this line
+   // fails with a NullPointerException if execType is switched to LOCAL.
+   FileSystem fs = cluster.getFileSystem();
+   Path pathWorking = fs.getWorkingDirectory();
+   pathTable = new Path(pathWorking, "TestCollectionTable");
+   // drop any previous tables
+   BasicTable.drop(pathTable, conf);
+
+   BasicTable.Writer writer = new BasicTable.Writer(pathTable,
+       "c:collection(a:double, b:float, c:bytes)", "[c]", false, conf);
+   Schema schema = writer.getSchema();
+   Tuple tuple = TypesUtils.createTuple(schema);
+   // Schema of the records inside the collection; the same for every row.
+   Schema schColl = schema.getColumn(0).getSchema();
+
+   final int numsBatch = 10;
+   final int numsInserters = 2;
+   TableInserter[] inserters = new TableInserter[numsInserters];
+   for (int i = 0; i < numsInserters; i++) {
+     inserters[i] = writer.getInserter("ins" + i, false);
+   }
+
+   for (int b = 0; b < numsBatch; b++) {
+     for (int i = 0; i < numsInserters; i++) {
+       TypesUtils.resetTuple(tuple);
+
+       DataBag bagColl = TypesUtils.createBag();
+       Tuple tupColl1 = TypesUtils.createTuple(schColl);
+       Tuple tupColl2 = TypesUtils.createTuple(schColl);
+       byte[] abs1 = new byte[3];
+       byte[] abs2 = new byte[4];
+       tupColl1.set(0, 3.1415926);
+       tupColl1.set(1, 1.6);
+       abs1[0] = 11;
+       abs1[1] = 12;
+       abs1[2] = 13;
+       tupColl1.set(2, new DataByteArray(abs1));
+       bagColl.add(tupColl1);
+       tupColl2.set(0, 123.456789);
+       tupColl2.set(1, 100);
+       abs2[0] = 21;
+       abs2[1] = 22;
+       abs2[2] = 23;
+       abs2[3] = 24;
+       tupColl2.set(2, new DataByteArray(abs2));
+       bagColl.add(tupColl2);
+       tuple.set(0, bagColl);
+
+       inserters[i].insert(new BytesWritable(("key" + i).getBytes()), tuple);
+     }
+   }
+   for (int i = 0; i < numsInserters; i++) {
+     inserters[i].close();
+   }
+   // Close the writer once all inserters are done, matching the otherwise
+   // identical fixture in TestCollectionTableStorer.
+   writer.close();
+ }
+
+ /**
+  * One-time cleanup: shuts down the Pig server created by setUp.
+  * The table written by setUp is not removed here.
+  *
+  * @throws Exception if the shutdown fails
+  */
+ @AfterClass
+ public static void tearDown() throws Exception {
+ pigServer.shutdown();
+ }
+
+ /**
+  * Loads column c of the fixture table through Pig and prints every tuple.
+  * Nothing is asserted; the test passes when loading and iterating do not
+  * throw.
+  */
+ @Test
+ public void testReader() throws ExecException, IOException {
+   final String loadStatement = "records = LOAD '" + pathTable.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('c');";
+   System.out.println(loadStatement);
+   pigServer.registerQuery(loadStatement);
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+ }
+}
\ No newline at end of file
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.pig.TableStorer;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestCollectionTableStorer {
+ // Execution mode; setUp builds a MiniCluster-backed PigServer for MAPREDUCE
+ // and a local PigServer otherwise.
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ // Only assigned by setUp when execType is MAPREDUCE.
+ private static MiniCluster cluster;
+ // Pig server that testStorer registers its query with.
+ protected static PigServer pigServer;
+ // Location of the table written by setUp; testStorer stores into a "store"
+ // child of this path.
+ private static Path pathTable;
+
+ /**
+  * One-time fixture: starts the Pig server and writes the table that
+  * testStorer loads and stores again.
+  *
+  * The table has one collection column. Two inserters ("ins0" and "ins1")
+  * each write 10 rows, and every row carries the same two-tuple bag.
+  *
+  * @throws Exception if the Pig server or the table cannot be set up
+  */
+ @BeforeClass
+ public static void setUp() throws Exception {
+   if (null == System.getProperty("hadoop.log.dir")) {
+     final String baseDir = new File(".").getPath();
+     final String logDir = new Path(baseDir).toString() + "./logs";
+     System.setProperty("hadoop.log.dir", logDir);
+   }
+
+   if (execType != ExecType.MAPREDUCE) {
+     pigServer = new PigServer(ExecType.LOCAL);
+   } else {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   }
+
+   final Configuration tableConf = new Configuration();
+   final FileSystem clusterFs = cluster.getFileSystem();
+   final Path workingDir = clusterFs.getWorkingDirectory();
+   pathTable = new Path(workingDir, "TestCollectionTable");
+   // Start from a clean slate: remove whatever an earlier run left behind.
+   BasicTable.drop(pathTable, tableConf);
+
+   System.out.println("table path=" + pathTable);
+   final BasicTable.Writer tableWriter = new BasicTable.Writer(pathTable,
+       "c:collection(a:double, b:float, c:bytes)", "[c]", false, tableConf);
+   final Schema tableSchema = tableWriter.getSchema();
+   final Tuple rowTuple = TypesUtils.createTuple(tableSchema);
+
+   final int batchCount = 10;
+   final int inserterCount = 2;
+   final TableInserter[] tableInserters = new TableInserter[inserterCount];
+   int slot = 0;
+   while (slot < inserterCount) {
+     tableInserters[slot] = tableWriter.getInserter("ins" + slot, false);
+     slot++;
+   }
+
+   for (int batch = 0; batch < batchCount; batch++) {
+     for (int ins = 0; ins < inserterCount; ins++) {
+       TypesUtils.resetTuple(rowTuple);
+
+       final DataBag bag = TypesUtils.createBag();
+       final Schema elementSchema = tableSchema.getColumn(0).getSchema();
+       final Tuple first = TypesUtils.createTuple(elementSchema);
+       final Tuple second = TypesUtils.createTuple(elementSchema);
+
+       // First element of the bag.
+       first.set(0, 3.1415926);
+       first.set(1, 1.6);
+       first.set(2, new DataByteArray(new byte[] { 11, 12, 13 }));
+       bag.add(first);
+
+       // Second element of the bag.
+       second.set(0, 123.456789);
+       second.set(1, 100);
+       second.set(2, new DataByteArray(new byte[] { 21, 22, 23, 24 }));
+       bag.add(second);
+
+       rowTuple.set(0, bag);
+
+       final BytesWritable rowKey = new BytesWritable(("key" + ins).getBytes());
+       tableInserters[ins].insert(rowKey, rowTuple);
+     }
+   }
+   for (TableInserter tableInserter : tableInserters) {
+     tableInserter.close();
+   }
+   tableWriter.close();
+ }
+
+ /**
+  * One-time cleanup: shuts down the Pig server created by setUp.
+  * Neither the table written by setUp nor the output of testStorer is
+  * removed here.
+  *
+  * @throws Exception if the shutdown fails
+  */
+ @AfterClass
+ public static void tearDown() throws Exception {
+ pigServer.shutdown();
+ }
+
+ /**
+  * Loads the fixture table through Pig, prints every tuple, then stores the
+  * relation with TableStorer under the "store" child of the table path.
+  * Nothing is asserted; the test passes when none of these steps throws.
+  */
+ @Test
+ public void testStorer() throws ExecException, IOException {
+   System.out.println("testStorer");
+   final String loadStatement = "records = LOAD '" + pathTable.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   final String storeLocation = new Path(pathTable, "store").toString();
+   final String storeFunc = TableStorer.class.getCanonicalName()
+       + "('c:collection(a:double, b:float, c:bytes)', '[c]')";
+   pigServer.store("records", storeLocation, storeFunc);
+ }
+}
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableLoader.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.types.ParseException;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Note:
+ *
+ * Make sure you add build/pig-0.1.0-dev-core.jar to the classpath of the
+ * app/debug configuration when running this test from inside Eclipse.
+ *
+ */
+public class TestMapTableLoader {
+ // Execution mode; setUp builds a MiniCluster-backed PigServer for MAPREDUCE
+ // and a local PigServer otherwise.
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ // Only assigned by setUp when execType is MAPREDUCE.
+ private static MiniCluster cluster;
+ // Pig server that testReader registers its query with.
+ protected static PigServer pigServer;
+ // Location of the table written by setUp.
+ private static Path pathTable;
+ // Configuration created by setUp and reused by test1 to open readers.
+ private static Configuration conf;
+
+ /**
+  * One-time fixture: starts the Pig server and writes the map table read by
+  * test1 and testReader.
+  *
+  * The table has one map column m1. Two inserters ("ins0" and "ins1") each
+  * write 10 rows, and every row holds the map {a=x, b=y, c=z}.
+  *
+  * @throws Exception if the Pig server cannot be started or a row cannot be
+  *         written
+  */
+ @BeforeClass
+ public static void setUp() throws Exception {
+   if (System.getProperty("hadoop.log.dir") == null) {
+     String base = new File(".").getPath(); // getAbsolutePath();
+     System
+         .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+   }
+
+   if (execType == ExecType.MAPREDUCE) {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   } else {
+     pigServer = new PigServer(ExecType.LOCAL);
+   }
+
+   conf = new Configuration();
+   // NOTE(review): cluster is only assigned in MAPREDUCE mode, so this line
+   // fails with a NullPointerException if execType is switched to LOCAL.
+   FileSystem fs = cluster.getFileSystem();
+   Path pathWorking = fs.getWorkingDirectory();
+   pathTable = new Path(pathWorking, "TestMapTableLoader");
+
+   BasicTable.Writer writer = new BasicTable.Writer(pathTable,
+       "m1:map(string)", "[m1#{a}]", false, conf);
+   Schema schema = writer.getSchema();
+   Tuple tuple = TypesUtils.createTuple(schema);
+
+   final int numsBatch = 10;
+   final int numsInserters = 2;
+   TableInserter[] inserters = new TableInserter[numsInserters];
+   for (int i = 0; i < numsInserters; i++) {
+     inserters[i] = writer.getInserter("ins" + i, false);
+   }
+
+   for (int b = 0; b < numsBatch; b++) {
+     for (int i = 0; i < numsInserters; i++) {
+       TypesUtils.resetTuple(tuple);
+       Map<String, String> map = new HashMap<String, String>();
+       map.put("a", "x");
+       map.put("b", "y");
+       map.put("c", "z");
+       tuple.set(0, map);
+
+       // Let insert failures propagate: a fixture that silently lost rows
+       // would make the read tests meaningless. TestMapTableStorer performs
+       // the same insert without a catch.
+       inserters[i].insert(new BytesWritable(("key" + i).getBytes()), tuple);
+     }
+   }
+   for (int i = 0; i < numsInserters; i++) {
+     inserters[i].close();
+   }
+   // Close the writer once all inserters are done, as TestMapTableStorer
+   // does.
+   writer.close();
+ }
+
+ /**
+  * One-time cleanup: shuts down the Pig server created by setUp.
+  * The table written by setUp is not removed here.
+  *
+  * @throws Exception if the shutdown fails
+  */
+ @AfterClass
+ public static void tearDown() throws Exception {
+ pigServer.shutdown();
+ }
+
+ /**
+  * Reads the table through the IO layer with projection m1#{b} and prints
+  * every key/value pair of the first range split.
+  *
+  * A first reader is used only to compute the split; a second reader scans
+  * it. Nothing is asserted yet: the value checks below are still commented
+  * out.
+  *
+  * @throws IOException if the table cannot be opened or scanned
+  * @throws ParseException if the projection cannot be parsed
+  */
+ @Test
+ public void test1() throws IOException, ParseException {
+   String projection = "m1#{b}";
+   List<RangeSplit> splits;
+   BasicTable.Reader reader = new BasicTable.Reader(pathTable, conf);
+   try {
+     reader.setProjection(projection);
+     // long totalBytes = reader.getStatus().getSize();
+     splits = reader.rangeSplit(1);
+   } finally {
+     // Close the reader even when computing the split fails.
+     reader.close();
+   }
+
+   reader = new BasicTable.Reader(pathTable, conf);
+   try {
+     reader.setProjection(projection);
+
+     TableScanner scanner = reader.getScanner(splits.get(0), true);
+     BytesWritable key = new BytesWritable();
+     Tuple value = TypesUtils.createTuple(scanner.getSchema());
+     // HashMap<String, Object> mapval;
+     while (!scanner.atEnd()) {
+       scanner.getKey(key);
+       // Assert.assertEquals(key, new BytesWritable("key0".getBytes()));
+       scanner.getValue(value);
+       System.out.println("key = " + key + " value = " + value);
+
+       // mapval = (HashMap<String, Object>) value.get(0);
+       // Assert.assertEquals("x", mapval.get("a"));
+       // Assert.assertEquals(null, mapval.get("b"));
+       // Assert.assertEquals(null, mapval.get("c"));
+       scanner.advance();
+     }
+   } finally {
+     // Close the reader even when the scan fails part-way.
+     reader.close();
+   }
+ }
+
+ /**
+  * Loads the fixture table through Pig with projection m1#{a} and prints
+  * every tuple. Nothing is asserted; the test passes when loading and
+  * iterating do not throw.
+  */
+ @Test
+ public void testReader() throws ExecException, IOException {
+   final String loadStatement = "records = LOAD '" + pathTable.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('m1#{a}');";
+   System.out.println(loadStatement);
+   pigServer.registerQuery(loadStatement);
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+ }
+}
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.pig.TableStorer;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Note:
+ *
+ * Make sure you add build/pig-0.1.0-dev-core.jar to the classpath of the
+ * app/debug configuration when running this test from inside Eclipse.
+ *
+ */
+public class TestMapTableStorer {
+ // Execution mode; setUp builds a MiniCluster-backed PigServer for MAPREDUCE
+ // and a local PigServer otherwise.
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ // Only assigned by setUp when execType is MAPREDUCE.
+ private static MiniCluster cluster;
+ // Pig server that testStorer registers its query with.
+ protected static PigServer pigServer;
+ // Location of the table written by setUp; testStorer stores into a "store"
+ // child of this path.
+ private static Path pathTable;
+
+ /**
+  * One-time fixture: starts the Pig server and writes the map table that
+  * testStorer loads and stores again.
+  *
+  * The table has one map column m. Two inserters ("ins0" and "ins1") each
+  * write 10 rows, and every row holds the map {a=x, b=y, c=z}.
+  *
+  * @throws Exception if the Pig server or the table cannot be set up
+  */
+ @BeforeClass
+ public static void setUp() throws Exception {
+   if (null == System.getProperty("hadoop.log.dir")) {
+     final String baseDir = new File(".").getPath();
+     final String logDir = new Path(baseDir).toString() + "./logs";
+     System.setProperty("hadoop.log.dir", logDir);
+   }
+
+   if (execType != ExecType.MAPREDUCE) {
+     pigServer = new PigServer(ExecType.LOCAL);
+   } else {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   }
+
+   final Configuration tableConf = new Configuration();
+   final FileSystem clusterFs = cluster.getFileSystem();
+   final Path workingDir = clusterFs.getWorkingDirectory();
+
+   pathTable = new Path(workingDir, "TestMapTableStorer");
+   System.out.println("table path=" + pathTable);
+   final BasicTable.Writer tableWriter = new BasicTable.Writer(pathTable,
+       "m:map(string)", "[m#{a}]", false, tableConf);
+   final Schema tableSchema = tableWriter.getSchema();
+   final Tuple rowTuple = TypesUtils.createTuple(tableSchema);
+
+   final int batchCount = 10;
+   final int inserterCount = 2;
+   final TableInserter[] tableInserters = new TableInserter[inserterCount];
+   int slot = 0;
+   while (slot < inserterCount) {
+     tableInserters[slot] = tableWriter.getInserter("ins" + slot, false);
+     slot++;
+   }
+
+   for (int batch = 0; batch < batchCount; batch++) {
+     for (int ins = 0; ins < inserterCount; ins++) {
+       TypesUtils.resetTuple(rowTuple);
+
+       // A fresh map per row, always with the same three entries.
+       final Map<String, String> entries = new HashMap<String, String>();
+       entries.put("a", "x");
+       entries.put("b", "y");
+       entries.put("c", "z");
+       rowTuple.set(0, entries);
+
+       final BytesWritable rowKey = new BytesWritable(("key" + ins).getBytes());
+       tableInserters[ins].insert(rowKey, rowTuple);
+     }
+   }
+   for (TableInserter tableInserter : tableInserters) {
+     tableInserter.close();
+   }
+   tableWriter.close();
+ }
+
+ /**
+  * One-time cleanup: shuts down the Pig server created by setUp.
+  * Neither the table written by setUp nor the output of testStorer is
+  * removed here.
+  *
+  * @throws Exception if the shutdown fails
+  */
+ @AfterClass
+ public static void tearDown() throws Exception {
+ pigServer.shutdown();
+ }
+
+ /**
+  * Loads the fixture table through Pig, prints every tuple, then stores the
+  * relation with TableStorer under the "store" child of the table path,
+  * using the storage specification [m#{a|b}].
+  * Nothing is asserted; the test passes when none of these steps throws.
+  */
+ @Test
+ public void testStorer() throws ExecException, IOException {
+   System.out.println("testStorer");
+   final String loadStatement = "records = LOAD '" + pathTable.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   final String storeLocation = new Path(pathTable, "store").toString();
+   final String storeFunc = TableStorer.class.getCanonicalName()
+       + "('m:map(string)', '[m#{a|b}]')";
+   pigServer.store("records", storeLocation, storeFunc);
+ }
+}
\ No newline at end of file
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapType.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,472 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.pig;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.record.Record;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.types.ParseException;
+import org.apache.hadoop.zebra.types.Projection;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.test.MiniCluster;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test projections on complicated column types.
+ *
+ */
+public class TestMapType {
+ final static String STR_SCHEMA = "m1:map(string),m2:map(map(int)), m4:map(map(record(f1:int,f2:string)))";
+ final static String STR_STORAGE = "[m1#{a}];[m2#{x|y}]; [m1#{b}, m2#{z}]; [m4#{a4}]; [m4#{b4|c4}]";
+
+ private static Configuration conf;
+ private static FileSystem fs;
+
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ private static MiniCluster cluster;
+ protected static PigServer pigServer;
+ private static Path path;
+
+ /**
+  * Creates the Pig server and writes the two-row fixture table that the tests
+  * in this class load.
+  * <p>
+  * The table is created at the file system's working directory with schema
+  * {@code STR_SCHEMA} and storage layout {@code STR_STORAGE}. Both rows are
+  * inserted through a single inserter ("part0") under the keys {@code k11}
+  * and {@code k12}.
+  *
+  * @throws IOException if the cluster, the table writer or the inserter
+  *         fails, or if the nested record schema cannot be parsed
+  */
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+   System.out.println("ONCE SETUP !! ---------");
+   if (System.getProperty("hadoop.log.dir") == null) {
+     String base = new File(".").getPath(); // getAbsolutePath();
+     System
+         .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+   }
+
+   if (execType == ExecType.MAPREDUCE) {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   } else {
+     pigServer = new PigServer(ExecType.LOCAL);
+   }
+
+   conf = new Configuration();
+   // Assign the static field (previously hidden by a local of the same name)
+   // and fall back to the local file system when no mini cluster was started,
+   // instead of dereferencing a null cluster.
+   fs = (cluster != null) ? cluster.getFileSystem() : FileSystem.getLocal(conf);
+   // path = new Path(pathWorking, this.getClass().getSimpleName());
+   path = fs.getWorkingDirectory();
+   System.out.println("path =" + path);
+
+   BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+       STR_STORAGE, false, conf);
+   writer.finish();
+   Schema schema = writer.getSchema();
+   Tuple tuple = TypesUtils.createTuple(schema);
+   BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+   int part = 0;
+   TableInserter inserter = writer1.getInserter("part" + part, true);
+   TypesUtils.resetTuple(tuple);
+
+   Tuple record1 = newRecordTuple();
+   Tuple record2 = newRecordTuple();
+   Tuple record3 = newRecordTuple();
+
+   // add data to row 1
+   // m1:map(string)
+   Map<String, String> m1 = new HashMap<String, String>();
+   m1.put("a", "A");
+   m1.put("b", "B");
+   m1.put("c", "C");
+   tuple.set(0, m1);
+
+   // m2:map(map(int))
+   Map<String, Map<String, Integer>> m2 = new HashMap<String, Map<String, Integer>>();
+   Map<String, Integer> m31 = new HashMap<String, Integer>();
+   m31.put("m311", 311);
+   m31.put("m321", 321);
+   m31.put("m331", 331);
+   Map<String, Integer> m32 = new HashMap<String, Integer>();
+   m32.put("m411", 411);
+   m32.put("m421", 421);
+   m32.put("m431", 431);
+   m2.put("x", m31);
+   m2.put("y", m32);
+   tuple.set(1, m2);
+
+   // m4:map(map(record(f1:int,f2:string)))
+   // m53 receives the same key "ma43" three times, so only the last record
+   // put (record3) remains; testReadMapOfRecord1 asserts exactly that.
+   record1.set(0, 11);
+   record1.set(1, "record row 1.1");
+   Map<String, Tuple> m51 = new HashMap<String, Tuple>();
+   Map<String, Tuple> m52 = new HashMap<String, Tuple>();
+   Map<String, Tuple> m53 = new HashMap<String, Tuple>();
+   m51.put("ma4", record1);
+   m52.put("ma41", record1);
+   m53.put("ma43", record1);
+
+   record2.set(0, 12);
+   record2.set(1, "record row 1.2");
+   m51.put("mb4", record2);
+   m52.put("mb42", record2);
+   m53.put("ma43", record2);
+   System.out.println("record1-1: " + record1.toString());
+
+   record3.set(0, 13);
+   record3.set(1, "record row 1.3");
+   // Print the record that was just filled (this line used to print record1).
+   System.out.println("record1-3: " + record3.toString());
+
+   m51.put("mc4", record3);
+   m52.put("mc42", record3);
+   m53.put("ma43", record3);
+
+   Map<String, Map<String, Tuple>> m4 = new HashMap<String, Map<String, Tuple>>();
+   m4.put("a4", m51);
+   m4.put("b4", m52);
+   m4.put("c4", m53);
+   m4.put("d4", m53);
+   m4.put("ma43", m53);
+
+   tuple.set(2, m4);
+
+   int row = 0;
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   // row 2
+   // The tuple, records and maps are reset and reused from here on.
+   // NOTE(review): this assumes insert() has already consumed the row-1
+   // values and keeps no reference to them - confirm.
+   row++;
+   TypesUtils.resetTuple(tuple);
+   TypesUtils.resetTuple(record1);
+   TypesUtils.resetTuple(record2);
+   TypesUtils.resetTuple(record3);
+   m1.clear();
+   m2.clear();
+   m31.clear();
+   m32.clear();
+   m4.clear();
+   m51.clear();
+   m52.clear();
+   m53.clear();
+   // m1:map(string)
+   m1.put("a", "A2");
+   m1.put("b2", "B2");
+   m1.put("c2", "C2");
+   tuple.set(0, m1);
+
+   // m2:map(map(int))
+   m31.put("m321", 321);
+   m31.put("m322", 322);
+   m31.put("m323", 323);
+   m2.put("z", m31);
+   tuple.set(1, m2);
+
+   // m4:map(map(record(f1:int,f2:string)))
+   record1.set(0, 21);
+   record1.set(1, "record row 2.1");
+   m51.put("ma4", record1);
+   m52.put("ma41", record1);
+   m53.put("ma43", record1);
+
+   record2.set(0, 22);
+   record2.set(1, "record row 2.2");
+   m51.put("mb4", record2);
+   m52.put("mb42", record2);
+   m53.put("ma43", record2);
+
+   record3.set(0, 33);
+   record3.set(1, "record row 3.3");
+   m51.put("mc4", record3);
+   m52.put("mc42", record3);
+   m53.put("ma43", record3);
+
+   m4.put("a4", m51);
+   m4.put("b4", m52);
+
+   m4.put("ma43", m53);
+
+   tuple.set(2, m4);
+
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   // finish building table, closing out the inserter, writer, writer1
+   inserter.close();
+   writer1.finish();
+   writer.close();
+ }
+
+ /**
+  * Creates an empty tuple for the nested record type
+  * {@code record(f1:int, f2:string)} stored inside column m4.
+  *
+  * @return a new tuple shaped after the schema "f1:int, f2:string"
+  * @throws IOException wrapping the ParseException if the schema string is
+  *         rejected
+  */
+ private static Tuple newRecordTuple() throws IOException {
+   try {
+     return TypesUtils.createTuple(new Schema("f1:int, f2:string"));
+   } catch (ParseException e) {
+     throw new IOException(e);
+   }
+ }
+
+ /**
+  * Drops the fixture table at {@code path} after all tests in this class
+  * have run.
+  * <p>
+  * NOTE(review): the Pig server created in setUpOnce is not shut down here -
+  * confirm whether that is intentional.
+  *
+  * @throws IOException if dropping the table fails
+  */
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+ BasicTable.drop(path, conf);
+ }
+
+ /**
+  * Reads a single map column: loads only key {@code a} of {@code m1} through
+  * the Pig TableLoader and checks the projected map of both rows.
+  *
+  * @throws IOException if the query or the iteration fails
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void testReadSimpleMap() throws IOException, ParseException {
+
+   String query = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('m1#{a}');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       Assert.assertEquals("{a=A}", rowValue.get(0).toString());
+     } else if (row == 2) {
+       Assert.assertEquals("{a=A2}", rowValue.get(0).toString());
+     }
+   }
+   // Without this check an empty result would let the test pass vacuously.
+   Assert.assertEquals(2, row);
+ }
+
+ /**
+  * Loads key {@code b} of {@code m1} and keys {@code x|z} of the nested map
+  * {@code m2}, then checks that each row exposes exactly the projected keys
+  * that were written for it.
+  *
+  * @throws IOException if the query or the iteration fails
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void testReadMapOfMap() throws IOException, ParseException {
+   String query = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('m1#{b}, m2#{x|z}');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       Map<?, ?> m1 = (Map<?, ?>) rowValue.get(0);
+       Map<?, ?> m2 = (Map<?, ?>) rowValue.get(1);
+       Assert.assertEquals("B", m1.get("b"));
+       Map<?, ?> x = (Map<?, ?>) m2.get("x");
+       Assert.assertEquals(321, x.get("m321"));
+       Assert.assertEquals(311, x.get("m311"));
+       Assert.assertEquals(331, x.get("m331"));
+       Assert.assertNull(x.get("m341"));
+       // Row 1 was written without "z"; "a" and "c" are not projected.
+       Assert.assertNull(m2.get("z"));
+       Assert.assertNull(m1.get("a"));
+       Assert.assertNull(m1.get("c"));
+     } else if (row == 2) {
+       Map<?, ?> m1 = (Map<?, ?>) rowValue.get(0);
+       Map<?, ?> m2 = (Map<?, ?>) rowValue.get(1);
+       // Row 2 was written with m1 keys a/b2/c2 and only "z" in m2.
+       Assert.assertNull(m1.get("b"));
+       Assert.assertNull(m2.get("x"));
+       Map<?, ?> z = (Map<?, ?>) m2.get("z");
+       Assert.assertEquals(323, z.get("m323"));
+       Assert.assertEquals(322, z.get("m322"));
+       Assert.assertEquals(321, z.get("m321"));
+       Assert.assertNull(m1.get("a"));
+       Assert.assertNull(m2.get("a"));
+     }
+   }
+   // Without this check an empty result would let the test pass vacuously.
+   Assert.assertEquals(2, row);
+ }
+
+ /**
+  * Loads key {@code b} of {@code m1} and keys {@code a4|c4} of {@code m4}
+  * (a map of maps of records), then checks the record fields of both rows.
+  *
+  * @throws IOException if the query, the iteration or a tuple access fails
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void testReadMapOfRecord1() throws IOException, ParseException {
+   String query = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('m1#{b}, m4#{a4|c4}');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       Assert.assertEquals("B", ((Map<?, ?>) rowValue.get(0)).get("b"));
+       Map<?, ?> m4 = (Map<?, ?>) rowValue.get(1);
+       Map<?, ?> a4 = (Map<?, ?>) m4.get("a4");
+       assertRecord(11, "record row 1.1", a4.get("ma4"));
+       assertRecord(12, "record row 1.2", a4.get("mb4"));
+       assertRecord(13, "record row 1.3", a4.get("mc4"));
+
+       // "ma43" was put three times in setUpOnce; the last record is kept.
+       Map<?, ?> c4 = (Map<?, ?>) m4.get("c4");
+       assertRecord(13, "record row 1.3", c4.get("ma43"));
+       Assert.assertNull(c4.get("mc4"));
+       Assert.assertNull(c4.get("mb4"));
+     } else if (row == 2) {
+       Map<?, ?> m4 = (Map<?, ?>) rowValue.get(1);
+       Map<?, ?> a4 = (Map<?, ?>) m4.get("a4");
+       assertRecord(21, "record row 2.1", a4.get("ma4"));
+       assertRecord(22, "record row 2.2", a4.get("mb4"));
+       assertRecord(33, "record row 3.3", a4.get("mc4"));
+     }
+   }
+   // Without this check an empty result would let the test pass vacuously.
+   Assert.assertEquals(2, row);
+ }
+
+ /**
+  * Asserts that {@code actual} is a record tuple whose first two fields equal
+  * the expected values.
+  *
+  * @param expectedF1 expected value of field 0 (f1:int)
+  * @param expectedF2 expected value of field 1 (f2:string)
+  * @param actual the map entry under test; must be a {@code Tuple}
+  * @throws IOException if a field of the tuple cannot be read
+  */
+ private static void assertRecord(int expectedF1, String expectedF2,
+     Object actual) throws IOException {
+   Tuple record = (Tuple) actual;
+   Assert.assertEquals(expectedF1, record.get(0));
+   Assert.assertEquals(expectedF2, record.get(1));
+ }
+
+ @Test
+ // Positive? map object column through pig loader
+ public void testRead4() throws IOException, ParseException {
+ String query = "records = LOAD '" + path.toString()
+ + "' USING org.apache.hadoop.zebra.pig.TableLoader('m1');";
+
+ pigServer.registerQuery(query);
+ Iterator<Tuple> it = pigServer.openIterator("records");
+ int row = 0;
+ while (it.hasNext()) {
+ Tuple RowValue = it.next();
+ System.out.println(RowValue);
+ row++;
+ if (row == 1) {
+ Assert.assertEquals("A", ((Map) RowValue.get(0)).get("a"));
+ Assert.assertEquals("B", ((Map) RowValue.get(0)).get("b"));
+ Assert.assertEquals("C", ((Map) RowValue.get(0)).get("c"));
+ Assert.assertEquals(null, ((Map) RowValue.get(0)).get("non-existent"));
+ Assert.assertEquals(null, ((Map) RowValue.get(0)).get("b2"));
+ Assert.assertEquals(null, ((Map) RowValue.get(0)).get("c2"));
+ }
+ if (row == 2) {
+ Assert.assertEquals("A2", ((Map) RowValue.get(0)).get("a"));
+ Assert.assertEquals("B2", ((Map) RowValue.get(0)).get("b2"));
+ Assert.assertEquals("C2", ((Map) RowValue.get(0)).get("c2"));
+ Assert.assertEquals(null, ((Map) RowValue.get(0)).get("non-existent"));
+ Assert.assertEquals(null, ((Map) RowValue.get(0)).get("b"));
+ Assert.assertEquals(null, ((Map) RowValue.get(0)).get("c"));
+ }
+ }
+ Assert.assertEquals(2, row);
+ }
+
+ /**
+  * Negative case: projects the non-existent column {@code m5} through the
+  * Pig loader. The first row is expected to be a non-null tuple of size one
+  * holding a null field, and two rows are expected overall.
+  *
+  * @throws IOException if the query or the iteration fails
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void testReadNeg1() throws IOException, ParseException {
+   final String loadStatement = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('m5');";
+
+   pigServer.registerQuery(loadStatement);
+
+   int rowsSeen = 0;
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows.hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+     rowsSeen++;
+     if (rowsSeen != 1) {
+       // Only the first row is inspected in detail.
+       continue;
+     }
+     Assert.assertEquals(false, current.isNull());
+     Assert.assertEquals(null, current.get(0));
+     Assert.assertEquals(1, current.size());
+   }
+   Assert.assertEquals(2, rowsSeen);
+ }
+
+ /**
+  * Negative case through the table I/O layer: projects the non-existent
+  * column {@code m5} with a {@code BasicTable.Reader} and checks that both
+  * rows (keys {@code k11} and {@code k12}) come back as non-null tuples of
+  * size one holding a null field.
+  * <p>
+  * The scanner and the reader are closed in {@code finally} blocks so a
+  * failing assertion does not leak them.
+  *
+  * @throws IOException if reading the table fails
+  * @throws ParseException if the projection string is rejected
+  */
+ @Test
+ public void testNeg2() throws IOException, ParseException {
+   BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+   try {
+     reader.setProjection("m5");
+
+     List<RangeSplit> splits = reader.rangeSplit(1);
+     TableScanner scanner = reader.getScanner(splits.get(0), true);
+     try {
+       BytesWritable key = new BytesWritable();
+       Tuple rowValue = TypesUtils.createTuple(scanner.getSchema());
+
+       // Row 1. Expected value goes first so failure messages read correctly.
+       scanner.getKey(key);
+       Assert.assertEquals(new BytesWritable("k11".getBytes()), key);
+       scanner.getValue(rowValue);
+       Assert.assertEquals(false, rowValue.isNull());
+       Assert.assertEquals(null, rowValue.get(0));
+       Assert.assertEquals(1, rowValue.size());
+
+       // Row 2.
+       scanner.advance();
+       scanner.getKey(key);
+       Assert.assertEquals(new BytesWritable("k12".getBytes()), key);
+       scanner.getValue(rowValue);
+       Assert.assertEquals(false, rowValue.isNull());
+       Assert.assertEquals(null, rowValue.get(0));
+       Assert.assertEquals(1, rowValue.size());
+     } finally {
+       scanner.close();
+     }
+   } finally {
+     reader.close();
+   }
+ }
+}
\ No newline at end of file
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMixedType1.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMixedType1.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMixedType1.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMixedType1.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,322 @@
+package org.apache.hadoop.zebra.pig;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.types.ParseException;
+import org.apache.hadoop.zebra.types.Projection;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test projections on complicated column types.
+ *
+ */
+public class TestMixedType1 {
+ final static String STR_SCHEMA = "s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes, r1:record(f1:int, f2:long), r2:record(r3:record(f3:float, f4)), m1:map(string),m2:map(map(int)), c:collection(f13:double, f14:float, f15:bytes)";
+ final static String STR_STORAGE = "[s1, s2]; [m1#{a}]; [r1.f1]; [s3, s4, r2.r3.f3]; [s5, s6, m2#{x|y}]; [r1.f2, m1#{b}]; [r2.r3.f4, m2#{z}]";
+ private static Configuration conf;
+ private static FileSystem fs;
+
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ private static MiniCluster cluster;
+ protected static PigServer pigServer;
+ private static Path path;
+
+ /**
+  * Creates the Pig server and writes the two-row fixture table that the tests
+  * in this class load.
+  * <p>
+  * The table is created at the file system's working directory with schema
+  * {@code STR_SCHEMA} and storage layout {@code STR_STORAGE}. Both rows are
+  * inserted through a single inserter ("part0") under the keys {@code k11}
+  * and {@code k12}.
+  *
+  * @throws IOException if the cluster, the table writer or the inserter
+  *         fails, or if a nested record schema cannot be resolved
+  */
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+   System.out.println("ONCE SETUP !! ---------");
+   if (System.getProperty("hadoop.log.dir") == null) {
+     String base = new File(".").getPath(); // getAbsolutePath();
+     System
+         .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+   }
+
+   if (execType == ExecType.MAPREDUCE) {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   } else {
+     pigServer = new PigServer(ExecType.LOCAL);
+   }
+
+   conf = new Configuration();
+   // Assign the static field (previously hidden by a local of the same name)
+   // and fall back to the local file system when no mini cluster was started,
+   // instead of dereferencing a null cluster.
+   fs = (cluster != null) ? cluster.getFileSystem() : FileSystem.getLocal(conf);
+   // path = new Path(pathWorking, this.getClass().getSimpleName());
+   path = fs.getWorkingDirectory();
+   System.out.println("path =" + path);
+
+   BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+       STR_STORAGE, false, conf);
+   writer.finish();
+
+   Schema schema = writer.getSchema();
+   Tuple tuple = TypesUtils.createTuple(schema);
+   BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+   int part = 0;
+   TableInserter inserter = writer1.getInserter("part" + part, true);
+   TypesUtils.resetTuple(tuple);
+
+   // Tuples for r1, r2 and the record r3 nested inside r2.
+   Tuple tupRecord1;
+   Tuple tupRecord2;
+   Tuple tupRecord3;
+   try {
+     tupRecord1 = TypesUtils.createTuple(schema.getColumnSchema("r1")
+         .getSchema());
+     tupRecord2 = TypesUtils.createTuple(schema.getColumnSchema("r2")
+         .getSchema());
+     tupRecord3 = TypesUtils.createTuple(new Schema("f3:float, f4"));
+   } catch (ParseException e) {
+     throw new IOException(e);
+   }
+
+   // row 1
+   tuple.set(0, true); // bool
+   tuple.set(1, 1); // int
+   tuple.set(2, 1001L); // long
+   // NOTE(review): 1.1 is a double literal although s4 is declared float in
+   // STR_SCHEMA - confirm the writer accepts it.
+   tuple.set(3, 1.1); // float
+   tuple.set(4, "hello world 1"); // string
+   tuple.set(5, new DataByteArray("hello byte 1")); // byte
+
+   // r1:record(f1:int, f2:long
+   tupRecord1.set(0, 1);
+   tupRecord1.set(1, 1001L);
+   tuple.set(6, tupRecord1);
+
+   // r2:record(r3:record(f3:float, f4))
+   // tupRecord3 is attached first and filled afterwards; r2 holds the same
+   // tuple reference, so it sees the values set below.
+   tupRecord2.set(0, tupRecord3);
+   tupRecord3.set(0, 1.3);
+   tupRecord3.set(1, new DataByteArray("r3 row 1 byte array "));
+   tuple.set(7, tupRecord2);
+
+   // m1:map(string)
+   Map<String, String> m1 = new HashMap<String, String>();
+   m1.put("a", "A");
+   m1.put("b", "B");
+   m1.put("c", "C");
+   tuple.set(8, m1);
+
+   // m2:map(map(int))
+   Map<String, Map<String, Integer>> m2 = new HashMap<String, Map<String, Integer>>();
+   Map<String, Integer> m3 = new HashMap<String, Integer>();
+   m3.put("m311", 311);
+   m3.put("m321", 321);
+   m3.put("m331", 331);
+   Map<String, Integer> m4 = new HashMap<String, Integer>();
+   m4.put("m411", 411);
+   m4.put("m421", 421);
+   m4.put("m431", 431);
+   m2.put("x", m3);
+   m2.put("y", m4);
+   tuple.set(9, m2);
+
+   // c:collection(f13:double, f14:float, f15:bytes)
+   DataBag bagColl = TypesUtils.createBag();
+   Schema schColl = schema.getColumn(10).getSchema();
+   Tuple tupColl1 = TypesUtils.createTuple(schColl);
+   Tuple tupColl2 = TypesUtils.createTuple(schColl);
+   byte[] abs1 = new byte[3];
+   byte[] abs2 = new byte[4];
+   tupColl1.set(0, 3.1415926);
+   tupColl1.set(1, 1.6);
+   abs1[0] = 11;
+   abs1[1] = 12;
+   abs1[2] = 13;
+   tupColl1.set(2, new DataByteArray(abs1));
+   bagColl.add(tupColl1);
+   tupColl2.set(0, 123.456789);
+   tupColl2.set(1, 100);
+   abs2[0] = 21;
+   abs2[1] = 22;
+   abs2[2] = 23;
+   abs2[3] = 24;
+   tupColl2.set(2, new DataByteArray(abs2));
+   bagColl.add(tupColl2);
+   tuple.set(10, bagColl);
+
+   int row = 0;
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   // row 2
+   // The tuple, records, maps, bag and byte arrays are reset and reused from
+   // here on. NOTE(review): this assumes insert() has already consumed the
+   // row-1 values and keeps no reference to them - confirm.
+   row++;
+   TypesUtils.resetTuple(tuple);
+   TypesUtils.resetTuple(tupRecord1);
+   TypesUtils.resetTuple(tupRecord2);
+   TypesUtils.resetTuple(tupRecord3);
+   m1.clear();
+   m2.clear();
+   m3.clear();
+   m4.clear();
+   tuple.set(0, false);
+   tuple.set(1, 2); // int
+   tuple.set(2, 1002L); // long
+   tuple.set(3, 3.1); // float
+   tuple.set(4, "hello world 2"); // string
+   tuple.set(5, new DataByteArray("hello byte 2")); // byte
+
+   // r1:record(f1:int, f2:long
+   tupRecord1.set(0, 2);
+   tupRecord1.set(1, 1002L);
+   tuple.set(6, tupRecord1);
+
+   // r2:record(r3:record(f3:float, f4))
+   tupRecord2.set(0, tupRecord3);
+   tupRecord3.set(0, 2.3);
+   tupRecord3.set(1, new DataByteArray("r3 row2 byte array"));
+   tuple.set(7, tupRecord2);
+
+   // m1:map(string)
+   m1.put("a2", "A2");
+   m1.put("b2", "B2");
+   m1.put("c2", "C2");
+   tuple.set(8, m1);
+
+   // m2:map(map(int))
+   m3.put("m321", 321);
+   m3.put("m322", 322);
+   m3.put("m323", 323);
+   m2.put("z", m3);
+   tuple.set(9, m2);
+
+   // c:collection(f13:double, f14:float, f15:bytes)
+   bagColl.clear();
+   TypesUtils.resetTuple(tupColl1);
+   TypesUtils.resetTuple(tupColl2);
+   tupColl1.set(0, 7654.321);
+   tupColl1.set(1, 0.0001);
+   abs1[0] = 31;
+   abs1[1] = 32;
+   abs1[2] = 33;
+   tupColl1.set(2, new DataByteArray(abs1));
+   bagColl.add(tupColl1);
+   tupColl2.set(0, 0.123456789);
+   tupColl2.set(1, 0.3333);
+   abs2[0] = 41;
+   abs2[1] = 42;
+   abs2[2] = 43;
+   abs2[3] = 44;
+   tupColl2.set(2, new DataByteArray(abs2));
+   bagColl.add(tupColl2);
+   tuple.set(10, bagColl);
+
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   inserter.close();
+   writer1.finish();
+
+   writer.close();
+ }
+
+ /**
+  * Drops the fixture table at {@code path} after all tests in this class
+  * have run.
+  * <p>
+  * NOTE(review): the Pig server created in setUpOnce is not shut down here -
+  * confirm whether that is intentional.
+  *
+  * @throws IOException if dropping the table fails
+  */
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+ BasicTable.drop(path, conf);
+ }
+
+ /**
+  * Loads the record field {@code r1.f2} together with the simple column
+  * {@code s1} and checks the values of both rows.
+  *
+  * @throws IOException if the query or the iteration fails
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void test1() throws IOException, ParseException {
+   String query = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('r1.f2, s1');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       Assert.assertEquals(1001L, rowValue.get(0));
+       Assert.assertEquals(true, rowValue.get(1));
+     } else if (row == 2) {
+       Assert.assertEquals(1002L, rowValue.get(0));
+       Assert.assertEquals(false, rowValue.get(1));
+     }
+   }
+   // Without this check an empty result would let the test pass vacuously.
+   Assert.assertEquals(2, row);
+ }
+
+ /**
+  * Loads {@code s1} together with the whole record {@code r1}, whose fields
+  * f1 and f2 are placed in different column groups by {@code STR_STORAGE},
+  * and checks that the record comes back with both fields for every row.
+  *
+  * @throws IOException if the query, the iteration or a tuple access fails
+  * @throws ParseException declared for parity with the other tests
+  */
+ @Test
+ public void testStitch() throws IOException, ParseException {
+   String query = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('s1, r1');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       Tuple recordTuple = (Tuple) rowValue.get(1);
+       Assert.assertEquals(1, recordTuple.get(0));
+       Assert.assertEquals(1001L, recordTuple.get(1));
+       Assert.assertEquals(true, rowValue.get(0));
+     } else if (row == 2) {
+       // Values written for row 2 in setUpOnce: s1=false, r1=(2, 1002L).
+       Tuple recordTuple = (Tuple) rowValue.get(1);
+       Assert.assertEquals(2, recordTuple.get(0));
+       Assert.assertEquals(1002L, recordTuple.get(1));
+       Assert.assertEquals(false, rowValue.get(0));
+     }
+   }
+   // Without this check an empty result would let the test pass vacuously.
+   Assert.assertEquals(2, row);
+ }
+}
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestRealCluster.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestRealCluster.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestRealCluster.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestRealCluster.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.Tuple;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Manual driver that loads a zebra table through Pig on a real (non-mini)
+ * cluster and prints every tuple.
+ * <p>
+ * Note: make sure you add build/pig-0.1.0-dev-core.jar to the classpath of
+ * the app/debug configuration when running this from inside Eclipse.
+ */
+public class TestRealCluster {
+
+  /** Table path used when no first argument is given. */
+  private static final String DEFAULT_TABLE_PATH = "/user/harmeek/outputdata1/t1";
+
+  /** Zebra jar registered with Pig when no second argument is given. */
+  private static final String DEFAULT_ZEBRA_JAR = "/homes/harmeek/july14_investigation/pig-zebra-jul14.jar";
+
+  /**
+   * Loads the table with the zebra TableLoader and prints each tuple.
+   *
+   * @param args optional overrides: {@code args[0]} is the table path and
+   *        {@code args[1]} the zebra jar to register; the built-in defaults
+   *        are used for any argument that is missing
+   * @throws Exception if the Pig server cannot be created or the query fails
+   */
+  public static void main(String[] args) throws Exception {
+    final String tablePath = (args != null && args.length > 0) ? args[0]
+        : DEFAULT_TABLE_PATH;
+    final String zebraJar = (args != null && args.length > 1) ? args[1]
+        : DEFAULT_ZEBRA_JAR;
+
+    Configuration conf = new Configuration();
+    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, ConfigurationUtil
+        .toProperties(conf));
+    try {
+      pigServer.registerJar(zebraJar);
+      String query = "records = LOAD '" + tablePath
+          + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+      pigServer.registerQuery(query);
+
+      Iterator<Tuple> it = pigServer.openIterator("records");
+      while (it.hasNext()) {
+        Tuple cur = it.next();
+        System.out.println(cur);
+      }
+    } finally {
+      // Shut the server down even when loading or iterating fails.
+      pigServer.shutdown();
+    }
+    /*
+     * Use pig STORE to store testing data
+     */
+    /*
+     * pigServer.store("records", new Path(pathTable, "store").toString(),
+     * TableStorer.class.getCanonicalName() +
+     * "('SF_a,SF_b,SF_c,SF_d,SF_e,SF_f,SF_g', '[SF_a, SF_b, SF_c]; [SF_e]')" );
+     */
+  }
+}
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java?rev=803312&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java Tue Aug 11 22:27:44 2009
@@ -0,0 +1,554 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.pig;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.StringTokenizer;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.pig.TableStorer;
+import org.apache.hadoop.zebra.types.ParseException;
+import org.apache.hadoop.zebra.types.Projection;
+import org.apache.hadoop.zebra.types.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniCluster;
+import org.apache.pig.backend.executionengine.ExecJob;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test loading and storing a table whose columns use the simple types (bool,
+ * int, long, float, string, bytes) through Pig.
+ *
+ */
+public class TestSimpleType {
+
+ // Schema of the test table: six columns, one per simple type under test.
+ final static String STR_SCHEMA = "s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes";
+ // Storage hint for the test table: three column groups of two columns each.
+ final static String STR_STORAGE = "[s1, s2]; [s3, s4]; [s5, s6]";
+ // Hadoop configuration used to create and to drop the test table.
+ private static Configuration conf;
+ private static FileSystem fs;
+
+ // Execution mode; MAPREDUCE makes setUpOnce start a MiniCluster.
+ protected static ExecType execType = ExecType.MAPREDUCE;
+ // Mini cluster built in setUpOnce when execType is MAPREDUCE.
+ private static MiniCluster cluster;
+ // Pig server shared by every test method of this class.
+ protected static PigServer pigServer;
+ // Location of the test table written by setUpOnce and dropped by tearDownOnce.
+ private static Path path;
+
+ /**
+  * One-time fixture: starts the Pig server (on a MiniCluster when execType is
+  * MAPREDUCE, locally otherwise) and writes a two-row table with schema
+  * STR_SCHEMA and storage hint STR_STORAGE into the file system's working
+  * directory.
+  *
+  * @throws IOException
+  *           if the cluster, the Pig server or the table cannot be set up
+  */
+ @BeforeClass
+ public static void setUpOnce() throws IOException {
+   System.out.println("ONCE SETUP !! ---------");
+   if (System.getProperty("hadoop.log.dir") == null) {
+     String base = new File(".").getPath(); // getAbsolutePath();
+     // NOTE(review): with base "." this yields "../logs" (parent directory);
+     // kept as is, confirm whether "./logs" was intended.
+     System
+         .setProperty("hadoop.log.dir", new Path(base).toString() + "./logs");
+   }
+
+   if (execType == ExecType.MAPREDUCE) {
+     cluster = MiniCluster.buildCluster();
+     pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+   } else {
+     pigServer = new PigServer(ExecType.LOCAL);
+   }
+
+   // Created after the cluster, as in the original ordering; presumably so
+   // the configuration picks up the mini cluster's settings (TODO confirm).
+   conf = new Configuration();
+   // Assign the static field (it used to be shadowed by a local variable) and
+   // fall back to the local file system when no mini cluster was started.
+   fs = (cluster != null) ? cluster.getFileSystem() : FileSystem.getLocal(conf);
+   // path = new Path(pathWorking, this.getClass().getSimpleName());
+   path = fs.getWorkingDirectory();
+   System.out.println("path =" + path);
+
+   BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+       STR_STORAGE, false, conf);
+   Schema schema = writer.getSchema();
+
+   BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+   int part = 0;
+   TableInserter inserter = writer1.getInserter("part" + part, true);
+
+   Tuple tuple = TypesUtils.createTuple(schema);
+   TypesUtils.resetTuple(tuple);
+
+   // insert data in row 1
+   int row = 0;
+   tuple.set(0, true); // bool
+   tuple.set(1, 1); // int
+   tuple.set(2, 1001L); // long
+   tuple.set(3, 1.1); // float
+   tuple.set(4, "hello world 1"); // string
+   tuple.set(5, new DataByteArray("hello byte 1")); // byte
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   // insert data in row 2
+   row++;
+   TypesUtils.resetTuple(tuple);
+
+   tuple.set(0, false);
+   tuple.set(1, 2); // int
+   tuple.set(2, 1002L); // long
+   tuple.set(3, 3.1); // float
+   tuple.set(4, "hello world 2"); // string
+   tuple.set(5, new DataByteArray("hello byte 2")); // byte
+   inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+       .getBytes()), tuple);
+
+   inserter.close();
+   writer1.finish();
+   writer.close();
+ }
+
+ /**
+  * One-time cleanup: drops the table written by setUpOnce.
+  *
+  * @throws IOException
+  *           if the table cannot be dropped
+  */
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+   // JUnit runs @AfterClass even when @BeforeClass failed; skip the drop if
+   // the fixture never got far enough, so the original failure is not masked.
+   if (path == null || conf == null) {
+     return;
+   }
+   BasicTable.drop(path, conf);
+ }
+
+ /**
+ * Return the name of the routine that called getCurrentMethodName
+ *
+ */
+ public String getCurrentMethodName() {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintWriter pw = new PrintWriter(baos);
+ (new Throwable()).printStackTrace(pw);
+ pw.flush();
+ String stackTrace = baos.toString();
+ pw.close();
+
+ StringTokenizer tok = new StringTokenizer(stackTrace, "\n");
+ tok.nextToken(); // 'java.lang.Throwable'
+ tok.nextToken(); // 'at ...getCurrentMethodName'
+ String l = tok.nextToken(); // 'at ...<caller to getCurrentRoutine>'
+ // Parse line 3
+ tok = new StringTokenizer(l.trim(), " <(");
+ String t = tok.nextToken(); // 'at'
+ t = tok.nextToken(); // '...<caller to getCurrentRoutine>'
+ return t;
+ }
+
+ // @Test
+ // Test reader: project two columns (s5, s1) that live in different column
+ // groups and check both rows written by setUpOnce.
+ public void testReadSimpleStitch() throws IOException, ParseException {
+   String query = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('s5,s1');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       Assert.assertEquals("hello world 1", rowValue.get(0));
+       Assert.assertEquals(true, rowValue.get(1));
+     }
+     if (row == 2) {
+       Assert.assertEquals("hello world 2", rowValue.get(0));
+       Assert.assertEquals(false, rowValue.get(1));
+     }
+   }
+   // setUpOnce inserts exactly two rows; without this check the test would
+   // pass vacuously when the loader returns nothing.
+   Assert.assertEquals(2, row);
+ }
+
+ // @Test
+ // Test reader: project all six columns in reverse order and check both rows
+ // written by setUpOnce.
+ public void testReadSimple1() throws IOException, ParseException {
+   String query = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
+   System.out.println(query);
+   pigServer.registerQuery(query);
+   Iterator<Tuple> it = pigServer.openIterator("records");
+   int row = 0;
+   while (it.hasNext()) {
+     Tuple rowValue = it.next();
+     System.out.println(rowValue);
+     row++;
+     if (row == 1) {
+       // Assert.assertEquals(key, new
+       // BytesWritable("k11".getBytes()));
+       Assert.assertEquals(true, rowValue.get(5));
+       Assert.assertEquals(1, rowValue.get(4));
+       Assert.assertEquals(1001L, rowValue.get(3));
+       Assert.assertEquals(1.1, rowValue.get(2));
+       Assert.assertEquals("hello world 1", rowValue.get(1));
+       Assert.assertEquals("hello byte 1", rowValue.get(0).toString());
+     }
+     if (row == 2) {
+       Assert.assertEquals(false, rowValue.get(5));
+       Assert.assertEquals(2, rowValue.get(4));
+       Assert.assertEquals(1002L, rowValue.get(3));
+       Assert.assertEquals(3.1, rowValue.get(2));
+       Assert.assertEquals("hello world 2", rowValue.get(1));
+       Assert.assertEquals("hello byte 2", rowValue.get(0).toString());
+     }
+   }
+   // setUpOnce inserts exactly two rows; without this check the test would
+   // pass vacuously when the loader returns nothing.
+   Assert.assertEquals(2, row);
+ }
+
+ // @Test
+ // Test reader, negative. not exist field in the projection
+ public void testRead2() throws IOException, ParseException {
+ try {
+ String query = "records = LOAD '" + path.toString()
+ + "' USING org.apache.hadoop.zebra.pig.TableLoader('s7');";
+ Assert.fail("Project should not take non-existent fields");
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+
+ }
+
+ @Test
+ // Load the source table, print it, and store it again under
+ // "<caller name>/store" with a two-group storage hint
+ public void testStorer() throws ExecException, IOException {
+   // Pig LOAD of the table written by setUpOnce
+   final String loadStatement = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   // Pig STORE of the same records; getCurrentMethodName must be called
+   // directly from this method so that it reports this test's name
+   final Path parent = new Path(getCurrentMethodName());
+   final String target = new Path(parent, "store").toString();
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1, s2]; [s3, s4]')";
+   pigServer.store("records", target, storerSpec);
+
+ }
+
+ @Test
+ // Store only the rows with s2 >= 2 (the second source row) into a new table
+ // and verify the stored content
+ public void testStorer2() throws ExecException, IOException {
+   // Step 1: load the source table
+   final String loadSource = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
+   System.out.println(loadSource);
+   pigServer.registerQuery(loadSource);
+
+   // Step 2: keep only the matching rows
+   final String filterStatement = "newRecord = FILTER records BY (s2 >= 2);";
+   pigServer.registerQuery(filterStatement);
+
+   // Step 3: write them to a table named after this test method
+   final Path target = new Path(getCurrentMethodName());
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1, s2]; [s3, s4]')";
+   pigServer.store("newRecord", target.toString(), storerSpec);
+
+   // Step 4: read the new table back with the columns in reverse order
+   final String loadStored = "newRecords = LOAD '"
+       + target.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
+   System.out.println(loadStored);
+   pigServer.registerQuery(loadStored);
+
+   int seen = 0;
+   for (Iterator<Tuple> stored = pigServer.openIterator("newRecords"); stored
+       .hasNext();) {
+     Tuple current = stored.next();
+     seen++;
+     if (seen != 1) {
+       continue;
+     }
+     // Only the first row is checked field by field
+     Assert.assertEquals(false, current.get(5));
+     Assert.assertEquals(2, current.get(4));
+     Assert.assertEquals(1002L, current.get(3));
+     Assert.assertEquals(3.1, current.get(2));
+     Assert.assertEquals("hello world 2", current.get(1));
+     Assert.assertEquals("hello byte 2", current.get(0).toString());
+   }
+   Assert.assertEquals(1, seen);
+ }
+
+ @Test
+ // Same as testStorer2, but the storage hint passed to TableStorer is the
+ // empty string
+ public void testStorer3() throws ExecException, IOException {
+   // Step 1: load the source table
+   final String loadSource = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
+   System.out.println(loadSource);
+   pigServer.registerQuery(loadSource);
+
+   // Step 2: keep only the matching rows
+   final String filterStatement = "newRecord = FILTER records BY (s2 >= 2);";
+   pigServer.registerQuery(filterStatement);
+
+   // Step 3: write them to a table named after this test method
+   final Path target = new Path(getCurrentMethodName());
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '')";
+   pigServer.store("newRecord", target.toString(), storerSpec);
+
+   // Step 4: read the new table back with the columns in reverse order
+   final String loadStored = "newRecords = LOAD '"
+       + target.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
+   System.out.println(loadStored);
+   pigServer.registerQuery(loadStored);
+
+   int seen = 0;
+   for (Iterator<Tuple> stored = pigServer.openIterator("newRecords"); stored
+       .hasNext();) {
+     Tuple current = stored.next();
+     seen++;
+     if (seen != 1) {
+       continue;
+     }
+     // Only the first row is checked field by field
+     Assert.assertEquals(false, current.get(5));
+     Assert.assertEquals(2, current.get(4));
+     Assert.assertEquals(1002L, current.get(3));
+     Assert.assertEquals(3.1, current.get(2));
+     Assert.assertEquals("hello world 2", current.get(1));
+     Assert.assertEquals("hello byte 2", current.get(0).toString());
+   }
+   Assert.assertEquals(1, seen);
+ }
+
+ @Test
+ // Same as testStorer2, but the storage hint passed to TableStorer is a
+ // single empty column group ('[]')
+ public void testStorer4() throws ExecException, IOException {
+   // Step 1: load the source table
+   final String loadSource = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
+   System.out.println(loadSource);
+   pigServer.registerQuery(loadSource);
+
+   // Step 2: keep only the matching rows
+   final String filterStatement = "newRecord = FILTER records BY (s2 >= 2);";
+   pigServer.registerQuery(filterStatement);
+
+   // Step 3: write them to a table named after this test method
+   final Path target = new Path(getCurrentMethodName());
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[]')";
+   pigServer.store("newRecord", target.toString(), storerSpec);
+
+   // Step 4: read the new table back with the columns in reverse order
+   final String loadStored = "newRecords = LOAD '"
+       + target.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
+   System.out.println(loadStored);
+   pigServer.registerQuery(loadStored);
+
+   int seen = 0;
+   for (Iterator<Tuple> stored = pigServer.openIterator("newRecords"); stored
+       .hasNext();) {
+     Tuple current = stored.next();
+     seen++;
+     if (seen != 1) {
+       continue;
+     }
+     // Only the first row is checked field by field
+     Assert.assertEquals(false, current.get(5));
+     Assert.assertEquals(2, current.get(4));
+     Assert.assertEquals(1002L, current.get(3));
+     Assert.assertEquals(3.1, current.get(2));
+     Assert.assertEquals("hello world 2", current.get(1));
+     Assert.assertEquals("hello byte 2", current.get(0).toString());
+   }
+   Assert.assertEquals(1, seen);
+ }
+
+ @Test
+ // negative: the storer schema lists fewer columns than the input tuples
+ // (s1 is missing); the store job is expected to report an exception
+ public void testStorerNegative1() throws ExecException, IOException {
+
+   final String loadStatement = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   // getCurrentMethodName must be called directly from this method
+   final Path parent = new Path(getCurrentMethodName());
+   final String target = new Path(parent, "store").toString();
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1, s2]; [s3, s4]')";
+   ExecJob job = pigServer.store("records", target, storerSpec);
+   Assert.assertNotNull(job.getException());
+   System.out.println(job.getException());
+ }
+
+ @Test
+ // negative: the storage hint places column s1 in two different column
+ // groups; the store job is expected to report an exception
+ public void testStorerNegative2() throws ExecException, IOException {
+
+   final String loadStatement = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   // getCurrentMethodName must be called directly from this method
+   final Path parent = new Path(getCurrentMethodName());
+   final String target = new Path(parent, "store").toString();
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1, s2]; [s1, s4]')";
+   ExecJob job = pigServer.store("records", target, storerSpec);
+   Assert.assertNotNull(job.getException());
+   System.out.println(job.getException());
+ }
+
+ @Test
+ // negative: the storage hint repeats the same column group ([s1]) twice;
+ // the store job is expected to report an exception
+ public void testStorerNegative3() throws ExecException, IOException {
+
+   final String loadStatement = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   // getCurrentMethodName must be called directly from this method
+   final Path parent = new Path(getCurrentMethodName());
+   final String target = new Path(parent, "store").toString();
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1]; [s1]')";
+   ExecJob job = pigServer.store("records", target, storerSpec);
+   Assert.assertNotNull(job.getException());
+   System.out.println(job.getException());
+ }
+
+ // @Test
+ // negative: the storer schema declares s1 as int although the table was
+ // written with s1 as bool; the store job is expected to report an exception
+ public void testStorerNegative4() throws ExecException, IOException {
+
+   final String loadStatement = "records = LOAD '" + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
+   pigServer.registerQuery(loadStatement);
+
+   for (Iterator<Tuple> rows = pigServer.openIterator("records"); rows
+       .hasNext();) {
+     Tuple current = rows.next();
+     System.out.println(current);
+   }
+
+   // getCurrentMethodName must be called directly from this method
+   final Path parent = new Path(getCurrentMethodName());
+   final String target = new Path(parent, "store").toString();
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:int, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1, s2]; [s3, s4]')";
+   ExecJob job = pigServer.store("records", target, storerSpec);
+   Assert.assertNotNull(job.getException());
+   System.out.println(job.getException());
+ }
+
+ @Test
+ // negative: store the loaded records back to the path of the source table
+ // itself; the store job is expected to report an exception
+ public void testStorer5() throws ExecException, IOException {
+   // Pig LOAD of the table written by setUpOnce
+   final String loadStatement = "records = LOAD '"
+       + path.toString()
+       + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
+   pigServer.registerQuery(loadStatement);
+
+   // Pig STORE aimed at the location the records were loaded from
+   final String target = path.toString();
+   final String storerSpec = TableStorer.class.getCanonicalName()
+       + "('s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes', '[s1, s2]; [s3, s4]')";
+   ExecJob job = pigServer.store("records", target, storerSpec);
+   Assert.assertNotNull(job.getException());
+   System.out.println(job.getException());
+ }
+
+}