You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/11/24 20:54:34 UTC
svn commit: r883836 [8/23] - in /hadoop/pig/branches/load-store-redesign: ./ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/ contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/ contrib/zebra/ contrib/zebra...

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDropColumnGroup.java Tue Nov 24 19:54:19 2009
@@ -27,6 +27,7 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.zebra.parser.ParseException;
 
@@ -48,6 +49,7 @@
   Log LOG = LogFactory.getLog(TestDropColumnGroup.class);
   private static Path path;
   private static Configuration conf;
+  private static FileSystem fs;
 
   @BeforeClass
   public static void setUpOnce() throws IOException {
@@ -55,7 +57,7 @@
     path = new Path(TestBasicTable.rootPath, "DropCGTest");
     conf = TestBasicTable.conf;
     Log LOG = LogFactory.getLog(TestDropColumnGroup.class);
-
+    fs = path.getFileSystem(conf);
   }
 
   @AfterClass
@@ -114,10 +116,13 @@
      * columns can be read the value returned is null.
      */
 
-    BasicTable.drop(path, conf);
-
+    if (fs.exists(path)) {
+      BasicTable.drop(path, conf);
+    }
+    
     int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a, b]; [c, d]", path, true, false);
+                                                  "[a, b]; [c, d]", null,
+                                                  path, true);
 
     int rowsToRead = Math.min(10, numRows);
 
@@ -159,8 +164,9 @@
      */
 
     // 5 splits and 50 rows
-    numRows = TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f",
-        "[a, b]; [c, d]; [e] as myCG", path, true, false);
+    numRows =  TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f",
+                                               "[a, b]; [c, d]; [e] as myCG",
+                                               null, path, true);
 
     BasicTable.dropColumnGroup(path, conf, "myCG");
 
@@ -180,7 +186,8 @@
   }
 
   @Test
-  public void testDropColumnGroupsMixedTypes() throws IOException, ParseException {
+  public void testDropColumnGroupsMixedTypes() throws IOException,
+      ParseException {
 
     String mixedSchema = /* roughly borrowed from testMixedType1.java */
     "s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes, "
@@ -195,15 +202,16 @@
         + "[c]                           as collectionCG; "  
         + "[r1.f2, m1#{b}, m2#{z}]       as mapRecordCG; ";
 
-    Path path = new Path(TestBasicTable.rootPath, "DropCGTest");
+    //Path path = new Path(TestBasicTable.rootPath, "DropCGTest");
     Configuration conf = TestBasicTable.conf;
     conf.set("fs.default.name", "file:///");
-
-    BasicTable.drop(path, conf);
-
+    if (fs.exists(path)) {
+      BasicTable.drop(path, conf);
+    }
+    
     // first write the table :
     BasicTable.Writer writer = new BasicTable.Writer(path, mixedSchema,
-        mixedStorageHint, false, conf);
+        mixedStorageHint, conf);
     writer.finish();
 
     Schema schema = writer.getSchema();
@@ -394,25 +402,35 @@
     /*
      * Tests concurrent drop CGs
      */
+    if (fs.exists(path)) {
+      BasicTable.drop(path, conf);
+    }
 
-    BasicTable.drop(path, conf);
-
-    int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a];[b];[c];[d];[e];[f]", path, true, false);
-    System.out.println("Frist dump:");
+    int numRows = TestBasicTable.createBasicTable(1, 10, "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10," +
+    		                                                 "f11,f12,f13,f14,f15,f16,f17,f18,f19,f20," +
+    		                                                 "f21,f22,f23,f24,f25,f26,f27,f28,f29,f30," +
+    		                                                 "f31,f32,f33,f34,f35,f36,f37,f38,f39,f40," +
+    		                                                 "f41,f42,f43,f44,f45,f46,f47,f48,f49,f50",
+      "[f1];[f2];[f3];[f4];[f5];[f6];[f7];[f8];[f9];[f10];" +
+      "[f11];[f12];[f13];[f14];[f15];[f16];[f17];[f18];[f19];[f20];" +
+      "[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" +
+      "[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" +
+      "[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]",
+      null, path, true);
+    
+    System.out.println("First dump:");
     BasicTable.dumpInfo(path.toString(), System.out, conf);
     int rowsToRead = Math.min(10, numRows);
 
     // normal table.
-    verifyScanner(path, conf, "a, c, x", new boolean[] { false, false, true },
+    verifyScanner(path, conf, "f1, f3, xx", new boolean[] { false, false, true },
         rowsToRead);
 
     // create a thread for each dropCG
-    DropThread[] threads = new DropThread[6];
+    DropThread[] threads = new DropThread[50];
 
     for (int i = 0; i < threads.length; i++) {
-
-      threads[i] = new DropThread(i);
+      threads[i] = new DropThread(i, 50);
     }
 
     // start the threads
@@ -430,17 +448,16 @@
 
     // check various read cases.
 
-    verifyScanner(path, conf, "c, a, b, f, d, e", new boolean[] { true, true,
+    verifyScanner(path, conf, "f3, f1, f2, f6, f4, f5", new boolean[] { true, true,
         true, true, true, true }, rowsToRead);
     System.out.println("second dump");
     BasicTable.dumpInfo(path.toString(), System.out, conf);
 
     // Now make sure the reader reports zero rows.
-    Assert.assertTrue(countRows(path, conf, "c, e, b") == 0);
+    Assert.assertTrue(countRows(path, conf, "f3, f5, f2") == 0);
 
     // delete the table
     BasicTable.drop(path, conf);
-
   }
 
   @Test
@@ -448,25 +465,35 @@
     /*
      * Tests concurrrent drop CGs while one fails
      */
+    if (fs.exists(path)) {
+      BasicTable.drop(path, conf);
+    }
 
-    BasicTable.drop(path, conf);
-
-    int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a];[b];[c];[d];[e];[f]", path, true, false);
-    System.out.println("Frist dump:");
+    int numRows = TestBasicTable.createBasicTable(1, 10, "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10," +
+        "f11,f12,f13,f14,f15,f16,f17,f18,f19,f20," +
+        "f21,f22,f23,f24,f25,f26,f27,f28,f29,f30," +
+        "f31,f32,f33,f34,f35,f36,f37,f38,f39,f40," +
+        "f41,f42,f43,f44,f45,f46,f47,f48,f49,f50",
+        "[f1];[f2];[f3];[f4];[f5];[f6];[f7];[f8];[f9];[f10];" +
+        "[f11];[f12];[f13];[f14];[f15];[f16];[f17];[f18];[f19];[f20];" +
+        "[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" +
+        "[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" +
+        "[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]",
+        null, path, true);
+    
+    System.out.println("First dump:");
     BasicTable.dumpInfo(path.toString(), System.out, conf);
     int rowsToRead = Math.min(10, numRows);
 
     // normal table.
-    verifyScanner(path, conf, "a, c, x", new boolean[] { false, false, true },
+    verifyScanner(path, conf, "f1, f3, xx", new boolean[] { false, false, true },
         rowsToRead);
 
     // create a thread for each dropCG
-    DropThread[] threads = new DropThread[7];
+    DropThread[] threads = new DropThread[60];
 
     for (int i = 0; i < threads.length; i++) {
-
-      threads[i] = new DropThread(i);
+      threads[i] = new DropThread(i, 50);
     }
 
     // start the threads
@@ -478,18 +505,19 @@
       try {
         thr.join();
       } catch (InterruptedException e) {
+        e.printStackTrace();
       }
     }
 
     // check various read cases.
 
-    verifyScanner(path, conf, "c, a, b, f, d, e", new boolean[] { true, true,
+    verifyScanner(path, conf, "f3, f1, f2, f6, f4, f5", new boolean[] { true, true,
         true, true, true, true }, rowsToRead);
     System.out.println("second dump");
     BasicTable.dumpInfo(path.toString(), System.out, conf);
 
     // Now make sure the reader reports zero rows.
-    Assert.assertTrue(countRows(path, conf, "c, e, b") == 0);
+    Assert.assertTrue(countRows(path, conf, "f3, f5, f2") == 0);
 
     // delete the table
     BasicTable.drop(path, conf);
@@ -502,10 +530,13 @@
      */
 
     System.out.println("######int test 5");
-    BasicTable.drop(path, conf);
+
+    if (fs.exists(path)) {
+      BasicTable.drop(path, conf);
+    }
 
     int numRows = TestBasicTable.createBasicTable(1, 100000,
-        "a, b, c, d, e, f", "[a, b]; [c, d]", path, true, false);
+        "a, b, c, d, e, f, g, h, i, j, k, l, m, n", "[a, b]; [c, d]; [e]; [f]; [g]; [h]; [i]; [j]; [k]; [l]; [m]; [n]", null, path, true);
 
     System.out.println("in test5 , dump infor 1");
     BasicTable.dumpInfo(path.toString(), System.out, conf);
@@ -519,23 +550,21 @@
         rowsToRead);
 
     // create a thread for each dropCG
-    DropThread[] dropThreads = new DropThread[3];
+    DropThread[] dropThreads = new DropThread[12];
 
     for (int i = 0; i < dropThreads.length; i++) {
-
-      dropThreads[i] = new DropThread(i);
+      dropThreads[i] = new DropThread(i, 12);
     }
 
     // start the threads
     for (int j = 0; j < dropThreads.length; j++) {
       dropThreads[j].start();
     }
-
+    
     // create read threads
     ReadThread[] readThreads = new ReadThread[numOfReadThreads];
 
     for (int i = 0; i < readThreads.length; i++) {
-
       readThreads[i] = new ReadThread(i, "a, b, c, d, e, f", 1000);
     }
 
@@ -551,6 +580,7 @@
         e.printStackTrace();
       }
     }
+    
     for (Thread thr : readThreads) {
       try {
         thr.join();
@@ -569,7 +599,6 @@
 
     // delete the table
     BasicTable.drop(path, conf);
-
   }
 
   @Test
@@ -593,11 +622,12 @@
     /*
      * Tests API, path is wrong
      */
-
-    BasicTable.drop(path, conf);
-
+    if (fs.exists(path)) {
+      BasicTable.drop(path, conf);
+    }
+    
     TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a];[b];[c];[d];[e];[f]", path, true, false);
+        "[a];[b];[c];[d];[e];[f]", null, path, true);
     Path wrongPath = new Path(path.toString() + "non-existing");
     try {
       BasicTable.dropColumnGroup(wrongPath, conf, "CG0");
@@ -616,7 +646,7 @@
 
     Path path1 = new Path(path.toString() + "13");
     TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a];[b];[c];[d];[e];[f]", path1, true, false);
+        "[a];[b];[c];[d];[e];[f]", null, path1, true);
     try {
       BasicTable.dropColumnGroup(path1, null, "CG0");
       Assert.fail("should throw excepiton");
@@ -634,7 +664,7 @@
 
     Path path1 = new Path(path.toString() + "14");
     TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a];[b];[c];[d];[e];[f]", path1, true, false);
+        "[a];[b];[c];[d];[e];[f]", null, path1, true);
     try {
       BasicTable.dropColumnGroup(path1, conf, "");
       Assert.fail("should throw excepiton");
@@ -653,7 +683,7 @@
     Path path1 = new Path(path.toString() + "15");
 
     TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a];[b];[c];[d];[e];[f]", path1, true, false);
+        "[a];[b];[c];[d];[e];[f]", null, path1, true);
     try {
       BasicTable.dropColumnGroup(path1, conf, null);
       Assert.fail("should throw excepiton");
@@ -672,7 +702,7 @@
     Path path1 = new Path(path.toString() + "16");
 
     int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f",
-        "[a, b]; [c, d]", path1, true, false);
+        "[a, b]; [c, d]", null, path1, true);
 
     int rowsToRead = Math.min(10, numRows);
 
@@ -704,7 +734,7 @@
 
     Path path1 = new Path(path.toString() + "17");
     TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a,b,c,d,e,f]",
-        path1, true, false);
+        null, path1, true);
 
     BasicTable.dropColumnGroup(path1, conf, "CG0");
 
@@ -730,17 +760,17 @@
     BasicTable.drop(path1, conf);
   }
 
-  /**
+/**
    * A thread that performs a DropColumnGroup.
    */
   class DropThread extends Thread {
 
     private int id;
+    private int cntCGs;
 
-    public DropThread(int id) {
-
+    public DropThread(int id, int cntCGs) {
       this.id = id;
-
+      this.cntCGs = cntCGs;
     }
 
     /**
@@ -748,13 +778,22 @@
      */
     public void run() {
       try {
-        System.out.println("Droping CG: " + id);
-        BasicTable.dropColumnGroup(path, conf, "CG" + id);
+        int total = cntCGs;
+        int digits = 1;
+        while (total >= 10) {
+          ++ digits;
+          total /= 10;
+        }
+        String formatString = "%0" + digits + "d";
+        String str = "CG"  + String.format(formatString, id);
+
+        System.out.println(id + ": Droping CG: " + str);
+        BasicTable.dropColumnGroup(path, conf, str);
       } catch (Exception e) {
         System.out.println(id + " - error: " + e);
+        e.printStackTrace();
       }
     }
-
   }
 
   /**

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestDuplicateMapKeyInDifferentCGs.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * 
+ * Test that declaring the same map key in different column groups is rejected.
+ * 
+ */
+public class TestDuplicateMapKeyInDifferentCGs {
+
+  final static String STR_SCHEMA = "m1:map(string),m2:map(map(int))";
+  final static String STR_STORAGE = "[m1#{a}, m2#{x}];[m2#{x|y}]; [m1#{b}, m2#{z}];[m1,m2]";
+  private static Configuration conf;
+  private static Path path;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    conf = new Configuration();
+    conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+    conf.setInt("table.input.split.minSize", 64 * 1024);
+    conf.set("table.output.tfile.compression", "none");
+
+    RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+    fs = new LocalFileSystem(rawLFS);
+    path = new Path(fs.getWorkingDirectory(), "TestMap");
+    fs = path.getFileSystem(conf);
+    // drop any previous tables
+    BasicTable.drop(path, conf);
+  }
+
+  @AfterClass
+  public static void tearDownOnce() throws IOException {
+  }
+
+ 
+  @Test
+  public void testRead1() throws IOException, ParseException {
+    try {
+      BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+            STR_STORAGE, conf);
+      Assert.fail("duplicate keys in different column groups should throw an exception!");
+    } catch (IOException e) {
+      return;
+    }
+  }
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java Tue Nov 24 19:54:19 2009
@@ -77,7 +77,7 @@
     // drop any previous tables
     BasicTable.drop(path, conf);
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -269,15 +269,15 @@
     Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
     scanner.getValue(RowValue);
     System.out.println("read1 : " + RowValue.toString());
-    Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+    Assert.assertEquals("{}", RowValue.get(0).toString());
 
     scanner.advance();
     scanner.getKey(key);
     Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
     scanner.getValue(RowValue);
     System.out.println(RowValue.get(0).toString());
-    Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+    Assert.assertEquals("{}", RowValue.get(0).toString());
 
     reader.close();
   }
-}
\ No newline at end of file
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java Tue Nov 24 19:54:19 2009
@@ -87,7 +87,7 @@
     // drop any previous tables
     BasicTable.drop(path, conf);
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -388,4 +388,4 @@
     reader.close();
 
   }
-}
\ No newline at end of file
+}

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapSplitSchemaStorageColumnOutOfOrder.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,321 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * 
+ * Test projections on complicated column types: Jira 1026
+ * 
+ */
+public class TestMapSplitSchemaStorageColumnOutOfOrder {
+  final static String STR_SCHEMA = "column1:bytes,column2:bytes, column3:bytes,column4:bytes,column5:map(String),column6:map(String),column7:map(String),column8:collection(f1:map(String))";   
+  final static String STR_STORAGE = "[column1,column2,column3,column4];[column5#{key51|key52|key53|key54|key55|key56},column7#{key71|key72|key73|key74|key75}];[column5,column7,column6];[column8]";
+  private static Configuration conf;
+  private static Path path;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    conf = new Configuration();
+    conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+    conf.setInt("table.input.split.minSize", 64 * 1024);
+    conf.set("table.output.tfile.compression", "none");
+
+    RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+    fs = new LocalFileSystem(rawLFS);
+    path = new Path(fs.getWorkingDirectory(), "TestMapSplitSchemaStorageColumnOutOfOrder");
+    fs = path.getFileSystem(conf);
+    // drop any previous tables
+    BasicTable.drop(path, conf);
+    BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+        STR_STORAGE, conf);
+    writer.finish();
+    Schema schema = writer.getSchema();
+    Tuple tuple = TypesUtils.createTuple(schema);
+    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+    int part = 0;
+    TableInserter inserter = writer1.getInserter("part" + part, true);
+    TypesUtils.resetTuple(tuple);
+
+    DataBag bag2 = TypesUtils.createBag();
+    Schema schColl2 = schema.getColumn(7).getSchema();
+    Tuple tupColl2_1 = TypesUtils.createTuple(schColl2);
+    Tuple tupColl2_2 = TypesUtils.createTuple(schColl2);
+    // add data to row 1
+    tuple.set(0, new DataByteArray("column1 row 1 ")); // byte
+    tuple.set(1,new DataByteArray("column2 row 1"));
+    tuple.set(2, new DataByteArray("column3 row 1"));
+    tuple.set(3, new DataByteArray("column4 row 1"));
+    
+    // column5
+    Map<String, String> column5 = new HashMap<String, String>();
+    column5.put("key51", "key511");
+    column5.put("key52", "key521");
+    column5.put("key53", "key531");
+    column5.put("key54", "key541");
+    column5.put("key55", "key551");
+    column5.put("key56", "key561");
+    column5.put("key57", "key571");
+   
+    tuple.set(4, column5);
+
+    // schema: column5:map(String),column6:map(String),column7:map(String),column8:collection(f1:map(String))
+    // column7:map(String), column6:map(String)
+    HashMap<String, String> column7 = new HashMap<String, String>();
+    HashMap<String, String> column6 = new HashMap<String, String>();
+    column6.put("column61", "column61");
+    column7.put("key71", "key711");
+    column7.put("key72", "key721");
+    column7.put("key73", "key731");
+    column7.put("key74", "key741");
+    column7.put("key75", "key751");
+    tuple.set(6, column7);
+    tuple.set(5, column6);
+    
+    //column8:collection(f1:map(String))
+    HashMap<String, String> mapInCollection = new HashMap<String, String>();
+    mapInCollection.put("mc", "mc1");
+    tupColl2_1.set(0, mapInCollection);
+    bag2.add(tupColl2_1);
+
+    tupColl2_2.set(0, mapInCollection);
+    bag2.add(tupColl2_2);
+    tuple.set(7, bag2);
+
+    int row = 0;
+    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+        .getBytes()), tuple);
+
+    // row 2
+    row++;
+    TypesUtils.resetTuple(tuple);
+    column5.clear();
+    column7.clear();
+    column6.clear();
+    mapInCollection.clear();
+    bag2.clear();
+    TypesUtils.resetTuple(tupColl2_1);
+    TypesUtils.resetTuple(tupColl2_2);
+    
+    tuple.set(0, new DataByteArray("column1 row 2 ")); // byte
+    tuple.set(1,new DataByteArray("column2 row 2"));
+    tuple.set(2, new DataByteArray("column3 row 2"));
+    tuple.set(3, new DataByteArray("column4 row 2"));
+    
+    // column5
+    column5.put("key51", "key512");
+    column5.put("key52", "key522");
+    column5.put("key53", "key532");
+    column5.put("key54", "key542");
+    column5.put("key55", "key552");
+    column5.put("key56", "key562");
+    column5.put("key57", "key572");
+    tuple.set(4, column5);
+
+    // column6
+  
+    column6.put("column6", "column62");
+    column7.put("key71", "key712");
+    column7.put("key72", "key722");
+    column7.put("key73", "key732");
+    column7.put("key74", "key742");
+    column7.put("key75", "key752");
+    tuple.set(6, column7);
+    tuple.set(5, column6);
+    
+    
+    //column8
+    //column8:collection(f1:map(String))
+    mapInCollection.put("mc", "mc2");
+    tupColl2_1.set(0, mapInCollection);
+    bag2.add(tupColl2_1);
+
+    tupColl2_2.set(0, mapInCollection);
+    bag2.add(tupColl2_2);
+    tuple.set(7, bag2);
+
+    
+    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+        .getBytes()), tuple);
+
+    // finish building table, closing out the inserter, writer, writer1
+    inserter.close();
+    writer1.finish();
+    writer.close();
+  }
+
+  @AfterClass
+  public static void tearDownOnce() throws IOException {
+    BasicTable.drop(path, conf);
+  }
+
+ 
+  @Test
+  public void testRead1() throws IOException, ParseException {
+    /*
+     * read one map
+     *  column5.put("key51", "key511");
+     */
+    String projection = new String("column5#{key51}");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(RowValue);
+    System.out.println("read1 : " + RowValue.toString());
+    Assert.assertEquals("{key51=key511}", RowValue.get(0).toString());
+
+    scanner.advance();
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+    scanner.getValue(RowValue);
+    System.out.println(RowValue.get(0).toString());
+    Assert.assertEquals("{key51=key512}", RowValue.get(0).toString());
+
+    reader.close();
+  }
+
+ 
+  @Test
+  public void testRead2() throws IOException, ParseException {
+    /*
+     * read map , stitch
+     * [column5#{key51|key52|key53|key54|key55|key56},column7#{key71|key72|key73|key74|key75}];[column5,column7,column6]
+     */
+    String projection2 = new String("column5#{new}, column7#{key71|ytestid}");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+//    reader.setProjection(projection2);
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(RowValue);
+    System.out.println("map of map: " + RowValue.toString());
+    Assert.assertEquals("key571", ((Map) RowValue.get(4)).get("key57"));
+    Assert.assertEquals("key711", ((Map) RowValue.get(6)).get("key71"));
+       
+    Assert.assertEquals(null, (((Map) RowValue.get(6)).get("ytestid")));
+    Assert.assertEquals(null, ((Map) ((Map) RowValue.get(6)).get("x")));
+    System.out.println("rowValue.get)1): " + RowValue.get(6).toString());
+    // rowValue.get)1): {z=null, x={m311=311, m321=321, m331=331}}
+
+    scanner.advance();
+
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+    scanner.getValue(RowValue);
+    Assert.assertEquals("key572", ((Map) RowValue.get(4)).get("key57"));
+    Assert.assertEquals("key712", ((Map) RowValue.get(6)).get("key71"));
+       
+    Assert.assertEquals("key722", ((Map) RowValue.get(6)).get("key72"));
+    Assert.assertEquals(null, ((Map) ((Map) RowValue.get(6)).get("x")));
+    reader.close();
+
+  }
+
+  @Test
+  public void testRead3() throws IOException, ParseException {
+    /*
+     * negative: project a column ("m5") that does not exist in the schema
+     */
+    String projection = new String("m5");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(RowValue);
+    Assert.assertEquals(false, RowValue.isNull());
+    Assert.assertEquals(null, RowValue.get(0));
+    Assert.assertEquals(1, RowValue.size());
+
+    scanner.advance();
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+    scanner.getValue(RowValue);
+    Assert.assertEquals(false, RowValue.isNull());
+    Assert.assertEquals(null, RowValue.get(0));
+    Assert.assertEquals(1, RowValue.size());
+    reader.close();
+  }
+
+  @Test
+  public void testRead4() throws IOException, ParseException {
+    /*
+     * Project a map key ("nonexist") that is absent from every row
+     */
+    String projection = new String("column5#{nonexist}");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple RowValue = TypesUtils.createTuple(scanner.getSchema());
+
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(RowValue);
+    System.out.println("read1 : " + RowValue.toString());
+    Assert.assertEquals("{}", RowValue.get(0).toString());
+
+    scanner.advance();
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+    scanner.getValue(RowValue);
+    System.out.println(RowValue.get(0).toString());
+    Assert.assertEquals("{}", RowValue.get(0).toString());
+
+    reader.close();
+  }
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMixedType1.java Tue Nov 24 19:54:19 2009
@@ -74,7 +74,7 @@
     BasicTable.drop(path, conf);
 
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
 
     Schema schema = writer.getSchema();

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Tue Nov 24 19:54:19 2009
@@ -87,7 +87,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -114,7 +114,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -141,7 +141,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -168,7 +168,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -195,7 +195,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -222,7 +222,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -249,7 +249,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -276,7 +276,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -303,7 +303,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -330,7 +330,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -357,7 +357,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -384,7 +384,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -412,7 +412,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       System.out.println("HERE HERE");
       Assert.fail("Should throw exception");
     } catch (Exception e) {
@@ -441,7 +441,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
     } catch (Exception e) {
       e.printStackTrace();
       Assert.fail("Should Not throw exception");
@@ -525,7 +525,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
     } catch (Exception e) {
       System.out.println(e);
       Assert.fail("Should throw exception");
@@ -552,7 +552,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
@@ -580,7 +580,7 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
     } catch (Exception e) {
       e.printStackTrace();
       Assert.fail("Should Not throw exception");
@@ -609,11 +609,11 @@
     // Build Table and column groups
     BasicTable.Writer writer = null;
     try {
-      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, false, conf);
+      writer = new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
       Assert.fail("Should throw exception");
     } catch (Exception e) {
       System.out.println(e);
     }
   }
 
-}
\ No newline at end of file
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNonDefaultWholeMapSplit.java Tue Nov 24 19:54:19 2009
@@ -47,6 +47,7 @@
 public class TestNonDefaultWholeMapSplit {
 
   final static String STR_SCHEMA = "m1:map(string),m2:map(map(int))";
+  //the comment STR_STORAGE is the one for jira 949 
   final static String STR_STORAGE = "[m1#{a}];[m2#{x|y}]; [m1#{b}, m2#{z}];[m1]";
   private static Configuration conf;
   private static Path path;
@@ -66,7 +67,7 @@
     // drop any previous tables
     BasicTable.drop(path, conf);
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -268,15 +269,15 @@
     Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
     scanner.getValue(RowValue);
     System.out.println("read1 : " + RowValue.toString());
-    Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+    Assert.assertEquals("{}", RowValue.get(0).toString());
 
     scanner.advance();
     scanner.getKey(key);
     Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
     scanner.getValue(RowValue);
     System.out.println(RowValue.get(0).toString());
-    Assert.assertEquals("{nonexist=null}", RowValue.get(0).toString());
+    Assert.assertEquals("{}", RowValue.get(0).toString());
 
     reader.close();
   }
-}
\ No newline at end of file
+}

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * 
+ * Test projecting a single map key ("f2#{a}") from a table whose storage
+ * string ("[f1]; [f2]") lists the map column f2 without any per-key split.
+ */
+public class TestProjectionOnFullMap {
+  final static String STR_SCHEMA = "f1:string, f2:map";
+  final static String STR_STORAGE = "[f1]; [f2]";
+  private static Configuration conf;
+  private static Path path;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void setUp() throws IOException {
+    conf = new Configuration();
+    conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+    conf.setInt("table.input.split.minSize", 64 * 1024);
+    conf.set("table.output.tfile.compression", "none");
+    // Resolve the table path against the local file system's working directory.
+    RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+    fs = new LocalFileSystem(rawLFS);
+    path = new Path(fs.getWorkingDirectory(), "TestBasicTableMapSplits");
+    fs = path.getFileSystem(conf);
+    // drop any previous table (NOTE(review): path is not named after this class)
+    BasicTable.drop(path, conf);
+    // First writer: built from the schema and storage strings, then finished.
+    BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+        STR_STORAGE, conf);
+    writer.finish();
+
+    Schema schema = writer.getSchema();
+    Tuple tuple = TypesUtils.createTuple(schema);
+    // Second writer on the same path; it supplies the inserter for the one row.
+    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+    int part = 0;
+    TableInserter inserter = writer1.getInserter("part" + part, true);
+    TypesUtils.resetTuple(tuple);
+
+    tuple.set(0, "Kitty");
+
+    // Column f2 (index 1) gets a three-entry map: a->x, b->y, c->z.
+    Map<String, String> map = new HashMap<String, String>();
+    map.put("a", "x");
+    map.put("b", "y");
+    map.put("c", "z");
+    tuple.set(1, map);
+    // Row key is "k11": part + 1 = 1 and row + 1 = 1.
+    int row = 0;
+    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+        .getBytes()), tuple);
+    inserter.close();
+    writer1.finish();
+
+    writer.close();
+  }
+
+  @AfterClass
+  public static void tearDownOnce() throws IOException {
+  } // no-op: nothing is cleaned up after the tests run
+
+  public void testDescribse() throws IOException {
+    // NOTE(review): not annotated with @Test (unlike test1); confirm intent.
+    BasicTable.dumpInfo(path.toString(), System.out, conf);
+
+  }
+
+  @Test
+  public void test1() throws IOException, ParseException {
+    String projection = new String("f2#{a}");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    // long totalBytes = reader.getStatus().getSize();
+    // Compute the splits with the first reader, then scan with a fresh one.
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    reader.close();
+    reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple value = TypesUtils.createTuple(scanner.getSchema());
+    // The scanner's first row must carry the key "k11".
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(value);
+    // Projected map must hold a -> x. NOTE(review): scanner is never closed.
+    Assert.assertEquals("x", ((Map)value.get(0)).get("a"));
+    reader.close();
+  }
+
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java Tue Nov 24 19:54:19 2009
@@ -75,7 +75,7 @@
     BasicTable.drop(path, conf);
     // Build Table and column groups
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -412,4 +412,4 @@
     reader.close();
 
   }
-}
\ No newline at end of file
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord2Map.java Tue Nov 24 19:54:19 2009
@@ -78,7 +78,7 @@
     BasicTable.drop(path, conf);
     // Build Table and column groups
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord3Map.java Tue Nov 24 19:54:19 2009
@@ -77,7 +77,7 @@
     BasicTable.drop(path, conf);
     // Build Table and column groups
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecordMap.java Tue Nov 24 19:54:19 2009
@@ -78,7 +78,7 @@
     BasicTable.drop(path, conf);
     // Build Table and column groups
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSchema.java Tue Nov 24 19:54:19 2009
@@ -114,7 +114,7 @@
 
     // Build Table and column groups
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -210,7 +210,7 @@
     System.out.println("in testRecord, get path: " + path.toString());
     // Build Table and column groups
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -411,8 +411,9 @@
     // String STR_STORAGE = "[m1#{a}]";
     // Build Table and column groups
     path = new Path(getCurrentMethodName());
+    BasicTable.drop(path, conf);
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -534,4 +535,4 @@
     reader.close();
 
   }
-}
\ No newline at end of file
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java Tue Nov 24 19:54:19 2009
@@ -82,7 +82,7 @@
     // drop any previous tables
     BasicTable.drop(path, conf);
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
     Schema schema = writer.getSchema();
     Tuple tuple = TypesUtils.createTuple(schema);
@@ -184,4 +184,4 @@
     reader.close();
   }
 
-}
\ No newline at end of file
+}

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSortedBasicTableSplits.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * 
+ * Test projections ("r.f12, f1" and "f1, r") on a table with complex column
+ * types, written through the BasicTable.Writer constructor given "r, c".
+ */
+public class TestSortedBasicTableSplits {
+  final static String STR_SCHEMA = "f1:bool, r:record(f11:int, f12:long), m:map(string), c:collection(f13:double, f14:float, f15:bytes)";
+  // TODO: try map hash split later
+  final static String STR_STORAGE = "[r.f12, f1]; [m]";
+  private static Configuration conf;
+  private static Path path;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    conf = new Configuration();
+    conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+    conf.setInt("table.input.split.minSize", 64 * 1024);
+    conf.set("table.output.tfile.compression", "none");
+    // Resolve the table path against the local file system's working directory.
+    RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+    fs = new LocalFileSystem(rawLFS);
+    path = new Path(fs.getWorkingDirectory(), "TestBasicTableSplits");
+    fs = path.getFileSystem(conf);
+    // drop any previous table (NOTE(review): path is not named after this class)
+    BasicTable.drop(path, conf);
+    // NOTE(review): "r, c" is presumably the sort-column list; confirm.
+    BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
+        STR_STORAGE, "r, c", null, conf);
+    writer.finish();
+
+    Schema schema = writer.getSchema();
+    Tuple tuple = TypesUtils.createTuple(schema);
+    // Second writer on the same path; it supplies the inserter for both rows.
+    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+    int part = 0;
+    TableInserter inserter = writer1.getInserter("part" + part, true);
+    TypesUtils.resetTuple(tuple);
+
+    tuple.set(0, true);
+    // Nested tuple for record column r, created from r's own sub-schema.
+    Tuple tupRecord;
+    try {
+      tupRecord = TypesUtils.createTuple(schema.getColumnSchema("r")
+          .getSchema());
+    } catch (ParseException e) {
+      e.printStackTrace();
+      throw new IOException(e);
+    }
+
+    // row 1
+    tupRecord.set(0, 1);
+    tupRecord.set(1, 1001L);
+    tuple.set(1, tupRecord);
+    // Map column m (index 2).
+    Map<String, String> map = new HashMap<String, String>();
+    map.put("a", "x");
+    map.put("b", "y");
+    map.put("c", "z");
+    tuple.set(2, map);
+    // Collection column c (index 3): a bag of two tuples built from c's schema.
+    DataBag bagColl = TypesUtils.createBag();
+    Schema schColl = schema.getColumn(3).getSchema();
+    Tuple tupColl1 = TypesUtils.createTuple(schColl);
+    Tuple tupColl2 = TypesUtils.createTuple(schColl);
+    byte[] abs1 = new byte[3];
+    byte[] abs2 = new byte[4];
+    tupColl1.set(0, 3.1415926);
+    tupColl1.set(1, 1.6); // NOTE(review): f14 is declared float; 1.6 is a double
+    abs1[0] = 11;
+    abs1[1] = 12;
+    abs1[2] = 13;
+    tupColl1.set(2, new DataByteArray(abs1));
+    bagColl.add(tupColl1);
+    tupColl2.set(0, 123.456789);
+    tupColl2.set(1, 100); // NOTE(review): int value set into float field f14
+    abs2[0] = 21;
+    abs2[1] = 22;
+    abs2[2] = 23;
+    abs2[3] = 24;
+    tupColl2.set(2, new DataByteArray(abs2));
+    bagColl.add(tupColl2);
+    tuple.set(3, bagColl);
+    // Row key is "k11": part + 1 = 1 and row + 1 = 1.
+    int row = 0;
+    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+        .getBytes()), tuple);
+
+    // row 2: reuses the row-1 objects after resetting/clearing them
+    row++;
+    TypesUtils.resetTuple(tuple);
+    TypesUtils.resetTuple(tupRecord);
+    map.clear();
+    tuple.set(0, false);
+    tupRecord.set(0, 2);
+    tupRecord.set(1, 1002L);
+    tuple.set(1, tupRecord);
+    map.put("boy", "girl");
+    map.put("adam", "amy");
+    map.put("bob", "becky");
+    map.put("carl", "cathy");
+    tuple.set(2, map);
+    bagColl.clear();
+    TypesUtils.resetTuple(tupColl1);
+    TypesUtils.resetTuple(tupColl2);
+    tupColl1.set(0, 7654.321);
+    tupColl1.set(1, 0.0001);
+    abs1[0] = 31;
+    abs1[1] = 32;
+    abs1[2] = 33;
+    tupColl1.set(2, new DataByteArray(abs1));
+    bagColl.add(tupColl1);
+    tupColl2.set(0, 0.123456789);
+    tupColl2.set(1, 0.3333);
+    abs2[0] = 41;
+    abs2[1] = 42;
+    abs2[2] = 43;
+    abs2[3] = 44;
+    tupColl2.set(2, new DataByteArray(abs2));
+    bagColl.add(tupColl2);
+    tuple.set(3, bagColl);
+    inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+        .getBytes()), tuple);
+
+    inserter.close();
+    writer1.finish();
+
+    writer.close();
+  }
+
+  @AfterClass
+  public static void tearDownOnce() throws IOException {
+  } // no-op: nothing is cleaned up after the tests run
+
+  @Test
+  public void test1() throws IOException, ParseException {
+    String projection = new String("r.f12, f1");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    // long totalBytes = reader.getStatus().getSize();
+    // Compute the splits with the first reader, then scan with a fresh one.
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    reader.close();
+    reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple value = TypesUtils.createTuple(scanner.getSchema());
+    // First row: key "k11".
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(value);
+    // Projection order is "r.f12, f1": index 0 is r.f12, index 1 is f1.
+    Assert.assertEquals(1001L, value.get(0));
+    Assert.assertEquals(true, value.get(1));
+    // Second row: key "k12".
+    scanner.advance();
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k12".getBytes()));
+    scanner.getValue(value);
+    Assert.assertEquals(1002L, value.get(0));
+    Assert.assertEquals(false, value.get(1));
+    // NOTE(review): only the reader is closed; the scanner is never closed.
+    reader.close();
+  }
+
+  @Test
+  public void testStitch() throws IOException, ParseException {
+    String projection = new String("f1, r");
+    BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    // long totalBytes = reader.getStatus().getSize();
+    // Compute the splits with the first reader, then scan with a fresh one.
+    List<RangeSplit> splits = reader.rangeSplit(1);
+    reader.close();
+    reader = new BasicTable.Reader(path, conf);
+    reader.setProjection(projection);
+    TableScanner scanner = reader.getScanner(splits.get(0), true);
+    BytesWritable key = new BytesWritable();
+    Tuple value = TypesUtils.createTuple(scanner.getSchema());
+    // Only the first row (key "k11") is checked.
+    scanner.getKey(key);
+    Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+    scanner.getValue(value);
+    // Projection order is "f1, r": index 0 is f1, index 1 is the record r.
+    Tuple recordTuple = (Tuple) value.get(1);
+    Assert.assertEquals(1, recordTuple.get(0));
+    Assert.assertEquals(1001L, recordTuple.get(1));
+    Assert.assertEquals(true, value.get(0));
+    reader.close(); // NOTE(review): the scanner itself is never closed
+  }
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestWrite.java Tue Nov 24 19:54:19 2009
@@ -74,7 +74,7 @@
     BasicTable.drop(path, conf);
 
     BasicTable.Writer writer = new BasicTable.Writer(path, STR_SCHEMA,
-        STR_STORAGE, false, conf);
+        STR_STORAGE, conf);
     writer.finish();
 
     Schema schema = writer.getSchema();

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java Tue Nov 24 19:54:19 2009
@@ -26,8 +26,8 @@
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Flat;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Flat;
 
 /**
  * Generate some input text files.

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java Tue Nov 24 19:54:19 2009
@@ -24,9 +24,9 @@
 import java.util.Random;
 import java.util.Set;
 
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Binomial;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Zipf;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Binomial;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Zipf;
 
 /**
  * A dictionary that generates English words, whose frequency follows Zipf

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java Tue Nov 24 19:54:19 2009
@@ -113,4 +113,4 @@
       JobClient.runJob(jobConf);
     }
   }
-}
\ No newline at end of file
+}

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSampleSortedTable.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
+import org.apache.hadoop.zebra.mapred.TestBasicTableIOFormatLocalFS.InvIndex;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.types.SortInfo;
+import org.apache.hadoop.zebra.schema.*;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.hadoop.zebra.types.*;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a complete MR sample code for Table. It doesn't contain
+ * 'read' part. But, it should be similar and easier to write. Refer to test
+ * cases in the same directory.
+ * 
+ * Assume the input files contain rows of word and count, separated by a space:
+ * 
+ * <pre>
+ * this 2
+ * is 1
+ * a 4 
+ * test 2 
+ * hello 1 
+ * world 3
+ * </pre>
+ * 
+ */
+public class TableMRSampleSortedTable {
+	static class MapClass implements
+      Mapper<LongWritable, Text, BytesWritable, Tuple> {
+    private BytesWritable bytesKey;
+    private Tuple tupleRow;
+    private Object javaObj;
+    private byte[] types;
+    private ColumnType[] sortColTypes;
+
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+        throws IOException {
+        // value should contain "word count"
+        String[] wdct = value.toString().split(" ");
+        if (wdct.length != 2) {
+          // LOG the error
+          return;
+        }
+
+        byte[] word = wdct[0].getBytes();
+
+        // This key has to be created by user
+        bytesKey.set(word, 0, word.length);
+        
+        // This tuple has to be created by user
+        tupleRow.set(0, new String(word));
+        tupleRow.set(1, Integer.parseInt(wdct[1]));
+
+        output.collect(bytesKey, tupleRow);
+    }
+
+    @Override
+    public void configure(JobConf job) {
+      bytesKey = new BytesWritable();
+      try {
+    	  
+    	/* New M/R Interface to get sort information */  
+	    SortInfo sortInfo = BasicTableOutputFormat.getSortInfo(job);
+	    
+	    /* New M/R Interface SortInfo is exposed to user 
+	     * To get types of sort columns.
+	     * Similar interface to get names and indices
+	     */
+   	    sortColTypes  = sortInfo.getSortColumnTypes();
+   	    types = new byte[sortColTypes.length];
+   	    for(int i =0 ; i < sortColTypes.length; ++i){
+   	      types[i] = sortColTypes[i].pigDataType();
+   	    }
+        Schema outSchema = BasicTableOutputFormat.getSchema(job);
+        tupleRow = TypesUtils.createTuple(outSchema);
+                
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      } catch (ParseException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  static class ReduceClass implements
+    Reducer<BytesWritable, Tuple, BytesWritable, Tuple> {
+      Tuple outRow;
+     
+
+    @Override
+    public void configure(JobConf job) {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }	
+    public void reduce(BytesWritable key, Iterator<Tuple> values,
+      OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+   	    throws IOException {
+    	  try {
+    		for(; values.hasNext();)  {
+    	      output.collect(key, values.next());
+    		}  
+    	  } catch (ExecException e) {
+    	    e.printStackTrace();
+    	  }
+    }
+  
+  }  
+  
+  public static void main(String[] args) throws ParseException, IOException, Exception {
+    JobConf jobConf = new JobConf();
+    jobConf.setJobName("tableMRSample");
+    jobConf.set("table.output.tfile.compression", "gz");
+
+    // input settings
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setMapperClass(TableMRSampleSortedTable.MapClass.class);
+    jobConf.setReducerClass(TableMRSampleSortedTable.ReduceClass.class);
+    jobConf.setMapOutputKeyClass(BytesWritable.class);
+    jobConf.setMapOutputValueClass(DefaultTuple.class);
+    FileInputFormat.setInputPaths(jobConf, new Path(
+        "/home/gauravj/work/grid/myTesting/input.txt"));
+    jobConf.setNumMapTasks(1);
+
+    // output settings
+    Path outPath = new Path("/home/gauravj/work/grid/myTesting/tableOuts");
+    jobConf.setOutputFormat(BasicTableOutputFormat.class);
+    BasicTableOutputFormat.setOutputPath(jobConf, outPath);
+    // set the logical schema with 2 columns
+    BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
+    // for demo purposes, create 2 physical column groups
+    BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
+    
+    /* New M/R Interface */
+    /* Set sort columns in a comma separated string */
+    /* Each sort column should belong to schema columns */
+    BasicTableOutputFormat.setSortInfo(jobConf, "word, count");
+
+    // run with a single reduce task (this is not a map-only job).
+    jobConf.setNumReduceTasks(1);
+    JobClient.runJob(jobConf);
+  }
+}

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java?rev=883836&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSortedTableZebraKeyGenerator.java Tue Nov 24 19:54:19 2009
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.zebra.mapred.BasicTableOutputFormat;
+import org.apache.hadoop.zebra.mapred.TestBasicTableIOFormatLocalFS.InvIndex;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DefaultTuple;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a complete MR sample code for Table. It doesn't contain
+ * 'read' part. But, it should be similar and easier to write. Refer to test
+ * cases in the same directory.
+ * 
+ * Assume the input files contain rows of word and count, separated by a space:
+ * 
+ * <pre>
+ * this 2
+ * is 1
+ * a 4 
+ * test 2 
+ * hello 1 
+ * world 3
+ * </pre>
+ * 
+ */
+public class TableMRSortedTableZebraKeyGenerator {
+	static class MapClass implements
+      Mapper<LongWritable, Text, BytesWritable, Tuple> {
+    private BytesWritable bytesKey;
+    private Tuple tupleRow;
+    private Object javaObj;
+
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+        throws IOException {
+        // value should contain "word count"
+        String[] wdct = value.toString().split(" ");
+        if (wdct.length != 2) {
+          // LOG the error
+          return;
+        }
+
+        byte[] word = wdct[0].getBytes();
+        bytesKey.set(word, 0, word.length);
+        tupleRow.set(0, new String(word));
+        tupleRow.set(1, Integer.parseInt(wdct[1]));
+
+        // This key has to be created by user
+        Tuple userKey = new DefaultTuple();
+        userKey.append(new String(word));
+        userKey.append(Integer.parseInt(wdct[1]));
+        try {
+
+            /* New M/R Interface */
+            /* Converts user key to zebra BytesWritable key */
+            /* using sort key expr tree  */
+            /* Returns the zebra sort key as a BytesWritable */
+            /* Done for each user key */
+        	
+          bytesKey = BasicTableOutputFormat.getSortKey(javaObj, userKey);
+        } catch(Exception e) {
+        	
+        }
+
+        output.collect(bytesKey, tupleRow);
+    }
+
+    @Override
+    public void configure(JobConf job) {
+      bytesKey = new BytesWritable();
+      try {
+        Schema outSchema = BasicTableOutputFormat.getSchema(job);
+        tupleRow = TypesUtils.createTuple(outSchema);
+        
+        /* New M/R Interface */
+        /* returns an expression tree for sort keys */
+        /* Returns a java base object */
+        /* Done once per table */
+        javaObj = BasicTableOutputFormat.getSortKeyGenerator(job);
+        
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      } catch (ParseException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  static class ReduceClass implements
+    Reducer<BytesWritable, Tuple, BytesWritable, Tuple> {
+      Tuple outRow;
+     
+
+    @Override
+    public void configure(JobConf job) {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }	
+    public void reduce(BytesWritable key, Iterator<Tuple> values,
+      OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
+   	    throws IOException {
+    	  try {
+    		for(; values.hasNext();)  {
+    	      output.collect(key, values.next());
+    		}  
+    	  } catch (ExecException e) {
+    	    e.printStackTrace();
+    	  }
+    }
+  
+  }  
+  
+  public static void main(String[] args) throws ParseException, IOException, Exception {
+    JobConf jobConf = new JobConf();
+    jobConf.setJobName("tableMRSample");
+    jobConf.set("table.output.tfile.compression", "gz");
+
+    // input settings
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setMapperClass(TableMRSortedTableZebraKeyGenerator.MapClass.class);
+    jobConf.setReducerClass(TableMRSortedTableZebraKeyGenerator.ReduceClass.class);
+    jobConf.setMapOutputKeyClass(BytesWritable.class);
+    jobConf.setMapOutputValueClass(DefaultTuple.class);
+    FileInputFormat.setInputPaths(jobConf, new Path(
+        "/home/gauravj/work/grid/myTesting/input.txt"));
+    jobConf.setNumMapTasks(1);
+
+    // output settings
+    Path outPath = new Path("/home/gauravj/work/grid/myTesting/tableOuts");
+    jobConf.setOutputFormat(BasicTableOutputFormat.class);
+    BasicTableOutputFormat.setOutputPath(jobConf, outPath);
+    // set the logical schema with 2 columns
+    BasicTableOutputFormat.setSchema(jobConf, "word:string, count:int");
+    // for demo purposes, create 2 physical column groups
+    BasicTableOutputFormat.setStorageHint(jobConf, "[word];[count]");
+    
+    /* New M/R Interface */
+    /* Set sort columns in a comma separated string */
+    /* Each sort column should belong to schema columns */
+    BasicTableOutputFormat.setSortInfo(jobConf, "word, count");
+
+    // run with a single reduce task (this is not a map-only job).
+    jobConf.setNumReduceTasks(1);
+    JobClient.runJob(jobConf);
+  }
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java Tue Nov 24 19:54:19 2009
@@ -225,4 +225,4 @@
         args);
     System.exit(res);
   }
-}
\ No newline at end of file
+}

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java Tue Nov 24 19:54:19 2009
@@ -46,7 +46,7 @@
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.file.tfile.Utils;
+import org.apache.hadoop.zebra.tfile.Utils;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobClient;
@@ -163,6 +163,9 @@
   JobConf getJobConf(String name) {
     JobConf jobConf = mr.createJobConf();
     jobConf.setJobName(name);
+    jobConf.setInt("table.input.split.minSize", 1);
+    options.minTableSplitSize = 1; // force more splits
+    jobConf.setInt("dfs.block.size", 1024); // force multiple blocks
     jobConf.set("table.output.tfile.compression", options.compression);
     jobConf.setInt("mapred.app.freqWords.count", options.numFreqWords);
     return jobConf;

Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java?rev=883836&r1=883835&r2=883836&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java Tue Nov 24 19:54:19 2009
@@ -24,7 +24,8 @@
 @RunWith(Suite.class)
 @Suite.SuiteClasses({
   TestBasicTableIOFormatLocalFS.class,
-  TestBasicTableIOFormatDFS.class
+  TestBasicTableIOFormatDFS.class,
+  TestTfileSplit.class
 })
 
 public class TestCheckin {