Posted to commits@pig.apache.org by ga...@apache.org on 2009/11/25 20:37:42 UTC

svn commit: r884235 - in /hadoop/pig/trunk/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/schema/ src/test/org/apache/hadoop/zebra/types/

Author: gates
Date: Wed Nov 25 19:37:41 2009
New Revision: 884235

URL: http://svn.apache.org/viewvc?rev=884235&view=rev
Log:
PIG-1095: Schema support of anonymous fields in COLLECTION fails.
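
For context, the fix lets schema strings with anonymous (unnamed) fields inside a
COLLECTION parse again. A minimal illustration, mirroring the new test added below
(imports as in that test file):

    TableSchemaParser parser = new TableSchemaParser(new StringReader(
        "c1:collection(f1:int, f2:int), c2:collection(collection(record(f3:float, f4)))"));
    Schema schema = parser.RecordSchema(null);  // parses; c2's nested columns have null names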


Added:
    hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
Modified:
    hadoop/pig/trunk/contrib/zebra/CHANGES.txt
    hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
    hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
    hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java

Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Nov 25 19:37:41 2009
@@ -28,7 +28,11 @@
   OPTIMIZATIONS
 
   BUG FIXES
-    PIG_1078: merge join with empty table failed (yanz via gates)
+
+    PIG-1095: Schema support of anonymous fields in COLLECTION fails (yanz via
+	gates)
+
+    PIG-1078: merge join with empty table failed (yanz via gates)
 
     PIG-1091: Exception when load with projection of map keys on a map column
 	that is not map split (yanz via gates).

Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java Wed Nov 25 19:37:41 2009
@@ -332,7 +332,7 @@
 
   private ArrayList<ColumnSchema> mFields;
   private HashMap<String, ColumnSchema> mNames;
-  private boolean dupColNameAllowed;
+  private boolean projection;
 
   /**
    * Constructor - schema for empty schema (zero-column) .
@@ -342,6 +342,17 @@
   }
 
   /**
+   * Constructor - schema for empty projection/schema (zero-column) .
+   *
+   * @param projection
+   *           A projection schema or not
+   */
+  public Schema(boolean projection) {
+    this.projection = projection;
+    init();
+  }
+
+  /**
    * Constructor - create a schema from a string representation.
    * 
    * @param schema
@@ -355,10 +366,9 @@
     init(schema, false);
   }
 
-  public Schema(String schema, boolean dupAllowed) throws ParseException {
-    dupColNameAllowed = dupAllowed;
-    // suppose if duplicate is allowed, then it's from projection and hence virtual column is allowed
-    init(schema, dupAllowed);
+  public Schema(String schema, boolean projection) throws ParseException {
+    this.projection = projection;
+    init(schema, projection);
   }
 
   public Schema(ColumnSchema fs) throws ParseException {
@@ -384,18 +394,16 @@
    *          Column to be added to the schema
    */
   public void add(ColumnSchema f) throws ParseException {
-    add(f, false);
-  }
-
-  private void add(ColumnSchema f, boolean dupAllowed) throws ParseException {
     if (f == null) {
+      if (!projection)
+        throw new ParseException("Empty column schema is not allowed");
       mFields.add(null);
       return;
     }
     f.index = mFields.size();
     mFields.add(f);
     if (null != f && null != f.name) {
-      if (mNames.put(f.name, f) != null && !dupAllowed && !dupColNameAllowed)
+      if (mNames.put(f.name, f) != null && !projection)
         throw new ParseException("Duplicate field name: " + f.name);
     }
   }
@@ -684,7 +692,7 @@
     org.apache.hadoop.zebra.tfile.Utils.writeString(out, toString());
   }
 
-  private void init(String[] columnNames, boolean virtualColAllowed) throws ParseException {
+  private void init(String[] columnNames, boolean projection) throws ParseException {
     // the arg must be of type or they will be treated as the default type
     mFields = new ArrayList<ColumnSchema>();
     mNames = new HashMap<String, ColumnSchema>();
@@ -698,7 +706,10 @@
     }
     TableSchemaParser parser =
         new TableSchemaParser(new StringReader(sb.toString()));
-    parser.RecordSchema(this, virtualColAllowed);
+    if (projection)
+      parser.ProjectionSchema(this);
+    else
+      parser.RecordSchema(this);
   }
 
   private void init() {
@@ -706,7 +717,7 @@
     mNames = new HashMap<String, ColumnSchema>();
   }
 
-  private void init(String columnString, boolean virtualColAllowed) throws ParseException {
+  private void init(String columnString, boolean projection) throws ParseException {
     String trimmedColumnStr;
     if (columnString == null || (trimmedColumnStr = columnString.trim()).isEmpty()) {
       init();
@@ -717,7 +728,7 @@
     for (int nx = 0; nx < parts.length; nx++) {
       parts[nx] = parts[nx].trim();
     }
-    init(parts, virtualColAllowed);
+    init(parts, projection);
   }
 
   /**
@@ -727,7 +738,7 @@
       HashMap<Schema.ColumnSchema, HashSet<String>> keysmap)
       throws ParseException {
     int ncols = projcols.length;
-    Schema result = new Schema();
+    Schema result = new Schema(true);
     ColumnSchema cs, mycs;
     String keysStr;
     String[] keys;
@@ -742,7 +753,7 @@
       pn.setName(projcols[i]);
       if ((cs = getColumnSchemaOnParsedName(pn)) != null) {
         mycs = new ColumnSchema(pn.mName, cs.schema, cs.type);
-        result.add(mycs, true);
+        result.add(mycs);
         if (pn.mDT == ColumnType.MAP) {
           keysStr = projcols[i].substring(pn.mKeyOffset);
           if (!keysStr.startsWith("{") || !keysStr.endsWith("}"))
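
For reference, a hedged sketch (not part of the commit) of how the reworked Schema
behaves per the hunks above; the class name and column values here are made up for
illustration:

    import org.apache.hadoop.zebra.parser.ParseException;
    import org.apache.hadoop.zebra.schema.ColumnType;
    import org.apache.hadoop.zebra.schema.Schema;

    public class ProjectionSchemaSketch {
      public static void main(String[] args) throws ParseException {
        // A projection schema tolerates duplicate names and empty (null) columns ...
        Schema projection = new Schema(true);
        projection.add(new Schema.ColumnSchema("a", ColumnType.INT));
        projection.add(new Schema.ColumnSchema("a", ColumnType.INT)); // duplicate name accepted
        projection.add(null);                                         // empty column accepted

        // ... while a table schema (projection == false) rejects both.
        Schema table = new Schema(false);
        table.add(null); // throws ParseException: "Empty column schema is not allowed"
      }
    }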

Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt Wed Nov 25 19:37:41 2009
@@ -40,10 +40,6 @@
 					objout.close();
 					System.out.println(schema.toString());
       }
-      public Schema RecordSchema(Schema list) throws ParseException
-      {
-        return RecordSchema(list, false);
-      }
 }
 PARSER_END(TableSchemaParser)
 
@@ -121,16 +117,36 @@
 }
 {
 	(
-	LOOKAHEAD(SchemaRecord()) fs = SchemaRecord()
-|	LOOKAHEAD(SchemaCollection()) fs = SchemaCollection()
-|	LOOKAHEAD(SchemaMap()) fs = SchemaMap()
-|	LOOKAHEAD(AtomSchema()) fs = AtomSchema()
+  LOOKAHEAD(3) fs = SchemaRecord()
+|	LOOKAHEAD(3) fs = SchemaCollection()
+|	LOOKAHEAD(3) fs = SchemaMap()
+|	fs = AtomSchema()
 	)
 	{
 		return fs;
 	}
 }
 
+Schema.ColumnSchema ProjectionColumnSchema() throws ParseException: 
+{
+	Token t1; 
+	Schema item = null; 
+	Schema.ColumnSchema fs = null; 
+}
+{
+  (
+	(
+  LOOKAHEAD(3) fs = SchemaRecord()
+|	LOOKAHEAD(3) fs = SchemaCollection()
+|	LOOKAHEAD(3) fs = SchemaMap()
+|	fs = AtomSchema()
+  )
+  { return fs; }
+|
+  { return null; }
+	)
+}
+
 Schema.ColumnSchema AtomSchema() throws ParseException : 
 {
 	Token t1 = null;
@@ -143,10 +159,6 @@
 			fs = new Schema.ColumnSchema(t1.image, type); 
 			return fs;
 		}
-|
-    {
-      return null;
-    }
 	)
 }
 
@@ -171,7 +183,7 @@
 	Schema.ColumnSchema fs;
 }
 { 
-	t1 = <IDENTIFIER> ":" <RECORD> "(" s = RecordSchemaInternal(null) ")" 
+	t1 = <IDENTIFIER> ":" <RECORD> "(" s = RecordSchemaInternal() ")" 
 	{
 		fs = new Schema.ColumnSchema(t1.image, s, ColumnType.RECORD);
 		return fs;
@@ -196,12 +208,14 @@
 	Schema.ColumnSchema fs = null;
 }
 {
-	(
-	LOOKAHEAD(RecordSchemaInternal(null)) s= RecordSchemaInternal(null)
-|
-	fs = AnonymousColumnSchema()
-	)
-	{ if (s == null) s = new Schema(fs); fs = new Schema.ColumnSchema(id, s, ColumnType.COLLECTION); return fs; }
+  (
+  s= RecordSchemaInternal()
+| fs = AnonymousColumnSchema()
+  )
+	{ if (s == null) s = new Schema(fs);
+    fs = new Schema.ColumnSchema(id, s, ColumnType.COLLECTION);
+    return fs;
+  }
 }
 
 Schema.ColumnSchema AnonymousColumnSchema() throws ParseException : 
@@ -212,10 +226,10 @@
 }
 {
 	(
-	LOOKAHEAD(AnonymousSchemaRecord()) fs = AnonymousSchemaRecord()
-|	LOOKAHEAD(AnonymousSchemaCollection()) fs = AnonymousSchemaCollection()
-|	LOOKAHEAD(AnonymousSchemaMap()) fs = AnonymousSchemaMap()
-|	LOOKAHEAD(AnonymousAtomSchema()) fs = AnonymousAtomSchema()
+	fs = AnonymousSchemaRecord()
+|	fs = AnonymousSchemaCollection()
+|	fs = AnonymousSchemaMap()
+|	fs = AnonymousAtomSchema()
 	)
 	{
 		return fs;
@@ -256,7 +270,7 @@
 	Schema.ColumnSchema fs;
 }
 { 
-	<RECORD> "(" s = RecordSchemaInternal(null) ")" 
+	<RECORD> "(" s = RecordSchemaInternal() ")" 
 	{
 		fs = new Schema.ColumnSchema(null, s, ColumnType.RECORD);
 		return fs;
@@ -269,56 +283,59 @@
 	Schema.ColumnSchema fs;
 }
 { 
-	( <COLLECTION> "(" s = RecordSchemaInternal(null) ")"  )
+	( <COLLECTION> "(" fs = SchemaCollectionEntry(null) ")"  )
 	{
+    s = new Schema(fs);
 		fs = new Schema.ColumnSchema(null, s, ColumnType.COLLECTION);
 		return fs;
 	} 
 }
 
-Schema RecordSchemaInternal(Schema list) throws ParseException : 
+Schema RecordSchemaInternal() throws ParseException : 
+{
+  Schema list = new Schema(); 
+	Schema.ColumnSchema fs = null;
+}
+{
+  fs = ColumnSchema() {list.add(fs);} ( "," fs = ColumnSchema() {list.add(fs);})*
+	{ return list; }
+}
+
+Schema RecordSchema(Schema list) throws ParseException : 
 {
-	Schema item = null; 
 	if (list == null)
 		list = new Schema(); 
 	Schema.ColumnSchema fs = null;
 }
 {
 	(
-	(
-		fs = ColumnSchema() {list.add(fs);}
-		( "," fs = ColumnSchema() {list.add(fs);})*
+		fs = ColumnSchema() { if (fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);}
+		( "," fs = ColumnSchema() { if (fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);})* <EOF>
 	)	
-	)
-	{ return list; }
+	{ return (list.getNumColumns() == 0 || (list.getNumColumns() == 1 && list.getColumn(0) == null) ? null : list); }
 }
 
-Schema RecordSchema(Schema list, boolean virtualColAllowed) throws ParseException : 
+Schema ProjectionSchema(Schema list) throws ParseException : 
 {
-	Schema item = null; 
 	if (list == null)
 		list = new Schema(); 
 	Schema.ColumnSchema fs = null;
 }
 {
 	(
-	(
-		fs = ColumnSchema() { if (!virtualColAllowed && fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);}
-		( "," fs = ColumnSchema() { if (!virtualColAllowed && fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);})* <EOF>
+		fs = ProjectionColumnSchema() { list.add(fs);}
+		( "," fs = ProjectionColumnSchema() { list.add(fs);})* <EOF>
 	)	
-	)
 	{ return (list.getNumColumns() == 0 || (list.getNumColumns() == 1 && list.getColumn(0) == null) ? null : list); }
 }
 
 Schema MapSchema() throws ParseException : 
 {
-	Schema item = null; 
 	Schema list = new Schema(); 
 	Schema.ColumnSchema fs = null;
 }
 {
 	(
-	LOOKAHEAD(3)
 	(
 		"(" fs = AnonymousColumnSchema() ")"
 	)
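
Likewise, a hedged sketch of the two parser entry points after this change; the
projection string "f1, f2" and the class name are illustrative only:

    import java.io.StringReader;
    import org.apache.hadoop.zebra.parser.ParseException;
    import org.apache.hadoop.zebra.parser.TableSchemaParser;
    import org.apache.hadoop.zebra.schema.Schema;

    public class ParserEntryPointSketch {
      public static void main(String[] args) throws ParseException {
        // Table schemas still go through RecordSchema(), which no longer takes a
        // virtualColAllowed flag and always rejects reserved virtual column names.
        TableSchemaParser p1 = new TableSchemaParser(new StringReader("f1:int, f2:float"));
        Schema tableSchema = p1.RecordSchema(null);
        System.out.println(tableSchema);

        // Projections go through the new ProjectionSchema() production, which accepts
        // empty (anonymous) columns; Schema.init() selects it when the projection flag is set.
        TableSchemaParser p2 = new TableSchemaParser(new StringReader("f1, f2"));
        Schema projSchema = p2.ProjectionSchema(new Schema(true));
        System.out.println(projSchema);
      }
    }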

Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java?rev=884235&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java Wed Nov 25 19:37:41 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.types;
+
+import java.io.StringReader;
+import junit.framework.Assert;
+
+import org.apache.hadoop.zebra.schema.ColumnType;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.parser.TableSchemaParser;
+import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
+import org.junit.Test;
+
+public class TestSchemaAnonymousCollection {
+  @Test
+  public void testSchemaValid1() throws ParseException {
+    String strSch = "c1:collection(f1:int, f2:int), c2:collection(collection(record(f3:float, f4)))";
+    TableSchemaParser parser;
+    Schema schema;
+
+    parser = new TableSchemaParser(new StringReader(strSch));
+    schema = parser.RecordSchema(null);
+    System.out.println(schema);
+
+    // test 1st level schema;
+    ColumnSchema f1 = schema.getColumn(0);
+    Assert.assertEquals("c1", f1.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f1.getType());
+
+    ColumnSchema f2 = schema.getColumn(1);
+    Assert.assertEquals("c2", f2.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f2.getType());
+
+    // test 2nd level schema;
+    Schema f1Schema = f1.getSchema();
+    ColumnSchema f11 = f1Schema.getColumn(0);
+    Assert.assertEquals("f1", f11.getName());
+    Assert.assertEquals(ColumnType.INT, f11.getType());
+    ColumnSchema f12 = f1Schema.getColumn(1);
+    Assert.assertEquals("f2", f12.getName());
+    Assert.assertEquals(ColumnType.INT, f12.getType());
+
+    Schema f2Schema = f2.getSchema();
+    ColumnSchema f21 = f2Schema.getColumn(0);
+    Assert.assertNull(f21.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f21.getType());
+
+    // test 3rd level schema;
+    Schema f21Schema = f21.getSchema();
+    ColumnSchema f211 = f21Schema.getColumn(0);
+    Assert.assertNull(f211.getName());
+    Assert.assertEquals(ColumnType.COLLECTION, f211.getType());
+    Schema f211Schema = f211.getSchema();
+    
+    ColumnSchema f212 = f211Schema.getColumn(0);
+    Assert.assertNull(f212.getName());
+    Assert.assertEquals(ColumnType.RECORD, f212.getType());
+    Schema f212Schema = f212.getSchema();
+    ColumnSchema f213 = f212Schema.getColumn(0);
+    Assert.assertEquals("f3", f213.getName());
+    Assert.assertEquals(ColumnType.FLOAT, f213.getType());
+    ColumnSchema f214 = f212Schema.getColumn(1);
+    Assert.assertEquals("f4", f214.getName());
+    Assert.assertEquals(ColumnType.BYTES, f214.getType());
+  }
+}
\ No newline at end of file

Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java Wed Nov 25 19:37:41 2009
@@ -150,7 +150,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \"<EOF>\" at line 1, column 10.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);
@@ -169,7 +169,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \" \",\" \", \"\" at line 1, column 11.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);
@@ -188,7 +188,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \" <IDENTIFIER> \"m2 \"\" at line 1, column 8.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);
@@ -207,7 +207,7 @@
       System.out.println(schema);
     } catch (Exception e) {
       String errMsg = e.getMessage();
-      String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+      String str = "Encountered \" <IDENTIFIER> \"abc \"\" at line 1, column 8.";
       System.out.println(errMsg);
       System.out.println(str);
       Assert.assertEquals(errMsg.startsWith(str), true);