You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2009/11/25 20:37:42 UTC
svn commit: r884235 - in /hadoop/pig/trunk/contrib/zebra: ./
src/java/org/apache/hadoop/zebra/schema/
src/test/org/apache/hadoop/zebra/types/
Author: gates
Date: Wed Nov 25 19:37:41 2009
New Revision: 884235
URL: http://svn.apache.org/viewvc?rev=884235&view=rev
Log:
PIG-1095: Schema support of anonymous fields in COLLECTION fails.
Added:
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
Modified:
hadoop/pig/trunk/contrib/zebra/CHANGES.txt
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java
Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Nov 25 19:37:41 2009
@@ -28,7 +28,11 @@
OPTIMIZATIONS
BUG FIXES
- PIG_1078: merge join with empty table failed (yanz via gates)
+
+ PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via
+ gates)
+
+ PIG-1078: merge join with empty table failed (yanz via gates)
PIG-1091: Exception when load with projection of map keys on a map column
that is not map split (yanz via gates).
Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java Wed Nov 25 19:37:41 2009
@@ -332,7 +332,7 @@
private ArrayList<ColumnSchema> mFields;
private HashMap<String, ColumnSchema> mNames;
- private boolean dupColNameAllowed;
+ private boolean projection;
/**
* Constructor - schema for empty schema (zero-column) .
@@ -342,6 +342,17 @@
}
/**
+ * Constructor - schema for empty projection/schema (zero-column) .
+ *
+ * @param projection
+ * A projection schema or not
+ */
+ public Schema(boolean projection) {
+ this.projection = projection;
+ init();
+ }
+
+ /**
* Constructor - create a schema from a string representation.
*
* @param schema
@@ -355,10 +366,9 @@
init(schema, false);
}
- public Schema(String schema, boolean dupAllowed) throws ParseException {
- dupColNameAllowed = dupAllowed;
- // suppose if duplicate is allowed, then it's from projection and hence virtual column is allowed
- init(schema, dupAllowed);
+ public Schema(String schema, boolean projection) throws ParseException {
+ this.projection = projection;
+ init(schema, projection);
}
public Schema(ColumnSchema fs) throws ParseException {
@@ -384,18 +394,16 @@
* Column to be added to the schema
*/
public void add(ColumnSchema f) throws ParseException {
- add(f, false);
- }
-
- private void add(ColumnSchema f, boolean dupAllowed) throws ParseException {
if (f == null) {
+ if (!projection)
+ throw new ParseException("Empty column schema is not allowed");
mFields.add(null);
return;
}
f.index = mFields.size();
mFields.add(f);
if (null != f && null != f.name) {
- if (mNames.put(f.name, f) != null && !dupAllowed && !dupColNameAllowed)
+ if (mNames.put(f.name, f) != null && !projection)
throw new ParseException("Duplicate field name: " + f.name);
}
}
@@ -684,7 +692,7 @@
org.apache.hadoop.zebra.tfile.Utils.writeString(out, toString());
}
- private void init(String[] columnNames, boolean virtualColAllowed) throws ParseException {
+ private void init(String[] columnNames, boolean projection) throws ParseException {
// the arg must be of type or they will be treated as the default type
mFields = new ArrayList<ColumnSchema>();
mNames = new HashMap<String, ColumnSchema>();
@@ -698,7 +706,10 @@
}
TableSchemaParser parser =
new TableSchemaParser(new StringReader(sb.toString()));
- parser.RecordSchema(this, virtualColAllowed);
+ if (projection)
+ parser.ProjectionSchema(this);
+ else
+ parser.RecordSchema(this);
}
private void init() {
@@ -706,7 +717,7 @@
mNames = new HashMap<String, ColumnSchema>();
}
- private void init(String columnString, boolean virtualColAllowed) throws ParseException {
+ private void init(String columnString, boolean projection) throws ParseException {
String trimmedColumnStr;
if (columnString == null || (trimmedColumnStr = columnString.trim()).isEmpty()) {
init();
@@ -717,7 +728,7 @@
for (int nx = 0; nx < parts.length; nx++) {
parts[nx] = parts[nx].trim();
}
- init(parts, virtualColAllowed);
+ init(parts, projection);
}
/**
@@ -727,7 +738,7 @@
HashMap<Schema.ColumnSchema, HashSet<String>> keysmap)
throws ParseException {
int ncols = projcols.length;
- Schema result = new Schema();
+ Schema result = new Schema(true);
ColumnSchema cs, mycs;
String keysStr;
String[] keys;
@@ -742,7 +753,7 @@
pn.setName(projcols[i]);
if ((cs = getColumnSchemaOnParsedName(pn)) != null) {
mycs = new ColumnSchema(pn.mName, cs.schema, cs.type);
- result.add(mycs, true);
+ result.add(mycs);
if (pn.mDT == ColumnType.MAP) {
keysStr = projcols[i].substring(pn.mKeyOffset);
if (!keysStr.startsWith("{") || !keysStr.endsWith("}"))
Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt (original)
+++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt Wed Nov 25 19:37:41 2009
@@ -40,10 +40,6 @@
objout.close();
System.out.println(schema.toString());
}
- public Schema RecordSchema(Schema list) throws ParseException
- {
- return RecordSchema(list, false);
- }
}
PARSER_END(TableSchemaParser)
@@ -121,16 +117,36 @@
}
{
(
- LOOKAHEAD(SchemaRecord()) fs = SchemaRecord()
-| LOOKAHEAD(SchemaCollection()) fs = SchemaCollection()
-| LOOKAHEAD(SchemaMap()) fs = SchemaMap()
-| LOOKAHEAD(AtomSchema()) fs = AtomSchema()
+ LOOKAHEAD(3) fs = SchemaRecord()
+| LOOKAHEAD(3) fs = SchemaCollection()
+| LOOKAHEAD(3) fs = SchemaMap()
+| fs = AtomSchema()
)
{
return fs;
}
}
+Schema.ColumnSchema ProjectionColumnSchema() throws ParseException:
+{
+ Token t1;
+ Schema item = null;
+ Schema.ColumnSchema fs = null;
+}
+{
+ (
+ (
+ LOOKAHEAD(3) fs = SchemaRecord()
+| LOOKAHEAD(3) fs = SchemaCollection()
+| LOOKAHEAD(3) fs = SchemaMap()
+| fs = AtomSchema()
+ )
+ { return fs; }
+|
+ { return null; }
+ )
+}
+
Schema.ColumnSchema AtomSchema() throws ParseException :
{
Token t1 = null;
@@ -143,10 +159,6 @@
fs = new Schema.ColumnSchema(t1.image, type);
return fs;
}
-|
- {
- return null;
- }
)
}
@@ -171,7 +183,7 @@
Schema.ColumnSchema fs;
}
{
- t1 = <IDENTIFIER> ":" <RECORD> "(" s = RecordSchemaInternal(null) ")"
+ t1 = <IDENTIFIER> ":" <RECORD> "(" s = RecordSchemaInternal() ")"
{
fs = new Schema.ColumnSchema(t1.image, s, ColumnType.RECORD);
return fs;
@@ -196,12 +208,14 @@
Schema.ColumnSchema fs = null;
}
{
- (
- LOOKAHEAD(RecordSchemaInternal(null)) s= RecordSchemaInternal(null)
-|
- fs = AnonymousColumnSchema()
- )
- { if (s == null) s = new Schema(fs); fs = new Schema.ColumnSchema(id, s, ColumnType.COLLECTION); return fs; }
+ (
+ s= RecordSchemaInternal()
+| fs = AnonymousColumnSchema()
+ )
+ { if (s == null) s = new Schema(fs);
+ fs = new Schema.ColumnSchema(id, s, ColumnType.COLLECTION);
+ return fs;
+ }
}
Schema.ColumnSchema AnonymousColumnSchema() throws ParseException :
@@ -212,10 +226,10 @@
}
{
(
- LOOKAHEAD(AnonymousSchemaRecord()) fs = AnonymousSchemaRecord()
-| LOOKAHEAD(AnonymousSchemaCollection()) fs = AnonymousSchemaCollection()
-| LOOKAHEAD(AnonymousSchemaMap()) fs = AnonymousSchemaMap()
-| LOOKAHEAD(AnonymousAtomSchema()) fs = AnonymousAtomSchema()
+ fs = AnonymousSchemaRecord()
+| fs = AnonymousSchemaCollection()
+| fs = AnonymousSchemaMap()
+| fs = AnonymousAtomSchema()
)
{
return fs;
@@ -256,7 +270,7 @@
Schema.ColumnSchema fs;
}
{
- <RECORD> "(" s = RecordSchemaInternal(null) ")"
+ <RECORD> "(" s = RecordSchemaInternal() ")"
{
fs = new Schema.ColumnSchema(null, s, ColumnType.RECORD);
return fs;
@@ -269,56 +283,59 @@
Schema.ColumnSchema fs;
}
{
- ( <COLLECTION> "(" s = RecordSchemaInternal(null) ")" )
+ ( <COLLECTION> "(" fs = SchemaCollectionEntry(null) ")" )
{
+ s = new Schema(fs);
fs = new Schema.ColumnSchema(null, s, ColumnType.COLLECTION);
return fs;
}
}
-Schema RecordSchemaInternal(Schema list) throws ParseException :
+Schema RecordSchemaInternal() throws ParseException :
+{
+ Schema list = new Schema();
+ Schema.ColumnSchema fs = null;
+}
+{
+ fs = ColumnSchema() {list.add(fs);} ( "," fs = ColumnSchema() {list.add(fs);})*
+ { return list; }
+}
+
+Schema RecordSchema(Schema list) throws ParseException :
{
- Schema item = null;
if (list == null)
list = new Schema();
Schema.ColumnSchema fs = null;
}
{
(
- (
- fs = ColumnSchema() {list.add(fs);}
- ( "," fs = ColumnSchema() {list.add(fs);})*
+ fs = ColumnSchema() { if (fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);}
+ ( "," fs = ColumnSchema() { if (fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);})* <EOF>
)
- )
- { return list; }
+ { return (list.getNumColumns() == 0 || (list.getNumColumns() == 1 && list.getColumn(0) == null) ? null : list); }
}
-Schema RecordSchema(Schema list, boolean virtualColAllowed) throws ParseException :
+Schema ProjectionSchema(Schema list) throws ParseException :
{
- Schema item = null;
if (list == null)
list = new Schema();
Schema.ColumnSchema fs = null;
}
{
(
- (
- fs = ColumnSchema() { if (!virtualColAllowed && fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);}
- ( "," fs = ColumnSchema() { if (!virtualColAllowed && fs != null && Projection.isVirtualColumn(fs.getName())) throw new ParseException("["+fs.getName()+"] is a reserved virtual column name"); list.add(fs);})* <EOF>
+ fs = ProjectionColumnSchema() { list.add(fs);}
+ ( "," fs = ProjectionColumnSchema() { list.add(fs);})* <EOF>
)
- )
{ return (list.getNumColumns() == 0 || (list.getNumColumns() == 1 && list.getColumn(0) == null) ? null : list); }
}
Schema MapSchema() throws ParseException :
{
- Schema item = null;
Schema list = new Schema();
Schema.ColumnSchema fs = null;
}
{
(
- LOOKAHEAD(3)
(
"(" fs = AnonymousColumnSchema() ")"
)
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java?rev=884235&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java (added)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java Wed Nov 25 19:37:41 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.zebra.types;
+
+import java.io.StringReader;
+import junit.framework.Assert;
+
+import org.apache.hadoop.zebra.schema.ColumnType;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.parser.TableSchemaParser;
+import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
+import org.junit.Test;
+
+public class TestSchemaAnonymousCollection {
+ @Test
+ public void testSchemaValid1() throws ParseException {
+ String strSch = "c1:collection(f1:int, f2:int), c2:collection(collection(record(f3:float, f4)))";
+ TableSchemaParser parser;
+ Schema schema;
+
+ parser = new TableSchemaParser(new StringReader(strSch));
+ schema = parser.RecordSchema(null);
+ System.out.println(schema);
+
+ // test 1st level schema;
+ ColumnSchema f1 = schema.getColumn(0);
+ Assert.assertEquals("c1", f1.getName());
+ Assert.assertEquals(ColumnType.COLLECTION, f1.getType());
+
+ ColumnSchema f2 = schema.getColumn(1);
+ Assert.assertEquals("c2", f2.getName());
+ Assert.assertEquals(ColumnType.COLLECTION, f2.getType());
+
+ // test 2nd level schema;
+ Schema f1Schema = f1.getSchema();
+ ColumnSchema f11 = f1Schema.getColumn(0);
+ Assert.assertEquals("f1", f11.getName());
+ Assert.assertEquals(ColumnType.INT, f11.getType());
+ ColumnSchema f12 = f1Schema.getColumn(1);
+ Assert.assertEquals("f2", f12.getName());
+ Assert.assertEquals(ColumnType.INT, f12.getType());
+
+ Schema f2Schema = f2.getSchema();
+ ColumnSchema f21 = f2Schema.getColumn(0);
+ Assert.assertNull(f21.getName());
+ Assert.assertEquals(ColumnType.COLLECTION, f21.getType());
+
+ // test 3rd level schema;
+ Schema f21Schema = f21.getSchema();
+ ColumnSchema f211 = f21Schema.getColumn(0);
+ Assert.assertNull(f211.getName());
+ Assert.assertEquals(ColumnType.COLLECTION, f211.getType());
+ Schema f211Schema = f211.getSchema();
+
+ ColumnSchema f212 = f211Schema.getColumn(0);
+ Assert.assertNull(f212.getName());
+ Assert.assertEquals(ColumnType.RECORD, f212.getType());
+ Schema f212Schema = f212.getSchema();
+ ColumnSchema f213 = f212Schema.getColumn(0);
+ Assert.assertEquals("f3", f213.getName());
+ Assert.assertEquals(ColumnType.FLOAT, f213.getType());
+ ColumnSchema f214 = f212Schema.getColumn(1);
+ Assert.assertEquals("f4", f214.getName());
+ Assert.assertEquals(ColumnType.BYTES, f214.getType());
+ }
+}
\ No newline at end of file
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java?rev=884235&r1=884234&r2=884235&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java (original)
+++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java Wed Nov 25 19:37:41 2009
@@ -150,7 +150,7 @@
System.out.println(schema);
} catch (Exception e) {
String errMsg = e.getMessage();
- String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+ String str = "Encountered \"<EOF>\" at line 1, column 10.";
System.out.println(errMsg);
System.out.println(str);
Assert.assertEquals(errMsg.startsWith(str), true);
@@ -169,7 +169,7 @@
System.out.println(schema);
} catch (Exception e) {
String errMsg = e.getMessage();
- String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+ String str = "Encountered \" \",\" \", \"\" at line 1, column 11.";
System.out.println(errMsg);
System.out.println(str);
Assert.assertEquals(errMsg.startsWith(str), true);
@@ -188,7 +188,7 @@
System.out.println(schema);
} catch (Exception e) {
String errMsg = e.getMessage();
- String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+ String str = "Encountered \" <IDENTIFIER> \"m2 \"\" at line 1, column 8.";
System.out.println(errMsg);
System.out.println(str);
Assert.assertEquals(errMsg.startsWith(str), true);
@@ -207,7 +207,7 @@
System.out.println(schema);
} catch (Exception e) {
String errMsg = e.getMessage();
- String str = "Encountered \" \"(\" \"( \"\" at line 1, column 7.";
+ String str = "Encountered \" <IDENTIFIER> \"abc \"\" at line 1, column 8.";
System.out.println(errMsg);
System.out.println(str);
Assert.assertEquals(errMsg.startsWith(str), true);