You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2010/02/01 20:07:31 UTC

svn commit: r905373 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/schema/Schema.java test/org/apache/pig/test/InputSchemaUDF.java test/org/apache/pig/test/TestSchema.java

Author: pradeepkth
Date: Mon Feb  1 19:07:30 2010
New Revision: 905373

URL: http://svn.apache.org/viewvc?rev=905373&view=rev
Log:
PIG-1213: Schema serialization is broken (pradeepkth)

Added:
    hadoop/pig/trunk/test/org/apache/pig/test/InputSchemaUDF.java
Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=905373&r1=905372&r2=905373&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Mon Feb  1 19:07:30 2010
@@ -80,6 +80,8 @@
 
 BUG FIXES
 
+PIG-1213: Schema serialization is broken (pradeepkth)
+
 PIG-1194:  ERROR 2055: Received Error while processing the map plan (rding via ashutoshc)
 
 PIG-1204:  Pig hangs when joining two streaming relations in local mode

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java?rev=905373&r1=905372&r2=905373&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/schema/Schema.java Mon Feb  1 19:07:30 2010
@@ -99,7 +99,10 @@
          * logical operator, and the value is the canonical name
          * associated with the field for that operator.
          */
-        private Map<String, LogicalOperator> canonicalMap = null;
+        // marking transient since this data structure is only used in front-end
+        // query planning to figure out lineage for casts on bytearrays and need
+        // not be serialized to the backend
+        transient private Map<String, LogicalOperator> canonicalMap = null;
 
         /**
          * A reverse lookup of canonical names to logical operators. The reverse
@@ -107,7 +110,10 @@
          * cannot be determined. In such cases the keys of the reverse lookup
          * can be used to navigate the plan
          */
-        private MultiMap<LogicalOperator, String> reverseCanonicalMap = null;
+        // marking transient since this data structure is only used in front-end
+        // query planning to figure out lineage for casts on bytearrays and need
+        // not be serialized to the backend
+        transient private MultiMap<LogicalOperator, String> reverseCanonicalMap = null;
         
         /**
          * Canonical namer object to generate new canonical names on
@@ -848,6 +854,7 @@
      * Make a deep copy of a schema.
      * @throws CloneNotSupportedException
      */
+    @Override
     public Schema clone() throws CloneNotSupportedException {
         Schema s = new Schema();
 

Added: hadoop/pig/trunk/test/org/apache/pig/test/InputSchemaUDF.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/InputSchemaUDF.java?rev=905373&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/InputSchemaUDF.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/InputSchemaUDF.java Mon Feb  1 19:07:30 2010
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.test;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.UDFContext;
+
+/**
+ * Test-only UDF: outputSchema() stores its input Schema in the UDFContext properties under "myschema"; exec() reads it back and returns its toString().
+ */
+public class InputSchemaUDF extends EvalFunc<String>{
+
+    @Override
+    public String exec(Tuple input) throws IOException { // the input tuple itself is never read
+        Schema sch = (Schema)UDFContext.getUDFContext().getUDFProperties(this.getClass()).get("myschema"); // value put there by outputSchema(); null if the key was never set
+        return sch.toString(); // NOTE(review): throws NullPointerException when "myschema" is absent -- confirm that is the intended failure mode
+    }
+    
+    @Override
+    public Schema outputSchema(Schema input) {
+        Properties props = UDFContext.getUDFContext().getUDFProperties(this.getClass()); // properties looked up by this UDF's class
+        props.put("myschema", input); // stores the Schema object itself, not a string form of it
+        return new Schema(new Schema.FieldSchema(null, DataType.INTEGER)); // NOTE(review): declares one unnamed INTEGER field although exec() returns a String -- confirm intentional
+    }
+
+}

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java?rev=905373&r1=905372&r2=905373&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestSchema.java Mon Feb  1 19:07:30 2010
@@ -36,16 +36,22 @@
 
 package org.apache.pig.test;
 
-import java.util.* ;
-
-import org.apache.pig.data.* ;
-import org.apache.pig.impl.logicalLayer.schema.* ;
-import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
-
-import org.junit.* ;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
 
 import junit.framework.Assert;
-import junit.framework.TestCase ;
+import junit.framework.TestCase;
+
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.SchemaMergeException;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+import org.junit.Test;
 
 public class TestSchema extends TestCase {
     
@@ -634,4 +640,20 @@
     		Assert.assertTrue(Schema.FieldSchema.castable(castFieldSchema, inputFieldSchema));
     	}
     }
+    
+    public void testSchemaSerialization() throws IOException { // runs InputSchemaUDF through a MAPREDUCE PigServer on a MiniCluster and checks the schema string it returns
+        MiniCluster cluster = MiniCluster.buildCluster();
+        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+        String inputFileName = "testSchemaSerialization-input.txt";
+        String[] inputData = new String[] { "foo\t1", "hello\t2" }; // two tab-separated rows matching the (f1:chararray, f2:int) load schema below
+        Util.createInputFile(cluster, inputFileName, inputData);
+        String script = "a = load '"+ inputFileName +"' as (f1:chararray, f2:int);" +
+        		" b = group a all; c = foreach b generate org.apache.pig.test.InputSchemaUDF(a);"; // the UDF is applied to 'a' after 'group a all'
+        Util.registerMultiLineQuery(pigServer, script);
+        Iterator<Tuple> it = pigServer.openIterator("c");
+        while(it.hasNext()) { // NOTE(review): if 'c' yields no tuples the body never runs and the test passes without asserting anything
+            Tuple t = it.next();
+            assertEquals("{a: {f1: chararray,f2: int}}", t.get(0)); // expected toString() of the schema that InputSchemaUDF.outputSchema() stored
+        }
+    }
 }