You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2011/02/03 21:03:00 UTC

svn commit: r1066933 - in /pig/trunk: ./ src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/util/ src/org/apache/pig/newplan/logical/relational/ test/org/apache/pig/test/ test/org/apache/pig/test/utils/

Author: gates
Date: Thu Feb  3 20:03:00 2011
New Revision: 1066933

URL: http://svn.apache.org/viewvc?rev=1066933&view=rev
Log:
PIG-1717 pig needs to call setPartitionFilter if schema is null but getPartitionKeys is not


Added:
    pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java
    pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java
    pig/trunk/src/org/apache/pig/impl/util/Utils.java
    pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Feb  3 20:03:00 2011
@@ -90,6 +90,9 @@ PIG-1696: Performance: Use System.arrayc
 
 BUG FIXES
 
+PIG-1717 pig needs to call setPartitionFilter if schema is null but
+getPartitionKeys is not (gerritjvv via gates)
+
 PIG-313: Error handling aggregate of a computation (daijy)
 
 PIG-496: project of bags from complex data causes failures (daijy)

Modified: pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java (original)
+++ pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java Thu Feb  3 20:03:00 2011
@@ -25,14 +25,15 @@ import java.util.TreeSet;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
 import org.apache.pig.LoadFunc;
 import org.apache.pig.LoadMetadata;
 import org.apache.pig.LoadPushDown;
-import org.apache.pig.PigException;
-import org.apache.pig.ResourceSchema;
 import org.apache.pig.LoadPushDown.RequiredField;
 import org.apache.pig.LoadPushDown.RequiredFieldList;
 import org.apache.pig.LoadPushDown.RequiredFieldResponse;
+import org.apache.pig.PigException;
+import org.apache.pig.ResourceSchema;
 import org.apache.pig.data.DataType;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.io.FileSpec;
@@ -43,11 +44,8 @@ import org.apache.pig.impl.plan.Projecti
 import org.apache.pig.impl.plan.RequiredFields;
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.impl.util.MultiMap;
+import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.pig.impl.util.Pair;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
 
 public class LOLoad extends RelationalOperator {
     private static final long serialVersionUID = 2L;
@@ -385,10 +383,51 @@ public class LOLoad extends RelationalOp
     }
     
     @Override
-    public void setAlias(String newAlias) {
-        super.setAlias(newAlias);
-        mLoadFunc.setUDFContextSignature(getAlias());
-    }
+	public void setAlias(String newAlias) {
+		super.setAlias(newAlias);
+		mLoadFunc.setUDFContextSignature(getAlias());
+
+		// set the schema in this method using the new alias assigned
+		storeScriptSchema();
+	}
+
+	/**
+	 * This method will store the scriptSchema:Schema using ObjectSerializer to
+	 * the current configuration.<br/>
+	 * The schema can be retrieved by load functions or UDFs to know the schema
+	 * the user entered in the as clause.<br/>
+	 * The name format is:<br/>
+	 * 
+	 * <pre>
+	 * ${UDFSignature}.scriptSchema = ObjectSerializer.serialize(scriptSchema)
+	 * </pre>
+	 * <p/>
+	 * Note that this is not the schema the load function returns but will
+	 * always be the as clause schema.<br/>
+	 * That is a = LOAD 'input' as (a:chararray, b:chararray)<br/>
+	 * The schema will be (a:chararray, b:chararray)<br/>
+	 * <p/>
+	 * 
+	 * TODO Find better solution to make script schema available to LoadFunc see
+	 * https://issues.apache.org/jira/browse/PIG-1717
+	 */
+	private void storeScriptSchema() {
+		String alias = getAlias();
+		if (!(conf == null || alias == null || scriptSchema == null)) {
+
+			try {
+
+				conf.set(alias + ".scriptSchema",
+						ObjectSerializer.serialize(scriptSchema));
+			} catch (IOException ioe) {
+				int errCode = 1018;
+				String msg = "Problem serializing script schema";
+				FrontendException fee = new FrontendException(msg, errCode,
+						PigException.INPUT, false, null, ioe);
+				throw new RuntimeException(fee);
+			}
+		}
+	}
 
     @Override
     public boolean pruneColumns(List<Pair<Integer, Integer>> columns)

Modified: pig/trunk/src/org/apache/pig/impl/util/Utils.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/Utils.java?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/Utils.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/Utils.java Thu Feb  3 20:03:00 2011
@@ -97,6 +97,27 @@ public class Utils {
         }
     }
 
+	/**
+	 * A helper function for retrieving the script schema set by the LOLoad
+	 * operator.
+	 * 
+	 * @param loadFuncSignature
+	 * @param conf
+	 * @return Schema
+	 * @throws IOException
+	 */
+	public static Schema getScriptSchema(String loadFuncSignature,
+			Configuration conf) throws IOException {
+		Schema scriptSchema = null;
+		String scriptField = conf.get(loadFuncSignature + ".scriptSchema");
+
+		if (scriptField != null) {
+			scriptSchema = (Schema) ObjectSerializer.deserialize(scriptField);
+		}
+
+		return scriptSchema;
+	}
+    
     public static ResourceSchema getSchema(LoadFunc wrappedLoadFunc, String location, boolean checkExistence, Job job)
     throws IOException {
         Configuration conf = job.getConfiguration();

Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java Thu Feb  3 20:03:00 2011
@@ -25,12 +25,14 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.pig.LoadFunc;
 import org.apache.pig.LoadMetadata;
+import org.apache.pig.PigException;
 import org.apache.pig.ResourceSchema;
 import org.apache.pig.data.DataType;
 import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.io.FileSpec;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.pig.newplan.Operator;
 import org.apache.pig.newplan.PlanVisitor;
 import org.apache.pig.newplan.logical.Util;
@@ -154,6 +156,53 @@ public class LOLoad extends LogicalRelat
         return null;
     }
 
+	@Override
+	public void setAlias(String alias) {
+		super.setAlias(alias);
+
+		// set the schema in this method using the new alias assigned
+		storeScriptSchema();
+	}
+	
+	/**
+	 * This method will store the scriptSchema:Schema using ObjectSerializer to
+	 * the current configuration.<br/>
+	 * The schema can be retrieved by load functions or UDFs to know the schema
+	 * the user entered in the as clause.<br/>
+	 * The name format is:<br/>
+	 * 
+	 * <pre>
+	 * ${UDFSignature}.scriptSchema = ObjectSerializer.serialize(scriptSchema)
+	 * </pre>
+	 * <p/>
+	 * Note that this is not the schema the load function returns but will
+	 * always be the as clause schema.<br/>
+	 * That is a = LOAD 'input' as (a:chararray, b:chararray)<br/>
+	 * The schema will be (a:chararray, b:chararray)<br/>
+	 * <p/>
+	 * 
+	 * TODO Find better solution to make script schema available to LoadFunc see
+	 * https://issues.apache.org/jira/browse/PIG-1717
+	 */
+	private void storeScriptSchema() {
+		String alias = getAlias();
+		if (!(conf == null || alias == null || scriptSchema == null)) {
+
+			try {
+
+				conf.set(alias + ".scriptSchema", ObjectSerializer
+						.serialize(Util.translateSchema(scriptSchema)));
+
+			} catch (IOException ioe) {
+				int errCode = 1018;
+				String msg = "Problem serializing script schema";
+				FrontendException fee = new FrontendException(msg, errCode,
+						PigException.INPUT, false, null, ioe);
+				throw new RuntimeException(fee);
+			}
+		}
+	}
+
     public FileSpec getFileSpec() {
         return fs;
     }

Added: pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java?rev=1066933&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java (added)
+++ pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java Thu Feb  3 20:03:00 2011
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.pig.ExecType;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.PigServer;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.test.utils.ScriptSchemaTestLoader;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * 
+ * Tests that the LOLoad class sets the script schema correctly as expected.<br/>
+ * This way of passing the script schema is not an optimal solution but will be
+ * used currently in order not to break code by adding new methods to the
+ * LoadFunc or other Classes. For more information please see
+ * https://issues.apache.org/jira/browse/PIG-1717
+ * 
+ */
+@RunWith(JUnit4.class)
+public class TestLOLoadDeterminedSchema extends junit.framework.TestCase {
+
+	PigContext pc;
+	PigServer server;
+
+	File baseDir;
+	File inputFile;
+
+	/**
+	 * Loads a test file using ScriptSchemaTestLoader with a user defined schema
+	 * a,b,c.<br/>
+	 * Then tests that the ScriptSchemaTestLoader found the schema.
+	 * 
+	 * @throws IOException
+	 */
+	@Test
+	public void testDeterminedSchema() throws IOException {
+
+		FuncSpec funcSpec = new FuncSpec(ScriptSchemaTestLoader.class.getName()
+				+ "()");
+
+		server.registerFunction(ScriptSchemaTestLoader.class.getName(),
+				funcSpec);
+
+		server.registerQuery("a = LOAD '" + inputFile.getAbsolutePath()
+				+ "' using " + ScriptSchemaTestLoader.class.getName()
+				+ "() as (a, b, c) ;");
+
+		server.openIterator("a");
+
+		Schema scriptSchema = ScriptSchemaTestLoader.getScriptSchema();
+
+		assertNotNull(scriptSchema);
+		assertEquals(3, scriptSchema.size());
+
+		assertNotNull(scriptSchema.getField("a"));
+		assertNotNull(scriptSchema.getField("b"));
+		assertNotNull(scriptSchema.getField("c"));
+
+	}
+
+	@Override
+	@Before
+	public void setUp() throws Exception {
+		FileLocalizer.deleteTempFiles();
+		server = new PigServer(ExecType.LOCAL, new Properties());
+
+		baseDir = new File("build/testLoLoadDeterminedSchema");
+
+		if (baseDir.exists()) {
+			FileUtil.fullyDelete(baseDir);
+		}
+
+		assertTrue(baseDir.mkdirs());
+
+		inputFile = new File(baseDir, "testInput.txt");
+		inputFile.createNewFile();
+
+		// write a short input
+		FileWriter writer = new FileWriter(inputFile);
+		try {
+			writer.write("a\tb\tc");
+		} finally {
+			writer.close();
+		}
+
+	}
+
+	@Override
+	@After
+	public void tearDown() throws Exception {
+		if (baseDir.exists())
+			FileUtil.fullyDelete(baseDir);
+
+		server.shutdown();
+	}
+
+}

Added: pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java?rev=1066933&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java (added)
+++ pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java Thu Feb  3 20:03:00 2011
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.Utils;
+import org.apache.pig.test.PigStorageWithSchema;
+
+/**
+ * 
+ * Used to test that the LOLoad operator is setting the script schema correctly.
+ * 
+ */
+public class ScriptSchemaTestLoader extends PigStorageWithSchema {
+
+	// this field needs to be static because pig will instantiate the
+	// instance
+	// and the unit test will have no other way of directly reaching this
+	// variable
+	static volatile Schema scriptSchema;
+
+	@Override
+	public ResourceSchema getSchema(String location, Job job)
+			throws IOException {
+
+		scriptSchema = Utils.getScriptSchema(getUDFContextSignature(),
+				job.getConfiguration());
+
+		return null;
+	}
+
+	public static Schema getScriptSchema() {
+		return scriptSchema;
+	}
+
+}