You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2011/02/03 21:03:00 UTC
svn commit: r1066933 - in /pig/trunk: ./
src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/util/
src/org/apache/pig/newplan/logical/relational/ test/org/apache/pig/test/
test/org/apache/pig/test/utils/
Author: gates
Date: Thu Feb 3 20:03:00 2011
New Revision: 1066933
URL: http://svn.apache.org/viewvc?rev=1066933&view=rev
Log:
PIG-1717 pig needs to call setPartitionFilter if schema is null but getPartitionKeys is not
Added:
pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java
pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java
pig/trunk/src/org/apache/pig/impl/util/Utils.java
pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Feb 3 20:03:00 2011
@@ -90,6 +90,9 @@ PIG-1696: Performance: Use System.arrayc
BUG FIXES
+PIG-1717 pig needs to call setPartitionFilter if schema is null but
+getPartitionKeys is not (gerritjvv via gates)
+
PIG-313: Error handling aggregate of a computation (daijy)
PIG-496: project of bags from complex data causes failures (daijy)
Modified: pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java (original)
+++ pig/trunk/src/org/apache/pig/impl/logicalLayer/LOLoad.java Thu Feb 3 20:03:00 2011
@@ -25,14 +25,15 @@ import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.LoadFunc;
import org.apache.pig.LoadMetadata;
import org.apache.pig.LoadPushDown;
-import org.apache.pig.PigException;
-import org.apache.pig.ResourceSchema;
import org.apache.pig.LoadPushDown.RequiredField;
import org.apache.pig.LoadPushDown.RequiredFieldList;
import org.apache.pig.LoadPushDown.RequiredFieldResponse;
+import org.apache.pig.PigException;
+import org.apache.pig.ResourceSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileSpec;
@@ -43,11 +44,8 @@ import org.apache.pig.impl.plan.Projecti
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.MultiMap;
+import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.Pair;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.Job;
public class LOLoad extends RelationalOperator {
private static final long serialVersionUID = 2L;
@@ -385,10 +383,51 @@ public class LOLoad extends RelationalOp
}
@Override
- public void setAlias(String newAlias) {
- super.setAlias(newAlias);
- mLoadFunc.setUDFContextSignature(getAlias());
- }
+ public void setAlias(String newAlias) {
+ super.setAlias(newAlias);
+ mLoadFunc.setUDFContextSignature(getAlias());
+
+ // set the schema in this method using the new alias assigned
+ storeScriptSchema();
+ }
+
+ /**
+ * This method will store the scriptSchema:Schema using ObjectSerializer to
+ * the current configuration.<br/>
+ * The schema can be retrieved by load functions or UDFs to know the schema
+ * the user entered in the as clause.<br/>
+ * The name format is:<br/>
+ *
+ * <pre>
+ * ${UDFSignature}.scriptSchema = ObjectSerializer.serialize(scriptSchema)
+ * </pre>
+ * <p/>
+ * Note that this is not the schema the load function returns but will
+ * always be the as clause schema.<br/>
+ * That is a = LOAD 'input' as (a:chararray, b:chararray)<br/>
+ * The schema will be (a:chararray, b:chararray)<br/>
+ * <p/>
+ *
+ * TODO Find better solution to make script schema available to LoadFunc see
+ * https://issues.apache.org/jira/browse/PIG-1717
+ */
+ private void storeScriptSchema() {
+ String alias = getAlias();
+ if (!(conf == null || alias == null || scriptSchema == null)) {
+
+ try {
+
+ conf.set(alias + ".scriptSchema",
+ ObjectSerializer.serialize(scriptSchema));
+ } catch (IOException ioe) {
+ int errCode = 1018;
+ String msg = "Problem serializing script schema";
+ FrontendException fee = new FrontendException(msg, errCode,
+ PigException.INPUT, false, null, ioe);
+ throw new RuntimeException(fee);
+ }
+ }
+ }
@Override
public boolean pruneColumns(List<Pair<Integer, Integer>> columns)
Modified: pig/trunk/src/org/apache/pig/impl/util/Utils.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/Utils.java?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/Utils.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/Utils.java Thu Feb 3 20:03:00 2011
@@ -97,6 +97,27 @@ public class Utils {
}
}
+ /**
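+ * A helper function for retrieving the script schema stored in the
+ * configuration by the LOLoad operator.
+ *
+ * @param loadFuncSignature prefix of the ".scriptSchema" configuration key
+ * @param conf configuration to read the serialized schema from
+ * @return the deserialized Schema, or null if the key is not set
+ * @throws IOException if deserializing the stored value fails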
+ */
+ public static Schema getScriptSchema(String loadFuncSignature,
+ Configuration conf) throws IOException {
+ Schema scriptSchema = null;
+ String scriptField = conf.get(loadFuncSignature + ".scriptSchema");
+
+ if (scriptField != null) {
+ scriptSchema = (Schema) ObjectSerializer.deserialize(scriptField);
+ }
+
+ return scriptSchema;
+ }
+
public static ResourceSchema getSchema(LoadFunc wrappedLoadFunc, String location, boolean checkExistence, Job job)
throws IOException {
Configuration conf = job.getConfiguration();
Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java?rev=1066933&r1=1066932&r2=1066933&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LOLoad.java Thu Feb 3 20:03:00 2011
@@ -25,12 +25,14 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.LoadFunc;
import org.apache.pig.LoadMetadata;
+import org.apache.pig.PigException;
import org.apache.pig.ResourceSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.PlanVisitor;
import org.apache.pig.newplan.logical.Util;
@@ -154,6 +156,53 @@ public class LOLoad extends LogicalRelat
return null;
}
+ @Override
+ public void setAlias(String alias) {
+ super.setAlias(alias);
+
+ // set the schema in this method using the new alias assigned
+ storeScriptSchema();
+ }
+
+ /**
+ * This method will store the scriptSchema:Schema using ObjectSerializer to
+ * the current configuration.<br/>
+ * The schema can be retrieved by load functions or UDFs to know the schema
+ * the user entered in the as clause.<br/>
+ * The name format is:<br/>
+ *
+ * <pre>
+ * ${UDFSignature}.scriptSchema = ObjectSerializer.serialize(scriptSchema)
+ * </pre>
+ * <p/>
+ * Note that this is not the schema the load function returns but will
+ * always be the as clause schema.<br/>
+ * That is a = LOAD 'input' as (a:chararray, b:chararray)<br/>
+ * The schema will be (a:chararray, b:chararray)<br/>
+ * <p/>
+ *
+ * TODO Find better solution to make script schema available to LoadFunc see
+ * https://issues.apache.org/jira/browse/PIG-1717
+ */
+ private void storeScriptSchema() {
+ String alias = getAlias();
+ if (!(conf == null || alias == null || scriptSchema == null)) {
+
+ try {
+
+ conf.set(alias + ".scriptSchema", ObjectSerializer
+ .serialize(Util.translateSchema(scriptSchema)));
+
+ } catch (IOException ioe) {
+ int errCode = 1018;
+ String msg = "Problem serializing script schema";
+ FrontendException fee = new FrontendException(msg, errCode,
+ PigException.INPUT, false, null, ioe);
+ throw new RuntimeException(fee);
+ }
+ }
+ }
+
public FileSpec getFileSpec() {
return fs;
}
Added: pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java?rev=1066933&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java (added)
+++ pig/trunk/test/org/apache/pig/test/TestLOLoadDeterminedSchema.java Thu Feb 3 20:03:00 2011
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.pig.ExecType;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.PigServer;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.test.utils.ScriptSchemaTestLoader;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ *
+ * Tests that the LOLoad class sets the script schema correctly as expected.<br/>
+ * This way of passing the script schema is not an optimal solution, but it is
+ * used for now in order not to break code by adding new methods to
+ * LoadFunc or other classes. For more information please see
+ * https://issues.apache.org/jira/browse/PIG-1717
+ *
+ */
+@RunWith(JUnit4.class)
+public class TestLOLoadDeterminedSchema extends junit.framework.TestCase {
+
+ PigContext pc;
+ PigServer server;
+
+ File baseDir;
+ File inputFile;
+
+ /**
+ * Loads a test file using ScriptSchemaTestLoader with a user defined schema
+ * a,b,c.<br/>
+ * Then tests that the ScriptSchemaTestLoader found the schema.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testDeterminedSchema() throws IOException {
+
+ FuncSpec funcSpec = new FuncSpec(ScriptSchemaTestLoader.class.getName()
+ + "()");
+
+ server.registerFunction(ScriptSchemaTestLoader.class.getName(),
+ funcSpec);
+
+ server.registerQuery("a = LOAD '" + inputFile.getAbsolutePath()
+ + "' using " + ScriptSchemaTestLoader.class.getName()
+ + "() as (a, b, c) ;");
+
+ server.openIterator("a");
+
+ Schema scriptSchema = ScriptSchemaTestLoader.getScriptSchema();
+
+ assertNotNull(scriptSchema);
+ assertEquals(3, scriptSchema.size());
+
+ assertNotNull(scriptSchema.getField("a"));
+ assertNotNull(scriptSchema.getField("b"));
+ assertNotNull(scriptSchema.getField("c"));
+
+ }
+
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ FileLocalizer.deleteTempFiles();
+ server = new PigServer(ExecType.LOCAL, new Properties());
+
+ baseDir = new File("build/testLoLoadDeterminedSchema");
+
+ if (baseDir.exists()) {
+ FileUtil.fullyDelete(baseDir);
+ }
+
+ assertTrue(baseDir.mkdirs());
+
+ inputFile = new File(baseDir, "testInput.txt");
+ inputFile.createNewFile();
+
+ // write a short input
+ FileWriter writer = new FileWriter(inputFile);
+ try {
+ writer.write("a\tb\tc");
+ } finally {
+ writer.close();
+ }
+
+ }
+
+ @Override
+ @After
+ public void tearDown() throws Exception {
+ if (baseDir.exists())
+ FileUtil.fullyDelete(baseDir);
+
+ server.shutdown();
+ }
+
+}
Added: pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java?rev=1066933&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java (added)
+++ pig/trunk/test/org/apache/pig/test/utils/ScriptSchemaTestLoader.java Thu Feb 3 20:03:00 2011
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.Utils;
+import org.apache.pig.test.PigStorageWithSchema;
+
+/**
+ *
+ * Used to test that the LOLoad operator is setting the script schema correctly.
+ *
+ */
+public class ScriptSchemaTestLoader extends PigStorageWithSchema {
+
+ // this field has to be static because Pig creates the loader
+ // instance itself,
+ // so the unit test has no other way of directly reaching this
+ // variable
+ static volatile Schema scriptSchema;
+
+ @Override
+ public ResourceSchema getSchema(String location, Job job)
+ throws IOException {
+
+ scriptSchema = Utils.getScriptSchema(getUDFContextSignature(),
+ job.getConfiguration());
+
+ return null;
+ }
+
+ public static Schema getScriptSchema() {
+ return scriptSchema;
+ }
+
+}