You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2010/03/26 00:55:58 UTC
svn commit: r927645 - in /hadoop/pig/branches/branch-0.7: CHANGES.txt
src/org/apache/pig/impl/logicalLayer/LOLoad.java
test/org/apache/pig/test/TestLogicalOptimizer.java
Author: pradeepkth
Date: Thu Mar 25 23:55:57 2010
New Revision: 927645
URL: http://svn.apache.org/viewvc?rev=927645&view=rev
Log:
PIG-1317: LOLoad should cache results of LoadMetadata.getSchema() for use in subsequent calls to LOLoad.getSchema() or LOLoad.determineSchema() (pradeepkth)
Modified:
hadoop/pig/branches/branch-0.7/CHANGES.txt
hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/LOLoad.java
hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestLogicalOptimizer.java
Modified: hadoop/pig/branches/branch-0.7/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/CHANGES.txt?rev=927645&r1=927644&r2=927645&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.7/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.7/CHANGES.txt Thu Mar 25 23:55:57 2010
@@ -68,6 +68,10 @@ manner (rding via pradeepkth)
IMPROVEMENTS
+PIG-1317: LOLoad should cache results of LoadMetadata.getSchema() for use in
+subsequent calls to LOLoad.getSchema() or LOLoad.determineSchema()
+(pradeepkth)
+
PIG-1320: documentation updates for Pig 0.7.0 (chandec via olgan)
PIG-1325: Provide a way to exclude a testcase when running "ant test"
Modified: hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/LOLoad.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/LOLoad.java?rev=927645&r1=927644&r2=927645&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/LOLoad.java (original)
+++ hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/LOLoad.java Thu Mar 25 23:55:57 2010
@@ -60,6 +60,8 @@ public class LOLoad extends RelationalOp
private static Log log = LogFactory.getLog(LOLoad.class);
private Schema mDeterminedSchema = null;
private RequiredFieldList requiredFieldList;
+
+ private boolean mDeterminedSchemaCached = false;
/**
* @param plan
@@ -146,7 +148,6 @@ public class LOLoad extends RelationalOp
if(null == mDeterminedSchema) {
mSchema = determineSchema();
- mDeterminedSchema = mSchema;
}
mIsSchemaComputed = true;
} catch (IOException ioe) {
@@ -162,13 +163,18 @@ public class LOLoad extends RelationalOp
}
private Schema determineSchema() throws IOException {
- if(LoadMetadata.class.isAssignableFrom(mLoadFunc.getClass())) {
- LoadMetadata loadMetadata = (LoadMetadata)mLoadFunc;
- ResourceSchema rSchema = loadMetadata.getSchema(
- mInputFileSpec.getFileName(), new Job(conf));
- return Schema.getPigSchema(rSchema);
+ if(!mDeterminedSchemaCached) {
+ if(LoadMetadata.class.isAssignableFrom(mLoadFunc.getClass())) {
+ LoadMetadata loadMetadata = (LoadMetadata)mLoadFunc;
+ ResourceSchema rSchema = loadMetadata.getSchema(
+ mInputFileSpec.getFileName(), new Job(conf));
+ mDeterminedSchema = Schema.getPigSchema(rSchema);
+ }
+ // set the flag so that future calls just use mDeterminedSchema
+ mDeterminedSchemaCached = true;
+ return mDeterminedSchema;
} else {
- return null;
+ return mDeterminedSchema;
}
}
/* (non-Javadoc)
Modified: hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestLogicalOptimizer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestLogicalOptimizer.java?rev=927645&r1=927644&r2=927645&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestLogicalOptimizer.java (original)
+++ hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestLogicalOptimizer.java Thu Mar 25 23:55:57 2010
@@ -17,15 +17,27 @@
*/
package org.apache.pig.test;
+import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
+import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.ExecType;
+import org.apache.pig.Expression;
+import org.apache.pig.LoadMetadata;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceStatistics;
+import org.apache.pig.builtin.BinStorage;
+import org.apache.pig.impl.logicalLayer.LOPrinter;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
+import org.apache.pig.impl.logicalLayer.PlanSetter;
import org.apache.pig.impl.logicalLayer.optimizer.ImplicitSplitInserter;
import org.apache.pig.impl.logicalLayer.optimizer.LogicalOptimizer;
import org.apache.pig.impl.logicalLayer.optimizer.OpLimitOptimizer;
import org.apache.pig.impl.logicalLayer.optimizer.TypeCastInserter;
+import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
+import org.apache.pig.impl.util.Utils;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.junit.Test;
@@ -255,6 +267,37 @@ public class TestLogicalOptimizer extend
optimizePlan(plan);
}
+ /**
+ * test to check that {@link LoadMetadata#getSchema(String, Job)} is called
+ * only once even if the optimizer is fired and schemas and projection maps
+ * are rebuilt
+ */
+ @Test
+ public void testLoadGetSchemaCalledOnce() throws Exception {
+ String checkFileName = "checkLoadGetSchemaCalledOnce.txt";
+ new File(checkFileName).delete();
+ try{
+
+ planTester.buildPlan("A = load 'myfile' using "
+ + DummyMetadataLoader.class.getName() + "('"+ checkFileName +"');");
+ planTester.buildPlan("B = foreach A generate $0 ;");
+ LogicalPlan plan = planTester.buildPlan("C = limit B 10;");
+ new LOPrinter(System.err, plan).visit();
+ // Set the logical plan values correctly in all the operators
+ PlanSetter ps = new PlanSetter(plan);
+ ps.visit();
+ // the optimizer should run at least one iteration
+ LogicalOptimizerDerivative optimizer =
+ new LogicalOptimizerDerivative(plan);
+ int numIterations = optimizer.optimize();
+ assertTrue(numIterations > 0);
+ assertTrue(new File(checkFileName).exists());
+ } finally {
+ new File(checkFileName).delete();
+ }
+
+ }
+
// a subclass of LogicalOptimizer which can return the maximum iterations
// the optimizer would try the check() and transform() methods
static class LogicalOptimizerDerivative extends LogicalOptimizer {
@@ -266,5 +309,59 @@ public class TestLogicalOptimizer extend
return mMaxIterations;
}
}
+
+ /**
+ * A dummy loader which implements {@link LoadMetadata} and in the
+ * {@link LoadMetadata#getSchema(String, Job)} implementation checks that
+ * the method is only called once.
+ */
+ public static class DummyMetadataLoader extends BinStorage implements LoadMetadata {
+
+ String checkFileName;
+
+ public DummyMetadataLoader() {
+
+ }
+
+ public DummyMetadataLoader(String checkFileName) {
+ this.checkFileName = checkFileName;
+ }
+
+ @Override
+ public String[] getPartitionKeys(String location, Job job)
+ throws IOException {
+ return null;
+ }
+
+ @Override
+ public ResourceSchema getSchema(String location, Job job)
+ throws IOException {
+ try {
+
+ // the createNewFile() below will return false if this method gets
+ // called more than once, and the check that follows then throws
+ if(!new File(checkFileName).createNewFile()) {
+ throw new RuntimeException(checkFileName + " already exists!");
+ }
+ return new ResourceSchema(
+ Utils.getSchemaFromString("a:chararray,b:int"));
+ } catch (ParseException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public ResourceStatistics getStatistics(String location, Job job)
+ throws IOException {
+ return null;
+ }
+
+ @Override
+ public void setPartitionFilter(Expression partitionFilter)
+ throws IOException {
+
+ }
+
+ }
}