You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/07/27 20:47:06 UTC

svn commit: r1366493 - in /pig/branches/branch-0.10: ./ ivy/ shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/ shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/ shims/test/hadoop20/org/apache/pig/test/ shims/...

Author: daijy
Date: Fri Jul 27 18:47:05 2012
New Revision: 1366493

URL: http://svn.apache.org/viewvc?rev=1366493&view=rev
Log:
PIG-2791: Pig does not work with Namenode Federation

Modified:
    pig/branches/branch-0.10/CHANGES.txt
    pig/branches/branch-0.10/build.xml
    pig/branches/branch-0.10/ivy.xml
    pig/branches/branch-0.10/ivy/libraries.properties
    pig/branches/branch-0.10/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
    pig/branches/branch-0.10/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
    pig/branches/branch-0.10/shims/test/hadoop20/org/apache/pig/test/MiniCluster.java
    pig/branches/branch-0.10/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java
    pig/branches/branch-0.10/src/org/apache/pig/backend/datastorage/DataStorage.java
    pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/datastorage/HDataStorage.java
    pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
    pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
    pig/branches/branch-0.10/test/org/apache/pig/test/TestJobSubmission.java
    pig/branches/branch-0.10/test/org/apache/pig/test/TestPigRunner.java
    pig/branches/branch-0.10/test/org/apache/pig/test/TestScriptUDF.java
    pig/branches/branch-0.10/test/org/apache/pig/test/Util.java

Modified: pig/branches/branch-0.10/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/CHANGES.txt?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/CHANGES.txt (original)
+++ pig/branches/branch-0.10/CHANGES.txt Fri Jul 27 18:47:05 2012
@@ -28,6 +28,8 @@ PIG-2727: PigStorage Source tagging does
 
 BUG FIXES
 
+PIG-2791: Pig does not work with Namenode Federation (rohini via daijy)
+
 PIG-2783: Fix Iterator_1 e2e test for Hadoop 23 (rohini via daijy)
 
 PIG-2761: With hadoop23 importing modules inside python script does not work (rohini via daijy)

Modified: pig/branches/branch-0.10/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/build.xml?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/build.xml (original)
+++ pig/branches/branch-0.10/build.xml Fri Jul 27 18:47:05 2012
@@ -420,7 +420,11 @@
     	
     	<copy file="${basedir}/test/hbase-site.xml" tofile="${test.build.classes}/hbase-site.xml"/>
    
-        <ivy:cachepath pathid="mr-apps-test.classpath" />
+        <ivy:cachepath pathid="mr-apps-test-ivy.classpath" />
+        <path id="mr-apps-test.classpath">
+            <pathelement path="${clover.jar}"/>
+            <path refid="mr-apps-test-ivy.classpath"/>
+        </path>
         <property name="mr-apps-classpath" refid="mr-apps-test.classpath" />
         <echo file="${test.build.classes}/mrapp-generated-classpath" message="${mr-apps-classpath}" />
     </target>

Modified: pig/branches/branch-0.10/ivy.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/ivy.xml?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/ivy.xml (original)
+++ pig/branches/branch-0.10/ivy.xml Fri Jul 27 18:47:05 2012
@@ -66,6 +66,8 @@
       conf="hadoop23->master"/>
     <dependency org="commons-el" name="commons-el" rev="${commons-el.version}"
       conf="compile->master"/>
+    <dependency org="commons-io" name="commons-io" rev="${commons-io.version}"
+      conf="compile->master"/>
     <dependency org="commons-httpclient" name="commons-httpclient" rev="${commons-httpclient.version}"
       conf="hadoop23->master"/>
     <dependency org="commons-configuration" name="commons-configuration" rev="${commons-configuration.version}"
@@ -82,6 +84,12 @@
       conf="hadoop23->master"/>
     <dependency org="org.mortbay.jetty" name="jetty-util" rev="${jetty-util.version}"
       conf="hadoop23->master"/>
+    <dependency org="javax.inject" name="javax.inject" rev="${javax-inject.version}"
+      conf="hadoop23->master"/>
+    <dependency org="javax.xml.bind" name="jaxb-api" rev="${jaxb-api.version}"
+      conf="hadoop23->master"/>
+    <dependency org="com.sun.xml.bind" name="jaxb-impl" rev="${jaxb-impl.version}"
+      conf="hadoop23->master"/> 
     <dependency org="com.google.inject" name="guice" rev="${guice.version}"
       conf="hadoop23->master"/>
     <dependency org="com.google.inject.extensions" name="guice-servlet" rev="${guice-servlet.version}"

Modified: pig/branches/branch-0.10/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/ivy/libraries.properties?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/ivy/libraries.properties (original)
+++ pig/branches/branch-0.10/ivy/libraries.properties Fri Jul 27 18:47:05 2012
@@ -19,6 +19,7 @@ avro.version=1.5.3
 commons-beanutils.version=1.7.0
 commons-cli.version=1.0
 commons-codec.version=1.4
+commons-io.version=2.3
 commons-el.version=1.0
 commons-logging.version=1.1.1
 commons-lang.version=2.4
@@ -35,14 +36,17 @@ guava.version=11.0
 jersey-core.version=1.8
 hadoop-core.version=1.0.0
 hadoop-test.version=1.0.0
-hadoop-common.version=0.23.1
-hadoop-hdfs.version=0.23.1
-hadoop-mapreduce.version=0.23.1
+hadoop-common.version=2.0.0-alpha
+hadoop-hdfs.version=2.0.0-alpha
+hadoop-mapreduce.version=2.0.0-alpha
 hbase.version=0.90.0
 hsqldb.version=1.8.0.10
 hive.version=0.8.0
-jackson.version=1.7.3
+jackson.version=1.8.8
 javacc.version=4.2
+javax-inject.version=1
+jaxb-api.version=2.2.2
+jaxb-impl.version=2.2.3-1
 jdeb.version=0.8
 jdiff.version=1.0.9
 jetty.version=6.1.26
@@ -68,7 +72,7 @@ zookeeper.version=3.3.3
 servlet.version=4.0.6
 servlet-api.version=2.5
 protobuf-java.version=2.4.0a
-guice.version=2.0
-guice-servlet.version=2.0
+guice.version=3.0
+guice-servlet.version=3.0
 aopalliance.version=1.0
 jsr311-api.version=1.1.1

Modified: pig/branches/branch-0.10/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java (original)
+++ pig/branches/branch-0.10/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java Fri Jul 27 18:47:05 2012
@@ -20,6 +20,8 @@ package org.apache.pig.backend.hadoop.ex
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.JobID;
@@ -86,4 +88,8 @@ public class HadoopShims {
     static public void commitOrCleanup(OutputCommitter oc, JobContext jc) throws IOException {
         oc.cleanupJob(jc);
     }
+    
+    public static long getDefaultBlockSize(FileSystem fs, Path path) {
+        return fs.getDefaultBlockSize();
+    }
 }

Modified: pig/branches/branch-0.10/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java (original)
+++ pig/branches/branch-0.10/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java Fri Jul 27 18:47:05 2012
@@ -22,6 +22,8 @@ import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.jobcontrol.Job;
 import org.apache.hadoop.mapreduce.ContextFactory;
 import org.apache.hadoop.mapreduce.JobContext;
@@ -86,4 +88,8 @@ public class HadoopShims {
     static public void commitOrCleanup(OutputCommitter oc, JobContext jc) throws IOException {
         oc.commitJob(jc);
     }
+    
+    public static long getDefaultBlockSize(FileSystem fs, Path path) {
+        return fs.getDefaultBlockSize(path);
+    }
 }

Modified: pig/branches/branch-0.10/shims/test/hadoop20/org/apache/pig/test/MiniCluster.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/shims/test/hadoop20/org/apache/pig/test/MiniCluster.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/shims/test/hadoop20/org/apache/pig/test/MiniCluster.java (original)
+++ pig/branches/branch-0.10/shims/test/hadoop20/org/apache/pig/test/MiniCluster.java Fri Jul 27 18:47:05 2012
@@ -41,9 +41,7 @@ public class MiniCluster extends MiniGen
             File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/");
             conf_dir.mkdirs();
             File conf_file = new File(conf_dir, "hadoop-site.xml");
-            
-            conf_file.delete();
-            
+
             // Builds and starts the mini dfs and mapreduce clusters
             Configuration config = new Configuration();
             m_dfs = new MiniDFSCluster(config, dataNodes, true, null);

Modified: pig/branches/branch-0.10/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java (original)
+++ pig/branches/branch-0.10/shims/test/hadoop23/org/apache/pig/test/MiniCluster.java Fri Jul 27 18:47:05 2012
@@ -60,15 +60,16 @@ public class MiniCluster extends MiniGen
             File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/");
             conf_dir.mkdirs();
             File conf_file = new File(conf_dir, "hadoop-site.xml");
-            
-            conf_file.delete();
-            
+   
             // Builds and starts the mini dfs and mapreduce clusters
             Configuration config = new Configuration();
             m_dfs = new MiniDFSCluster(config, dataNodes, true, null);
             m_fileSys = m_dfs.getFileSystem();
             m_dfs_conf = m_dfs.getConfiguration(0);
-            
+
+            //Create user home directory
+            m_fileSys.mkdirs(m_fileSys.getWorkingDirectory());
+
             m_mr = new MiniMRYarnCluster("PigMiniCluster", taskTrackers);
             m_mr.init(m_dfs_conf);
             //m_mr.init(m_dfs_conf);

Modified: pig/branches/branch-0.10/src/org/apache/pig/backend/datastorage/DataStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/backend/datastorage/DataStorage.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/backend/datastorage/DataStorage.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/backend/datastorage/DataStorage.java Fri Jul 27 18:47:05 2012
@@ -30,7 +30,6 @@ import java.io.IOException;
 
 public interface DataStorage {
         
-        public static final String DEFAULT_REPLICATION_FACTOR_KEY = "pig.default.replication.factor";
         public static final String USED_BYTES_KEY = "pig.used.bytes";
         public static final String RAW_CAPACITY_KEY = "pig.raw.capacity.bytes";    // replication is disregarded
         public static final String RAW_USED_KEY = "pig.raw.used.capacity.bytes";   // replication is disregarded

Modified: pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/datastorage/HDataStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/datastorage/HDataStorage.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/datastorage/HDataStorage.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/datastorage/HDataStorage.java Fri Jul 27 18:47:05 2012
@@ -74,9 +74,6 @@ public class HDataStorage implements Dat
         } catch (IOException e) {
             throw new RuntimeException("Failed to create DataStorage", e);
         }
-        short defaultReplication = fs.getDefaultReplication();
-        properties.setProperty(DEFAULT_REPLICATION_FACTOR_KEY, 
-                               Short.valueOf(defaultReplication).toString());
     }
 
     public void close() throws IOException {

Modified: pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Fri Jul 27 18:47:05 2012
@@ -1271,8 +1271,9 @@ public class MRCompiler extends PhyPlanV
                             InputFormat inf = loader.getInputFormat();
                             List<InputSplit> splits = inf.getSplits(HadoopShims.cloneJobContext(job));
                             List<List<InputSplit>> results = MapRedUtil
-                            .getCombinePigSplits(splits, fs
-                                    .getDefaultBlockSize(), conf);
+                            .getCombinePigSplits(splits,
+                                    HadoopShims.getDefaultBlockSize(fs, path),
+                                    conf);
                             numFiles += results.size();
                         } else {
                             List<MapReduceOper> preds = MRPlan.getPredecessors(mro);

Modified: pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java Fri Jul 27 18:47:05 2012
@@ -225,7 +225,7 @@ public class PigInputFormat extends Inpu
                 Path path = new Path(inputs.get(i).getFileName());
                                 
                 FileSystem fs;
-                
+                boolean isFsPath = true;
                 try {
                     fs = path.getFileSystem(conf);
                 } catch (Exception e) {
@@ -235,6 +235,7 @@ public class PigInputFormat extends Inpu
                     // getting the file system. That's
                     // ok, we just use the dfs in that case.
                     fs = new Path("/").getFileSystem(conf);
+                    isFsPath = false;
                 }
 
                 // if the execution is against Mapred DFS, set
@@ -274,7 +275,9 @@ public class PigInputFormat extends Inpu
                         HadoopShims.createJobContext(inputSpecificJob.getConfiguration(), 
                                 jobcontext.getJobID()));
                 List<InputSplit> oneInputPigSplits = getPigSplits(
-                        oneInputSplits, i, inpTargets.get(i), fs.getDefaultBlockSize(), combinable, confClone);
+                        oneInputSplits, i, inpTargets.get(i),
+                        HadoopShims.getDefaultBlockSize(fs, isFsPath? path: fs.getWorkingDirectory()),
+                        combinable, confClone);
                 splits.addAll(oneInputPigSplits);
             } catch (ExecException ee) {
                 throw ee;

Modified: pig/branches/branch-0.10/test/org/apache/pig/test/TestJobSubmission.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/org/apache/pig/test/TestJobSubmission.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/org/apache/pig/test/TestJobSubmission.java (original)
+++ pig/branches/branch-0.10/test/org/apache/pig/test/TestJobSubmission.java Fri Jul 27 18:47:05 2012
@@ -533,7 +533,7 @@ public class TestJobSubmission {
     @Test
     public void testReducerNumEstimation() throws Exception{
         // skip this test for 23 until HBASE-4850
-        if (Util.isHadoop23())
+        if (Util.isHadoop23() || Util.isHadoop2_0())
             return;
         // use the estimation
         Configuration conf = cluster.getConfiguration();

Modified: pig/branches/branch-0.10/test/org/apache/pig/test/TestPigRunner.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/org/apache/pig/test/TestPigRunner.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/org/apache/pig/test/TestPigRunner.java (original)
+++ pig/branches/branch-0.10/test/org/apache/pig/test/TestPigRunner.java Fri Jul 27 18:47:05 2012
@@ -578,7 +578,7 @@ public class TestPigRunner {
     @Test
     public void classLoaderTest() throws Exception {
         // Skip in hadoop 23 test, see PIG-2449
-        if (Util.isHadoop23())
+        if (Util.isHadoop23() || Util.isHadoop2_0())
             return;
         PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE));
         w.println("register test/org/apache/pig/test/data/pigtestloader.jar");

Modified: pig/branches/branch-0.10/test/org/apache/pig/test/TestScriptUDF.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/org/apache/pig/test/TestScriptUDF.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/org/apache/pig/test/TestScriptUDF.java (original)
+++ pig/branches/branch-0.10/test/org/apache/pig/test/TestScriptUDF.java Fri Jul 27 18:47:05 2012
@@ -539,7 +539,7 @@ public class TestScriptUDF{
     @Test
     public void testPythonNestedImport() throws Exception {
         // Skip for hadoop 23 until PIG-2433 fixed
-        if (Util.isHadoop23())
+        if (Util.isHadoop23() || Util.isHadoop2_0())
             return;
         
         String[] scriptA = {

Modified: pig/branches/branch-0.10/test/org/apache/pig/test/Util.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/org/apache/pig/test/Util.java?rev=1366493&r1=1366492&r2=1366493&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/org/apache/pig/test/Util.java (original)
+++ pig/branches/branch-0.10/test/org/apache/pig/test/Util.java Fri Jul 27 18:47:05 2012
@@ -538,6 +538,14 @@ public class Util {
          } 
      }
      
+     static private String getMkDirCommandForHadoop2_0(String fileName) {
+         if (Util.isHadoop23() || Util.isHadoop2_0()) {
+             Path parentDir = new Path(fileName).getParent();
+             String mkdirCommand = parentDir.getName().isEmpty() ? "" : "fs -mkdir -p " + parentDir + "\n";
+             return mkdirCommand;
+         }
+         return "";
+     }
      
     /**
 	 * Utility method to copy a file form local filesystem to the dfs on
@@ -549,7 +557,7 @@ public class Util {
 	 */
 	static public void copyFromLocalToCluster(MiniCluster cluster, String localFileName, String fileNameOnCluster) throws IOException {
         PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
-        String script = "fs -put " + localFileName + " " + fileNameOnCluster;
+        String script = getMkDirCommandForHadoop2_0(fileNameOnCluster) + "fs -put " + localFileName + " " + fileNameOnCluster;
 
 	    GruntParser parser = new GruntParser(new StringReader(script));
         parser.setInteractive(false);
@@ -564,7 +572,7 @@ public class Util {
     static public void copyFromLocalToLocal(String fromLocalFileName,
             String toLocalFileName) throws IOException {
         PigServer ps = new PigServer(ExecType.LOCAL, new Properties());
-        String script = "fs -cp " + fromLocalFileName + " " + toLocalFileName;
+        String script = getMkDirCommandForHadoop2_0(toLocalFileName) + "fs -cp " + fromLocalFileName + " " + toLocalFileName;
 
         new File(toLocalFileName).deleteOnExit();
         
@@ -1169,4 +1177,11 @@ public class Util {
             return true;
         return false;
     }
+
+    public static boolean isHadoop2_0() {
+        String version = org.apache.hadoop.util.VersionInfo.getVersion();
+        if (version.matches("\\b2\\.0\\..+"))
+            return true;
+        return false;
+    }
 }