You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by jc...@apache.org on 2012/03/27 04:46:59 UTC

svn commit: r1305715 - in /pig/trunk: ./ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/ contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/

Author: jcoveney
Date: Tue Mar 27 02:46:59 2012
New Revision: 1305715

URL: http://svn.apache.org/viewvc?rev=1305715&view=rev
Log:
PIG-2540 piggybank trunk AvroStorage can't read schema on s3 in e/r (Elastic MapReduce) mode

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
    pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java
    pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1305715&r1=1305714&r2=1305715&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Mar 27 02:46:59 2012
@@ -106,6 +106,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-2540: [piggybank] AvroStorage can't read schema on amazon s3 in elastic mapreduce (rjurney via jcoveney)
+
 PIG-2618: e2e local fails to build
 
 PIG-2608: Typo in PigStorage documentation for source tagging (prkommireddi via daijy)

Modified: pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java?rev=1305715&r1=1305714&r2=1305715&view=diff
==============================================================================
--- pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java (original)
+++ pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java Tue Mar 27 02:46:59 2012
@@ -27,6 +27,7 @@ import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
 import java.util.HashSet;
+import java.net.URI;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.file.DataFileStream;
@@ -131,7 +132,7 @@ public class AvroStorage extends FileInp
     @Override
     public void setLocation(String location, Job job) throws IOException {
         HashSet<Path> paths = new HashSet<Path>();
-    	if(AvroStorageUtils.getAllSubDirs(new Path(location), job, paths) && inputAvroSchema == null) {
+        if(AvroStorageUtils.getAllSubDirs(URI.create(location), job, paths) && inputAvroSchema == null) {
             FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
             inputAvroSchema = getAvroSchema(location, job);
         }
@@ -139,7 +140,7 @@ public class AvroStorage extends FileInp
 
     protected Schema getAvroSchema(String location, Job job) throws IOException {
         Configuration conf = job.getConfiguration();
-        FileSystem fs = FileSystem.get(conf);
+        FileSystem fs = FileSystem.get(URI.create(location), conf);
         Path path = new Path(location);
         return getAvroSchema(path, fs);
     }

Modified: pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java
URL: http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java?rev=1305715&r1=1305714&r2=1305715&view=diff
==============================================================================
--- pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java (original)
+++ pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java Tue Mar 27 02:46:59 2012
@@ -26,6 +26,7 @@ import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Set;
+import java.net.URI;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.hadoop.conf.Configuration;
@@ -100,7 +101,7 @@ public class AvroStorageUtils {
       Configuration conf = job.getConfiguration();
       FileSystem fs = FileSystem.get(conf);
       HashSet<Path> paths = new  HashSet<Path>();
-      if (getAllSubDirs(new Path(pathString), job, paths))
+      if (getAllSubDirs(URI.create(pathString), job, paths))
       {
         paths.addAll(Arrays.asList(FileInputFormat.getInputPaths(job)));
         FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
@@ -115,20 +116,22 @@ public class AvroStorageUtils {
      * 
      * @throws IOException
      */
-     static boolean getAllSubDirs(Path path, Job job, Set<Path> paths) throws IOException {
-  		FileSystem fs = FileSystem.get(job.getConfiguration());
+    static boolean getAllSubDirs(URI location, Job job, Set<Path> paths) throws IOException {
+        FileSystem fs = FileSystem.get(location, job.getConfiguration());
+        Path path = new Path(location.getPath());
   		if (PATH_FILTER.accept(path)) {
   			try {
   				FileStatus file = fs.getFileStatus(path);
   				if (file.isDir()) {
   					for (FileStatus sub : fs.listStatus(path)) {
-  						getAllSubDirs(sub.getPath(), job, paths);
+                        getAllSubDirs(sub.getPath().toUri(), job, paths);
   					}
   				} else {
   					AvroStorageLog.details("Add input file:" + file);
   					paths.add(file.getPath());
   				}
   			} catch (FileNotFoundException e) {
+                AvroStorageLog.details("getAllSubDirs: RETURN FALSE; Input path does not exist: " + path);
   				AvroStorageLog.details("Input path does not exist: " + path);
   				return false;
   			}

Modified: pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java?rev=1305715&r1=1305714&r2=1305715&view=diff
==============================================================================
--- pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java (original)
+++ pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java Tue Mar 27 02:46:59 2012
@@ -61,7 +61,7 @@ public class TestAvroStorage {
       };
 
     private static String getInputFile(String file) {
-        return "file:///" + System.getProperty("user.dir") + "/" + basedir + file;
+        return "file://" + System.getProperty("user.dir") + "/" + basedir + file;
     }
 
     final private String testArrayFile = getInputFile("test_array.avro");