You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2010/07/02 08:05:22 UTC

svn commit: r959865 - in /hadoop/pig/trunk/contrib: CHANGES.txt piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java

Author: hashutosh
Date: Fri Jul  2 06:05:22 2010
New Revision: 959865

URL: http://svn.apache.org/viewvc?rev=959865&view=rev
Log:
PIG-1449: RegExLoader hangs on lines that don't match the regular expression

Modified:
    hadoop/pig/trunk/contrib/CHANGES.txt
    hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java
    hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java

Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=959865&r1=959864&r2=959865&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Fri Jul  2 06:05:22 2010
@@ -32,6 +32,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-1449 RegExLoader hangs on lines that don't match the regular expression
+(Christian Hargraves via hashutosh)
 
 PIG 0.7.0
 

Modified: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java?rev=959865&r1=959864&r2=959865&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java (original)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java Fri Jul  2 06:05:22 2010
@@ -48,21 +48,13 @@ public abstract class RegExLoader extend
 
   @Override
   public Tuple getNext() throws IOException {
-    if (!in.nextKeyValue()) {
-      return null;
-    }
-    
     Pattern pattern = getPattern();
     Matcher matcher = pattern.matcher("");
     TupleFactory mTupleFactory = DefaultTupleFactory.getInstance();
     String line;
     
-    boolean tryNext = true;
-    while (tryNext) {
-      Text val = in.getCurrentValue();
-      if (val == null) {
-        break;
-      }
+    while (in.nextKeyValue()) {
+	  Text val = in.getCurrentValue();
       line = val.toString();
       if (line.length() > 0 && line.charAt(line.length() - 1) == '\r') {
         line = line.substring(0, line.length() - 1);
@@ -70,14 +62,12 @@ public abstract class RegExLoader extend
       matcher = matcher.reset(line);
       ArrayList<DataByteArray> list = new ArrayList<DataByteArray>();
       if (matcher.find()) {
-        tryNext=false;
         for (int i = 1; i <= matcher.groupCount(); i++) {
           list.add(new DataByteArray(matcher.group(i)));
         }
         return mTupleFactory.newTuple(list);  
       }
     }
-
     return null;
   }
   

Modified: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java?rev=959865&r1=959864&r2=959865&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java (original)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java Fri Jul  2 06:05:22 2010
@@ -31,6 +31,8 @@ import org.junit.Test;
 public class TestRegExLoader extends TestCase {
     private static String patternString = "(\\w+),(\\w+);(\\w+)";
     private final static Pattern pattern = Pattern.compile(patternString);
+    private static String patternString2 = "(3),(three);(iii)";
+    private final static Pattern pattern2 = Pattern.compile(patternString2);
 
     public static class DummyRegExLoader extends RegExLoader {
         public DummyRegExLoader() {}
@@ -41,6 +43,15 @@ public class TestRegExLoader extends Tes
         }
     }
 
+    public static class DummyRegExLoader2 extends RegExLoader {
+        public DummyRegExLoader2() {}
+        
+        @Override
+        public Pattern getPattern() {
+            return Pattern.compile(patternString2);
+        }
+    }
+
     public static ArrayList<String[]> data = new ArrayList<String[]>();
     static {
         data.add(new String[] { "1,one;i" });
@@ -71,4 +82,30 @@ public class TestRegExLoader extends Tes
         assertEquals(data.size(), tupleCount);
     }
         
+    @Test
+    public void testOnlyLastMatch() throws Exception {       
+        PigServer pigServer = new PigServer(LOCAL);
+        
+        String filename = TestHelper.createTempFile(data, "");
+
+    	ArrayList<String[]> dataE = new ArrayList<String[]>();
+        dataE.add(new String[] { "3,three;iii" });
+       	ArrayList<DataByteArray[]> expected = TestHelper.getExpected(dataE, pattern2);
+        
+        pigServer.registerQuery("A = LOAD 'file:" + Util.encodeEscape(filename) + 
+                "' USING " + DummyRegExLoader2.class.getName() + "() AS (key, val);");
+        Iterator<?> it = pigServer.openIterator("A");
+        int tupleCount = 0;
+        while (it.hasNext()) {
+            Tuple tuple = (Tuple) it.next();
+            if (tuple == null)
+              break;
+            else {
+              TestHelper.examineTuple(expected, tuple, tupleCount);
+              tupleCount++;
+            }
+          }
+        assertEquals(1, tupleCount);
+    }
+        
 }