You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2010/07/02 08:05:22 UTC
svn commit: r959865 - in /hadoop/pig/trunk/contrib: CHANGES.txt
piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java
piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java
Author: hashutosh
Date: Fri Jul 2 06:05:22 2010
New Revision: 959865
URL: http://svn.apache.org/viewvc?rev=959865&view=rev
Log:
PIG-1449: RegExLoader hangs on lines that don't match the regular expression
Modified:
hadoop/pig/trunk/contrib/CHANGES.txt
hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java
hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java
Modified: hadoop/pig/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=959865&r1=959864&r2=959865&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/CHANGES.txt Fri Jul 2 06:05:22 2010
@@ -32,6 +32,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-1449 RegExLoader hangs on lines that don't match the regular expression
+(Christian Hargraves via hashutosh)
PIG 0.7.0
Modified: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java?rev=959865&r1=959864&r2=959865&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java (original)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/RegExLoader.java Fri Jul 2 06:05:22 2010
@@ -48,21 +48,13 @@ public abstract class RegExLoader extend
@Override
public Tuple getNext() throws IOException {
- if (!in.nextKeyValue()) {
- return null;
- }
-
Pattern pattern = getPattern();
Matcher matcher = pattern.matcher("");
TupleFactory mTupleFactory = DefaultTupleFactory.getInstance();
String line;
- boolean tryNext = true;
- while (tryNext) {
- Text val = in.getCurrentValue();
- if (val == null) {
- break;
- }
+ while (in.nextKeyValue()) {
+ Text val = in.getCurrentValue();
line = val.toString();
if (line.length() > 0 && line.charAt(line.length() - 1) == '\r') {
line = line.substring(0, line.length() - 1);
@@ -70,14 +62,12 @@ public abstract class RegExLoader extend
matcher = matcher.reset(line);
ArrayList<DataByteArray> list = new ArrayList<DataByteArray>();
if (matcher.find()) {
- tryNext=false;
for (int i = 1; i <= matcher.groupCount(); i++) {
list.add(new DataByteArray(matcher.group(i)));
}
return mTupleFactory.newTuple(list);
}
}
-
return null;
}
Modified: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java?rev=959865&r1=959864&r2=959865&view=diff
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java (original)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestRegExLoader.java Fri Jul 2 06:05:22 2010
@@ -31,6 +31,8 @@ import org.junit.Test;
public class TestRegExLoader extends TestCase {
private static String patternString = "(\\w+),(\\w+);(\\w+)";
private final static Pattern pattern = Pattern.compile(patternString);
+ private static String patternString2 = "(3),(three);(iii)";
+ private final static Pattern pattern2 = Pattern.compile(patternString2); // compiled form of patternString2; used to build the expected tuples in testOnlyLastMatch
public static class DummyRegExLoader extends RegExLoader {
public DummyRegExLoader() {}
@@ -41,6 +43,15 @@ public class TestRegExLoader extends Tes
}
}
+ public static class DummyRegExLoader2 extends RegExLoader {
+ public DummyRegExLoader2() {}
+
+ @Override
+ public Pattern getPattern() {
+ return Pattern.compile(patternString2);
+ }
+ }
+
public static ArrayList<String[]> data = new ArrayList<String[]>();
static {
data.add(new String[] { "1,one;i" });
@@ -71,4 +82,30 @@ public class TestRegExLoader extends Tes
assertEquals(data.size(), tupleCount);
}
+ @Test
+ public void testOnlyLastMatch() throws Exception {
+ PigServer pigServer = new PigServer(LOCAL);
+
+ String filename = TestHelper.createTempFile(data, "");
+
+ ArrayList<String[]> dataE = new ArrayList<String[]>();
+ dataE.add(new String[] { "3,three;iii" });
+ ArrayList<DataByteArray[]> expected = TestHelper.getExpected(dataE, pattern2);
+
+ pigServer.registerQuery("A = LOAD 'file:" + Util.encodeEscape(filename) +
+ "' USING " + DummyRegExLoader2.class.getName() + "() AS (key, val);");
+ Iterator<?> it = pigServer.openIterator("A");
+ int tupleCount = 0;
+ while (it.hasNext()) {
+ Tuple tuple = (Tuple) it.next();
+ if (tuple == null)
+ break;
+ else {
+ TestHelper.examineTuple(expected, tuple, tupleCount);
+ tupleCount++;
+ }
+ }
+ assertEquals(1, tupleCount);
+ }
+
}