You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2010/02/13 22:46:50 UTC
svn commit: r909921 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java
test/org/apache/pig/test/TestJoin.java
Author: hashutosh
Date: Sat Feb 13 21:46:49 2010
New Revision: 909921
URL: http://svn.apache.org/viewvc?rev=909921&view=rev
Log:
PIG-1131: Pig simple join does not work when it contains empty lines
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java
hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=909921&r1=909920&r2=909921&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Feb 13 21:46:49 2010
@@ -93,6 +93,8 @@
BUG FIXES
+PIG-1131: Pig simple join does not work when it contains empty lines (ashutoshc)
+
PIG-834: incorrect plan when algebraic functions are nested (ashutoshc)
PIG-1217: Fix argToFuncMapping in Piggybank Top function (dvryaboy via gates)
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java?rev=909921&r1=909920&r2=909921&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java Sat Feb 13 21:46:49 2010
@@ -118,9 +118,6 @@
private int mProjectedColsMapSize = 0;
private int mSecondaryProjectedColsMapSize = 0;
- private ArrayList<Integer> minValuePositions;
- private int minValuePositionsSize = 0;
-
private Tuple lrOutput;
private boolean useSecondaryKey = false;
@@ -459,27 +456,14 @@
Tuple minimalValue = null;
if(!mProjectStar) {
- if(minValuePositions == null) {
- // the very first time, we will have to build
- // the "value" tuple piecemeal but we can
- // do better next time round
- minValuePositions = new ArrayList<Integer>();
- minimalValue = mTupleFactory.newTuple();
- // look for individual columns that we are
- // projecting
- for (int i = 0; i < value.size(); i++) {
- if(mProjectedColsMap.get(i) == null) {
- // this column was not found in the "key"
- // so send it in the "value"
- minimalValue.append(value.get(i));
- minValuePositions.add(i);
- }
- }
- minValuePositionsSize = minValuePositions.size();
- } else {
- minimalValue = mTupleFactory.newTuple(minValuePositionsSize);
- for(int i = 0; i < minValuePositionsSize; i++) {
- minimalValue.set(i, value.get(minValuePositions.get(i)));
+ minimalValue = mTupleFactory.newTuple();
+ // look for individual columns that we are
+ // projecting
+ for (int i = 0; i < value.size(); i++) {
+ if(mProjectedColsMap.get(i) == null) {
+ // this column was not found in the "key"
+ // so send it in the "value"
+ minimalValue.append(value.get(i));
}
}
} else {
@@ -487,7 +471,7 @@
// we would send out an empty tuple as
// the "value" since all elements are in the
// "key"
- minimalValue = mTupleFactory.newTuple();
+ minimalValue = mTupleFactory.newTuple(0);
}
lrOutput.set(2, minimalValue);
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=909921&r1=909920&r2=909921&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sat Feb 13 21:46:49 2010
@@ -98,6 +98,40 @@
}
@Test
+ public void testJoinWithMissingFieldsInTuples() throws IOException{
+
+ setUp(ExecType.MAPREDUCE);
+ String[] input1 = {
+ "ff ff ff",
+ "",
+ "",
+ "",
+ "",
+ "ff ff ff",
+ "",
+ ""
+ };
+ String[] input2 = {
+ "",
+ "",
+ "",
+ "",
+ ""
+ };
+
+ String firstInput = createInputFile(ExecType.MAPREDUCE, "a.txt", input1);
+ String secondInput = createInputFile(ExecType.MAPREDUCE, "b.txt", input2);
+ String script = "a = load 'a.txt' using PigStorage(' ');" +
+ "b = load 'b.txt' using PigStorage('\u0001');" +
+ "c = join a by $0, b by $0;";
+ Util.registerMultiLineQuery(pigServer, script);
+ Iterator<Tuple> it = pigServer.openIterator("c");
+ assertFalse(it.hasNext());
+ deleteInputFile(ExecType.MAPREDUCE, firstInput);
+ deleteInputFile(ExecType.MAPREDUCE, secondInput);
+ }
+
+ @Test
public void testJoinUnkownSchema() throws Exception {
// If any of the input schema is unknown, the resulting schema should be unknown as well
for (ExecType execType : execTypes) {