You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2009/11/30 17:09:07 UTC
svn commit: r885473 - in /hadoop/pig/branches/branch-0.6: CHANGES.txt
src/org/apache/pig/impl/io/PigLineRecordReader.java
test/org/apache/pig/test/TestPigLineRecordReader.java
Author: olga
Date: Mon Nov 30 16:09:07 2009
New Revision: 885473
URL: http://svn.apache.org/viewvc?rev=885473&view=rev
Log:
PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via olgan)
Modified:
hadoop/pig/branches/branch-0.6/CHANGES.txt
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/io/PigLineRecordReader.java
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPigLineRecordReader.java
Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=885473&r1=885472&r2=885473&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/CHANGES.txt Mon Nov 30 16:09:07 2009
@@ -122,6 +122,8 @@
BUG FIXES
+PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via olgan)
+
PIG-1080: PigStorage may miss records when loading a file (rding via olgan)
PIG-1071: Support comma separated file/directory names in load statements
Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/io/PigLineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/io/PigLineRecordReader.java?rev=885473&r1=885472&r2=885473&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/io/PigLineRecordReader.java (original)
+++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/io/PigLineRecordReader.java Mon Nov 30 16:09:07 2009
@@ -133,7 +133,8 @@
if (b == '\n' ) {
byte[] array = mBuf.toByteArray();
- if (array[array.length-1]=='\r' && os==OS_WINDOWS) {
+ if (array.length != 0 && array[array.length-1]=='\r'
+ && os==OS_WINDOWS) {
// Here we dont copy the last '\r' in the Text Value
value.append(array, 0, array.length - 1 );
} else {
Modified: hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPigLineRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPigLineRecordReader.java?rev=885473&r1=885472&r2=885473&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPigLineRecordReader.java (original)
+++ hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPigLineRecordReader.java Mon Nov 30 16:09:07 2009
@@ -614,4 +614,97 @@
fail(e.getMessage());
}
}
+
+ /**
+ * This test checks that PigLineRecordReader can read a plain-text file that contains empty lines.
+ */
+ @Test
+ public void testEmptyLineSimpleFile() {
+ try {
+ File testFile = File.createTempFile("testPigLineRecordReader", ".txt");
+ String text = "This is a text";
+
+ PrintStream ps = new PrintStream( testFile );
+ for( int i = 0; i < LOOP_COUNT; i++ ) {
+ ps.println( text );
+ // Add an empty line
+ ps.println("");
+ }
+ ps.close();
+
+ LocalSeekableInputStream is = new LocalSeekableInputStream( testFile );
+ BufferedPositionedInputStream bpis = new BufferedPositionedInputStream( is );
+ PigLineRecordReader reader = new PigLineRecordReader( bpis, 0, Integer.MAX_VALUE );
+
+ Text value = new Text();
+ int counter = 0;
+ while( reader.next(value) ) {
+ if( counter % 2 == 0 ) {
+ assertTrue( "Invalid Text", value.toString().compareTo(text) == 0 );
+ } else {
+ assertTrue( "Invalid Text", value.toString().compareTo("") == 0 );
+ }
+ counter++;
+ }
+ assertEquals("Invalid number of lines", counter, LOOP_COUNT*2 );
+ testFile.deleteOnExit();
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ fail( e.getMessage() );
+ } catch (SecurityException e) {
+ e.printStackTrace();
+ fail( e.getMessage() );
+ } catch (IllegalArgumentException e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
+ }
+
+ /**
+ * This test checks that PigLineRecordReader can read a bzip2-compressed file that contains empty lines.
+ */
+ @Test
+ public void testEmptyLineBZFile() {
+ try {
+ File testFile = File.createTempFile("testPigLineRecordReader", ".txt.bz2");
+ String text = "This is a text";
+
+ PrintStream ps = new PrintStream( new CBZip2OutputStream( new FileOutputStream( testFile )) );
+ for( int i = 0; i < LOOP_COUNT; i++ ) {
+ ps.println( text );
+ // Add an empty line
+ ps.println("");
+ }
+ ps.close();
+
+ LocalSeekableInputStream is = new LocalSeekableInputStream( testFile );
+ CBZip2InputStream bzis = new CBZip2InputStream( is );
+ BufferedPositionedInputStream bpis = new BufferedPositionedInputStream( bzis );
+ PigLineRecordReader reader = new PigLineRecordReader( bpis, 0, Integer.MAX_VALUE );
+
+ Text value = new Text();
+ int counter = 0;
+ while( reader.next(value) ) {
+ if( counter % 2 == 0 ) {
+ assertTrue( "Invalid Text", value.toString().compareTo(text) == 0 );
+ } else {
+ assertTrue( "Invalid Text", value.toString().compareTo("") == 0 );
+ }
+ counter++;
+ }
+ assertEquals("Invalid number of lines", counter, LOOP_COUNT*2 );
+ testFile.deleteOnExit();
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ fail( e.getMessage() );
+ } catch (SecurityException e) {
+ e.printStackTrace();
+ fail( e.getMessage() );
+ } catch (IllegalArgumentException e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
+ }
}