You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/10/28 00:29:10 UTC

svn commit: r830400 - in /hadoop/pig/branches/load-store-redesign/src/org/apache/pig: builtin/BinStorage.java builtin/PigStorage.java impl/io/BinStorageInputFormat.java impl/io/BinStorageOutputFormat.java impl/io/BinStorageRecordWriter.java

Author: pradeepkth
Date: Tue Oct 27 23:29:09 2009
New Revision: 830400

URL: http://svn.apache.org/viewvc?rev=830400&view=rev
Log:
code to make BinStorage work with new StoreFunc

Added:
    hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java
    hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java
Modified:
    hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
    hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
    hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java?rev=830400&r1=830399&r2=830400&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java Tue Oct 27 23:29:09 2009
@@ -30,6 +30,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
@@ -37,6 +38,7 @@
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.pig.LoadCaster;
 import org.apache.pig.PigException;
 import org.apache.pig.PigWarning;
@@ -48,7 +50,9 @@
 import org.apache.pig.data.DataReaderWriter;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.io.BinStorageInputFormat;
+import org.apache.pig.impl.io.BinStorageOutputFormat;
 import org.apache.pig.impl.io.BinStorageRecordReader;
+import org.apache.pig.impl.io.BinStorageRecordWriter;
 import org.apache.pig.impl.util.LogUtils;
 
 public class BinStorage implements ReversibleLoadStoreFunc, LoadCaster {
@@ -61,6 +65,7 @@
     protected long                end            = Long.MAX_VALUE;
     
     private BinStorageRecordReader recReader = null;
+    private BinStorageRecordWriter recWriter = null;
     
     /**
      * Simple binary nested reader format
@@ -76,21 +81,12 @@
         }
     }
 
-    DataOutputStream         out     = null;
-  
-    public void bindTo(OutputStream os) throws IOException {
-        this.out = new DataOutputStream(new BufferedOutputStream(os));
-    }
-
-    public void finish() throws IOException {
-        out.flush();
-    }
-
     public void putNext(Tuple t) throws IOException {
-        out.write(RECORD_1);
-        out.write(RECORD_2);
-        out.write(RECORD_3);
-        t.write(out);
+        try {
+            recWriter.write(null, t);
+        } catch (InterruptedException e) {
+            throw new IOException(e);
+        }
     }
 
     public DataBag bytesToBag(byte[] b){
@@ -371,8 +367,7 @@
      */
     @Override
     public OutputFormat getOutputFormat() {
-        // TODO Auto-generated method stub
-        return null;
+        return new BinStorageOutputFormat();
     }
 
     /* (non-Javadoc)
@@ -380,8 +375,7 @@
      */
     @Override
     public void prepareToWrite(RecordWriter writer) {
-        // TODO Auto-generated method stub
-        
+        this.recWriter = (BinStorageRecordWriter) writer;        
     }
 
     /* (non-Javadoc)
@@ -398,7 +392,6 @@
      */
     @Override
     public void setStoreLocation(String location, Job job) throws IOException {
-        // TODO Auto-generated method stub
-        
+        FileOutputFormat.setOutputPath(job, new Path(location));
     }
 }

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java?rev=830400&r1=830399&r2=830400&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java Tue Oct 27 23:29:09 2009
@@ -29,6 +29,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -369,7 +370,7 @@
      */
     @Override
     public OutputFormat getOutputFormat() {
-        return new TextOutputFormat<Text, Text>();
+        return new TextOutputFormat<WritableComparable, Text>();
     }
 
     /* (non-Javadoc)

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java?rev=830400&r1=830399&r2=830400&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java Tue Oct 27 23:29:09 2009
@@ -1,5 +1,19 @@
-/**
- * 
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.pig.impl.io;
 

Added: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java?rev=830400&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java (added)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java Tue Oct 27 23:29:09 2009
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.pig.data.Tuple;
+
+/**
+ * File-based output format whose record writer is a {@link BinStorageRecordWriter} over the task's default work file.
+ */
+public class BinStorageOutputFormat extends
+        FileOutputFormat<org.apache.hadoop.io.WritableComparable, Tuple> {
+
+    /* Creates the task's default work file and returns a BinStorageRecordWriter wrapping its output stream.
+     * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
+     */
+    @Override
+    public RecordWriter<WritableComparable, Tuple> getRecordWriter(
+            TaskAttemptContext job) throws IOException, InterruptedException {
+        Configuration conf = job.getConfiguration();
+        Path file = getDefaultWorkFile(job, ""); // NOTE(review): "" presumably means no file extension -- confirm
+        FileSystem fs = file.getFileSystem(conf);
+        FSDataOutputStream fileOut = fs.create(file, false); // NOTE(review): false looks like the overwrite flag -- confirm
+        return new BinStorageRecordWriter(fileOut); // the returned writer closes fileOut in its close()
+    }
+}

Added: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java?rev=830400&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java (added)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java Tue Oct 27 23:29:09 2009
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.io;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Record writer that emits each tuple to a DataOutputStream, preceded by the RECORD_1..RECORD_3 marker values.
+ */
+public class BinStorageRecordWriter extends
+        RecordWriter<org.apache.hadoop.io.WritableComparable, Tuple> {
+
+    public static final int RECORD_1 = 0x01; // RECORD_1..RECORD_3 are written, in this order, before every tuple
+    public static final int RECORD_2 = 0x02;
+    public static final int RECORD_3 = 0x03;
+
+    /**
+     * The output stream that records are written to; closed by close().
+     */
+    private DataOutputStream out;
+    
+    /**
+     * @param out stream that the marker values and tuples are written to; this writer closes it in close()
+     */
+    public BinStorageRecordWriter(DataOutputStream out) {
+        this.out = out;
+    }
+
+    /* Closes the underlying output stream; the context argument is not used.
+     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
+     */
+    @Override
+    public void close(TaskAttemptContext arg0) throws IOException,
+            InterruptedException {
+        out.close();        
+    }
+
+    /* Writes the three record markers followed by the tuple; the key argument is not used.
+     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
+     */
+    @Override
+    public void write(WritableComparable wc, Tuple t) throws IOException,
+            InterruptedException {
+        // Only the tuple (value) is written out; the key wc is ignored.
+        out.write(RECORD_1);
+        out.write(RECORD_2);
+        out.write(RECORD_3);
+        t.write(out);
+        
+    }
+
+}