You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/10/28 00:29:10 UTC
svn commit: r830400 - in
/hadoop/pig/branches/load-store-redesign/src/org/apache/pig:
builtin/BinStorage.java builtin/PigStorage.java
impl/io/BinStorageInputFormat.java impl/io/BinStorageOutputFormat.java
impl/io/BinStorageRecordWriter.java
Author: pradeepkth
Date: Tue Oct 27 23:29:09 2009
New Revision: 830400
URL: http://svn.apache.org/viewvc?rev=830400&view=rev
Log:
code to make BinStorage work with new StoreFunc
Added:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java
Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java?rev=830400&r1=830399&r2=830400&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java Tue Oct 27 23:29:09 2009
@@ -30,6 +30,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
@@ -37,6 +38,7 @@
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.pig.LoadCaster;
import org.apache.pig.PigException;
import org.apache.pig.PigWarning;
@@ -48,7 +50,9 @@
import org.apache.pig.data.DataReaderWriter;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.BinStorageInputFormat;
+import org.apache.pig.impl.io.BinStorageOutputFormat;
import org.apache.pig.impl.io.BinStorageRecordReader;
+import org.apache.pig.impl.io.BinStorageRecordWriter;
import org.apache.pig.impl.util.LogUtils;
public class BinStorage implements ReversibleLoadStoreFunc, LoadCaster {
@@ -61,6 +65,7 @@
protected long end = Long.MAX_VALUE;
private BinStorageRecordReader recReader = null;
+ private BinStorageRecordWriter recWriter = null;
/**
* Simple binary nested reader format
@@ -76,21 +81,12 @@
}
}
- DataOutputStream out = null;
-
- public void bindTo(OutputStream os) throws IOException {
- this.out = new DataOutputStream(new BufferedOutputStream(os));
- }
-
- public void finish() throws IOException {
- out.flush();
- }
-
public void putNext(Tuple t) throws IOException {
- out.write(RECORD_1);
- out.write(RECORD_2);
- out.write(RECORD_3);
- t.write(out);
+ try { // recWriter is null until prepareToWrite() assigns it
+ recWriter.write(null, t); // key is passed as null: BinStorageRecordWriter.write only writes the tuple
+ } catch (InterruptedException e) {
+ throw new IOException(e); // NOTE(review): interrupt status is not restored before rethrowing — confirm that is intended
+ }
}
public DataBag bytesToBag(byte[] b){
@@ -371,8 +367,7 @@
*/
@Override
public OutputFormat getOutputFormat() {
- // TODO Auto-generated method stub
- return null;
+ return new BinStorageOutputFormat(); // a new instance is created on every call
}
/* (non-Javadoc)
@@ -380,8 +375,7 @@
*/
@Override
public void prepareToWrite(RecordWriter writer) {
- // TODO Auto-generated method stub
-
+ this.recWriter = (BinStorageRecordWriter) writer; // unchecked downcast; presumably writer comes from BinStorageOutputFormat — TODO confirm
}
/* (non-Javadoc)
@@ -398,7 +392,6 @@
*/
@Override
public void setStoreLocation(String location, Job job) throws IOException {
- // TODO Auto-generated method stub
-
+ FileOutputFormat.setOutputPath(job, new Path(location)); // the store location string becomes the job's output path
}
}
Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java?rev=830400&r1=830399&r2=830400&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java Tue Oct 27 23:29:09 2009
@@ -29,6 +29,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
@@ -369,7 +370,7 @@
*/
@Override
public OutputFormat getOutputFormat() {
- return new TextOutputFormat<Text, Text>();
+ return new TextOutputFormat<WritableComparable, Text>(); // key type parameter changed from Text to WritableComparable
}
/* (non-Javadoc)
Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java?rev=830400&r1=830399&r2=830400&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageInputFormat.java Tue Oct 27 23:29:09 2009
@@ -1,5 +1,19 @@
-/**
- *
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
package org.apache.pig.impl.io;
Added: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java?rev=830400&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java (added)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageOutputFormat.java Tue Oct 27 23:29:09 2009
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.pig.data.Tuple;
+
+/**
+ * FileOutputFormat whose record writer is a BinStorageRecordWriter over a newly created task work file.
+ */
+public class BinStorageOutputFormat extends
+ FileOutputFormat<org.apache.hadoop.io.WritableComparable, Tuple> {
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public RecordWriter<WritableComparable, Tuple> getRecordWriter(
+ TaskAttemptContext job) throws IOException, InterruptedException {
+ Configuration conf = job.getConfiguration();
+ Path file = getDefaultWorkFile(job, ""); // empty string: no file-name extension is requested
+ FileSystem fs = file.getFileSystem(conf);
+ FSDataOutputStream fileOut = fs.create(file, false); // false = overwrite flag off (see FileSystem.create)
+ return new BinStorageRecordWriter(fileOut); // the returned writer closes fileOut in its close()
+ }
+}
Added: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java?rev=830400&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java (added)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/BinStorageRecordWriter.java Tue Oct 27 23:29:09 2009
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.io;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.pig.data.Tuple;
+
+/**
+ * RecordWriter that ignores the key and writes each Tuple to a DataOutputStream, preceded by the three RECORD_* marker values.
+ */
+public class BinStorageRecordWriter extends
+ RecordWriter<org.apache.hadoop.io.WritableComparable, Tuple> {
+
+ public static final int RECORD_1 = 0x01; // first marker value written before each tuple
+ public static final int RECORD_2 = 0x02; // second marker value written before each tuple
+ public static final int RECORD_3 = 0x03; // third marker value written before each tuple
+
+ /**
+ * the outputstream to write out on
+ */
+ private DataOutputStream out;
+
+ /**
+ * @param out stream that the marker values and tuples are written to; closed by close()
+ */
+ public BinStorageRecordWriter(DataOutputStream out) {
+ this.out = out;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
+ */
+ @Override
+ public void close(TaskAttemptContext arg0) throws IOException,
+ InterruptedException {
+ out.close(); // the task context argument is not used
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
+ */
+ @Override
+ public void write(WritableComparable wc, Tuple t) throws IOException,
+ InterruptedException {
+ // we really only want to write the tuple (value) out here
+ out.write(RECORD_1); // wc (the key) is never read in this method
+ out.write(RECORD_2);
+ out.write(RECORD_3);
+ t.write(out); // the tuple writes itself to the stream after the three markers
+
+ }
+
+}