Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/16 22:51:08 UTC

svn commit: r727177 - in /hadoop/hive/trunk: CHANGES.txt build.xml serde/build.xml serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java

Author: zshao
Date: Tue Dec 16 13:51:08 2008
New Revision: 727177

URL: http://svn.apache.org/viewvc?rev=727177&view=rev
Log:
HIVE-180. Data Generator for thrift-serialized sequence files. (zshao)

Added:
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/build.xml
    hadoop/hive/trunk/serde/build.xml

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=727177&r1=727176&r2=727177&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Dec 16 13:51:08 2008
@@ -26,6 +26,8 @@
 
   IMPROVEMENTS
 
+    HIVE-180. Data Generator for thrift-serialized sequence files. (zshao)
+
     HIVE-157. Update README.txt to remove refs to mirror.facebook.com. (zshao)
 
     HIVE-95. Improve cli error messages by lowering backtracking to 1.

Modified: hadoop/hive/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build.xml?rev=727177&r1=727176&r2=727177&view=diff
==============================================================================
--- hadoop/hive/trunk/build.xml (original)
+++ hadoop/hive/trunk/build.xml Tue Dec 16 13:51:08 2008
@@ -114,7 +114,7 @@
   </target>
 
   <!-- ====================================================== -->
-  <!-- Clean all the contribs.                              -->
+  <!-- Clean all the contribs.                                -->
   <!-- ====================================================== -->
   <target name="clean">
     <subant target="clean">
@@ -124,6 +124,15 @@
     <delete dir="${build.dir.hive}"/>
   </target>
 
+  <!-- ====================================================== -->
+  <!-- Generate some of the test data.                        -->
+  <!-- ====================================================== -->
+  <target name="gen-testdata" depends="deploy">
+    <subant target="gen-testdata">
+      <fileset dir="." includes="serde/build.xml"/>
+    </subant>
+  </target>
+
   <target name="package" depends="deploy">
     <echo message="Deploying Hive jars to ${target.dir}"/>
     <mkdir dir="${target.dir}"/>

Modified: hadoop/hive/trunk/serde/build.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/build.xml?rev=727177&r1=727176&r2=727177&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/build.xml (original)
+++ hadoop/hive/trunk/serde/build.xml Tue Dec 16 13:51:08 2008
@@ -67,4 +67,16 @@
     </exec>
   </target>
 
+  <target name="gen-testdata" depends="compile-test,test-jar">
+    <echo>Generating data/files/complex.seq... </echo>
+    <java
+     dir="${hive.root}"
+     classname="org.apache.hadoop.hive.serde2.thrift_test.CreateSequenceFile"
+     fork="true"
+     failonerror="true">
+      <arg value="data/files/complex.seq"/>
+      <classpath refid="${test.classpath.id}"/>
+    </java>
+  </target>
+
 </project>
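
For reference, the gen-testdata target above simply runs the generator class added below with a single output-path argument. A minimal equivalent driver in Java (a sketch only, not part of this commit; it assumes the serde test classes are already built and on the classpath, and that the working directory is ${hive.root} so the relative path resolves the same way; "GenTestDataDriver" is a hypothetical name):

    // Hypothetical driver, equivalent to the <java> task in serde/build.xml above.
    public class GenTestDataDriver {
      public static void main(String[] args) throws Exception {
        // Same argument the Ant target passes; path is relative to ${hive.root}.
        org.apache.hadoop.hive.serde2.thrift_test.CreateSequenceFile.main(
            new String[] { "data/files/complex.seq" });
      }
    }

The generator also accepts a "-line <n>" option to change the number of records from the default of 10, as the argument parsing in CreateSequenceFile.java below shows.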

Added: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java?rev=727177&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java (added)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java Tue Dec 16 13:51:08 2008
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift_test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.ByteWritable;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
+import org.apache.hadoop.hive.serde.thrift.ThriftSerDe;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.hive.serde.Constants;
+
+public class CreateSequenceFile {
+
+  public static void usage() {
+    System.out.println("Usage: CreateSequenceFile <output_sequencefile>");
+    System.exit(1);
+  }
+
+  public static void main(String[] args) throws Exception {
+
+    // Read parameters
+    int lines = 10;
+    List<String> extraArgs = new ArrayList<String>(); 
+    for(int ai=0; ai<args.length; ai++) {
+      if (args[ai].equals("-line") && ai + 1 < args.length) {
+        lines = Integer.parseInt(args[ai+1]);
+        ai++;
+      } else {
+        extraArgs.add(args[ai]);
+      }
+    }
+    if (extraArgs.size() != 1) {
+      usage();
+    }
+    
+    JobConf conf = new JobConf(CreateSequenceFile.class);
+    ThriftSerDe serde = new ThriftSerDe();
+    Properties p = new Properties();
+    p.put(Constants.SERIALIZATION_CLASS, Complex.class.getName());
+    // p.put(Constants.SERIALIZATION_FORMAT, null);
+    p.put(Constants.SERIALIZATION_FORMAT, com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
+    serde.initialize(conf, p);
+    
+    // Open files
+    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf, new Path(extraArgs.get(0)), 
+        ByteWritable.class, BytesWritable.class);
+
+    // write to file
+    ByteWritable key = new ByteWritable(0);
+    
+    Random rand = new Random(20081215);
+    
+    for(int i=0; i<lines; i++) {
+      
+      ArrayList<Integer> alist = new ArrayList<Integer>();
+      alist.add(i); alist.add(i*2); alist.add(i*3);
+      ArrayList<String> slist = new ArrayList<String>();
+      slist.add("" + i*10); slist.add("" + i*100); slist.add("" + i*1000);
+      ArrayList<IntString> islist = new ArrayList<IntString>();
+      islist.add(new IntString(i*i, ""+ i*i*i));
+      HashMap<String,String> hash = new HashMap<String,String>();
+      hash.put("key_" + i, "value_" + i);
+      
+      Complex complex = new Complex( rand.nextInt(), 
+          "record_" + (new Integer(i)).toString(),
+          alist,
+          slist,
+          islist,
+          hash);
+
+      Writable value = serde.serialize(complex);
+      writer.append(key, value);
+    }
+    
+    // Close files
+    writer.close();
+  }
+
+}
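
For completeness, a minimal sketch of reading the generated file back (not part of this commit; "ReadSequenceFile" is a hypothetical class name, and it only counts records rather than deserializing the Thrift payload, which would require a SerDe initialized the same way as in CreateSequenceFile above):

    package org.apache.hadoop.hive.serde2.thrift_test;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.exec.ByteWritable;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.mapred.JobConf;

    // Hypothetical reader used to sanity-check data/files/complex.seq.
    public class ReadSequenceFile {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(ReadSequenceFile.class);
        Path path = new Path(args.length > 0 ? args[0] : "data/files/complex.seq");
        SequenceFile.Reader reader =
            new SequenceFile.Reader(FileSystem.get(conf), path, conf);
        // Key/value types mirror those passed to SequenceFile.Writer above.
        ByteWritable key = new ByteWritable(0);
        BytesWritable value = new BytesWritable();
        int records = 0;
        while (reader.next(key, value)) {
          // Each value holds one Thrift-serialized Complex record.
          records++;
        }
        reader.close();
        System.out.println("Read " + records + " records from " + path);
      }
    }

With the default settings, CreateSequenceFile writes 10 records, so the count printed here should match.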