You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/16 22:51:08 UTC
svn commit: r727177 - in /hadoop/hive/trunk: CHANGES.txt build.xml
serde/build.xml
serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
Author: zshao
Date: Tue Dec 16 13:51:08 2008
New Revision: 727177
URL: http://svn.apache.org/viewvc?rev=727177&view=rev
Log:
HIVE-180. Data Generator for thrift-serialized sequence files. (zshao)
Added:
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/build.xml
hadoop/hive/trunk/serde/build.xml
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=727177&r1=727176&r2=727177&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Dec 16 13:51:08 2008
@@ -26,6 +26,8 @@
IMPROVEMENTS
+ HIVE-180. Data Generator for thrift-serialized sequence files. (zshao)
+
HIVE-157. Update README.txt to remove refs to mirror.facebook.com. (zshao)
HIVE-95. Improve cli error messages by lowering backtracking to 1.
Modified: hadoop/hive/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build.xml?rev=727177&r1=727176&r2=727177&view=diff
==============================================================================
--- hadoop/hive/trunk/build.xml (original)
+++ hadoop/hive/trunk/build.xml Tue Dec 16 13:51:08 2008
@@ -114,7 +114,7 @@
</target>
<!-- ====================================================== -->
- <!-- Clean all the contribs. -->
+ <!-- Clean all the contribs. -->
<!-- ====================================================== -->
<target name="clean">
<subant target="clean">
@@ -124,6 +124,15 @@
<delete dir="${build.dir.hive}"/>
</target>
+ <!-- ====================================================== -->
+ <!-- Generate some of the test data. -->
+ <!-- ====================================================== -->
+ <target name="gen-testdata" depends="deploy">
+ <!-- Delegates to the gen-testdata target of serde/build.xml. -->
+ <subant target="gen-testdata">
+ <fileset dir="." includes="serde/build.xml"/>
+ </subant>
+ </target>
+
<target name="package" depends="deploy">
<echo message="Deploying Hive jars to ${target.dir}"/>
<mkdir dir="${target.dir}"/>
Modified: hadoop/hive/trunk/serde/build.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/build.xml?rev=727177&r1=727176&r2=727177&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/build.xml (original)
+++ hadoop/hive/trunk/serde/build.xml Tue Dec 16 13:51:08 2008
@@ -67,4 +67,16 @@
</exec>
</target>
+ <target name="gen-testdata" depends="compile-test,test-jar">
+ <!-- Runs CreateSequenceFile in a forked JVM with ${hive.root} as the working
+ directory and data/files/complex.seq as its only argument; failonerror
+ makes a failed run fail the build. -->
+ <echo>Generating data/files/complex.seq... </echo>
+ <java
+ dir="${hive.root}"
+ classname="org.apache.hadoop.hive.serde2.thrift_test.CreateSequenceFile"
+ fork="true"
+ failonerror="true">
+ <arg value="data/files/complex.seq"/>
+ <classpath refid="${test.classpath.id}"/>
+ </java>
+ </target>
+
</project>
Added: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java?rev=727177&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java (added)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java Tue Dec 16 13:51:08 2008
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift_test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.ByteWritable;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
+import org.apache.hadoop.hive.serde.thrift.ThriftSerDe;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.hive.serde.Constants;
+
+/**
+ * Generates a SequenceFile of thrift-serialized {@link Complex} records.
+ * Usage: {@code CreateSequenceFile [-line <count>] <output_sequencefile>}
+ */
+public class CreateSequenceFile {
+  /** Prints the command-line usage and exits the JVM with status 1. */
+  public static void usage() {
+    System.out.println("Usage: CreateSequenceFile [-line <count>] <output_sequencefile>");
+    System.exit(1);
+  }
+
+  /**
+   * Writes <code>lines</code> records (10 unless overridden) to the output file.
+   *
+   * @param args an optional {@code -line <count>} pair plus exactly one output path
+   * @throws Exception propagated from serde setup, serialization or file I/O
+   */
+  public static void main(String[] args) throws Exception {
+    int lines = 10;
+    List<String> extraArgs = new ArrayList<String>();
+    for (int ai = 0; ai < args.length; ai++) {
+      if (!args[ai].equals("-line")) {
+        extraArgs.add(args[ai]);
+      } else if (ai + 1 < args.length) {
+        lines = Integer.parseInt(args[++ai]);
+      } else {
+        // A trailing "-line" has no count; do not mistake it for the output path.
+        usage();
+      }
+    }
+    if (extraArgs.size() != 1) {
+      usage();
+    }
+
+    JobConf conf = new JobConf(CreateSequenceFile.class);
+    ThriftSerDe serde = new ThriftSerDe();
+    Properties p = new Properties();
+    p.put(Constants.SERIALIZATION_CLASS, Complex.class.getName());
+    p.put(Constants.SERIALIZATION_FORMAT, com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
+    serde.initialize(conf, p);
+
+    // The same key object is appended with every record.
+    ByteWritable key = new ByteWritable(0);
+    // Fixed seed: repeated runs draw the same sequence of random ints.
+    Random rand = new Random(20081215);
+
+    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
+        new Path(extraArgs.get(0)), ByteWritable.class, BytesWritable.class);
+    try {
+      for (int i = 0; i < lines; i++) {
+        ArrayList<Integer> alist = new ArrayList<Integer>();
+        alist.add(i); alist.add(i*2); alist.add(i*3);
+        ArrayList<String> slist = new ArrayList<String>();
+        slist.add("" + i*10); slist.add("" + i*100); slist.add("" + i*1000);
+        ArrayList<IntString> islist = new ArrayList<IntString>();
+        islist.add(new IntString(i*i, "" + i*i*i));
+        HashMap<String,String> hash = new HashMap<String,String>();
+        hash.put("key_" + i, "value_" + i);
+        Complex complex = new Complex(rand.nextInt(), "record_" + i, alist, slist, islist, hash);
+        Writable value = serde.serialize(complex);
+        writer.append(key, value);
+      }
+    } finally {
+      // Close the file even when serialization or an append fails.
+      writer.close();
+    }
+  }
+}