You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2018/02/06 21:33:30 UTC
[3/3] orc git commit: ORC-298. Move the benchmark code to a non-Apache repository.

ORC-298. Move the benchmark code to a non-Apache repository.

Fix #215

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/b86d70aa
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/b86d70aa
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/b86d70aa

Branch: refs/heads/master
Commit: b86d70aa73289b86e066cc019ea11e0d83c1e40d
Parents: 541890a
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Feb 6 13:06:56 2018 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Feb 6 13:32:48 2018 -0800

----------------------------------------------------------------------
 java/bench/.gitignore                           |   5 -
 java/bench/fetch-data.sh                        |  21 -
 java/bench/pom.xml                              | 162 -----
 java/bench/src/assembly/uber.xml                |  33 -
 java/bench/src/findbugs/exclude.xml             |  25 -
 .../hadoop/fs/TrackingLocalFileSystem.java      |  57 --
 .../hive/ql/io/orc/OrcBenchmarkUtilities.java   |  54 --
 .../orc/bench/ColumnProjectionBenchmark.java    | 188 -----
 .../org/apache/orc/bench/CompressionKind.java   |  87 ---
 .../src/java/org/apache/orc/bench/Driver.java   |  78 ---
 .../org/apache/orc/bench/FullReadBenchmark.java | 223 ------
 .../org/apache/orc/bench/RandomGenerator.java   | 524 --------------
 .../org/apache/orc/bench/SalesGenerator.java    | 206 ------
 .../java/org/apache/orc/bench/Utilities.java    | 127 ----
 .../apache/orc/bench/convert/BatchReader.java   |  34 -
 .../apache/orc/bench/convert/BatchWriter.java   |  34 -
 .../orc/bench/convert/GenerateVariants.java     | 220 ------
 .../apache/orc/bench/convert/ScanVariants.java  |  87 ---
 .../orc/bench/convert/avro/AvroReader.java      | 299 --------
 .../orc/bench/convert/avro/AvroSchemaUtils.java | 192 -----
 .../orc/bench/convert/avro/AvroWriter.java      | 363 ----------
 .../apache/orc/bench/convert/csv/CsvReader.java | 175 -----
 .../orc/bench/convert/json/JsonReader.java      | 279 --------
 .../orc/bench/convert/json/JsonWriter.java      | 217 ------
 .../apache/orc/bench/convert/orc/OrcReader.java |  50 --
 .../apache/orc/bench/convert/orc/OrcWriter.java |  54 --
 .../bench/convert/parquet/ParquetReader.java    | 297 --------
 .../bench/convert/parquet/ParquetWriter.java    |  86 ---
 java/bench/src/main/resources/github.schema     | 702 -------------------
 java/bench/src/main/resources/log4j.properties  |  18 -
 java/bench/src/main/resources/sales.schema      |  56 --
 java/bench/src/main/resources/taxi.schema       |  21 -
 java/pom.xml                                    |   1 -
 33 files changed, 4975 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/.gitignore
----------------------------------------------------------------------
diff --git a/java/bench/.gitignore b/java/bench/.gitignore
deleted file mode 100644
index babcae6..0000000
--- a/java/bench/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-.*.crc
-*.json.gz
-*.avro
-*.parquet
-*.orc

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/fetch-data.sh
----------------------------------------------------------------------
diff --git a/java/bench/fetch-data.sh b/java/bench/fetch-data.sh
deleted file mode 100755
index 129d83f..0000000
--- a/java/bench/fetch-data.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-mkdir -p data/sources/taxi
-(cd data/sources/taxi; wget https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-{11,12}.csv)
-(cd data/sources/taxi; gzip *.csv)
-mkdir -p data/sources/github
-(cd data/sources/github; wget http://data.githubarchive.org/2015-11-{01..15}-{0..23}.json.gz)

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/pom.xml
----------------------------------------------------------------------
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
deleted file mode 100644
index 7e679bd..0000000
--- a/java/bench/pom.xml
+++ /dev/null
@@ -1,162 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.orc</groupId>
-    <artifactId>orc</artifactId>
-    <version>1.5.0-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>orc-benchmarks</artifactId>
-  <packaging>jar</packaging>
-  <name>ORC Benchmarks</name>
-  <description>
-    Benchmarks for comparing ORC, Parquet, and Avro performance.
-  </description>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.orc</groupId>
-      <artifactId>orc-core</artifactId>
-    </dependency>
-
-    <!-- inter-project -->
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-core</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.google.code.gson</groupId>
-      <artifactId>gson</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>commons-cli</groupId>
-      <artifactId>commons-cli</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>io.airlift</groupId>
-      <artifactId>aircompressor</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro-mapred</artifactId>
-      <classifier>hadoop2</classifier>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-csv</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-hdfs</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>runtime</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-mapreduce-client-core</artifactId>
-      <version>${hadoop.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hive</groupId>
-      <artifactId>hive-common</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hive</groupId>
-      <artifactId>hive-exec</artifactId>
-      <classifier>core</classifier>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hive</groupId>
-      <artifactId>hive-serde</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hive</groupId>
-      <artifactId>hive-storage-api</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-hadoop-bundle</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.jodd</groupId>
-      <artifactId>jodd-core</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.openjdk.jmh</groupId>
-      <artifactId>jmh-core</artifactId>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <sourceDirectory>${basedir}/src/java</sourceDirectory>
-    <testSourceDirectory>${basedir}/src/test</testSourceDirectory>
-    <testResources>
-      <testResource>
-        <directory>${basedir}/src/test/resources</directory>
-      </testResource>
-    </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <configuration>
-          <archive>
-            <manifest>
-              <mainClass>org.apache.orc.bench.Driver</mainClass>
-            </manifest>
-          </archive>
-	  <descriptors>
-            <descriptor>src/assembly/uber.xml</descriptor>
-          </descriptors>
-        </configuration>
-        <executions>
-          <execution>
-            <id>make-assembly</id> <!-- this is used for inheritance merges -->
-            <phase>package</phase> <!-- bind to the packaging phase -->
-            <goals>
-              <goal>single</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-    </plugins>
-  </build>
-
-  <profiles>
-    <profile>
-      <id>cmake</id>
-      <build>
-        <directory>${build.dir}/bench</directory>
-      </build>
-    </profile>
-  </profiles>
-</project>

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/assembly/uber.xml
----------------------------------------------------------------------
diff --git a/java/bench/src/assembly/uber.xml b/java/bench/src/assembly/uber.xml
deleted file mode 100644
index 014eab9..0000000
--- a/java/bench/src/assembly/uber.xml
+++ /dev/null
@@ -1,33 +0,0 @@
-<!--
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<assembly>
-  <id>uber</id>
-  <formats>
-    <format>jar</format>
-  </formats>
-  <includeBaseDirectory>false</includeBaseDirectory>
-  <dependencySets>
-    <dependencySet>
-      <outputDirectory>/</outputDirectory>
-      <useProjectArtifact>true</useProjectArtifact>
-      <unpack>true</unpack>
-      <scope>runtime</scope>
-    </dependencySet>
-  </dependencySets>
-  <containerDescriptorHandlers>
-    <containerDescriptorHandler>
-      <handlerName>metaInf-services</handlerName>
-    </containerDescriptorHandler>
-  </containerDescriptorHandlers>
-</assembly>

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/findbugs/exclude.xml
----------------------------------------------------------------------
diff --git a/java/bench/src/findbugs/exclude.xml b/java/bench/src/findbugs/exclude.xml
deleted file mode 100644
index dde1471..0000000
--- a/java/bench/src/findbugs/exclude.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<FindBugsFilter>
-  <Match>
-    <Bug pattern="EI_EXPOSE_REP,EI_EXPOSE_REP2"/>
-  </Match>
-  <Match>
-    <Class name="~org\.openjdk\.jmh\.infra\.generated.*"/>
-  </Match>
-  <Match>
-    <Class name="~org\.apache\.orc\.bench\.generated.*"/>
-  </Match>
-</FindBugsFilter>

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/hadoop/fs/TrackingLocalFileSystem.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/hadoop/fs/TrackingLocalFileSystem.java b/java/bench/src/java/org/apache/hadoop/fs/TrackingLocalFileSystem.java
deleted file mode 100644
index 0440495..0000000
--- a/java/bench/src/java/org/apache/hadoop/fs/TrackingLocalFileSystem.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.fs;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-
-public class TrackingLocalFileSystem extends RawLocalFileSystem {
-
-  class TrackingFileInputStream extends RawLocalFileSystem.LocalFSFileInputStream {
-    public TrackingFileInputStream(Path f) throws IOException {
-      super(f);
-    }
-
-    public int read() throws IOException {
-      statistics.incrementReadOps(1);
-      return super.read();
-    }
-
-    public int read(byte[] b, int off, int len) throws IOException {
-      statistics.incrementReadOps(1);
-      return super.read(b, off, len);
-    }
-
-    public int read(long position, byte[] b, int off, int len) throws IOException {
-      statistics.incrementReadOps(1);
-      return super.read(position, b, off, len);
-    }
-  }
-
-  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
-    if (!exists(f)) {
-      throw new FileNotFoundException(f.toString());
-    }
-    return new FSDataInputStream(new BufferedFSInputStream(
-        new TrackingFileInputStream(f), bufferSize));
-  }
-
-  public FileSystem.Statistics getLocalStatistics() {
-    return statistics;
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/OrcBenchmarkUtilities.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/OrcBenchmarkUtilities.java b/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/OrcBenchmarkUtilities.java
deleted file mode 100644
index 18c5d06..0000000
--- a/java/bench/src/java/org/apache/hadoop/hive/ql/io/orc/OrcBenchmarkUtilities.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.Writable;
-import org.apache.orc.OrcProto;
-import org.apache.orc.OrcUtils;
-import org.apache.orc.TypeDescription;
-
-import java.util.List;
-
-/**
- * Utilities that need the non-public methods from Hive.
- */
-public class OrcBenchmarkUtilities {
-
-  public static StructObjectInspector createObjectInspector(TypeDescription schema) {
-    List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
-    return (StructObjectInspector) OrcStruct.createObjectInspector(0, types);
-  }
-
-  public static Writable nextObject(VectorizedRowBatch batch,
-                                    TypeDescription schema,
-                                    int rowId,
-                                    Writable obj) {
-    OrcStruct result = (OrcStruct) obj;
-    if (result == null) {
-      result = new OrcStruct(batch.cols.length);
-    }
-    List<TypeDescription> childrenTypes = schema.getChildren();
-    for(int c=0; c < batch.cols.length; ++c) {
-      result.setFieldValue(c, RecordReaderImpl.nextValue(batch.cols[c], rowId,
-          childrenTypes.get(c), result.getFieldValue(c)));
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java b/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
deleted file mode 100644
index 4afaaf1..0000000
--- a/java/bench/src/java/org/apache/orc/bench/ColumnProjectionBenchmark.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.TrackingLocalFileSystem;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.parquet.hadoop.ParquetInputFormat;
-import org.openjdk.jmh.annotations.AuxCounters;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.BenchmarkMode;
-import org.openjdk.jmh.annotations.Fork;
-import org.openjdk.jmh.annotations.Level;
-import org.openjdk.jmh.annotations.Measurement;
-import org.openjdk.jmh.annotations.Mode;
-import org.openjdk.jmh.annotations.OutputTimeUnit;
-import org.openjdk.jmh.annotations.Param;
-import org.openjdk.jmh.annotations.Scope;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-import org.openjdk.jmh.annotations.TearDown;
-import org.openjdk.jmh.annotations.Warmup;
-import org.openjdk.jmh.runner.Runner;
-import org.openjdk.jmh.runner.options.OptionsBuilder;
-
-import java.net.URI;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-
-@BenchmarkMode(Mode.AverageTime)
-@Warmup(iterations=1, time=10, timeUnit = TimeUnit.SECONDS)
-@Measurement(iterations=3, time=10, timeUnit = TimeUnit.SECONDS)
-@State(Scope.Thread)
-@OutputTimeUnit(TimeUnit.MICROSECONDS)
-@Fork(1)
-public class ColumnProjectionBenchmark {
-
-  private static final String ROOT_ENVIRONMENT_NAME = "bench.root.dir";
-  private static final Path root;
-  static {
-    String value = System.getProperty(ROOT_ENVIRONMENT_NAME);
-    root = value == null ? null : new Path(value);
-  }
-
-  @Param({ "github", "sales", "taxi"})
-  public String dataset;
-
-  @Param({"none", "snappy", "zlib"})
-  public String compression;
-
-  @AuxCounters
-  @State(Scope.Thread)
-  public static class ExtraCounters {
-    long bytesRead;
-    long reads;
-    long records;
-    long invocations;
-
-    @Setup(Level.Iteration)
-    public void clean() {
-      bytesRead = 0;
-      reads = 0;
-      records = 0;
-      invocations = 0;
-    }
-
-    @TearDown(Level.Iteration)
-    public void print() {
-      System.out.println();
-      System.out.println("Reads: " + reads);
-      System.out.println("Bytes: " + bytesRead);
-      System.out.println("Records: " + records);
-      System.out.println("Invocations: " + invocations);
-    }
-
-    public long kilobytes() {
-      return bytesRead / 1024;
-    }
-
-    public long records() {
-      return records;
-    }
-  }
-
-  @Benchmark
-  public void orc(ExtraCounters counters) throws Exception{
-    Configuration conf = new Configuration();
-    TrackingLocalFileSystem fs = new TrackingLocalFileSystem();
-    fs.initialize(new URI("file:///"), conf);
-    FileSystem.Statistics statistics = fs.getLocalStatistics();
-    statistics.reset();
-    OrcFile.ReaderOptions options = OrcFile.readerOptions(conf).filesystem(fs);
-    Path path = Utilities.getVariant(root, dataset, "orc", compression);
-    Reader reader = OrcFile.createReader(path, options);
-    TypeDescription schema = reader.getSchema();
-    boolean[] include = new boolean[schema.getMaximumId() + 1];
-    // select first two columns
-    List<TypeDescription> children = schema.getChildren();
-    for(int c= children.get(0).getId(); c <= children.get(1).getMaximumId(); ++c) {
-      include[c] = true;
-    }
-    RecordReader rows = reader.rows(new Reader.Options()
-        .include(include));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    while (rows.nextBatch(batch)) {
-      counters.records += batch.size;
-    }
-    rows.close();
-    counters.bytesRead += statistics.getBytesRead();
-    counters.reads += statistics.getReadOps();
-    counters.invocations += 1;
-  }
-
-  @Benchmark
-  public void parquet(ExtraCounters counters) throws Exception {
-    JobConf conf = new JobConf();
-    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
-    conf.set("fs.defaultFS", "track:///");
-    if ("taxi".equals(dataset)) {
-      conf.set("columns", "vendor_id,pickup_time");
-      conf.set("columns.types", "int,timestamp");
-    } else if ("sales".equals(dataset)) {
-        conf.set("columns", "sales_id,customer_id");
-        conf.set("columns.types", "bigint,bigint");
-    } else if ("github".equals(dataset)) {
-      conf.set("columns", "actor,created_at");
-      conf.set("columns.types", "struct<avatar_url:string,gravatar_id:string," +
-          "id:int,login:string,url:string>,timestamp");
-    } else {
-      throw new IllegalArgumentException("Unknown data set " + dataset);
-    }
-    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
-    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///",
-        TrackingLocalFileSystem.class);
-    statistics.reset();
-    ParquetInputFormat<ArrayWritable> inputFormat =
-        new ParquetInputFormat<>(DataWritableReadSupport.class);
-
-    NullWritable nada = NullWritable.get();
-    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
-    org.apache.hadoop.mapred.RecordReader<NullWritable,ArrayWritable> recordReader =
-        new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
-    ArrayWritable value = recordReader.createValue();
-    while (recordReader.next(nada, value)) {
-      counters.records += 1;
-    }
-    recordReader.close();
-    counters.bytesRead += statistics.getBytesRead();
-    counters.reads += statistics.getReadOps();
-    counters.invocations += 1;
-  }
-  public static void main(String[] args) throws Exception {
-    new Runner(new OptionsBuilder()
-        .include(ColumnProjectionBenchmark.class.getSimpleName())
-        .jvmArgs("-server", "-Xms256m", "-Xmx2g",
-            "-D" + ROOT_ENVIRONMENT_NAME + "=" + args[0]).build()
-    ).run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/CompressionKind.java b/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
deleted file mode 100644
index 9274de3..0000000
--- a/java/bench/src/java/org/apache/orc/bench/CompressionKind.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import io.airlift.compress.snappy.SnappyCodec;
-import org.apache.hadoop.fs.Path;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-/**
- * Enum for handling the compression codecs for the benchmark
- */
-public enum CompressionKind {
-  NONE(".none"),
-  ZLIB(".gz"),
-  SNAPPY(".snappy");
-
-  CompressionKind(String extendsion) {
-    this.extension = extendsion;
-  }
-
-  private final String extension;
-
-  public String getExtension() {
-    return extension;
-  }
-
-  public OutputStream create(OutputStream out) throws IOException {
-    switch (this) {
-      case NONE:
-        return out;
-      case ZLIB:
-        return new GZIPOutputStream(out);
-      case SNAPPY:
-        return new SnappyCodec().createOutputStream(out);
-      default:
-        throw new IllegalArgumentException("Unhandled kind " + this);
-    }
-  }
-
-  public InputStream read(InputStream in) throws IOException {
-    switch (this) {
-      case NONE:
-        return in;
-      case ZLIB:
-        return new GZIPInputStream(in);
-      case SNAPPY:
-        return new SnappyCodec().createInputStream(in);
-      default:
-        throw new IllegalArgumentException("Unhandled kind " + this);
-    }
-  }
-
-  public static CompressionKind fromPath(Path path) {
-    String name = path.getName();
-    int lastDot = name.lastIndexOf('.');
-    if (lastDot >= 0) {
-      String ext = name.substring(lastDot);
-      for (CompressionKind value : values()) {
-        if (ext.equals(value.getExtension())) {
-          return value;
-        }
-      }
-    }
-    return NONE;
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/Driver.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/Driver.java b/java/bench/src/java/org/apache/orc/bench/Driver.java
deleted file mode 100644
index c8f1592..0000000
--- a/java/bench/src/java/org/apache/orc/bench/Driver.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.DefaultParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.orc.bench.convert.GenerateVariants;
-import org.apache.orc.bench.convert.ScanVariants;
-
-import java.util.Arrays;
-
-/**
- * A driver tool to call the various benchmark classes.
- */
-public class Driver {
-
-  static CommandLine parseCommandLine(String[] args) throws ParseException {
-    Options options = new Options()
-        .addOption("h", "help", false, "Provide help")
-        .addOption("D", "define", true, "Change configuration settings");
-    CommandLine result = new DefaultParser().parse(options, args, true);
-    if (result.hasOption("help") || result.getArgs().length == 0) {
-      new HelpFormatter().printHelp("benchmark <command>", options);
-      System.err.println();
-      System.err.println("Commands:");
-      System.err.println("  generate  - Generate data variants");
-      System.err.println("  scan      - Scan data variants");
-      System.err.println("  read-all  - Full table scan benchmark");
-      System.err.println("  read-some - Column projection benchmark");
-      System.exit(1);
-    }
-    return result;
-  }
-
-  public static void main(String[] args) throws Exception {
-    CommandLine cli = parseCommandLine(args);
-    args = cli.getArgs();
-    String command = args[0];
-    args = Arrays.copyOfRange(args, 1, args.length);
-    switch (command) {
-      case "generate":
-        GenerateVariants.main(args);
-        break;
-      case "scan":
-        ScanVariants.main(args);
-        break;
-      case "read-all":
-        FullReadBenchmark.main(args);
-        break;
-      case "read-some":
-        ColumnProjectionBenchmark.main(args);
-        break;
-      default:
-        System.err.println("Unknown command " + command);
-        System.exit(1);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java b/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
deleted file mode 100644
index 952f18d..0000000
--- a/java/bench/src/java/org/apache/orc/bench/FullReadBenchmark.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import com.google.gson.JsonStreamParser;
-import org.apache.avro.file.DataFileReader;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.mapred.FsInput;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.TrackingLocalFileSystem;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.parquet.hadoop.ParquetInputFormat;
-import org.openjdk.jmh.annotations.AuxCounters;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.BenchmarkMode;
-import org.openjdk.jmh.annotations.Fork;
-import org.openjdk.jmh.annotations.Level;
-import org.openjdk.jmh.annotations.Measurement;
-import org.openjdk.jmh.annotations.Mode;
-import org.openjdk.jmh.annotations.OutputTimeUnit;
-import org.openjdk.jmh.annotations.Param;
-import org.openjdk.jmh.annotations.Scope;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-import org.openjdk.jmh.annotations.TearDown;
-import org.openjdk.jmh.annotations.Warmup;
-import org.openjdk.jmh.runner.Runner;
-import org.openjdk.jmh.runner.options.OptionsBuilder;
-
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.URI;
-import java.nio.charset.StandardCharsets;
-import java.util.concurrent.TimeUnit;
-
-@BenchmarkMode(Mode.AverageTime)
-@Warmup(iterations=1, time=10, timeUnit = TimeUnit.SECONDS)
-@Measurement(iterations=3, time=10, timeUnit = TimeUnit.SECONDS)
-@State(Scope.Thread)
-@OutputTimeUnit(TimeUnit.MICROSECONDS)
-@Fork(1)
-public class FullReadBenchmark {
-
-  private static final String ROOT_ENVIRONMENT_NAME = "bench.root.dir";
-  private static final Path root;
-  static {
-    String value = System.getProperty(ROOT_ENVIRONMENT_NAME);
-    root = value == null ? null : new Path(value);
-  }
-
-  @Param({"taxi", "sales", "github"})
-  public String dataset;
-
-  @Param({"none", "zlib", "snappy"})
-  public String compression;
-
-  @AuxCounters
-  @State(Scope.Thread)
-  public static class ExtraCounters {
-    long bytesRead;
-    long reads;
-    long records;
-    long invocations;
-
-    @Setup(Level.Iteration)
-    public void clean() {
-      bytesRead = 0;
-      reads = 0;
-      records = 0;
-      invocations = 0;
-    }
-
-    @TearDown(Level.Iteration)
-    public void print() {
-      System.out.println();
-      System.out.println("Reads: " + reads);
-      System.out.println("Bytes: " + bytesRead);
-      System.out.println("Records: " + records);
-      System.out.println("Invocations: " + invocations);
-    }
-
-    public long kilobytes() {
-      return bytesRead / 1024;
-    }
-
-    public long records() {
-      return records;
-    }
-  }
-
-  @Benchmark
-  public void orc(ExtraCounters counters) throws Exception{
-    Configuration conf = new Configuration();
-    TrackingLocalFileSystem fs = new TrackingLocalFileSystem();
-    fs.initialize(new URI("file:///"), conf);
-    FileSystem.Statistics statistics = fs.getLocalStatistics();
-    statistics.reset();
-    OrcFile.ReaderOptions options = OrcFile.readerOptions(conf).filesystem(fs);
-    Path path = Utilities.getVariant(root, dataset, "orc", compression);
-    Reader reader = OrcFile.createReader(path, options);
-    TypeDescription schema = reader.getSchema();
-    RecordReader rows = reader.rows();
-    VectorizedRowBatch batch = schema.createRowBatch();
-    while (rows.nextBatch(batch)) {
-      counters.records += batch.size;
-    }
-    rows.close();
-    counters.bytesRead += statistics.getBytesRead();
-    counters.reads += statistics.getReadOps();
-    counters.invocations += 1;
-  }
-
-  @Benchmark
-  public void avro(ExtraCounters counters) throws Exception {
-    Configuration conf = new Configuration();
-    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
-    conf.set("fs.defaultFS", "track:///");
-    Path path = Utilities.getVariant(root, dataset, "avro", compression);
-    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///",
-        TrackingLocalFileSystem.class);
-    statistics.reset();
-    FsInput file = new FsInput(path, conf);
-    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
-    DataFileReader<GenericRecord> dataFileReader =
-        new DataFileReader<>(file, datumReader);
-    GenericRecord record = null;
-    while (dataFileReader.hasNext()) {
-      record = dataFileReader.next(record);
-      counters.records += 1;
-    }
-    counters.bytesRead += statistics.getBytesRead();
-    counters.reads += statistics.getReadOps();
-    counters.invocations += 1;
-  }
-
-  @Benchmark
-  public void parquet(ExtraCounters counters) throws Exception {
-    JobConf conf = new JobConf();
-    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
-    conf.set("fs.defaultFS", "track:///");
-    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
-    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///",
-        TrackingLocalFileSystem.class);
-    statistics.reset();
-    ParquetInputFormat<ArrayWritable> inputFormat =
-        new ParquetInputFormat<>(DataWritableReadSupport.class);
-
-    NullWritable nada = NullWritable.get();
-    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
-    org.apache.hadoop.mapred.RecordReader<NullWritable,ArrayWritable> recordReader =
-        new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
-    ArrayWritable value = recordReader.createValue();
-    while (recordReader.next(nada, value)) {
-      counters.records += 1;
-    }
-    recordReader.close();
-    counters.bytesRead += statistics.getBytesRead();
-    counters.reads += statistics.getReadOps();
-    counters.invocations += 1;
-  }
-
-  @Benchmark
-  public void json(ExtraCounters counters) throws Exception {
-    Configuration conf = new Configuration();
-    TrackingLocalFileSystem fs = new TrackingLocalFileSystem();
-    fs.initialize(new URI("file:///"), conf);
-    FileSystem.Statistics statistics = fs.getLocalStatistics();
-    statistics.reset();
-    Path path = Utilities.getVariant(root, dataset, "json", compression);
-    CompressionKind compress = CompressionKind.valueOf(compression);
-    InputStream input = compress.read(fs.open(path));
-    JsonStreamParser parser =
-        new JsonStreamParser(new InputStreamReader(input,
-            StandardCharsets.UTF_8));
-    while (parser.hasNext()) {
-      parser.next();
-      counters.records += 1;
-    }
-    counters.bytesRead += statistics.getBytesRead();
-    counters.reads += statistics.getReadOps();
-    counters.invocations += 1;
-  }
-
-  public static void main(String[] args) throws Exception {
-    new Runner(new OptionsBuilder()
-        .include(FullReadBenchmark.class.getSimpleName())
-        .addProfiler("hs_gc")
-        .jvmArgs("-server", "-Xms256m", "-Xmx2g",
-            "-D" + ROOT_ENVIRONMENT_NAME + "=" + args[0]).build()
-    ).run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java b/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java
deleted file mode 100644
index dfe7d43..0000000
--- a/java/bench/src/java/org/apache/orc/bench/RandomGenerator.java
+++ /dev/null
@@ -1,524 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.TypeDescription;
-
-import java.nio.charset.StandardCharsets;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-/**
- * Generates pseudo-random rows for a struct schema that is built up one
- * field at a time.
- * <p>
- * Each call to {@code addField} appends a column to the schema and returns
- * a {@link Field} on which exactly one value generator should be configured
- * (for example {@link Field#addIntegerRange}). All generators draw from a
- * single {@link Random} created from the seed, so the output for a given
- * seed and sequence of calls is repeatable.
- */
-public class RandomGenerator {
-  private final TypeDescription schema = TypeDescription.createStruct();
-  private final List<Field> fields = new ArrayList<>();
-  private final Random random;
-
-  /**
-   * Create a generator with an empty schema.
-   * @param seed the seed for the shared random number generator
-   */
-  public RandomGenerator(int seed) {
-    random = new Random(seed);
-  }
-
-  /**
-   * Fills the first valueCount entries of a column vector. With a non-zero
-   * nullProbability, each entry is independently made null with that
-   * probability.
-   */
-  private abstract class ValueGenerator {
-    double nullProbability = 0;
-    abstract void generate(ColumnVector vector, int valueCount);
-  }
-
-  /** Generates 0 or 1 with equal probability. */
-  private class RandomBoolean extends ValueGenerator {
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      LongColumnVector vector = (LongColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          vector.vector[r] = random.nextInt(2);
-        }
-      }
-    }
-  }
-
-  /**
-   * Generates lists whose lengths are uniformly distributed between minSize
-   * and maxSize (both inclusive) and fills their elements using the child
-   * field's generator.
-   */
-  private class RandomList extends ValueGenerator {
-    private final int minSize;
-    private final int sizeRange;
-    private final Field child;
-
-    public RandomList(int minSize, int maxSize, Field child) {
-      this.minSize = minSize;
-      this.sizeRange = maxSize - minSize + 1;
-      this.child = child;
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      ListColumnVector vector = (ListColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          vector.offsets[r] = vector.childCount;
-          vector.lengths[r] = random.nextInt(sizeRange) + minSize;
-          vector.childCount += vector.lengths[r];
-        }
-      }
-      // the elements of all of the lists are generated in a single pass
-      vector.child.ensureSize(vector.childCount, false);
-      child.generator.generate(vector.child, vector.childCount);
-    }
-  }
-
-  /**
-   * Generates structs by delegating to the generators of the child fields.
-   * The children are generated for every row, including the rows where the
-   * struct itself is null.
-   */
-  private class RandomStruct extends ValueGenerator {
-    private final Field[] children;
-
-    public RandomStruct(Field[] children) {
-      this.children = children;
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      StructColumnVector vector = (StructColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        }
-      }
-      for(int c=0; c < children.length; ++c) {
-        children[c].generator.generate(vector.fields[c], valueCount);
-      }
-    }
-  }
-
-  /**
-   * Base class of the integer generators, which keeps the values within
-   * the width of the column's integer type.
-   */
-  private abstract class IntegerGenerator extends ValueGenerator {
-    // the sign bit of the type
-    private final long sign;
-    // all of the bits above the width of the type (0 for 64 bit values)
-    private final long mask;
-
-    private IntegerGenerator(TypeDescription.Category kind) {
-      int bits = getIntegerLength(kind);
-      mask = bits == 64 ? 0 : -1L << bits;
-      sign = 1L << (bits - 1);
-    }
-
-    protected void normalize(LongColumnVector vector, int valueCount) {
-      // make sure the value stays in range by sign extending it
-      for(int r=0; r < valueCount; ++r) {
-        if ((vector.vector[r] & sign) == 0) {
-          vector.vector[r] &= ~mask;
-        } else {
-          vector.vector[r] |= mask;
-        }
-      }
-    }
-  }
-
-  /**
-   * Generates a sequence that starts at a given value and grows by a fixed
-   * increment for each non-null value.
-   */
-  private class AutoIncrement extends IntegerGenerator {
-    private long value;
-    private final long increment;
-
-    private AutoIncrement(TypeDescription.Category kind, long start,
-                          long increment) {
-      super(kind);
-      this.value = start;
-      this.increment = increment;
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      LongColumnVector vector = (LongColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        // a value is null with probability nullProbability, exactly as in
-        // the other generators (the comparison used to be inverted here)
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          vector.vector[r] = value;
-          value += increment;
-        }
-      }
-      normalize(vector, valueCount);
-    }
-  }
-
-  /** Generates integers that are uniform over the full width of the type. */
-  private class RandomInteger extends IntegerGenerator {
-
-    private RandomInteger(TypeDescription.Category kind) {
-      super(kind);
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      LongColumnVector vector = (LongColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          vector.vector[r] = random.nextLong();
-        }
-      }
-      normalize(vector, valueCount);
-    }
-  }
-
-  /**
-   * Generates integers that are uniform between a minimum and a maximum,
-   * both inclusive.
-   */
-  private class IntegerRange extends IntegerGenerator {
-    private final long minimum;
-    private final long range;
-    // the largest multiple of range that fits in a long; random values at
-    // or above it are rejected so that the modulo doesn't bias the result
-    private final long limit;
-
-    private IntegerRange(TypeDescription.Category kind, long minimum,
-                         long maximum) {
-      super(kind);
-      this.minimum = minimum;
-      this.range = maximum - minimum + 1;
-      if (this.range < 0) {
-        throw new IllegalArgumentException("Can't support a negative range "
-            + range);
-      }
-      limit = (Long.MAX_VALUE / range) * range;
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      LongColumnVector vector = (LongColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          long rand;
-          do {
-            // clear the sign bit
-            rand = random.nextLong() & Long.MAX_VALUE;
-          } while (rand >= limit);
-          vector.vector[r] = (rand % range) + minimum;
-        }
-      }
-      normalize(vector, valueCount);
-    }
-  }
-
-  /** Picks uniformly from a fixed set of strings. */
-  private class StringChooser extends ValueGenerator {
-    private final byte[][] choices;
-    private StringChooser(String[] values) {
-      choices = new byte[values.length][];
-      for(int e=0; e < values.length; ++e) {
-        choices[e] = values[e].getBytes(StandardCharsets.UTF_8);
-      }
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      BytesColumnVector vector = (BytesColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          int val = random.nextInt(choices.length);
-          vector.setRef(r, choices[val], 0, choices[val].length);
-        }
-      }
-    }
-  }
-
-  /** Return a new array that holds left followed by right. */
-  private static byte[] concat(byte[] left, byte[] right) {
-    byte[] result = new byte[left.length + right.length];
-    System.arraycopy(left, 0, result, 0, left.length);
-    System.arraycopy(right, 0, result, left.length, right.length);
-    return result;
-  }
-
-  /** Pick one of the choices uniformly at random. */
-  private static byte pickOne(byte[] choices, Random random) {
-    return choices[random.nextInt(choices.length)];
-  }
-
-  private static final byte[] LOWER_CONSONANTS =
-      "bcdfghjklmnpqrstvwxyz".getBytes(StandardCharsets.UTF_8);
-  private static final byte[] UPPER_CONSONANTS =
-      "BCDFGHJKLMNPQRSTVWXYZ".getBytes(StandardCharsets.UTF_8);
-  private static final byte[] CONSONANTS =
-      concat(LOWER_CONSONANTS, UPPER_CONSONANTS);
-  private static final byte[] LOWER_VOWELS = "aeiou".getBytes(StandardCharsets.UTF_8);
-  private static final byte[] UPPER_VOWELS = "AEIOU".getBytes(StandardCharsets.UTF_8);
-  private static final byte[] VOWELS = concat(LOWER_VOWELS, UPPER_VOWELS);
-  private static final byte[] LOWER_LETTERS =
-      concat(LOWER_CONSONANTS, LOWER_VOWELS);
-  private static final byte[] UPPER_LETTERS =
-      concat(UPPER_CONSONANTS, UPPER_VOWELS);
-  private static final byte[] LETTERS = concat(LOWER_LETTERS, UPPER_LETTERS);
-  private static final byte[] NATURAL_DIGITS = "123456789".getBytes(StandardCharsets.UTF_8);
-  private static final byte[] DIGITS = "0123456789".getBytes(StandardCharsets.UTF_8);
-
-  /**
-   * Generates strings from a pattern in which the following characters are
-   * replaced by a random character of the given class and every other
-   * character is copied literally:
-   * <ul>
-   *   <li>C - upper case consonant</li>
-   *   <li>c - lower case consonant</li>
-   *   <li>E - consonant of either case</li>
-   *   <li>V - upper case vowel</li>
-   *   <li>v - lower case vowel</li>
-   *   <li>F - vowel of either case</li>
-   *   <li>l - lower case letter</li>
-   *   <li>L - upper case letter</li>
-   *   <li>D - letter of either case</li>
-   *   <li>x - digit from 1 to 9</li>
-   *   <li>X - digit from 0 to 9</li>
-   * </ul>
-   */
-  private class StringPattern extends ValueGenerator {
-    // the pattern bytes, which are overwritten in place for each value
-    private final byte[] buffer;
-    // the characters to choose from for each replaced position
-    private final byte[][] choices;
-    // the offsets in buffer of the replaced positions
-    private final int[] locations;
-
-    private StringPattern(String pattern) {
-      buffer = pattern.getBytes(StandardCharsets.UTF_8);
-      // first pass: count the positions that need to be replaced
-      int locs = 0;
-      for(int i=0; i < buffer.length; ++i) {
-        switch (buffer[i]) {
-          case 'C':
-          case 'c':
-          case 'E':
-          case 'V':
-          case 'v':
-          case 'F':
-          case 'l':
-          case 'L':
-          case 'D':
-          case 'x':
-          case 'X':
-            locs += 1;
-            break;
-          default:
-            break;
-        }
-      }
-      locations = new int[locs];
-      choices = new byte[locs][];
-      // second pass: record each position and its character class
-      locs = 0;
-      for(int i=0; i < buffer.length; ++i) {
-        switch (buffer[i]) {
-          case 'C':
-            locations[locs] = i;
-            choices[locs++] = UPPER_CONSONANTS;
-            break;
-          case 'c':
-            locations[locs] = i;
-            choices[locs++] = LOWER_CONSONANTS;
-            break;
-          case 'E':
-            locations[locs] = i;
-            choices[locs++] = CONSONANTS;
-            break;
-          case 'V':
-            locations[locs] = i;
-            choices[locs++] = UPPER_VOWELS;
-            break;
-          case 'v':
-            locations[locs] = i;
-            choices[locs++] = LOWER_VOWELS;
-            break;
-          case 'F':
-            locations[locs] = i;
-            choices[locs++] = VOWELS;
-            break;
-          case 'l':
-            locations[locs] = i;
-            choices[locs++] = LOWER_LETTERS;
-            break;
-          case 'L':
-            locations[locs] = i;
-            choices[locs++] = UPPER_LETTERS;
-            break;
-          case 'D':
-            locations[locs] = i;
-            choices[locs++] = LETTERS;
-            break;
-          case 'x':
-            locations[locs] = i;
-            choices[locs++] = NATURAL_DIGITS;
-            break;
-          case 'X':
-            locations[locs] = i;
-            choices[locs++] = DIGITS;
-            break;
-          default:
-            break;
-        }
-      }
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      BytesColumnVector vector = (BytesColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          for(int m=0; m < locations.length; ++m) {
-            buffer[locations[m]] = pickOne(choices[m], random);
-          }
-          // setVal rather than setRef, because buffer is reused for the
-          // next value
-          vector.setVal(r, buffer, 0, buffer.length);
-        }
-      }
-    }
-  }
-
-  /**
-   * Generates timestamps that are uniform between a minimum and a maximum,
-   * both inclusive and both given in the format accepted by
-   * {@link Timestamp#valueOf(String)}.
-   */
-  private class TimestampRange extends ValueGenerator {
-    private final long minimum;
-    private final long range;
-    // the largest multiple of range that fits in a long; random values at
-    // or above it are rejected so that the modulo doesn't bias the result
-    private final long limit;
-
-    private TimestampRange(String min, String max) {
-      minimum = Timestamp.valueOf(min).getTime();
-      range = Timestamp.valueOf(max).getTime() - minimum + 1;
-      if (range < 0) {
-        throw new IllegalArgumentException("Negative range " + range);
-      }
-      limit = (Long.MAX_VALUE / range) * range;
-    }
-
-    @Override
-    public void generate(ColumnVector v, int valueCount) {
-      TimestampColumnVector vector = (TimestampColumnVector) v;
-      for(int r=0; r < valueCount; ++r) {
-        if (nullProbability != 0 && random.nextDouble() < nullProbability) {
-          v.noNulls = false;
-          v.isNull[r] = true;
-        } else {
-          long rand;
-          do {
-            // clear the sign bit
-            rand = random.nextLong() & Long.MAX_VALUE;
-          } while (rand >= limit);
-          vector.time[r] = (rand % range) + minimum;
-          vector.nanos[r] = random.nextInt(1000000);
-        }
-      }
-    }
-  }
-
-  /**
-   * Get the width in bits of an integer type.
-   * @param kind the category of the type
-   * @return the number of bits
-   * @throws IllegalArgumentException if kind isn't BYTE, SHORT, INT or LONG
-   */
-  private static int getIntegerLength(TypeDescription.Category kind) {
-    switch (kind) {
-      case BYTE:
-        return 8;
-      case SHORT:
-        return 16;
-      case INT:
-        return 32;
-      case LONG:
-        return 64;
-      default:
-        throw new IllegalArgumentException("Unhandled type " + kind);
-    }
-  }
-
-  /**
-   * A column of the generated schema, or one of its nested children.
-   * The add methods set the generator for the field, replacing any earlier
-   * one, and return the field so that calls can be chained.
-   */
-  public class Field {
-    private final TypeDescription type;
-    // the fields for the child types; null for primitive types
-    private Field[] children;
-    private ValueGenerator generator;
-
-    private Field(TypeDescription type) {
-      this.type = type;
-      if (!type.getCategory().isPrimitive()) {
-        List<TypeDescription> childrenTypes = type.getChildren();
-        children = new Field[childrenTypes.size()];
-        for(int c=0; c < children.length; ++c) {
-          children[c] = new Field(childrenTypes.get(c));
-        }
-      }
-    }
-
-    /**
-     * Generate start, start + increment, start + 2 * increment, ...
-     * @throws IllegalArgumentException if the field isn't an integer type
-     */
-    public Field addAutoIncrement(long start, long increment) {
-      generator = new AutoIncrement(type.getCategory(), start, increment);
-      return this;
-    }
-
-    /**
-     * Generate integers that are uniform between min and max, both
-     * inclusive.
-     * @throws IllegalArgumentException if the field isn't an integer type
-     *    or max - min + 1 is negative
-     */
-    public Field addIntegerRange(long min, long max) {
-      generator = new IntegerRange(type.getCategory(), min, max);
-      return this;
-    }
-
-    /**
-     * Generate integers that are uniform over the full width of the type.
-     * @throws IllegalArgumentException if the field isn't an integer type
-     */
-    public Field addRandomInt() {
-      generator = new RandomInteger(type.getCategory());
-      return this;
-    }
-
-    /**
-     * Generate strings that are picked uniformly from the choices.
-     * @throws IllegalArgumentException if the field isn't a string
-     */
-    public Field addStringChoice(String... choices) {
-      if (type.getCategory() != TypeDescription.Category.STRING) {
-        throw new IllegalArgumentException("Must be string - " + type);
-      }
-      generator = new StringChooser(choices);
-      return this;
-    }
-
-    /**
-     * Generate strings by replacing the character class markers
-     * (C, c, E, V, v, F, l, L, D, x and X) in the pattern with random
-     * characters.
-     * @throws IllegalArgumentException if the field isn't a string
-     */
-    public Field addStringPattern(String pattern) {
-      if (type.getCategory() != TypeDescription.Category.STRING) {
-        throw new IllegalArgumentException("Must be string - " + type);
-      }
-      generator = new StringPattern(pattern);
-      return this;
-    }
-
-    /**
-     * Generate timestamps that are uniform between start and end, both
-     * inclusive and both in the format accepted by
-     * {@link Timestamp#valueOf(String)}.
-     * @throws IllegalArgumentException if the field isn't a timestamp
-     */
-    public Field addTimestampRange(String start, String end) {
-      if (type.getCategory() != TypeDescription.Category.TIMESTAMP) {
-        throw new IllegalArgumentException("Must be timestamp - " + type);
-      }
-      generator = new TimestampRange(start, end);
-      return this;
-    }
-
-    /**
-     * Generate booleans that are true or false with equal probability.
-     * @throws IllegalArgumentException if the field isn't a boolean
-     */
-    public Field addBoolean() {
-      if (type.getCategory() != TypeDescription.Category.BOOLEAN) {
-        throw new IllegalArgumentException("Must be boolean - " + type);
-      }
-      generator = new RandomBoolean();
-      return this;
-    }
-
-    /**
-     * Set the probability that a value of this field is null.
-     * @param probability the probability, between 0 and 1
-     * @throws IllegalStateException if no generator has been added yet
-     */
-    public Field hasNulls(double probability) {
-      if (generator == null) {
-        throw new IllegalStateException("Add a generator to " + type +
-            " before setting its null probability");
-      }
-      generator.nullProbability = probability;
-      return this;
-    }
-
-    /**
-     * Generate structs. Each child field needs its own generator, which is
-     * configured through {@link #getChildField(int)}.
-     */
-    public Field addStruct() {
-      generator = new RandomStruct(children);
-      return this;
-    }
-
-    /**
-     * Generate lists with between minSize and maxSize elements, both
-     * inclusive. The element field needs its own generator, which is
-     * configured through {@code getChildField(0)}.
-     */
-    public Field addList(int minSize, int maxSize) {
-      generator = new RandomList(minSize, maxSize, children[0]);
-      return this;
-    }
-
-    /**
-     * Get the field for a child of a compound type.
-     * @param child the index of the child, starting from 0
-     */
-    public Field getChildField(int child) {
-      return children[child];
-    }
-  }
-
-  /**
-   * Append a column of a primitive type to the schema.
-   * @param name the name of the column
-   * @param kind the category of the column's type
-   * @return the new field, which still needs a generator
-   */
-  public Field addField(String name, TypeDescription.Category kind) {
-    TypeDescription type = new TypeDescription(kind);
-    return addField(name, type);
-  }
-
-  /**
-   * Append a column to the schema.
-   * @param name the name of the column
-   * @param type the type of the column
-   * @return the new field, which still needs a generator
-   */
-  public Field addField(String name, TypeDescription type) {
-    schema.addField(name, type);
-    Field result = new Field(type);
-    fields.add(result);
-    return result;
-  }
-
-  /**
-   * Reset the batch and fill it with generated rows.
-   * @param batch the batch to fill, whose columns must line up with the
-   *              fields that were added to this generator
-   * @param rowCount the number of rows to generate
-   */
-  public void generate(VectorizedRowBatch batch, int rowCount) {
-    batch.reset();
-    for(int c=0; c < batch.cols.length; ++c) {
-      fields.get(c).generator.generate(batch.cols[c], rowCount);
-    }
-    batch.size = rowCount;
-  }
-
-  /**
-   * Get the schema for the table that is being generated.
-   * @return the struct type, which gains a field with each call to addField
-   */
-  public TypeDescription getSchema() {
-    return schema;
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/SalesGenerator.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/SalesGenerator.java b/java/bench/src/java/org/apache/orc/bench/SalesGenerator.java
deleted file mode 100644
index 2be3537..0000000
--- a/java/bench/src/java/org/apache/orc/bench/SalesGenerator.java
+++ /dev/null
@@ -1,206 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.bench.convert.BatchReader;
-
-/**
- * A BatchReader that produces a fixed number of synthetic sales rows from a
- * RandomGenerator instead of reading them from a file.
- * <p>
- * The order of the addField calls in the constructor defines the order of
- * the columns in the schema, and the generators consume random numbers in
- * that same order, so reordering the calls changes the generated data.
- */
-public class SalesGenerator implements BatchReader {
-  private final RandomGenerator generator;
-  // the number of rows that nextBatch still has to produce
-  private long rowsRemaining;
-  // the null probability used for the columns that are almost always null
-  private final static double MOSTLY = 0.99999;
-
-  /**
-   * Create a generator with the default seed of 42.
-   * @param rows the total number of rows to generate
-   */
-  public SalesGenerator(long rows) {
-    this(rows, 42);
-  }
-
-  /**
-   * Create a generator.
-   * @param rows the total number of rows to generate
-   * @param seed the seed for the random number generator
-   */
-  public SalesGenerator(long rows, int seed) {
-    generator = new RandomGenerator(seed);
-    // column 1
-    generator.addField("sales_id", TypeDescription.Category.LONG)
-        .addAutoIncrement(1000000000, 1);
-    generator.addField("customer_id", TypeDescription.Category.LONG)
-        .addIntegerRange(1000000000, 2000000000);
-    generator.addField("col3", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 10000).hasNulls(0.9993100389335173);
-
-    // column 4
-    generator.addField("item_category", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000).hasNulls(0.00014784879996054823);
-    generator.addField("item_count", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000);
-    generator.addField("change_ts", TypeDescription.Category.TIMESTAMP)
-        .addTimestampRange("2003-01-01 00:00:00", "2017-03-14 23:59:59");
-
-    // column 7
-    generator.addField("store_location", TypeDescription.Category.STRING)
-        .addStringChoice("Los Angeles", "New York", "Cupertino", "Sunnyvale",
-            "Boston", "Chicago", "Seattle", "Jackson",
-            "Palo Alto", "San Mateo", "San Jose", "Santa Clara",
-            "Irvine", "Torrance", "Gardena", "Hermosa", "Manhattan")
-        .hasNulls(0.0004928293332019384);
-    generator.addField("associate_id", TypeDescription.Category.STRING)
-        .addStringPattern("MR V").hasNulls(0.05026859198659506);
-    generator.addField("col9", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000000).hasNulls(MOSTLY);
-
-    // column 10
-    generator.addField("rebate_id", TypeDescription.Category.STRING)
-        .addStringPattern("xxxxxx").hasNulls(MOSTLY);
-    generator.addField("create_ts", TypeDescription.Category.TIMESTAMP)
-        .addTimestampRange("2003-01-01 00:00:00", "2017-03-14 23:59:59");
-    generator.addField("col13", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 100000).hasNulls(MOSTLY);
-
-    // column 13
-    generator.addField("size", TypeDescription.Category.STRING)
-        .addStringChoice("Small", "Medium", "Large", "XL")
-        .hasNulls(0.9503720861465674);
-    generator.addField("col14", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 100000);
-    generator.addField("fulfilled", TypeDescription.Category.BOOLEAN)
-        .addBoolean();
-
-    // column 16
-    generator.addField("global_id", TypeDescription.Category.STRING)
-        .addStringPattern("xxxxxxxxxxxxxxxx").hasNulls(0.021388793060962974);
-    generator.addField("col17", TypeDescription.Category.STRING)
-        .addStringPattern("L-xx").hasNulls(MOSTLY);
-    generator.addField("col18", TypeDescription.Category.STRING)
-        .addStringPattern("ll").hasNulls(MOSTLY);
-
-    // column 19
-    generator.addField("col19", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 100000);
-    generator.addField("has_rebate", TypeDescription.Category.BOOLEAN)
-        .addBoolean();
-    // a list of structs: the list, the struct and each of the struct's
-    // children need their own generator
-    RandomGenerator.Field list =
-        generator.addField("col21",
-        TypeDescription.fromString("array<struct<sub1:bigint,sub2:string," +
-            "sub3:string,sub4:bigint,sub5:bigint,sub6:string>>"))
-        .addList(0, 3)
-        .hasNulls(MOSTLY);
-    RandomGenerator.Field struct = list.getChildField(0).addStruct();
-    struct.getChildField(0).addIntegerRange(0, 10000000);
-    struct.getChildField(1).addStringPattern("VVVVV");
-    struct.getChildField(2).addStringPattern("VVVVVVVV");
-    struct.getChildField(3).addIntegerRange(0, 10000000);
-    struct.getChildField(4).addIntegerRange(0, 10000000);
-    struct.getChildField(5).addStringPattern("VVVVVVVV");
-
-    // column 38
-    generator.addField("vendor_id", TypeDescription.Category.STRING)
-        .addStringPattern("Lxxxxxx").hasNulls(0.1870780148834459);
-    generator.addField("country", TypeDescription.Category.STRING)
-        .addStringChoice("USA", "Germany", "Ireland", "Canada", "Mexico",
-            "Denmark").hasNulls(0.0004928293332019384);
-
-    // column 40
-    generator.addField("backend_version", TypeDescription.Category.STRING)
-        .addStringPattern("X.xx").hasNulls(0.0005913951998423039);
-    generator.addField("col41", TypeDescription.Category.LONG)
-        .addIntegerRange(1000000000, 100000000000L);
-    generator.addField("col42", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000000);
-
-    // column 43
-    generator.addField("col43", TypeDescription.Category.LONG)
-        .addIntegerRange(1000000000, 10000000000L).hasNulls(0.9763934749396284);
-    generator.addField("col44", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 100000000);
-    generator.addField("col45", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 100000000);
-
-    // column 46
-    generator.addField("col46", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 10000000);
-    generator.addField("col47", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000);
-    generator.addField("col48", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000).hasNulls(MOSTLY);
-
-    // column 49
-    generator.addField("col49", TypeDescription.Category.STRING)
-        .addStringPattern("xxxx").hasNulls(0.0004928293332019384);
-    generator.addField("col50", TypeDescription.Category.STRING)
-        .addStringPattern("ll").hasNulls(0.9496821250800848);
-    generator.addField("col51", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000).hasNulls(0.9999014341333596);
-
-    // column 52
-    generator.addField("col52", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000).hasNulls(0.9980779656005125);
-    generator.addField("col53", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000000);
-    generator.addField("col54", TypeDescription.Category.LONG)
-        .addIntegerRange(1,  1000000000);
-
-    // column 55
-    generator.addField("col55", TypeDescription.Category.STRING)
-        .addStringChoice("X");
-    generator.addField("col56", TypeDescription.Category.TIMESTAMP)
-        .addTimestampRange("2003-01-01 00:00:00", "2017-03-14 23:59:59");
-    generator.addField("col57", TypeDescription.Category.TIMESTAMP)
-        .addTimestampRange("2003-01-01 00:00:00", "2017-03-14 23:59:59");
-
-    // column 58
-    generator.addField("md5", TypeDescription.Category.LONG)
-        .addRandomInt();
-    generator.addField("col59", TypeDescription.Category.LONG)
-        .addIntegerRange(1000000000, 10000000000L);
-    generator.addField("col69", TypeDescription.Category.TIMESTAMP)
-        .addTimestampRange("2003-01-01 00:00:00", "2017-03-14 23:59:59")
-        .hasNulls(MOSTLY);
-
-    // column 61
-    generator.addField("col61", TypeDescription.Category.STRING)
-        .addStringPattern("X.xx").hasNulls(0.11399142476960233);
-    generator.addField("col62", TypeDescription.Category.STRING)
-        .addStringPattern("X.xx").hasNulls(0.9986200778670347);
-    generator.addField("col63", TypeDescription.Category.TIMESTAMP)
-        .addTimestampRange("2003-01-01 00:00:00", "2017-03-14 23:59:59");
-
-    // column 64
-    generator.addField("col64", TypeDescription.Category.LONG)
-        .addIntegerRange(1, 1000000).hasNulls(MOSTLY);
-    rowsRemaining = rows;
-  }
-
-  /**
-   * Fill the batch with the next rows.
-   * @param batch the batch to fill; at most batch.getMaxSize() rows are
-   *              generated
-   * @return false once all of the requested rows have been produced, in
-   *         which case the batch is left empty
-   */
-  public boolean nextBatch(VectorizedRowBatch batch) {
-    int rows = (int) Math.min(batch.getMaxSize(), rowsRemaining);
-    generator.generate(batch, rows);
-    rowsRemaining -= rows;
-    return rows != 0;
-  }
-
-  @Override
-  public void close() {
-    // nothing to release, since the rows are generated in memory
-  }
-
-  /**
-   * Get the schema of the generated rows.
-   * @return the schema that was built up by the constructor
-   */
-  public TypeDescription getSchema() {
-    return generator.getSchema();
-  }
-
-  /**
-   * Print the schema of the generated table.
-   * @param args ignored
-   */
-  public static void main(String[] args) throws Exception {
-    SalesGenerator sales = new SalesGenerator(10, 42);
-    System.out.println("Schema " + sales.getSchema());
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/Utilities.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/Utilities.java b/java/bench/src/java/org/apache/orc/bench/Utilities.java
deleted file mode 100644
index 7016f5e..0000000
--- a/java/bench/src/java/org/apache/orc/bench/Utilities.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.orc.TypeDescription;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.Properties;
-
-/**
- * Static helper methods shared by the benchmark tools.
- */
-public class Utilities {
-
-  /**
-   * Loads a schema from a resource on the class path.
-   * Spaces, tabs, and line terminators are stripped from the resource text
-   * before it is parsed with {@link TypeDescription#fromString(String)}.
-   *
-   * @param name the resource name, resolved through this class's class loader
-   * @return the parsed schema
-   * @throws IOException if the resource is missing or can't be read
-   */
-  public static TypeDescription loadSchema(String name) throws IOException {
-    // try-with-resources closes the stream even if reading or parsing fails
-    try (InputStream in =
-             Utilities.class.getClassLoader().getResourceAsStream(name)) {
-      if (in == null) {
-        // getResourceAsStream reports a missing resource by returning null
-        throw new IOException("Can't find schema resource " + name);
-      }
-      byte[] buffer = new byte[1 * 1024];
-      StringBuilder string = new StringBuilder();
-      int len = in.read(buffer);
-      while (len > 0) {
-        for (int i = 0; i < len; ++i) {
-          // strip out the whitespace, including Windows line endings
-          if (buffer[i] != '\n' && buffer[i] != '\r' &&
-              buffer[i] != '\t' && buffer[i] != ' ') {
-            // each byte is widened to one char, so the text is assumed ASCII
-            string.append((char) buffer[i]);
-          }
-        }
-        len = in.read(buffer);
-      }
-      return TypeDescription.fromString(string.toString());
-    }
-  }
-
-  /**
-   * Translates the benchmark's compression enum into the ORC enum.
-   *
-   * @param compression the benchmark compression kind
-   * @return the matching org.apache.orc.CompressionKind
-   * @throws IllegalArgumentException if there is no mapping for the value
-   */
-  public static org.apache.orc.CompressionKind getCodec(CompressionKind compression) {
-    switch (compression) {
-      case NONE:
-        return org.apache.orc.CompressionKind.NONE;
-      case ZLIB:
-        return org.apache.orc.CompressionKind.ZLIB;
-      case SNAPPY:
-        return org.apache.orc.CompressionKind.SNAPPY;
-      default:
-        throw new IllegalArgumentException("Unknown compression " + compression);
-    }
-  }
-
-  /**
-   * Provides a view of the tail of an array without copying it.
-   * Every call to iterator() starts again at the start index, so the result
-   * can be traversed more than once.
-   *
-   * @param array the array to iterate over
-   * @param start the index of the first element to return
-   * @return an iterable over array[start] through the end of the array
-   */
-  public static Iterable<String> sliceArray(final String[] array,
-                                            final int start) {
-    return new Iterable<String>() {
-      @Override
-      public Iterator<String> iterator() {
-        return new Iterator<String>() {
-          // The cursor belongs to the iterator, not the iterable, so that
-          // separate traversals do not share (and exhaust) one position.
-          private int posn = start;
-
-          @Override
-          public boolean hasNext() {
-            return posn < array.length;
-          }
-
-          @Override
-          public String next() {
-            if (posn >= array.length) {
-              throw new NoSuchElementException("Index off end of array." +
-                  " index = " + posn + " length = " + array.length);
-            } else {
-              return array[posn++];
-            }
-          }
-
-          @Override
-          public void remove() {
-            throw new UnsupportedOperationException("No remove");
-          }
-        };
-      }
-    };
-  }
-
-  /**
-   * Builds the Hive column properties that describe a struct schema.
-   * The field names and the field types are each joined with commas and
-   * stored under serdeConstants.LIST_COLUMNS and
-   * serdeConstants.LIST_COLUMN_TYPES respectively.
-   *
-   * @param schema the schema to describe, which must be a struct
-   * @return the properties holding the column names and types
-   * @throws IllegalArgumentException if the schema's root is not a struct
-   */
-  public static Properties convertSchemaToHiveConfig(TypeDescription schema) {
-    Properties result = new Properties();
-    if (schema.getCategory() != TypeDescription.Category.STRUCT) {
-      throw new IllegalArgumentException("Hive requires struct root types" +
-          " instead of " + schema);
-    }
-    StringBuilder columns = new StringBuilder();
-    StringBuilder types = new StringBuilder();
-    List<String> columnNames = schema.getFieldNames();
-    List<TypeDescription> columnTypes = schema.getChildren();
-    for(int c=0; c < columnNames.size(); ++c) {
-      if (c != 0) {
-        columns.append(",");
-        types.append(",");
-      }
-      columns.append(columnNames.get(c));
-      types.append(columnTypes.get(c));
-    }
-    result.setProperty(serdeConstants.LIST_COLUMNS, columns.toString());
-    result.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
-    return result;
-  }
-
-  /**
-   * Computes the path of one generated variant of a data set.
-   *
-   * @param root the root directory of the benchmark data
-   * @param data the name of the data set
-   * @param format the name of the file format
-   * @param compress the name of the compression
-   * @return root/generated/&lt;data&gt;/&lt;format&gt;.&lt;compress&gt;
-   */
-  public static Path getVariant(Path root,
-                                String data,
-                                String format,
-                                String compress) {
-    return new Path(root, "generated/" + data + "/" + format + "." + compress);
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/convert/BatchReader.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/convert/BatchReader.java b/java/bench/src/java/org/apache/orc/bench/convert/BatchReader.java
deleted file mode 100644
index b9ea356..0000000
--- a/java/bench/src/java/org/apache/orc/bench/convert/BatchReader.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench.convert;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.IOException;
-
-/**
- * Generic interface for reading data one row batch at a time.
- */
-public interface BatchReader extends AutoCloseable {
-
-  /**
-   * Reads the next group of rows into the given batch.
-   *
-   * @param batch the batch to fill
-   * @return whether to keep reading; the callers in this package loop while
-   *         this returns true
-   * @throws IOException if reading fails
-   */
-  boolean nextBatch(VectorizedRowBatch batch) throws IOException;
-
-  /**
-   * Closes the reader. Narrows AutoCloseable's close so that only
-   * IOException may be thrown.
-   *
-   * @throws IOException if closing fails
-   */
-  @Override
-  void close() throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/convert/BatchWriter.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/convert/BatchWriter.java b/java/bench/src/java/org/apache/orc/bench/convert/BatchWriter.java
deleted file mode 100644
index c79d937..0000000
--- a/java/bench/src/java/org/apache/orc/bench/convert/BatchWriter.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench.convert;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.IOException;
-
-/**
- * Generic interface for writing data one row batch at a time.
- */
-public interface BatchWriter extends AutoCloseable {
-
-  /**
-   * Writes the given batch.
-   *
-   * @param batch the batch to write
-   * @throws IOException if writing fails
-   */
-  void writeBatch(VectorizedRowBatch batch) throws IOException;
-
-  /**
-   * Closes the writer. Narrows AutoCloseable's close so that only
-   * IOException may be thrown.
-   *
-   * @throws IOException if closing fails
-   */
-  @Override
-  void close() throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java b/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
deleted file mode 100644
index 7f57468..0000000
--- a/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench.convert;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.DefaultParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.bench.CompressionKind;
-import org.apache.orc.bench.SalesGenerator;
-import org.apache.orc.bench.Utilities;
-import org.apache.orc.bench.convert.avro.AvroReader;
-import org.apache.orc.bench.convert.avro.AvroWriter;
-import org.apache.orc.bench.convert.csv.CsvReader;
-import org.apache.orc.bench.convert.json.JsonReader;
-import org.apache.orc.bench.convert.json.JsonWriter;
-import org.apache.orc.bench.convert.orc.OrcReader;
-import org.apache.orc.bench.convert.orc.OrcWriter;
-import org.apache.orc.bench.convert.parquet.ParquetReader;
-import org.apache.orc.bench.convert.parquet.ParquetWriter;
-
-import java.io.IOException;
-
-/**
- * A tool to create the different variants that we need to benchmark against.
- * For each data set, the source rows are read once and written to one file
- * per combination of compression and format.
- */
-public class GenerateVariants {
-
-  /**
-   * Creates a writer for a single output file.
-   * Any existing file at the path is deleted (non-recursively) and the parent
-   * directory is created before the writer is opened.
-   *
-   * @param file the file to write
-   * @param format one of "json", "orc", "avro", or "parquet"
-   * @param schema the schema of the rows
-   * @param conf the Hadoop configuration
-   * @param compress the compression to apply
-   * @return the writer for the requested format
-   * @throws IOException if the file system operations or the writer fail
-   * @throws IllegalArgumentException if the format is not recognized
-   */
-  public static BatchWriter createFileWriter(Path file,
-                                             String format,
-                                             TypeDescription schema,
-                                             Configuration conf,
-                                             CompressionKind compress
-                                             ) throws IOException {
-    FileSystem fs = file.getFileSystem(conf);
-    fs.delete(file, false);
-    fs.mkdirs(file.getParent());
-    switch (format) {
-      case "json":
-        return new JsonWriter(file, schema, conf, compress);
-      case "orc":
-        return new OrcWriter(file, schema, conf, compress);
-      case "avro":
-        return new AvroWriter(file, schema, conf, compress);
-      case "parquet":
-        return new ParquetWriter(file, schema, conf, compress);
-      default:
-        throw new IllegalArgumentException("Unknown format " + format);
-    }
-  }
-
-  /**
-   * Creates a reader for a single input file.
-   *
-   * @param file the file to read
-   * @param format one of "csv", "json", "orc", "avro", or "parquet"
-   * @param schema the schema of the rows
-   * @param conf the Hadoop configuration
-   * @param compress the compression of the file; only passed on to the csv
-   *                 and json readers
-   * @return the reader for the requested format
-   * @throws IOException if the reader can't be opened
-   * @throws IllegalArgumentException if the format is not recognized
-   */
-  public static BatchReader createFileReader(Path file,
-                                             String format,
-                                             TypeDescription schema,
-                                             Configuration conf,
-                                             CompressionKind compress
-                                             ) throws IOException {
-    switch (format) {
-      case "csv":
-        return new CsvReader(file, schema, conf, compress);
-      case "json":
-        return new JsonReader(file, schema, conf, compress);
-      case "orc":
-        return new OrcReader(file, schema, conf);
-      case "avro":
-        return new AvroReader(file, schema, conf);
-      case "parquet":
-        return new ParquetReader(file, schema, conf);
-      default:
-        throw new IllegalArgumentException("Unknown format " + format);
-    }
-  }
-
-  /**
-   * A reader that concatenates the rows of every file found by a recursive
-   * listing of a directory.
-   */
-  static class RecursiveReader implements BatchReader {
-    private final RemoteIterator<LocatedFileStatus> filenames;
-    private final String format;
-    private final TypeDescription schema;
-    private final Configuration conf;
-    private final CompressionKind compress;
-    // the reader for the file in progress, or null if there is none open
-    private BatchReader current = null;
-
-    RecursiveReader(Path root,
-                    String format,
-                    TypeDescription schema,
-                    Configuration conf,
-                    CompressionKind compress) throws IOException {
-      FileSystem fs = root.getFileSystem(conf);
-      filenames = fs.listFiles(root, true);
-      this.format = format;
-      this.schema = schema;
-      this.conf = conf;
-      this.compress = compress;
-    }
-
-    /**
-     * Reads the next batch, moving on to the next file when the file in
-     * progress has no more rows.
-     *
-     * @param batch the batch to fill
-     * @return false once every file has been read completely
-     * @throws IOException if listing, opening, reading, or closing fails
-     */
-    @Override
-    public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
-      while (true) {
-        if (current != null) {
-          if (current.nextBatch(batch)) {
-            return true;
-          }
-          // The file is finished; release it before opening the next one so
-          // that only a single underlying reader is ever open.
-          current.close();
-          current = null;
-        }
-        if (!filenames.hasNext()) {
-          return false;
-        }
-        LocatedFileStatus next = filenames.next();
-        if (next.isFile()) {
-          current = createFileReader(next.getPath(), format, schema, conf,
-              compress);
-        }
-      }
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (current != null) {
-        current.close();
-        current = null;
-      }
-    }
-  }
-
-  /**
-   * Creates the reader that supplies the source rows of a data set.
-   * The "taxi" and "github" sets are read from root/sources/&lt;name&gt; as
-   * csv and json files respectively; the "sales" set is generated.
-   *
-   * @param root the root directory of the benchmark data
-   * @param dataName one of "taxi", "sales", or "github"
-   * @param schema the schema of the rows
-   * @param conf the Hadoop configuration
-   * @param salesRecords the number of records to generate for "sales"
-   * @return the reader for the data set
-   * @throws IOException if the source files can't be listed
-   * @throws IllegalArgumentException if the data set is not recognized
-   */
-  public static BatchReader createReader(Path root,
-                                         String dataName,
-                                         TypeDescription schema,
-                                         Configuration conf,
-                                         long salesRecords) throws IOException {
-    switch (dataName) {
-      case "taxi":
-        return new RecursiveReader(new Path(root, "sources/" + dataName), "csv",
-            schema, conf, CompressionKind.ZLIB);
-      case "sales":
-        return new SalesGenerator(salesRecords);
-      case "github":
-        return new RecursiveReader(new Path(root, "sources/" + dataName), "json",
-            schema, conf, CompressionKind.ZLIB);
-      default:
-        throw new IllegalArgumentException("Unknown data name " + dataName);
-    }
-  }
-
-  /**
-   * Parses the command line. If help is requested or no root directory is
-   * given, the usage is printed and the JVM exits with status 1.
-   *
-   * @param args the command line arguments
-   * @return the parsed command line
-   * @throws ParseException if the arguments are malformed
-   */
-  static CommandLine parseCommandLine(String[] args) throws ParseException {
-    Options options = new Options()
-        .addOption("h", "help", false, "Provide help")
-        .addOption("c", "compress", true, "List of compression")
-        .addOption("d", "data", true, "List of data sets")
-        .addOption("f", "format", true, "List of formats")
-        .addOption("s", "sales", true, "Number of records for sales");
-    CommandLine result = new DefaultParser().parse(options, args);
-    if (result.hasOption("help") || result.getArgs().length == 0) {
-      new HelpFormatter().printHelp("convert <root>", options);
-      System.exit(1);
-    }
-    return result;
-  }
-
-  /**
-   * Generates every requested variant of every requested data set.
-   *
-   * @param args the options followed by the root directory
-   * @throws Exception if parsing the arguments or converting the data fails
-   */
-  public static void main(String[] args) throws Exception {
-    CommandLine cli = parseCommandLine(args);
-    String[] compressList =
-        cli.getOptionValue("compress", "none,snappy,zlib").split(",");
-    String[] dataList =
-        cli.getOptionValue("data", "taxi,sales,github").split(",");
-    String[] formatList =
-        cli.getOptionValue("format", "avro,json,orc,parquet").split(",");
-    long records = Long.parseLong(cli.getOptionValue("sales", "25000000"));
-    Configuration conf = new Configuration();
-    Path root = new Path(cli.getArgs()[0]);
-    for(String data: dataList) {
-      TypeDescription schema = Utilities.loadSchema(data + ".schema");
-
-      // Set up the reader; it is closed even if the conversion fails.
-      try (BatchReader reader = createReader(root, data, schema, conf, records)) {
-        BatchWriter[] writers =
-            new BatchWriter[compressList.length * formatList.length];
-        Exception failure = null;
-        try {
-          // Set up the writers for each combination
-          for(int compress=0; compress < compressList.length; ++compress) {
-            // Locale.ROOT keeps the enum lookup independent of the default
-            // locale (for example "zlib" under a Turkish locale).
-            CompressionKind compressionKind = CompressionKind.valueOf(
-                compressList[compress].toUpperCase(java.util.Locale.ROOT));
-            for(int format=0; format < formatList.length; ++format) {
-              Path outPath = Utilities.getVariant(root, data, formatList[format],
-                  compressList[compress]);
-              writers[compress * formatList.length + format] =
-                  createFileWriter(outPath, formatList[format], schema, conf,
-                      compressionKind);
-            }
-          }
-
-          // Copy the rows
-          VectorizedRowBatch batch = schema.createRowBatch();
-          while (reader.nextBatch(batch)) {
-            for(BatchWriter writer: writers) {
-              writer.writeBatch(batch);
-            }
-          }
-        } catch (Exception e) {
-          failure = e;
-          throw e;
-        } finally {
-          closeWriters(writers, failure);
-        }
-      }
-    }
-  }
-
-  /**
-   * Closes every writer that was opened, continuing past individual failures
-   * so that one bad writer doesn't leave the others open.
-   *
-   * @param writers the writers to close; entries may be null if setting up
-   *                the writers failed part way through
-   * @param primary the exception already being thrown, or null; close
-   *                failures are attached to it as suppressed exceptions
-   * @throws IOException the first close failure, if there is no primary
-   */
-  private static void closeWriters(BatchWriter[] writers,
-                                   Exception primary) throws IOException {
-    IOException closeFailure = null;
-    for(BatchWriter writer: writers) {
-      if (writer != null) {
-        try {
-          writer.close();
-        } catch (IOException e) {
-          if (primary != null) {
-            primary.addSuppressed(e);
-          } else if (closeFailure == null) {
-            closeFailure = e;
-          } else {
-            closeFailure.addSuppressed(e);
-          }
-        }
-      }
-    }
-    if (closeFailure != null) {
-      throw closeFailure;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/orc/blob/b86d70aa/java/bench/src/java/org/apache/orc/bench/convert/ScanVariants.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/convert/ScanVariants.java b/java/bench/src/java/org/apache/orc/bench/convert/ScanVariants.java
deleted file mode 100644
index ae76238..0000000
--- a/java/bench/src/java/org/apache/orc/bench/convert/ScanVariants.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.bench.convert;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.DefaultParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.bench.CompressionKind;
-import org.apache.orc.bench.Utilities;
-
-/**
- * A tool to scan the generated variants. Each variant file is read from start
- * to end and its number of rows and batches is printed.
- */
-public class ScanVariants {
-
-
-  /**
-   * Parses the command line. If help is requested or no root directory is
-   * given, the usage is printed and the JVM exits with status 1.
-   *
-   * @param args the command line arguments
-   * @return the parsed command line
-   * @throws ParseException if the arguments are malformed
-   */
-  static CommandLine parseCommandLine(String[] args) throws ParseException {
-    Options options = new Options()
-        .addOption("h", "help", false, "Provide help")
-        .addOption("c", "compress", true, "List of compression")
-        .addOption("d", "data", true, "List of data sets")
-        .addOption("f", "format", true, "List of formats");
-    CommandLine result = new DefaultParser().parse(options, args);
-    if (result.hasOption("help") || result.getArgs().length == 0) {
-      new HelpFormatter().printHelp("scan <root>", options);
-      System.exit(1);
-    }
-    return result;
-  }
-
-  /**
-   * Reads every requested variant and prints its row and batch counts.
-   *
-   * @param args the options followed by the root directory
-   * @throws Exception if parsing the arguments or reading a variant fails
-   */
-  public static void main(String[] args) throws Exception {
-    CommandLine cli = parseCommandLine(args);
-    String[] compressList =
-        cli.getOptionValue("compress", "none,snappy,zlib").split(",");
-    String[] dataList =
-        cli.getOptionValue("data", "taxi,sales,github").split(",");
-    String[] formatList =
-        cli.getOptionValue("format", "avro,json,orc,parquet").split(",");
-    Configuration conf = new Configuration();
-    Path root = new Path(cli.getArgs()[0]);
-    for(String data: dataList) {
-      TypeDescription schema = Utilities.loadSchema(data + ".schema");
-      VectorizedRowBatch batch = schema.createRowBatch();
-      for (String compress : compressList) {
-        // Locale.ROOT keeps the enum lookup independent of the default locale
-        // (for example "zlib" under a Turkish locale).
-        CompressionKind compressKind =
-            CompressionKind.valueOf(compress.toUpperCase(java.util.Locale.ROOT));
-        for (String format : formatList) {
-          Path filename = Utilities.getVariant(root, data, format,
-              compress);
-          // try-with-resources closes the reader even if the scan fails
-          try (BatchReader reader = GenerateVariants.createFileReader(filename,
-              format, schema, conf, compressKind)) {
-            long rows = 0;
-            long batches = 0;
-            while (reader.nextBatch(batch)) {
-              batches += 1;
-              rows += batch.size;
-            }
-            System.out.println(filename + " rows: " + rows + " batches: "
-                + batches);
-          }
-        }
-      }
-    }
-  }
-}