You are viewing a plain text version of this content. The canonical link for it is here.
Posted to blur-dev@incubator.apache.org by Tim Williams <wi...@gmail.com> on 2016/08/30 11:05:36 UTC

Re: [04/13] git commit: Third round of updates.

Please be more considerate with your
commit messages... it's a lot of code to look through without having
any context besides "N round of updates."


On Mon, Aug 29, 2016 at 9:57 PM,  <am...@apache.org> wrote:
> Third round of updates.
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/ea50630a
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ea50630a
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ea50630a
>
> Branch: refs/heads/master
> Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
> Parents: 0141656
> Author: Aaron McCurry <am...@gmail.com>
> Authored: Sat May 7 13:11:54 2016 -0400
> Committer: Aaron McCurry <am...@gmail.com>
> Committed: Sat May 7 13:11:54 2016 -0400
>
> ----------------------------------------------------------------------
>  blur-indexer/pom.xml                            |  58 +++
>  blur-indexer/src/main/assemble/bin.xml          |  45 ++
>  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
>  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
>  .../blur/mapreduce/lib/update/FasterDriver.java | 486 +++++++++++++++++++
>  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
>  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
>  .../lib/update/LookupBuilderMapper.java         |  18 +
>  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
>  .../lib/update/MapperForExistingDataMod.java    |  46 ++
>  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
>  .../lib/update/MapperForNewDataMod.java         |  82 ++++
>  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
>  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
>  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
>  .../src/main/resources/blur-site.properties     |   1 +
>  .../src/main/resources/program-log4j.xml        |  29 ++
>  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
>  18 files changed, 2319 insertions(+)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/pom.xml
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
> new file mode 100644
> index 0000000..c7c1753
> --- /dev/null
> +++ b/blur-indexer/pom.xml
> @@ -0,0 +1,58 @@
> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
> +       <modelVersion>4.0.0</modelVersion>
> +       <groupId>org.apache.blur</groupId>
> +       <artifactId>blur-indexer</artifactId>
> +       <version>0.2.8</version>
> +       <name>blur-indexer</name>
> +       <packaging>jar</packaging>
> +
> +       <properties>
> +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-SNAPSHOT</blur.version>
> +       </properties>
> +       <dependencies>
> +               <dependency>
> +                       <groupId>org.apache.blur</groupId>
> +                       <artifactId>blur-mapred</artifactId>
> +                       <version>${blur.version}</version>
> +               </dependency>
> +               <dependency>
> +                       <groupId>junit</groupId>
> +                       <artifactId>junit</artifactId>
> +                       <version>4.9</version>
> +                       <scope>test</scope>
> +               </dependency>
> +       </dependencies>
> +
> +       <build>
> +               <pluginManagement>
> +                       <plugins>
> +                               <plugin>
> +                                       <groupId>org.apache.maven.plugins</groupId>
> +                                       <artifactId>maven-compiler-plugin</artifactId>
> +                                       <configuration>
> +                                               <source>1.8</source>
> +                                               <target>1.8</target>
> +                                       </configuration>
> +                               </plugin>
> +                       </plugins>
> +               </pluginManagement>
> +               <plugins>
> +                       <plugin>
> +                               <artifactId>maven-assembly-plugin</artifactId>
> +                               <configuration>
> +                                       <descriptor>src/main/assemble/bin.xml</descriptor>
> +                                       <finalName>blur-indexer-${project.version}</finalName>
> +                               </configuration>
> +                               <executions>
> +                                       <execution>
> +                                               <phase>package</phase>
> +                                               <goals>
> +                                                       <goal>single</goal>
> +                                               </goals>
> +                                       </execution>
> +                               </executions>
> +                       </plugin>
> +               </plugins>
> +       </build>
> +</project>
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/assemble/bin.xml
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/src/main/assemble/bin.xml b/blur-indexer/src/main/assemble/bin.xml
> new file mode 100644
> index 0000000..5fddd56
> --- /dev/null
> +++ b/blur-indexer/src/main/assemble/bin.xml
> @@ -0,0 +1,45 @@
> +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
> +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> +           xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
> +  <formats>
> +    <format>tar.gz</format>
> +  </formats>
> +  <includeBaseDirectory>false</includeBaseDirectory>
> +
> +  <dependencySets>
> +    <dependencySet>
> +      <useProjectArtifact>true</useProjectArtifact>
> +      <outputDirectory>blur-indexer-${project.version}/lib</outputDirectory>
> +      <unpack>false</unpack>
> +      <includes>
> +        <include>org.apache.blur:blur-indexer</include>
> +        <include>org.apache.blur:*</include>
> +        <include>org.apache.zookeeper:zookeeper</include>
> +        <include>org.slf4j:slf4j-api</include>
> +        <include>org.slf4j:slf4j-log4j12</include>
> +        <include>org.json:json</include>
> +        <include>log4j:log4j</include>
> +        <include>com.yammer.metrics:*</include>
> +        <include>com.google.guava:guava</include>
> +        <include>org.apache.httpcomponents:*</include>
> +        <include>org.apache.lucene:*</include>
> +        <include>com.spatial4j:spatial4j</include>
> +        <include>commons-cli:commons-cli</include>
> +        <include>org.eclipse.jetty:*</include>
> +        <include>com.googlecode.concurrentlinkedhashmap:concurrentlinkedhashmap-lru</include>
> +        <include>jline:jline</include>
> +        <include>com.fasterxml.jackson.core:*</include>
> +      </includes>
> +    </dependencySet>
> +  </dependencySets>
> +
> +  <fileSets>
> +    <fileSet>
> +      <directory>${project.build.scriptSourceDirectory}</directory>
> +      <outputDirectory>blur-indexer-${project.version}</outputDirectory>
> +      <excludes>
> +        <exclude>**/.empty</exclude>
> +      </excludes>
> +    </fileSet>
> +  </fileSets>
> +</assembly>
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> new file mode 100644
> index 0000000..a9caabb
> --- /dev/null
> +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> @@ -0,0 +1,17 @@
> +package org.apache.blur.mapreduce.lib.update;
> +
> +public enum BlurIndexCounter {
> +
> +  NEW_RECORDS,
> +  ROW_IDS_FROM_INDEX,
> +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
> +  ROW_IDS_FROM_NEW_DATA,
> +
> +  INPUT_FORMAT_MAPPER,
> +  INPUT_FORMAT_EXISTING_RECORDS,
> +
> +  LOOKUP_MAPPER,
> +  LOOKUP_MAPPER_EXISTING_RECORDS,
> +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
> +
> +}
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> new file mode 100644
> index 0000000..d44adf1
> --- /dev/null
> +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> @@ -0,0 +1,362 @@
> +package org.apache.blur.mapreduce.lib.update;
> +
> +import java.io.ByteArrayInputStream;
> +import java.io.ByteArrayOutputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.net.URL;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.List;
> +import java.util.Map;
> +import java.util.Map.Entry;
> +import java.util.Set;
> +import java.util.UUID;
> +import java.util.concurrent.Callable;
> +import java.util.concurrent.ExecutionException;
> +import java.util.concurrent.ExecutorService;
> +import java.util.concurrent.Executors;
> +import java.util.concurrent.Future;
> +import java.util.concurrent.TimeUnit;
> +import java.util.concurrent.atomic.AtomicBoolean;
> +
> +import org.apache.blur.log.Log;
> +import org.apache.blur.log.LogFactory;
> +import org.apache.blur.mapreduce.lib.BlurInputFormat;
> +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
> +import org.apache.blur.thrift.BlurClient;
> +import org.apache.blur.thrift.generated.Blur.Iface;
> +import org.apache.blur.thrift.generated.BlurException;
> +import org.apache.blur.thrift.generated.TableDescriptor;
> +import org.apache.blur.thrift.generated.TableStats;
> +import org.apache.blur.utils.BlurConstants;
> +import org.apache.commons.io.IOUtils;
> +import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.conf.Configured;
> +import org.apache.hadoop.fs.FSDataInputStream;
> +import org.apache.hadoop.fs.FileStatus;
> +import org.apache.hadoop.fs.FileSystem;
> +import org.apache.hadoop.fs.Path;
> +import org.apache.hadoop.fs.permission.FsAction;
> +import org.apache.hadoop.mapreduce.Cluster;
> +import org.apache.hadoop.mapreduce.Job;
> +import org.apache.hadoop.mapreduce.JobID;
> +import org.apache.hadoop.mapreduce.JobStatus;
> +import org.apache.hadoop.util.Tool;
> +import org.apache.hadoop.util.ToolRunner;
> +import org.apache.hadoop.yarn.exceptions.YarnException;
> +import org.apache.log4j.LogManager;
> +import org.apache.log4j.xml.DOMConfigurator;
> +
> +public class ClusterDriver extends Configured implements Tool {
> +
> +  private static final String BLUR_ENV = "blur.env";
> +  private static final Log LOG = LogFactory.getLog(ClusterDriver.class);
> +  private static final String _SEP = "_";
> +  private static final String IMPORT = "import";
> +
> +  public static void main(String[] args) throws Exception {
> +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
> +    System.out.println("Log file path [" + logFilePath + "]");
> +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
> +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
> +    if (url != null) {
> +      LOG.info("Reseting log4j config from classpath resource [{0}]", url);
> +      LogManager.resetConfiguration();
> +      DOMConfigurator.configure(url);
> +    }
> +    int res = ToolRunner.run(new Configuration(), new ClusterDriver(), args);

Not sure what this thing does yet but it seems we should validate
those args since they're accessed blindly in run...

--tim

Re: [03/13] git commit: Third round of updates.

Posted by Aaron McCurry <am...@gmail.com>.
Will do.  :-)

On Tue, Aug 30, 2016 at 9:10 AM, Tim Williams <wi...@gmail.com> wrote:

> No worries, just a friendly reminder:)  If you get time, I think it'd
> be helpful to add a couple sentences about any new stuff/big changes...
> seems like there's a new project for example...
>
> Thanks,
> --tim
>
>
> On Tue, Aug 30, 2016 at 7:49 AM, Aaron McCurry <am...@gmail.com> wrote:
> > I apologize for the big commits without proper messaging.  It was
> difficult
> > to remember the changes and the original commit messages were lost due to
> an
> > offline git repo (which is no longer in use).  I only had the diff
> between
> > the original git repo and everything after the changes.  Plus the diff
> > didn't apply cleanly so that's why I broke it up in to different
> sections.
> >
> > I suppose I should have broke up the changes manually out of the diff and
> > applied them separately and recreated all the commit messages but I
> didn't
> > have the time to work through all of them.  Sorry.
> >
> > Aaron
> >
> >
> > On Tuesday, August 30, 2016, Tim Williams <wi...@gmail.com> wrote:
> >
> >> Please be more considerate with your
> >> commit messages... it's a lot of code to look through without having
> >> any context besides "N round of updates."
> >>
> >>
> >> On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org <javascript:;>>
> >> wrote:
> >> > Third round of updates.
> >> >
> >> >
> >> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> >> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/
> >> commit/ea50630a
> >> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/
> >> ea50630a
> >> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/
> >> ea50630a
> >> >
> >> > Branch: refs/heads/master
> >> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
> >> > Parents: 0141656
> >> > Author: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> >> > Authored: Sat May 7 13:11:54 2016 -0400
> >> > Committer: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> >> > Committed: Sat May 7 13:11:54 2016 -0400
> >> >
> >> > ------------------------------------------------------------
> ----------
> >> >  blur-indexer/pom.xml                            |  58 +++
> >> >  blur-indexer/src/main/assemble/bin.xml          |  45 ++
> >> >  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
> >> >  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
> >> >  .../blur/mapreduce/lib/update/FasterDriver.java | 486
> >> +++++++++++++++++++
> >> >  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
> >> >  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
> >> >  .../lib/update/LookupBuilderMapper.java         |  18 +
> >> >  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
> >> >  .../lib/update/MapperForExistingDataMod.java    |  46 ++
> >> >  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
> >> >  .../lib/update/MapperForNewDataMod.java         |  82 ++++
> >> >  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
> >> >  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
> >> >  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
> >> >  .../src/main/resources/blur-site.properties     |   1 +
> >> >  .../src/main/resources/program-log4j.xml        |  29 ++
> >> >  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
> >> >  18 files changed, 2319 insertions(+)
> >> > ------------------------------------------------------------
> ----------
> >> >
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/pom.xml
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
> >> > new file mode 100644
> >> > index 0000000..c7c1753
> >> > --- /dev/null
> >> > +++ b/blur-indexer/pom.xml
> >> > @@ -0,0 +1,58 @@
> >> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
> >> http://www.w3.org/2001/XMLSchema-instance"
> >> > +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
> >> http://maven.apache.org/xsd/maven-4.0.0.xsd">
> >> > +       <modelVersion>4.0.0</modelVersion>
> >> > +       <groupId>org.apache.blur</groupId>
> >> > +       <artifactId>blur-indexer</artifactId>
> >> > +       <version>0.2.8</version>
> >> > +       <name>blur-indexer</name>
> >> > +       <packaging>jar</packaging>
> >> > +
> >> > +       <properties>
> >> > +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-
> >> SNAPSHOT</blur.version>
> >> > +       </properties>
> >> > +       <dependencies>
> >> > +               <dependency>
> >> > +                       <groupId>org.apache.blur</groupId>
> >> > +                       <artifactId>blur-mapred</artifactId>
> >> > +                       <version>${blur.version}</version>
> >> > +               </dependency>
> >> > +               <dependency>
> >> > +                       <groupId>junit</groupId>
> >> > +                       <artifactId>junit</artifactId>
> >> > +                       <version>4.9</version>
> >> > +                       <scope>test</scope>
> >> > +               </dependency>
> >> > +       </dependencies>
> >> > +
> >> > +       <build>
> >> > +               <pluginManagement>
> >> > +                       <plugins>
> >> > +                               <plugin>
> >> > +                                       <groupId>org.apache.maven.
> >> plugins</groupId>
> >> > +                                       <artifactId>maven-compiler-
> >> plugin</artifactId>
> >> > +                                       <configuration>
> >> > +                                               <source>1.8</source>
> >> > +                                               <target>1.8</target>
> >> > +                                       </configuration>
> >> > +                               </plugin>
> >> > +                       </plugins>
> >> > +               </pluginManagement>
> >> > +               <plugins>
> >> > +                       <plugin>
> >> > +                               <artifactId>maven-assembly-
> >> plugin</artifactId>
> >> > +                               <configuration>
> >> > +                                       <descriptor>src/main/assemble/
> >> bin.xml</descriptor>
> >> > +                                       <finalName>blur-indexer-${
> >> project.version}</finalName>
> >> > +                               </configuration>
> >> > +                               <executions>
> >> > +                                       <execution>
> >> > +                                               <phase>package</phase>
> >> > +                                               <goals>
> >> > +
> >>  <goal>single</goal>
> >> > +                                               </goals>
> >> > +                                       </execution>
> >> > +                               </executions>
> >> > +                       </plugin>
> >> > +               </plugins>
> >> > +       </build>
> >> > +</project>
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/src/main/assemble/bin.xml
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/src/main/assemble/bin.xml
> >> b/blur-indexer/src/main/assemble/bin.xml
> >> > new file mode 100644
> >> > index 0000000..5fddd56
> >> > --- /dev/null
> >> > +++ b/blur-indexer/src/main/assemble/bin.xml
> >> > @@ -0,0 +1,45 @@
> >> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-
> >> plugin/assembly/1.1.2"
> >> > +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> >> > +           xsi:schemaLocation="http://maven.apache.org/plugins/
> >> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/
> >> assembly-1.1.2.xsd">
> >> > +  <formats>
> >> > +    <format>tar.gz</format>
> >> > +  </formats>
> >> > +  <includeBaseDirectory>false</includeBaseDirectory>
> >> > +
> >> > +  <dependencySets>
> >> > +    <dependencySet>
> >> > +      <useProjectArtifact>true</useProjectArtifact>
> >> > +      <outputDirectory>blur-indexer-${project.version}/lib</
> >> outputDirectory>
> >> > +      <unpack>false</unpack>
> >> > +      <includes>
> >> > +        <include>org.apache.blur:blur-indexer</include>
> >> > +        <include>org.apache.blur:*</include>
> >> > +        <include>org.apache.zookeeper:zookeeper</include>
> >> > +        <include>org.slf4j:slf4j-api</include>
> >> > +        <include>org.slf4j:slf4j-log4j12</include>
> >> > +        <include>org.json:json</include>
> >> > +        <include>log4j:log4j</include>
> >> > +        <include>com.yammer.metrics:*</include>
> >> > +        <include>com.google.guava:guava</include>
> >> > +        <include>org.apache.httpcomponents:*</include>
> >> > +        <include>org.apache.lucene:*</include>
> >> > +        <include>com.spatial4j:spatial4j</include>
> >> > +        <include>commons-cli:commons-cli</include>
> >> > +        <include>org.eclipse.jetty:*</include>
> >> > +        <include>com.googlecode.concurrentlinkedhashmap:
> >> concurrentlinkedhashmap-lru</include>
> >> > +        <include>jline:jline</include>
> >> > +        <include>com.fasterxml.jackson.core:*</include>
> >> > +      </includes>
> >> > +    </dependencySet>
> >> > +  </dependencySets>
> >> > +
> >> > +  <fileSets>
> >> > +    <fileSet>
> >> > +      <directory>${project.build.scriptSourceDirectory}</directory>
> >> > +      <outputDirectory>blur-indexer-${project.version}</
> >> outputDirectory>
> >> > +      <excludes>
> >> > +        <exclude>**/.empty</exclude>
> >> > +      </excludes>
> >> > +    </fileSet>
> >> > +  </fileSets>
> >> > +</assembly>
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/src/main/java/org/apache/blur/
> mapreduce/lib/update/
> >> BlurIndexCounter.java
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/src/main/java/
> org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/BlurIndexCounter.java
> >> > new file mode 100644
> >> > index 0000000..a9caabb
> >> > --- /dev/null
> >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/BlurIndexCounter.java
> >> > @@ -0,0 +1,17 @@
> >> > +package org.apache.blur.mapreduce.lib.update;
> >> > +
> >> > +public enum BlurIndexCounter {
> >> > +
> >> > +  NEW_RECORDS,
> >> > +  ROW_IDS_FROM_INDEX,
> >> > +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
> >> > +  ROW_IDS_FROM_NEW_DATA,
> >> > +
> >> > +  INPUT_FORMAT_MAPPER,
> >> > +  INPUT_FORMAT_EXISTING_RECORDS,
> >> > +
> >> > +  LOOKUP_MAPPER,
> >> > +  LOOKUP_MAPPER_EXISTING_RECORDS,
> >> > +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
> >> > +
> >> > +}
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/src/main/java/org/apache/blur/
> mapreduce/lib/update/
> >> ClusterDriver.java
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/src/main/java/
> org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/ClusterDriver.java
> >> > new file mode 100644
> >> > index 0000000..d44adf1
> >> > --- /dev/null
> >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/ClusterDriver.java
> >> > @@ -0,0 +1,362 @@
> >> > +package org.apache.blur.mapreduce.lib.update;
> >> > +
> >> > +import java.io.ByteArrayInputStream;
> >> > +import java.io.ByteArrayOutputStream;
> >> > +import java.io.IOException;
> >> > +import java.io.InputStream;
> >> > +import java.net.URL;
> >> > +import java.util.HashMap;
> >> > +import java.util.HashSet;
> >> > +import java.util.List;
> >> > +import java.util.Map;
> >> > +import java.util.Map.Entry;
> >> > +import java.util.Set;
> >> > +import java.util.UUID;
> >> > +import java.util.concurrent.Callable;
> >> > +import java.util.concurrent.ExecutionException;
> >> > +import java.util.concurrent.ExecutorService;
> >> > +import java.util.concurrent.Executors;
> >> > +import java.util.concurrent.Future;
> >> > +import java.util.concurrent.TimeUnit;
> >> > +import java.util.concurrent.atomic.AtomicBoolean;
> >> > +
> >> > +import org.apache.blur.log.Log;
> >> > +import org.apache.blur.log.LogFactory;
> >> > +import org.apache.blur.mapreduce.lib.BlurInputFormat;
> >> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
> >> > +import org.apache.blur.thrift.BlurClient;
> >> > +import org.apache.blur.thrift.generated.Blur.Iface;
> >> > +import org.apache.blur.thrift.generated.BlurException;
> >> > +import org.apache.blur.thrift.generated.TableDescriptor;
> >> > +import org.apache.blur.thrift.generated.TableStats;
> >> > +import org.apache.blur.utils.BlurConstants;
> >> > +import org.apache.commons.io.IOUtils;
> >> > +import org.apache.hadoop.conf.Configuration;
> >> > +import org.apache.hadoop.conf.Configured;
> >> > +import org.apache.hadoop.fs.FSDataInputStream;
> >> > +import org.apache.hadoop.fs.FileStatus;
> >> > +import org.apache.hadoop.fs.FileSystem;
> >> > +import org.apache.hadoop.fs.Path;
> >> > +import org.apache.hadoop.fs.permission.FsAction;
> >> > +import org.apache.hadoop.mapreduce.Cluster;
> >> > +import org.apache.hadoop.mapreduce.Job;
> >> > +import org.apache.hadoop.mapreduce.JobID;
> >> > +import org.apache.hadoop.mapreduce.JobStatus;
> >> > +import org.apache.hadoop.util.Tool;
> >> > +import org.apache.hadoop.util.ToolRunner;
> >> > +import org.apache.hadoop.yarn.exceptions.YarnException;
> >> > +import org.apache.log4j.LogManager;
> >> > +import org.apache.log4j.xml.DOMConfigurator;
> >> > +
> >> > +public class ClusterDriver extends Configured implements Tool {
> >> > +
> >> > +  private static final String BLUR_ENV = "blur.env";
> >> > +  private static final Log LOG = LogFactory.getLog(
> >> ClusterDriver.class);
> >> > +  private static final String _SEP = "_";
> >> > +  private static final String IMPORT = "import";
> >> > +
> >> > +  public static void main(String[] args) throws Exception {
> >> > +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
> >> > +    System.out.println("Log file path [" + logFilePath + "]");
> >> > +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
> >> > +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
> >> > +    if (url != null) {
> >> > +      LOG.info("Reseting log4j config from classpath resource [{0}]",
> >> url);
> >> > +      LogManager.resetConfiguration();
> >> > +      DOMConfigurator.configure(url);
> >> > +    }
> >> > +    int res = ToolRunner.run(new Configuration(), new
> ClusterDriver(),
> >> args);
> >>
> >> Not sure what this thing does yet but it seems we should validate
> >> those args since their accessed blindly in run...
> >>
> >> --tim
> >>
>

Re: [03/13] git commit: Third round of updates.

Posted by Tim Williams <wi...@gmail.com>.
No worries, just a friendly reminder:)  If you get time, I think it'd
be helpful to add a couple sentences about any new stuff/big changes...
seems like there's a new project for example...

Thanks,
--tim


On Tue, Aug 30, 2016 at 7:49 AM, Aaron McCurry <am...@gmail.com> wrote:
> I apologize for the big commits without proper messaging.  It was difficult
> to remember the changes and the original commit messages were lost due to an
> offline git repo (which is no longer in use).  I only had the diff between
> the original git repo and everything after the changes.  Plus the diff
> didn't apply cleanly so that's why I broke it up into different sections.
>
> I suppose I should have broken up the changes manually out of the diff and
> applied them separately and recreated all the commit messages but I didn't
> have the time to work through all of them.  Sorry.
>
> Aaron
>
>
> On Tuesday, August 30, 2016, Tim Williams <wi...@gmail.com> wrote:
>
>> Please be more considerate with your
>> commit messages... it's a lot of code to look through without having
>> any context besides "N round of updates."
>>
>>
>> On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org <javascript:;>>
>> wrote:
>> > Third round of updates.
>> >
>> >
>> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
>> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/
>> commit/ea50630a
>> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/
>> ea50630a
>> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/
>> ea50630a
>> >
>> > Branch: refs/heads/master
>> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
>> > Parents: 0141656
>> > Author: Aaron McCurry <amccurry@gmail.com <javascript:;>>
>> > Authored: Sat May 7 13:11:54 2016 -0400
>> > Committer: Aaron McCurry <amccurry@gmail.com <javascript:;>>
>> > Committed: Sat May 7 13:11:54 2016 -0400
>> >
>> > ----------------------------------------------------------------------
>> >  blur-indexer/pom.xml                            |  58 +++
>> >  blur-indexer/src/main/assemble/bin.xml          |  45 ++
>> >  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
>> >  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
>> >  .../blur/mapreduce/lib/update/FasterDriver.java | 486
>> +++++++++++++++++++
>> >  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
>> >  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
>> >  .../lib/update/LookupBuilderMapper.java         |  18 +
>> >  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
>> >  .../lib/update/MapperForExistingDataMod.java    |  46 ++
>> >  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
>> >  .../lib/update/MapperForNewDataMod.java         |  82 ++++
>> >  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
>> >  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
>> >  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
>> >  .../src/main/resources/blur-site.properties     |   1 +
>> >  .../src/main/resources/program-log4j.xml        |  29 ++
>> >  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
>> >  18 files changed, 2319 insertions(+)
>> > ----------------------------------------------------------------------
>> >
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/pom.xml
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
>> > new file mode 100644
>> > index 0000000..c7c1753
>> > --- /dev/null
>> > +++ b/blur-indexer/pom.xml
>> > @@ -0,0 +1,58 @@
>> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
>> http://www.w3.org/2001/XMLSchema-instance"
>> > +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
>> http://maven.apache.org/xsd/maven-4.0.0.xsd">
>> > +       <modelVersion>4.0.0</modelVersion>
>> > +       <groupId>org.apache.blur</groupId>
>> > +       <artifactId>blur-indexer</artifactId>
>> > +       <version>0.2.8</version>
>> > +       <name>blur-indexer</name>
>> > +       <packaging>jar</packaging>
>> > +
>> > +       <properties>
>> > +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-
>> SNAPSHOT</blur.version>
>> > +       </properties>
>> > +       <dependencies>
>> > +               <dependency>
>> > +                       <groupId>org.apache.blur</groupId>
>> > +                       <artifactId>blur-mapred</artifactId>
>> > +                       <version>${blur.version}</version>
>> > +               </dependency>
>> > +               <dependency>
>> > +                       <groupId>junit</groupId>
>> > +                       <artifactId>junit</artifactId>
>> > +                       <version>4.9</version>
>> > +                       <scope>test</scope>
>> > +               </dependency>
>> > +       </dependencies>
>> > +
>> > +       <build>
>> > +               <pluginManagement>
>> > +                       <plugins>
>> > +                               <plugin>
>> > +                                       <groupId>org.apache.maven.
>> plugins</groupId>
>> > +                                       <artifactId>maven-compiler-
>> plugin</artifactId>
>> > +                                       <configuration>
>> > +                                               <source>1.8</source>
>> > +                                               <target>1.8</target>
>> > +                                       </configuration>
>> > +                               </plugin>
>> > +                       </plugins>
>> > +               </pluginManagement>
>> > +               <plugins>
>> > +                       <plugin>
>> > +                               <artifactId>maven-assembly-
>> plugin</artifactId>
>> > +                               <configuration>
>> > +                                       <descriptor>src/main/assemble/
>> bin.xml</descriptor>
>> > +                                       <finalName>blur-indexer-${
>> project.version}</finalName>
>> > +                               </configuration>
>> > +                               <executions>
>> > +                                       <execution>
>> > +                                               <phase>package</phase>
>> > +                                               <goals>
>> > +
>>  <goal>single</goal>
>> > +                                               </goals>
>> > +                                       </execution>
>> > +                               </executions>
>> > +                       </plugin>
>> > +               </plugins>
>> > +       </build>
>> > +</project>
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/src/main/assemble/bin.xml
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/src/main/assemble/bin.xml
>> b/blur-indexer/src/main/assemble/bin.xml
>> > new file mode 100644
>> > index 0000000..5fddd56
>> > --- /dev/null
>> > +++ b/blur-indexer/src/main/assemble/bin.xml
>> > @@ -0,0 +1,45 @@
>> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-
>> plugin/assembly/1.1.2"
>> > +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>> > +           xsi:schemaLocation="http://maven.apache.org/plugins/
>> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/
>> assembly-1.1.2.xsd">
>> > +  <formats>
>> > +    <format>tar.gz</format>
>> > +  </formats>
>> > +  <includeBaseDirectory>false</includeBaseDirectory>
>> > +
>> > +  <dependencySets>
>> > +    <dependencySet>
>> > +      <useProjectArtifact>true</useProjectArtifact>
>> > +      <outputDirectory>blur-indexer-${project.version}/lib</
>> outputDirectory>
>> > +      <unpack>false</unpack>
>> > +      <includes>
>> > +        <include>org.apache.blur:blur-indexer</include>
>> > +        <include>org.apache.blur:*</include>
>> > +        <include>org.apache.zookeeper:zookeeper</include>
>> > +        <include>org.slf4j:slf4j-api</include>
>> > +        <include>org.slf4j:slf4j-log4j12</include>
>> > +        <include>org.json:json</include>
>> > +        <include>log4j:log4j</include>
>> > +        <include>com.yammer.metrics:*</include>
>> > +        <include>com.google.guava:guava</include>
>> > +        <include>org.apache.httpcomponents:*</include>
>> > +        <include>org.apache.lucene:*</include>
>> > +        <include>com.spatial4j:spatial4j</include>
>> > +        <include>commons-cli:commons-cli</include>
>> > +        <include>org.eclipse.jetty:*</include>
>> > +        <include>com.googlecode.concurrentlinkedhashmap:
>> concurrentlinkedhashmap-lru</include>
>> > +        <include>jline:jline</include>
>> > +        <include>com.fasterxml.jackson.core:*</include>
>> > +      </includes>
>> > +    </dependencySet>
>> > +  </dependencySets>
>> > +
>> > +  <fileSets>
>> > +    <fileSet>
>> > +      <directory>${project.build.scriptSourceDirectory}</directory>
>> > +      <outputDirectory>blur-indexer-${project.version}</
>> outputDirectory>
>> > +      <excludes>
>> > +        <exclude>**/.empty</exclude>
>> > +      </excludes>
>> > +    </fileSet>
>> > +  </fileSets>
>> > +</assembly>
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
>> BlurIndexCounter.java
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
>> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/BlurIndexCounter.java
>> > new file mode 100644
>> > index 0000000..a9caabb
>> > --- /dev/null
>> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/BlurIndexCounter.java
>> > @@ -0,0 +1,17 @@
>> > +package org.apache.blur.mapreduce.lib.update;
>> > +
>> > +public enum BlurIndexCounter {
>> > +
>> > +  NEW_RECORDS,
>> > +  ROW_IDS_FROM_INDEX,
>> > +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
>> > +  ROW_IDS_FROM_NEW_DATA,
>> > +
>> > +  INPUT_FORMAT_MAPPER,
>> > +  INPUT_FORMAT_EXISTING_RECORDS,
>> > +
>> > +  LOOKUP_MAPPER,
>> > +  LOOKUP_MAPPER_EXISTING_RECORDS,
>> > +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
>> > +
>> > +}
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
>> ClusterDriver.java
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
>> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/ClusterDriver.java
>> > new file mode 100644
>> > index 0000000..d44adf1
>> > --- /dev/null
>> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/ClusterDriver.java
>> > @@ -0,0 +1,362 @@
>> > +package org.apache.blur.mapreduce.lib.update;
>> > +
>> > +import java.io.ByteArrayInputStream;
>> > +import java.io.ByteArrayOutputStream;
>> > +import java.io.IOException;
>> > +import java.io.InputStream;
>> > +import java.net.URL;
>> > +import java.util.HashMap;
>> > +import java.util.HashSet;
>> > +import java.util.List;
>> > +import java.util.Map;
>> > +import java.util.Map.Entry;
>> > +import java.util.Set;
>> > +import java.util.UUID;
>> > +import java.util.concurrent.Callable;
>> > +import java.util.concurrent.ExecutionException;
>> > +import java.util.concurrent.ExecutorService;
>> > +import java.util.concurrent.Executors;
>> > +import java.util.concurrent.Future;
>> > +import java.util.concurrent.TimeUnit;
>> > +import java.util.concurrent.atomic.AtomicBoolean;
>> > +
>> > +import org.apache.blur.log.Log;
>> > +import org.apache.blur.log.LogFactory;
>> > +import org.apache.blur.mapreduce.lib.BlurInputFormat;
>> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
>> > +import org.apache.blur.thrift.BlurClient;
>> > +import org.apache.blur.thrift.generated.Blur.Iface;
>> > +import org.apache.blur.thrift.generated.BlurException;
>> > +import org.apache.blur.thrift.generated.TableDescriptor;
>> > +import org.apache.blur.thrift.generated.TableStats;
>> > +import org.apache.blur.utils.BlurConstants;
>> > +import org.apache.commons.io.IOUtils;
>> > +import org.apache.hadoop.conf.Configuration;
>> > +import org.apache.hadoop.conf.Configured;
>> > +import org.apache.hadoop.fs.FSDataInputStream;
>> > +import org.apache.hadoop.fs.FileStatus;
>> > +import org.apache.hadoop.fs.FileSystem;
>> > +import org.apache.hadoop.fs.Path;
>> > +import org.apache.hadoop.fs.permission.FsAction;
>> > +import org.apache.hadoop.mapreduce.Cluster;
>> > +import org.apache.hadoop.mapreduce.Job;
>> > +import org.apache.hadoop.mapreduce.JobID;
>> > +import org.apache.hadoop.mapreduce.JobStatus;
>> > +import org.apache.hadoop.util.Tool;
>> > +import org.apache.hadoop.util.ToolRunner;
>> > +import org.apache.hadoop.yarn.exceptions.YarnException;
>> > +import org.apache.log4j.LogManager;
>> > +import org.apache.log4j.xml.DOMConfigurator;
>> > +
>> > +public class ClusterDriver extends Configured implements Tool {
>> > +
>> > +  private static final String BLUR_ENV = "blur.env";
>> > +  private static final Log LOG = LogFactory.getLog(
>> ClusterDriver.class);
>> > +  private static final String _SEP = "_";
>> > +  private static final String IMPORT = "import";
>> > +
>> > +  public static void main(String[] args) throws Exception {
>> > +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
>> > +    System.out.println("Log file path [" + logFilePath + "]");
>> > +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
>> > +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
>> > +    if (url != null) {
>> > +      LOG.info("Reseting log4j config from classpath resource [{0}]",
>> url);
>> > +      LogManager.resetConfiguration();
>> > +      DOMConfigurator.configure(url);
>> > +    }
>> > +    int res = ToolRunner.run(new Configuration(), new ClusterDriver(),
>> args);
>>
>> Not sure what this thing does yet but it seems we should validate
>> those args since they're accessed blindly in run...
>>
>> --tim
>>

Re: [03/13] git commit: Third round of updates.

Posted by Aaron McCurry <am...@gmail.com>.
I apologize for the big commits without proper messaging.  It was difficult
to remember the changes and the original commit messages were lost due to an
offline git repo (which is no longer in use).  I only had the diff between
the original git repo and everything after the changes.  Plus the diff
didn't apply cleanly so that's why I broke it up into different sections.

I suppose I should have broken up the changes manually out of the diff and
applied them separately and recreated all the commit messages but I didn't
have the time to work through all of them.  Sorry.

Aaron


On Tuesday, August 30, 2016, Tim Williams <wi...@gmail.com> wrote:

> Please be more considerate with your
> commit messages... it's a lot of code to look through without having
> any context besides "N round of updates."
>
>
> On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org <javascript:;>>
> wrote:
> > Third round of updates.
> >
> >
> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/
> commit/ea50630a
> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/
> ea50630a
> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/
> ea50630a
> >
> > Branch: refs/heads/master
> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
> > Parents: 0141656
> > Author: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> > Authored: Sat May 7 13:11:54 2016 -0400
> > Committer: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> > Committed: Sat May 7 13:11:54 2016 -0400
> >
> > ----------------------------------------------------------------------
> >  blur-indexer/pom.xml                            |  58 +++
> >  blur-indexer/src/main/assemble/bin.xml          |  45 ++
> >  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
> >  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
> >  .../blur/mapreduce/lib/update/FasterDriver.java | 486
> +++++++++++++++++++
> >  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
> >  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
> >  .../lib/update/LookupBuilderMapper.java         |  18 +
> >  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
> >  .../lib/update/MapperForExistingDataMod.java    |  46 ++
> >  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
> >  .../lib/update/MapperForNewDataMod.java         |  82 ++++
> >  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
> >  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
> >  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
> >  .../src/main/resources/blur-site.properties     |   1 +
> >  .../src/main/resources/program-log4j.xml        |  29 ++
> >  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
> >  18 files changed, 2319 insertions(+)
> > ----------------------------------------------------------------------
> >
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/pom.xml
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
> > new file mode 100644
> > index 0000000..c7c1753
> > --- /dev/null
> > +++ b/blur-indexer/pom.xml
> > @@ -0,0 +1,58 @@
> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
> http://www.w3.org/2001/XMLSchema-instance"
> > +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
> http://maven.apache.org/xsd/maven-4.0.0.xsd">
> > +       <modelVersion>4.0.0</modelVersion>
> > +       <groupId>org.apache.blur</groupId>
> > +       <artifactId>blur-indexer</artifactId>
> > +       <version>0.2.8</version>
> > +       <name>blur-indexer</name>
> > +       <packaging>jar</packaging>
> > +
> > +       <properties>
> > +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-
> SNAPSHOT</blur.version>
> > +       </properties>
> > +       <dependencies>
> > +               <dependency>
> > +                       <groupId>org.apache.blur</groupId>
> > +                       <artifactId>blur-mapred</artifactId>
> > +                       <version>${blur.version}</version>
> > +               </dependency>
> > +               <dependency>
> > +                       <groupId>junit</groupId>
> > +                       <artifactId>junit</artifactId>
> > +                       <version>4.9</version>
> > +                       <scope>test</scope>
> > +               </dependency>
> > +       </dependencies>
> > +
> > +       <build>
> > +               <pluginManagement>
> > +                       <plugins>
> > +                               <plugin>
> > +                                       <groupId>org.apache.maven.
> plugins</groupId>
> > +                                       <artifactId>maven-compiler-
> plugin</artifactId>
> > +                                       <configuration>
> > +                                               <source>1.8</source>
> > +                                               <target>1.8</target>
> > +                                       </configuration>
> > +                               </plugin>
> > +                       </plugins>
> > +               </pluginManagement>
> > +               <plugins>
> > +                       <plugin>
> > +                               <artifactId>maven-assembly-
> plugin</artifactId>
> > +                               <configuration>
> > +                                       <descriptor>src/main/assemble/
> bin.xml</descriptor>
> > +                                       <finalName>blur-indexer-${
> project.version}</finalName>
> > +                               </configuration>
> > +                               <executions>
> > +                                       <execution>
> > +                                               <phase>package</phase>
> > +                                               <goals>
> > +
>  <goal>single</goal>
> > +                                               </goals>
> > +                                       </execution>
> > +                               </executions>
> > +                       </plugin>
> > +               </plugins>
> > +       </build>
> > +</project>
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/src/main/assemble/bin.xml
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/src/main/assemble/bin.xml
> b/blur-indexer/src/main/assemble/bin.xml
> > new file mode 100644
> > index 0000000..5fddd56
> > --- /dev/null
> > +++ b/blur-indexer/src/main/assemble/bin.xml
> > @@ -0,0 +1,45 @@
> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-
> plugin/assembly/1.1.2"
> > +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> > +           xsi:schemaLocation="http://maven.apache.org/plugins/
> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/
> assembly-1.1.2.xsd">
> > +  <formats>
> > +    <format>tar.gz</format>
> > +  </formats>
> > +  <includeBaseDirectory>false</includeBaseDirectory>
> > +
> > +  <dependencySets>
> > +    <dependencySet>
> > +      <useProjectArtifact>true</useProjectArtifact>
> > +      <outputDirectory>blur-indexer-${project.version}/lib</
> outputDirectory>
> > +      <unpack>false</unpack>
> > +      <includes>
> > +        <include>org.apache.blur:blur-indexer</include>
> > +        <include>org.apache.blur:*</include>
> > +        <include>org.apache.zookeeper:zookeeper</include>
> > +        <include>org.slf4j:slf4j-api</include>
> > +        <include>org.slf4j:slf4j-log4j12</include>
> > +        <include>org.json:json</include>
> > +        <include>log4j:log4j</include>
> > +        <include>com.yammer.metrics:*</include>
> > +        <include>com.google.guava:guava</include>
> > +        <include>org.apache.httpcomponents:*</include>
> > +        <include>org.apache.lucene:*</include>
> > +        <include>com.spatial4j:spatial4j</include>
> > +        <include>commons-cli:commons-cli</include>
> > +        <include>org.eclipse.jetty:*</include>
> > +        <include>com.googlecode.concurrentlinkedhashmap:
> concurrentlinkedhashmap-lru</include>
> > +        <include>jline:jline</include>
> > +        <include>com.fasterxml.jackson.core:*</include>
> > +      </includes>
> > +    </dependencySet>
> > +  </dependencySets>
> > +
> > +  <fileSets>
> > +    <fileSet>
> > +      <directory>${project.build.scriptSourceDirectory}</directory>
> > +      <outputDirectory>blur-indexer-${project.version}</
> outputDirectory>
> > +      <excludes>
> > +        <exclude>**/.empty</exclude>
> > +      </excludes>
> > +    </fileSet>
> > +  </fileSets>
> > +</assembly>
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
> BlurIndexCounter.java
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/BlurIndexCounter.java
> > new file mode 100644
> > index 0000000..a9caabb
> > --- /dev/null
> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/BlurIndexCounter.java
> > @@ -0,0 +1,17 @@
> > +package org.apache.blur.mapreduce.lib.update;
> > +
> > +public enum BlurIndexCounter {
> > +
> > +  NEW_RECORDS,
> > +  ROW_IDS_FROM_INDEX,
> > +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
> > +  ROW_IDS_FROM_NEW_DATA,
> > +
> > +  INPUT_FORMAT_MAPPER,
> > +  INPUT_FORMAT_EXISTING_RECORDS,
> > +
> > +  LOOKUP_MAPPER,
> > +  LOOKUP_MAPPER_EXISTING_RECORDS,
> > +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
> > +
> > +}
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
> ClusterDriver.java
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/ClusterDriver.java
> > new file mode 100644
> > index 0000000..d44adf1
> > --- /dev/null
> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/ClusterDriver.java
> > @@ -0,0 +1,362 @@
> > +package org.apache.blur.mapreduce.lib.update;
> > +
> > +import java.io.ByteArrayInputStream;
> > +import java.io.ByteArrayOutputStream;
> > +import java.io.IOException;
> > +import java.io.InputStream;
> > +import java.net.URL;
> > +import java.util.HashMap;
> > +import java.util.HashSet;
> > +import java.util.List;
> > +import java.util.Map;
> > +import java.util.Map.Entry;
> > +import java.util.Set;
> > +import java.util.UUID;
> > +import java.util.concurrent.Callable;
> > +import java.util.concurrent.ExecutionException;
> > +import java.util.concurrent.ExecutorService;
> > +import java.util.concurrent.Executors;
> > +import java.util.concurrent.Future;
> > +import java.util.concurrent.TimeUnit;
> > +import java.util.concurrent.atomic.AtomicBoolean;
> > +
> > +import org.apache.blur.log.Log;
> > +import org.apache.blur.log.LogFactory;
> > +import org.apache.blur.mapreduce.lib.BlurInputFormat;
> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
> > +import org.apache.blur.thrift.BlurClient;
> > +import org.apache.blur.thrift.generated.Blur.Iface;
> > +import org.apache.blur.thrift.generated.BlurException;
> > +import org.apache.blur.thrift.generated.TableDescriptor;
> > +import org.apache.blur.thrift.generated.TableStats;
> > +import org.apache.blur.utils.BlurConstants;
> > +import org.apache.commons.io.IOUtils;
> > +import org.apache.hadoop.conf.Configuration;
> > +import org.apache.hadoop.conf.Configured;
> > +import org.apache.hadoop.fs.FSDataInputStream;
> > +import org.apache.hadoop.fs.FileStatus;
> > +import org.apache.hadoop.fs.FileSystem;
> > +import org.apache.hadoop.fs.Path;
> > +import org.apache.hadoop.fs.permission.FsAction;
> > +import org.apache.hadoop.mapreduce.Cluster;
> > +import org.apache.hadoop.mapreduce.Job;
> > +import org.apache.hadoop.mapreduce.JobID;
> > +import org.apache.hadoop.mapreduce.JobStatus;
> > +import org.apache.hadoop.util.Tool;
> > +import org.apache.hadoop.util.ToolRunner;
> > +import org.apache.hadoop.yarn.exceptions.YarnException;
> > +import org.apache.log4j.LogManager;
> > +import org.apache.log4j.xml.DOMConfigurator;
> > +
> > +public class ClusterDriver extends Configured implements Tool {
> > +
> > +  private static final String BLUR_ENV = "blur.env";
> > +  private static final Log LOG = LogFactory.getLog(
> ClusterDriver.class);
> > +  private static final String _SEP = "_";
> > +  private static final String IMPORT = "import";
> > +
> > +  public static void main(String[] args) throws Exception {
> > +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
> > +    System.out.println("Log file path [" + logFilePath + "]");
> > +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
> > +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
> > +    if (url != null) {
> > +      LOG.info("Reseting log4j config from classpath resource [{0}]",
> url);
> > +      LogManager.resetConfiguration();
> > +      DOMConfigurator.configure(url);
> > +    }
> > +    int res = ToolRunner.run(new Configuration(), new ClusterDriver(),
> args);
>
> Not sure what this thing does yet but it seems we should validate
> those args since they're accessed blindly in run...
>
> --tim
>