Posted to commits@apex.apache.org by th...@apache.org on 2017/05/23 01:23:59 UTC

[01/13] apex-malhar git commit: Changed DT references to Apex

Repository: apex-malhar
Updated Branches:
  refs/heads/master c84a2c867 -> 2f70751e7


Changed DT references to Apex


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/2f70751e
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/2f70751e
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/2f70751e

Branch: refs/heads/master
Commit: 2f70751e7852cc8e2be94189e1bbf8be85a19559
Parents: c48ec8c
Author: Pramod Immaneni <pr...@datatorrent.com>
Authored: Mon May 22 14:27:02 2017 -0700
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 flume/README.md                                 |   4 +-
 .../apex/malhar/flume/discovery/Discovery.java  |   6 +-
 .../flume/discovery/ZKAssistedDiscovery.java    |   4 +-
 .../operator/AbstractFlumeInputOperator.java    |   2 +-
 .../apex/malhar/flume/sink/DTFlumeSink.java     | 574 -------------------
 .../apex/malhar/flume/sink/FlumeSink.java       | 574 +++++++++++++++++++
 .../flume-conf/flume-conf.sample.properties     |   2 +-
 .../resources/flume-conf/flume-env.sample.sh    |   6 +-
 .../discovery/ZKAssistedDiscoveryTest.java      |  12 +-
 .../apex/malhar/flume/sink/DTFlumeSinkTest.java | 146 -----
 .../apex/malhar/flume/sink/FlumeSinkTest.java   | 146 +++++
 .../resources/flume/conf/flume-conf.properties  |   4 +-
 .../src/test/resources/flume/conf/flume-env.sh  |   6 +-
 .../test/resources/flume/conf/flume_simple.conf |   2 +-
 .../resources/flume/conf/flume_zkdiscovery.conf |   4 +-
 15 files changed, 746 insertions(+), 746 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/README.md
----------------------------------------------------------------------
diff --git a/flume/README.md b/flume/README.md
index ec8fae9..5f9c320 100644
--- a/flume/README.md
+++ b/flume/README.md
@@ -23,7 +23,7 @@ and all the needed dependencies into `plugins.d/custom-plugin-name/libext/`
 (Alternatively to flume's automatic plugins.d detection, jars can be added to the
 FLUME_CLASSPATH using a `flume-env.sh` script. (See 'resources/flume-conf/flume-env.sample.sh')
 Therefore a maven repository must be available under $HOME/.m2 and the environment variable
-DT_FLUME_JAR must point to the plugin JAR.)
+APEX_FLUME_JAR must point to the plugin JAR.)
 
 ***Flume configuration***  
 A basic flume configuration can be found in `src/test/resources/flume/conf/flume_simple.conf`.  
@@ -31,7 +31,7 @@ A flume configuration using discovery service can be found in `src/test/resource
   Configuration files should be placed in flumes 'conf' directory and will be explicitly selected
   when running flume-ng
 
-In the configuration file set `org.apache.apex.malhar.flume.sink.DTFlumeSink` for the **type**  
+In the configuration file set `org.apache.apex.malhar.flume.sink.FlumeSink` for the **type**  
 and `org.apache.apex.malhar.flume.storage.HDFSStorage` for the **storage**,  
 as well as a **HDFS directory** for `baseDir`. The HDFS base directory needs
 to be created on HDFS.
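
For reference, a minimal agent definition along the lines the README describes, pieced together from the sample properties further down in this commit and the keys read by FlumeSink.configure(), might look as follows. This is an illustrative sketch only: the agent/sink names and the baseDir value are placeholders, and the storage.baseDir key assumes HDFSStorage reads its base directory as a sub-property of the configured storage.

  # sketch of a FlumeSink configuration (illustrative only)
  agent1.sinks = dt
  agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.FlumeSink
  agent1.sinks.dt.id = sink1
  agent1.sinks.dt.hostname = localhost
  agent1.sinks.dt.port = 8080
  agent1.sinks.dt.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
  agent1.sinks.dt.storage.baseDir = /path/on/hdfs/created/beforehand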

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
index 619a625..c32c15b 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
@@ -21,12 +21,12 @@ package org.apache.apex.malhar.flume.discovery;
 import java.util.Collection;
 
 /**
- * When DTFlumeSink server instance binds to the network interface, it can publish
+ * When FlumeSink server instance binds to the network interface, it can publish
  * its whereabouts by invoking advertise method on the Discovery object. Similarly
  * when it ceases accepting any more connections, it can publish its intent to do
  * so by invoking unadvertise.<p />
  * Interesting parties can call discover method to get the list of addresses where
- * they can find an available DTFlumeSink server instance.
+ * they can find an available FlumeSink server instance.
  *
  * @param <T> - Type of the objects which can be discovered
  * @since 0.9.3
@@ -41,7 +41,7 @@ public interface Discovery<T>
   void unadvertise(Service<T> service);
 
   /**
-   * Advertise the host/port address where DTFlumeSink is accepting a client connection.
+   * Advertise the host/port address where FlumeSink is accepting a client connection.
    *
    * @param service
    */

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
index 9a7dd3c..1988d62 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
@@ -70,7 +70,7 @@ public class ZKAssistedDiscovery implements Discovery<byte[]>,
 
   public ZKAssistedDiscovery()
   {
-    this.serviceName = "DTFlume";
+    this.serviceName = "ApexFlume";
     this.conntectionRetrySleepMillis = 500;
     this.connectionRetryCount = 10;
     this.connectionTimeoutMillis = 1000;
@@ -333,7 +333,7 @@ public class ZKAssistedDiscovery implements Discovery<byte[]>,
   @Override
   public void configure(org.apache.flume.Context context)
   {
-    serviceName = context.getString("serviceName", "DTFlume");
+    serviceName = context.getString("serviceName", "ApexFlume");
     connectionString = context.getString("connectionString");
     basePath = context.getString("basePath");
 

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
index f9beb71..93b01af 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
@@ -119,7 +119,7 @@ public abstract class AbstractFlumeInputOperator<T>
   public void activate(OperatorContext ctx)
   {
     if (connectionSpecs.length == 0) {
-      logger.info("Discovered zero DTFlumeSink");
+      logger.info("Discovered zero FlumeSink");
     } else if (connectionSpecs.length == 1) {
       for (String connectAddresse: connectionSpecs) {
         logger.debug("Connection spec is {}", connectAddresse);

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java b/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
deleted file mode 100644
index 4f28850..0000000
--- a/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
+++ /dev/null
@@ -1,574 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.apex.malhar.flume.sink;
-
-import java.io.IOError;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.ServiceConfigurationError;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.apex.malhar.flume.discovery.Discovery;
-import org.apache.apex.malhar.flume.sink.Server.Client;
-import org.apache.apex.malhar.flume.sink.Server.Request;
-import org.apache.apex.malhar.flume.storage.EventCodec;
-import org.apache.apex.malhar.flume.storage.Storage;
-
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.EventDeliveryException;
-import org.apache.flume.Transaction;
-import org.apache.flume.conf.Configurable;
-import org.apache.flume.sink.AbstractSink;
-
-import com.datatorrent.api.Component;
-import com.datatorrent.api.StreamCodec;
-
-import com.datatorrent.netlet.DefaultEventLoop;
-import com.datatorrent.netlet.NetletThrowable;
-import com.datatorrent.netlet.NetletThrowable.NetletRuntimeException;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * DTFlumeSink is a flume sink developed to ingest the data into DataTorrent DAG
- * from flume. It's essentially a flume sink which acts as a server capable of
- * talking to one client at a time. The client for this server is AbstractFlumeInputOperator.
- * <p />
- * &lt;experimental&gt;DTFlumeSink auto adjusts the rate at which it consumes the data from channel to
- * match the throughput of the DAG.&lt;/experimental&gt;
- * <p />
- * The properties you can set on the DTFlumeSink are: <br />
- * id - string unique value identifying this sink <br />
- * hostname - string value indicating the fqdn or ip address of the interface on which the server should listen <br />
- * port - integer value indicating the numeric port to which the server should bind <br />
- * sleepMillis - integer value indicating the number of milliseconds the process should sleep when there are no events
- * before checking for next event again <br />
- * throughputAdjustmentPercent - integer value indicating by what percentage the flume transaction size should be
- * adjusted upward or downward at a time <br />
- * minimumEventsPerTransaction - integer value indicating the minimum number of events per transaction <br />
- * maximumEventsPerTransaction - integer value indicating the maximum number of events per transaction. This value can
- * not be more than channel's transaction capacity.<br />
- *
- * @since 0.9.2
- */
-public class DTFlumeSink extends AbstractSink implements Configurable
-{
-  private static final String HOSTNAME_STRING = "hostname";
-  private static final String HOSTNAME_DEFAULT = "locahost";
-  private static final long ACCEPTED_TOLERANCE = 20000;
-  private DefaultEventLoop eventloop;
-  private Server server;
-  private int outstandingEventsCount;
-  private int lastConsumedEventsCount;
-  private int idleCount;
-  private byte[] playback;
-  private Client client;
-  private String hostname;
-  private int port;
-  private String id;
-  private long acceptedTolerance;
-  private long sleepMillis;
-  private double throughputAdjustmentFactor;
-  private int minimumEventsPerTransaction;
-  private int maximumEventsPerTransaction;
-  private long commitEventTimeoutMillis;
-  private transient long lastCommitEventTimeMillis;
-  private Storage storage;
-  Discovery<byte[]> discovery;
-  StreamCodec<Event> codec;
-  /* Begin implementing Flume Sink interface */
-
-  @Override
-  @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch", "UseSpecificCatch", "SleepWhileInLoop"})
-  public Status process() throws EventDeliveryException
-  {
-    Slice slice;
-    synchronized (server.requests) {
-      for (Request r : server.requests) {
-        logger.debug("found {}", r);
-        switch (r.type) {
-          case SEEK:
-            lastCommitEventTimeMillis = System.currentTimeMillis();
-            slice = r.getAddress();
-            playback = storage.retrieve(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
-            client = r.client;
-            break;
-
-          case COMMITTED:
-            lastCommitEventTimeMillis = System.currentTimeMillis();
-            slice = r.getAddress();
-            storage.clean(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
-            break;
-
-          case CONNECTED:
-            logger.debug("Connected received, ignoring it!");
-            break;
-
-          case DISCONNECTED:
-            if (r.client == client) {
-              client = null;
-              outstandingEventsCount = 0;
-            }
-            break;
-
-          case WINDOWED:
-            lastConsumedEventsCount = r.getEventCount();
-            idleCount = r.getIdleCount();
-            outstandingEventsCount -= lastConsumedEventsCount;
-            break;
-
-          case SERVER_ERROR:
-            throw new IOError(null);
-
-          default:
-            logger.debug("Cannot understand the request {}", r);
-            break;
-        }
-      }
-
-      server.requests.clear();
-    }
-
-    if (client == null) {
-      logger.info("No client expressed interest yet to consume the events.");
-      return Status.BACKOFF;
-    } else if (System.currentTimeMillis() - lastCommitEventTimeMillis > commitEventTimeoutMillis) {
-      logger.info("Client has not processed the workload given for the last {} milliseconds, so backing off.",
-          System.currentTimeMillis() - lastCommitEventTimeMillis);
-      return Status.BACKOFF;
-    }
-
-    int maxTuples;
-    // the following logic needs to be fixed... this is a quick put together.
-    if (outstandingEventsCount < 0) {
-      if (idleCount > 1) {
-        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
-      } else {
-        maxTuples = (int)((1 + throughputAdjustmentFactor) * lastConsumedEventsCount);
-      }
-    } else if (outstandingEventsCount > lastConsumedEventsCount) {
-      maxTuples = (int)((1 - throughputAdjustmentFactor) * lastConsumedEventsCount);
-    } else {
-      if (idleCount > 0) {
-        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
-        if (maxTuples <= 0) {
-          maxTuples = minimumEventsPerTransaction;
-        }
-      } else {
-        maxTuples = lastConsumedEventsCount;
-      }
-    }
-
-    if (maxTuples >= maximumEventsPerTransaction) {
-      maxTuples = maximumEventsPerTransaction;
-    } else if (maxTuples <= 0) {
-      maxTuples = minimumEventsPerTransaction;
-    }
-
-    if (maxTuples > 0) {
-      if (playback != null) {
-        try {
-          int i = 0;
-          do {
-            if (!client.write(playback)) {
-              retryWrite(playback, null);
-            }
-            outstandingEventsCount++;
-            playback = storage.retrieveNext();
-          }
-          while (++i < maxTuples && playback != null);
-        } catch (Exception ex) {
-          logger.warn("Playback Failed", ex);
-          if (ex instanceof NetletThrowable) {
-            try {
-              eventloop.disconnect(client);
-            } finally {
-              client = null;
-              outstandingEventsCount = 0;
-            }
-          }
-          return Status.BACKOFF;
-        }
-      } else {
-        int storedTuples = 0;
-
-        Transaction t = getChannel().getTransaction();
-        try {
-          t.begin();
-
-          Event e;
-          while (storedTuples < maxTuples && (e = getChannel().take()) != null) {
-            Slice event = codec.toByteArray(e);
-            byte[] address = storage.store(event);
-            if (address != null) {
-              if (!client.write(address, event)) {
-                retryWrite(address, event);
-              }
-              outstandingEventsCount++;
-            } else {
-              logger.debug("Detected the condition of recovery from flume crash!");
-            }
-            storedTuples++;
-          }
-
-          if (storedTuples > 0) {
-            storage.flush();
-          }
-
-          t.commit();
-
-          if (storedTuples > 0) { /* log less frequently */
-            logger.debug("Transaction details maxTuples = {}, storedTuples = {}, outstanding = {}",
-                maxTuples, storedTuples, outstandingEventsCount);
-          }
-        } catch (Error er) {
-          t.rollback();
-          throw er;
-        } catch (Exception ex) {
-          logger.error("Transaction Failed", ex);
-          if (ex instanceof NetletRuntimeException && client != null) {
-            try {
-              eventloop.disconnect(client);
-            } finally {
-              client = null;
-              outstandingEventsCount = 0;
-            }
-          }
-          t.rollback();
-          return Status.BACKOFF;
-        } finally {
-          t.close();
-        }
-
-        if (storedTuples == 0) {
-          sleep();
-        }
-      }
-    }
-
-    return Status.READY;
-  }
-
-  private void sleep()
-  {
-    try {
-      Thread.sleep(sleepMillis);
-    } catch (InterruptedException ex) {
-      Thread.currentThread().interrupt();
-    }
-  }
-
-  @Override
-  public void start()
-  {
-    try {
-      if (storage instanceof Component) {
-        @SuppressWarnings("unchecked")
-        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
-        component.setup(null);
-      }
-      if (discovery instanceof Component) {
-        @SuppressWarnings("unchecked")
-        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
-        component.setup(null);
-      }
-      if (codec instanceof Component) {
-        @SuppressWarnings("unchecked")
-        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
-        component.setup(null);
-      }
-      eventloop = new DefaultEventLoop("EventLoop-" + id);
-      server = new Server(id, discovery,acceptedTolerance);
-    } catch (Error error) {
-      throw error;
-    } catch (RuntimeException re) {
-      throw re;
-    } catch (IOException ex) {
-      throw new RuntimeException(ex);
-    }
-
-    eventloop.start();
-    eventloop.start(hostname, port, server);
-    super.start();
-  }
-
-  @Override
-  public void stop()
-  {
-    try {
-      super.stop();
-    } finally {
-      try {
-        if (client != null) {
-          eventloop.disconnect(client);
-          client = null;
-        }
-
-        eventloop.stop(server);
-        eventloop.stop();
-
-        if (codec instanceof Component) {
-          @SuppressWarnings("unchecked")
-          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
-          component.teardown();
-        }
-        if (discovery instanceof Component) {
-          @SuppressWarnings("unchecked")
-          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
-          component.teardown();
-        }
-        if (storage instanceof Component) {
-          @SuppressWarnings("unchecked")
-          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
-          component.teardown();
-        }
-      } catch (Throwable cause) {
-        throw new ServiceConfigurationError("Failed Stop", cause);
-      }
-    }
-  }
-
-  /* End implementing Flume Sink interface */
-
-  /* Begin Configurable Interface */
-  @Override
-  public void configure(Context context)
-  {
-    hostname = context.getString(HOSTNAME_STRING, HOSTNAME_DEFAULT);
-    port = context.getInteger("port", 0);
-    id = context.getString("id");
-    if (id == null) {
-      id = getName();
-    }
-    acceptedTolerance = context.getLong("acceptedTolerance", ACCEPTED_TOLERANCE);
-    sleepMillis = context.getLong("sleepMillis", 5L);
-    throughputAdjustmentFactor = context.getInteger("throughputAdjustmentPercent", 5) / 100.0;
-    maximumEventsPerTransaction = context.getInteger("maximumEventsPerTransaction", 10000);
-    minimumEventsPerTransaction = context.getInteger("minimumEventsPerTransaction", 100);
-    commitEventTimeoutMillis = context.getLong("commitEventTimeoutMillis", Long.MAX_VALUE);
-
-    @SuppressWarnings("unchecked")
-    Discovery<byte[]> ldiscovery = configure("discovery", Discovery.class, context);
-    if (ldiscovery == null) {
-      logger.warn("Discovery agent not configured for the sink!");
-      discovery = new Discovery<byte[]>()
-      {
-        @Override
-        public void unadvertise(Service<byte[]> service)
-        {
-          logger.debug("Sink {} stopped listening on {}:{}", service.getId(), service.getHost(), service.getPort());
-        }
-
-        @Override
-        public void advertise(Service<byte[]> service)
-        {
-          logger.debug("Sink {} started listening on {}:{}", service.getId(), service.getHost(), service.getPort());
-        }
-
-        @Override
-        @SuppressWarnings("unchecked")
-        public Collection<Service<byte[]>> discover()
-        {
-          return Collections.EMPTY_SET;
-        }
-
-      };
-    } else {
-      discovery = ldiscovery;
-    }
-
-    storage = configure("storage", Storage.class, context);
-    if (storage == null) {
-      logger.warn("storage key missing... DTFlumeSink may lose data!");
-      storage = new Storage()
-      {
-        @Override
-        public byte[] store(Slice slice)
-        {
-          return null;
-        }
-
-        @Override
-        public byte[] retrieve(byte[] identifier)
-        {
-          return null;
-        }
-
-        @Override
-        public byte[] retrieveNext()
-        {
-          return null;
-        }
-
-        @Override
-        public void clean(byte[] identifier)
-        {
-        }
-
-        @Override
-        public void flush()
-        {
-        }
-
-      };
-    }
-
-    @SuppressWarnings("unchecked")
-    StreamCodec<Event> lCodec = configure("codec", StreamCodec.class, context);
-    if (lCodec == null) {
-      codec = new EventCodec();
-    } else {
-      codec = lCodec;
-    }
-
-  }
-
-  /* End Configurable Interface */
-
-  @SuppressWarnings({"UseSpecificCatch", "BroadCatchBlock", "TooBroadCatch"})
-  private static <T> T configure(String key, Class<T> clazz, Context context)
-  {
-    String classname = context.getString(key);
-    if (classname == null) {
-      return null;
-    }
-
-    try {
-      Class<?> loadClass = Thread.currentThread().getContextClassLoader().loadClass(classname);
-      if (clazz.isAssignableFrom(loadClass)) {
-        @SuppressWarnings("unchecked")
-        T object = (T)loadClass.newInstance();
-        if (object instanceof Configurable) {
-          Context context1 = new Context(context.getSubProperties(key + '.'));
-          String id = context1.getString(Storage.ID);
-          if (id == null) {
-            id = context.getString(Storage.ID);
-            logger.debug("{} inherited id={} from sink", key, id);
-            context1.put(Storage.ID, id);
-          }
-          ((Configurable)object).configure(context1);
-        }
-
-        return object;
-      } else {
-        logger.error("key class {} does not implement {} interface", classname, Storage.class.getCanonicalName());
-        throw new Error("Invalid storage " + classname);
-      }
-    } catch (Error error) {
-      throw error;
-    } catch (RuntimeException re) {
-      throw re;
-    } catch (Throwable t) {
-      throw new RuntimeException(t);
-    }
-  }
-
-  /**
-   * @return the hostname
-   */
-  String getHostname()
-  {
-    return hostname;
-  }
-
-  /**
-   * @param hostname the hostname to set
-   */
-  void setHostname(String hostname)
-  {
-    this.hostname = hostname;
-  }
-
-  /**
-   * @return the port
-   */
-  int getPort()
-  {
-    return port;
-  }
-
-  public long getAcceptedTolerance()
-  {
-    return acceptedTolerance;
-  }
-
-  public void setAcceptedTolerance(long acceptedTolerance)
-  {
-    this.acceptedTolerance = acceptedTolerance;
-  }
-
-  /**
-   * @param port the port to set
-   */
-  void setPort(int port)
-  {
-    this.port = port;
-  }
-
-  /**
-   * @return the discovery
-   */
-  Discovery<byte[]> getDiscovery()
-  {
-    return discovery;
-  }
-
-  /**
-   * @param discovery the discovery to set
-   */
-  void setDiscovery(Discovery<byte[]> discovery)
-  {
-    this.discovery = discovery;
-  }
-
-  /**
-   * Attempt the sequence of writing after sleeping twice and upon failure assume
-   * that the client connection has problems and hence close it.
-   *
-   * @param address
-   * @param e
-   * @throws IOException
-   */
-  private void retryWrite(byte[] address, Slice event) throws IOException
-  {
-    if (event == null) {  /* this happens for playback where address and event are sent as single object */
-      while (client.isConnected()) {
-        sleep();
-        if (client.write(address)) {
-          return;
-        }
-      }
-    } else {  /* this happens when the events are taken from the flume channel and writing first time failed */
-      while (client.isConnected()) {
-        sleep();
-        if (client.write(address, event)) {
-          return;
-        }
-      }
-    }
-
-    throw new IOException("Client disconnected!");
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSink.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/java/org/apache/apex/malhar/flume/sink/FlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/sink/FlumeSink.java b/flume/src/main/java/org/apache/apex/malhar/flume/sink/FlumeSink.java
new file mode 100644
index 0000000..99cc1d5
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/sink/FlumeSink.java
@@ -0,0 +1,574 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.sink;
+
+import java.io.IOError;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.ServiceConfigurationError;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.apache.apex.malhar.flume.sink.Server.Client;
+import org.apache.apex.malhar.flume.sink.Server.Request;
+import org.apache.apex.malhar.flume.storage.EventCodec;
+import org.apache.apex.malhar.flume.storage.Storage;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.sink.AbstractSink;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.api.StreamCodec;
+
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.NetletThrowable;
+import com.datatorrent.netlet.NetletThrowable.NetletRuntimeException;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * FlumeSink is a flume sink developed to ingest the data into DataTorrent DAG
+ * from flume. It's essentially a flume sink which acts as a server capable of
+ * talking to one client at a time. The client for this server is AbstractFlumeInputOperator.
+ * <p />
+ * &lt;experimental&gt;FlumeSink auto adjusts the rate at which it consumes the data from channel to
+ * match the throughput of the DAG.&lt;/experimental&gt;
+ * <p />
+ * The properties you can set on the FlumeSink are: <br />
+ * id - string unique value identifying this sink <br />
+ * hostname - string value indicating the fqdn or ip address of the interface on which the server should listen <br />
+ * port - integer value indicating the numeric port to which the server should bind <br />
+ * sleepMillis - integer value indicating the number of milliseconds the process should sleep when there are no events
+ * before checking for next event again <br />
+ * throughputAdjustmentPercent - integer value indicating by what percentage the flume transaction size should be
+ * adjusted upward or downward at a time <br />
+ * minimumEventsPerTransaction - integer value indicating the minimum number of events per transaction <br />
+ * maximumEventsPerTransaction - integer value indicating the maximum number of events per transaction. This value can
+ * not be more than channel's transaction capacity.<br />
+ *
+ * @since 0.9.2
+ */
+public class FlumeSink extends AbstractSink implements Configurable
+{
+  private static final String HOSTNAME_STRING = "hostname";
+  private static final String HOSTNAME_DEFAULT = "locahost";
+  private static final long ACCEPTED_TOLERANCE = 20000;
+  private DefaultEventLoop eventloop;
+  private Server server;
+  private int outstandingEventsCount;
+  private int lastConsumedEventsCount;
+  private int idleCount;
+  private byte[] playback;
+  private Client client;
+  private String hostname;
+  private int port;
+  private String id;
+  private long acceptedTolerance;
+  private long sleepMillis;
+  private double throughputAdjustmentFactor;
+  private int minimumEventsPerTransaction;
+  private int maximumEventsPerTransaction;
+  private long commitEventTimeoutMillis;
+  private transient long lastCommitEventTimeMillis;
+  private Storage storage;
+  Discovery<byte[]> discovery;
+  StreamCodec<Event> codec;
+  /* Begin implementing Flume Sink interface */
+
+  @Override
+  @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch", "UseSpecificCatch", "SleepWhileInLoop"})
+  public Status process() throws EventDeliveryException
+  {
+    Slice slice;
+    synchronized (server.requests) {
+      for (Request r : server.requests) {
+        logger.debug("found {}", r);
+        switch (r.type) {
+          case SEEK:
+            lastCommitEventTimeMillis = System.currentTimeMillis();
+            slice = r.getAddress();
+            playback = storage.retrieve(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
+            client = r.client;
+            break;
+
+          case COMMITTED:
+            lastCommitEventTimeMillis = System.currentTimeMillis();
+            slice = r.getAddress();
+            storage.clean(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
+            break;
+
+          case CONNECTED:
+            logger.debug("Connected received, ignoring it!");
+            break;
+
+          case DISCONNECTED:
+            if (r.client == client) {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+            break;
+
+          case WINDOWED:
+            lastConsumedEventsCount = r.getEventCount();
+            idleCount = r.getIdleCount();
+            outstandingEventsCount -= lastConsumedEventsCount;
+            break;
+
+          case SERVER_ERROR:
+            throw new IOError(null);
+
+          default:
+            logger.debug("Cannot understand the request {}", r);
+            break;
+        }
+      }
+
+      server.requests.clear();
+    }
+
+    if (client == null) {
+      logger.info("No client expressed interest yet to consume the events.");
+      return Status.BACKOFF;
+    } else if (System.currentTimeMillis() - lastCommitEventTimeMillis > commitEventTimeoutMillis) {
+      logger.info("Client has not processed the workload given for the last {} milliseconds, so backing off.",
+          System.currentTimeMillis() - lastCommitEventTimeMillis);
+      return Status.BACKOFF;
+    }
+
+    int maxTuples;
+    // the following logic needs to be fixed... this is a quick put together.
+    if (outstandingEventsCount < 0) {
+      if (idleCount > 1) {
+        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
+      } else {
+        maxTuples = (int)((1 + throughputAdjustmentFactor) * lastConsumedEventsCount);
+      }
+    } else if (outstandingEventsCount > lastConsumedEventsCount) {
+      maxTuples = (int)((1 - throughputAdjustmentFactor) * lastConsumedEventsCount);
+    } else {
+      if (idleCount > 0) {
+        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
+        if (maxTuples <= 0) {
+          maxTuples = minimumEventsPerTransaction;
+        }
+      } else {
+        maxTuples = lastConsumedEventsCount;
+      }
+    }
+
+    if (maxTuples >= maximumEventsPerTransaction) {
+      maxTuples = maximumEventsPerTransaction;
+    } else if (maxTuples <= 0) {
+      maxTuples = minimumEventsPerTransaction;
+    }
+
+    if (maxTuples > 0) {
+      if (playback != null) {
+        try {
+          int i = 0;
+          do {
+            if (!client.write(playback)) {
+              retryWrite(playback, null);
+            }
+            outstandingEventsCount++;
+            playback = storage.retrieveNext();
+          }
+          while (++i < maxTuples && playback != null);
+        } catch (Exception ex) {
+          logger.warn("Playback Failed", ex);
+          if (ex instanceof NetletThrowable) {
+            try {
+              eventloop.disconnect(client);
+            } finally {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+          }
+          return Status.BACKOFF;
+        }
+      } else {
+        int storedTuples = 0;
+
+        Transaction t = getChannel().getTransaction();
+        try {
+          t.begin();
+
+          Event e;
+          while (storedTuples < maxTuples && (e = getChannel().take()) != null) {
+            Slice event = codec.toByteArray(e);
+            byte[] address = storage.store(event);
+            if (address != null) {
+              if (!client.write(address, event)) {
+                retryWrite(address, event);
+              }
+              outstandingEventsCount++;
+            } else {
+              logger.debug("Detected the condition of recovery from flume crash!");
+            }
+            storedTuples++;
+          }
+
+          if (storedTuples > 0) {
+            storage.flush();
+          }
+
+          t.commit();
+
+          if (storedTuples > 0) { /* log less frequently */
+            logger.debug("Transaction details maxTuples = {}, storedTuples = {}, outstanding = {}",
+                maxTuples, storedTuples, outstandingEventsCount);
+          }
+        } catch (Error er) {
+          t.rollback();
+          throw er;
+        } catch (Exception ex) {
+          logger.error("Transaction Failed", ex);
+          if (ex instanceof NetletRuntimeException && client != null) {
+            try {
+              eventloop.disconnect(client);
+            } finally {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+          }
+          t.rollback();
+          return Status.BACKOFF;
+        } finally {
+          t.close();
+        }
+
+        if (storedTuples == 0) {
+          sleep();
+        }
+      }
+    }
+
+    return Status.READY;
+  }
+
+  private void sleep()
+  {
+    try {
+      Thread.sleep(sleepMillis);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+    }
+  }
+
+  @Override
+  public void start()
+  {
+    try {
+      if (storage instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
+        component.setup(null);
+      }
+      if (discovery instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
+        component.setup(null);
+      }
+      if (codec instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
+        component.setup(null);
+      }
+      eventloop = new DefaultEventLoop("EventLoop-" + id);
+      server = new Server(id, discovery,acceptedTolerance);
+    } catch (Error error) {
+      throw error;
+    } catch (RuntimeException re) {
+      throw re;
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
+
+    eventloop.start();
+    eventloop.start(hostname, port, server);
+    super.start();
+  }
+
+  @Override
+  public void stop()
+  {
+    try {
+      super.stop();
+    } finally {
+      try {
+        if (client != null) {
+          eventloop.disconnect(client);
+          client = null;
+        }
+
+        eventloop.stop(server);
+        eventloop.stop();
+
+        if (codec instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
+          component.teardown();
+        }
+        if (discovery instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
+          component.teardown();
+        }
+        if (storage instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
+          component.teardown();
+        }
+      } catch (Throwable cause) {
+        throw new ServiceConfigurationError("Failed Stop", cause);
+      }
+    }
+  }
+
+  /* End implementing Flume Sink interface */
+
+  /* Begin Configurable Interface */
+  @Override
+  public void configure(Context context)
+  {
+    hostname = context.getString(HOSTNAME_STRING, HOSTNAME_DEFAULT);
+    port = context.getInteger("port", 0);
+    id = context.getString("id");
+    if (id == null) {
+      id = getName();
+    }
+    acceptedTolerance = context.getLong("acceptedTolerance", ACCEPTED_TOLERANCE);
+    sleepMillis = context.getLong("sleepMillis", 5L);
+    throughputAdjustmentFactor = context.getInteger("throughputAdjustmentPercent", 5) / 100.0;
+    maximumEventsPerTransaction = context.getInteger("maximumEventsPerTransaction", 10000);
+    minimumEventsPerTransaction = context.getInteger("minimumEventsPerTransaction", 100);
+    commitEventTimeoutMillis = context.getLong("commitEventTimeoutMillis", Long.MAX_VALUE);
+
+    @SuppressWarnings("unchecked")
+    Discovery<byte[]> ldiscovery = configure("discovery", Discovery.class, context);
+    if (ldiscovery == null) {
+      logger.warn("Discovery agent not configured for the sink!");
+      discovery = new Discovery<byte[]>()
+      {
+        @Override
+        public void unadvertise(Service<byte[]> service)
+        {
+          logger.debug("Sink {} stopped listening on {}:{}", service.getId(), service.getHost(), service.getPort());
+        }
+
+        @Override
+        public void advertise(Service<byte[]> service)
+        {
+          logger.debug("Sink {} started listening on {}:{}", service.getId(), service.getHost(), service.getPort());
+        }
+
+        @Override
+        @SuppressWarnings("unchecked")
+        public Collection<Service<byte[]>> discover()
+        {
+          return Collections.EMPTY_SET;
+        }
+
+      };
+    } else {
+      discovery = ldiscovery;
+    }
+
+    storage = configure("storage", Storage.class, context);
+    if (storage == null) {
+      logger.warn("storage key missing... FlumeSink may lose data!");
+      storage = new Storage()
+      {
+        @Override
+        public byte[] store(Slice slice)
+        {
+          return null;
+        }
+
+        @Override
+        public byte[] retrieve(byte[] identifier)
+        {
+          return null;
+        }
+
+        @Override
+        public byte[] retrieveNext()
+        {
+          return null;
+        }
+
+        @Override
+        public void clean(byte[] identifier)
+        {
+        }
+
+        @Override
+        public void flush()
+        {
+        }
+
+      };
+    }
+
+    @SuppressWarnings("unchecked")
+    StreamCodec<Event> lCodec = configure("codec", StreamCodec.class, context);
+    if (lCodec == null) {
+      codec = new EventCodec();
+    } else {
+      codec = lCodec;
+    }
+
+  }
+
+  /* End Configurable Interface */
+
+  @SuppressWarnings({"UseSpecificCatch", "BroadCatchBlock", "TooBroadCatch"})
+  private static <T> T configure(String key, Class<T> clazz, Context context)
+  {
+    String classname = context.getString(key);
+    if (classname == null) {
+      return null;
+    }
+
+    try {
+      Class<?> loadClass = Thread.currentThread().getContextClassLoader().loadClass(classname);
+      if (clazz.isAssignableFrom(loadClass)) {
+        @SuppressWarnings("unchecked")
+        T object = (T)loadClass.newInstance();
+        if (object instanceof Configurable) {
+          Context context1 = new Context(context.getSubProperties(key + '.'));
+          String id = context1.getString(Storage.ID);
+          if (id == null) {
+            id = context.getString(Storage.ID);
+            logger.debug("{} inherited id={} from sink", key, id);
+            context1.put(Storage.ID, id);
+          }
+          ((Configurable)object).configure(context1);
+        }
+
+        return object;
+      } else {
+        logger.error("key class {} does not implement {} interface", classname, Storage.class.getCanonicalName());
+        throw new Error("Invalid storage " + classname);
+      }
+    } catch (Error error) {
+      throw error;
+    } catch (RuntimeException re) {
+      throw re;
+    } catch (Throwable t) {
+      throw new RuntimeException(t);
+    }
+  }
+
+  /**
+   * @return the hostname
+   */
+  String getHostname()
+  {
+    return hostname;
+  }
+
+  /**
+   * @param hostname the hostname to set
+   */
+  void setHostname(String hostname)
+  {
+    this.hostname = hostname;
+  }
+
+  /**
+   * @return the port
+   */
+  int getPort()
+  {
+    return port;
+  }
+
+  public long getAcceptedTolerance()
+  {
+    return acceptedTolerance;
+  }
+
+  public void setAcceptedTolerance(long acceptedTolerance)
+  {
+    this.acceptedTolerance = acceptedTolerance;
+  }
+
+  /**
+   * @param port the port to set
+   */
+  void setPort(int port)
+  {
+    this.port = port;
+  }
+
+  /**
+   * @return the discovery
+   */
+  Discovery<byte[]> getDiscovery()
+  {
+    return discovery;
+  }
+
+  /**
+   * @param discovery the discovery to set
+   */
+  void setDiscovery(Discovery<byte[]> discovery)
+  {
+    this.discovery = discovery;
+  }
+
+  /**
+   * Attempt the sequence of writing after sleeping twice and upon failure assume
+   * that the client connection has problems and hence close it.
+   *
+   * @param address
+   * @param e
+   * @throws IOException
+   */
+  private void retryWrite(byte[] address, Slice event) throws IOException
+  {
+    if (event == null) {  /* this happens for playback where address and event are sent as single object */
+      while (client.isConnected()) {
+        sleep();
+        if (client.write(address)) {
+          return;
+        }
+      }
+    } else {  /* this happens when the events are taken from the flume channel and writing first time failed */
+      while (client.isConnected()) {
+        sleep();
+        if (client.write(address, event)) {
+          return;
+        }
+      }
+    }
+
+    throw new IOException("Client disconnected!");
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(FlumeSink.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/resources/flume-conf/flume-conf.sample.properties
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-conf.sample.properties b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
index af59e52..9504441 100644
--- a/flume/src/main/resources/flume-conf/flume-conf.sample.properties
+++ b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
@@ -23,7 +23,7 @@
  agent1.sinks = dt
 
 # first sink - dt
- agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.FlumeSink
  agent1.sinks.dt.id = sink1
  agent1.sinks.dt.hostname = localhost
  agent1.sinks.dt.port = 8080

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/main/resources/flume-conf/flume-env.sample.sh
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-env.sample.sh b/flume/src/main/resources/flume-conf/flume-env.sample.sh
index 570411b..41d093f 100644
--- a/flume/src/main/resources/flume-conf/flume-env.sample.sh
+++ b/flume/src/main/resources/flume-conf/flume-env.sample.sh
@@ -22,9 +22,9 @@
 # This script runs on the machine which have maven repository populated under
 # $HOME/.m2 If that's not the case, please adjust the JARPATH variable below
 # to point to colon separated list of directories where jar files can be found
-if test -z "$DT_FLUME_JAR"
+if test -z "$APEX_FLUME_JAR"
 then
-  echo [ERROR]: Environment variable DT_FLUME_JAR should point to a valid jar file which contains DTFlumeSink class >&2
+  echo [ERROR]: Environment variable APEX_FLUME_JAR should point to a valid jar file which contains FlumeSink class >&2
   exit 2
 fi
 
@@ -35,4 +35,4 @@ then
 else
   JAVA=${JAVA_HOME}/bin/java
 fi
-FLUME_CLASSPATH=`JARPATH=$JARPATH $JAVA -cp $DT_FLUME_JAR com.datatorrent.jarpath.JarPath -N $DT_FLUME_JAR -Xdt-jarpath -Xdt-netlet`
+FLUME_CLASSPATH=`JARPATH=$JARPATH $JAVA -cp $APEX_FLUME_JAR com.datatorrent.jarpath.JarPath -N $APEX_FLUME_JAR -Xdt-jarpath -Xdt-netlet`

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
index 9db5d32..71381d7 100644
--- a/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
@@ -46,9 +46,9 @@ public class ZKAssistedDiscoveryTest
   public void testSerialization() throws Exception
   {
     ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
-    discovery.setServiceName("DTFlumeTest");
+    discovery.setServiceName("ApexFlumeTest");
     discovery.setConnectionString("localhost:2181");
-    discovery.setBasePath("/HelloDT");
+    discovery.setBasePath("/HelloApex");
     discovery.setup(null);
     ServiceInstance<byte[]> instance = discovery.getInstance(new Service<byte[]>()
     {
@@ -91,9 +91,9 @@ public class ZKAssistedDiscoveryTest
   public void testDiscover()
   {
     ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
-    discovery.setServiceName("DTFlumeTest");
+    discovery.setServiceName("ApexFlumeTest");
     discovery.setConnectionString("localhost:2181");
-    discovery.setBasePath("/HelloDT");
+    discovery.setBasePath("/HelloApex");
     discovery.setup(null);
     assertNotNull("Discovered Sinks", discovery.discover());
     discovery.teardown();
@@ -103,9 +103,9 @@ public class ZKAssistedDiscoveryTest
   public void testAdvertize()
   {
     ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
-    discovery.setServiceName("DTFlumeTest");
+    discovery.setServiceName("ApexFlumeTest");
     discovery.setConnectionString("localhost:2181");
-    discovery.setBasePath("/HelloDT");
+    discovery.setBasePath("/HelloApex");
     discovery.setup(null);
 
     Service<byte[]> service = new Service<byte[]>()

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
deleted file mode 100644
index f97d9c0..0000000
--- a/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.apex.malhar.flume.sink;
-
-import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.apex.malhar.flume.discovery.Discovery;
-
-import org.apache.flume.channel.MemoryChannel;
-
-import com.datatorrent.netlet.AbstractLengthPrependerClient;
-import com.datatorrent.netlet.DefaultEventLoop;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- *
- */
-public class DTFlumeSinkTest
-{
-  static final String hostname = "localhost";
-  int port = 0;
-
-  @Test
-  @SuppressWarnings("SleepWhileInLoop")
-  public void testServer() throws InterruptedException, IOException
-  {
-    Discovery<byte[]> discovery = new Discovery<byte[]>()
-    {
-      @Override
-      public synchronized void unadvertise(Service<byte[]> service)
-      {
-        notify();
-      }
-
-      @Override
-      public synchronized void advertise(Service<byte[]> service)
-      {
-        port = service.getPort();
-        logger.debug("listening at {}", service);
-        notify();
-      }
-
-      @Override
-      @SuppressWarnings("unchecked")
-      public synchronized Collection<Service<byte[]>> discover()
-      {
-        try {
-          wait();
-        } catch (InterruptedException ie) {
-          throw new RuntimeException(ie);
-        }
-        return Collections.EMPTY_LIST;
-      }
-
-    };
-    DTFlumeSink sink = new DTFlumeSink();
-    sink.setName("TeskSink");
-    sink.setHostname(hostname);
-    sink.setPort(0);
-    sink.setAcceptedTolerance(2000);
-    sink.setChannel(new MemoryChannel());
-    sink.setDiscovery(discovery);
-    sink.start();
-    AbstractLengthPrependerClient client = new AbstractLengthPrependerClient()
-    {
-      private byte[] array;
-      private int offset = 2;
-
-      @Override
-      public void onMessage(byte[] buffer, int offset, int size)
-      {
-        Slice received = new Slice(buffer, offset, size);
-        logger.debug("Client Received = {}", received);
-        Assert.assertEquals(received,
-            new Slice(Arrays.copyOfRange(array, this.offset, array.length), 0, Server.Request.FIXED_SIZE));
-        synchronized (DTFlumeSinkTest.this) {
-          DTFlumeSinkTest.this.notify();
-        }
-      }
-
-      @Override
-      public void connected()
-      {
-        super.connected();
-        array = new byte[Server.Request.FIXED_SIZE + offset];
-        array[offset] = Server.Command.ECHO.getOrdinal();
-        array[offset + 1] = 1;
-        array[offset + 2] = 2;
-        array[offset + 3] = 3;
-        array[offset + 4] = 4;
-        array[offset + 5] = 5;
-        array[offset + 6] = 6;
-        array[offset + 7] = 7;
-        array[offset + 8] = 8;
-        Server.writeLong(array, offset + Server.Request.TIME_OFFSET, System.currentTimeMillis());
-        write(array, offset, Server.Request.FIXED_SIZE);
-      }
-
-    };
-
-    DefaultEventLoop eventloop = new DefaultEventLoop("Eventloop-TestClient");
-    eventloop.start();
-    discovery.discover();
-    try {
-      eventloop.connect(new InetSocketAddress(hostname, port), client);
-      try {
-        synchronized (this) {
-          this.wait();
-        }
-      } finally {
-        eventloop.disconnect(client);
-      }
-    } finally {
-      eventloop.stop();
-    }
-
-    sink.stop();
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSinkTest.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/java/org/apache/apex/malhar/flume/sink/FlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/sink/FlumeSinkTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/sink/FlumeSinkTest.java
new file mode 100644
index 0000000..e1bc7b8
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/sink/FlumeSinkTest.java
@@ -0,0 +1,146 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.sink;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+
+import org.apache.flume.channel.MemoryChannel;
+
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ */
+public class FlumeSinkTest
+{
+  static final String hostname = "localhost";
+  int port = 0;
+
+  @Test
+  @SuppressWarnings("SleepWhileInLoop")
+  public void testServer() throws InterruptedException, IOException
+  {
+    Discovery<byte[]> discovery = new Discovery<byte[]>()
+    {
+      @Override
+      public synchronized void unadvertise(Service<byte[]> service)
+      {
+        notify();
+      }
+
+      @Override
+      public synchronized void advertise(Service<byte[]> service)
+      {
+        port = service.getPort();
+        logger.debug("listening at {}", service);
+        notify();
+      }
+
+      @Override
+      @SuppressWarnings("unchecked")
+      public synchronized Collection<Service<byte[]>> discover()
+      {
+        try {
+          wait();
+        } catch (InterruptedException ie) {
+          throw new RuntimeException(ie);
+        }
+        return Collections.EMPTY_LIST;
+      }
+
+    };
+    FlumeSink sink = new FlumeSink();
+    sink.setName("TeskSink");
+    sink.setHostname(hostname);
+    sink.setPort(0);
+    sink.setAcceptedTolerance(2000);
+    sink.setChannel(new MemoryChannel());
+    sink.setDiscovery(discovery);
+    sink.start();
+    AbstractLengthPrependerClient client = new AbstractLengthPrependerClient()
+    {
+      private byte[] array;
+      private int offset = 2;
+
+      @Override
+      public void onMessage(byte[] buffer, int offset, int size)
+      {
+        Slice received = new Slice(buffer, offset, size);
+        logger.debug("Client Received = {}", received);
+        Assert.assertEquals(received,
+            new Slice(Arrays.copyOfRange(array, this.offset, array.length), 0, Server.Request.FIXED_SIZE));
+        synchronized (FlumeSinkTest.this) {
+          FlumeSinkTest.this.notify();
+        }
+      }
+
+      @Override
+      public void connected()
+      {
+        super.connected();
+        array = new byte[Server.Request.FIXED_SIZE + offset];
+        array[offset] = Server.Command.ECHO.getOrdinal();
+        array[offset + 1] = 1;
+        array[offset + 2] = 2;
+        array[offset + 3] = 3;
+        array[offset + 4] = 4;
+        array[offset + 5] = 5;
+        array[offset + 6] = 6;
+        array[offset + 7] = 7;
+        array[offset + 8] = 8;
+        Server.writeLong(array, offset + Server.Request.TIME_OFFSET, System.currentTimeMillis());
+        write(array, offset, Server.Request.FIXED_SIZE);
+      }
+
+    };
+
+    DefaultEventLoop eventloop = new DefaultEventLoop("Eventloop-TestClient");
+    eventloop.start();
+    discovery.discover();
+    try {
+      eventloop.connect(new InetSocketAddress(hostname, port), client);
+      try {
+        synchronized (this) {
+          this.wait();
+        }
+      } finally {
+        eventloop.disconnect(client);
+      }
+    } finally {
+      eventloop.stop();
+    }
+
+    sink.stop();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(FlumeSinkTest.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/resources/flume/conf/flume-conf.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-conf.properties b/flume/src/test/resources/flume/conf/flume-conf.properties
index 73dc79a..3498d67 100644
--- a/flume/src/test/resources/flume/conf/flume-conf.properties
+++ b/flume/src/test/resources/flume/conf/flume-conf.properties
@@ -63,7 +63,7 @@ agent1.sources.netcatSource.command = src/test/bash/subcat_periodically src/test
 
 # first sink - dt
 agent1.sinks.dt.id = CEVL00P
-agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.FlumeSink
 agent1.sinks.dt.hostname = localhost
 agent1.sinks.dt.port = 8080
 agent1.sinks.dt.sleepMillis = 7
@@ -80,7 +80,7 @@ agent1.sinks.dt.minimumEventsPerTransaction = 1
 # Ensure that we are able to detect flume sinks (and failures) automatically.
    agent1.sinks.dt.discovery = org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery
    agent1.sinks.dt.discovery.connectionString = 127.0.0.1:2181
-   agent1.sinks.dt.discovery.basePath = /HelloDT
+   agent1.sinks.dt.discovery.basePath = /HelloApex
    agent1.sinks.dt.discovery.connectionTimeoutMillis = 1000
    agent1.sinks.dt.discovery.connectionRetryCount = 10
    agent1.sinks.dt.discovery.connectionRetrySleepMillis = 500

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/resources/flume/conf/flume-env.sh
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-env.sh b/flume/src/test/resources/flume/conf/flume-env.sh
index 436e670..c03f98d 100644
--- a/flume/src/test/resources/flume/conf/flume-env.sh
+++ b/flume/src/test/resources/flume/conf/flume-env.sh
@@ -22,9 +22,9 @@
 # This script runs on the machine which have maven repository populated under
 # $HOME/.m2 If that's not the case, please adjust the JARPATH variable below
 # to point to colon separated list of directories where jar files can be found
-if test -z "$DT_FLUME_JAR"
+if test -z "$APEX_FLUME_JAR"
 then
-  echo [ERROR]: Environment variable DT_FLUME_JAR should point to a valid jar file which contains DTFlumeSink class >&2
+  echo [ERROR]: Environment variable APEX_FLUME_JAR should point to a valid jar file which contains FlumeSink class >&2
   exit 2
 fi
 
@@ -35,4 +35,4 @@ then
 else
   JAVA=${JAVA_HOME}/bin/java
 fi
-FLUME_CLASSPATH=`JARPATH=$JARPATH $JAVA -cp $DT_FLUME_JAR com.datatorrent.jarpath.JarPath -N $DT_FLUME_JAR -Xdt-jarpath -Xdt-netlet`
\ No newline at end of file
+FLUME_CLASSPATH=`JARPATH=$JARPATH $JAVA -cp $APEX_FLUME_JAR com.datatorrent.jarpath.JarPath -N $APEX_FLUME_JAR -Xdt-jarpath -Xdt-netlet`
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/resources/flume/conf/flume_simple.conf
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume_simple.conf b/flume/src/test/resources/flume/conf/flume_simple.conf
index b902881..2ed2614 100644
--- a/flume/src/test/resources/flume/conf/flume_simple.conf
+++ b/flume/src/test/resources/flume/conf/flume_simple.conf
@@ -29,7 +29,7 @@ a1.sources.r1.type = seq
 a1.sources.r1.totalEvents = 10
 
 # sink - dt
- a1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ a1.sinks.dt.type = org.apache.apex.malhar.flume.sink.FlumeSink
  a1.sinks.dt.id = sink1
  a1.sinks.dt.hostname = 127.0.0.1
  a1.sinks.dt.port = 9098

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2f70751e/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf b/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
index 6f8932c..723805d 100644
--- a/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
+++ b/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
@@ -34,7 +34,7 @@ a1.sources.r1.type = seq
 a1.sources.r1.totalEvents = 10
 
 # first sink - dt
- a1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ a1.sinks.dt.type = org.apache.apex.malhar.flume.sink.FlumeSink
  a1.sinks.dt.id = sink1
  a1.sinks.dt.hostname = 127.0.0.1
  a1.sinks.dt.port = 9098
@@ -48,7 +48,7 @@ a1.sources.r1.totalEvents = 10
  a1.sinks.dt.channel = c1
 
 # second sink - dt2
- a1.sinks.dt2.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ a1.sinks.dt2.type = org.apache.apex.malhar.flume.sink.FlumeSink
  a1.sinks.dt2.id = sink2
  a1.sinks.dt2.hostname = 127.0.0.1
  a1.sinks.dt2.port = 9099


[07/13] apex-malhar git commit: Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.

Posted by th...@apache.org.
Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/d200737b
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/d200737b
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/d200737b

Branch: refs/heads/master
Commit: d200737b631d9678a72e9e4fc817493de9b77ac0
Parents: bb89fe9
Author: Pramod Immaneni <pr...@datatorrent.com>
Authored: Mon Feb 20 00:26:00 2017 +0530
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 flume/README.md                                 |   6 +
 flume/pom.xml                                   |  28 +-
 .../datatorrent/flume/discovery/Discovery.java  |  69 --
 .../flume/discovery/ZKAssistedDiscovery.java    | 430 ---------
 .../ColumnFilteringFormattingInterceptor.java   | 229 -----
 .../interceptor/ColumnFilteringInterceptor.java | 205 ----
 .../operator/AbstractFlumeInputOperator.java    | 761 ---------------
 .../com/datatorrent/flume/sink/DTFlumeSink.java | 572 -----------
 .../java/com/datatorrent/flume/sink/Server.java | 420 --------
 .../flume/source/HdfsTestSource.java            | 224 -----
 .../datatorrent/flume/source/TestSource.java    | 250 -----
 .../datatorrent/flume/storage/DebugWrapper.java | 132 ---
 .../flume/storage/ErrorMaskingEventCodec.java   |  62 --
 .../datatorrent/flume/storage/EventCodec.java   |  92 --
 .../datatorrent/flume/storage/HDFSStorage.java  | 947 -------------------
 .../com/datatorrent/flume/storage/Storage.java  |  74 --
 .../apex/malhar/flume/discovery/Discovery.java  |  69 ++
 .../flume/discovery/ZKAssistedDiscovery.java    | 430 +++++++++
 .../ColumnFilteringFormattingInterceptor.java   | 227 +++++
 .../interceptor/ColumnFilteringInterceptor.java | 205 ++++
 .../operator/AbstractFlumeInputOperator.java    | 759 +++++++++++++++
 .../apex/malhar/flume/sink/DTFlumeSink.java     | 572 +++++++++++
 .../apache/apex/malhar/flume/sink/Server.java   | 419 ++++++++
 .../malhar/flume/source/HdfsTestSource.java     | 224 +++++
 .../apex/malhar/flume/source/TestSource.java    | 250 +++++
 .../apex/malhar/flume/storage/DebugWrapper.java | 132 +++
 .../flume/storage/ErrorMaskingEventCodec.java   |  62 ++
 .../apex/malhar/flume/storage/EventCodec.java   |  92 ++
 .../apex/malhar/flume/storage/HDFSStorage.java  | 947 +++++++++++++++++++
 .../apex/malhar/flume/storage/Storage.java      |  74 ++
 .../flume-conf/flume-conf.sample.properties     |   4 +-
 .../discovery/ZKAssistedDiscoveryTest.java      | 143 ---
 .../flume/integration/ApplicationTest.java      | 117 ---
 ...olumnFilteringFormattingInterceptorTest.java | 134 ---
 .../ColumnFilteringInterceptorTest.java         |  87 --
 .../interceptor/InterceptorTestHelper.java      | 216 -----
 .../datatorrent/flume/interceptor/RawEvent.java | 120 ---
 .../AbstractFlumeInputOperatorTest.java         |  57 --
 .../datatorrent/flume/sink/DTFlumeSinkTest.java | 145 ---
 .../com/datatorrent/flume/sink/ServerTest.java  |  93 --
 .../flume/storage/HDFSStorageMatching.java      | 111 ---
 .../flume/storage/HDFSStoragePerformance.java   |  87 --
 .../storage/HDFSStoragePerformanceTest.java     | 113 ---
 .../flume/storage/HDFSStorageTest.java          | 695 --------------
 .../discovery/ZKAssistedDiscoveryTest.java      | 143 +++
 .../flume/integration/ApplicationTest.java      | 117 +++
 ...olumnFilteringFormattingInterceptorTest.java | 134 +++
 .../ColumnFilteringInterceptorTest.java         |  87 ++
 .../interceptor/InterceptorTestHelper.java      | 216 +++++
 .../apex/malhar/flume/interceptor/RawEvent.java | 120 +++
 .../AbstractFlumeInputOperatorTest.java         |  57 ++
 .../apex/malhar/flume/sink/DTFlumeSinkTest.java | 145 +++
 .../apex/malhar/flume/sink/ServerTest.java      |  93 ++
 .../flume/storage/HDFSStorageMatching.java      | 111 +++
 .../flume/storage/HDFSStoragePerformance.java   |  87 ++
 .../storage/HDFSStoragePerformanceTest.java     | 113 +++
 .../malhar/flume/storage/HDFSStorageTest.java   | 695 ++++++++++++++
 .../resources/flume/conf/flume-conf.properties  |   6 +-
 flume/src/test/resources/log4j.properties       |  12 +-
 .../test/resources/test_data/gentxns/2013121500 | Bin 225010 -> 0 bytes
 .../resources/test_data/gentxns/2013121500.txt  | Bin 0 -> 225010 bytes
 .../test/resources/test_data/gentxns/2013121501 | Bin 224956 -> 0 bytes
 .../resources/test_data/gentxns/2013121501.txt  | Bin 0 -> 224956 bytes
 .../test/resources/test_data/gentxns/2013121502 | Bin 225028 -> 0 bytes
 .../resources/test_data/gentxns/2013121502.txt  | Bin 0 -> 225028 bytes
 .../test/resources/test_data/gentxns/2013121503 | Bin 225068 -> 0 bytes
 .../resources/test_data/gentxns/2013121503.txt  | Bin 0 -> 225068 bytes
 .../test/resources/test_data/gentxns/2013121504 | Bin 224845 -> 0 bytes
 .../resources/test_data/gentxns/2013121504.txt  | Bin 0 -> 224845 bytes
 .../test/resources/test_data/gentxns/2013121505 | Bin 225004 -> 0 bytes
 .../resources/test_data/gentxns/2013121505.txt  | Bin 0 -> 225004 bytes
 .../test/resources/test_data/gentxns/2013121506 | Bin 224929 -> 0 bytes
 .../resources/test_data/gentxns/2013121506.txt  | Bin 0 -> 224929 bytes
 .../test/resources/test_data/gentxns/2013121507 | Bin 224879 -> 0 bytes
 .../resources/test_data/gentxns/2013121507.txt  | Bin 0 -> 224879 bytes
 .../test/resources/test_data/gentxns/2013121508 | Bin 224963 -> 0 bytes
 .../resources/test_data/gentxns/2013121508.txt  | Bin 0 -> 224963 bytes
 .../test/resources/test_data/gentxns/2013121509 | Bin 224963 -> 0 bytes
 .../resources/test_data/gentxns/2013121509.txt  | Bin 0 -> 224963 bytes
 .../test/resources/test_data/gentxns/2013121510 | Bin 225007 -> 0 bytes
 .../resources/test_data/gentxns/2013121510.txt  | Bin 0 -> 225007 bytes
 .../test/resources/test_data/gentxns/2013121511 | Bin 224913 -> 0 bytes
 .../resources/test_data/gentxns/2013121511.txt  | Bin 0 -> 224913 bytes
 .../test/resources/test_data/gentxns/2013121512 | Bin 224929 -> 0 bytes
 .../resources/test_data/gentxns/2013121512.txt  | Bin 0 -> 224929 bytes
 .../test/resources/test_data/gentxns/2013121513 | Bin 225078 -> 0 bytes
 .../resources/test_data/gentxns/2013121513.txt  | Bin 0 -> 225078 bytes
 .../test/resources/test_data/gentxns/2013121514 | Bin 224882 -> 0 bytes
 .../resources/test_data/gentxns/2013121514.txt  | Bin 0 -> 224882 bytes
 .../test/resources/test_data/gentxns/2013121515 | Bin 224958 -> 0 bytes
 .../resources/test_data/gentxns/2013121515.txt  | Bin 0 -> 224958 bytes
 .../test/resources/test_data/gentxns/2013121516 | Bin 225032 -> 0 bytes
 .../resources/test_data/gentxns/2013121516.txt  | Bin 0 -> 225032 bytes
 .../test/resources/test_data/gentxns/2013121517 | Bin 225059 -> 0 bytes
 .../resources/test_data/gentxns/2013121517.txt  | Bin 0 -> 225059 bytes
 .../test/resources/test_data/gentxns/2013121518 | Bin 224890 -> 0 bytes
 .../resources/test_data/gentxns/2013121518.txt  | Bin 0 -> 224890 bytes
 .../test/resources/test_data/gentxns/2013121519 | Bin 225000 -> 0 bytes
 .../resources/test_data/gentxns/2013121519.txt  | Bin 0 -> 225000 bytes
 .../test/resources/test_data/gentxns/2013121520 | Bin 225064 -> 0 bytes
 .../resources/test_data/gentxns/2013121520.txt  | Bin 0 -> 225064 bytes
 .../test/resources/test_data/gentxns/2013121521 | Bin 225091 -> 0 bytes
 .../resources/test_data/gentxns/2013121521.txt  | Bin 0 -> 225091 bytes
 pom.xml                                         |   1 +
 104 files changed, 6613 insertions(+), 6609 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/README.md
----------------------------------------------------------------------
diff --git a/flume/README.md b/flume/README.md
new file mode 100644
index 0000000..1d0b2d9
--- /dev/null
+++ b/flume/README.md
@@ -0,0 +1,6 @@
+Flume
+===============================
+
+The folder contains support for flume to be used with Apex. It comprises mainly two components. The first is an agent that sits on the flume side, receives data from flume and makes it available via a socket server; in effect, it converts a push model into a pull model. The second component is the input operator that reads from the agent.
+
+The project starts with the latest code at the time of the sub-module creation. For older history, look at the flume sub-module in the older project called Megh (git@github.com:DataTorrent/Megh).

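For illustration only, a minimal sketch of the operator side described in this README, assuming the relocated package org.apache.apex.malhar.flume used elsewhere in this commit; the class and wiring below are hypothetical, and only convert(), setConnectAddresses() and setCodec() come from the operator's actual API.

    import org.apache.flume.Event;

    import org.apache.apex.malhar.flume.operator.AbstractFlumeInputOperator;

    // Hypothetical concrete operator: turn each flume event body into a String.
    public class StringFlumeInputOperator extends AbstractFlumeInputOperator<String>
    {
      @Override
      public String convert(Event event)
      {
        return new String(event.getBody());
      }
    }

    // Wiring in an Apex application (names illustrative); the connect address uses the
    // "sinkid:host:port" form expected by setConnectAddresses(), matching the sink id,
    // host and port configured on the flume agent:
    //   StringFlumeInputOperator input = dag.addOperator("flumeInput", new StringFlumeInputOperator());
    //   input.setConnectAddresses(new String[]{"sink1:127.0.0.1:9098"});
    //   input.setCodec(new EventCodec());  // any StreamCodec<Event>, e.g. the module's EventCodec
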
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/pom.xml
----------------------------------------------------------------------
diff --git a/flume/pom.xml b/flume/pom.xml
index 6522148..735a13b 100644
--- a/flume/pom.xml
+++ b/flume/pom.xml
@@ -23,14 +23,14 @@
   <modelVersion>4.0.0</modelVersion>
 
   <parent>
-    <artifactId>dt-megh</artifactId>
-    <groupId>com.datatorrent</groupId>
-    <version>3.6.0-SNAPSHOT</version>
+    <artifactId>malhar</artifactId>
+    <groupId>org.apache.apex</groupId>
+    <version>3.8.0-SNAPSHOT</version>
   </parent>
 
-  <artifactId>dt-flume</artifactId>
+  <artifactId>malhar-flume</artifactId>
   <packaging>jar</packaging>
-  <name>DataTorrent Flume Integration</name>
+  <name>Apache Apex Malhar Flume Support</name>
 
   <profiles>
     <profile>
@@ -57,16 +57,16 @@
                   <goal>attached-rpm</goal>
                 </goals>
                 <configuration>
-                  <license>Copyright &copy; 2014 DataTorrent, Inc.</license>
+                  <license>Apache License, Version 2.0</license>
                   <version>${parsedVersion.majorVersion}.${parsedVersion.minorVersion}.${parsedVersion.incrementalVersion}</version>
                   <release>${parsedVersion.qualifier}${parsedVersion.buildNumber}</release>
                   <workarea>target/sink-rpm</workarea>
                   <classifier>sink</classifier>
-                  <name>datatorrent-flume-sink</name>
-                  <distribution>DataTorrent Enterprise ${project.version}</distribution>
+                  <name>apex-malhar-flume-sink</name>
+                  <distribution>Apache Apex Malhar ${project.version}</distribution>
                   <group>Messaging Client Support</group>
                   <icon>src/main/resources/logo.gif</icon>
-                  <packager>DataTorrent Build System</packager>
+                  <packager>Apex Build System</packager>
                   <prefix>${package.prefix}</prefix>
                   <changelogFile>src/changelog</changelogFile>
                   <defineStatements>
@@ -82,7 +82,7 @@
                       <dependency>
                         <includes>
                           <include>org.apache.apex:apex-api:jar:${apex.core.version}</include>
-                          <include>com.datatorrent:dt-netlet:jar:1.2.0</include>
+                          <include>com.datatorrent:netlet:jar</include>
                           <include>org.apache.apex:apex-common:jar:${apex.core.version}</include>
                           <include>com.esotericsoftware.kryo:kryo:jar:2.24.0</include>
                           <include>com.esotericsoftware.minlog:minlog:jar:1.2</include>
@@ -120,15 +120,15 @@
                 </goals>
                 <configuration>
                   <version>${parsedVersion.majorVersion}.${parsedVersion.minorVersion}.${parsedVersion.incrementalVersion}</version>
-                  <license>Copyright &copy; 2014 DataTorrent, Inc.</license>
+                  <license>Apache License, Version 2.0</license>
                   <release>${parsedVersion.qualifier}${parsedVersion.buildNumber}</release>
                   <workarea>target/operator-rpm</workarea>
                   <classifier>operator</classifier>
-                  <name>datatorrent-flume-operator</name>
-                  <distribution>DataTorrent Enterprise ${project.version}</distribution>
+                  <name>apex-malhar-flume-operator</name>
+                  <distribution>Apache Apex Malhar ${project.version}</distribution>
                   <group>Messaging Client Support</group>
                   <icon>src/main/resources/logo.gif</icon>
-                  <packager>DataTorrent Build System</packager>
+                  <packager>Apex Build System</packager>
                   <prefix>${package.prefix}</prefix>
                   <changelogFile>src/changelog</changelogFile>
                   <description>${rpm.release}</description>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java b/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
deleted file mode 100644
index 72a1440..0000000
--- a/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.discovery;
-
-import java.util.Collection;
-
-/**
- * When DTFlumeSink server instance binds to the network interface, it can publish
- * its whereabouts by invoking advertise method on the Discovery object. Similarly
- * when it ceases accepting any more connections, it can publish its intent to do
- * so by invoking unadvertise.<p />
- * Interesting parties can call discover method to get the list of addresses where
- * they can find an available DTFlumeSink server instance.
- *
- * @param <T> - Type of the objects which can be discovered
- * @since 0.9.3
- */
-public interface Discovery<T>
-{
-  /**
-   * Recall the previously published address as it's no longer valid.
-   *
-   * @param service
-   */
-  void unadvertise(Service<T> service);
-
-  /**
-   * Advertise the host/port address where DTFlumeSink is accepting a client connection.
-   *
-   * @param service
-   */
-  void advertise(Service<T> service);
-
-  /**
-   * Discover all the addresses which are actively accepting the client connections.
-   *
-   * @return - Active server addresses which can accept the connections.
-   */
-  Collection<Service<T>> discover();
-
-  interface Service<T>
-  {
-    String getHost();
-
-    int getPort();
-
-    T getPayload();
-
-    String getId();
-
-  }
-
-}

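The Discovery contract deleted above is re-added under org.apache.apex.malhar.flume.discovery later in this commit. As an illustration of the contract only, not code from this commit, a trivial in-memory implementation could look like the sketch below; sinks would call advertise()/unadvertise() on it and interested parties would call discover().

    import java.util.Collection;
    import java.util.concurrent.ConcurrentHashMap;

    import org.apache.apex.malhar.flume.discovery.Discovery;

    // Illustrative only: keeps the currently advertised services in a map keyed by
    // service id, so discover() returns whatever sinks are advertised right now.
    public class InMemoryDiscovery implements Discovery<byte[]>
    {
      private final ConcurrentHashMap<String, Service<byte[]>> services =
          new ConcurrentHashMap<String, Service<byte[]>>();

      @Override
      public void advertise(Service<byte[]> service)
      {
        services.put(service.getId(), service);
      }

      @Override
      public void unadvertise(Service<byte[]> service)
      {
        services.remove(service.getId());
      }

      @Override
      public Collection<Service<byte[]>> discover()
      {
        return services.values();
      }
    }
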
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java b/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
deleted file mode 100644
index 97ad8f0..0000000
--- a/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
+++ /dev/null
@@ -1,430 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.discovery;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collection;
-
-import javax.validation.constraints.NotNull;
-
-import org.codehaus.jackson.map.ObjectMapper;
-import org.codehaus.jackson.map.ObjectReader;
-import org.codehaus.jackson.map.ObjectWriter;
-import org.codehaus.jackson.type.TypeReference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.curator.framework.CuratorFramework;
-import org.apache.curator.framework.CuratorFrameworkFactory;
-import org.apache.curator.retry.RetryNTimes;
-import org.apache.curator.utils.EnsurePath;
-import org.apache.curator.x.discovery.ServiceDiscovery;
-import org.apache.curator.x.discovery.ServiceDiscoveryBuilder;
-import org.apache.curator.x.discovery.ServiceInstance;
-import org.apache.curator.x.discovery.details.InstanceSerializer;
-import org.apache.flume.conf.Configurable;
-
-import com.google.common.base.Throwables;
-
-import com.datatorrent.api.Component;
-
-/**
- * <p>ZKAssistedDiscovery class.</p>
- *
- * @since 0.9.3
- */
-public class ZKAssistedDiscovery implements Discovery<byte[]>,
-    Component<com.datatorrent.api.Context>, Configurable, Serializable
-{
-  @NotNull
-  private String serviceName;
-  @NotNull
-  private String connectionString;
-  @NotNull
-  private String basePath;
-  private int connectionTimeoutMillis;
-  private int connectionRetryCount;
-  private int conntectionRetrySleepMillis;
-  private transient InstanceSerializerFactory instanceSerializerFactory;
-  private transient CuratorFramework curatorFramework;
-  private transient ServiceDiscovery<byte[]> discovery;
-
-  public ZKAssistedDiscovery()
-  {
-    this.serviceName = "DTFlume";
-    this.conntectionRetrySleepMillis = 500;
-    this.connectionRetryCount = 10;
-    this.connectionTimeoutMillis = 1000;
-  }
-
-  @Override
-  public void unadvertise(Service<byte[]> service)
-  {
-    doAdvertise(service, false);
-  }
-
-  @Override
-  public void advertise(Service<byte[]> service)
-  {
-    doAdvertise(service, true);
-  }
-
-  public void doAdvertise(Service<byte[]> service, boolean flag)
-  {
-    try {
-      new EnsurePath(basePath).ensure(curatorFramework.getZookeeperClient());
-
-      ServiceInstance<byte[]> instance = getInstance(service);
-      if (flag) {
-        discovery.registerService(instance);
-      } else {
-        discovery.unregisterService(instance);
-      }
-    } catch (Exception e) {
-      throw Throwables.propagate(e);
-    }
-  }
-
-  @Override
-  public Collection<Service<byte[]>> discover()
-  {
-    try {
-      new EnsurePath(basePath).ensure(curatorFramework.getZookeeperClient());
-
-      Collection<ServiceInstance<byte[]>> services = discovery.queryForInstances(serviceName);
-      ArrayList<Service<byte[]>> returnable = new ArrayList<Service<byte[]>>(services.size());
-      for (final ServiceInstance<byte[]> service : services) {
-        returnable.add(new Service<byte[]>()
-        {
-          @Override
-          public String getHost()
-          {
-            return service.getAddress();
-          }
-
-          @Override
-          public int getPort()
-          {
-            return service.getPort();
-          }
-
-          @Override
-          public byte[] getPayload()
-          {
-            return service.getPayload();
-          }
-
-          @Override
-          public String getId()
-          {
-            return service.getId();
-          }
-
-          @Override
-          public String toString()
-          {
-            return "{" + getId() + " => " + getHost() + ':' + getPort() + '}';
-          }
-
-        });
-      }
-      return returnable;
-    } catch (Exception e) {
-      throw Throwables.propagate(e);
-    }
-  }
-
-  @Override
-  public String toString()
-  {
-    return "ZKAssistedDiscovery{" + "serviceName=" + serviceName + ", connectionString=" + connectionString +
-        ", basePath=" + basePath + ", connectionTimeoutMillis=" + connectionTimeoutMillis + ", connectionRetryCount=" +
-        connectionRetryCount + ", conntectionRetrySleepMillis=" + conntectionRetrySleepMillis + '}';
-  }
-
-  @Override
-  public int hashCode()
-  {
-    int hash = 7;
-    hash = 47 * hash + this.serviceName.hashCode();
-    hash = 47 * hash + this.connectionString.hashCode();
-    hash = 47 * hash + this.basePath.hashCode();
-    hash = 47 * hash + this.connectionTimeoutMillis;
-    hash = 47 * hash + this.connectionRetryCount;
-    hash = 47 * hash + this.conntectionRetrySleepMillis;
-    return hash;
-  }
-
-  @Override
-  public boolean equals(Object obj)
-  {
-    if (obj == null) {
-      return false;
-    }
-    if (getClass() != obj.getClass()) {
-      return false;
-    }
-    final ZKAssistedDiscovery other = (ZKAssistedDiscovery)obj;
-    if (!this.serviceName.equals(other.serviceName)) {
-      return false;
-    }
-    if (!this.connectionString.equals(other.connectionString)) {
-      return false;
-    }
-    if (!this.basePath.equals(other.basePath)) {
-      return false;
-    }
-    if (this.connectionTimeoutMillis != other.connectionTimeoutMillis) {
-      return false;
-    }
-    if (this.connectionRetryCount != other.connectionRetryCount) {
-      return false;
-    }
-    if (this.conntectionRetrySleepMillis != other.conntectionRetrySleepMillis) {
-      return false;
-    }
-    return true;
-  }
-
-  ServiceInstance<byte[]> getInstance(Service<byte[]> service) throws Exception
-  {
-    return ServiceInstance.<byte[]>builder()
-            .name(serviceName)
-            .address(service.getHost())
-            .port(service.getPort())
-            .id(service.getId())
-            .payload(service.getPayload())
-            .build();
-  }
-
-  private ServiceDiscovery<byte[]> getDiscovery(CuratorFramework curatorFramework)
-  {
-    return ServiceDiscoveryBuilder.builder(byte[].class)
-            .basePath(basePath)
-            .client(curatorFramework)
-            .serializer(instanceSerializerFactory.getInstanceSerializer(
-            new TypeReference<ServiceInstance<byte[]>>()
-              {})).build();
-  }
-
-  /**
-   * @return the instanceSerializerFactory
-   */
-  InstanceSerializerFactory getInstanceSerializerFactory()
-  {
-    return instanceSerializerFactory;
-  }
-
-  /**
-   * @return the connectionString
-   */
-  public String getConnectionString()
-  {
-    return connectionString;
-  }
-
-  /**
-   * @param connectionString the connectionString to set
-   */
-  public void setConnectionString(String connectionString)
-  {
-    this.connectionString = connectionString;
-  }
-
-  /**
-   * @return the basePath
-   */
-  public String getBasePath()
-  {
-    return basePath;
-  }
-
-  /**
-   * @param basePath the basePath to set
-   */
-  public void setBasePath(String basePath)
-  {
-    this.basePath = basePath;
-  }
-
-  /**
-   * @return the connectionTimeoutMillis
-   */
-  public int getConnectionTimeoutMillis()
-  {
-    return connectionTimeoutMillis;
-  }
-
-  /**
-   * @param connectionTimeoutMillis the connectionTimeoutMillis to set
-   */
-  public void setConnectionTimeoutMillis(int connectionTimeoutMillis)
-  {
-    this.connectionTimeoutMillis = connectionTimeoutMillis;
-  }
-
-  /**
-   * @return the connectionRetryCount
-   */
-  public int getConnectionRetryCount()
-  {
-    return connectionRetryCount;
-  }
-
-  /**
-   * @param connectionRetryCount the connectionRetryCount to set
-   */
-  public void setConnectionRetryCount(int connectionRetryCount)
-  {
-    this.connectionRetryCount = connectionRetryCount;
-  }
-
-  /**
-   * @return the conntectionRetrySleepMillis
-   */
-  public int getConntectionRetrySleepMillis()
-  {
-    return conntectionRetrySleepMillis;
-  }
-
-  /**
-   * @param conntectionRetrySleepMillis the conntectionRetrySleepMillis to set
-   */
-  public void setConntectionRetrySleepMillis(int conntectionRetrySleepMillis)
-  {
-    this.conntectionRetrySleepMillis = conntectionRetrySleepMillis;
-  }
-
-  /**
-   * @return the serviceName
-   */
-  public String getServiceName()
-  {
-    return serviceName;
-  }
-
-  /**
-   * @param serviceName the serviceName to set
-   */
-  public void setServiceName(String serviceName)
-  {
-    this.serviceName = serviceName;
-  }
-
-  @Override
-  public void configure(org.apache.flume.Context context)
-  {
-    serviceName = context.getString("serviceName", "DTFlume");
-    connectionString = context.getString("connectionString");
-    basePath = context.getString("basePath");
-
-    connectionTimeoutMillis = context.getInteger("connectionTimeoutMillis", 1000);
-    connectionRetryCount = context.getInteger("connectionRetryCount", 10);
-    conntectionRetrySleepMillis = context.getInteger("connectionRetrySleepMillis", 500);
-  }
-
-  @Override
-  public void setup(com.datatorrent.api.Context context)
-  {
-    ObjectMapper om = new ObjectMapper();
-    instanceSerializerFactory = new InstanceSerializerFactory(om.reader(), om.writer());
-
-    curatorFramework = CuratorFrameworkFactory.builder()
-            .connectionTimeoutMs(connectionTimeoutMillis)
-            .retryPolicy(new RetryNTimes(connectionRetryCount, conntectionRetrySleepMillis))
-            .connectString(connectionString)
-            .build();
-    curatorFramework.start();
-
-    discovery = getDiscovery(curatorFramework);
-    try {
-      discovery.start();
-    } catch (Exception ex) {
-      Throwables.propagate(ex);
-    }
-  }
-
-  @Override
-  public void teardown()
-  {
-    try {
-      discovery.close();
-    } catch (IOException ex) {
-      throw new RuntimeException(ex);
-    } finally {
-      curatorFramework.close();
-      curatorFramework = null;
-    }
-  }
-
-  public class InstanceSerializerFactory
-  {
-    private final ObjectReader objectReader;
-    private final ObjectWriter objectWriter;
-
-    InstanceSerializerFactory(ObjectReader objectReader, ObjectWriter objectWriter)
-    {
-      this.objectReader = objectReader;
-      this.objectWriter = objectWriter;
-    }
-
-    public <T> InstanceSerializer<T> getInstanceSerializer(
-        TypeReference<ServiceInstance<T>> typeReference)
-    {
-      return new JacksonInstanceSerializer<T>(objectReader, objectWriter, typeReference);
-    }
-
-    final class JacksonInstanceSerializer<T> implements InstanceSerializer<T>
-    {
-      private final TypeReference<ServiceInstance<T>> typeRef;
-      private final ObjectWriter objectWriter;
-      private final ObjectReader objectReader;
-
-      JacksonInstanceSerializer(ObjectReader objectReader, ObjectWriter objectWriter,
-          TypeReference<ServiceInstance<T>> typeRef)
-      {
-        this.objectReader = objectReader;
-        this.objectWriter = objectWriter;
-        this.typeRef = typeRef;
-      }
-
-      @Override
-      public ServiceInstance<T> deserialize(byte[] bytes) throws Exception
-      {
-        return objectReader.withType(typeRef).readValue(bytes);
-      }
-
-      @Override
-      public byte[] serialize(ServiceInstance<T> serviceInstance) throws Exception
-      {
-        ByteArrayOutputStream out = new ByteArrayOutputStream();
-        objectWriter.writeValue(out, serviceInstance);
-        return out.toByteArray();
-      }
-
-    }
-
-  }
-
-  private static final long serialVersionUID = 201401221145L;
-  private static final Logger logger = LoggerFactory.getLogger(ZKAssistedDiscovery.class);
-}

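ZKAssistedDiscovery also moves to org.apache.apex.malhar.flume.discovery in this commit. A hedged usage sketch, assuming a ZooKeeper ensemble reachable at 127.0.0.1:2181 and the /HelloApex base path used in the sample agent configuration; only the setters and lifecycle methods shown in the class above are used.

    import org.apache.apex.malhar.flume.discovery.Discovery.Service;
    import org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery;

    public class DiscoveryProbe
    {
      public static void main(String[] args)
      {
        // Programmatic equivalent of the agent1.sinks.dt.discovery.* properties.
        ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
        discovery.setConnectionString("127.0.0.1:2181");  // ZooKeeper ensemble (assumed running)
        discovery.setBasePath("/HelloApex");
        discovery.setConnectionTimeoutMillis(1000);
        discovery.setConnectionRetryCount(10);
        discovery.setConntectionRetrySleepMillis(500);     // setter name as spelled in the source
        discovery.setup(null);                             // the Context argument is not used by setup()
        try {
          for (Service<byte[]> service : discovery.discover()) {
            System.out.println(service.getId() + " => " + service.getHost() + ':' + service.getPort());
          }
        } finally {
          discovery.teardown();
        }
      }
    }
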
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
deleted file mode 100644
index fd20f99..0000000
--- a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
+++ /dev/null
@@ -1,229 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.interceptor;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.interceptor.Interceptor;
-
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Ints;
-
-import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER;
-import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.SRC_SEPARATOR;
-import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.SRC_SEPARATOR_DFLT;
-
-/**
- * <p>ColumnFilteringFormattingInterceptor class.</p>
- *
- * @since 0.9.4
- */
-public class ColumnFilteringFormattingInterceptor implements Interceptor
-{
-  private final byte srcSeparator;
-  private final byte[][] dstSeparators;
-  private final byte[] prefix;
-  private final int maxIndex;
-  private final int maxColumn;
-  private final int[] columns;
-  private final int[] positions;
-
-  private ColumnFilteringFormattingInterceptor(int[] columns, byte srcSeparator, byte[][] dstSeparators, byte[] prefix)
-  {
-    this.columns = columns;
-
-    int tempMaxColumn = Integer.MIN_VALUE;
-    for (int column : columns) {
-      if (column > tempMaxColumn) {
-        tempMaxColumn = column;
-      }
-    }
-    maxIndex = tempMaxColumn;
-    maxColumn = tempMaxColumn + 1;
-    positions = new int[maxColumn + 1];
-    this.srcSeparator = srcSeparator;
-    this.dstSeparators = dstSeparators;
-    this.prefix = prefix;
-  }
-
-  @Override
-  public void initialize()
-  {
-    /* no-op */
-  }
-
-  @Override
-  public Event intercept(Event event)
-  {
-    byte[] body = event.getBody();
-    if (body == null) {
-      return event;
-    }
-
-    final int length = body.length;
-
-    /* store positions of character after the separators */
-    int i = 0;
-    int index = 0;
-    while (i < length) {
-      if (body[i++] == srcSeparator) {
-        positions[++index] = i;
-        if (index >= maxIndex) {
-          break;
-        }
-      }
-    }
-
-    int nextVirginIndex;
-    boolean separatorAtEnd = true;
-    if (i == length && index < maxColumn) {
-      nextVirginIndex = index + 2;
-      positions[nextVirginIndex - 1] = length;
-      separatorAtEnd = length > 0 ? body[length - 1] == srcSeparator : false;
-    } else {
-      nextVirginIndex = index + 1;
-    }
-
-    int newArrayLen = prefix.length;
-    for (i = columns.length; i-- > 0; ) {
-      int column = columns[i];
-      int len = positions[column + 1] - positions[column];
-      if (len > 0) {
-        if (positions[column + 1] == length && !separatorAtEnd) {
-          newArrayLen += len;
-        } else {
-          newArrayLen += len - 1;
-        }
-      }
-      newArrayLen += dstSeparators[i].length;
-    }
-
-    byte[] newBody = new byte[newArrayLen];
-    int newOffset = 0;
-    if (prefix.length > 0) {
-      System.arraycopy(prefix, 0, newBody, 0, prefix.length);
-      newOffset += prefix.length;
-    }
-    int dstSeparatorsIdx = 0;
-    for (int column : columns) {
-      int len = positions[column + 1] - positions[column];
-      byte[] separator = dstSeparators[dstSeparatorsIdx++];
-      if (len > 0) {
-        System.arraycopy(body, positions[column], newBody, newOffset, len);
-        newOffset += len;
-        if (newBody[newOffset - 1] == srcSeparator) {
-          newOffset--;
-        }
-      }
-      System.arraycopy(separator, 0, newBody, newOffset, separator.length);
-      newOffset += separator.length;
-    }
-    event.setBody(newBody);
-    Arrays.fill(positions, 1, nextVirginIndex, 0);
-    return event;
-  }
-
-  @Override
-  public List<Event> intercept(List<Event> events)
-  {
-    for (Event event : events) {
-      intercept(event);
-    }
-    return events;
-  }
-
-  @Override
-  public void close()
-  {
-  }
-
-  public static class Builder implements Interceptor.Builder
-  {
-    private int[] columns;
-    private byte srcSeparator;
-    private byte[][] dstSeparators;
-    private byte[] prefix;
-
-    @Override
-    public Interceptor build()
-    {
-      return new ColumnFilteringFormattingInterceptor(columns, srcSeparator, dstSeparators, prefix);
-    }
-
-    @Override
-    public void configure(Context context)
-    {
-      String formatter = context.getString(COLUMNS_FORMATTER);
-      if (Strings.isNullOrEmpty(formatter)) {
-        throw new IllegalArgumentException("This interceptor requires columns format to be specified!");
-      }
-      List<String> lSeparators = Lists.newArrayList();
-      List<Integer> lColumns = Lists.newArrayList();
-      Pattern colPat = Pattern.compile("\\{\\d+?\\}");
-      Matcher matcher = colPat.matcher(formatter);
-      int separatorStart = 0;
-      String lPrefix = "";
-      while (matcher.find()) {
-        String col = matcher.group();
-        lColumns.add(Integer.parseInt(col.substring(1, col.length() - 1)));
-        if (separatorStart == 0 && matcher.start() > 0) {
-          lPrefix = formatter.substring(0, matcher.start());
-        } else if (separatorStart > 0) {
-          lSeparators.add(formatter.substring(separatorStart, matcher.start()));
-        }
-
-        separatorStart = matcher.end();
-      }
-      if (separatorStart < formatter.length()) {
-        lSeparators.add(formatter.substring(separatorStart, formatter.length()));
-      }
-      columns = Ints.toArray(lColumns);
-      byte[] emptyStringBytes = "".getBytes();
-
-      dstSeparators = new byte[columns.length][];
-
-      for (int i = 0; i < columns.length; i++) {
-        if (i < lSeparators.size()) {
-          dstSeparators[i] = lSeparators.get(i).getBytes();
-        } else {
-          dstSeparators[i] = emptyStringBytes;
-        }
-      }
-      srcSeparator = context.getInteger(SRC_SEPARATOR, (int)SRC_SEPARATOR_DFLT).byteValue();
-      this.prefix = lPrefix.getBytes();
-    }
-  }
-
-  public static class Constants extends ColumnFilteringInterceptor.Constants
-  {
-    public static final String COLUMNS_FORMATTER = "columnsFormatter";
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(ColumnFilteringFormattingInterceptor.class);
-
-}

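Per Builder.configure() above, the columnsFormatter value is scanned for {N} tokens: the numbers select source columns, literal text between and after the tokens becomes the output separators, and any text before the first token becomes a prefix; srcSeparator is the byte the incoming body is split on, defaulting to 2. The sketch below only shows how the builder could be driven programmatically with the same keys an agent file would use; it is not code from this commit and assumes the relocated package org.apache.apex.malhar.flume.interceptor.

    import org.apache.flume.Context;
    import org.apache.flume.Event;
    import org.apache.flume.event.EventBuilder;
    import org.apache.flume.interceptor.Interceptor;

    import org.apache.apex.malhar.flume.interceptor.ColumnFilteringFormattingInterceptor;

    public class InterceptorSketch
    {
      public static void main(String[] args)
      {
        // Same keys a flume agent configuration would use for this interceptor.
        Context context = new Context();
        context.put("columnsFormatter", "{1}|{3}|");  // {N} tokens select columns, '|' becomes the separator
        context.put("srcSeparator", "44");            // split incoming bodies on ',' (byte 44) instead of the default 2

        Interceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
        builder.configure(context);
        Interceptor interceptor = builder.build();
        interceptor.initialize();

        Event event = EventBuilder.withBody("a,b,c,d,e,".getBytes());
        Event filtered = interceptor.intercept(event);
        System.out.println(new String(filtered.getBody()));  // print whatever the interceptor produced
      }
    }
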
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
deleted file mode 100644
index a2f598f..0000000
--- a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.interceptor;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.interceptor.Interceptor;
-
-import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.COLUMNS;
-import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.DST_SEPARATOR;
-import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.DST_SEPARATOR_DFLT;
-import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.SRC_SEPARATOR;
-import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.SRC_SEPARATOR_DFLT;
-
-/**
- * <p>ColumnFilteringInterceptor class.</p>
- *
- * @since 0.9.4
- */
-public class ColumnFilteringInterceptor implements Interceptor
-{
-  private final byte srcSeparator;
-  private final byte dstSeparator;
-
-  private final int maxIndex;
-  private final int maxColumn;
-  private final int[] columns;
-  private final int[] positions;
-
-  private ColumnFilteringInterceptor(int[] columns, byte srcSeparator, byte dstSeparator)
-  {
-    this.columns = columns;
-
-    int tempMaxColumn = Integer.MIN_VALUE;
-    for (int column: columns) {
-      if (column > tempMaxColumn) {
-        tempMaxColumn = column;
-      }
-    }
-    maxIndex = tempMaxColumn;
-    maxColumn = tempMaxColumn + 1;
-    positions = new int[maxColumn + 1];
-
-    this.srcSeparator = srcSeparator;
-    this.dstSeparator = dstSeparator;
-  }
-
-  @Override
-  public void initialize()
-  {
-    /* no-op */
-  }
-
-  @Override
-  public Event intercept(Event event)
-  {
-    byte[] body = event.getBody();
-    if (body == null) {
-      return event;
-    }
-
-    final int length = body.length;
-
-    /* store positions of character after the separators */
-    int i = 0;
-    int index = 0;
-    while (i < length) {
-      if (body[i++] == srcSeparator) {
-        positions[++index] = i;
-        if (index >= maxIndex) {
-          break;
-        }
-      }
-    }
-
-    int nextVirginIndex;
-    boolean separatorTerminated;
-    if (i == length && index < maxColumn) {
-      nextVirginIndex = index + 2;
-      positions[nextVirginIndex - 1] = length;
-      separatorTerminated = length > 0 ? body[length - 1]  != srcSeparator : false;
-    } else {
-      nextVirginIndex = index + 1;
-      separatorTerminated = true;
-    }
-
-    int newArrayLen = 0;
-    for (i = columns.length; i-- > 0;) {
-      int column = columns[i];
-      int len = positions[column + 1] - positions[column];
-      if (len <= 0) {
-        newArrayLen++;
-      } else {
-        if (separatorTerminated && positions[column + 1] == length) {
-          newArrayLen++;
-        }
-        newArrayLen += len;
-      }
-    }
-
-    byte[] newbody = new byte[newArrayLen];
-    int newoffset = 0;
-    for (int column: columns) {
-      int len = positions[column + 1] - positions[column];
-      if (len > 0) {
-        System.arraycopy(body, positions[column], newbody, newoffset, len);
-        newoffset += len;
-        if (newbody[newoffset - 1] == srcSeparator) {
-          newbody[newoffset - 1] = dstSeparator;
-        } else {
-          newbody[newoffset++] = dstSeparator;
-        }
-      } else {
-        newbody[newoffset++] = dstSeparator;
-      }
-    }
-
-    event.setBody(newbody);
-    Arrays.fill(positions, 1, nextVirginIndex, 0);
-    return event;
-  }
-
-  @Override
-  public List<Event> intercept(List<Event> events)
-  {
-    for (Event event: events) {
-      intercept(event);
-    }
-    return events;
-  }
-
-  @Override
-  public void close()
-  {
-  }
-
-  public static class Builder implements Interceptor.Builder
-  {
-    private int[] columns;
-    private byte srcSeparator;
-    private byte dstSeparator;
-
-    @Override
-    public Interceptor build()
-    {
-      return new ColumnFilteringInterceptor(columns, srcSeparator, dstSeparator);
-    }
-
-    @Override
-    public void configure(Context context)
-    {
-      String sColumns = context.getString(COLUMNS);
-      if (sColumns == null || sColumns.trim().isEmpty()) {
-        throw new Error("This interceptor requires filtered columns to be specified!");
-      }
-
-      String[] parts = sColumns.split(" ");
-      columns = new int[parts.length];
-      for (int i = parts.length; i-- > 0;) {
-        columns[i] = Integer.parseInt(parts[i]);
-      }
-
-      srcSeparator = context.getInteger(SRC_SEPARATOR, (int)SRC_SEPARATOR_DFLT).byteValue();
-      dstSeparator = context.getInteger(DST_SEPARATOR, (int)DST_SEPARATOR_DFLT).byteValue();
-    }
-
-  }
-
-  @SuppressWarnings("ClassMayBeInterface") /* adhering to flume until i understand it completely */
-
-  public static class Constants
-  {
-    public static final String SRC_SEPARATOR = "srcSeparator";
-    public static final byte SRC_SEPARATOR_DFLT = 2;
-
-    public static final String DST_SEPARATOR = "dstSeparator";
-    public static final byte DST_SEPARATOR_DFLT = 1;
-
-    public static final String COLUMNS = "columns";
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(ColumnFilteringInterceptor.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java b/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
deleted file mode 100644
index d772ff5..0000000
--- a/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
+++ /dev/null
@@ -1,761 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.operator;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.concurrent.ArrayBlockingQueue;
-
-import javax.validation.constraints.Min;
-import javax.validation.constraints.NotNull;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Event;
-
-import com.datatorrent.api.Context;
-import com.datatorrent.api.Context.OperatorContext;
-import com.datatorrent.api.DefaultOutputPort;
-import com.datatorrent.api.DefaultPartition;
-import com.datatorrent.api.InputOperator;
-import com.datatorrent.api.Operator;
-import com.datatorrent.api.Partitioner;
-import com.datatorrent.api.Stats.OperatorStats;
-import com.datatorrent.api.StreamCodec;
-import com.datatorrent.flume.discovery.Discovery.Service;
-import com.datatorrent.flume.discovery.ZKAssistedDiscovery;
-import com.datatorrent.flume.sink.Server;
-import com.datatorrent.flume.sink.Server.Command;
-import com.datatorrent.flume.sink.Server.Request;
-import com.datatorrent.netlet.AbstractLengthPrependerClient;
-import com.datatorrent.netlet.DefaultEventLoop;
-import com.datatorrent.netlet.util.Slice;
-
-import static java.lang.Thread.sleep;
-
-/**
- * <p>
- * Abstract AbstractFlumeInputOperator class.</p>
- *
- * @param <T> Type of the output payload.
- * @since 0.9.2
- */
-public abstract class AbstractFlumeInputOperator<T>
-    implements InputOperator, Operator.ActivationListener<OperatorContext>, Operator.IdleTimeHandler,
-    Operator.CheckpointListener, Partitioner<AbstractFlumeInputOperator<T>>
-{
-  public final transient DefaultOutputPort<T> output = new DefaultOutputPort<T>();
-  public final transient DefaultOutputPort<Slice> drop = new DefaultOutputPort<Slice>();
-  @NotNull
-  private String[] connectionSpecs;
-  @NotNull
-  private StreamCodec<Event> codec;
-  private final ArrayList<RecoveryAddress> recoveryAddresses;
-  @SuppressWarnings("FieldMayBeFinal") // it's not final because that mucks with the serialization somehow
-  private transient ArrayBlockingQueue<Slice> handoverBuffer;
-  private transient int idleCounter;
-  private transient int eventCounter;
-  private transient DefaultEventLoop eventloop;
-  private transient volatile boolean connected;
-  private transient OperatorContext context;
-  private transient Client client;
-  private transient long windowId;
-  private transient byte[] address;
-  @Min(0)
-  private long maxEventsPerSecond;
-  //This is calculated from maxEventsPerSecond, App window count and streaming window size
-  private transient long maxEventsPerWindow;
-
-  public AbstractFlumeInputOperator()
-  {
-    handoverBuffer = new ArrayBlockingQueue<Slice>(1024 * 5);
-    connectionSpecs = new String[0];
-    recoveryAddresses = new ArrayList<RecoveryAddress>();
-    maxEventsPerSecond = Long.MAX_VALUE;
-  }
-
-  @Override
-  public void setup(OperatorContext context)
-  {
-    long windowDurationMillis = context.getValue(OperatorContext.APPLICATION_WINDOW_COUNT) *
-        context.getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS);
-    maxEventsPerWindow = (long)(windowDurationMillis / 1000.0 * maxEventsPerSecond);
-    logger.debug("max-events per-second {} per-window {}", maxEventsPerSecond, maxEventsPerWindow);
-
-    try {
-      eventloop = new DefaultEventLoop("EventLoop-" + context.getId());
-      eventloop.start();
-    } catch (IOException ex) {
-      throw new RuntimeException(ex);
-    }
-  }
-
-  @Override
-  @SuppressWarnings({"unchecked"})
-  public void activate(OperatorContext ctx)
-  {
-    if (connectionSpecs.length == 0) {
-      logger.info("Discovered zero DTFlumeSink");
-    } else if (connectionSpecs.length == 1) {
-      for (String connectAddresse: connectionSpecs) {
-        logger.debug("Connection spec is {}", connectAddresse);
-        String[] parts = connectAddresse.split(":");
-        eventloop.connect(new InetSocketAddress(parts[1], Integer.parseInt(parts[2])), client = new Client(parts[0]));
-      }
-    } else {
-      throw new IllegalArgumentException(
-          String.format("A physical %s operator cannot connect to more than 1 addresses!",
-              this.getClass().getSimpleName()));
-    }
-
-    context = ctx;
-  }
-
-  @Override
-  public void beginWindow(long windowId)
-  {
-    this.windowId = windowId;
-    idleCounter = 0;
-    eventCounter = 0;
-  }
-
-  @Override
-  public void emitTuples()
-  {
-    int i = handoverBuffer.size();
-    if (i > 0 && eventCounter < maxEventsPerWindow) {
-
-      while (--i > 0 && eventCounter < maxEventsPerWindow - 1) {
-        final Slice slice = handoverBuffer.poll();
-        slice.offset += 8;
-        slice.length -= 8;
-        T convert = convert((Event)codec.fromByteArray(slice));
-        if (convert == null) {
-          drop.emit(slice);
-        } else {
-          output.emit(convert);
-        }
-        eventCounter++;
-      }
-
-      final Slice slice = handoverBuffer.poll();
-      slice.offset += 8;
-      slice.length -= 8;
-      T convert = convert((Event)codec.fromByteArray(slice));
-      if (convert == null) {
-        drop.emit(slice);
-      } else {
-        output.emit(convert);
-      }
-      eventCounter++;
-
-      address = Arrays.copyOfRange(slice.buffer, slice.offset - 8, slice.offset);
-    }
-  }
-
-  @Override
-  public void endWindow()
-  {
-    if (connected) {
-      byte[] array = new byte[Request.FIXED_SIZE];
-
-      array[0] = Command.WINDOWED.getOrdinal();
-      Server.writeInt(array, 1, eventCounter);
-      Server.writeInt(array, 5, idleCounter);
-      Server.writeLong(array, Request.TIME_OFFSET, System.currentTimeMillis());
-
-      logger.debug("wrote {} with eventCounter = {} and idleCounter = {}", Command.WINDOWED, eventCounter, idleCounter);
-      client.write(array);
-    }
-
-    if (address != null) {
-      RecoveryAddress rAddress = new RecoveryAddress();
-      rAddress.address = address;
-      address = null;
-      rAddress.windowId = windowId;
-      recoveryAddresses.add(rAddress);
-    }
-  }
-
-  @Override
-  public void deactivate()
-  {
-    if (connected) {
-      eventloop.disconnect(client);
-    }
-    context = null;
-  }
-
-  @Override
-  public void teardown()
-  {
-    eventloop.stop();
-    eventloop = null;
-  }
-
-  @Override
-  public void handleIdleTime()
-  {
-    idleCounter++;
-    try {
-      sleep(context.getValue(OperatorContext.SPIN_MILLIS));
-    } catch (InterruptedException ex) {
-      throw new RuntimeException(ex);
-    }
-  }
-
-  public abstract T convert(Event event);
-
-  /**
-   * @return the connectAddress
-   */
-  public String[] getConnectAddresses()
-  {
-    return connectionSpecs.clone();
-  }
-
-  /**
-   * @param specs - sinkid:host:port specification of all the sinks.
-   */
-  public void setConnectAddresses(String[] specs)
-  {
-    this.connectionSpecs = specs.clone();
-  }
-
-  /**
-   * @return the codec
-   */
-  public StreamCodec<Event> getCodec()
-  {
-    return codec;
-  }
-
-  /**
-   * @param codec the codec to set
-   */
-  public void setCodec(StreamCodec<Event> codec)
-  {
-    this.codec = codec;
-  }
-
-  private static class RecoveryAddress implements Serializable
-  {
-    long windowId;
-    byte[] address;
-
-    @Override
-    public String toString()
-    {
-      return "RecoveryAddress{" + "windowId=" + windowId + ", address=" + Arrays.toString(address) + '}';
-    }
-
-    @Override
-    public boolean equals(Object o)
-    {
-      if (this == o) {
-        return true;
-      }
-      if (!(o instanceof RecoveryAddress)) {
-        return false;
-      }
-
-      RecoveryAddress that = (RecoveryAddress)o;
-
-      if (windowId != that.windowId) {
-        return false;
-      }
-      return Arrays.equals(address, that.address);
-    }
-
-    @Override
-    public int hashCode()
-    {
-      int result = (int)(windowId ^ (windowId >>> 32));
-      result = 31 * result + (address != null ? Arrays.hashCode(address) : 0);
-      return result;
-    }
-
-    private static final long serialVersionUID = 201312021432L;
-  }
-
-  @Override
-  public void checkpointed(long windowId)
-  {
-    /* dont do anything */
-  }
-
-  @Override
-  public void committed(long windowId)
-  {
-    if (!connected) {
-      return;
-    }
-
-    synchronized (recoveryAddresses) {
-      byte[] addr = null;
-
-      Iterator<RecoveryAddress> iterator = recoveryAddresses.iterator();
-      while (iterator.hasNext()) {
-        RecoveryAddress ra = iterator.next();
-        if (ra.windowId > windowId) {
-          break;
-        }
-
-        iterator.remove();
-        if (ra.address != null) {
-          addr = ra.address;
-        }
-      }
-
-      if (addr != null) {
-        /*
-         * Make sure that we store the last valid address processed
-         */
-        if (recoveryAddresses.isEmpty()) {
-          RecoveryAddress ra = new RecoveryAddress();
-          ra.address = addr;
-          recoveryAddresses.add(ra);
-        }
-
-        int arraySize = 1/* for the type of the message */
-            + 8 /* for the location to commit */
-            + 8 /* for storing the current time stamp*/;
-        byte[] array = new byte[arraySize];
-
-        array[0] = Command.COMMITTED.getOrdinal();
-        System.arraycopy(addr, 0, array, 1, 8);
-        Server.writeLong(array, Request.TIME_OFFSET, System.currentTimeMillis());
-        logger.debug("wrote {} with recoveryOffset = {}", Command.COMMITTED, Arrays.toString(addr));
-        client.write(array);
-      }
-    }
-  }
-
-  @Override
-  public Collection<Partition<AbstractFlumeInputOperator<T>>> definePartitions(
-      Collection<Partition<AbstractFlumeInputOperator<T>>> partitions, PartitioningContext context)
-  {
-    Collection<Service<byte[]>> discovered = discoveredFlumeSinks.get();
-    if (discovered == null) {
-      return partitions;
-    }
-
-    HashMap<String, ArrayList<RecoveryAddress>> allRecoveryAddresses = abandonedRecoveryAddresses.get();
-    ArrayList<String> allConnectAddresses = new ArrayList<String>(partitions.size());
-    for (Partition<AbstractFlumeInputOperator<T>> partition: partitions) {
-      String[] lAddresses = partition.getPartitionedInstance().connectionSpecs;
-      allConnectAddresses.addAll(Arrays.asList(lAddresses));
-      for (int i = lAddresses.length; i-- > 0;) {
-        String[] parts = lAddresses[i].split(":", 2);
-        allRecoveryAddresses.put(parts[0], partition.getPartitionedInstance().recoveryAddresses);
-      }
-    }
-
-    HashMap<String, String> connections = new HashMap<String, String>(discovered.size());
-    for (Service<byte[]> service: discovered) {
-      String previousSpec = connections.get(service.getId());
-      String newspec = service.getId() + ':' + service.getHost() + ':' + service.getPort();
-      if (previousSpec == null) {
-        connections.put(service.getId(), newspec);
-      } else {
-        boolean found = false;
-        for (ConnectionStatus cs: partitionedInstanceStatus.get().values()) {
-          if (previousSpec.equals(cs.spec) && !cs.connected) {
-            connections.put(service.getId(), newspec);
-            found = true;
-            break;
-          }
-        }
-
-        if (!found) {
-          logger.warn("2 sinks found with the same id: {} and {}... Ignoring previous.", previousSpec, newspec);
-          connections.put(service.getId(), newspec);
-        }
-      }
-    }
-
-    for (int i = allConnectAddresses.size(); i-- > 0;) {
-      String[] parts = allConnectAddresses.get(i).split(":");
-      String connection = connections.remove(parts[0]);
-      if (connection == null) {
-        allConnectAddresses.remove(i);
-      } else {
-        allConnectAddresses.set(i, connection);
-      }
-    }
-
-    allConnectAddresses.addAll(connections.values());
-
-    partitions.clear();
-    try {
-      if (allConnectAddresses.isEmpty()) {
-        /* return at least one of them; otherwise stram becomes grumpy */
-        @SuppressWarnings("unchecked")
-        AbstractFlumeInputOperator<T> operator = getClass().newInstance();
-        operator.setCodec(codec);
-        operator.setMaxEventsPerSecond(maxEventsPerSecond);
-        for (ArrayList<RecoveryAddress> lRecoveryAddresses: allRecoveryAddresses.values()) {
-          operator.recoveryAddresses.addAll(lRecoveryAddresses);
-        }
-        operator.connectionSpecs = new String[allConnectAddresses.size()];
-        for (int i = connectionSpecs.length; i-- > 0;) {
-          connectionSpecs[i] = allConnectAddresses.get(i);
-        }
-
-        partitions.add(new DefaultPartition<AbstractFlumeInputOperator<T>>(operator));
-      } else {
-        long maxEventsPerSecondPerOperator = maxEventsPerSecond / allConnectAddresses.size();
-        for (int i = allConnectAddresses.size(); i-- > 0;) {
-          @SuppressWarnings("unchecked")
-          AbstractFlumeInputOperator<T> operator = getClass().newInstance();
-          operator.setCodec(codec);
-          operator.setMaxEventsPerSecond(maxEventsPerSecondPerOperator);
-          String connectAddress = allConnectAddresses.get(i);
-          operator.connectionSpecs = new String[] {connectAddress};
-
-          String[] parts = connectAddress.split(":", 2);
-          ArrayList<RecoveryAddress> remove = allRecoveryAddresses.remove(parts[0]);
-          if (remove != null) {
-            operator.recoveryAddresses.addAll(remove);
-          }
-
-          partitions.add(new DefaultPartition<AbstractFlumeInputOperator<T>>(operator));
-        }
-      }
-    } catch (IllegalAccessException ex) {
-      throw new RuntimeException(ex);
-    } catch (InstantiationException ex) {
-      throw new RuntimeException(ex);
-    }
-
-    logger.debug("Requesting partitions: {}", partitions);
-    return partitions;
-  }
-
-  @Override
-  public void partitioned(Map<Integer, Partition<AbstractFlumeInputOperator<T>>> partitions)
-  {
-    logger.debug("Partitioned Map: {}", partitions);
-    HashMap<Integer, ConnectionStatus> map = partitionedInstanceStatus.get();
-    map.clear();
-    for (Entry<Integer, Partition<AbstractFlumeInputOperator<T>>> entry: partitions.entrySet()) {
-      if (map.containsKey(entry.getKey())) {
-        // what can be done here?
-      } else {
-        map.put(entry.getKey(), null);
-      }
-    }
-  }
-
-  @Override
-  public String toString()
-  {
-    return "AbstractFlumeInputOperator{" + "connected=" + connected + ", connectionSpecs=" +
-        (connectionSpecs.length == 0 ? "empty" : connectionSpecs[0]) + ", recoveryAddresses=" + recoveryAddresses + '}';
-  }
-
-  class Client extends AbstractLengthPrependerClient
-  {
-    private final String id;
-
-    Client(String id)
-    {
-      this.id = id;
-    }
-
-    @Override
-    public void onMessage(byte[] buffer, int offset, int size)
-    {
-      try {
-        handoverBuffer.put(new Slice(buffer, offset, size));
-      } catch (InterruptedException ex) {
-        handleException(ex, eventloop);
-      }
-    }
-
-    @Override
-    public void connected()
-    {
-      super.connected();
-
-      byte[] address;
-      synchronized (recoveryAddresses) {
-        if (recoveryAddresses.size() > 0) {
-          address = recoveryAddresses.get(recoveryAddresses.size() - 1).address;
-        } else {
-          address = new byte[8];
-        }
-      }
-
-      int len = 1 /* for the message type SEEK */
-          + 8 /* for the address */
-          + 8 /* for storing the current time stamp*/;
-
-      byte[] array = new byte[len];
-      array[0] = Command.SEEK.getOrdinal();
-      System.arraycopy(address, 0, array, 1, 8);
-      Server.writeLong(array, 9, System.currentTimeMillis());
-      write(array);
-
-      connected = true;
-      ConnectionStatus connectionStatus = new ConnectionStatus();
-      connectionStatus.connected = true;
-      connectionStatus.spec = connectionSpecs[0];
-      OperatorContext ctx = context;
-      synchronized (ctx) {
-        logger.debug("{} Submitting ConnectionStatus = {}", AbstractFlumeInputOperator.this, connectionStatus);
-        context.setCounters(connectionStatus);
-      }
-    }
-
-    @Override
-    public void disconnected()
-    {
-      connected = false;
-      ConnectionStatus connectionStatus = new ConnectionStatus();
-      connectionStatus.connected = false;
-      connectionStatus.spec = connectionSpecs[0];
-      OperatorContext ctx = context;
-      synchronized (ctx) {
-        logger.debug("{} Submitting ConnectionStatus = {}", AbstractFlumeInputOperator.this, connectionStatus);
-        context.setCounters(connectionStatus);
-      }
-      super.disconnected();
-    }
-
-  }
-
-  public static class ZKStatsListner extends ZKAssistedDiscovery implements com.datatorrent.api.StatsListener,
-      Serializable
-  {
-    /*
-     * In the current design, one input operator is able to connect
-     * to only one flume adapter. Sometime in future, we should support
-     * any number of input operators connecting to any number of flume
-     * sinks and vice a versa.
-     *
-     * Until that happens the following map should be sufficient to
-     * keep track of which input operator is connected to which flume sink.
-     */
-    long intervalMillis;
-    private final Response response;
-    private transient long nextMillis;
-
-    public ZKStatsListner()
-    {
-      intervalMillis = 60 * 1000L;
-      response = new Response();
-    }
-
-    @Override
-    public Response processStats(BatchedOperatorStats stats)
-    {
-      final HashMap<Integer, ConnectionStatus> map = partitionedInstanceStatus.get();
-      response.repartitionRequired = false;
-
-      Object lastStat = null;
-      List<OperatorStats> lastWindowedStats = stats.getLastWindowedStats();
-      for (OperatorStats os: lastWindowedStats) {
-        if (os.counters != null) {
-          lastStat = os.counters;
-          logger.debug("Received custom stats = {}", lastStat);
-        }
-      }
-
-      if (lastStat instanceof ConnectionStatus) {
-        ConnectionStatus cs = (ConnectionStatus)lastStat;
-        map.put(stats.getOperatorId(), cs);
-        if (!cs.connected) {
-          logger.debug("setting repatitioned = true because of lastStat = {}", lastStat);
-          response.repartitionRequired = true;
-        }
-      }
-
-      if (System.currentTimeMillis() >= nextMillis) {
-        logger.debug("nextMillis = {}", nextMillis);
-        try {
-          super.setup(null);
-          Collection<Service<byte[]>> addresses;
-          try {
-            addresses = discover();
-          } finally {
-            super.teardown();
-          }
-          AbstractFlumeInputOperator.discoveredFlumeSinks.set(addresses);
-          logger.debug("\ncurrent map = {}\ndiscovered sinks = {}", map, addresses);
-          switch (addresses.size()) {
-            case 0:
-              response.repartitionRequired = map.size() != 1;
-              break;
-
-            default:
-              if (addresses.size() == map.size()) {
-                for (ConnectionStatus value: map.values()) {
-                  if (value == null || !value.connected) {
-                    response.repartitionRequired = true;
-                    break;
-                  }
-                }
-              } else {
-                response.repartitionRequired = true;
-              }
-              break;
-          }
-        } catch (Error er) {
-          throw er;
-        } catch (Throwable cause) {
-          logger.warn("Unable to discover services, using values from last successful discovery", cause);
-        } finally {
-          nextMillis = System.currentTimeMillis() + intervalMillis;
-          logger.debug("Proposed NextMillis = {}", nextMillis);
-        }
-      }
-
-      return response;
-    }
-
-    /**
-     * @return the intervalMillis
-     */
-    public long getIntervalMillis()
-    {
-      return intervalMillis;
-    }
-
-    /**
-     * @param intervalMillis the intervalMillis to set
-     */
-    public void setIntervalMillis(long intervalMillis)
-    {
-      this.intervalMillis = intervalMillis;
-    }
-
-    private static final long serialVersionUID = 201312241646L;
-  }
-
-  public static class ConnectionStatus implements Serializable
-  {
-    int id;
-    String spec;
-    boolean connected;
-
-    @Override
-    public int hashCode()
-    {
-      return spec.hashCode();
-    }
-
-    @Override
-    public boolean equals(Object obj)
-    {
-      if (obj == null) {
-        return false;
-      }
-      if (getClass() != obj.getClass()) {
-        return false;
-      }
-      final ConnectionStatus other = (ConnectionStatus)obj;
-      return spec == null ? other.spec == null : spec.equals(other.spec);
-    }
-
-    @Override
-    public String toString()
-    {
-      return "ConnectionStatus{" + "id=" + id + ", spec=" + spec + ", connected=" + connected + '}';
-    }
-
-    private static final long serialVersionUID = 201312261615L;
-  }
-
-  private static final transient ThreadLocal<HashMap<Integer, ConnectionStatus>> partitionedInstanceStatus =
-      new ThreadLocal<HashMap<Integer, ConnectionStatus>>()
-    {
-      @Override
-      protected HashMap<Integer, ConnectionStatus> initialValue()
-      {
-        return new HashMap<Integer, ConnectionStatus>();
-      }
-
-    };
-  /**
-   * When a sink goes away and a replacement sink is not found, we stash the recovery addresses associated
-   * with the sink in a hope that the new sink may show up in near future.
-   */
-  private static final transient ThreadLocal<HashMap<String, ArrayList<RecoveryAddress>>> abandonedRecoveryAddresses =
-      new ThreadLocal<HashMap<String, ArrayList<RecoveryAddress>>>()
-  {
-    @Override
-    protected HashMap<String, ArrayList<RecoveryAddress>> initialValue()
-    {
-      return new HashMap<String, ArrayList<RecoveryAddress>>();
-    }
-
-  };
-  private static final transient ThreadLocal<Collection<Service<byte[]>>> discoveredFlumeSinks =
-      new ThreadLocal<Collection<Service<byte[]>>>();
-
-  @Override
-  public boolean equals(Object o)
-  {
-    if (this == o) {
-      return true;
-    }
-    if (!(o instanceof AbstractFlumeInputOperator)) {
-      return false;
-    }
-
-    AbstractFlumeInputOperator<?> that = (AbstractFlumeInputOperator<?>)o;
-
-    if (!Arrays.equals(connectionSpecs, that.connectionSpecs)) {
-      return false;
-    }
-    return recoveryAddresses.equals(that.recoveryAddresses);
-
-  }
-
-  @Override
-  public int hashCode()
-  {
-    int result = connectionSpecs != null ? Arrays.hashCode(connectionSpecs) : 0;
-    result = 31 * result + (recoveryAddresses.hashCode());
-    return result;
-  }
-
-  public void setMaxEventsPerSecond(long maxEventsPerSecond)
-  {
-    this.maxEventsPerSecond = maxEventsPerSecond;
-  }
-
-  public long getMaxEventsPerSecond()
-  {
-    return maxEventsPerSecond;
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(AbstractFlumeInputOperator.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java b/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
deleted file mode 100644
index 55d3d61..0000000
--- a/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
+++ /dev/null
@@ -1,572 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.sink;
-
-import java.io.IOError;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.ServiceConfigurationError;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.EventDeliveryException;
-import org.apache.flume.Transaction;
-import org.apache.flume.conf.Configurable;
-import org.apache.flume.sink.AbstractSink;
-
-import com.datatorrent.api.Component;
-import com.datatorrent.api.StreamCodec;
-import com.datatorrent.flume.discovery.Discovery;
-import com.datatorrent.flume.sink.Server.Client;
-import com.datatorrent.flume.sink.Server.Request;
-import com.datatorrent.flume.storage.EventCodec;
-import com.datatorrent.flume.storage.Storage;
-import com.datatorrent.netlet.DefaultEventLoop;
-import com.datatorrent.netlet.NetletThrowable;
-import com.datatorrent.netlet.NetletThrowable.NetletRuntimeException;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * DTFlumeSink is a flume sink developed to ingest the data into DataTorrent DAG
- * from flume. It's essentially a flume sink which acts as a server capable of
- * talking to one client at a time. The client for this server is AbstractFlumeInputOperator.
- * <p />
- * &lt;experimental&gt;DTFlumeSink auto adjusts the rate at which it consumes the data from channel to
- * match the throughput of the DAG.&lt;/experimental&gt;
- * <p />
- * The properties you can set on the DTFlumeSink are: <br />
- * id - string unique value identifying this sink <br />
- * hostname - string value indicating the fqdn or ip address of the interface on which the server should listen <br />
- * port - integer value indicating the numeric port to which the server should bind <br />
- * sleepMillis - integer value indicating the number of milliseconds the process should sleep when there are no events
- * before checking for next event again <br />
- * throughputAdjustmentPercent - integer value indicating by what percentage the flume transaction size should be
- * adjusted upward or downward at a time <br />
- * minimumEventsPerTransaction - integer value indicating the minimum number of events per transaction <br />
- * maximumEventsPerTransaction - integer value indicating the maximum number of events per transaction. This value can
- * not be more than channel's transaction capacity.<br />
- *
- * @since 0.9.2
- */
-public class DTFlumeSink extends AbstractSink implements Configurable
-{
-  private static final String HOSTNAME_STRING = "hostname";
-  private static final String HOSTNAME_DEFAULT = "locahost";
-  private static final long ACCEPTED_TOLERANCE = 20000;
-  private DefaultEventLoop eventloop;
-  private Server server;
-  private int outstandingEventsCount;
-  private int lastConsumedEventsCount;
-  private int idleCount;
-  private byte[] playback;
-  private Client client;
-  private String hostname;
-  private int port;
-  private String id;
-  private long acceptedTolerance;
-  private long sleepMillis;
-  private double throughputAdjustmentFactor;
-  private int minimumEventsPerTransaction;
-  private int maximumEventsPerTransaction;
-  private long commitEventTimeoutMillis;
-  private transient long lastCommitEventTimeMillis;
-  private Storage storage;
-  Discovery<byte[]> discovery;
-  StreamCodec<Event> codec;
-  /* Begin implementing Flume Sink interface */
-
-  @Override
-  @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch", "UseSpecificCatch", "SleepWhileInLoop"})
-  public Status process() throws EventDeliveryException
-  {
-    Slice slice;
-    synchronized (server.requests) {
-      for (Request r : server.requests) {
-        logger.debug("found {}", r);
-        switch (r.type) {
-          case SEEK:
-            lastCommitEventTimeMillis = System.currentTimeMillis();
-            slice = r.getAddress();
-            playback = storage.retrieve(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
-            client = r.client;
-            break;
-
-          case COMMITTED:
-            lastCommitEventTimeMillis = System.currentTimeMillis();
-            slice = r.getAddress();
-            storage.clean(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
-            break;
-
-          case CONNECTED:
-            logger.debug("Connected received, ignoring it!");
-            break;
-
-          case DISCONNECTED:
-            if (r.client == client) {
-              client = null;
-              outstandingEventsCount = 0;
-            }
-            break;
-
-          case WINDOWED:
-            lastConsumedEventsCount = r.getEventCount();
-            idleCount = r.getIdleCount();
-            outstandingEventsCount -= lastConsumedEventsCount;
-            break;
-
-          case SERVER_ERROR:
-            throw new IOError(null);
-
-          default:
-            logger.debug("Cannot understand the request {}", r);
-            break;
-        }
-      }
-
-      server.requests.clear();
-    }
-
-    if (client == null) {
-      logger.info("No client expressed interest yet to consume the events.");
-      return Status.BACKOFF;
-    } else if (System.currentTimeMillis() - lastCommitEventTimeMillis > commitEventTimeoutMillis) {
-      logger.info("Client has not processed the workload given for the last {} milliseconds, so backing off.",
-          System.currentTimeMillis() - lastCommitEventTimeMillis);
-      return Status.BACKOFF;
-    }
-
-    int maxTuples;
-    // the following logic needs to be fixed... this is a quick put together.
-    if (outstandingEventsCount < 0) {
-      if (idleCount > 1) {
-        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
-      } else {
-        maxTuples = (int)((1 + throughputAdjustmentFactor) * lastConsumedEventsCount);
-      }
-    } else if (outstandingEventsCount > lastConsumedEventsCount) {
-      maxTuples = (int)((1 - throughputAdjustmentFactor) * lastConsumedEventsCount);
-    } else {
-      if (idleCount > 0) {
-        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
-        if (maxTuples <= 0) {
-          maxTuples = minimumEventsPerTransaction;
-        }
-      } else {
-        maxTuples = lastConsumedEventsCount;
-      }
-    }
-
-    if (maxTuples >= maximumEventsPerTransaction) {
-      maxTuples = maximumEventsPerTransaction;
-    } else if (maxTuples <= 0) {
-      maxTuples = minimumEventsPerTransaction;
-    }
-
-    if (maxTuples > 0) {
-      if (playback != null) {
-        try {
-          int i = 0;
-          do {
-            if (!client.write(playback)) {
-              retryWrite(playback, null);
-            }
-            outstandingEventsCount++;
-            playback = storage.retrieveNext();
-          }
-          while (++i < maxTuples && playback != null);
-        } catch (Exception ex) {
-          logger.warn("Playback Failed", ex);
-          if (ex instanceof NetletThrowable) {
-            try {
-              eventloop.disconnect(client);
-            } finally {
-              client = null;
-              outstandingEventsCount = 0;
-            }
-          }
-          return Status.BACKOFF;
-        }
-      } else {
-        int storedTuples = 0;
-
-        Transaction t = getChannel().getTransaction();
-        try {
-          t.begin();
-
-          Event e;
-          while (storedTuples < maxTuples && (e = getChannel().take()) != null) {
-            Slice event = codec.toByteArray(e);
-            byte[] address = storage.store(event);
-            if (address != null) {
-              if (!client.write(address, event)) {
-                retryWrite(address, event);
-              }
-              outstandingEventsCount++;
-            } else {
-              logger.debug("Detected the condition of recovery from flume crash!");
-            }
-            storedTuples++;
-          }
-
-          if (storedTuples > 0) {
-            storage.flush();
-          }
-
-          t.commit();
-
-          if (storedTuples > 0) { /* log less frequently */
-            logger.debug("Transaction details maxTuples = {}, storedTuples = {}, outstanding = {}",
-                maxTuples, storedTuples, outstandingEventsCount);
-          }
-        } catch (Error er) {
-          t.rollback();
-          throw er;
-        } catch (Exception ex) {
-          logger.error("Transaction Failed", ex);
-          if (ex instanceof NetletRuntimeException && client != null) {
-            try {
-              eventloop.disconnect(client);
-            } finally {
-              client = null;
-              outstandingEventsCount = 0;
-            }
-          }
-          t.rollback();
-          return Status.BACKOFF;
-        } finally {
-          t.close();
-        }
-
-        if (storedTuples == 0) {
-          sleep();
-        }
-      }
-    }
-
-    return Status.READY;
-  }
-
-  private void sleep()
-  {
-    try {
-      Thread.sleep(sleepMillis);
-    } catch (InterruptedException ex) {
-      Thread.currentThread().interrupt();
-    }
-  }
-
-  @Override
-  public void start()
-  {
-    try {
-      if (storage instanceof Component) {
-        @SuppressWarnings("unchecked")
-        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
-        component.setup(null);
-      }
-      if (discovery instanceof Component) {
-        @SuppressWarnings("unchecked")
-        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
-        component.setup(null);
-      }
-      if (codec instanceof Component) {
-        @SuppressWarnings("unchecked")
-        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
-        component.setup(null);
-      }
-      eventloop = new DefaultEventLoop("EventLoop-" + id);
-      server = new Server(id, discovery,acceptedTolerance);
-    } catch (Error error) {
-      throw error;
-    } catch (RuntimeException re) {
-      throw re;
-    } catch (IOException ex) {
-      throw new RuntimeException(ex);
-    }
-
-    eventloop.start();
-    eventloop.start(hostname, port, server);
-    super.start();
-  }
-
-  @Override
-  public void stop()
-  {
-    try {
-      super.stop();
-    } finally {
-      try {
-        if (client != null) {
-          eventloop.disconnect(client);
-          client = null;
-        }
-
-        eventloop.stop(server);
-        eventloop.stop();
-
-        if (codec instanceof Component) {
-          @SuppressWarnings("unchecked")
-          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
-          component.teardown();
-        }
-        if (discovery instanceof Component) {
-          @SuppressWarnings("unchecked")
-          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
-          component.teardown();
-        }
-        if (storage instanceof Component) {
-          @SuppressWarnings("unchecked")
-          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
-          component.teardown();
-        }
-      } catch (Throwable cause) {
-        throw new ServiceConfigurationError("Failed Stop", cause);
-      }
-    }
-  }
-
-  /* End implementing Flume Sink interface */
-
-  /* Begin Configurable Interface */
-  @Override
-  public void configure(Context context)
-  {
-    hostname = context.getString(HOSTNAME_STRING, HOSTNAME_DEFAULT);
-    port = context.getInteger("port", 0);
-    id = context.getString("id");
-    if (id == null) {
-      id = getName();
-    }
-    acceptedTolerance = context.getLong("acceptedTolerance", ACCEPTED_TOLERANCE);
-    sleepMillis = context.getLong("sleepMillis", 5L);
-    throughputAdjustmentFactor = context.getInteger("throughputAdjustmentPercent", 5) / 100.0;
-    maximumEventsPerTransaction = context.getInteger("maximumEventsPerTransaction", 10000);
-    minimumEventsPerTransaction = context.getInteger("minimumEventsPerTransaction", 100);
-    commitEventTimeoutMillis = context.getLong("commitEventTimeoutMillis", Long.MAX_VALUE);
-
-    @SuppressWarnings("unchecked")
-    Discovery<byte[]> ldiscovery = configure("discovery", Discovery.class, context);
-    if (ldiscovery == null) {
-      logger.warn("Discovery agent not configured for the sink!");
-      discovery = new Discovery<byte[]>()
-      {
-        @Override
-        public void unadvertise(Service<byte[]> service)
-        {
-          logger.debug("Sink {} stopped listening on {}:{}", service.getId(), service.getHost(), service.getPort());
-        }
-
-        @Override
-        public void advertise(Service<byte[]> service)
-        {
-          logger.debug("Sink {} started listening on {}:{}", service.getId(), service.getHost(), service.getPort());
-        }
-
-        @Override
-        @SuppressWarnings("unchecked")
-        public Collection<Service<byte[]>> discover()
-        {
-          return Collections.EMPTY_SET;
-        }
-
-      };
-    } else {
-      discovery = ldiscovery;
-    }
-
-    storage = configure("storage", Storage.class, context);
-    if (storage == null) {
-      logger.warn("storage key missing... DTFlumeSink may lose data!");
-      storage = new Storage()
-      {
-        @Override
-        public byte[] store(Slice slice)
-        {
-          return null;
-        }
-
-        @Override
-        public byte[] retrieve(byte[] identifier)
-        {
-          return null;
-        }
-
-        @Override
-        public byte[] retrieveNext()
-        {
-          return null;
-        }
-
-        @Override
-        public void clean(byte[] identifier)
-        {
-        }
-
-        @Override
-        public void flush()
-        {
-        }
-
-      };
-    }
-
-    @SuppressWarnings("unchecked")
-    StreamCodec<Event> lCodec = configure("codec", StreamCodec.class, context);
-    if (lCodec == null) {
-      codec = new EventCodec();
-    } else {
-      codec = lCodec;
-    }
-
-  }
-
-  /* End Configurable Interface */
-
-  @SuppressWarnings({"UseSpecificCatch", "BroadCatchBlock", "TooBroadCatch"})
-  private static <T> T configure(String key, Class<T> clazz, Context context)
-  {
-    String classname = context.getString(key);
-    if (classname == null) {
-      return null;
-    }
-
-    try {
-      Class<?> loadClass = Thread.currentThread().getContextClassLoader().loadClass(classname);
-      if (clazz.isAssignableFrom(loadClass)) {
-        @SuppressWarnings("unchecked")
-        T object = (T)loadClass.newInstance();
-        if (object instanceof Configurable) {
-          Context context1 = new Context(context.getSubProperties(key + '.'));
-          String id = context1.getString(Storage.ID);
-          if (id == null) {
-            id = context.getString(Storage.ID);
-            logger.debug("{} inherited id={} from sink", key, id);
-            context1.put(Storage.ID, id);
-          }
-          ((Configurable)object).configure(context1);
-        }
-
-        return object;
-      } else {
-        logger.error("key class {} does not implement {} interface", classname, Storage.class.getCanonicalName());
-        throw new Error("Invalid storage " + classname);
-      }
-    } catch (Error error) {
-      throw error;
-    } catch (RuntimeException re) {
-      throw re;
-    } catch (Throwable t) {
-      throw new RuntimeException(t);
-    }
-  }
-
-  /**
-   * @return the hostname
-   */
-  String getHostname()
-  {
-    return hostname;
-  }
-
-  /**
-   * @param hostname the hostname to set
-   */
-  void setHostname(String hostname)
-  {
-    this.hostname = hostname;
-  }
-
-  /**
-   * @return the port
-   */
-  int getPort()
-  {
-    return port;
-  }
-
-  public long getAcceptedTolerance()
-  {
-    return acceptedTolerance;
-  }
-
-  public void setAcceptedTolerance(long acceptedTolerance)
-  {
-    this.acceptedTolerance = acceptedTolerance;
-  }
-
-  /**
-   * @param port the port to set
-   */
-  void setPort(int port)
-  {
-    this.port = port;
-  }
-
-  /**
-   * @return the discovery
-   */
-  Discovery<byte[]> getDiscovery()
-  {
-    return discovery;
-  }
-
-  /**
-   * @param discovery the discovery to set
-   */
-  void setDiscovery(Discovery<byte[]> discovery)
-  {
-    this.discovery = discovery;
-  }
-
-  /**
-   * Attempt the sequence of writing after sleeping twice and upon failure assume
-   * that the client connection has problems and hence close it.
-   *
-   * @param address
-   * @param e
-   * @throws IOException
-   */
-  private void retryWrite(byte[] address, Slice event) throws IOException
-  {
-    if (event == null) {  /* this happens for playback where address and event are sent as single object */
-      while (client.isConnected()) {
-        sleep();
-        if (client.write(address)) {
-          return;
-        }
-      }
-    } else {  /* this happens when the events are taken from the flume channel and writing first time failed */
-      while (client.isConnected()) {
-        sleep();
-        if (client.write(address, event)) {
-          return;
-        }
-      }
-    }
-
-    throw new IOException("Client disconnected!");
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSink.class);
-}


[12/13] apex-malhar git commit: Add and test ApplicationTests & complete README

Posted by th...@apache.org.
Add and test ApplicationTests & complete README


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/c48ec8c5
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/c48ec8c5
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/c48ec8c5

Branch: refs/heads/master
Commit: c48ec8c51f91c7cd3e24d1fed62cd7072af57865
Parents: d200737
Author: Oliver Winke <ol...@datatorrent.com>
Authored: Tue Mar 14 12:12:07 2017 -0700
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 flume/README.md                                 |  73 +++++++++
 flume/pom.xml                                   |  18 +++
 .../ColumnFilteringFormattingInterceptor.java   |   2 +-
 .../operator/AbstractFlumeInputOperator.java    |   8 +-
 .../apex/malhar/flume/sink/DTFlumeSink.java     |  10 +-
 .../apache/apex/malhar/flume/sink/Server.java   |   3 +-
 .../apex/malhar/flume/storage/HDFSStorage.java  |   3 +-
 .../discovery/ZKAssistedDiscoveryTest.java      |   4 +-
 .../integration/ApplicationDiscoveryTest.java   | 151 +++++++++++++++++++
 .../flume/integration/ApplicationTest.java      |  26 +++-
 .../apex/malhar/flume/sink/DTFlumeSinkTest.java |   3 +-
 .../resources/flume/conf/flume-conf.properties  |   4 +-
 .../test/resources/flume/conf/flume_simple.conf |  52 +++++++
 .../resources/flume/conf/flume_zkdiscovery.conf |  91 +++++++++++
 14 files changed, 425 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/README.md
----------------------------------------------------------------------
diff --git a/flume/README.md b/flume/README.md
index 1d0b2d9..ec8fae9 100644
--- a/flume/README.md
+++ b/flume/README.md
@@ -4,3 +4,76 @@ Flume
 The folder contains support for flume to be used with Apex. It comprises mainly of two components. First is an agent that sits on the flume side, receives data from flume and makes it available via a socket server. In effect it converts a push to a pull model. The second component is the input operator that reads from the agent.
 
 The project is started with the latest code at the time of the sub-module creation. For older history look at the flume sub-module in the older project called Megh (git@github.com:DataTorrent/Megh).
+
+
+## Setup flume agent:
+
+To set up the flume agent for the Apex input operator, flume's plugin-based
+architecture is used.
+
+Set up flume and make sure JAVA_HOME is set.
+
+Build malhar-flume with `mvn clean package -DskipTests`.
+The plugin `malhar-flume-ver.jar` and all necessary dependencies (`target/deps`) can now be found in the target directory.
+To add the plugin to your flume service, create a plugins.d directory in FLUME_HOME.
+
+Put the malhar-flume-ver.jar in `plugins.d/custom-plugin-name/lib/`
+and all the needed dependencies into `plugins.d/custom-plugin-name/libext/`
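
For example, with a plugin directory named `apex-flume` (the name is arbitrary; any directory name under plugins.d works), the resulting layout would be:

```
plugins.d/
  apex-flume/
    lib/
      malhar-flume-<version>.jar
    libext/
      <all jars copied from target/deps>
```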
+
+(As an alternative to flume's automatic plugins.d detection, jars can be added to the
+FLUME_CLASSPATH using a `flume-env.sh` script; see 'resources/flume-conf/flume-env.sample.sh'.
+In that case a maven repository must be available under $HOME/.m2 and the environment variable
+DT_FLUME_JAR must point to the plugin JAR.)
+
+***Flume configuration***  
+A basic flume configuration can be found in `src/test/resources/flume/conf/flume_simple.conf`.  
+A flume configuration using discovery service can be found in `src/test/resources/flume/conf/flume_zkdiscovery.conf`.  
+  Configuration files should be placed in flume's 'conf' directory and will be explicitly selected
+  when running flume-ng.
+
+In the configuration file, set `org.apache.apex.malhar.flume.sink.DTFlumeSink` for the **type**  
+and `org.apache.apex.malhar.flume.storage.HDFSStorage` for the **storage**,  
+as well as an **HDFS directory** for `baseDir`. The HDFS base directory needs
+to be created on HDFS.
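
For orientation, a minimal sink section of a flume agent configuration might look like the following sketch; the agent name `a1`, sink name `sink1`, channel `c1`, the port and the baseDir path are placeholders, not values prescribed by the shipped sample configs:

```
# illustrative sink definition for an agent named a1
a1.sinks.sink1.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
a1.sinks.sink1.id = sink1
a1.sinks.sink1.hostname = 127.0.0.1
a1.sinks.sink1.port = 9098
a1.sinks.sink1.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
a1.sinks.sink1.storage.baseDir = /user/flume/basedir
a1.sinks.sink1.channel = c1
```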
+
+For discovery, set `org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery` for each sink
+and configure each one to use the zookeeper service by adding the zookeeper address in `connectionString` as well as a `basePath`.
+These values also need to be set for **ZKListener** in the Apex application.
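
A sketch of the corresponding discovery settings, assuming the discovery class is given under the sink's `discovery` key and configured through its sub-properties; the exact keys below mirror the `connectionString` and `basePath` names mentioned above and are otherwise assumptions:

```
a1.sinks.sink1.discovery = org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery
a1.sinks.sink1.discovery.connectionString = 127.0.0.1:2181
a1.sinks.sink1.discovery.basePath = /flume/basepath
```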
+
+### Operator Usage
+
+An implementation of AbstractFlumeInputOperator can either simply connect
+to one flume sink or use discovery/zookeeper to detect flume sinks automatically
+and partition the operator accordingly at application startup.
+
+Implement the abstract method to convert a Flume event to a tuple:
+```java
+public abstract T convert(Event event);
+```
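
As a minimal sketch, an operator that simply passes the Flume event through unchanged (as the test operators in this module do) could look like this; the class name is illustrative:

```java
import org.apache.apex.malhar.flume.operator.AbstractFlumeInputOperator;
import org.apache.flume.Event;

// Pass-through operator: emits the received Flume event itself as the tuple.
public class PassThroughFlumeInputOperator extends AbstractFlumeInputOperator<Event>
{
  @Override
  public Event convert(Event event)
  {
    return event;
  }
}
```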
+
+Additionally, a StreamCodec for Flume events must be set. A codec implementation
+can be found in `storage/EventCodec.java`:
+```java
+setCodec(new EventCodec());
+```
+
+See `ApplicationDiscoveryTest.FlumeInputOperator` for an example operator implementation.
+##### Simple connection setup to one flume sink:
+For a simple connection to only one flume sink, set the connection address in the form `sinkid:host:port`:
+```java
+public void setConnectAddresses(String[] specs)
+```
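
A short sketch of wiring such an operator into a DAG with a direct connection; the operator name and the sink address follow the ApplicationTest in this module and are otherwise assumptions:

```java
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
  // "sinkid:host:port" must match the id, hostname and port configured on the flume sink
  flume.setConnectAddresses(new String[]{"sink1:127.0.0.1:9098"});
  flume.setCodec(new EventCodec());
  // downstream operators consume tuples from flume.output, e.g.
  // dag.addStream("FlumeEvents", flume.output, someOperator.input);
}
```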
+
+
+##### Setup using discovery/zookeeper:
+For a flume input operator to discover flume sinks and partition accordingly,
+a zookeeper service needs to be set up.
+
+An implementation of AbstractFlumeInputOperator needs to initialize a ZKStatsListener.
+It additionally needs to override **definePartitions** to set up the ZKStatsListener, discover addresses using discover(),
+and set them in discoveredFlumeSinks before calling the parent's definePartitions method.
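
A minimal sketch of such an override, condensed from the FlumeInputOperator in ApplicationDiscoveryTest (imports omitted; the listener's connectionString and basePath are assumed to be set when the DAG is populated):

```java
public static class FlumeInputOperator extends AbstractFlumeInputOperator<Event>
{
  public ZKStatsListner zkListener = new AbstractFlumeInputOperator.ZKStatsListner();
  private boolean first = true;

  @Override
  public Event convert(Event event)
  {
    return event;
  }

  @Override
  public Collection<Partition<AbstractFlumeInputOperator<Event>>> definePartitions(
      Collection<Partition<AbstractFlumeInputOperator<Event>>> partitions, PartitioningContext context)
  {
    if (first) {
      first = false;
      zkListener.setup(null); // one-time initialization of the ZK-assisted discovery
    }
    // discover the currently advertised flume sinks and hand them to the parent partitioner
    discoveredFlumeSinks.set(zkListener.discover());
    return super.definePartitions(partitions, context);
  }
}
```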
+
+
+See `src/test/java/org/apache/apex/malhar/flume/integration/ApplicationDiscoveryTest.java`
+and `src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java`
+for test implementations.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/pom.xml
----------------------------------------------------------------------
diff --git a/flume/pom.xml b/flume/pom.xml
index 735a13b..851697e 100644
--- a/flume/pom.xml
+++ b/flume/pom.xml
@@ -175,6 +175,24 @@
           <argLine>-Xmx5000M</argLine>
         </configuration>
       </plugin>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <version>2.8</version>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>prepare-package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>target/deps</outputDirectory>
+              <includeScope>runtime</includeScope>
+              <excludeGroupIds>org.apache.hadoop</excludeGroupIds>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
   <dependencies>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
index bd7e5e0..11ec3ef 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
@@ -212,7 +212,7 @@ public class ColumnFilteringFormattingInterceptor implements Interceptor
           dstSeparators[i] = emptyStringBytes;
         }
       }
-      srcSeparator = context.getInteger(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, (int) ColumnFilteringInterceptor.Constants.SRC_SEPARATOR_DFLT).byteValue();
+      srcSeparator = context.getInteger(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, (int)ColumnFilteringInterceptor.Constants.SRC_SEPARATOR_DFLT).byteValue();
       this.prefix = lPrefix.getBytes();
     }
   }

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
index da1a8aa..f9beb71 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
@@ -33,11 +33,12 @@ import java.util.concurrent.ArrayBlockingQueue;
 
 import javax.validation.constraints.Min;
 import javax.validation.constraints.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery;
 import org.apache.apex.malhar.flume.sink.Server;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import org.apache.flume.Event;
 
@@ -50,7 +51,6 @@ import com.datatorrent.api.Operator;
 import com.datatorrent.api.Partitioner;
 import com.datatorrent.api.Stats.OperatorStats;
 import com.datatorrent.api.StreamCodec;
-import org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery;
 import com.datatorrent.netlet.AbstractLengthPrependerClient;
 import com.datatorrent.netlet.DefaultEventLoop;
 import com.datatorrent.netlet.util.Slice;
@@ -715,7 +715,7 @@ public abstract class AbstractFlumeInputOperator<T>
     }
 
   };
-  private static final transient ThreadLocal<Collection<Discovery.Service<byte[]>>> discoveredFlumeSinks =
+  protected static final transient ThreadLocal<Collection<Discovery.Service<byte[]>>> discoveredFlumeSinks =
       new ThreadLocal<Collection<Discovery.Service<byte[]>>>();
 
   @Override

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java b/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
index 306ce13..4f28850 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
@@ -25,11 +25,14 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.ServiceConfigurationError;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.apache.apex.malhar.flume.sink.Server.Client;
+import org.apache.apex.malhar.flume.sink.Server.Request;
 import org.apache.apex.malhar.flume.storage.EventCodec;
 import org.apache.apex.malhar.flume.storage.Storage;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import org.apache.flume.Context;
 import org.apache.flume.Event;
@@ -40,8 +43,7 @@ import org.apache.flume.sink.AbstractSink;
 
 import com.datatorrent.api.Component;
 import com.datatorrent.api.StreamCodec;
-import org.apache.apex.malhar.flume.sink.Server.Client;
-import org.apache.apex.malhar.flume.sink.Server.Request;
+
 import com.datatorrent.netlet.DefaultEventLoop;
 import com.datatorrent.netlet.NetletThrowable;
 import com.datatorrent.netlet.NetletThrowable.NetletRuntimeException;

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java b/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
index a771cb3..c8a8440 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
@@ -25,10 +25,11 @@ import java.nio.channels.SocketChannel;
 import java.util.ArrayList;
 import java.util.Arrays;
 
-import org.apache.apex.malhar.flume.discovery.Discovery;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.apex.malhar.flume.discovery.Discovery;
+
 import com.datatorrent.netlet.AbstractLengthPrependerClient;
 import com.datatorrent.netlet.AbstractServer;
 import com.datatorrent.netlet.EventLoop;

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java b/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
index 77aeb68..54716b7 100644
--- a/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
@@ -32,6 +32,8 @@ import javax.validation.constraints.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.apex.malhar.flume.sink.Server;
+
 import org.apache.flume.Context;
 import org.apache.flume.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
@@ -45,7 +47,6 @@ import com.google.common.primitives.Longs;
 
 import com.datatorrent.api.Component;
 import com.datatorrent.common.util.NameableThreadFactory;
-import org.apache.apex.malhar.flume.sink.Server;
 import com.datatorrent.netlet.util.Slice;
 
 /**

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
index 6503357..9db5d32 100644
--- a/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
@@ -24,11 +24,11 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.apex.malhar.flume.discovery.Discovery.Service;
+
 import org.apache.curator.x.discovery.ServiceInstance;
 import org.apache.curator.x.discovery.details.InstanceSerializer;
 
-import com.datatorrent.flume.discovery.Discovery.Service;
-
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertNotNull;
 

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationDiscoveryTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationDiscoveryTest.java
new file mode 100644
index 0000000..5486469
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationDiscoveryTest.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.integration;
+
+import java.util.Collection;
+
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.apache.apex.malhar.flume.operator.AbstractFlumeInputOperator;
+import org.apache.apex.malhar.flume.storage.EventCodec;
+import org.apache.flume.Event;
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.Context.OperatorContext;
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DAG.Locality;
+import com.datatorrent.api.DefaultInputPort;
+import com.datatorrent.api.LocalMode;
+import com.datatorrent.api.Operator;
+import com.datatorrent.api.StreamingApplication;
+
+/**
+ * baseDir needs to be created in HDFS
+ * Local zookeeper service needs to be running on default 127.0.0.1:2181
+ * Local flume service needs to be running using src/test/resources/flume/conf/flume_zkdiscovery.conf configuration
+ */
+@Ignore
+public class ApplicationDiscoveryTest implements StreamingApplication
+{
+  static int globalCount;
+
+  public static class FlumeInputOperator extends AbstractFlumeInputOperator<Event>
+  {
+    public ZKStatsListner zkListener = new AbstractFlumeInputOperator.ZKStatsListner();
+    private boolean first = true;
+
+
+    @Override
+    public Event convert(Event event)
+    {
+      return event;
+    }
+
+
+    @Override
+    public Collection<Partition<AbstractFlumeInputOperator<Event>>> definePartitions(Collection<Partition<AbstractFlumeInputOperator<Event>>> partitions, PartitioningContext context)
+    {
+      if (first) {
+        first = false;
+        zkListener.setup(null);
+      }
+      Collection<Discovery.Service<byte[]>> addresses;
+      addresses = zkListener.discover();
+      discoveredFlumeSinks.set(addresses);
+
+      return super.definePartitions(partitions, context);
+    }
+  }
+
+  public static class Counter implements Operator
+  {
+    private int count;
+    private transient Event event;
+    public final transient DefaultInputPort<Event> input = new DefaultInputPort<Event>()
+    {
+      @Override
+      public void process(Event tuple)
+      {
+        count++;
+        event = tuple;
+      }
+
+    };
+
+    @Override
+    public void beginWindow(long windowId)
+    {
+    }
+
+    @Override
+    public void endWindow()
+    {
+      if (event != null) {
+        logger.info("total count = {}, tuple = {}", count, new String(event.getBody()));
+      } else {
+        logger.info("total count = {}, tuple = {}", count, event);
+      }
+      globalCount = count;
+    }
+
+    @Override
+    public void setup(OperatorContext context)
+    {
+    }
+
+    @Override
+    public void teardown()
+    {
+    }
+
+    private static final Logger logger = LoggerFactory.getLogger(Counter.class);
+  }
+
+  @Override
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+    dag.setAttribute(com.datatorrent.api.Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);
+    FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
+    flume.setCodec(new EventCodec());
+    flume.zkListener.setConnectionString("127.0.0.1:2181");
+    flume.zkListener.setBasePath("/flume/basepath");
+    Counter counter = dag.addOperator("Counter", new Counter());
+
+    dag.addStream("Slices", flume.output, counter.input).setLocality(Locality.CONTAINER_LOCAL);
+  }
+
+  @Test
+  public void test()
+  {
+    try {
+      LocalMode.runApp(this, 10000);
+    } catch (Exception ex) {
+      logger.warn("The dag does not seem to be testable yet; if it is, remove this exception handling", ex);
+    }
+    // the flume seq source generates 10 events which are fanned out to both sinks -> 20
+    Assert.assertEquals(20, globalCount);
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ApplicationDiscoveryTest.class);
+}
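
As the class comment above notes, this test expects a local ZooKeeper and a Flume agent
running with the flume_zkdiscovery.conf configuration. With a stock ZooKeeper and Flume
install (paths are assumptions; the agent name a1 comes from that configuration) this is
roughly:

    zkServer.sh start
    flume-ng agent --conf conf --conf-file conf/flume_zkdiscovery.conf --name a1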

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
index 10153bc..67c911c 100644
--- a/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
@@ -18,11 +18,15 @@
  */
 package org.apache.apex.malhar.flume.integration;
 
+import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.apex.malhar.flume.operator.AbstractFlumeInputOperator;
+import org.apache.apex.malhar.flume.storage.EventCodec;
+
 import org.apache.flume.Event;
 import org.apache.hadoop.conf.Configuration;
 
@@ -33,15 +37,17 @@ import com.datatorrent.api.DefaultInputPort;
 import com.datatorrent.api.LocalMode;
 import com.datatorrent.api.Operator;
 import com.datatorrent.api.StreamingApplication;
-import com.datatorrent.flume.operator.AbstractFlumeInputOperator;
-import com.datatorrent.flume.storage.EventCodec;
+
 
 /**
- *
+ * baseDir needs to be created in HDFS.
+ * A local ZooKeeper service needs to be running on the default address 127.0.0.1:2181.
+ * A local Flume agent needs to be running with the src/test/resources/flume/conf/flume_simple.conf configuration.
  */
 @Ignore
 public class ApplicationTest implements StreamingApplication
 {
+  static int globalCount;
   public static class FlumeInputOperator extends AbstractFlumeInputOperator<Event>
   {
     @Override
@@ -74,7 +80,12 @@ public class ApplicationTest implements StreamingApplication
     @Override
     public void endWindow()
     {
-      logger.debug("total count = {}, tuple = {}", count, event);
+      if (event != null) {
+        logger.info("total count = {}, tuple = {}", count, new String(event.getBody()));
+      } else {
+        logger.info("total count = {}, tuple = {}", count, event);
+      }
+      globalCount = count;
     }
 
     @Override
@@ -95,7 +106,7 @@ public class ApplicationTest implements StreamingApplication
   {
     dag.setAttribute(com.datatorrent.api.Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);
     FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
-    flume.setConnectAddresses(new String[]{"test:127.0.0.1:8080"});
+    flume.setConnectAddresses(new String[]{"sink1:127.0.0.1:9098"});
     flume.setCodec(new EventCodec());
     Counter counter = dag.addOperator("Counter", new Counter());
 
@@ -106,11 +117,12 @@ public class ApplicationTest implements StreamingApplication
   public void test()
   {
     try {
-      LocalMode.runApp(this, Integer.MAX_VALUE);
+      LocalMode.runApp(this, 10000);
     } catch (Exception ex) {
       logger.warn("The dag seems to be not testable yet, if it's - remove this exception handling", ex);
     }
-
+    //flume source sequence generator is set to 10 in flume configuration
+    Assert.assertEquals(10, globalCount);
   }
 
   private static final Logger logger = LoggerFactory.getLogger(ApplicationTest.class);
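
The connect address set here follows the <sink id>:<host>:<port> pattern, so
"sink1:127.0.0.1:9098" has to line up with the sink this test relies on in
flume_simple.conf (a1.sinks.dt.id = sink1, hostname = 127.0.0.1, port = 9098).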

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
index 9bc69e8..f97d9c0 100644
--- a/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
@@ -29,9 +29,10 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.apex.malhar.flume.discovery.Discovery;
+
 import org.apache.flume.channel.MemoryChannel;
 
-import com.datatorrent.flume.discovery.Discovery;
 import com.datatorrent.netlet.AbstractLengthPrependerClient;
 import com.datatorrent.netlet.DefaultEventLoop;
 import com.datatorrent.netlet.util.Slice;

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/resources/flume/conf/flume-conf.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-conf.properties b/flume/src/test/resources/flume/conf/flume-conf.properties
index b796e6d..73dc79a 100644
--- a/flume/src/test/resources/flume/conf/flume-conf.properties
+++ b/flume/src/test/resources/flume/conf/flume-conf.properties
@@ -33,12 +33,12 @@ agent1.sources.netcatSource.channels = ch1
 agent1.sources.netcatSource.command = src/test/bash/subcat_periodically src/test/resources/test_data/dt_spend 10000 1
 # Pick and Reorder the columns we need from a larger record for efficiency
   agent1.sources.netcatSource.interceptors = columnchooser
-  agent1.sources.netcatSource.interceptors.columnchooser.type = com.datatorrent.flume.interceptor.ColumnFilteringInterceptor$Builder
+  agent1.sources.netcatSource.interceptors.columnchooser.type = org.apache.apex.malhar.flume.interceptor.ColumnFilteringInterceptor$Builder
   agent1.sources.netcatSource.interceptors.columnchooser.srcSeparator = 2
   agent1.sources.netcatSource.interceptors.columnchooser.dstSeparator = 1
   agent1.sources.netcatSource.interceptors.columnchooser.columns = 0 43 62 69 68 139 190 70 71 52 75 37 39 42 191 138
 
- agent2.sources.netcatSource.interceptors.columnchooser.type = com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor$Builder
+ agent2.sources.netcatSource.interceptors.columnchooser.type = org.apache.apex.malhar.flume.interceptor.ColumnFilteringFormattingInterceptor$Builder
  agent2.sources.netcatSource.interceptors.columnchooser.srcSeparator = 2
  agent2.sources.netcatSource.interceptors.columnchooser.columnsFormatter = {0}\u0001{43}\u0001{62}\u0001{69}\u0001{68}\u0001{139}\u0001{190}\u0001{70}\u0001{71}\u0001{52}\u0001{75}\u0001{37}\u0001{39}\u0001{42}\u0001{191}\u0001{138}\u0001
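
Both interceptor variants split the incoming event body on the byte value given by
srcSeparator (0x02 above) and keep only the listed column indices; the plain
ColumnFilteringInterceptor joins them with dstSeparator (0x01), while the formatting
variant substitutes them into the columnsFormatter template. As a hypothetical
illustration, an event body of a<0x02>b<0x02>c with columns = 0 2 would come out
roughly as a<0x01>c.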
 

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/resources/flume/conf/flume_simple.conf
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume_simple.conf b/flume/src/test/resources/flume/conf/flume_simple.conf
new file mode 100644
index 0000000..b902881
--- /dev/null
+++ b/flume/src/test/resources/flume/conf/flume_simple.conf
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# flume_simple.conf: A single-node Flume configuration
+
+# Name the components on this agent
+a1.sources = r1
+a1.sinks = dt 
+a1.channels = c1
+
+# sequence generator source that generates numbers from 0 to 9
+a1.sources.r1.type = seq
+a1.sources.r1.totalEvents = 10
+
+# sink - dt
+ a1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ a1.sinks.dt.id = sink1
+ a1.sinks.dt.hostname = 127.0.0.1
+ a1.sinks.dt.port = 9098
+ a1.sinks.dt.sleepMillis = 7
+ a1.sinks.dt.throughputAdjustmentFactor = 2
+ a1.sinks.dt.maximumEventsPerTransaction = 5000
+ a1.sinks.dt.minimumEventsPerTransaction = 1
+ a1.sinks.dt.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
+ a1.sinks.dt.storage.restore = false
+ a1.sinks.dt.storage.baseDir = /tmp/flume101
+ a1.sinks.dt.channel = c1
+
+# Use a channel which buffers events in memory
+a1.channels.c1.type = memory
+a1.channels.c1.capacity = 1000
+a1.channels.c1.transactionCapacity = 100
+
+# Bind the source and sink to the channel
+a1.sources.r1.channels = c1
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/c48ec8c5/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf b/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
new file mode 100644
index 0000000..6f8932c
--- /dev/null
+++ b/flume/src/test/resources/flume/conf/flume_zkdiscovery.conf
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# flume_zkdiscovery.conf: A single-node Flume configuration with ZooKeeper-assisted sink discovery
+
+# Name the components on this agent
+a1.sources = r1
+a1.sinks = dt dt2
+a1.channels = c1 c2
+
+# Alternative source for custom inputs
+#a1.sources.r1.type = netcat
+#a1.sources.r1.bind = 127.0.0.1
+#a1.sources.r1.port = 9097
+
+# sequence generator source that generates numbers from 0 to 9
+a1.sources.r1.type = seq
+a1.sources.r1.totalEvents = 10
+
+# first sink - dt
+ a1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ a1.sinks.dt.id = sink1
+ a1.sinks.dt.hostname = 127.0.0.1
+ a1.sinks.dt.port = 9098
+ a1.sinks.dt.sleepMillis = 7
+ a1.sinks.dt.throughputAdjustmentFactor = 2
+ a1.sinks.dt.maximumEventsPerTransaction = 5000
+ a1.sinks.dt.minimumEventsPerTransaction = 1
+ a1.sinks.dt.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
+ a1.sinks.dt.storage.restore = false
+ a1.sinks.dt.storage.baseDir = /tmp/flume101
+ a1.sinks.dt.channel = c1
+
+# second sink - dt2
+ a1.sinks.dt2.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
+ a1.sinks.dt2.id = sink2
+ a1.sinks.dt2.hostname = 127.0.0.1
+ a1.sinks.dt2.port = 9099
+ a1.sinks.dt2.sleepMillis = 7
+ a1.sinks.dt2.throughputAdjustmentFactor = 2
+ a1.sinks.dt2.maximumEventsPerTransaction = 5000
+ a1.sinks.dt2.minimumEventsPerTransaction = 1
+ a1.sinks.dt2.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
+ a1.sinks.dt2.storage.restore = false
+ a1.sinks.dt2.storage.baseDir = /tmp/flume101
+ a1.sinks.dt2.channel = c2
+
+# Use a channel which buffers events in memory
+ a1.channels.c1.type = memory
+ a1.channels.c1.capacity = 1000
+ a1.channels.c1.transactionCapacity = 100
+
+# Ensure that we are able to detect flume sinks (and failures) automatically.
+ a1.sinks.dt.discovery = org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery
+ a1.sinks.dt.discovery.connectionString = 127.0.0.1:2181
+ a1.sinks.dt.discovery.basePath = /flume/basepath
+ a1.sinks.dt.discovery.connectionTimeoutMillis = 1000
+ a1.sinks.dt.discovery.connectionRetryCount = 10
+ a1.sinks.dt.discovery.connectionRetrySleepMillis = 500
+
+# Ensure that we are able to detect flume sinks (and failures) automatically.
+ a1.sinks.dt2.discovery = org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery
+ a1.sinks.dt2.discovery.connectionString = 127.0.0.1:2181
+ a1.sinks.dt2.discovery.basePath = /flume/basepath
+ a1.sinks.dt2.discovery.connectionTimeoutMillis = 1000
+ a1.sinks.dt2.discovery.connectionRetryCount = 10
+ a1.sinks.dt2.discovery.connectionRetrySleepMillis = 500
+
+# Use a channel which buffers events in memory
+ a1.channels.c2.type = memory
+ a1.channels.c2.capacity = 1000
+ a1.channels.c2.transactionCapacity = 100
+
+# Bind the source and sink to the channel
+ a1.sources.r1.channels = c1 c2
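
Once the agent is up, each sink advertises itself through ZKAssistedDiscovery under the
configured basePath. A quick sanity check (assuming a standard ZooKeeper install) is to
open the ZooKeeper CLI and list that path:

    zkCli.sh -server 127.0.0.1:2181
    ls /flume/basepath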


[02/13] apex-malhar git commit: Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.

Posted by th...@apache.org.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageTest.java
new file mode 100644
index 0000000..d0c27f7
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageTest.java
@@ -0,0 +1,695 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestWatcher;
+import org.junit.runner.Description;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.flume.Context;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ */
+public class HDFSStorageTest
+{
+  public static class TestMeta extends TestWatcher
+  {
+    public String baseDir;
+    public String testFile;
+    private String testData = "No and yes. There is also IdleTimeHandler that allows the operator to emit tuples. " +
+        "There is overlap, why not have a single interface. \n" +
+        "Also consider the possibility of an operator that does other processing and not consume nor emit tuples,";
+
+    @Override
+    protected void starting(org.junit.runner.Description description)
+    {
+      String className = description.getClassName();
+      baseDir = "target/" + className;
+      try {
+        baseDir = (new File(baseDir)).getAbsolutePath();
+        FileUtils.forceMkdir(new File(baseDir));
+        testFile = baseDir + "/testInput.txt";
+        FileOutputStream outputStream = FileUtils.openOutputStream(new File(testFile));
+        outputStream.write(testData.getBytes());
+        outputStream.close();
+
+      } catch (IOException ex) {
+        throw new RuntimeException(ex);
+      }
+    }
+
+    @Override
+    protected void finished(Description description)
+    {
+      try {
+        FileUtils.deleteDirectory(new File(baseDir));
+      } catch (IOException ex) {
+        throw new RuntimeException(ex);
+      }
+    }
+  }
+
+  @Rule
+  public TestMeta testMeta = new TestMeta();
+
+  private String STORAGE_DIRECTORY;
+
+  private HDFSStorage getStorage(String id, boolean restore)
+  {
+    Context ctx = new Context();
+    STORAGE_DIRECTORY = testMeta.baseDir;
+    ctx.put(HDFSStorage.BASE_DIR_KEY, testMeta.baseDir);
+    ctx.put(HDFSStorage.RESTORE_KEY, Boolean.toString(restore));
+    ctx.put(HDFSStorage.ID, id);
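+    // keep the block size small (presumably bytes) so the test records trigger file roll-overs quickly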
+    ctx.put(HDFSStorage.BLOCKSIZE, "256");
+    HDFSStorage lstorage = new HDFSStorage();
+    lstorage.configure(ctx);
+    lstorage.setup(null);
+    return lstorage;
+  }
+
+  private HDFSStorage storage;
+
+  @Before
+  public void setup()
+  {
+    storage = getStorage("1", false);
+  }
+
+  @After
+  public void teardown()
+  {
+    storage.teardown();
+    try {
+      Thread.sleep(100);
+    } catch (InterruptedException e) {
+      throw new RuntimeException(e);
+    }
+    storage.cleanHelperFiles();
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored. 2. The file is flushed but not closed.
+   * 3. Some more data is stored but the file does not roll over. 4. Retrieve is called for the last returned address
+   * and it returns null. 5. Some more data is stored again but the returned address is null because of the previous
+   * retrieve call.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlush() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    match(storage.retrieve(new byte[8]), "ab");
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(address), "cb");
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored, not enough to cause a roll-over. 2. The file is
+   * flushed but not closed. 3. Some more data is stored; this time enough to make the file roll over. 4. Retrieve is
+   * called for the last returned address and it returns null as the data is not flushed. 5. Some more data is stored
+   * again but the returned address is null because of the previous retrieve call. 6. The data is flushed to make sure
+   * it is committed. 7. Now the data is retrieved from the beginning and the data returned matches the data stored.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushRollOver() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    match(storage.retrieve(new byte[8]), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored, not enough to cause a roll-over. 2. The file is
+   * flushed but not closed. 3. Some more data is stored; this time enough to make the file roll over. 4. The storage
+   * crashes and a new storage instance is instantiated. 5. Retrieve is called for the last returned address and it
+   * returns null as the data is not flushed. 6. Some more data is stored again but the returned address is null
+   * because of the previous retrieve call. 7. The data is flushed to make sure it is committed. 8. Now the data is
+   * retrieved from the beginning and the data returned matches the data stored.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushRollOverWithFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    match(storage.retrieve(new byte[8]), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This tests clean when the file doesn't roll over
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithClean() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(new byte[8]), "cb");
+    match(storage.retrieve(address), "cb");
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+  }
+
+  /**
+   * This tests clean when the file doesn't roll over and the storage fails over to a new instance
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithCleanAndFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(new byte[8]), "cb");
+    match(storage.retrieve(address), "cb");
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored, not enough to cause a roll-over. 2. The file is
+   * flushed but not closed. 3. The data is cleaned up to the last returned address. 4. Some more data is stored; this
+   * time enough to make the file roll over. 5. Retrieve is called for the last returned address and it returns null
+   * as the data is not flushed. 6. Some more data is stored again but the returned address is null because of the
+   * previous retrieve call. 7. The data is flushed to make sure it is committed. 8. Now the data is retrieved from
+   * the beginning and the data returned matches the data stored.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithCleanAndRollOver() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(new byte[8]), new String(b_org));
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This tests clean when the files roll over and the storage fails
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithCleanAndRollOverAndFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This test covers the following use case: The file is flushed and then more data is written to the same file, but
+   * the new data is not flushed, the file does not roll over and the storage fails. The new storage comes up and the
+   * client asks for data at the last address returned by the earlier storage instance. The new storage returns null.
+   * The client stores the data again; the address returned this time is null and retrieval of the earlier address
+   * now returns data.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(address), "cb");
+  }
+
+  private void match(byte[] data, String match)
+  {
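+    // retrieved data is prefixed with its 8-byte storage address; strip it before comparing the payload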
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", match, new String(tempData));
+  }
+
+  @Test
+  public void testStorage() throws IOException
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[200];
+    byte[] identifier;
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    byte[] data = storage.retrieve(new byte[8]);
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    identifier = storage.store(new Slice(b, 0, b.length));
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+    Assert.assertNull(storage.retrieve(identifier));
+  }
+
+  @Test
+  public void testStorageWithRestore() throws IOException
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[200];
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    storage.teardown();
+
+    storage = getStorage("1", true);
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    boolean exists = fs.exists(new Path(STORAGE_DIRECTORY + "/1/" + "1"));
+    Assert.assertEquals("file should exist", true, exists);
+  }
+
+  @Test
+  public void testCleanup() throws IOException
+  {
+    RandomAccessFile r = new RandomAccessFile(testMeta.testFile, "r");
+    r.seek(0);
+    byte[] b = r.readLine().getBytes();
+    storage.store(new Slice(b, 0, b.length));
+    byte[] val = storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    storage.clean(val);
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    boolean exists = fs.exists(new Path(STORAGE_DIRECTORY + "/" + "0"));
+    Assert.assertEquals("file should not exist", false, exists);
+    r.close();
+  }
+
+  @Test
+  public void testNext() throws IOException
+  {
+    RandomAccessFile r = new RandomAccessFile(testMeta.testFile, "r");
+    r.seek(0);
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = r.readLine().getBytes();
+    storage.store(new Slice(b, 0, b.length));
+    byte[] b1 = r.readLine().getBytes();
+    storage.store(new Slice(b1, 0, b1.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    storage.store(new Slice(b1, 0, b1.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    byte[] data = storage.retrieve(new byte[8]);
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+    data = storage.retrieveNext();
+    tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b1), new String(tempData));
+    data = storage.retrieveNext();
+    tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+    r.close();
+  }
+
+  @Test
+  public void testFailure() throws IOException
+  {
+    byte[] address;
+    byte[] b = new byte[200];
+    storage.retrieve(new byte[8]);
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      address = storage.store(new Slice(b, 0, b.length));
+      storage.flush();
+      storage.clean(address);
+    }
+    storage.teardown();
+
+    byte[] identifier = new byte[8];
+    storage = getStorage("1", true);
+
+    storage.retrieve(identifier);
+
+    storage.store(new Slice(b, 0, b.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    byte[] data = storage.retrieve(identifier);
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+  }
+
+  /**
+   * This test case tests the clean call before any flush is called.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testCleanUnflushedData() throws IOException
+  {
+    for (int i = 0; i < 5; i++) {
+      final byte[] bytes = (i + "").getBytes();
+      storage.store(new Slice(bytes, 0, bytes.length));
+    }
+    storage.clean(new byte[8]);
+    storage.flush();
+    match(storage.retrieve(new byte[8]), "0");
+    match(storage.retrieveNext(), "1");
+  }
+
+  @Test
+  public void testCleanForUnflushedData() throws IOException
+  {
+    byte[] address = null;
+    byte[] b = new byte[200];
+    storage.retrieve(new byte[8]);
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      address = storage.store(new Slice(b, 0, b.length));
+      storage.flush();
+      // storage.clean(address);
+    }
+    byte[] lastWrittenAddress = null;
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
+    }
+    storage.clean(lastWrittenAddress);
+    byte[] cleanedOffset = storage.readData(new Path(STORAGE_DIRECTORY + "/1/cleanoffsetFile"));
+    Assert.assertArrayEquals(address, cleanedOffset);
+
+  }
+
+  @Test
+  public void testCleanForFlushedData() throws IOException
+  {
+    byte[] b = new byte[200];
+    storage.retrieve(new byte[8]);
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      storage.store(new Slice(b, 0, b.length));
+      storage.flush();
+      // storage.clean(address);
+    }
+    byte[] lastWrittenAddress = null;
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    storage.clean(lastWrittenAddress);
+    byte[] cleanedOffset = storage.readData(new Path(STORAGE_DIRECTORY + "/1/cleanoffsetFile"));
+    Assert.assertArrayEquals(lastWrittenAddress, cleanedOffset);
+
+  }
+
+  @Test
+  public void testCleanForPartialFlushedData() throws IOException
+  {
+    byte[] b = new byte[8];
+    storage.retrieve(new byte[8]);
+
+    storage.store(new Slice(b, 0, b.length));
+    byte[] bytes = "1a".getBytes();
+    byte[] address = storage.store(new Slice(bytes, 0, bytes.length));
+    storage.flush();
+    storage.clean(address);
+
+    byte[] lastWrittenAddress = null;
+    for (int i = 0; i < 5; i++) {
+      final byte[] bytes1 = (i + "").getBytes();
+      storage.store(new Slice(bytes1, 0, bytes1.length));
+      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
+    }
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    Assert.assertNull(storage.retrieve(lastWrittenAddress));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    Assert.assertNull(storage.retrieve(lastWrittenAddress));
+  }
+
+  @Test
+  public void testRandomSequence() throws IOException
+  {
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    byte[] bytes = new byte[]{48, 48, 48, 51, 101, 100, 55, 56, 55, 49, 53, 99, 52, 101, 55, 50, 97, 52, 48, 49, 51,
+        99, 97, 54, 102, 57, 55, 53, 57, 100, 49, 99, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51,
+        45, 49, 49, 45, 48, 55, 32, 48, 48, 58, 48, 48, 58, 52, 54, 1, 52, 50, 49, 50, 51, 1, 50, 1, 49, 53, 49, 49,
+        52, 50, 54, 53, 1, 49, 53, 49, 49, 57, 51, 53, 49, 1, 49, 53, 49, 50, 57, 56, 50, 52, 1, 49, 53, 49, 50, 49,
+        55, 48, 55, 1, 49, 48, 48, 55, 55, 51, 57, 51, 1, 49, 57, 49, 52, 55, 50, 53, 52, 54, 49, 1, 49, 1, 48, 1, 48,
+        46, 48, 1, 48, 46, 48, 1, 48, 46, 48};
+    storage.store(new Slice(bytes, 0, bytes.length));
+    storage.flush();
+    storage.clean(new byte[]{-109, 0, 0, 0, 0, 0, 0, 0});
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2555; i++) {
+      byte[] bytes1 = new byte[]{48, 48, 48, 55, 56, 51, 98, 101, 50, 54, 50, 98, 52, 102, 50, 54, 56, 97, 55, 56, 102,
+          48, 54, 54, 50, 49, 49, 54, 99, 98, 101, 99, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51,
+          45, 49, 49, 45, 48, 55, 32, 48, 48, 58, 48, 48, 58, 53, 49, 1, 49, 49, 49, 49, 54, 51, 57, 1, 50, 1, 49, 53,
+          49, 48, 57, 57, 56, 51, 1, 49, 53, 49, 49, 49, 55, 48, 52, 1, 49, 53, 49, 50, 49, 51, 55, 49, 1, 49, 53, 49,
+          49, 52, 56, 51, 49, 1, 49, 48, 48, 55, 49, 57, 56, 49, 1, 49, 50, 48, 50, 55, 54, 49, 54, 56, 53, 1, 49, 1,
+          48, 1, 48, 46, 48, 1, 48, 46, 48, 1, 48, 46, 48};
+      storage.store(new Slice(bytes1, 0, bytes1.length));
+      storage.flush();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 1297; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 1302; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 1317; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2007; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2556; i++) {
+      storage.retrieveNext();
+    }
+    byte[] bytes1 = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    storage.store(new Slice(bytes1, 0, bytes1.length));
+    storage.flush();
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2062; i++) {
+      storage.retrieveNext();
+
+    }
+  }
+
+  @SuppressWarnings("unused")
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStorageTest.class);
+}
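
The tests above exercise the HDFSStorage contract: store() returns an 8-byte address for
the written record, flush() commits pending writes, retrieve()/retrieveNext() return the
payload prefixed with its 8-byte address, and clean() releases data up to a given address.
A minimal usage sketch in the spirit of getStorage() above (class name and base directory
are illustrative; it assumes the flume module and a local Hadoop filesystem on the
classpath):

    import java.util.Arrays;

    import org.apache.apex.malhar.flume.storage.HDFSStorage;
    import org.apache.flume.Context;

    import com.datatorrent.netlet.util.Slice;

    public class HDFSStorageSketch
    {
      public static void main(String[] args)
      {
        // configure the storage the same way the tests do
        Context ctx = new Context();
        ctx.put(HDFSStorage.BASE_DIR_KEY, "/tmp/hdfs-storage-sketch");
        ctx.put(HDFSStorage.RESTORE_KEY, "false");
        ctx.put(HDFSStorage.ID, "demo");

        HDFSStorage storage = new HDFSStorage();
        storage.configure(ctx);
        storage.setup(null);

        byte[] payload = "hello".getBytes();
        byte[] address = storage.store(new Slice(payload, 0, payload.length)); // 8-byte address of the record
        storage.flush();                                                       // commit before reading back

        byte[] data = storage.retrieve(new byte[8]);             // read from the beginning
        byte[] body = Arrays.copyOfRange(data, 8, data.length);  // strip the 8-byte address prefix
        System.out.println(new String(body));

        storage.clean(address);   // release data up to this address
        storage.teardown();
      }
    }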

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/flume/conf/flume-conf.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-conf.properties b/flume/src/test/resources/flume/conf/flume-conf.properties
index 29c81c0..b796e6d 100644
--- a/flume/src/test/resources/flume/conf/flume-conf.properties
+++ b/flume/src/test/resources/flume/conf/flume-conf.properties
@@ -63,7 +63,7 @@ agent1.sources.netcatSource.command = src/test/bash/subcat_periodically src/test
 
 # first sink - dt
 agent1.sinks.dt.id = CEVL00P
-agent1.sinks.dt.type = com.datatorrent.flume.sink.DTFlumeSink
+agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
 agent1.sinks.dt.hostname = localhost
 agent1.sinks.dt.port = 8080
 agent1.sinks.dt.sleepMillis = 7
@@ -72,13 +72,13 @@ agent1.sinks.dt.maximumEventsPerTransaction = 5000
 agent1.sinks.dt.minimumEventsPerTransaction = 1
 
 # Ensure that we do not lose the data handed over to us by flume.
-    agent1.sinks.dt.storage = com.datatorrent.flume.storage.HDFSStorage
+    agent1.sinks.dt.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
     agent1.sinks.dt.storage.restore = false
     agent1.sinks.dt.storage.baseDir = /tmp/flume101
     agent1.sinks.dt.channel = ch1
 
 # Ensure that we are able to detect flume sinks (and failures) automatically.
-   agent1.sinks.dt.discovery = com.datatorrent.flume.discovery.ZKAssistedDiscovery
+   agent1.sinks.dt.discovery = org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery
    agent1.sinks.dt.discovery.connectionString = 127.0.0.1:2181
    agent1.sinks.dt.discovery.basePath = /HelloDT
    agent1.sinks.dt.discovery.connectionTimeoutMillis = 1000

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/log4j.properties b/flume/src/test/resources/log4j.properties
index ac0a107..b7516d4 100644
--- a/flume/src/test/resources/log4j.properties
+++ b/flume/src/test/resources/log4j.properties
@@ -1,9 +1,11 @@
 #
-# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121500
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121500 b/flume/src/test/resources/test_data/gentxns/2013121500
deleted file mode 100644
index 3ce5646..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121500 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121500.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121500.txt b/flume/src/test/resources/test_data/gentxns/2013121500.txt
new file mode 100644
index 0000000..3ce5646
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121500.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121501
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121501 b/flume/src/test/resources/test_data/gentxns/2013121501
deleted file mode 100644
index b2e70c0..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121501 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121501.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121501.txt b/flume/src/test/resources/test_data/gentxns/2013121501.txt
new file mode 100644
index 0000000..b2e70c0
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121501.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121502
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121502 b/flume/src/test/resources/test_data/gentxns/2013121502
deleted file mode 100644
index ec13862..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121502 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121502.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121502.txt b/flume/src/test/resources/test_data/gentxns/2013121502.txt
new file mode 100644
index 0000000..ec13862
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121502.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121503
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121503 b/flume/src/test/resources/test_data/gentxns/2013121503
deleted file mode 100644
index 8267dd3..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121503 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121503.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121503.txt b/flume/src/test/resources/test_data/gentxns/2013121503.txt
new file mode 100644
index 0000000..8267dd3
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121503.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121504
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121504 b/flume/src/test/resources/test_data/gentxns/2013121504
deleted file mode 100644
index addfe62..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121504 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121504.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121504.txt b/flume/src/test/resources/test_data/gentxns/2013121504.txt
new file mode 100644
index 0000000..addfe62
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121504.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121505
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121505 b/flume/src/test/resources/test_data/gentxns/2013121505
deleted file mode 100644
index d76aa9f..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121505 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121505.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121505.txt b/flume/src/test/resources/test_data/gentxns/2013121505.txt
new file mode 100644
index 0000000..d76aa9f
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121505.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121506
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121506 b/flume/src/test/resources/test_data/gentxns/2013121506
deleted file mode 100644
index 2f5bbb6..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121506 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121506.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121506.txt b/flume/src/test/resources/test_data/gentxns/2013121506.txt
new file mode 100644
index 0000000..2f5bbb6
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121506.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121507
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121507 b/flume/src/test/resources/test_data/gentxns/2013121507
deleted file mode 100644
index a022dad..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121507 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121507.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121507.txt b/flume/src/test/resources/test_data/gentxns/2013121507.txt
new file mode 100644
index 0000000..a022dad
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121507.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121508
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121508 b/flume/src/test/resources/test_data/gentxns/2013121508
deleted file mode 100644
index d1e7f5c..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121508 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121508.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121508.txt b/flume/src/test/resources/test_data/gentxns/2013121508.txt
new file mode 100644
index 0000000..d1e7f5c
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121508.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121509
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121509 b/flume/src/test/resources/test_data/gentxns/2013121509
deleted file mode 100644
index 10d61de..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121509 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121509.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121509.txt b/flume/src/test/resources/test_data/gentxns/2013121509.txt
new file mode 100644
index 0000000..10d61de
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121509.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121510
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121510 b/flume/src/test/resources/test_data/gentxns/2013121510
deleted file mode 100644
index c2f76c8..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121510 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121510.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121510.txt b/flume/src/test/resources/test_data/gentxns/2013121510.txt
new file mode 100644
index 0000000..c2f76c8
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121510.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121511
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121511 b/flume/src/test/resources/test_data/gentxns/2013121511
deleted file mode 100644
index bf16cfe..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121511 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121511.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121511.txt b/flume/src/test/resources/test_data/gentxns/2013121511.txt
new file mode 100644
index 0000000..bf16cfe
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121511.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121512
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121512 b/flume/src/test/resources/test_data/gentxns/2013121512
deleted file mode 100644
index fe75419..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121512 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121512.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121512.txt b/flume/src/test/resources/test_data/gentxns/2013121512.txt
new file mode 100644
index 0000000..fe75419
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121512.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121513
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121513 b/flume/src/test/resources/test_data/gentxns/2013121513
deleted file mode 100644
index 3094cae..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121513 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121513.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121513.txt b/flume/src/test/resources/test_data/gentxns/2013121513.txt
new file mode 100644
index 0000000..3094cae
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121513.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121514
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121514 b/flume/src/test/resources/test_data/gentxns/2013121514
deleted file mode 100644
index 6e00e4a..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121514 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121514.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121514.txt b/flume/src/test/resources/test_data/gentxns/2013121514.txt
new file mode 100644
index 0000000..6e00e4a
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121514.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121515
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121515 b/flume/src/test/resources/test_data/gentxns/2013121515
deleted file mode 100644
index b860e43..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121515 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121515.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121515.txt b/flume/src/test/resources/test_data/gentxns/2013121515.txt
new file mode 100644
index 0000000..b860e43
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121515.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121516
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121516 b/flume/src/test/resources/test_data/gentxns/2013121516
deleted file mode 100644
index dfb5854..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121516 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121516.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121516.txt b/flume/src/test/resources/test_data/gentxns/2013121516.txt
new file mode 100644
index 0000000..dfb5854
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121516.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121517
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121517 b/flume/src/test/resources/test_data/gentxns/2013121517
deleted file mode 100644
index c8da2cc..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121517 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121517.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121517.txt b/flume/src/test/resources/test_data/gentxns/2013121517.txt
new file mode 100644
index 0000000..c8da2cc
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121517.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121518
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121518 b/flume/src/test/resources/test_data/gentxns/2013121518
deleted file mode 100644
index 2cb628b..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121518 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121518.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121518.txt b/flume/src/test/resources/test_data/gentxns/2013121518.txt
new file mode 100644
index 0000000..2cb628b
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121518.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121519
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121519 b/flume/src/test/resources/test_data/gentxns/2013121519
deleted file mode 100644
index 6fab9d9..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121519 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121519.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121519.txt b/flume/src/test/resources/test_data/gentxns/2013121519.txt
new file mode 100644
index 0000000..6fab9d9
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121519.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121520
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121520 b/flume/src/test/resources/test_data/gentxns/2013121520
deleted file mode 100644
index ba56d49..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121520 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121520.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121520.txt b/flume/src/test/resources/test_data/gentxns/2013121520.txt
new file mode 100644
index 0000000..ba56d49
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121520.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121521
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121521 b/flume/src/test/resources/test_data/gentxns/2013121521
deleted file mode 100644
index 37de926..0000000
Binary files a/flume/src/test/resources/test_data/gentxns/2013121521 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/resources/test_data/gentxns/2013121521.txt
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121521.txt b/flume/src/test/resources/test_data/gentxns/2013121521.txt
new file mode 100644
index 0000000..37de926
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121521.txt differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5b531ea..adc6de5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -202,6 +202,7 @@
         <module>apps</module>
         <module>samples</module>
         <module>sql</module>
+        <module>flume</module>
       </modules>
     </profile>
   </profiles>


[13/13] apex-malhar git commit: Licensing changes to make it ready for addition into apex

Posted by th...@apache.org.
Licensing changes to make it ready for addition into apex


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/bb89fe9e
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/bb89fe9e
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/bb89fe9e

Branch: refs/heads/master
Commit: bb89fe9ee62537a4228aa649c9b0d899f65cc87e
Parents: 2cfe153
Author: Apex Dev <de...@apex.apache.org>
Authored: Sun Feb 19 22:01:34 2017 +0530
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 flume/pom.xml                                         | 12 +++++++-----
 .../com/datatorrent/flume/discovery/Discovery.java    | 13 +++++++------
 .../flume/discovery/ZKAssistedDiscovery.java          | 13 +++++++------
 .../ColumnFilteringFormattingInterceptor.java         | 13 +++++++------
 .../flume/interceptor/ColumnFilteringInterceptor.java | 13 +++++++------
 .../flume/operator/AbstractFlumeInputOperator.java    | 13 +++++++------
 .../java/com/datatorrent/flume/sink/DTFlumeSink.java  | 13 +++++++------
 .../main/java/com/datatorrent/flume/sink/Server.java  | 13 +++++++------
 .../com/datatorrent/flume/source/HdfsTestSource.java  | 12 +++++++-----
 .../java/com/datatorrent/flume/source/TestSource.java | 12 +++++++-----
 .../com/datatorrent/flume/storage/DebugWrapper.java   | 13 +++++++------
 .../flume/storage/ErrorMaskingEventCodec.java         | 13 +++++++------
 .../com/datatorrent/flume/storage/EventCodec.java     | 13 +++++++------
 .../com/datatorrent/flume/storage/HDFSStorage.java    | 13 +++++++------
 .../java/com/datatorrent/flume/storage/Storage.java   | 13 +++++++------
 .../resources/flume-conf/flume-conf.sample.properties | 12 +++++++-----
 .../src/main/resources/flume-conf/flume-env.sample.sh | 12 +++++++-----
 .../flume/discovery/ZKAssistedDiscoveryTest.java      | 13 +++++++------
 .../flume/integration/ApplicationTest.java            | 13 +++++++------
 .../interceptor/ColumnFilteringInterceptorTest.java   | 14 ++++++++------
 .../flume/interceptor/InterceptorTestHelper.java      | 14 ++++++++------
 .../com/datatorrent/flume/interceptor/RawEvent.java   | 13 +++++++------
 .../operator/AbstractFlumeInputOperatorTest.java      | 13 +++++++------
 .../com/datatorrent/flume/sink/DTFlumeSinkTest.java   | 14 ++++++++------
 .../java/com/datatorrent/flume/sink/ServerTest.java   | 13 +++++++------
 .../flume/storage/HDFSStorageMatching.java            | 14 ++++++++------
 .../flume/storage/HDFSStoragePerformance.java         | 14 ++++++++------
 .../flume/storage/HDFSStoragePerformanceTest.java     | 13 +++++++------
 .../datatorrent/flume/storage/HDFSStorageTest.java    | 14 ++++++++------
 .../test/resources/flume/conf/flume-conf.properties   | 12 +++++++-----
 flume/src/test/resources/flume/conf/flume-env.sh      | 12 +++++++-----
 31 files changed, 223 insertions(+), 179 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/pom.xml
----------------------------------------------------------------------
diff --git a/flume/pom.xml b/flume/pom.xml
index ade05a0..6522148 100644
--- a/flume/pom.xml
+++ b/flume/pom.xml
@@ -1,11 +1,13 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
 
-    Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
 
       http://www.apache.org/licenses/LICENSE-2.0
 

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java b/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
index d802002..72a1440 100644
--- a/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
+++ b/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -26,7 +28,6 @@ import java.util.Collection;
  * Interesting parties can call discover method to get the list of addresses where
  * they can find an available DTFlumeSink server instance.
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @param <T> - Type of the objects which can be discovered
  * @since 0.9.3
  */
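
The javadoc retained in this hunk is the only prose description of the discovery contract: interested parties call the discover method to learn where running sink server instances can be reached. Purely as a reading aid, here is a minimal Java sketch of such a caller. It is not code from the repository: only the Discovery<byte[]> type and the discover() call are taken from the interface shown above, while the nested Service type, its accessors (getHost(), getPort()) and the helper name are assumptions made for illustration.

  import com.datatorrent.flume.discovery.Discovery;

  public class DiscoverySketch
  {
    // Prints host:port for every sink instance currently advertised through the
    // discovery service (for example a configured ZKAssistedDiscovery).
    // The Service accessor names are assumed for this sketch.
    public static void printAvailableSinks(Discovery<byte[]> discovery)
    {
      for (Discovery.Service<byte[]> service : discovery.discover()) {
        System.out.println(service.getHost() + ":" + service.getPort());
      }
    }
  }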

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java b/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
index 460a478..97ad8f0 100644
--- a/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
+++ b/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -48,7 +50,6 @@ import com.datatorrent.api.Component;
 /**
  * <p>ZKAssistedDiscovery class.</p>
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.3
  */
 public class ZKAssistedDiscovery implements Discovery<byte[]>,

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
index ce92f6d..fd20f99 100644
--- a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
+++ b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -39,7 +41,6 @@ import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterce
 /**
  * <p>ColumnFilteringFormattingInterceptor class.</p>
  *
- * @author Chandni Singh <ch...@datatorrent.com>
  * @since 0.9.4
  */
 public class ColumnFilteringFormattingInterceptor implements Interceptor

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
index 90c3a04..a2f598f 100644
--- a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
+++ b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -35,7 +37,6 @@ import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Const
 /**
  * <p>ColumnFilteringInterceptor class.</p>
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.4
  */
 public class ColumnFilteringInterceptor implements Interceptor

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java b/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
index 1ab7182..d772ff5 100644
--- a/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
+++ b/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -62,7 +64,6 @@ import static java.lang.Thread.sleep;
  * Abstract AbstractFlumeInputOperator class.</p>
  *
  * @param <T> Type of the output payload.
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.2
  */
 public abstract class AbstractFlumeInputOperator<T>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java b/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
index 35d0c5f..55d3d61 100644
--- a/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
+++ b/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -65,7 +67,6 @@ import com.datatorrent.netlet.util.Slice;
  * maximumEventsPerTransaction - integer value indicating the maximum number of events per transaction. This value can
  * not be more than channel's transaction capacity.<br />
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.2
  */
 public class DTFlumeSink extends AbstractSink implements Configurable
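
For orientation while reading this hunk: the javadoc it carries over documents maximumEventsPerTransaction, which must stay within the channel's transaction capacity. Below is a minimal, illustrative Flume agent fragment wiring those pieces together; it is a sketch, not one of the repository's sample configurations. The agent, sink and channel names (agent1, dt, ch1) and all numeric values are assumptions; the class names use the pre-rename com.datatorrent.flume packages that this commit still targets.

  # illustrative agent layout (names and figures are placeholders)
  agent1.sinks = dt
  agent1.channels = ch1

  agent1.sinks.dt.type = com.datatorrent.flume.sink.DTFlumeSink
  agent1.sinks.dt.channel = ch1
  # must not exceed the channel's transactionCapacity below
  agent1.sinks.dt.maximumEventsPerTransaction = 1000

  agent1.sinks.dt.storage = com.datatorrent.flume.storage.HDFSStorage
  agent1.sinks.dt.storage.baseDir = /tmp/flume-sink-storage

  agent1.channels.ch1.type = memory
  agent1.channels.ch1.capacity = 10000
  agent1.channels.ch1.transactionCapacity = 1000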

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/sink/Server.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/sink/Server.java b/flume/src/main/java/com/datatorrent/flume/sink/Server.java
index 14d9ff4..03c1ff0 100644
--- a/flume/src/main/java/com/datatorrent/flume/sink/Server.java
+++ b/flume/src/main/java/com/datatorrent/flume/sink/Server.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -37,7 +39,6 @@ import com.datatorrent.netlet.util.Slice;
  * <p>
  * Server class.</p>
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.2
  */
 public class Server extends AbstractServer

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java b/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
index 18aac37..72e1913 100644
--- a/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
+++ b/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/source/TestSource.java b/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
index 490ac35..5773de3 100644
--- a/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
+++ b/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java b/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
index c416418..da94154 100644
--- a/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
+++ b/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -28,7 +30,6 @@ import com.datatorrent.netlet.util.Slice;
 /**
  * <p>DebugWrapper class.</p>
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.4
  */
 public class DebugWrapper implements Storage, Configurable, Component<com.datatorrent.api.Context>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java b/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
index 59c7fd3..76f663c 100644
--- a/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
+++ b/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -26,7 +28,6 @@ import com.datatorrent.netlet.util.Slice;
 /**
  * <p>ErrorMaskingEventCodec class.</p>
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 1.0.4
  */
 public class ErrorMaskingEventCodec extends EventCodec

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java b/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
index 03d0d87..0ece548 100644
--- a/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
+++ b/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -37,7 +39,6 @@ import com.datatorrent.netlet.util.Slice;
 /**
  * <p>EventCodec class.</p>
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  * @since 0.9.4
  */
 public class EventCodec implements StreamCodec<Event>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java b/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
index 74849e9..4dcddcd 100644
--- a/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
+++ b/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -54,7 +56,6 @@ import com.datatorrent.netlet.util.Slice;
  * restore - This is used to restore the application from previous failure <br />
  * blockSize - The maximum size of the each file to created. <br />
  *
- * @author Gaurav Gupta <ga...@datatorrent.com>
  * @since 0.9.3
  */
 public class HDFSStorage implements Storage, Configurable, Component<com.datatorrent.api.Context>
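
The storage javadoc kept in this hunk names two more knobs: restore (re-attach to state left behind by a previous failure) and blockSize (upper bound on the size of each file the storage creates). Continuing the illustrative agent1/dt sketch shown after the DTFlumeSink hunk above, they would sit under the sink's storage prefix; the values below are placeholders, not recommendations, and the property keys are taken from the javadoc as written.

  # continuation of the illustrative agent1/dt sketch (placeholder values)
  agent1.sinks.dt.storage.restore = true
  agent1.sinks.dt.storage.blockSize = 67108864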

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/Storage.java b/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
index 9f3a010..5130f3c 100644
--- a/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
+++ b/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -21,7 +23,6 @@ import com.datatorrent.netlet.util.Slice;
 /**
  * <p>Storage interface.</p>
  *
- * @author Gaurav Gupta  <ga...@datatorrent.com>
  * @since 0.9.2
  */
 public interface Storage

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/resources/flume-conf/flume-conf.sample.properties
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-conf.sample.properties b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
index 9d3e430..1782d4a 100644
--- a/flume/src/main/resources/flume-conf/flume-conf.sample.properties
+++ b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
@@ -1,9 +1,11 @@
 #
-# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/main/resources/flume-conf/flume-env.sample.sh
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-env.sample.sh b/flume/src/main/resources/flume-conf/flume-env.sample.sh
index aca341c..570411b 100644
--- a/flume/src/main/resources/flume-conf/flume-env.sample.sh
+++ b/flume/src/main/resources/flume-conf/flume-env.sample.sh
@@ -1,10 +1,12 @@
 #!/bin/bash
 #
-# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java b/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
index 4acf764..f182edc 100644
--- a/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -32,7 +34,6 @@ import static org.junit.Assert.assertNotNull;
 
 /**
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  */
 @Ignore
 public class ZKAssistedDiscoveryTest

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java b/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
index 41364c8..8256916 100644
--- a/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -36,7 +38,6 @@ import com.datatorrent.flume.storage.EventCodec;
 
 /**
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  */
 @Ignore
 public class ApplicationTest implements StreamingApplication

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
index 464df42..11ee23f 100644
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -29,7 +31,7 @@ import org.apache.flume.interceptor.Interceptor;
 import static org.junit.Assert.assertArrayEquals;
 
 /**
- * @author Chetan Narsude <ch...@datatorrent.com>
+ *
  */
 public class ColumnFilteringInterceptorTest
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java b/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
index 739184f..dc95f08 100644
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -42,7 +44,7 @@ import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertNotNull;
 
 /**
- * @author Chetan Narsude <ch...@datatorrent.com>
+ *
  */
 public class InterceptorTestHelper
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java b/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
index 049609b..c029cd0 100644
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -27,7 +29,6 @@ import com.datatorrent.netlet.util.Slice;
 
 /**
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  */
 public class RawEvent implements Serializable
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java b/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
index a615496..2f162a8 100644
--- a/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -25,7 +27,6 @@ import static org.junit.Assert.assertTrue;
 
 /**
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  */
 public class AbstractFlumeInputOperatorTest
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java b/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
index 833a353..7949e63 100644
--- a/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -35,7 +37,7 @@ import com.datatorrent.netlet.DefaultEventLoop;
 import com.datatorrent.netlet.util.Slice;
 
 /**
- * @author Chetan Narsude <ch...@datatorrent.com>
+ *
  */
 public class DTFlumeSinkTest
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java b/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
index 64495db..8c225d1 100644
--- a/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -23,7 +25,6 @@ import org.junit.Test;
 
 /**
  *
- * @author Chetan Narsude <ch...@datatorrent.com>
  */
 public class ServerTest
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
index 05eeb4e..6b6adcb 100644
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -24,7 +26,7 @@ import com.google.common.primitives.Ints;
 import com.datatorrent.netlet.util.Slice;
 
 /**
- * @author Gaurav Gupta  <ga...@datatorrent.com>
+ *
  */
 public class HDFSStorageMatching
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
index 394ce0e..098f3f7 100644
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -22,7 +24,7 @@ import org.slf4j.LoggerFactory;
 import com.datatorrent.netlet.util.Slice;
 
 /**
- * @author Gaurav Gupta  <ga...@datatorrent.com>
+ *
  */
 public class HDFSStoragePerformance
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
index 08476c2..97e9aa8 100644
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -26,7 +28,6 @@ import com.datatorrent.netlet.util.Slice;
 /**
  * <p>HDFSStoragePerformanceTest class.</p>
  *
- * @author Gaurav Gupta  <ga...@datatorrent.com>
  * @since 1.0.1
  */
 public class HDFSStoragePerformanceTest

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
index b348c8f..0cb9935 100644
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
@@ -1,9 +1,11 @@
 /**
- * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -40,7 +42,7 @@ import org.apache.hadoop.fs.Path;
 import com.datatorrent.netlet.util.Slice;
 
 /**
- * @author Gaurav Gupta <ga...@datatorrent.com>
+ *
  */
 public class HDFSStorageTest
 {

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/resources/flume/conf/flume-conf.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-conf.properties b/flume/src/test/resources/flume/conf/flume-conf.properties
index c892c53..29c81c0 100644
--- a/flume/src/test/resources/flume/conf/flume-conf.properties
+++ b/flume/src/test/resources/flume/conf/flume-conf.properties
@@ -1,9 +1,11 @@
 #
-# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bb89fe9e/flume/src/test/resources/flume/conf/flume-env.sh
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-env.sh b/flume/src/test/resources/flume/conf/flume-env.sh
index c2232ea..436e670 100644
--- a/flume/src/test/resources/flume/conf/flume-env.sh
+++ b/flume/src/test/resources/flume/conf/flume-env.sh
@@ -1,10 +1,12 @@
 #!/bin/bash
 #
-# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #


[04/13] apex-malhar git commit: Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.

Posted by th...@apache.org.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/storage/ErrorMaskingEventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/storage/ErrorMaskingEventCodec.java b/flume/src/main/java/org/apache/apex/malhar/flume/storage/ErrorMaskingEventCodec.java
new file mode 100644
index 0000000..b8d2725
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/storage/ErrorMaskingEventCodec.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>ErrorMaskingEventCodec class.</p>
+ *
+ * @since 1.0.4
+ */
+public class ErrorMaskingEventCodec extends EventCodec
+{
+
+  @Override
+  public Object fromByteArray(Slice fragment)
+  {
+    try {
+      return super.fromByteArray(fragment);
+    } catch (RuntimeException re) {
+      logger.warn("Cannot deserialize event {}", fragment, re);
+    }
+
+    return null;
+  }
+
+  @Override
+  public Slice toByteArray(Event event)
+  {
+    try {
+      return super.toByteArray(event);
+    } catch (RuntimeException re) {
+      logger.warn("Cannot serialize event {}", event, re);
+    }
+
+    return null;
+  }
+
+
+  private static final Logger logger = LoggerFactory.getLogger(ErrorMaskingEventCodec.class);
+}
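
The codec above masks (de)serialization failures: the error is logged and null is returned instead of an exception propagating, so any caller has to tolerate null results. A minimal sketch of that calling pattern, with illustrative names only:

  import org.apache.flume.Event;

  import org.apache.apex.malhar.flume.storage.ErrorMaskingEventCodec;

  import com.datatorrent.netlet.util.Slice;

  public class ErrorMaskingSketch
  {
    public static void main(String[] args)
    {
      ErrorMaskingEventCodec codec = new ErrorMaskingEventCodec();
      // a deliberately bogus payload; deserialization is expected to fail,
      // in which case the codec logs the problem and hands back null
      Slice bogus = new Slice(new byte[]{1, 2, 3}, 0, 3);
      Event event = (Event)codec.fromByteArray(bogus);
      if (event == null) {
        System.out.println("corrupt record skipped");
      }
    }
  }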

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/storage/EventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/storage/EventCodec.java b/flume/src/main/java/org/apache/apex/malhar/flume/storage/EventCodec.java
new file mode 100644
index 0000000..463551e
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/storage/EventCodec.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
+import com.datatorrent.api.StreamCodec;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>EventCodec class.</p>
+ *
+ * @since 0.9.4
+ */
+public class EventCodec implements StreamCodec<Event>
+{
+  private final transient Kryo kryo;
+
+  public EventCodec()
+  {
+    this.kryo = new Kryo();
+    this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
+  }
+
+  @Override
+  public Object fromByteArray(Slice fragment)
+  {
+    ByteArrayInputStream is = new ByteArrayInputStream(fragment.buffer, fragment.offset, fragment.length);
+    Input input = new Input(is);
+
+    @SuppressWarnings("unchecked")
+    HashMap<String, String> headers = kryo.readObjectOrNull(input, HashMap.class);
+    byte[] body = kryo.readObjectOrNull(input, byte[].class);
+    return EventBuilder.withBody(body, headers);
+  }
+
+  @Override
+  public Slice toByteArray(Event event)
+  {
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    Output output = new Output(os);
+
+    Map<String, String> headers = event.getHeaders();
+    if (headers != null && headers.getClass() != HashMap.class) {
+      HashMap<String, String> tmp = new HashMap<String, String>(headers.size());
+      tmp.putAll(headers);
+      headers = tmp;
+    }
+    kryo.writeObjectOrNull(output, headers, HashMap.class);
+    kryo.writeObjectOrNull(output, event.getBody(), byte[].class);
+    output.flush();
+    final byte[] bytes = os.toByteArray();
+    return new Slice(bytes, 0, bytes.length);
+  }
+
+  @Override
+  public int getPartition(Event o)
+  {
+    return o.hashCode();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(EventCodec.class);
+}
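
EventCodec above serializes a Flume event's headers and body with Kryo and rebuilds the event through EventBuilder on the way back. A small round-trip sketch, assuming only the classes shown in this commit:

  import java.util.Collections;

  import org.apache.flume.Event;
  import org.apache.flume.event.EventBuilder;

  import org.apache.apex.malhar.flume.storage.EventCodec;
  import com.datatorrent.netlet.util.Slice;

  public class EventCodecRoundTrip
  {
    public static void main(String[] args)
    {
      EventCodec codec = new EventCodec();
      Event original = EventBuilder.withBody("hello".getBytes(),
          Collections.singletonMap("host", "node1"));
      Slice slice = codec.toByteArray(original);       // headers and body written via Kryo
      Event copy = (Event)codec.fromByteArray(slice);  // rebuilt with EventBuilder.withBody
      System.out.println(new String(copy.getBody()) + " " + copy.getHeaders());
    }
  }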

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java b/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
new file mode 100644
index 0000000..77aeb68
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/storage/HDFSStorage.java
@@ -0,0 +1,947 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import java.io.DataInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import javax.validation.constraints.NotNull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.primitives.Ints;
+import com.google.common.primitives.Longs;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.common.util.NameableThreadFactory;
+import org.apache.apex.malhar.flume.sink.Server;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * HDFSStorage stores data to and retrieves it from HDFS
+ * <p />
+ * The properties that can be set on HDFSStorage are: <br />
+ * baseDir - The base directory where the data is going to be stored <br />
+ * restore - This is used to restore the application from previous failure <br />
+ * blockSize - The maximum size of each file to be created. <br />
+ *
+ * @since 0.9.3
+ */
+public class HDFSStorage implements Storage, Configurable, Component<com.datatorrent.api.Context>
+{
+  public static final int DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
+  public static final String BASE_DIR_KEY = "baseDir";
+  public static final String RESTORE_KEY = "restore";
+  public static final String BLOCKSIZE = "blockSize";
+  public static final String BLOCK_SIZE_MULTIPLE = "blockSizeMultiple";
+  public static final String NUMBER_RETRY = "retryCount";
+
+  private static final String OFFSET_SUFFIX = "-offsetFile";
+  private static final String BOOK_KEEPING_FILE_OFFSET = "-bookKeepingOffsetFile";
+  private static final String FLUSHED_IDENTITY_FILE = "flushedCounter";
+  private static final String CLEAN_OFFSET_FILE = "cleanoffsetFile";
+  private static final String FLUSHED_IDENTITY_FILE_TEMP = "flushedCounter.tmp";
+  private static final String CLEAN_OFFSET_FILE_TEMP = "cleanoffsetFile.tmp";
+  private static final int IDENTIFIER_SIZE = 8;
+  private static final int DATA_LENGTH_BYTE_SIZE = 4;
+
+  /**
+   * Number of times the storage will try to get the filesystem
+   */
+  private int retryCount = 3;
+  /**
+   * The multiple of block size
+   */
+  private int blockSizeMultiple = 1;
+  /**
+   * Identifier for this storage.
+   */
+  @NotNull
+  private String id;
+  /**
+   * The baseDir where the storage facility is going to create files.
+   */
+  @NotNull
+  private String baseDir;
+  /**
+   * The block size to be used to create the storage files
+   */
+  private long blockSize;
+  /**
+   *
+   */
+  private boolean restore;
+  /**
+   * This identifies the current file number
+   */
+  private long currentWrittenFile;
+  /**
+   * This identifies the file number that has been flushed
+   */
+  private long flushedFileCounter;
+  /**
+   * The file that stores the fileCounter information
+   */
+  // private Path fileCounterFile;
+  /**
+   * The file that stores the flushed fileCounter information
+   */
+  private Path flushedCounterFile;
+  private Path flushedCounterFileTemp;
+  /**
+   * This identifies the last cleaned file number
+   */
+  private long cleanedFileCounter;
+  /**
+   * The file that stores the clean file counter information
+   */
+  // private Path cleanFileCounterFile;
+  /**
+   * The file that stores the clean file offset information
+   */
+  private Path cleanFileOffsetFile;
+  private Path cleanFileOffsetFileTemp;
+  private FileSystem fs;
+  private FSDataOutputStream dataStream;
+  ArrayList<DataBlock> files2Commit = new ArrayList<DataBlock>();
+  /**
+   * The offset in the current opened file
+   */
+  private long fileWriteOffset;
+  private FSDataInputStream readStream;
+  private long retrievalOffset;
+  private long retrievalFile;
+  private int offset;
+  private long flushedLong;
+  private long flushedFileWriteOffset;
+  private long bookKeepingFileOffset;
+  private byte[] cleanedOffset = new byte[8];
+  private long skipOffset;
+  private long skipFile;
+  private transient Path basePath;
+  private ExecutorService storageExecutor;
+  private byte[] currentData;
+  private FSDataInputStream nextReadStream;
+  private long nextFlushedLong;
+  private long nextRetrievalFile;
+  private byte[] nextRetrievalData;
+
+  public HDFSStorage()
+  {
+    this.restore = true;
+  }
+
+  /**
+   * This configures the storage from the Flume context: id, baseDir, restore, blockSize, blockSizeMultiple and retryCount.
+   *
+   * @param ctx
+   */
+  @Override
+  public void configure(Context ctx)
+  {
+    String tempId = ctx.getString(ID);
+    if (tempId == null) {
+      if (id == null) {
+        throw new IllegalArgumentException("id can't be null.");
+      }
+    } else {
+      id = tempId;
+    }
+
+    String tempBaseDir = ctx.getString(BASE_DIR_KEY);
+    if (tempBaseDir != null) {
+      baseDir = tempBaseDir;
+    }
+
+    restore = ctx.getBoolean(RESTORE_KEY, restore);
+    Long tempBlockSize = ctx.getLong(BLOCKSIZE);
+    if (tempBlockSize != null) {
+      blockSize = tempBlockSize;
+    }
+    blockSizeMultiple = ctx.getInteger(BLOCK_SIZE_MULTIPLE, blockSizeMultiple);
+    retryCount = ctx.getInteger(NUMBER_RETRY, retryCount);
+  }
+
+  /**
+   * This function reads the file at the given location and returns the bytes stored in it.
+   *
+   * @param path - the location of the file
+   * @return
+   * @throws IOException
+   */
+  byte[] readData(Path path) throws IOException
+  {
+    DataInputStream is = new DataInputStream(fs.open(path));
+    byte[] bytes = new byte[is.available()];
+    is.readFully(bytes);
+    is.close();
+    return bytes;
+  }
+
+  /**
+   * This function writes the bytes to a file specified by the path
+   *
+   * @param path the file location
+   * @param data the data to be written to the file
+   * @return
+   * @throws IOException
+   */
+  private FSDataOutputStream writeData(Path path, byte[] data) throws IOException
+  {
+    FSDataOutputStream fsOutputStream;
+    if (fs.getScheme().equals("file")) {
+      // local FS does not support hflush and does not flush native stream
+      fsOutputStream = new FSDataOutputStream(
+          new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(path).toString()), null);
+    } else {
+      fsOutputStream = fs.create(path);
+    }
+    fsOutputStream.write(data);
+    return fsOutputStream;
+  }
+
+  private long calculateOffset(long fileOffset, long fileCounter)
+  {
+    return ((fileCounter << 32) | (fileOffset & 0xffffffffL));
+  }
+
+  @Override
+  public byte[] store(Slice slice)
+  {
+    // logger.debug("store message ");
+    int bytesToWrite = slice.length + DATA_LENGTH_BYTE_SIZE;
+    if (currentWrittenFile < skipFile) {
+      fileWriteOffset += bytesToWrite;
+      if (fileWriteOffset >= bookKeepingFileOffset) {
+        files2Commit.add(new DataBlock(null, bookKeepingFileOffset,
+            new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), currentWrittenFile));
+        currentWrittenFile++;
+        if (fileWriteOffset > bookKeepingFileOffset) {
+          fileWriteOffset = bytesToWrite;
+        } else {
+          fileWriteOffset = 0;
+        }
+        try {
+          bookKeepingFileOffset = getFlushedFileWriteOffset(
+              new Path(basePath, currentWrittenFile + BOOK_KEEPING_FILE_OFFSET));
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      return null;
+    }
+
+    if (flushedFileCounter == currentWrittenFile && dataStream == null) {
+      currentWrittenFile++;
+      fileWriteOffset = 0;
+    }
+
+    if (flushedFileCounter == skipFile && skipFile != -1) {
+      skipFile++;
+    }
+
+    if (fileWriteOffset + bytesToWrite < blockSize) {
+      try {
+        /* write length and the actual data to the file */
+        if (fileWriteOffset == 0) {
+          // writeData(flushedCounterFile, String.valueOf(currentWrittenFile).getBytes()).close();
+          dataStream = writeData(new Path(basePath, String.valueOf(currentWrittenFile)),
+              Ints.toByteArray(slice.length));
+          dataStream.write(slice.buffer, slice.offset, slice.length);
+        } else {
+          dataStream.write(Ints.toByteArray(slice.length));
+          dataStream.write(slice.buffer, slice.offset, slice.length);
+        }
+        fileWriteOffset += bytesToWrite;
+
+        byte[] fileOffset = null;
+        if ((currentWrittenFile > skipFile) || (currentWrittenFile == skipFile && fileWriteOffset > skipOffset)) {
+          skipFile = -1;
+          fileOffset = new byte[IDENTIFIER_SIZE];
+          Server.writeLong(fileOffset, 0, calculateOffset(fileWriteOffset, currentWrittenFile));
+        }
+        return fileOffset;
+      } catch (IOException ex) {
+        logger.warn("Error while storing the bytes {}", ex.getMessage());
+        closeFs();
+        throw new RuntimeException(ex);
+      }
+    }
+    DataBlock db = new DataBlock(dataStream, fileWriteOffset,
+        new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), currentWrittenFile);
+    db.close();
+    files2Commit.add(db);
+    fileWriteOffset = 0;
+    ++currentWrittenFile;
+    return store(slice);
+  }
+
+  /**
+   * @param b
+   * @param startIndex
+   * @return
+   */
+  long byteArrayToLong(byte[] b, int startIndex)
+  {
+    final byte b1 = 0;
+    return Longs.fromBytes(b1, b1, b1, b1, b[3 + startIndex], b[2 + startIndex], b[1 + startIndex], b[startIndex]);
+  }
+
+  @Override
+  public byte[] retrieve(byte[] identifier)
+  {
+    skipFile = -1;
+    skipOffset = 0;
+    logger.debug("retrieve with address {}", Arrays.toString(identifier));
+    // flushing the last incomplete flushed file
+    closeUnflushedFiles();
+
+    retrievalOffset = byteArrayToLong(identifier, 0);
+    retrievalFile = byteArrayToLong(identifier, offset);
+
+    if (retrievalFile == 0 && retrievalOffset == 0 && currentWrittenFile == 0 && fileWriteOffset == 0) {
+      skipOffset = 0;
+      return null;
+    }
+
+    // making sure that the deleted address is not requested again
+    if (retrievalFile != 0 || retrievalOffset != 0) {
+      long cleanedFile = byteArrayToLong(cleanedOffset, offset);
+      if (retrievalFile < cleanedFile || (retrievalFile == cleanedFile &&
+          retrievalOffset < byteArrayToLong(cleanedOffset, 0))) {
+        logger.warn("The address asked has been deleted retrievalFile={}, cleanedFile={}, retrievalOffset={}, " +
+            "cleanedOffset={}", retrievalFile, cleanedFile, retrievalOffset, byteArrayToLong(cleanedOffset, 0));
+        closeFs();
+        throw new IllegalArgumentException(String.format("The data for address %s has already been deleted",
+            Arrays.toString(identifier)));
+      }
+    }
+
+    // we have just started
+    if (retrievalFile == 0 && retrievalOffset == 0) {
+      retrievalFile = byteArrayToLong(cleanedOffset, offset);
+      retrievalOffset = byteArrayToLong(cleanedOffset, 0);
+    }
+
+    if ((retrievalFile > flushedFileCounter)) {
+      skipFile = retrievalFile;
+      skipOffset = retrievalOffset;
+      retrievalFile = -1;
+      return null;
+    }
+    if ((retrievalFile == flushedFileCounter && retrievalOffset >= flushedFileWriteOffset)) {
+      skipFile = retrievalFile;
+      skipOffset = retrievalOffset - flushedFileWriteOffset;
+      retrievalFile = -1;
+      return null;
+    }
+
+    try {
+      if (readStream != null) {
+        readStream.close();
+        readStream = null;
+      }
+      Path path = new Path(basePath, String.valueOf(retrievalFile));
+      if (!fs.exists(path)) {
+        retrievalFile = -1;
+        closeFs();
+        throw new RuntimeException(String.format("File %s does not exist", path.toString()));
+      }
+
+      byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+      flushedLong = Server.readLong(flushedOffset, 0);
+      while (retrievalOffset >= flushedLong && retrievalFile < flushedFileCounter) {
+        retrievalOffset -= flushedLong;
+        retrievalFile++;
+        flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+        flushedLong = Server.readLong(flushedOffset, 0);
+      }
+
+      if (retrievalOffset >= flushedLong) {
+        logger.warn("data not flushed for the given identifier");
+        retrievalFile = -1;
+        return null;
+      }
+      synchronized (HDFSStorage.this) {
+        if (nextReadStream != null) {
+          nextReadStream.close();
+          nextReadStream = null;
+        }
+      }
+      currentData = null;
+      path = new Path(basePath, String.valueOf(retrievalFile));
+      //readStream = new FSDataInputStream(fs.open(path));
+      currentData = readData(path);
+      //readStream.seek(retrievalOffset);
+      storageExecutor.submit(getNextStream());
+      return retrieveHelper();
+    } catch (IOException e) {
+      closeFs();
+      throw new RuntimeException(e);
+    }
+  }
+
+  private byte[] retrieveHelper() throws IOException
+  {
+    int tempRetrievalOffset = (int)retrievalOffset;
+    int length = Ints.fromBytes(currentData[tempRetrievalOffset], currentData[tempRetrievalOffset + 1],
+        currentData[tempRetrievalOffset + 2], currentData[tempRetrievalOffset + 3]);
+    byte[] data = new byte[length + IDENTIFIER_SIZE];
+    System.arraycopy(currentData, tempRetrievalOffset + 4, data, IDENTIFIER_SIZE, length);
+    retrievalOffset += length + DATA_LENGTH_BYTE_SIZE;
+    if (retrievalOffset >= flushedLong) {
+      Server.writeLong(data, 0, calculateOffset(0, retrievalFile + 1));
+    } else {
+      Server.writeLong(data, 0, calculateOffset(retrievalOffset, retrievalFile));
+    }
+    return data;
+  }
+
+  @Override
+  public byte[] retrieveNext()
+  {
+    if (retrievalFile == -1) {
+      closeFs();
+      throw new RuntimeException("Call retrieve first");
+    }
+
+    if (retrievalFile > flushedFileCounter) {
+      logger.warn("data is not flushed");
+      return null;
+    }
+
+    try {
+      if (currentData == null) {
+        synchronized (HDFSStorage.this) {
+          if (nextRetrievalData != null && (retrievalFile == nextRetrievalFile)) {
+            currentData = nextRetrievalData;
+            flushedLong = nextFlushedLong;
+            nextRetrievalData = null;
+          } else {
+            currentData = null;
+            currentData = readData(new Path(basePath, String.valueOf(retrievalFile)));
+            byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+            flushedLong = Server.readLong(flushedOffset, 0);
+          }
+        }
+        storageExecutor.submit(getNextStream());
+      }
+
+      if (retrievalOffset >= flushedLong) {
+        retrievalFile++;
+        retrievalOffset = 0;
+
+        if (retrievalFile > flushedFileCounter) {
+          logger.warn("data is not flushed");
+          return null;
+        }
+
+        //readStream.close();
+        // readStream = new FSDataInputStream(fs.open(new Path(basePath, String.valueOf(retrievalFile))));
+        // byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+        // flushedLong = Server.readLong(flushedOffset, 0);
+
+        synchronized (HDFSStorage.this) {
+          if (nextRetrievalData != null && (retrievalFile == nextRetrievalFile)) {
+            currentData = nextRetrievalData;
+            flushedLong = nextFlushedLong;
+            nextRetrievalData = null;
+          } else {
+            currentData = null;
+            currentData = readData(new Path(basePath, String.valueOf(retrievalFile)));
+            byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+            flushedLong = Server.readLong(flushedOffset, 0);
+          }
+        }
+        storageExecutor.submit(getNextStream());
+      }
+      //readStream.seek(retrievalOffset);
+      return retrieveHelper();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")
+  public void clean(byte[] identifier)
+  {
+    logger.info("clean {}", Arrays.toString(identifier));
+    long cleanFileIndex = byteArrayToLong(identifier, offset);
+
+    long cleanFileOffset = byteArrayToLong(identifier, 0);
+    if (flushedFileCounter == -1) {
+      identifier = new byte[8];
+    } else if (cleanFileIndex > flushedFileCounter ||
+        (cleanFileIndex == flushedFileCounter && cleanFileOffset >= flushedFileWriteOffset)) {
+      // This is to make sure that we clean only the data that is flushed
+      cleanFileIndex = flushedFileCounter;
+      cleanFileOffset = flushedFileWriteOffset;
+      Server.writeLong(identifier, 0, calculateOffset(cleanFileOffset, cleanFileIndex));
+    }
+    cleanedOffset = identifier;
+
+    try {
+      writeData(cleanFileOffsetFileTemp, identifier).close();
+      fs.rename(cleanFileOffsetFileTemp, cleanFileOffsetFile);
+      if (cleanedFileCounter >= cleanFileIndex) {
+        return;
+      }
+      do {
+        Path path = new Path(basePath, String.valueOf(cleanedFileCounter));
+        if (fs.exists(path) && fs.isFile(path)) {
+          fs.delete(path, false);
+        }
+        path = new Path(basePath, cleanedFileCounter + OFFSET_SUFFIX);
+        if (fs.exists(path) && fs.isFile(path)) {
+          fs.delete(path, false);
+        }
+        path = new Path(basePath, cleanedFileCounter + BOOK_KEEPING_FILE_OFFSET);
+        if (fs.exists(path) && fs.isFile(path)) {
+          fs.delete(path, false);
+        }
+        logger.info("deleted file {}", cleanedFileCounter);
+        ++cleanedFileCounter;
+      } while (cleanedFileCounter < cleanFileIndex);
+      // writeData(cleanFileCounterFile, String.valueOf(cleanedFileCounter).getBytes()).close();
+
+    } catch (IOException e) {
+      logger.warn("not able to close the streams {}", e.getMessage());
+      closeFs();
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * This is used mainly to clean up the counter files that were created.
+   */
+  void cleanHelperFiles()
+  {
+    try {
+      fs.delete(basePath, true);
+    } catch (IOException e) {
+      logger.warn(e.getMessage());
+    }
+  }
+
+  private void closeUnflushedFiles()
+  {
+    try {
+      files2Commit.clear();
+      // closing the stream
+      if (dataStream != null) {
+        dataStream.close();
+        dataStream = null;
+        // currentWrittenFile++;
+        // fileWriteOffset = 0;
+      }
+
+      if (!fs.exists(new Path(basePath, currentWrittenFile + OFFSET_SUFFIX))) {
+        fs.delete(new Path(basePath, String.valueOf(currentWrittenFile)), false);
+      }
+
+      if (fs.exists(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX))) {
+        // This means that flush was called
+        flushedFileWriteOffset = getFlushedFileWriteOffset(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
+        bookKeepingFileOffset = getFlushedFileWriteOffset(
+            new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
+      }
+
+      if (flushedFileCounter != -1) {
+        currentWrittenFile = flushedFileCounter;
+        fileWriteOffset = flushedFileWriteOffset;
+      } else {
+        currentWrittenFile = 0;
+        fileWriteOffset = 0;
+      }
+
+      flushedLong = 0;
+
+    } catch (IOException e) {
+      closeFs();
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public void flush()
+  {
+    nextReadStream = null;
+    StringBuilder builder = new StringBuilder();
+    Iterator<DataBlock> itr = files2Commit.iterator();
+    DataBlock db;
+    try {
+      while (itr.hasNext()) {
+        db = itr.next();
+        db.updateOffsets();
+        builder.append(db.fileName).append(", ");
+      }
+      files2Commit.clear();
+
+      if (dataStream != null) {
+        dataStream.hflush();
+        writeData(flushedCounterFileTemp, String.valueOf(currentWrittenFile).getBytes()).close();
+        fs.rename(flushedCounterFileTemp, flushedCounterFile);
+        updateFlushedOffset(new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), fileWriteOffset);
+        flushedFileWriteOffset = fileWriteOffset;
+        builder.append(currentWrittenFile);
+      }
+      logger.debug("flushed files {}", builder.toString());
+    } catch (IOException ex) {
+      logger.warn("not able to close the stream {}", ex.getMessage());
+      closeFs();
+      throw new RuntimeException(ex);
+    }
+    flushedFileCounter = currentWrittenFile;
+    // logger.debug("flushedFileCounter in flush {}",flushedFileCounter);
+  }
+
+  /**
+   * This updates the flushed offset
+   */
+  private void updateFlushedOffset(Path file, long bytesWritten)
+  {
+    byte[] lastStoredOffset = new byte[IDENTIFIER_SIZE];
+    Server.writeLong(lastStoredOffset, 0, bytesWritten);
+    try {
+      writeData(file, lastStoredOffset).close();
+    } catch (IOException e) {
+      try {
+        if (!Arrays.equals(readData(file), lastStoredOffset)) {
+          closeFs();
+          throw new RuntimeException(e);
+        }
+      } catch (Exception e1) {
+        closeFs();
+        throw new RuntimeException(e1);
+      }
+    }
+  }
+
+  public int getBlockSizeMultiple()
+  {
+    return blockSizeMultiple;
+  }
+
+  public void setBlockSizeMultiple(int blockSizeMultiple)
+  {
+    this.blockSizeMultiple = blockSizeMultiple;
+  }
+
+  /**
+   * @return the baseDir
+   */
+  public String getBaseDir()
+  {
+    return baseDir;
+  }
+
+  /**
+   * @param baseDir the baseDir to set
+   */
+  public void setBaseDir(String baseDir)
+  {
+    this.baseDir = baseDir;
+  }
+
+  /**
+   * @return the id
+   */
+  public String getId()
+  {
+    return id;
+  }
+
+  /**
+   * @param id the id to set
+   */
+  public void setId(String id)
+  {
+    this.id = id;
+  }
+
+  /**
+   * @return the blockSize
+   */
+  public long getBlockSize()
+  {
+    return blockSize;
+  }
+
+  /**
+   * @param blockSize the blockSize to set
+   */
+  public void setBlockSize(long blockSize)
+  {
+    this.blockSize = blockSize;
+  }
+
+  /**
+   * @return the restore
+   */
+  public boolean isRestore()
+  {
+    return restore;
+  }
+
+  /**
+   * @param restore the restore to set
+   */
+  public void setRestore(boolean restore)
+  {
+    this.restore = restore;
+  }
+
+  class DataBlock
+  {
+    FSDataOutputStream dataStream;
+    long dataOffset;
+    Path path2FlushedData;
+    long fileName;
+    private Path bookKeepingPath;
+
+    DataBlock(FSDataOutputStream stream, long bytesWritten, Path path2FlushedData, long fileName)
+    {
+      this.dataStream = stream;
+      this.dataOffset = bytesWritten;
+      this.path2FlushedData = path2FlushedData;
+      this.fileName = fileName;
+    }
+
+    public void close()
+    {
+      if (dataStream != null) {
+        try {
+          dataStream.close();
+          bookKeepingPath = new Path(basePath, fileName + BOOK_KEEPING_FILE_OFFSET);
+          updateFlushedOffset(bookKeepingPath, dataOffset);
+        } catch (IOException ex) {
+          logger.warn("not able to close the stream {}", ex.getMessage());
+          closeFs();
+          throw new RuntimeException(ex);
+        }
+      }
+    }
+
+    public void updateOffsets() throws IOException
+    {
+      updateFlushedOffset(path2FlushedData, dataOffset);
+      if (bookKeepingPath != null && fs.exists(bookKeepingPath)) {
+        fs.delete(bookKeepingPath, false);
+      }
+    }
+
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStorage.class);
+
+  @Override
+  public void setup(com.datatorrent.api.Context context)
+  {
+    Configuration conf = new Configuration();
+    if (baseDir == null) {
+      baseDir = conf.get("hadoop.tmp.dir");
+      if (baseDir == null || baseDir.isEmpty()) {
+        throw new IllegalArgumentException("baseDir cannot be null.");
+      }
+    }
+    offset = 4;
+    skipOffset = -1;
+    skipFile = -1;
+    int tempRetryCount = 0;
+    while (tempRetryCount < retryCount && fs == null) {
+      try {
+        fs = FileSystem.newInstance(conf);
+        tempRetryCount++;
+      } catch (Throwable throwable) {
+        logger.warn("Not able to get file system ", throwable);
+      }
+    }
+
+    try {
+      Path path = new Path(baseDir);
+      basePath = new Path(path, id);
+      if (fs == null) {
+        fs = FileSystem.newInstance(conf);
+      }
+      if (!fs.exists(path)) {
+        closeFs();
+        throw new RuntimeException(String.format("baseDir passed (%s) doesn't exist.", baseDir));
+      }
+      if (!fs.isDirectory(path)) {
+        closeFs();
+        throw new RuntimeException(String.format("baseDir passed (%s) is not a directory.", baseDir));
+      }
+      if (!restore) {
+        fs.delete(basePath, true);
+      }
+      if (!fs.exists(basePath) || !fs.isDirectory(basePath)) {
+        fs.mkdirs(basePath);
+      }
+
+      if (blockSize == 0) {
+        blockSize = fs.getDefaultBlockSize(new Path(basePath, "tempData"));
+      }
+      if (blockSize == 0) {
+        blockSize = DEFAULT_BLOCK_SIZE;
+      }
+
+      blockSize = blockSizeMultiple * blockSize;
+
+      currentWrittenFile = 0;
+      cleanedFileCounter = -1;
+      retrievalFile = -1;
+      // fileCounterFile = new Path(basePath, IDENTITY_FILE);
+      flushedFileCounter = -1;
+      // cleanFileCounterFile = new Path(basePath, CLEAN_FILE);
+      cleanFileOffsetFile = new Path(basePath, CLEAN_OFFSET_FILE);
+      cleanFileOffsetFileTemp = new Path(basePath, CLEAN_OFFSET_FILE_TEMP);
+      flushedCounterFile = new Path(basePath, FLUSHED_IDENTITY_FILE);
+      flushedCounterFileTemp = new Path(basePath, FLUSHED_IDENTITY_FILE_TEMP);
+
+      if (restore) {
+        //
+        // if (fs.exists(fileCounterFile) && fs.isFile(fileCounterFile)) {
+        // //currentWrittenFile = Long.valueOf(new String(readData(fileCounterFile)));
+        // }
+
+        if (fs.exists(cleanFileOffsetFile) && fs.isFile(cleanFileOffsetFile)) {
+          cleanedOffset = readData(cleanFileOffsetFile);
+        }
+
+        if (fs.exists(flushedCounterFile) && fs.isFile(flushedCounterFile)) {
+          String strFlushedFileCounter = new String(readData(flushedCounterFile));
+          if (strFlushedFileCounter.isEmpty()) {
+            logger.warn("empty flushed file");
+          } else {
+            flushedFileCounter = Long.valueOf(strFlushedFileCounter);
+            flushedFileWriteOffset = getFlushedFileWriteOffset(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
+            bookKeepingFileOffset = getFlushedFileWriteOffset(
+                new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
+          }
+
+        }
+      }
+      fileWriteOffset = flushedFileWriteOffset;
+      currentWrittenFile = flushedFileCounter;
+      cleanedFileCounter = byteArrayToLong(cleanedOffset, offset) - 1;
+      if (currentWrittenFile == -1) {
+        ++currentWrittenFile;
+        fileWriteOffset = 0;
+      }
+
+    } catch (IOException io) {
+
+      throw new RuntimeException(io);
+    }
+    storageExecutor = Executors.newSingleThreadExecutor(new NameableThreadFactory("StorageHelper"));
+  }
+
+  private void closeFs()
+  {
+    if (fs != null) {
+      try {
+        fs.close();
+        fs = null;
+      } catch (IOException e) {
+        logger.debug(e.getMessage());
+      }
+    }
+  }
+
+  private long getFlushedFileWriteOffset(Path filePath) throws IOException
+  {
+    if (flushedFileCounter != -1 && fs.exists(filePath)) {
+      byte[] flushedFileOffsetByte = readData(filePath);
+      if (flushedFileOffsetByte != null && flushedFileOffsetByte.length == 8) {
+        return Server.readLong(flushedFileOffsetByte, 0);
+      }
+    }
+    return 0;
+  }
+
+  @Override
+  public void teardown()
+  {
+    logger.debug("called teardown");
+    try {
+      if (readStream != null) {
+        readStream.close();
+      }
+      synchronized (HDFSStorage.this) {
+        if (nextReadStream != null) {
+          nextReadStream.close();
+        }
+      }
+
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    } finally {
+      closeUnflushedFiles();
+      storageExecutor.shutdown();
+    }
+
+  }
+
+  private Runnable getNextStream()
+  {
+    return new Runnable()
+    {
+      @Override
+      public void run()
+      {
+        try {
+          synchronized (HDFSStorage.this) {
+            nextRetrievalFile = retrievalFile + 1;
+            if (nextRetrievalFile > flushedFileCounter) {
+              nextRetrievalData = null;
+              return;
+            }
+            Path path = new Path(basePath, String.valueOf(nextRetrievalFile));
+            Path offsetPath = new Path(basePath, nextRetrievalFile + OFFSET_SUFFIX);
+            nextRetrievalData = null;
+            nextRetrievalData = readData(path);
+            byte[] flushedOffset = readData(offsetPath);
+            nextFlushedLong = Server.readLong(flushedOffset, 0);
+          }
+        } catch (Throwable e) {
+          logger.warn("in storage executor ", e);
+
+        }
+      }
+    };
+  }
+
+}
+
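
To make the properties listed in the HDFSStorage javadoc concrete, the following stand-alone sketch configures and exercises the storage directly. The directory and values are illustrative, and it assumes the base directory already exists on the filesystem resolved from the default Hadoop configuration:

  import org.apache.flume.Context;

  import org.apache.apex.malhar.flume.storage.HDFSStorage;
  import com.datatorrent.netlet.util.Slice;

  public class HDFSStorageSketch
  {
    public static void main(String[] args)
    {
      Context ctx = new Context();
      ctx.put("id", "sink1");               // unique identifier for this storage instance
      ctx.put("baseDir", "/tmp/flume101");  // must already exist on the target filesystem
      ctx.put("restore", "true");           // recover previously written state on restart
      ctx.put("blockSize", "67108864");     // 64 MB per data file
      ctx.put("retryCount", "3");           // attempts to obtain the filesystem

      HDFSStorage storage = new HDFSStorage();
      storage.configure(ctx);
      storage.setup(null);                  // setup() does not use the operator context

      byte[] payload = "sample event".getBytes();
      byte[] address = storage.store(new Slice(payload, 0, payload.length));
      storage.flush();                      // makes the stored data visible to retrieve()
      System.out.println("stored, address returned: " + (address != null));

      storage.teardown();
    }
  }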

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/storage/Storage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/storage/Storage.java b/flume/src/main/java/org/apache/apex/malhar/flume/storage/Storage.java
new file mode 100644
index 0000000..add1831
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/storage/Storage.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>Storage interface.</p>
+ *
+ * @since 0.9.2
+ */
+public interface Storage
+{
+  /**
+   * Key in the context for the unique identifier of the storage, which may be used to recover from failure.
+   */
+  String ID = "id";
+
+  /**
+   * This stores the bytes and returns the unique identifier to retrieve these bytes
+   *
+   * @param bytes
+   * @return
+   */
+  byte[] store(Slice bytes);
+
+  /**
+   * This returns the data bytes for the current identifier and the identifier for the next data bytes. <br/>
+   * The first eight bytes contain the identifier and the remaining bytes contain the data
+   *
+   * @param identifier
+   * @return
+   */
+  byte[] retrieve(byte[] identifier);
+
+  /**
+   * This returns the data bytes and the identifier for the next data bytes. The position of the current data bytes is
+   * determined by the preceding retrieve call and the number of retrieveNext calls made after it. <br/>
+   * The first eight bytes contain the identifier and the remaining bytes contain the data
+   *
+   * @return
+   */
+  byte[] retrieveNext();
+
+  /**
+   * This is used to clean up the files up to the position given by the identifier
+   *
+   * @param identifier
+   */
+  void clean(byte[] identifier);
+
+  /**
+   * This flushes the buffered data to the underlying storage
+   *
+   */
+  void flush();
+
+}
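
Given the contract above (the first eight bytes of every returned array are the identifier for the next read position), a consumer of any Storage implementation can be sketched roughly as follows; the method and variable names are illustrative:

  import java.util.Arrays;

  import org.apache.apex.malhar.flume.storage.Storage;

  public class StorageReaderSketch
  {
    /** Drains all flushed data from the given storage and then releases what was read. */
    public static void drain(Storage storage)
    {
      byte[] chunk = storage.retrieve(new byte[8]);  // zeroed address: start at the beginning
      byte[] lastAddress = null;
      while (chunk != null) {
        lastAddress = Arrays.copyOfRange(chunk, 0, 8);            // identifier for the next read
        byte[] payload = Arrays.copyOfRange(chunk, 8, chunk.length);
        System.out.println("read " + payload.length + " bytes");  // stand-in for real processing
        chunk = storage.retrieveNext();
      }
      if (lastAddress != null) {
        storage.clean(lastAddress);  // release files up to the last consumed position
      }
    }
  }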

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-conf.sample.properties b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
index 1782d4a..af59e52 100644
--- a/flume/src/main/resources/flume-conf/flume-conf.sample.properties
+++ b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
@@ -23,7 +23,7 @@
  agent1.sinks = dt
 
 # first sink - dt
- agent1.sinks.dt.type = com.datatorrent.flume.sink.DTFlumeSink
+ agent1.sinks.dt.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
  agent1.sinks.dt.id = sink1
  agent1.sinks.dt.hostname = localhost
  agent1.sinks.dt.port = 8080
@@ -31,7 +31,7 @@
  agent1.sinks.dt.throughputAdjustmentFactor = 2
  agent1.sinks.dt.maximumEventsPerTransaction = 5000
  agent1.sinks.dt.minimumEventsPerTransaction = 1
- agent1.sinks.dt.storage = com.datatorrent.flume.storage.HDFSStorage
+ agent1.sinks.dt.storage = org.apache.apex.malhar.flume.storage.HDFSStorage
  agent1.sinks.dt.storage.restore = false
  agent1.sinks.dt.storage.baseDir = /tmp/flume101
  agent1.sinks.dt.channel = ch1

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java b/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
deleted file mode 100644
index f182edc..0000000
--- a/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.discovery;
-
-import org.codehaus.jackson.type.TypeReference;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.curator.x.discovery.ServiceInstance;
-import org.apache.curator.x.discovery.details.InstanceSerializer;
-
-import com.datatorrent.flume.discovery.Discovery.Service;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertNotNull;
-
-/**
- *
- */
-@Ignore
-public class ZKAssistedDiscoveryTest
-{
-  public ZKAssistedDiscoveryTest()
-  {
-  }
-
-  @Test
-  public void testSerialization() throws Exception
-  {
-    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
-    discovery.setServiceName("DTFlumeTest");
-    discovery.setConnectionString("localhost:2181");
-    discovery.setBasePath("/HelloDT");
-    discovery.setup(null);
-    ServiceInstance<byte[]> instance = discovery.getInstance(new Service<byte[]>()
-    {
-      @Override
-      public String getHost()
-      {
-        return "localhost";
-      }
-
-      @Override
-      public int getPort()
-      {
-        return 8080;
-      }
-
-      @Override
-      public byte[] getPayload()
-      {
-        return null;
-      }
-
-      @Override
-      public String getId()
-      {
-        return "localhost8080";
-      }
-
-    });
-    InstanceSerializer<byte[]> instanceSerializer =
-        discovery.getInstanceSerializerFactory().getInstanceSerializer(new TypeReference<ServiceInstance<byte[]>>()
-        {
-        });
-    byte[] serialize = instanceSerializer.serialize(instance);
-    logger.debug("serialized json = {}", new String(serialize));
-    ServiceInstance<byte[]> deserialize = instanceSerializer.deserialize(serialize);
-    assertArrayEquals("Metadata", instance.getPayload(), deserialize.getPayload());
-  }
-
-  @Test
-  public void testDiscover()
-  {
-    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
-    discovery.setServiceName("DTFlumeTest");
-    discovery.setConnectionString("localhost:2181");
-    discovery.setBasePath("/HelloDT");
-    discovery.setup(null);
-    assertNotNull("Discovered Sinks", discovery.discover());
-    discovery.teardown();
-  }
-
-  @Test
-  public void testAdvertize()
-  {
-    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
-    discovery.setServiceName("DTFlumeTest");
-    discovery.setConnectionString("localhost:2181");
-    discovery.setBasePath("/HelloDT");
-    discovery.setup(null);
-
-    Service<byte[]> service = new Service<byte[]>()
-    {
-      @Override
-      public String getHost()
-      {
-        return "chetan";
-      }
-
-      @Override
-      public int getPort()
-      {
-        return 5033;
-      }
-
-      @Override
-      public byte[] getPayload()
-      {
-        return new byte[] {3, 2, 1};
-      }
-
-      @Override
-      public String getId()
-      {
-        return "uniqueId";
-      }
-
-    };
-    discovery.advertise(service);
-    discovery.teardown();
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(ZKAssistedDiscoveryTest.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java b/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
deleted file mode 100644
index 8256916..0000000
--- a/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.integration;
-
-import org.junit.Ignore;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Event;
-import org.apache.hadoop.conf.Configuration;
-
-import com.datatorrent.api.Context.OperatorContext;
-import com.datatorrent.api.DAG;
-import com.datatorrent.api.DAG.Locality;
-import com.datatorrent.api.DefaultInputPort;
-import com.datatorrent.api.LocalMode;
-import com.datatorrent.api.Operator;
-import com.datatorrent.api.StreamingApplication;
-import com.datatorrent.flume.operator.AbstractFlumeInputOperator;
-import com.datatorrent.flume.storage.EventCodec;
-
-/**
- *
- */
-@Ignore
-public class ApplicationTest implements StreamingApplication
-{
-  public static class FlumeInputOperator extends AbstractFlumeInputOperator<Event>
-  {
-    @Override
-    public Event convert(Event event)
-    {
-      return event;
-    }
-  }
-
-  public static class Counter implements Operator
-  {
-    private int count;
-    private transient Event event;
-    public final transient DefaultInputPort<Event> input = new DefaultInputPort<Event>()
-    {
-      @Override
-      public void process(Event tuple)
-      {
-        count++;
-        event = tuple;
-      }
-
-    };
-
-    @Override
-    public void beginWindow(long windowId)
-    {
-    }
-
-    @Override
-    public void endWindow()
-    {
-      logger.debug("total count = {}, tuple = {}", count, event);
-    }
-
-    @Override
-    public void setup(OperatorContext context)
-    {
-    }
-
-    @Override
-    public void teardown()
-    {
-    }
-
-    private static final Logger logger = LoggerFactory.getLogger(Counter.class);
-  }
-
-  @Override
-  public void populateDAG(DAG dag, Configuration conf)
-  {
-    dag.setAttribute(com.datatorrent.api.Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);
-    FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
-    flume.setConnectAddresses(new String[]{"test:127.0.0.1:8080"});
-    flume.setCodec(new EventCodec());
-    Counter counter = dag.addOperator("Counter", new Counter());
-
-    dag.addStream("Slices", flume.output, counter.input).setLocality(Locality.CONTAINER_LOCAL);
-  }
-
-  @Test
-  public void test()
-  {
-    try {
-      LocalMode.runApp(this, Integer.MAX_VALUE);
-    } catch (Exception ex) {
-      logger.warn("The dag seems to be not testable yet, if it's - remove this exception handling", ex);
-    }
-
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(ApplicationTest.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
deleted file mode 100644
index aca99c3..0000000
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.interceptor;
-
-import java.io.IOException;
-import java.net.URISyntaxException;
-import java.util.HashMap;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import org.apache.flume.Context;
-import org.apache.flume.interceptor.Interceptor;
-
-import static org.junit.Assert.assertArrayEquals;
-
-/**
- * Tests for {@link ColumnFilteringFormattingInterceptor}
- */
-public class ColumnFilteringFormattingInterceptorTest
-{
-  private static InterceptorTestHelper helper;
-
-  @BeforeClass
-  public static void startUp()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{1}\001{2}\001{3}\001");
-
-    helper = new InterceptorTestHelper(new ColumnFilteringFormattingInterceptor.Builder(), contextMap);
-  }
-
-  @Test
-  public void testInterceptEvent()
-  {
-    helper.testIntercept_Event();
-  }
-
-  @Test
-  public void testFiles() throws IOException, URISyntaxException
-  {
-    helper.testFiles();
-  }
-
-  @Test
-  public void testInterceptEventWithPrefix()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "\001{1}\001{2}\001{3}\001");
-
-    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
-    builder.configure(new Context(contextMap));
-    Interceptor interceptor = builder.build();
-
-    assertArrayEquals("Six Fields",
-        "\001\001Second\001\001".getBytes(),
-        interceptor.intercept(
-        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody());
-  }
-
-  @Test
-  public void testInterceptEventWithLongSeparator()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "a{1}bc{2}def{3}ghi");
-
-    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
-    builder.configure(new Context(contextMap));
-    Interceptor interceptor = builder.build();
-    byte[] body = interceptor.intercept(
-        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody();
-
-    assertArrayEquals("Six Fields, " + new String(body), "abcSeconddefghi".getBytes(), body);
-  }
-
-  @Test
-  public void testInterceptEventWithTerminatingSeparator()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "a{1}bc{2}def{3}");
-
-    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
-    builder.configure(new Context(contextMap));
-    Interceptor interceptor = builder.build();
-    byte[] body = interceptor.intercept(
-        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody();
-
-    assertArrayEquals("Six Fields, " + new String(body), "abcSeconddef".getBytes(), body);
-  }
-
-  @Test
-  public void testInterceptEventWithColumnZero()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{0}\001");
-
-    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
-    builder.configure(new Context(contextMap));
-    Interceptor interceptor = builder.build();
-
-    assertArrayEquals("Empty Bytes",
-        "\001".getBytes(),
-        interceptor.intercept(new InterceptorTestHelper.MyEvent("".getBytes())).getBody());
-
-    assertArrayEquals("One Field",
-        "First\001".getBytes(),
-        interceptor.intercept(new InterceptorTestHelper.MyEvent("First".getBytes())).getBody());
-
-    assertArrayEquals("Two Fields",
-        "\001".getBytes(),
-        interceptor.intercept(new InterceptorTestHelper.MyEvent("\002First".getBytes())).getBody());
-  }
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
deleted file mode 100644
index 11ee23f..0000000
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.interceptor;
-
-import java.io.IOException;
-import java.net.URISyntaxException;
-import java.util.HashMap;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import org.apache.flume.Context;
-import org.apache.flume.interceptor.Interceptor;
-
-import static org.junit.Assert.assertArrayEquals;
-
-/**
- *
- */
-public class ColumnFilteringInterceptorTest
-{
-  private static InterceptorTestHelper helper;
-
-  @BeforeClass
-  public static void startUp()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, Byte.toString((byte)1));
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringInterceptor.Constants.COLUMNS, "1 2 3");
-
-    helper = new InterceptorTestHelper(new ColumnFilteringInterceptor.Builder(), contextMap);
-  }
-
-  @Test
-  public void testInterceptEvent()
-  {
-    helper.testIntercept_Event();
-  }
-
-  @Test
-  public void testFiles() throws IOException, URISyntaxException
-  {
-    helper.testFiles();
-  }
-
-  @Test
-  public void testInterceptEventWithColumnZero()
-  {
-    HashMap<String, String> contextMap = new HashMap<String, String>();
-    contextMap.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, Byte.toString((byte)1));
-    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
-    contextMap.put(ColumnFilteringInterceptor.Constants.COLUMNS, "0");
-
-    ColumnFilteringInterceptor.Builder builder = new ColumnFilteringInterceptor.Builder();
-    builder.configure(new Context(contextMap));
-    Interceptor interceptor = builder.build();
-
-    assertArrayEquals("Empty Bytes",
-        "\001".getBytes(),
-        interceptor.intercept(new InterceptorTestHelper.MyEvent("".getBytes())).getBody());
-
-    assertArrayEquals("One Field",
-        "First\001".getBytes(),
-        interceptor.intercept(new InterceptorTestHelper.MyEvent("First".getBytes())).getBody());
-
-    assertArrayEquals("Two Fields",
-        "\001".getBytes(),
-        interceptor.intercept(new InterceptorTestHelper.MyEvent("\002First".getBytes())).getBody());
-  }
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java b/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
deleted file mode 100644
index dc95f08..0000000
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
+++ /dev/null
@@ -1,216 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.interceptor;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-
-import org.junit.Assert;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.interceptor.Interceptor;
-
-import com.datatorrent.netlet.util.Slice;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertNotNull;
-
-/**
- *
- */
-public class InterceptorTestHelper
-{
-  private static final byte FIELD_SEPARATOR = 1;
-
-  static class MyEvent implements Event
-  {
-    byte[] body;
-
-    MyEvent(byte[] bytes)
-    {
-      body = bytes;
-    }
-
-    @Override
-    public Map<String, String> getHeaders()
-    {
-      return null;
-    }
-
-    @Override
-    public void setHeaders(Map<String, String> map)
-    {
-    }
-
-    @Override
-    @SuppressWarnings("ReturnOfCollectionOrArrayField")
-    public byte[] getBody()
-    {
-      return body;
-    }
-
-    @Override
-    @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")
-    public void setBody(byte[] bytes)
-    {
-      body = bytes;
-    }
-  }
-
-  private final Interceptor.Builder builder;
-  private final Map<String, String> context;
-
-  InterceptorTestHelper(Interceptor.Builder builder, Map<String, String> context)
-  {
-    this.builder = builder;
-    this.context = context;
-  }
-
-  public void testIntercept_Event()
-  {
-    builder.configure(new Context(context));
-    Interceptor interceptor = builder.build();
-
-    assertArrayEquals("Empty Bytes",
-        "\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("".getBytes())).getBody());
-
-    assertArrayEquals("One Separator",
-        "\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("\002".getBytes())).getBody());
-
-    assertArrayEquals("Two Separators",
-        "\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("\002\002".getBytes())).getBody());
-
-    assertArrayEquals("One Field",
-        "\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First".getBytes())).getBody());
-
-    assertArrayEquals("Two Fields",
-        "First\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("\002First".getBytes())).getBody());
-
-    assertArrayEquals("Two Fields",
-        "\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\001".getBytes())).getBody());
-
-    assertArrayEquals("Two Fields",
-        "Second\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\002Second".getBytes())).getBody());
-
-    assertArrayEquals("Three Fields",
-        "Second\001\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\002Second\002".getBytes())).getBody());
-
-    assertArrayEquals("Three Fields",
-        "\001Second\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\002\002Second".getBytes())).getBody());
-
-    assertArrayEquals("Four Fields",
-        "\001Second\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\002\002Second\002".getBytes())).getBody());
-
-    assertArrayEquals("Five Fields",
-        "\001Second\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\002\002Second\002\002".getBytes())).getBody());
-
-    assertArrayEquals("Six Fields",
-        "\001Second\001\001".getBytes(),
-        interceptor.intercept(new MyEvent("First\002\002Second\002\002\002".getBytes())).getBody());
-  }
-
-  public void testFiles() throws IOException, URISyntaxException
-  {
-    Properties properties = new Properties();
-    properties.load(getClass().getResourceAsStream("/flume/conf/flume-conf.properties"));
-
-    String interceptor = null;
-    for (Entry<Object, Object> entry : properties.entrySet()) {
-      logger.debug("{} => {}", entry.getKey(), entry.getValue());
-
-      if (builder.getClass().getName().equals(entry.getValue().toString())) {
-        String key = entry.getKey().toString();
-        if (key.endsWith(".type")) {
-          interceptor = key.substring(0, key.length() - "type".length());
-          break;
-        }
-      }
-    }
-
-    assertNotNull(builder.getClass().getName(), interceptor);
-    @SuppressWarnings({"null", "ConstantConditions"})
-    final int interceptorLength = interceptor.length();
-
-    HashMap<String, String> map = new HashMap<String, String>();
-    for (Entry<Object, Object> entry : properties.entrySet()) {
-      String key = entry.getKey().toString();
-      if (key.startsWith(interceptor)) {
-        map.put(key.substring(interceptorLength), entry.getValue().toString());
-      }
-    }
-
-    builder.configure(new Context(map));
-    Interceptor interceptorInstance = builder.build();
-
-    URL url = getClass().getResource("/test_data/gentxns/");
-    assertNotNull("Generated Transactions", url);
-
-    int records = 0;
-    File dir = new File(url.toURI());
-    for (File file : dir.listFiles()) {
-      records += processFile(file, interceptorInstance);
-    }
-
-    Assert.assertEquals("Total Records", 2200, records);
-  }
-
-  private int processFile(File file, Interceptor interceptor) throws IOException
-  {
-    InputStream stream = getClass().getResourceAsStream("/test_data/gentxns/" + file.getName());
-    BufferedReader br = new BufferedReader(new InputStreamReader(stream));
-
-    String line;
-    int i = 0;
-    while ((line = br.readLine()) != null) {
-      byte[] body = interceptor.intercept(new MyEvent(line.getBytes())).getBody();
-      RawEvent event = RawEvent.from(body, FIELD_SEPARATOR);
-      Assert.assertEquals("GUID", new Slice(line.getBytes(), 0, 32), event.guid);
-      logger.debug("guid = {}, time = {}", event.guid, event.time);
-      i++;
-    }
-
-    br.close();
-    return i;
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(InterceptorTestHelper.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java b/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
deleted file mode 100644
index c029cd0..0000000
--- a/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.interceptor;
-
-import java.io.Serializable;
-
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- *
- */
-public class RawEvent implements Serializable
-{
-  public Slice guid;
-  public long time;
-  public int dimensionsOffset;
-
-  public Slice getGUID()
-  {
-    return guid;
-  }
-
-  public long getTime()
-  {
-    return time;
-  }
-
-  RawEvent()
-  {
-    /* needed for Kryo serialization */
-  }
-
-  public static RawEvent from(byte[] row, byte separator)
-  {
-    final int rowsize = row.length;
-
-    /*
-     * Let's get the guid out of the current record
-     */
-    int sliceLengh = -1;
-    while (++sliceLengh < rowsize) {
-      if (row[sliceLengh] == separator) {
-        break;
-      }
-    }
-
-    int i = sliceLengh + 1;
-
-    /* let's parse the date */
-    int dateStart = i;
-    while (i < rowsize) {
-      if (row[i++] == separator) {
-        long time = DATE_PARSER.parseMillis(new String(row, dateStart, i - dateStart - 1));
-        RawEvent event = new RawEvent();
-        event.guid = new Slice(row, 0, sliceLengh);
-        event.time = time;
-        event.dimensionsOffset = i;
-        return event;
-      }
-    }
-
-    return null;
-  }
-
-  @Override
-  public int hashCode()
-  {
-    int hash = 5;
-    hash = 61 * hash + (this.guid != null ? this.guid.hashCode() : 0);
-    hash = 61 * hash + (int)(this.time ^ (this.time >>> 32));
-    return hash;
-  }
-
-  @Override
-  public String toString()
-  {
-    return "RawEvent{" + "guid=" + guid + ", time=" + time + '}';
-  }
-
-  @Override
-  public boolean equals(Object obj)
-  {
-    if (obj == null) {
-      return false;
-    }
-    if (getClass() != obj.getClass()) {
-      return false;
-    }
-    final RawEvent other = (RawEvent)obj;
-    if (this.guid != other.guid && (this.guid == null || !this.guid.equals(other.guid))) {
-      return false;
-    }
-    return this.time == other.time;
-  }
-
-  private static final DateTimeFormatter DATE_PARSER = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
-  private static final Logger logger = LoggerFactory.getLogger(RawEvent.class);
-  private static final long serialVersionUID = 201312191312L;
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java b/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
deleted file mode 100644
index 2f162a8..0000000
--- a/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.operator;
-
-import java.util.HashSet;
-import java.util.Set;
-
-import org.junit.Test;
-
-import static org.junit.Assert.assertTrue;
-
-/**
- *
- */
-public class AbstractFlumeInputOperatorTest
-{
-  public AbstractFlumeInputOperatorTest()
-  {
-  }
-
-  @Test
-  public void testThreadLocal()
-  {
-    ThreadLocal<Set<Integer>> tl = new ThreadLocal<Set<Integer>>()
-    {
-      @Override
-      protected Set<Integer> initialValue()
-      {
-        return new HashSet<Integer>();
-      }
-
-    };
-    Set<Integer> get1 = tl.get();
-    get1.add(1);
-    assertTrue("Just Added Value", get1.contains(1));
-
-    Set<Integer> get2 = tl.get();
-    assertTrue("Previously added value", get2.contains(1));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java b/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
deleted file mode 100644
index 7949e63..0000000
--- a/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.sink;
-
-import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.channel.MemoryChannel;
-
-import com.datatorrent.flume.discovery.Discovery;
-import com.datatorrent.netlet.AbstractLengthPrependerClient;
-import com.datatorrent.netlet.DefaultEventLoop;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- *
- */
-public class DTFlumeSinkTest
-{
-  static final String hostname = "localhost";
-  int port = 0;
-
-  @Test
-  @SuppressWarnings("SleepWhileInLoop")
-  public void testServer() throws InterruptedException, IOException
-  {
-    Discovery<byte[]> discovery = new Discovery<byte[]>()
-    {
-      @Override
-      public synchronized void unadvertise(Service<byte[]> service)
-      {
-        notify();
-      }
-
-      @Override
-      public synchronized void advertise(Service<byte[]> service)
-      {
-        port = service.getPort();
-        logger.debug("listening at {}", service);
-        notify();
-      }
-
-      @Override
-      @SuppressWarnings("unchecked")
-      public synchronized Collection<Service<byte[]>> discover()
-      {
-        try {
-          wait();
-        } catch (InterruptedException ie) {
-          throw new RuntimeException(ie);
-        }
-        return Collections.EMPTY_LIST;
-      }
-
-    };
-    DTFlumeSink sink = new DTFlumeSink();
-    sink.setName("TeskSink");
-    sink.setHostname(hostname);
-    sink.setPort(0);
-    sink.setAcceptedTolerance(2000);
-    sink.setChannel(new MemoryChannel());
-    sink.setDiscovery(discovery);
-    sink.start();
-    AbstractLengthPrependerClient client = new AbstractLengthPrependerClient()
-    {
-      private byte[] array;
-      private int offset = 2;
-
-      @Override
-      public void onMessage(byte[] buffer, int offset, int size)
-      {
-        Slice received = new Slice(buffer, offset, size);
-        logger.debug("Client Received = {}", received);
-        Assert.assertEquals(received,
-            new Slice(Arrays.copyOfRange(array, this.offset, array.length), 0, Server.Request.FIXED_SIZE));
-        synchronized (DTFlumeSinkTest.this) {
-          DTFlumeSinkTest.this.notify();
-        }
-      }
-
-      @Override
-      public void connected()
-      {
-        super.connected();
-        array = new byte[Server.Request.FIXED_SIZE + offset];
-        array[offset] = Server.Command.ECHO.getOrdinal();
-        array[offset + 1] = 1;
-        array[offset + 2] = 2;
-        array[offset + 3] = 3;
-        array[offset + 4] = 4;
-        array[offset + 5] = 5;
-        array[offset + 6] = 6;
-        array[offset + 7] = 7;
-        array[offset + 8] = 8;
-        Server.writeLong(array, offset + Server.Request.TIME_OFFSET, System.currentTimeMillis());
-        write(array, offset, Server.Request.FIXED_SIZE);
-      }
-
-    };
-
-    DefaultEventLoop eventloop = new DefaultEventLoop("Eventloop-TestClient");
-    eventloop.start();
-    discovery.discover();
-    try {
-      eventloop.connect(new InetSocketAddress(hostname, port), client);
-      try {
-        synchronized (this) {
-          this.wait();
-        }
-      } finally {
-        eventloop.disconnect(client);
-      }
-    } finally {
-      eventloop.stop();
-    }
-
-    sink.stop();
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSinkTest.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java b/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
deleted file mode 100644
index 8c225d1..0000000
--- a/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.sink;
-
-import java.util.Random;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- *
- */
-public class ServerTest
-{
-  byte[] array;
-
-  public ServerTest()
-  {
-    array = new byte[1024];
-  }
-
-  @Test
-  public void testInt()
-  {
-    Server.writeInt(array, 0, Integer.MAX_VALUE);
-    Assert.assertEquals("Max Integer", Integer.MAX_VALUE, Server.readInt(array, 0));
-
-    Server.writeInt(array, 0, Integer.MIN_VALUE);
-    Assert.assertEquals("Min Integer", Integer.MIN_VALUE, Server.readInt(array, 0));
-
-    Server.writeInt(array, 0, 0);
-    Assert.assertEquals("Zero Integer", 0, Server.readInt(array, 0));
-
-    Random rand = new Random();
-    for (int i = 0; i < 128; i++) {
-      int n = rand.nextInt();
-      if (rand.nextBoolean()) {
-        n = -n;
-      }
-      Server.writeInt(array, 0, n);
-      Assert.assertEquals("Random Integer", n, Server.readInt(array, 0));
-    }
-  }
-
-  @Test
-  public void testLong()
-  {
-    Server.writeLong(array, 0, Integer.MAX_VALUE);
-    Assert.assertEquals("Max Integer", Integer.MAX_VALUE, Server.readLong(array, 0));
-
-    Server.writeLong(array, 0, Integer.MIN_VALUE);
-    Assert.assertEquals("Min Integer", Integer.MIN_VALUE, Server.readLong(array, 0));
-
-    Server.writeLong(array, 0, 0);
-    Assert.assertEquals("Zero Integer", 0L, Server.readLong(array, 0));
-
-    Server.writeLong(array, 0, Long.MAX_VALUE);
-    Assert.assertEquals("Max Long", Long.MAX_VALUE, Server.readLong(array, 0));
-
-    Server.writeLong(array, 0, Long.MIN_VALUE);
-    Assert.assertEquals("Min Long", Long.MIN_VALUE, Server.readLong(array, 0));
-
-    Server.writeLong(array, 0, 0L);
-    Assert.assertEquals("Zero Long", 0L, Server.readLong(array, 0));
-
-    Random rand = new Random();
-    for (int i = 0; i < 128; i++) {
-      long n = rand.nextLong();
-      if (rand.nextBoolean()) {
-        n = -n;
-      }
-      Server.writeLong(array, 0, n);
-      Assert.assertEquals("Random Long", n, Server.readLong(array, 0));
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
deleted file mode 100644
index 6b6adcb..0000000
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.primitives.Ints;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- *
- */
-public class HDFSStorageMatching
-{
-
-  public static void main(String[] args)
-  {
-    HDFSStorage storage = new HDFSStorage();
-    storage.setBaseDir(args[0]);
-    storage.setId(args[1]);
-    storage.setRestore(true);
-    storage.setup(null);
-    int count = 100000000;
-
-    logger.debug(" start time {}", System.currentTimeMillis());
-    int index = 10000;
-    byte[] b = Ints.toByteArray(index);
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    logger.debug(" end time {}", System.currentTimeMillis());
-    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
-    b = storage.retrieve(new byte[8]);
-    int org_index = index;
-    index = 10000;
-    match(b, index);
-    while (true) {
-      index++;
-      b = storage.retrieveNext();
-      if (b == null) {
-        logger.debug(" end time for retrieve {}/{}/{}", System.currentTimeMillis(), index, org_index);
-        return;
-      } else {
-        if (!match(b, index)) {
-          throw new RuntimeException("failed : " + index);
-        }
-      }
-    }
-
-  }
-
-  public static boolean match(byte[] data, int match)
-  {
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    int dataR = Ints.fromByteArray(tempData);
-    //logger.debug("input: {}, output: {}",match,dataR);
-    if (match == dataR) {
-      return true;
-    }
-    return false;
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(HDFSStorageMatching.class);
-}
-

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
deleted file mode 100644
index 098f3f7..0000000
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- *
- */
-public class HDFSStoragePerformance
-{
-
-  public static void main(String[] args)
-  {
-    HDFSStorage storage = new HDFSStorage();
-    storage.setBaseDir(".");
-    storage.setId("gaurav_flume_1");
-    storage.setRestore(true);
-    storage.setup(null);
-    int count = 1000000;
-
-    logger.debug(" start time {}", System.currentTimeMillis());
-    int index = 10000;
-    byte[] b = new byte[1024];
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-    }
-    storage.flush();
-    logger.debug(" end time {}", System.currentTimeMillis());
-    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
-    storage.retrieve(new byte[8]);
-    String inputData = new String(b);
-    index = 1;
-    while (true) {
-      b = storage.retrieveNext();
-      if (b == null) {
-        logger.debug(" end time for retrieve {}", System.currentTimeMillis());
-        return;
-      } else {
-        if (!match(b, inputData)) {
-          throw new RuntimeException("failed : " + index);
-        }
-      }
-
-      index++;
-    }
-
-  }
-
-  public static boolean match(byte[] data, String match)
-  {
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-//    logger.debug("input: {}, output: {}",match,new String(tempData));
-    return (match.equals(new String(tempData)));
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(HDFSStoragePerformance.class);
-}
-


[09/13] apex-malhar git commit: Flume source

Posted by th...@apache.org.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/sink/Server.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/sink/Server.java b/flume/src/main/java/com/datatorrent/flume/sink/Server.java
new file mode 100644
index 0000000..14d9ff4
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/sink/Server.java
@@ -0,0 +1,419 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.sink;
+
+import java.net.InetSocketAddress;
+import java.nio.channels.SelectionKey;
+import java.nio.channels.ServerSocketChannel;
+import java.nio.channels.SocketChannel;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.datatorrent.flume.discovery.Discovery;
+import com.datatorrent.flume.discovery.Discovery.Service;
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.AbstractServer;
+import com.datatorrent.netlet.EventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>
+ * Server class.</p>
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.2
+ */
+public class Server extends AbstractServer
+{
+  private final String id;
+  private final Discovery<byte[]> discovery;
+  private final long acceptedTolerance;
+
+  public Server(String id, Discovery<byte[]> discovery, long acceptedTolerance)
+  {
+    this.id = id;
+    this.discovery = discovery;
+    this.acceptedTolerance = acceptedTolerance;
+  }
+
+  @Override
+  public void handleException(Exception cce, EventLoop el)
+  {
+    logger.error("Server Error", cce);
+    Request r = new Request(Command.SERVER_ERROR, null)
+    {
+      @Override
+      public Slice getAddress()
+      {
+        throw new UnsupportedOperationException("Not supported yet.");
+      }
+
+      @Override
+      public int getEventCount()
+      {
+        throw new UnsupportedOperationException("Not supported yet.");
+      }
+
+      @Override
+      public int getIdleCount()
+      {
+        throw new UnsupportedOperationException("Not supported yet.");
+      }
+
+    };
+    synchronized (requests) {
+      requests.add(r);
+    }
+  }
+
+  private final Service<byte[]> service = new Service<byte[]>()
+  {
+    @Override
+    public String getHost()
+    {
+      return ((InetSocketAddress)getServerAddress()).getHostName();
+    }
+
+    @Override
+    public int getPort()
+    {
+      return ((InetSocketAddress)getServerAddress()).getPort();
+    }
+
+    @Override
+    public byte[] getPayload()
+    {
+      return null;
+    }
+
+    @Override
+    public String getId()
+    {
+      return id;
+    }
+
+    @Override
+    public String toString()
+    {
+      return "Server.Service{id=" + id + ", host=" + getHost() + ", port=" + getPort() + ", payload=" +
+          Arrays.toString(getPayload()) + '}';
+    }
+
+  };
+
+  @Override
+  public void unregistered(final SelectionKey key)
+  {
+    discovery.unadvertise(service);
+    super.unregistered(key);
+  }
+
+  @Override
+  public void registered(final SelectionKey key)
+  {
+    super.registered(key);
+    discovery.advertise(service);
+  }
+
+  public enum Command
+  {
+    ECHO((byte)0),
+    SEEK((byte)1),
+    COMMITTED((byte)2),
+    CHECKPOINTED((byte)3),
+    CONNECTED((byte)4),
+    DISCONNECTED((byte)5),
+    WINDOWED((byte)6),
+    SERVER_ERROR((byte)7);
+
+    Command(byte b)
+    {
+      this.ord = b;
+    }
+
+    public byte getOrdinal()
+    {
+      return ord;
+    }
+
+    public static Command getCommand(byte b)
+    {
+      Command c;
+      switch (b) {
+        case 0:
+          c = ECHO;
+          break;
+
+        case 1:
+          c = SEEK;
+          break;
+
+        case 2:
+          c = COMMITTED;
+          break;
+
+        case 3:
+          c = CHECKPOINTED;
+          break;
+
+        case 4:
+          c = CONNECTED;
+          break;
+
+        case 5:
+          c = DISCONNECTED;
+          break;
+
+        case 6:
+          c = WINDOWED;
+          break;
+
+        case 7:
+          c = SERVER_ERROR;
+          break;
+
+        default:
+          throw new IllegalArgumentException(String.format("No Command defined for ordinal %d", b));
+      }
+
+      assert (b == c.ord);
+      return c;
+    }
+
+    private final byte ord;
+  }
+
+  public final ArrayList<Request> requests = new ArrayList<Request>(4);
+
+  @Override
+  public ClientListener getClientConnection(SocketChannel sc, ServerSocketChannel ssc)
+  {
+    Client lClient = new Client();
+    lClient.connected();
+    return lClient;
+  }
+
+  public class Client extends AbstractLengthPrependerClient
+  {
+
+    @Override
+    public void onMessage(byte[] buffer, int offset, int size)
+    {
+      if (size != Request.FIXED_SIZE) {
+        logger.warn("Invalid Request Received: {} from {}", Arrays.copyOfRange(buffer, offset, offset + size),
+            key.channel());
+        return;
+      }
+
+      long requestTime = Server.readLong(buffer, offset + Request.TIME_OFFSET);
+      if (System.currentTimeMillis() > (requestTime + acceptedTolerance)) {
+        logger.warn("Expired Request Received: {} from {}", Arrays.copyOfRange(buffer, offset, offset + size),
+            key.channel());
+        return;
+      }
+
+      try {
+        if (Command.getCommand(buffer[offset]) == Command.ECHO) {
+          write(buffer, offset, size);
+          return;
+        }
+      } catch (IllegalArgumentException ex) {
+        logger.warn("Invalid Request Received: {} from {}!", Arrays.copyOfRange(buffer, offset, offset + size),
+            key.channel(), ex);
+        return;
+      }
+
+      Request r = Request.getRequest(buffer, offset, this);
+      synchronized (requests) {
+        requests.add(r);
+      }
+    }
+
+    @Override
+    public void disconnected()
+    {
+      synchronized (requests) {
+        requests.add(Request.getRequest(
+            new byte[] {Command.DISCONNECTED.getOrdinal(), 0, 0, 0, 0, 0, 0, 0, 0}, 0, this));
+      }
+      super.disconnected();
+    }
+
+    public boolean write(byte[] address, Slice event)
+    {
+      if (event.offset == 0 && event.length == event.buffer.length) {
+        return write(address, event.buffer);
+      }
+
+      // a better method would be to replace the write implementation and allow it to natively support writing slices
+      return write(address, event.toByteArray());
+    }
+
+  }
+
+  public abstract static class Request
+  {
+    public static final int FIXED_SIZE = 17;
+    public static final int TIME_OFFSET = 9;
+    public final Command type;
+    public final Client client;
+
+    public Request(Command type, Client client)
+    {
+      this.type = type;
+      this.client = client;
+    }
+
+    public abstract Slice getAddress();
+
+    public abstract int getEventCount();
+
+    public abstract int getIdleCount();
+
+    @Override
+    public String toString()
+    {
+      return "Request{" + "type=" + type + '}';
+    }
+
+    public static Request getRequest(final byte[] buffer, final int offset, Client client)
+    {
+      Command command = Command.getCommand(buffer[offset]);
+      switch (command) {
+        case WINDOWED:
+          return new Request(Command.WINDOWED, client)
+          {
+            final int eventCount;
+            final int idleCount;
+
+            {
+              eventCount = Server.readInt(buffer, offset + 1);
+              idleCount = Server.readInt(buffer, offset + 5);
+            }
+
+            @Override
+            public Slice getAddress()
+            {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public int getEventCount()
+            {
+              return eventCount;
+            }
+
+            @Override
+            public int getIdleCount()
+            {
+              return idleCount;
+            }
+
+            @Override
+            public String toString()
+            {
+              return "Request{" + "type=" + type + ", eventCount=" + eventCount + ", idleCount=" + idleCount + '}';
+            }
+
+          };
+
+        default:
+          return new Request(command, client)
+          {
+            final Slice address;
+
+            {
+              address = new Slice(buffer, offset + 1, 8);
+            }
+
+            @Override
+            public Slice getAddress()
+            {
+              return address;
+            }
+
+            @Override
+            public int getEventCount()
+            {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public int getIdleCount()
+            {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public String toString()
+            {
+              return "Request{" + "type=" + type + ", address=" + address + '}';
+            }
+
+          };
+
+      }
+
+    }
+
+  }
+
+  public static int readInt(byte[] buffer, int offset)
+  {
+    return buffer[offset++] & 0xff
+           | (buffer[offset++] & 0xff) << 8
+           | (buffer[offset++] & 0xff) << 16
+           | (buffer[offset++] & 0xff) << 24;
+  }
+
+  public static void writeInt(byte[] buffer, int offset, int i)
+  {
+    buffer[offset++] = (byte)i;
+    buffer[offset++] = (byte)(i >>> 8);
+    buffer[offset++] = (byte)(i >>> 16);
+    buffer[offset++] = (byte)(i >>> 24);
+  }
+
+  public static long readLong(byte[] buffer, int offset)
+  {
+    return (long)buffer[offset++] & 0xff
+           | (long)(buffer[offset++] & 0xff) << 8
+           | (long)(buffer[offset++] & 0xff) << 16
+           | (long)(buffer[offset++] & 0xff) << 24
+           | (long)(buffer[offset++] & 0xff) << 32
+           | (long)(buffer[offset++] & 0xff) << 40
+           | (long)(buffer[offset++] & 0xff) << 48
+           | (long)(buffer[offset++] & 0xff) << 56;
+  }
+
+  public static void writeLong(byte[] buffer, int offset, long l)
+  {
+    buffer[offset++] = (byte)l;
+    buffer[offset++] = (byte)(l >>> 8);
+    buffer[offset++] = (byte)(l >>> 16);
+    buffer[offset++] = (byte)(l >>> 24);
+    buffer[offset++] = (byte)(l >>> 32);
+    buffer[offset++] = (byte)(l >>> 40);
+    buffer[offset++] = (byte)(l >>> 48);
+    buffer[offset++] = (byte)(l >>> 56);
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(Server.class);
+}
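
For orientation, a minimal sketch of the request frame this Server expects from a client, following the same layout the (since renamed) DTFlumeSinkTest client builds in its connected() callback: one command byte, eight address/payload bytes, and the send time written at Request.TIME_OFFSET so stale frames can be rejected against the accepted tolerance. Only the Server class and its constants come from this commit; the wrapper class and its name are illustrative.

    import com.datatorrent.flume.sink.Server;

    // Illustrative sketch only: frames a 17-byte ECHO request in the layout Server.Client#onMessage checks.
    public class RequestFrameSketch
    {
      public static byte[] echoFrame()
      {
        byte[] frame = new byte[Server.Request.FIXED_SIZE];   // FIXED_SIZE == 17
        frame[0] = Server.Command.ECHO.getOrdinal();          // command byte at offset 0
        // bytes 1..8 would carry the 8-byte address; zeros are acceptable for an ECHO
        Server.writeLong(frame, Server.Request.TIME_OFFSET, System.currentTimeMillis());
        return frame;                                         // send via an AbstractLengthPrependerClient#write
      }
    }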

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/source/TestSource.java b/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
new file mode 100644
index 0000000..490ac35
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
@@ -0,0 +1,248 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.source;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import javax.annotation.Nonnull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDrivenSource;
+import org.apache.flume.channel.ChannelProcessor;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.source.AbstractSource;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * <p>TestSource class.</p>
+ *
+ * @since 0.9.4
+ */
+public class TestSource extends AbstractSource implements EventDrivenSource, Configurable
+{
+  public static final String SOURCE_FILE = "sourceFile";
+  public static final String LINE_NUMBER = "lineNumber";
+  public static final String RATE = "rate";
+  public static final String PERCENT_PAST_EVENTS = "percentPastEvents";
+  static byte FIELD_SEPARATOR = 1;
+  static int DEF_PERCENT_PAST_EVENTS = 5;
+  public Timer emitTimer;
+  @Nonnull
+  String filePath;
+  int rate;
+  int numberOfPastEvents;
+  transient List<Row> cache;
+  private transient int startIndex;
+  private transient Random random;
+  private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+  private SimpleDateFormat timeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+
+  public TestSource()
+  {
+    super();
+    this.rate = 2500;
+    this.numberOfPastEvents = DEF_PERCENT_PAST_EVENTS * 25;
+    this.random = new Random();
+
+  }
+
+  @Override
+  public void configure(Context context)
+  {
+    filePath = context.getString(SOURCE_FILE);
+    rate = context.getInteger(RATE, rate);
+    int percentPastEvents = context.getInteger(PERCENT_PAST_EVENTS, DEF_PERCENT_PAST_EVENTS);
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(filePath));
+    try {
+      BufferedReader lineReader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)));
+      try {
+        buildCache(lineReader);
+      } finally {
+        lineReader.close();
+      }
+    } catch (FileNotFoundException e) {
+      throw new RuntimeException(e);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    if (DEF_PERCENT_PAST_EVENTS != percentPastEvents) {
+      numberOfPastEvents = (int)(percentPastEvents / 100.0 * cache.size());
+    }
+  }
+
+  @Override
+  public void start()
+  {
+    super.start();
+    emitTimer = new Timer();
+
+    final ChannelProcessor channel = getChannelProcessor();
+    final int cacheSize = cache.size();
+    emitTimer.scheduleAtFixedRate(new TimerTask()
+    {
+      @Override
+      public void run()
+      {
+        int lastIndex = startIndex + rate;
+        if (lastIndex > cacheSize) {
+          lastIndex -= cacheSize;
+          processBatch(channel, cache.subList(startIndex, cacheSize));
+          startIndex = 0;
+          while (lastIndex > cacheSize) {
+            processBatch(channel, cache);
+            lastIndex -= cacheSize;
+          }
+          processBatch(channel, cache.subList(0, lastIndex));
+        } else {
+          processBatch(channel, cache.subList(startIndex, lastIndex));
+        }
+        startIndex = lastIndex;
+      }
+
+    }, 0, 1000);
+  }
+
+  private void processBatch(ChannelProcessor channelProcessor, List<Row> rows)
+  {
+    if (rows.isEmpty()) {
+      return;
+    }
+
+    int noise = random.nextInt(numberOfPastEvents + 1);
+    Set<Integer> pastIndices = Sets.newHashSet();
+    for (int i = 0; i < noise; i++) {
+      pastIndices.add(random.nextInt(rows.size()));
+    }
+
+    Calendar calendar = Calendar.getInstance();
+    long high = calendar.getTimeInMillis();
+    calendar.add(Calendar.DATE, -2);
+    long low = calendar.getTimeInMillis();
+
+
+
+    List<Event> events = Lists.newArrayList();
+    for (int i = 0; i < rows.size(); i++) {
+      Row eventRow = rows.get(i);
+      if (pastIndices.contains(i)) {
+        long pastTime = (long)((Math.random() * (high - low)) + low);
+        byte[] pastDateField = dateFormat.format(pastTime).getBytes();
+        byte[] pastTimeField = timeFormat.format(pastTime).getBytes();
+
+        System.arraycopy(pastDateField, 0, eventRow.bytes, eventRow.dateFieldStart, pastDateField.length);
+        System.arraycopy(pastTimeField, 0, eventRow.bytes, eventRow.timeFieldStart, pastTimeField.length);
+      } else {
+        calendar.setTimeInMillis(System.currentTimeMillis());
+        byte[] currentDateField = dateFormat.format(calendar.getTime()).getBytes();
+        byte[] currentTimeField = timeFormat.format(calendar.getTime()).getBytes();
+
+        System.arraycopy(currentDateField, 0, eventRow.bytes, eventRow.dateFieldStart, currentDateField.length);
+        System.arraycopy(currentTimeField, 0, eventRow.bytes, eventRow.timeFieldStart, currentTimeField.length);
+      }
+
+      HashMap<String, String> headers = new HashMap<String, String>(2);
+      headers.put(SOURCE_FILE, filePath);
+      headers.put(LINE_NUMBER, String.valueOf(startIndex + i));
+      events.add(EventBuilder.withBody(eventRow.bytes, headers));
+    }
+    channelProcessor.processEventBatch(events);
+  }
+
+  @Override
+  public void stop()
+  {
+    emitTimer.cancel();
+    super.stop();
+  }
+
+  private void buildCache(BufferedReader lineReader) throws IOException
+  {
+    cache = Lists.newArrayListWithCapacity(rate);
+
+    String line;
+    while ((line = lineReader.readLine()) != null) {
+      byte[] row = line.getBytes();
+      Row eventRow = new Row(row);
+      final int rowsize = row.length;
+
+      /* guid */
+      int sliceLengh = -1;
+      while (++sliceLengh < rowsize) {
+        if (row[sliceLengh] == FIELD_SEPARATOR) {
+          break;
+        }
+      }
+      int recordStart = sliceLengh + 1;
+      int pointer = sliceLengh + 1;
+      while (pointer < rowsize) {
+        if (row[pointer++] == FIELD_SEPARATOR) {
+          eventRow.dateFieldStart = recordStart;
+          break;
+        }
+      }
+
+      /* locate the start of the time field */
+      int dateStart = pointer;
+      while (pointer < rowsize) {
+        if (row[pointer++] == FIELD_SEPARATOR) {
+          eventRow.timeFieldStart = dateStart;
+          break;
+        }
+      }
+
+      cache.add(eventRow);
+    }
+  }
+
+  private static class Row
+  {
+    final byte[] bytes;
+    int dateFieldStart;
+    int timeFieldStart;
+//    boolean past;
+
+    Row(byte[] bytes)
+    {
+      this.bytes = bytes;
+    }
+
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(TestSource.class);
+}
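
A quick sketch of how the TestSource above could be wired up programmatically for a local test; the keys are the constants its configure() method reads, while the class name, file path, and values here are purely hypothetical.

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.flume.Context;

    import com.datatorrent.flume.source.TestSource;

    // Illustrative only: the source file path is hypothetical and must exist, since configure() reads it to build the cache.
    public class TestSourceConfigSketch
    {
      public static TestSource configuredSource()
      {
        Map<String, String> props = new HashMap<String, String>();
        props.put(TestSource.SOURCE_FILE, "/tmp/gentxns/sample.txt");   // hypothetical input file
        props.put(TestSource.RATE, "2500");                             // rows replayed per second
        props.put(TestSource.PERCENT_PAST_EVENTS, "5");                 // approx. share of rows rewritten with past timestamps

        TestSource source = new TestSource();
        source.configure(new Context(props));                           // builds the in-memory cache from the file
        return source;
      }
    }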

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java b/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
new file mode 100644
index 0000000..c416418
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
@@ -0,0 +1,131 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.conf.Configurable;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>DebugWrapper class.</p>
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.4
+ */
+public class DebugWrapper implements Storage, Configurable, Component<com.datatorrent.api.Context>
+{
+  HDFSStorage storage = new HDFSStorage();
+
+  @Override
+  public byte[] store(Slice bytes)
+  {
+    byte[] ret = null;
+
+    try {
+      ret = storage.store(bytes);
+    } finally {
+      logger.debug("storage.store(new byte[]{{}});", bytes);
+    }
+
+    return ret;
+  }
+
+  @Override
+  public byte[] retrieve(byte[] identifier)
+  {
+    byte[] ret = null;
+
+    try {
+      ret = storage.retrieve(identifier);
+    } finally {
+      logger.debug("storage.retrieve(new byte[]{{}});", identifier);
+    }
+
+    return ret;
+  }
+
+  @Override
+  public byte[] retrieveNext()
+  {
+    byte[] ret = null;
+    try {
+      ret = storage.retrieveNext();
+    } finally {
+      logger.debug("storage.retrieveNext();");
+    }
+
+    return ret;
+  }
+
+  @Override
+  public void clean(byte[] identifier)
+  {
+    try {
+      storage.clean(identifier);
+    } finally {
+      logger.debug("storage.clean(new byte[]{{}});", identifier);
+    }
+  }
+
+  @Override
+  public void flush()
+  {
+    try {
+      storage.flush();
+    } finally {
+      logger.debug("storage.flush();");
+    }
+  }
+
+  @Override
+  public void configure(Context cntxt)
+  {
+    try {
+      storage.configure(cntxt);
+    } finally {
+      logger.debug("storage.configure({});", cntxt);
+    }
+  }
+
+  @Override
+  public void setup(com.datatorrent.api.Context t1)
+  {
+    try {
+      storage.setup(t1);
+    } finally {
+      logger.debug("storage.setup({});", t1);
+    }
+
+  }
+
+  @Override
+  public void teardown()
+  {
+    try {
+      storage.teardown();
+    } finally {
+      logger.debug("storage.teardown();");
+    }
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(DebugWrapper.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java b/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
new file mode 100644
index 0000000..59c7fd3
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>ErrorMaskingEventCodec class.</p>
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 1.0.4
+ */
+public class ErrorMaskingEventCodec extends EventCodec
+{
+
+  @Override
+  public Object fromByteArray(Slice fragment)
+  {
+    try {
+      return super.fromByteArray(fragment);
+    } catch (RuntimeException re) {
+      logger.warn("Cannot deserialize event {}", fragment, re);
+    }
+
+    return null;
+  }
+
+  @Override
+  public Slice toByteArray(Event event)
+  {
+    try {
+      return super.toByteArray(event);
+    } catch (RuntimeException re) {
+      logger.warn("Cannot serialize event {}", event, re);
+    }
+
+    return null;
+  }
+
+
+  private static final Logger logger = LoggerFactory.getLogger(ErrorMaskingEventCodec.class);
+}
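
ErrorMaskingEventCodec only overrides the two conversion methods of EventCodec so that a RuntimeException during serialization or deserialization is logged and null is returned instead of failing the pipeline. A minimal sketch of where such a codec could be plugged in, mirroring the DAG wiring that ApplicationTest (further down in this commit) uses with the base EventCodec; swapping in ErrorMaskingEventCodec here is an illustrative assumption, not something this commit configures:

    // Inside a StreamingApplication's populateDAG(), as in ApplicationTest below.
    FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
    flume.setConnectAddresses(new String[]{"test:127.0.0.1:8080"});
    // Hypothetical swap: events that cannot be deserialized surface as null
    // tuples rather than aborting the operator.
    flume.setCodec(new ErrorMaskingEventCodec());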

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java b/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
new file mode 100644
index 0000000..03d0d87
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
@@ -0,0 +1,91 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
+import com.datatorrent.api.StreamCodec;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>EventCodec class.</p>
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.4
+ */
+public class EventCodec implements StreamCodec<Event>
+{
+  private final transient Kryo kryo;
+
+  public EventCodec()
+  {
+    this.kryo = new Kryo();
+    this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
+  }
+
+  @Override
+  public Object fromByteArray(Slice fragment)
+  {
+    ByteArrayInputStream is = new ByteArrayInputStream(fragment.buffer, fragment.offset, fragment.length);
+    Input input = new Input(is);
+
+    @SuppressWarnings("unchecked")
+    HashMap<String, String> headers = kryo.readObjectOrNull(input, HashMap.class);
+    byte[] body = kryo.readObjectOrNull(input, byte[].class);
+    return EventBuilder.withBody(body, headers);
+  }
+
+  @Override
+  public Slice toByteArray(Event event)
+  {
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+    Output output = new Output(os);
+
+    Map<String, String> headers = event.getHeaders();
+    if (headers != null && headers.getClass() != HashMap.class) {
+      HashMap<String, String> tmp = new HashMap<String, String>(headers.size());
+      tmp.putAll(headers);
+      headers = tmp;
+    }
+    kryo.writeObjectOrNull(output, headers, HashMap.class);
+    kryo.writeObjectOrNull(output, event.getBody(), byte[].class);
+    output.flush();
+    final byte[] bytes = os.toByteArray();
+    return new Slice(bytes, 0, bytes.length);
+  }
+
+  @Override
+  public int getPartition(Event o)
+  {
+    return o.hashCode();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(EventCodec.class);
+}
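
EventCodec writes the event headers and the body as two separate Kryo objects and wraps the result in a Slice; fromByteArray() rebuilds the event through EventBuilder.withBody(). A minimal round-trip sketch, assuming plain in-JVM use of the codec outside any operator (the class name and header values are illustrative only):

    import java.util.HashMap;

    import org.apache.flume.Event;
    import org.apache.flume.event.EventBuilder;

    import com.datatorrent.flume.storage.EventCodec;
    import com.datatorrent.netlet.util.Slice;

    public class EventCodecRoundTrip
    {
      public static void main(String[] args)
      {
        HashMap<String, String> headers = new HashMap<String, String>();
        headers.put("source", "example");                   // illustrative header
        Event original = EventBuilder.withBody("hello".getBytes(), headers);

        EventCodec codec = new EventCodec();
        Slice slice = codec.toByteArray(original);           // headers and body written via Kryo
        Event restored = (Event)codec.fromByteArray(slice);  // rebuilt through EventBuilder.withBody

        System.out.println(new String(restored.getBody()));  // prints "hello"
      }
    }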

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/resources/flume-conf/flume-conf.sample.properties
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-conf.sample.properties b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
new file mode 100644
index 0000000..9d3e430
--- /dev/null
+++ b/flume/src/main/resources/flume-conf/flume-conf.sample.properties
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#agent1 on  node1
+ agent1.sources = netcatSource
+ agent1.channels = ch1
+ agent1.sinks = dt
+
+# first sink - dt
+ agent1.sinks.dt.type = com.datatorrent.flume.sink.DTFlumeSink
+ agent1.sinks.dt.id = sink1
+ agent1.sinks.dt.hostname = localhost
+ agent1.sinks.dt.port = 8080
+ agent1.sinks.dt.sleepMillis = 7
+ agent1.sinks.dt.throughputAdjustmentFactor = 2
+ agent1.sinks.dt.maximumEventsPerTransaction = 5000
+ agent1.sinks.dt.minimumEventsPerTransaction = 1
+ agent1.sinks.dt.storage = com.datatorrent.flume.storage.HDFSStorage
+ agent1.sinks.dt.storage.restore = false
+ agent1.sinks.dt.storage.baseDir = /tmp/flume101
+ agent1.sinks.dt.channel = ch1
+
+# channels
+ agent1.channels.ch1.type = file
+ agent1.channels.ch1.capacity = 10000000
+ agent1.channels.ch1.transactionCapacity = 10000
+ agent1.channels.ch1.maxFileSize = 67108864
+
+ agent1.sources.netcatSource.type = exec
+ agent1.sources.netcatSource.channels = ch1
+ agent1.sources.netcatSource.command = src/test/bash/subcat_periodically src/test/resources/test_data/dt_spend 10000 1

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/resources/flume-conf/flume-env.sample.sh
----------------------------------------------------------------------
diff --git a/flume/src/main/resources/flume-conf/flume-env.sample.sh b/flume/src/main/resources/flume-conf/flume-env.sample.sh
new file mode 100644
index 0000000..aca341c
--- /dev/null
+++ b/flume/src/main/resources/flume-conf/flume-env.sample.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+# This script runs on a machine that has a maven repository populated under
+# $HOME/.m2. If that's not the case, please adjust the JARPATH variable below
+# to point to a colon-separated list of directories where jar files can be found.
+if test -z "$DT_FLUME_JAR"
+then
+  echo [ERROR]: Environment variable DT_FLUME_JAR should point to a valid jar file which contains the DTFlumeSink class >&2
+  exit 2
+fi
+
+echo JARPATH is set to ${JARPATH:=$HOME/.m2/repository:.}
+if test -z "$JAVA_HOME"
+then
+  JAVA=java
+else
+  JAVA=${JAVA_HOME}/bin/java
+fi
+FLUME_CLASSPATH=`JARPATH=$JARPATH $JAVA -cp $DT_FLUME_JAR com.datatorrent.jarpath.JarPath -N $DT_FLUME_JAR -Xdt-jarpath -Xdt-netlet`

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java b/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
new file mode 100644
index 0000000..4acf764
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/discovery/ZKAssistedDiscoveryTest.java
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.discovery;
+
+import org.codehaus.jackson.type.TypeReference;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.curator.x.discovery.ServiceInstance;
+import org.apache.curator.x.discovery.details.InstanceSerializer;
+
+import com.datatorrent.flume.discovery.Discovery.Service;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+@Ignore
+public class ZKAssistedDiscoveryTest
+{
+  public ZKAssistedDiscoveryTest()
+  {
+  }
+
+  @Test
+  public void testSerialization() throws Exception
+  {
+    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
+    discovery.setServiceName("DTFlumeTest");
+    discovery.setConnectionString("localhost:2181");
+    discovery.setBasePath("/HelloDT");
+    discovery.setup(null);
+    ServiceInstance<byte[]> instance = discovery.getInstance(new Service<byte[]>()
+    {
+      @Override
+      public String getHost()
+      {
+        return "localhost";
+      }
+
+      @Override
+      public int getPort()
+      {
+        return 8080;
+      }
+
+      @Override
+      public byte[] getPayload()
+      {
+        return null;
+      }
+
+      @Override
+      public String getId()
+      {
+        return "localhost8080";
+      }
+
+    });
+    InstanceSerializer<byte[]> instanceSerializer =
+        discovery.getInstanceSerializerFactory().getInstanceSerializer(new TypeReference<ServiceInstance<byte[]>>()
+        {
+        });
+    byte[] serialize = instanceSerializer.serialize(instance);
+    logger.debug("serialized json = {}", new String(serialize));
+    ServiceInstance<byte[]> deserialize = instanceSerializer.deserialize(serialize);
+    assertArrayEquals("Metadata", instance.getPayload(), deserialize.getPayload());
+  }
+
+  @Test
+  public void testDiscover()
+  {
+    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
+    discovery.setServiceName("DTFlumeTest");
+    discovery.setConnectionString("localhost:2181");
+    discovery.setBasePath("/HelloDT");
+    discovery.setup(null);
+    assertNotNull("Discovered Sinks", discovery.discover());
+    discovery.teardown();
+  }
+
+  @Test
+  public void testAdvertize()
+  {
+    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
+    discovery.setServiceName("DTFlumeTest");
+    discovery.setConnectionString("localhost:2181");
+    discovery.setBasePath("/HelloDT");
+    discovery.setup(null);
+
+    Service<byte[]> service = new Service<byte[]>()
+    {
+      @Override
+      public String getHost()
+      {
+        return "chetan";
+      }
+
+      @Override
+      public int getPort()
+      {
+        return 5033;
+      }
+
+      @Override
+      public byte[] getPayload()
+      {
+        return new byte[] {3, 2, 1};
+      }
+
+      @Override
+      public String getId()
+      {
+        return "uniqueId";
+      }
+
+    };
+    discovery.advertise(service);
+    discovery.teardown();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ZKAssistedDiscoveryTest.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java b/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
new file mode 100644
index 0000000..41364c8
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/integration/ApplicationTest.java
@@ -0,0 +1,116 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.integration;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.Context.OperatorContext;
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DAG.Locality;
+import com.datatorrent.api.DefaultInputPort;
+import com.datatorrent.api.LocalMode;
+import com.datatorrent.api.Operator;
+import com.datatorrent.api.StreamingApplication;
+import com.datatorrent.flume.operator.AbstractFlumeInputOperator;
+import com.datatorrent.flume.storage.EventCodec;
+
+/**
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+@Ignore
+public class ApplicationTest implements StreamingApplication
+{
+  public static class FlumeInputOperator extends AbstractFlumeInputOperator<Event>
+  {
+    @Override
+    public Event convert(Event event)
+    {
+      return event;
+    }
+  }
+
+  public static class Counter implements Operator
+  {
+    private int count;
+    private transient Event event;
+    public final transient DefaultInputPort<Event> input = new DefaultInputPort<Event>()
+    {
+      @Override
+      public void process(Event tuple)
+      {
+        count++;
+        event = tuple;
+      }
+
+    };
+
+    @Override
+    public void beginWindow(long windowId)
+    {
+    }
+
+    @Override
+    public void endWindow()
+    {
+      logger.debug("total count = {}, tuple = {}", count, event);
+    }
+
+    @Override
+    public void setup(OperatorContext context)
+    {
+    }
+
+    @Override
+    public void teardown()
+    {
+    }
+
+    private static final Logger logger = LoggerFactory.getLogger(Counter.class);
+  }
+
+  @Override
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+    dag.setAttribute(com.datatorrent.api.Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);
+    FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
+    flume.setConnectAddresses(new String[]{"test:127.0.0.1:8080"});
+    flume.setCodec(new EventCodec());
+    Counter counter = dag.addOperator("Counter", new Counter());
+
+    dag.addStream("Slices", flume.output, counter.input).setLocality(Locality.CONTAINER_LOCAL);
+  }
+
+  @Test
+  public void test()
+  {
+    try {
+      LocalMode.runApp(this, Integer.MAX_VALUE);
+    } catch (Exception ex) {
+      logger.warn("The dag does not seem to be testable yet; if it is, remove this exception handling", ex);
+    }
+
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ApplicationTest.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
new file mode 100644
index 0000000..464df42
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptorTest.java
@@ -0,0 +1,85 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.interceptor;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.flume.Context;
+import org.apache.flume.interceptor.Interceptor;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+public class ColumnFilteringInterceptorTest
+{
+  private static InterceptorTestHelper helper;
+
+  @BeforeClass
+  public static void startUp()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, Byte.toString((byte)1));
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringInterceptor.Constants.COLUMNS, "1 2 3");
+
+    helper = new InterceptorTestHelper(new ColumnFilteringInterceptor.Builder(), contextMap);
+  }
+
+  @Test
+  public void testInterceptEvent()
+  {
+    helper.testIntercept_Event();
+  }
+
+  @Test
+  public void testFiles() throws IOException, URISyntaxException
+  {
+    helper.testFiles();
+  }
+
+  @Test
+  public void testInterceptEventWithColumnZero()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, Byte.toString((byte)1));
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringInterceptor.Constants.COLUMNS, "0");
+
+    ColumnFilteringInterceptor.Builder builder = new ColumnFilteringInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Empty Bytes",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("".getBytes())).getBody());
+
+    assertArrayEquals("One Field",
+        "First\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("\002First".getBytes())).getBody());
+  }
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java b/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
new file mode 100644
index 0000000..739184f
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/InterceptorTestHelper.java
@@ -0,0 +1,214 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.interceptor;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.junit.Assert;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+import com.datatorrent.netlet.util.Slice;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+public class InterceptorTestHelper
+{
+  private static final byte FIELD_SEPARATOR = 1;
+
+  static class MyEvent implements Event
+  {
+    byte[] body;
+
+    MyEvent(byte[] bytes)
+    {
+      body = bytes;
+    }
+
+    @Override
+    public Map<String, String> getHeaders()
+    {
+      return null;
+    }
+
+    @Override
+    public void setHeaders(Map<String, String> map)
+    {
+    }
+
+    @Override
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    public byte[] getBody()
+    {
+      return body;
+    }
+
+    @Override
+    @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")
+    public void setBody(byte[] bytes)
+    {
+      body = bytes;
+    }
+  }
+
+  private final Interceptor.Builder builder;
+  private final Map<String, String> context;
+
+  InterceptorTestHelper(Interceptor.Builder builder, Map<String, String> context)
+  {
+    this.builder = builder;
+    this.context = context;
+  }
+
+  public void testIntercept_Event()
+  {
+    builder.configure(new Context(context));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Empty Bytes",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("".getBytes())).getBody());
+
+    assertArrayEquals("One Separator",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("\002".getBytes())).getBody());
+
+    assertArrayEquals("Two Separators",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("\002\002".getBytes())).getBody());
+
+    assertArrayEquals("One Field",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "First\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("\002First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\001".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "Second\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002Second".getBytes())).getBody());
+
+    assertArrayEquals("Three Fields",
+        "Second\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002Second\002".getBytes())).getBody());
+
+    assertArrayEquals("Three Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second".getBytes())).getBody());
+
+    assertArrayEquals("Four Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second\002".getBytes())).getBody());
+
+    assertArrayEquals("Five Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second\002\002".getBytes())).getBody());
+
+    assertArrayEquals("Six Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second\002\002\002".getBytes())).getBody());
+  }
+
+  public void testFiles() throws IOException, URISyntaxException
+  {
+    Properties properties = new Properties();
+    properties.load(getClass().getResourceAsStream("/flume/conf/flume-conf.properties"));
+
+    String interceptor = null;
+    for (Entry<Object, Object> entry : properties.entrySet()) {
+      logger.debug("{} => {}", entry.getKey(), entry.getValue());
+
+      if (builder.getClass().getName().equals(entry.getValue().toString())) {
+        String key = entry.getKey().toString();
+        if (key.endsWith(".type")) {
+          interceptor = key.substring(0, key.length() - "type".length());
+          break;
+        }
+      }
+    }
+
+    assertNotNull(builder.getClass().getName(), interceptor);
+    @SuppressWarnings({"null", "ConstantConditions"})
+    final int interceptorLength = interceptor.length();
+
+    HashMap<String, String> map = new HashMap<String, String>();
+    for (Entry<Object, Object> entry : properties.entrySet()) {
+      String key = entry.getKey().toString();
+      if (key.startsWith(interceptor)) {
+        map.put(key.substring(interceptorLength), entry.getValue().toString());
+      }
+    }
+
+    builder.configure(new Context(map));
+    Interceptor interceptorInstance = builder.build();
+
+    URL url = getClass().getResource("/test_data/gentxns/");
+    assertNotNull("Generated Transactions", url);
+
+    int records = 0;
+    File dir = new File(url.toURI());
+    for (File file : dir.listFiles()) {
+      records += processFile(file, interceptorInstance);
+    }
+
+    Assert.assertEquals("Total Records", 2200, records);
+  }
+
+  private int processFile(File file, Interceptor interceptor) throws IOException
+  {
+    InputStream stream = getClass().getResourceAsStream("/test_data/gentxns/" + file.getName());
+    BufferedReader br = new BufferedReader(new InputStreamReader(stream));
+
+    String line;
+    int i = 0;
+    while ((line = br.readLine()) != null) {
+      byte[] body = interceptor.intercept(new MyEvent(line.getBytes())).getBody();
+      RawEvent event = RawEvent.from(body, FIELD_SEPARATOR);
+      Assert.assertEquals("GUID", new Slice(line.getBytes(), 0, 32), event.guid);
+      logger.debug("guid = {}, time = {}", event.guid, event.time);
+      i++;
+    }
+
+    br.close();
+    return i;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(InterceptorTestHelper.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java b/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
new file mode 100644
index 0000000..049609b
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/RawEvent.java
@@ -0,0 +1,119 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.interceptor;
+
+import java.io.Serializable;
+
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+public class RawEvent implements Serializable
+{
+  public Slice guid;
+  public long time;
+  public int dimensionsOffset;
+
+  public Slice getGUID()
+  {
+    return guid;
+  }
+
+  public long getTime()
+  {
+    return time;
+  }
+
+  RawEvent()
+  {
+    /* needed for Kryo serialization */
+  }
+
+  public static RawEvent from(byte[] row, byte separator)
+  {
+    final int rowsize = row.length;
+
+    /*
+     * Let's get the guid out of the current record
+     */
+    int sliceLengh = -1;
+    while (++sliceLengh < rowsize) {
+      if (row[sliceLengh] == separator) {
+        break;
+      }
+    }
+
+    int i = sliceLengh + 1;
+
+    /* let's parse the date */
+    int dateStart = i;
+    while (i < rowsize) {
+      if (row[i++] == separator) {
+        long time = DATE_PARSER.parseMillis(new String(row, dateStart, i - dateStart - 1));
+        RawEvent event = new RawEvent();
+        event.guid = new Slice(row, 0, sliceLengh);
+        event.time = time;
+        event.dimensionsOffset = i;
+        return event;
+      }
+    }
+
+    return null;
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int hash = 5;
+    hash = 61 * hash + (this.guid != null ? this.guid.hashCode() : 0);
+    hash = 61 * hash + (int)(this.time ^ (this.time >>> 32));
+    return hash;
+  }
+
+  @Override
+  public String toString()
+  {
+    return "RawEvent{" + "guid=" + guid + ", time=" + time + '}';
+  }
+
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    final RawEvent other = (RawEvent)obj;
+    if (this.guid != other.guid && (this.guid == null || !this.guid.equals(other.guid))) {
+      return false;
+    }
+    return this.time == other.time;
+  }
+
+  private static final DateTimeFormatter DATE_PARSER = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
+  private static final Logger logger = LoggerFactory.getLogger(RawEvent.class);
+  private static final long serialVersionUID = 201312191312L;
+}
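
RawEvent.from() treats everything up to the first separator as the GUID slice, parses the following field as a "yyyy-MM-dd HH:mm:ss" timestamp, and records where the remaining dimensions start. A minimal sketch of that parsing contract, assuming the same field separator (byte 1) and 32-character GUID that InterceptorTestHelper works with; the record contents and class name are illustrative only:

    import com.datatorrent.flume.interceptor.RawEvent;

    public class RawEventExample
    {
      public static void main(String[] args)
      {
        // Illustrative record: 32-byte GUID, timestamp, then the remaining columns.
        byte[] row = "0123456789abcdef0123456789abcdef\0012013-12-15 00:00:00\001rest-of-record".getBytes();

        RawEvent event = RawEvent.from(row, (byte)1);
        // event.guid covers the first 32 bytes, event.time is the parsed epoch millis,
        // and event.dimensionsOffset points just past the timestamp's separator.
        System.out.println(event);   // RawEvent{guid=..., time=...}
      }
    }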

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java b/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
new file mode 100644
index 0000000..a615496
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/operator/AbstractFlumeInputOperatorTest.java
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.operator;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+public class AbstractFlumeInputOperatorTest
+{
+  public AbstractFlumeInputOperatorTest()
+  {
+  }
+
+  @Test
+  public void testThreadLocal()
+  {
+    ThreadLocal<Set<Integer>> tl = new ThreadLocal<Set<Integer>>()
+    {
+      @Override
+      protected Set<Integer> initialValue()
+      {
+        return new HashSet<Integer>();
+      }
+
+    };
+    Set<Integer> get1 = tl.get();
+    get1.add(1);
+    assertTrue("Just Added Value", get1.contains(1));
+
+    Set<Integer> get2 = tl.get();
+    assertTrue("Previously added value", get2.contains(1));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java b/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
new file mode 100644
index 0000000..833a353
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/sink/DTFlumeSinkTest.java
@@ -0,0 +1,143 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.sink;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.channel.MemoryChannel;
+
+import com.datatorrent.flume.discovery.Discovery;
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+public class DTFlumeSinkTest
+{
+  static final String hostname = "localhost";
+  int port = 0;
+
+  @Test
+  @SuppressWarnings("SleepWhileInLoop")
+  public void testServer() throws InterruptedException, IOException
+  {
+    Discovery<byte[]> discovery = new Discovery<byte[]>()
+    {
+      @Override
+      public synchronized void unadvertise(Service<byte[]> service)
+      {
+        notify();
+      }
+
+      @Override
+      public synchronized void advertise(Service<byte[]> service)
+      {
+        port = service.getPort();
+        logger.debug("listening at {}", service);
+        notify();
+      }
+
+      @Override
+      @SuppressWarnings("unchecked")
+      public synchronized Collection<Service<byte[]>> discover()
+      {
+        try {
+          wait();
+        } catch (InterruptedException ie) {
+          throw new RuntimeException(ie);
+        }
+        return Collections.EMPTY_LIST;
+      }
+
+    };
+    DTFlumeSink sink = new DTFlumeSink();
+    sink.setName("TeskSink");
+    sink.setHostname(hostname);
+    sink.setPort(0);
+    sink.setAcceptedTolerance(2000);
+    sink.setChannel(new MemoryChannel());
+    sink.setDiscovery(discovery);
+    sink.start();
+    AbstractLengthPrependerClient client = new AbstractLengthPrependerClient()
+    {
+      private byte[] array;
+      private int offset = 2;
+
+      @Override
+      public void onMessage(byte[] buffer, int offset, int size)
+      {
+        Slice received = new Slice(buffer, offset, size);
+        logger.debug("Client Received = {}", received);
+        Assert.assertEquals(received,
+            new Slice(Arrays.copyOfRange(array, this.offset, array.length), 0, Server.Request.FIXED_SIZE));
+        synchronized (DTFlumeSinkTest.this) {
+          DTFlumeSinkTest.this.notify();
+        }
+      }
+
+      @Override
+      public void connected()
+      {
+        super.connected();
+        array = new byte[Server.Request.FIXED_SIZE + offset];
+        array[offset] = Server.Command.ECHO.getOrdinal();
+        array[offset + 1] = 1;
+        array[offset + 2] = 2;
+        array[offset + 3] = 3;
+        array[offset + 4] = 4;
+        array[offset + 5] = 5;
+        array[offset + 6] = 6;
+        array[offset + 7] = 7;
+        array[offset + 8] = 8;
+        Server.writeLong(array, offset + Server.Request.TIME_OFFSET, System.currentTimeMillis());
+        write(array, offset, Server.Request.FIXED_SIZE);
+      }
+
+    };
+
+    DefaultEventLoop eventloop = new DefaultEventLoop("Eventloop-TestClient");
+    eventloop.start();
+    discovery.discover();
+    try {
+      eventloop.connect(new InetSocketAddress(hostname, port), client);
+      try {
+        synchronized (this) {
+          this.wait();
+        }
+      } finally {
+        eventloop.disconnect(client);
+      }
+    } finally {
+      eventloop.stop();
+    }
+
+    sink.stop();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSinkTest.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java b/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
new file mode 100644
index 0000000..64495db
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/sink/ServerTest.java
@@ -0,0 +1,92 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.sink;
+
+import java.util.Random;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ */
+public class ServerTest
+{
+  byte[] array;
+
+  public ServerTest()
+  {
+    array = new byte[1024];
+  }
+
+  @Test
+  public void testInt()
+  {
+    Server.writeInt(array, 0, Integer.MAX_VALUE);
+    Assert.assertEquals("Max Integer", Integer.MAX_VALUE, Server.readInt(array, 0));
+
+    Server.writeInt(array, 0, Integer.MIN_VALUE);
+    Assert.assertEquals("Min Integer", Integer.MIN_VALUE, Server.readInt(array, 0));
+
+    Server.writeInt(array, 0, 0);
+    Assert.assertEquals("Zero Integer", 0, Server.readInt(array, 0));
+
+    Random rand = new Random();
+    for (int i = 0; i < 128; i++) {
+      int n = rand.nextInt();
+      if (rand.nextBoolean()) {
+        n = -n;
+      }
+      Server.writeInt(array, 0, n);
+      Assert.assertEquals("Random Integer", n, Server.readInt(array, 0));
+    }
+  }
+
+  @Test
+  public void testLong()
+  {
+    Server.writeLong(array, 0, Integer.MAX_VALUE);
+    Assert.assertEquals("Max Integer", Integer.MAX_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, Integer.MIN_VALUE);
+    Assert.assertEquals("Min Integer", Integer.MIN_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, 0);
+    Assert.assertEquals("Zero Integer", 0L, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, Long.MAX_VALUE);
+    Assert.assertEquals("Max Long", Long.MAX_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, Long.MIN_VALUE);
+    Assert.assertEquals("Min Long", Long.MIN_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, 0L);
+    Assert.assertEquals("Zero Long", 0L, Server.readLong(array, 0));
+
+    Random rand = new Random();
+    for (int i = 0; i < 128; i++) {
+      long n = rand.nextLong();
+      if (rand.nextBoolean()) {
+        n = -n;
+      }
+      Server.writeLong(array, 0, n);
+      Assert.assertEquals("Random Long", n, Server.readLong(array, 0));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/flume/conf/flume-conf.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-conf.properties b/flume/src/test/resources/flume/conf/flume-conf.properties
new file mode 100644
index 0000000..c892c53
--- /dev/null
+++ b/flume/src/test/resources/flume/conf/flume-conf.properties
@@ -0,0 +1,85 @@
+#
+# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#agent1 on  node1
+agent1.channels = ch1
+agent1.sources = netcatSource
+agent1.sinks = dt
+
+# channels
+agent1.channels.ch1.type = file
+agent1.channels.ch1.capacity = 10000000
+agent1.channels.ch1.transactionCapacity = 10000
+agent1.channels.ch1.maxFileSize = 67108864
+
+agent1.sources.netcatSource.type = exec
+agent1.sources.netcatSource.channels = ch1
+agent1.sources.netcatSource.command = src/test/bash/subcat_periodically src/test/resources/test_data/dt_spend 10000 1
+# Pick and Reorder the columns we need from a larger record for efficiency
+  agent1.sources.netcatSource.interceptors = columnchooser
+  agent1.sources.netcatSource.interceptors.columnchooser.type = com.datatorrent.flume.interceptor.ColumnFilteringInterceptor$Builder
+  agent1.sources.netcatSource.interceptors.columnchooser.srcSeparator = 2
+  agent1.sources.netcatSource.interceptors.columnchooser.dstSeparator = 1
+  agent1.sources.netcatSource.interceptors.columnchooser.columns = 0 43 62 69 68 139 190 70 71 52 75 37 39 42 191 138
+
+ agent2.sources.netcatSource.interceptors.columnchooser.type = com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor$Builder
+ agent2.sources.netcatSource.interceptors.columnchooser.srcSeparator = 2
+ agent2.sources.netcatSource.interceptors.columnchooser.columnsFormatter = {0}\u0001{43}\u0001{62}\u0001{69}\u0001{68}\u0001{139}\u0001{190}\u0001{70}\u0001{71}\u0001{52}\u0001{75}\u0001{37}\u0001{39}\u0001{42}\u0001{191}\u0001{138}\u0001
+
+# index  -- description -- type if different
+#  0 Slice guid; // long
+#  43 public long time // yyyy-MM-dd HH:mm:ss
+#  62 public long adv_id;
+#  69 public int cmp_type; // string
+#  68 public long cmp_id;
+#  139 public long line_id;
+#  190 public long bslice_id;
+#  70 public long ao_id;
+#  71 public long creative_id;
+#  52 public long algo_id;
+#  75 public int device_model_id; // string
+#  37 public long impressions;
+#  39 public long clicks;
+#  42 public double spend;
+#  191 public double bonus_spend;
+#  138 public double spend_local;
+#
+
+# first sink - dt
+agent1.sinks.dt.id = CEVL00P
+agent1.sinks.dt.type = com.datatorrent.flume.sink.DTFlumeSink
+agent1.sinks.dt.hostname = localhost
+agent1.sinks.dt.port = 8080
+agent1.sinks.dt.sleepMillis = 7
+agent1.sinks.dt.throughputAdjustmentFactor = 2
+agent1.sinks.dt.maximumEventsPerTransaction = 5000
+agent1.sinks.dt.minimumEventsPerTransaction = 1
+
+# Ensure that we do not lose the data handed over to us by flume.
+    agent1.sinks.dt.storage = com.datatorrent.flume.storage.HDFSStorage
+    agent1.sinks.dt.storage.restore = false
+    agent1.sinks.dt.storage.baseDir = /tmp/flume101
+    agent1.sinks.dt.channel = ch1
+
+# Ensure that we are able to detect flume sinks (and failures) automatically.
+   agent1.sinks.dt.discovery = com.datatorrent.flume.discovery.ZKAssistedDiscovery
+   agent1.sinks.dt.discovery.connectionString = 127.0.0.1:2181
+   agent1.sinks.dt.discovery.basePath = /HelloDT
+   agent1.sinks.dt.discovery.connectionTimeoutMillis = 1000
+   agent1.sinks.dt.discovery.connectionRetryCount = 10
+   agent1.sinks.dt.discovery.connectionRetrySleepMillis = 500
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/flume/conf/flume-env.sh
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/flume/conf/flume-env.sh b/flume/src/test/resources/flume/conf/flume-env.sh
new file mode 100644
index 0000000..c2232ea
--- /dev/null
+++ b/flume/src/test/resources/flume/conf/flume-env.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+# This script runs on a machine that has a maven repository populated under
+# $HOME/.m2. If that's not the case, please adjust the JARPATH variable below
+# to point to a colon-separated list of directories where jar files can be found.
+if test -z "$DT_FLUME_JAR"
+then
+  echo [ERROR]: Environment variable DT_FLUME_JAR should point to a valid jar file which contains the DTFlumeSink class >&2
+  exit 2
+fi
+
+echo JARPATH is set to ${JARPATH:=$HOME/.m2/repository:.}
+if test -z "$JAVA_HOME"
+then
+  JAVA=java
+else
+  JAVA=${JAVA_HOME}/bin/java
+fi
+FLUME_CLASSPATH=`JARPATH=$JARPATH $JAVA -cp $DT_FLUME_JAR com.datatorrent.jarpath.JarPath -N $DT_FLUME_JAR -Xdt-jarpath -Xdt-netlet`
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/log4j.properties b/flume/src/test/resources/log4j.properties
new file mode 100644
index 0000000..ac0a107
--- /dev/null
+++ b/flume/src/test/resources/log4j.properties
@@ -0,0 +1,38 @@
+#
+# Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+log4j.rootLogger=INFO,CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+
+log4j.appender.malhar=org.apache.log4j.RollingFileAppender
+log4j.appender.malhar.layout=org.apache.log4j.PatternLayout
+log4j.appender.malhar.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+#log4j.appender.malhar.File=/tmp/app.log
+
+# to enable, add SYSLOG to rootLogger
+log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender
+log4j.appender.SYSLOG.syslogHost=127.0.0.1
+log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout
+log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n
+log4j.appender.SYSLOG.Facility=LOCAL1
+
+#log4j.logger.org=info
+#log4j.logger.org.apache.commons.beanutils=warn
+log4j.logger.com.datatorrent=debug

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121500
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121500 b/flume/src/test/resources/test_data/gentxns/2013121500
new file mode 100644
index 0000000..3ce5646
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121500 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121501
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121501 b/flume/src/test/resources/test_data/gentxns/2013121501
new file mode 100644
index 0000000..b2e70c0
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121501 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121502
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121502 b/flume/src/test/resources/test_data/gentxns/2013121502
new file mode 100644
index 0000000..ec13862
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121502 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121503
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121503 b/flume/src/test/resources/test_data/gentxns/2013121503
new file mode 100644
index 0000000..8267dd3
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121503 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121504
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121504 b/flume/src/test/resources/test_data/gentxns/2013121504
new file mode 100644
index 0000000..addfe62
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121504 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121505
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121505 b/flume/src/test/resources/test_data/gentxns/2013121505
new file mode 100644
index 0000000..d76aa9f
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121505 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121506
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121506 b/flume/src/test/resources/test_data/gentxns/2013121506
new file mode 100644
index 0000000..2f5bbb6
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121506 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121507
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121507 b/flume/src/test/resources/test_data/gentxns/2013121507
new file mode 100644
index 0000000..a022dad
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121507 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121508
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121508 b/flume/src/test/resources/test_data/gentxns/2013121508
new file mode 100644
index 0000000..d1e7f5c
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121508 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121509
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121509 b/flume/src/test/resources/test_data/gentxns/2013121509
new file mode 100644
index 0000000..10d61de
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121509 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121510
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121510 b/flume/src/test/resources/test_data/gentxns/2013121510
new file mode 100644
index 0000000..c2f76c8
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121510 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121511
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121511 b/flume/src/test/resources/test_data/gentxns/2013121511
new file mode 100644
index 0000000..bf16cfe
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121511 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121512
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121512 b/flume/src/test/resources/test_data/gentxns/2013121512
new file mode 100644
index 0000000..fe75419
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121512 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121513
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121513 b/flume/src/test/resources/test_data/gentxns/2013121513
new file mode 100644
index 0000000..3094cae
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121513 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121514
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121514 b/flume/src/test/resources/test_data/gentxns/2013121514
new file mode 100644
index 0000000..6e00e4a
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121514 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121515
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121515 b/flume/src/test/resources/test_data/gentxns/2013121515
new file mode 100644
index 0000000..b860e43
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121515 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121516
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121516 b/flume/src/test/resources/test_data/gentxns/2013121516
new file mode 100644
index 0000000..dfb5854
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121516 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121517
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121517 b/flume/src/test/resources/test_data/gentxns/2013121517
new file mode 100644
index 0000000..c8da2cc
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121517 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121518
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121518 b/flume/src/test/resources/test_data/gentxns/2013121518
new file mode 100644
index 0000000..2cb628b
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121518 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121519
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121519 b/flume/src/test/resources/test_data/gentxns/2013121519
new file mode 100644
index 0000000..6fab9d9
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121519 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121520
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121520 b/flume/src/test/resources/test_data/gentxns/2013121520
new file mode 100644
index 0000000..ba56d49
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121520 differ

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/test/resources/test_data/gentxns/2013121521
----------------------------------------------------------------------
diff --git a/flume/src/test/resources/test_data/gentxns/2013121521 b/flume/src/test/resources/test_data/gentxns/2013121521
new file mode 100644
index 0000000..37de926
Binary files /dev/null and b/flume/src/test/resources/test_data/gentxns/2013121521 differ


[06/13] apex-malhar git commit: Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.

Posted by th...@apache.org.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/sink/Server.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/sink/Server.java b/flume/src/main/java/com/datatorrent/flume/sink/Server.java
deleted file mode 100644
index 03c1ff0..0000000
--- a/flume/src/main/java/com/datatorrent/flume/sink/Server.java
+++ /dev/null
@@ -1,420 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.sink;
-
-import java.net.InetSocketAddress;
-import java.nio.channels.SelectionKey;
-import java.nio.channels.ServerSocketChannel;
-import java.nio.channels.SocketChannel;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.datatorrent.flume.discovery.Discovery;
-import com.datatorrent.flume.discovery.Discovery.Service;
-import com.datatorrent.netlet.AbstractLengthPrependerClient;
-import com.datatorrent.netlet.AbstractServer;
-import com.datatorrent.netlet.EventLoop;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * <p>
- * Server class.</p>
- *
- * @since 0.9.2
- */
-public class Server extends AbstractServer
-{
-  private final String id;
-  private final Discovery<byte[]> discovery;
-  private final long acceptedTolerance;
-
-  public Server(String id, Discovery<byte[]> discovery, long acceptedTolerance)
-  {
-    this.id = id;
-    this.discovery = discovery;
-    this.acceptedTolerance = acceptedTolerance;
-  }
-
-  @Override
-  public void handleException(Exception cce, EventLoop el)
-  {
-    logger.error("Server Error", cce);
-    Request r = new Request(Command.SERVER_ERROR, null)
-    {
-      @Override
-      public Slice getAddress()
-      {
-        throw new UnsupportedOperationException("Not supported yet.");
-      }
-
-      @Override
-      public int getEventCount()
-      {
-        throw new UnsupportedOperationException("Not supported yet.");
-      }
-
-      @Override
-      public int getIdleCount()
-      {
-        throw new UnsupportedOperationException("Not supported yet.");
-      }
-
-    };
-    synchronized (requests) {
-      requests.add(r);
-    }
-  }
-
-  private final Service<byte[]> service = new Service<byte[]>()
-  {
-    @Override
-    public String getHost()
-    {
-      return ((InetSocketAddress)getServerAddress()).getHostName();
-    }
-
-    @Override
-    public int getPort()
-    {
-      return ((InetSocketAddress)getServerAddress()).getPort();
-    }
-
-    @Override
-    public byte[] getPayload()
-    {
-      return null;
-    }
-
-    @Override
-    public String getId()
-    {
-      return id;
-    }
-
-    @Override
-    public String toString()
-    {
-      return "Server.Service{id=" + id + ", host=" + getHost() + ", port=" + getPort() + ", payload=" +
-          Arrays.toString(getPayload()) + '}';
-    }
-
-  };
-
-  @Override
-  public void unregistered(final SelectionKey key)
-  {
-    discovery.unadvertise(service);
-    super.unregistered(key);
-  }
-
-  @Override
-  public void registered(final SelectionKey key)
-  {
-    super.registered(key);
-    discovery.advertise(service);
-  }
-
-  public enum Command
-  {
-    ECHO((byte)0),
-    SEEK((byte)1),
-    COMMITTED((byte)2),
-    CHECKPOINTED((byte)3),
-    CONNECTED((byte)4),
-    DISCONNECTED((byte)5),
-    WINDOWED((byte)6),
-    SERVER_ERROR((byte)7);
-
-    Command(byte b)
-    {
-      this.ord = b;
-    }
-
-    public byte getOrdinal()
-    {
-      return ord;
-    }
-
-    public static Command getCommand(byte b)
-    {
-      Command c;
-      switch (b) {
-        case 0:
-          c = ECHO;
-          break;
-
-        case 1:
-          c = SEEK;
-          break;
-
-        case 2:
-          c = COMMITTED;
-          break;
-
-        case 3:
-          c = CHECKPOINTED;
-          break;
-
-        case 4:
-          c = CONNECTED;
-          break;
-
-        case 5:
-          c = DISCONNECTED;
-          break;
-
-        case 6:
-          c = WINDOWED;
-          break;
-
-        case 7:
-          c = SERVER_ERROR;
-          break;
-
-        default:
-          throw new IllegalArgumentException(String.format("No Command defined for ordinal %b", b));
-      }
-
-      assert (b == c.ord);
-      return c;
-    }
-
-    private final byte ord;
-  }
-
-  public final ArrayList<Request> requests = new ArrayList<Request>(4);
-
-  @Override
-  public ClientListener getClientConnection(SocketChannel sc, ServerSocketChannel ssc)
-  {
-    Client lClient = new Client();
-    lClient.connected();
-    return lClient;
-  }
-
-  public class Client extends AbstractLengthPrependerClient
-  {
-
-    @Override
-    public void onMessage(byte[] buffer, int offset, int size)
-    {
-      if (size != Request.FIXED_SIZE) {
-        logger.warn("Invalid Request Received: {} from {}", Arrays.copyOfRange(buffer, offset, offset + size),
-            key.channel());
-        return;
-      }
-
-      long requestTime = Server.readLong(buffer, offset + Request.TIME_OFFSET);
-      if (System.currentTimeMillis() > (requestTime + acceptedTolerance)) {
-        logger.warn("Expired Request Received: {} from {}", Arrays.copyOfRange(buffer, offset, offset + size),
-            key.channel());
-        return;
-      }
-
-      try {
-        if (Command.getCommand(buffer[offset]) == Command.ECHO) {
-          write(buffer, offset, size);
-          return;
-        }
-      } catch (IllegalArgumentException ex) {
-        logger.warn("Invalid Request Received: {} from {}!", Arrays.copyOfRange(buffer, offset, offset + size),
-            key.channel(), ex);
-        return;
-      }
-
-      Request r = Request.getRequest(buffer, offset, this);
-      synchronized (requests) {
-        requests.add(r);
-      }
-    }
-
-    @Override
-    public void disconnected()
-    {
-      synchronized (requests) {
-        requests.add(Request.getRequest(
-            new byte[] {Command.DISCONNECTED.getOrdinal(), 0, 0, 0, 0, 0, 0, 0, 0}, 0, this));
-      }
-      super.disconnected();
-    }
-
-    public boolean write(byte[] address, Slice event)
-    {
-      if (event.offset == 0 && event.length == event.buffer.length) {
-        return write(address, event.buffer);
-      }
-
-      // a better method would be to replace the write implementation and allow it to natively support writing slices
-      return write(address, event.toByteArray());
-    }
-
-  }
-
-  public abstract static class Request
-  {
-    public static final int FIXED_SIZE = 17;
-    public static final int TIME_OFFSET = 9;
-    public final Command type;
-    public final Client client;
-
-    public Request(Command type, Client client)
-    {
-      this.type = type;
-      this.client = client;
-    }
-
-    public abstract Slice getAddress();
-
-    public abstract int getEventCount();
-
-    public abstract int getIdleCount();
-
-    @Override
-    public String toString()
-    {
-      return "Request{" + "type=" + type + '}';
-    }
-
-    public static Request getRequest(final byte[] buffer, final int offset, Client client)
-    {
-      Command command = Command.getCommand(buffer[offset]);
-      switch (command) {
-        case WINDOWED:
-          return new Request(Command.WINDOWED, client)
-          {
-            final int eventCount;
-            final int idleCount;
-
-            {
-              eventCount = Server.readInt(buffer, offset + 1);
-              idleCount = Server.readInt(buffer, offset + 5);
-            }
-
-            @Override
-            public Slice getAddress()
-            {
-              throw new UnsupportedOperationException();
-            }
-
-            @Override
-            public int getEventCount()
-            {
-              return eventCount;
-            }
-
-            @Override
-            public int getIdleCount()
-            {
-              return idleCount;
-            }
-
-            @Override
-            public String toString()
-            {
-              return "Request{" + "type=" + type + ", eventCount=" + eventCount + ", idleCount=" + idleCount + '}';
-            }
-
-          };
-
-        default:
-          return new Request(command, client)
-          {
-            final Slice address;
-
-            {
-              address = new Slice(buffer, offset + 1, 8);
-            }
-
-            @Override
-            public Slice getAddress()
-            {
-              return address;
-            }
-
-            @Override
-            public int getEventCount()
-            {
-              throw new UnsupportedOperationException();
-            }
-
-            @Override
-            public int getIdleCount()
-            {
-              throw new UnsupportedOperationException();
-            }
-
-            @Override
-            public String toString()
-            {
-              return "Request{" + "type=" + type + ", address=" + address + '}';
-            }
-
-          };
-
-      }
-
-    }
-
-  }
-
-  public static int readInt(byte[] buffer, int offset)
-  {
-    return buffer[offset++] & 0xff
-           | (buffer[offset++] & 0xff) << 8
-           | (buffer[offset++] & 0xff) << 16
-           | (buffer[offset++] & 0xff) << 24;
-  }
-
-  public static void writeInt(byte[] buffer, int offset, int i)
-  {
-    buffer[offset++] = (byte)i;
-    buffer[offset++] = (byte)(i >>> 8);
-    buffer[offset++] = (byte)(i >>> 16);
-    buffer[offset++] = (byte)(i >>> 24);
-  }
-
-  public static long readLong(byte[] buffer, int offset)
-  {
-    return (long)buffer[offset++] & 0xff
-           | (long)(buffer[offset++] & 0xff) << 8
-           | (long)(buffer[offset++] & 0xff) << 16
-           | (long)(buffer[offset++] & 0xff) << 24
-           | (long)(buffer[offset++] & 0xff) << 32
-           | (long)(buffer[offset++] & 0xff) << 40
-           | (long)(buffer[offset++] & 0xff) << 48
-           | (long)(buffer[offset++] & 0xff) << 56;
-  }
-
-  public static void writeLong(byte[] buffer, int offset, long l)
-  {
-    buffer[offset++] = (byte)l;
-    buffer[offset++] = (byte)(l >>> 8);
-    buffer[offset++] = (byte)(l >>> 16);
-    buffer[offset++] = (byte)(l >>> 24);
-    buffer[offset++] = (byte)(l >>> 32);
-    buffer[offset++] = (byte)(l >>> 40);
-    buffer[offset++] = (byte)(l >>> 48);
-    buffer[offset++] = (byte)(l >>> 56);
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(Server.class);
-}
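
Note on the byte layout used above: Server.writeLong/readLong pack a long least-significant byte first (little-endian), and the HDFSStorage code later in this message calls the same helpers for its 8-byte file/offset addresses. A stand-alone sketch of that round trip, for illustration only (the class name is hypothetical, not part of this commit):

// Illustration only; mirrors Server.writeLong/readLong, which store a long
// least-significant byte first (little-endian).
public class LongCodecSketch
{
  static void writeLong(byte[] buffer, int offset, long l)
  {
    for (int i = 0; i < 8; i++) {
      buffer[offset + i] = (byte)(l >>> (8 * i));
    }
  }

  static long readLong(byte[] buffer, int offset)
  {
    long l = 0;
    for (int i = 0; i < 8; i++) {
      l |= ((long)buffer[offset + i] & 0xff) << (8 * i);
    }
    return l;
  }

  public static void main(String[] args)
  {
    byte[] buffer = new byte[8];
    writeLong(buffer, 0, 0x1122334455667788L);
    // prints 1122334455667788: the value survives the round trip
    System.out.println(Long.toHexString(readLong(buffer, 0)));
  }
}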

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java b/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
deleted file mode 100644
index 72e1913..0000000
--- a/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.source;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.List;
-import java.util.Timer;
-import java.util.TimerTask;
-
-import javax.annotation.Nonnull;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.EventDrivenSource;
-import org.apache.flume.channel.ChannelProcessor;
-import org.apache.flume.conf.Configurable;
-import org.apache.flume.event.EventBuilder;
-import org.apache.flume.source.AbstractSource;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
-
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-
-/**
- * <p>TestSource class.</p>
- *
- * @since 0.9.4
- */
-public class HdfsTestSource extends AbstractSource implements EventDrivenSource, Configurable
-{
-  public static final String SOURCE_DIR = "sourceDir";
-  public static final String RATE = "rate";
-  public static final String INIT_DATE = "initDate";
-
-  static byte FIELD_SEPARATOR = 2;
-  public Timer emitTimer;
-  @Nonnull
-  String directory;
-  Path directoryPath;
-  int rate;
-  String initDate;
-  long initTime;
-  List<String> dataFiles;
-  long oneDayBack;
-
-  private transient BufferedReader br = null;
-  protected transient FileSystem fs;
-  private transient Configuration configuration;
-
-  private transient int currentFile = 0;
-  private transient boolean finished;
-  private List<Event> events;
-
-  public HdfsTestSource()
-  {
-    super();
-    this.rate = 2500;
-    dataFiles = Lists.newArrayList();
-    Calendar calendar = Calendar.getInstance();
-    calendar.add(Calendar.DATE, -1);
-    oneDayBack = calendar.getTimeInMillis();
-    configuration = new Configuration();
-    events = Lists.newArrayList();
-  }
-
-  @Override
-  public void configure(Context context)
-  {
-    directory = context.getString(SOURCE_DIR);
-    rate = context.getInteger(RATE, rate);
-    initDate = context.getString(INIT_DATE);
-
-    Preconditions.checkArgument(!Strings.isNullOrEmpty(directory));
-    directoryPath = new Path(directory);
-
-    String[] parts = initDate.split("-");
-    Preconditions.checkArgument(parts.length == 3);
-    Calendar calendar = Calendar.getInstance();
-    calendar.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]) - 1, Integer.parseInt(parts[2]), 0, 0, 0);
-    initTime = calendar.getTimeInMillis();
-
-    try {
-      List<String> files = findFiles();
-      for (String file : files) {
-        dataFiles.add(file);
-      }
-      if (logger.isDebugEnabled()) {
-        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-        logger.debug("settings {} {} {} {} {}", directory, rate, dateFormat.format(oneDayBack),
-            dateFormat.format(new Date(initTime)), currentFile);
-        for (String file : dataFiles) {
-          logger.debug("settings add file {}", file);
-        }
-      }
-
-      fs = FileSystem.newInstance(new Path(directory).toUri(), configuration);
-      Path filePath = new Path(dataFiles.get(currentFile));
-      br = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(fs.open(filePath))));
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-
-    finished = true;
-
-  }
-
-  private List<String> findFiles() throws IOException
-  {
-    List<String> files = Lists.newArrayList();
-    Path directoryPath = new Path(directory);
-    FileSystem lfs = FileSystem.newInstance(directoryPath.toUri(), configuration);
-    try {
-      logger.debug("checking for new files in {}", directoryPath);
-      RemoteIterator<LocatedFileStatus> statuses = lfs.listFiles(directoryPath, true);
-      for (; statuses.hasNext(); ) {
-        FileStatus status = statuses.next();
-        Path path = status.getPath();
-        String filePathStr = path.toString();
-        if (!filePathStr.endsWith(".gz")) {
-          continue;
-        }
-        logger.debug("new file {}", filePathStr);
-        files.add(path.toString());
-      }
-    } catch (FileNotFoundException e) {
-      logger.warn("Failed to list directory {}", directoryPath, e);
-      throw new RuntimeException(e);
-    } finally {
-      lfs.close();
-    }
-    return files;
-  }
-
-  @Override
-  public void start()
-  {
-    super.start();
-    emitTimer = new Timer();
-
-    final ChannelProcessor channelProcessor = getChannelProcessor();
-    emitTimer.scheduleAtFixedRate(new TimerTask()
-    {
-      @Override
-      public void run()
-      {
-        int lineCount = 0;
-        events.clear();
-        try {
-          while (lineCount < rate && !finished) {
-            String line = br.readLine();
-
-            if (line == null) {
-              logger.debug("completed file {}", currentFile);
-              br.close();
-              currentFile++;
-              if (currentFile == dataFiles.size()) {
-                logger.info("finished all files");
-                finished = true;
-                break;
-              }
-              Path filePath = new Path(dataFiles.get(currentFile));
-              br = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(fs.open(filePath))));
-              logger.info("opening file {}. {}", currentFile, filePath);
-              continue;
-            }
-            lineCount++;
-            Event flumeEvent = EventBuilder.withBody(line.getBytes());
-            events.add(flumeEvent);
-          }
-        } catch (IOException e) {
-          throw new RuntimeException(e);
-        }
-        if (events.size() > 0) {
-          channelProcessor.processEventBatch(events);
-        }
-        if (finished) {
-          emitTimer.cancel();
-        }
-      }
-
-    }, 0, 1000);
-  }
-
-  @Override
-  public void stop()
-  {
-    emitTimer.cancel();
-    super.stop();
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(HdfsTestSource.class);
-}
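
HdfsTestSource above replays gzip-compressed files from HDFS line by line, turning each line into a Flume event. The essential open-and-read sequence, as a hedged sketch (the path and class name are made up; the FileSystem, GzipCompressorInputStream and BufferedReader calls are the same ones the source uses):

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GzipHdfsReadSketch
{
  public static void main(String[] args) throws Exception
  {
    // hypothetical location of a gzip-compressed input file
    Path file = new Path("hdfs:///tmp/sourceDir/events.gz");
    FileSystem fs = FileSystem.newInstance(file.toUri(), new Configuration());
    try (BufferedReader br = new BufferedReader(
        new InputStreamReader(new GzipCompressorInputStream(fs.open(file))))) {
      String line;
      while ((line = br.readLine()) != null) {
        System.out.println(line); // each line would become one Flume event body
      }
    } finally {
      fs.close();
    }
  }
}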

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/source/TestSource.java b/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
deleted file mode 100644
index 5773de3..0000000
--- a/flume/src/main/java/com/datatorrent/flume/source/TestSource.java
+++ /dev/null
@@ -1,250 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.source;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-import java.util.Timer;
-import java.util.TimerTask;
-
-import javax.annotation.Nonnull;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.Event;
-import org.apache.flume.EventDrivenSource;
-import org.apache.flume.channel.ChannelProcessor;
-import org.apache.flume.conf.Configurable;
-import org.apache.flume.event.EventBuilder;
-import org.apache.flume.source.AbstractSource;
-
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-/**
- * <p>TestSource class.</p>
- *
- * @since 0.9.4
- */
-public class TestSource extends AbstractSource implements EventDrivenSource, Configurable
-{
-  public static final String SOURCE_FILE = "sourceFile";
-  public static final String LINE_NUMBER = "lineNumber";
-  public static final String RATE = "rate";
-  public static final String PERCENT_PAST_EVENTS = "percentPastEvents";
-  static byte FIELD_SEPARATOR = 1;
-  static int DEF_PERCENT_PAST_EVENTS = 5;
-  public Timer emitTimer;
-  @Nonnull
-  String filePath;
-  int rate;
-  int numberOfPastEvents;
-  transient List<Row> cache;
-  private transient int startIndex;
-  private transient Random random;
-  private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-  private SimpleDateFormat timeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-
-  public TestSource()
-  {
-    super();
-    this.rate = 2500;
-    this.numberOfPastEvents = DEF_PERCENT_PAST_EVENTS * 25;
-    this.random = new Random();
-
-  }
-
-  @Override
-  public void configure(Context context)
-  {
-    filePath = context.getString(SOURCE_FILE);
-    rate = context.getInteger(RATE, rate);
-    int percentPastEvents = context.getInteger(PERCENT_PAST_EVENTS, DEF_PERCENT_PAST_EVENTS);
-    Preconditions.checkArgument(!Strings.isNullOrEmpty(filePath));
-    try {
-      BufferedReader lineReader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)));
-      try {
-        buildCache(lineReader);
-      } finally {
-        lineReader.close();
-      }
-    } catch (FileNotFoundException e) {
-      throw new RuntimeException(e);
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-
-    if (DEF_PERCENT_PAST_EVENTS != percentPastEvents) {
-      numberOfPastEvents = (int)(percentPastEvents / 100.0 * cache.size());
-    }
-  }
-
-  @Override
-  public void start()
-  {
-    super.start();
-    emitTimer = new Timer();
-
-    final ChannelProcessor channel = getChannelProcessor();
-    final int cacheSize = cache.size();
-    emitTimer.scheduleAtFixedRate(new TimerTask()
-    {
-      @Override
-      public void run()
-      {
-        int lastIndex = startIndex + rate;
-        if (lastIndex > cacheSize) {
-          lastIndex -= cacheSize;
-          processBatch(channel, cache.subList(startIndex, cacheSize));
-          startIndex = 0;
-          while (lastIndex > cacheSize) {
-            processBatch(channel, cache);
-            lastIndex -= cacheSize;
-          }
-          processBatch(channel, cache.subList(0, lastIndex));
-        } else {
-          processBatch(channel, cache.subList(startIndex, lastIndex));
-        }
-        startIndex = lastIndex;
-      }
-
-    }, 0, 1000);
-  }
-
-  private void processBatch(ChannelProcessor channelProcessor, List<Row> rows)
-  {
-    if (rows.isEmpty()) {
-      return;
-    }
-
-    int noise = random.nextInt(numberOfPastEvents + 1);
-    Set<Integer> pastIndices = Sets.newHashSet();
-    for (int i = 0; i < noise; i++) {
-      pastIndices.add(random.nextInt(rows.size()));
-    }
-
-    Calendar calendar = Calendar.getInstance();
-    long high = calendar.getTimeInMillis();
-    calendar.add(Calendar.DATE, -2);
-    long low = calendar.getTimeInMillis();
-
-
-
-    List<Event> events = Lists.newArrayList();
-    for (int i = 0; i < rows.size(); i++) {
-      Row eventRow = rows.get(i);
-      if (pastIndices.contains(i)) {
-        long pastTime = (long)((Math.random() * (high - low)) + low);
-        byte[] pastDateField = dateFormat.format(pastTime).getBytes();
-        byte[] pastTimeField = timeFormat.format(pastTime).getBytes();
-
-        System.arraycopy(pastDateField, 0, eventRow.bytes, eventRow.dateFieldStart, pastDateField.length);
-        System.arraycopy(pastTimeField, 0, eventRow.bytes, eventRow.timeFieldStart, pastTimeField.length);
-      } else {
-        calendar.setTimeInMillis(System.currentTimeMillis());
-        byte[] currentDateField = dateFormat.format(calendar.getTime()).getBytes();
-        byte[] currentTimeField = timeFormat.format(calendar.getTime()).getBytes();
-
-        System.arraycopy(currentDateField, 0, eventRow.bytes, eventRow.dateFieldStart, currentDateField.length);
-        System.arraycopy(currentTimeField, 0, eventRow.bytes, eventRow.timeFieldStart, currentTimeField.length);
-      }
-
-      HashMap<String, String> headers = new HashMap<String, String>(2);
-      headers.put(SOURCE_FILE, filePath);
-      headers.put(LINE_NUMBER, String.valueOf(startIndex + i));
-      events.add(EventBuilder.withBody(eventRow.bytes, headers));
-    }
-    channelProcessor.processEventBatch(events);
-  }
-
-  @Override
-  public void stop()
-  {
-    emitTimer.cancel();
-    super.stop();
-  }
-
-  private void buildCache(BufferedReader lineReader) throws IOException
-  {
-    cache = Lists.newArrayListWithCapacity(rate);
-
-    String line;
-    while ((line = lineReader.readLine()) != null) {
-      byte[] row = line.getBytes();
-      Row eventRow = new Row(row);
-      final int rowsize = row.length;
-
-      /* guid */
-      int sliceLengh = -1;
-      while (++sliceLengh < rowsize) {
-        if (row[sliceLengh] == FIELD_SEPARATOR) {
-          break;
-        }
-      }
-      int recordStart = sliceLengh + 1;
-      int pointer = sliceLengh + 1;
-      while (pointer < rowsize) {
-        if (row[pointer++] == FIELD_SEPARATOR) {
-          eventRow.dateFieldStart = recordStart;
-          break;
-        }
-      }
-
-      /* lets parse the date */
-      int dateStart = pointer;
-      while (pointer < rowsize) {
-        if (row[pointer++] == FIELD_SEPARATOR) {
-          eventRow.timeFieldStart = dateStart;
-          break;
-        }
-      }
-
-      cache.add(eventRow);
-    }
-  }
-
-  private static class Row
-  {
-    final byte[] bytes;
-    int dateFieldStart;
-    int timeFieldStart;
-//    boolean past;
-
-    Row(byte[] bytes)
-    {
-      this.bytes = bytes;
-    }
-
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(TestSource.class);
-}
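
To simulate late-arriving data, TestSource above rewrites the date and time fields of a random subset of cached rows with a timestamp drawn uniformly from the last two days, then copies those bytes back over the row in place. The timestamp computation in isolation, as a sketch using the same formats (output values are illustrative only):

import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Random;

public class PastTimestampSketch
{
  public static void main(String[] args)
  {
    Calendar calendar = Calendar.getInstance();
    long high = calendar.getTimeInMillis();  // now
    calendar.add(Calendar.DATE, -2);
    long low = calendar.getTimeInMillis();   // two days ago

    // uniform draw in [low, high), as in TestSource.processBatch()
    long pastTime = (long)((new Random().nextDouble() * (high - low)) + low);

    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    SimpleDateFormat timeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    byte[] pastDateField = dateFormat.format(pastTime).getBytes();
    byte[] pastTimeField = timeFormat.format(pastTime).getBytes();

    // these byte arrays are what System.arraycopy() writes over the cached row
    System.out.println(new String(pastDateField) + " / " + new String(pastTimeField));
  }
}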

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java b/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
deleted file mode 100644
index da94154..0000000
--- a/flume/src/main/java/com/datatorrent/flume/storage/DebugWrapper.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.conf.Configurable;
-
-import com.datatorrent.api.Component;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * <p>DebugWrapper class.</p>
- *
- * @since 0.9.4
- */
-public class DebugWrapper implements Storage, Configurable, Component<com.datatorrent.api.Context>
-{
-  HDFSStorage storage = new HDFSStorage();
-
-  @Override
-  public byte[] store(Slice bytes)
-  {
-    byte[] ret = null;
-
-    try {
-      ret = storage.store(bytes);
-    } finally {
-      logger.debug("storage.store(new byte[]{{}});", bytes);
-    }
-
-    return ret;
-  }
-
-  @Override
-  public byte[] retrieve(byte[] identifier)
-  {
-    byte[] ret = null;
-
-    try {
-      ret = storage.retrieve(identifier);
-    } finally {
-      logger.debug("storage.retrieve(new byte[]{{}});", identifier);
-    }
-
-    return ret;
-  }
-
-  @Override
-  public byte[] retrieveNext()
-  {
-    byte[] ret = null;
-    try {
-      ret = storage.retrieveNext();
-    } finally {
-      logger.debug("storage.retrieveNext();");
-    }
-
-    return ret;
-  }
-
-  @Override
-  public void clean(byte[] identifier)
-  {
-    try {
-      storage.clean(identifier);
-    } finally {
-      logger.debug("storage.clean(new byte[]{{}});", identifier);
-    }
-  }
-
-  @Override
-  public void flush()
-  {
-    try {
-      storage.flush();
-    } finally {
-      logger.debug("storage.flush();");
-    }
-  }
-
-  @Override
-  public void configure(Context cntxt)
-  {
-    try {
-      storage.configure(cntxt);
-    } finally {
-      logger.debug("storage.configure({});", cntxt);
-    }
-  }
-
-  @Override
-  public void setup(com.datatorrent.api.Context t1)
-  {
-    try {
-      storage.setup(t1);
-    } finally {
-      logger.debug("storage.setup({});", t1);
-    }
-
-  }
-
-  @Override
-  public void teardown()
-  {
-    try {
-      storage.teardown();
-    } finally {
-      logger.debug("storage.teardown();");
-    }
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(DebugWrapper.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java b/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
deleted file mode 100644
index 76f663c..0000000
--- a/flume/src/main/java/com/datatorrent/flume/storage/ErrorMaskingEventCodec.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Event;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * <p>ErrorMaskingEventCodec class.</p>
- *
- * @since 1.0.4
- */
-public class ErrorMaskingEventCodec extends EventCodec
-{
-
-  @Override
-  public Object fromByteArray(Slice fragment)
-  {
-    try {
-      return super.fromByteArray(fragment);
-    } catch (RuntimeException re) {
-      logger.warn("Cannot deserialize event {}", fragment, re);
-    }
-
-    return null;
-  }
-
-  @Override
-  public Slice toByteArray(Event event)
-  {
-    try {
-      return super.toByteArray(event);
-    } catch (RuntimeException re) {
-      logger.warn("Cannot serialize event {}", event, re);
-    }
-
-    return null;
-  }
-
-
-  private static final Logger logger = LoggerFactory.getLogger(ErrorMaskingEventCodec.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java b/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
deleted file mode 100644
index 0ece548..0000000
--- a/flume/src/main/java/com/datatorrent/flume/storage/EventCodec.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Event;
-import org.apache.flume.event.EventBuilder;
-
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.io.Input;
-import com.esotericsoftware.kryo.io.Output;
-
-import com.datatorrent.api.StreamCodec;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * <p>EventCodec class.</p>
- *
- * @since 0.9.4
- */
-public class EventCodec implements StreamCodec<Event>
-{
-  private final transient Kryo kryo;
-
-  public EventCodec()
-  {
-    this.kryo = new Kryo();
-    this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
-  }
-
-  @Override
-  public Object fromByteArray(Slice fragment)
-  {
-    ByteArrayInputStream is = new ByteArrayInputStream(fragment.buffer, fragment.offset, fragment.length);
-    Input input = new Input(is);
-
-    @SuppressWarnings("unchecked")
-    HashMap<String, String> headers = kryo.readObjectOrNull(input, HashMap.class);
-    byte[] body = kryo.readObjectOrNull(input, byte[].class);
-    return EventBuilder.withBody(body, headers);
-  }
-
-  @Override
-  public Slice toByteArray(Event event)
-  {
-    ByteArrayOutputStream os = new ByteArrayOutputStream();
-    Output output = new Output(os);
-
-    Map<String, String> headers = event.getHeaders();
-    if (headers != null && headers.getClass() != HashMap.class) {
-      HashMap<String, String> tmp = new HashMap<String, String>(headers.size());
-      tmp.putAll(headers);
-      headers = tmp;
-    }
-    kryo.writeObjectOrNull(output, headers, HashMap.class);
-    kryo.writeObjectOrNull(output, event.getBody(), byte[].class);
-    output.flush();
-    final byte[] bytes = os.toByteArray();
-    return new Slice(bytes, 0, bytes.length);
-  }
-
-  @Override
-  public int getPartition(Event o)
-  {
-    return o.hashCode();
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(EventCodec.class);
-}
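
EventCodec above encodes a Flume Event as a Kryo-serialized HashMap of headers followed by the body bytes, and decodes in the same order. A minimal round trip using the same Kryo and Flume calls (sketch only; assumes Kryo and Flume on the classpath, and the header value is made up):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.HashMap;

import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

public class EventCodecRoundTripSketch
{
  public static void main(String[] args)
  {
    Kryo kryo = new Kryo();

    // encode: headers first, then the body, exactly as toByteArray() does
    HashMap<String, String> headers = new HashMap<String, String>(2);
    headers.put("lineNumber", "42"); // made-up header for the sketch
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    Output output = new Output(os);
    kryo.writeObjectOrNull(output, headers, HashMap.class);
    kryo.writeObjectOrNull(output, "payload".getBytes(), byte[].class);
    output.flush();

    // decode in the same order, as fromByteArray() does
    Input input = new Input(new ByteArrayInputStream(os.toByteArray()));
    @SuppressWarnings("unchecked")
    HashMap<String, String> decodedHeaders = kryo.readObjectOrNull(input, HashMap.class);
    byte[] decodedBody = kryo.readObjectOrNull(input, byte[].class);

    Event event = EventBuilder.withBody(decodedBody, decodedHeaders);
    System.out.println(new String(event.getBody()) + " " + event.getHeaders());
  }
}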

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java b/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
deleted file mode 100644
index 4dcddcd..0000000
--- a/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
+++ /dev/null
@@ -1,947 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import java.io.DataInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-import javax.validation.constraints.NotNull;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.flume.Context;
-import org.apache.flume.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.primitives.Ints;
-import com.google.common.primitives.Longs;
-
-import com.datatorrent.api.Component;
-import com.datatorrent.common.util.NameableThreadFactory;
-import com.datatorrent.flume.sink.Server;
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * HDFSStorage is developed to store and retrieve the data from HDFS
- * <p />
- * The properties that can be set on HDFSStorage are: <br />
- * baseDir - The base directory where the data is going to be stored <br />
- * restore - This is used to restore the application from previous failure <br />
- * blockSize - The maximum size of the each file to created. <br />
- *
- * @since 0.9.3
- */
-public class HDFSStorage implements Storage, Configurable, Component<com.datatorrent.api.Context>
-{
-  public static final int DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
-  public static final String BASE_DIR_KEY = "baseDir";
-  public static final String RESTORE_KEY = "restore";
-  public static final String BLOCKSIZE = "blockSize";
-  public static final String BLOCK_SIZE_MULTIPLE = "blockSizeMultiple";
-  public static final String NUMBER_RETRY = "retryCount";
-
-  private static final String OFFSET_SUFFIX = "-offsetFile";
-  private static final String BOOK_KEEPING_FILE_OFFSET = "-bookKeepingOffsetFile";
-  private static final String FLUSHED_IDENTITY_FILE = "flushedCounter";
-  private static final String CLEAN_OFFSET_FILE = "cleanoffsetFile";
-  private static final String FLUSHED_IDENTITY_FILE_TEMP = "flushedCounter.tmp";
-  private static final String CLEAN_OFFSET_FILE_TEMP = "cleanoffsetFile.tmp";
-  private static final int IDENTIFIER_SIZE = 8;
-  private static final int DATA_LENGTH_BYTE_SIZE = 4;
-
-  /**
-   * Number of times the storage will try to get the filesystem
-   */
-  private int retryCount = 3;
-  /**
-   * The multiple of block size
-   */
-  private int blockSizeMultiple = 1;
-  /**
-   * Identifier for this storage.
-   */
-  @NotNull
-  private String id;
-  /**
-   * The baseDir where the storage facility is going to create files.
-   */
-  @NotNull
-  private String baseDir;
-  /**
-   * The block size to be used to create the storage files
-   */
-  private long blockSize;
-  /**
-   *
-   */
-  private boolean restore;
-  /**
-   * This identifies the current file number
-   */
-  private long currentWrittenFile;
-  /**
-   * This identifies the file number that has been flushed
-   */
-  private long flushedFileCounter;
-  /**
-   * The file that stores the fileCounter information
-   */
-  // private Path fileCounterFile;
-  /**
-   * The file that stores the flushed fileCounter information
-   */
-  private Path flushedCounterFile;
-  private Path flushedCounterFileTemp;
-  /**
-   * This identifies the last cleaned file number
-   */
-  private long cleanedFileCounter;
-  /**
-   * The file that stores the clean file counter information
-   */
-  // private Path cleanFileCounterFile;
-  /**
-   * The file that stores the clean file offset information
-   */
-  private Path cleanFileOffsetFile;
-  private Path cleanFileOffsetFileTemp;
-  private FileSystem fs;
-  private FSDataOutputStream dataStream;
-  ArrayList<DataBlock> files2Commit = new ArrayList<DataBlock>();
-  /**
-   * The offset in the current opened file
-   */
-  private long fileWriteOffset;
-  private FSDataInputStream readStream;
-  private long retrievalOffset;
-  private long retrievalFile;
-  private int offset;
-  private long flushedLong;
-  private long flushedFileWriteOffset;
-  private long bookKeepingFileOffset;
-  private byte[] cleanedOffset = new byte[8];
-  private long skipOffset;
-  private long skipFile;
-  private transient Path basePath;
-  private ExecutorService storageExecutor;
-  private byte[] currentData;
-  private FSDataInputStream nextReadStream;
-  private long nextFlushedLong;
-  private long nextRetrievalFile;
-  private byte[] nextRetrievalData;
-
-  public HDFSStorage()
-  {
-    this.restore = true;
-  }
-
-  /**
-   * This stores the Identifier information identified in the last store function call
-   *
-   * @param ctx
-   */
-  @Override
-  public void configure(Context ctx)
-  {
-    String tempId = ctx.getString(ID);
-    if (tempId == null) {
-      if (id == null) {
-        throw new IllegalArgumentException("id can't be  null.");
-      }
-    } else {
-      id = tempId;
-    }
-
-    String tempBaseDir = ctx.getString(BASE_DIR_KEY);
-    if (tempBaseDir != null) {
-      baseDir = tempBaseDir;
-    }
-
-    restore = ctx.getBoolean(RESTORE_KEY, restore);
-    Long tempBlockSize = ctx.getLong(BLOCKSIZE);
-    if (tempBlockSize != null) {
-      blockSize = tempBlockSize;
-    }
-    blockSizeMultiple = ctx.getInteger(BLOCK_SIZE_MULTIPLE, blockSizeMultiple);
-    retryCount = ctx.getInteger(NUMBER_RETRY,retryCount);
-  }
-
-  /**
-   * This function reads the file at a location and return the bytes stored in the file "
-   *
-   * @param path - the location of the file
-   * @return
-   * @throws IOException
-   */
-  byte[] readData(Path path) throws IOException
-  {
-    DataInputStream is = new DataInputStream(fs.open(path));
-    byte[] bytes = new byte[is.available()];
-    is.readFully(bytes);
-    is.close();
-    return bytes;
-  }
-
-  /**
-   * This function writes the bytes to a file specified by the path
-   *
-   * @param path the file location
-   * @param data the data to be written to the file
-   * @return
-   * @throws IOException
-   */
-  private FSDataOutputStream writeData(Path path, byte[] data) throws IOException
-  {
-    FSDataOutputStream fsOutputStream;
-    if (fs.getScheme().equals("file")) {
-      // local FS does not support hflush and does not flush native stream
-      fsOutputStream = new FSDataOutputStream(
-          new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(path).toString()), null);
-    } else {
-      fsOutputStream = fs.create(path);
-    }
-    fsOutputStream.write(data);
-    return fsOutputStream;
-  }
-
-  private long calculateOffset(long fileOffset, long fileCounter)
-  {
-    return ((fileCounter << 32) | (fileOffset & 0xffffffffL));
-  }
-
-  @Override
-  public byte[] store(Slice slice)
-  {
-    // logger.debug("store message ");
-    int bytesToWrite = slice.length + DATA_LENGTH_BYTE_SIZE;
-    if (currentWrittenFile < skipFile) {
-      fileWriteOffset += bytesToWrite;
-      if (fileWriteOffset >= bookKeepingFileOffset) {
-        files2Commit.add(new DataBlock(null, bookKeepingFileOffset,
-            new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), currentWrittenFile));
-        currentWrittenFile++;
-        if (fileWriteOffset > bookKeepingFileOffset) {
-          fileWriteOffset = bytesToWrite;
-        } else {
-          fileWriteOffset = 0;
-        }
-        try {
-          bookKeepingFileOffset = getFlushedFileWriteOffset(
-              new Path(basePath, currentWrittenFile + BOOK_KEEPING_FILE_OFFSET));
-        } catch (IOException e) {
-          throw new RuntimeException(e);
-        }
-      }
-      return null;
-    }
-
-    if (flushedFileCounter == currentWrittenFile && dataStream == null) {
-      currentWrittenFile++;
-      fileWriteOffset = 0;
-    }
-
-    if (flushedFileCounter == skipFile && skipFile != -1) {
-      skipFile++;
-    }
-
-    if (fileWriteOffset + bytesToWrite < blockSize) {
-      try {
-        /* write length and the actual data to the file */
-        if (fileWriteOffset == 0) {
-          // writeData(flushedCounterFile, String.valueOf(currentWrittenFile).getBytes()).close();
-          dataStream = writeData(new Path(basePath, String.valueOf(currentWrittenFile)),
-              Ints.toByteArray(slice.length));
-          dataStream.write(slice.buffer, slice.offset, slice.length);
-        } else {
-          dataStream.write(Ints.toByteArray(slice.length));
-          dataStream.write(slice.buffer, slice.offset, slice.length);
-        }
-        fileWriteOffset += bytesToWrite;
-
-        byte[] fileOffset = null;
-        if ((currentWrittenFile > skipFile) || (currentWrittenFile == skipFile && fileWriteOffset > skipOffset)) {
-          skipFile = -1;
-          fileOffset = new byte[IDENTIFIER_SIZE];
-          Server.writeLong(fileOffset, 0, calculateOffset(fileWriteOffset, currentWrittenFile));
-        }
-        return fileOffset;
-      } catch (IOException ex) {
-        logger.warn("Error while storing the bytes {}", ex.getMessage());
-        closeFs();
-        throw new RuntimeException(ex);
-      }
-    }
-    DataBlock db = new DataBlock(dataStream, fileWriteOffset,
-        new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), currentWrittenFile);
-    db.close();
-    files2Commit.add(db);
-    fileWriteOffset = 0;
-    ++currentWrittenFile;
-    return store(slice);
-  }
-
-  /**
-   * @param b
-   * @param startIndex
-   * @return
-   */
-  long byteArrayToLong(byte[] b, int startIndex)
-  {
-    final byte b1 = 0;
-    return Longs.fromBytes(b1, b1, b1, b1, b[3 + startIndex], b[2 + startIndex], b[1 + startIndex], b[startIndex]);
-  }
-
-  @Override
-  public byte[] retrieve(byte[] identifier)
-  {
-    skipFile = -1;
-    skipOffset = 0;
-    logger.debug("retrieve with address {}", Arrays.toString(identifier));
-    // flushing the last incomplete flushed file
-    closeUnflushedFiles();
-
-    retrievalOffset = byteArrayToLong(identifier, 0);
-    retrievalFile = byteArrayToLong(identifier, offset);
-
-    if (retrievalFile == 0 && retrievalOffset == 0 && currentWrittenFile == 0 && fileWriteOffset == 0) {
-      skipOffset = 0;
-      return null;
-    }
-
-    // making sure that the deleted address is not requested again
-    if (retrievalFile != 0 || retrievalOffset != 0) {
-      long cleanedFile = byteArrayToLong(cleanedOffset, offset);
-      if (retrievalFile < cleanedFile || (retrievalFile == cleanedFile &&
-          retrievalOffset < byteArrayToLong(cleanedOffset, 0))) {
-        logger.warn("The address asked has been deleted retrievalFile={}, cleanedFile={}, retrievalOffset={}, " +
-            "cleanedOffset={}", retrievalFile, cleanedFile, retrievalOffset, byteArrayToLong(cleanedOffset, 0));
-        closeFs();
-        throw new IllegalArgumentException(String.format("The data for address %s has already been deleted",
-            Arrays.toString(identifier)));
-      }
-    }
-
-    // we have just started
-    if (retrievalFile == 0 && retrievalOffset == 0) {
-      retrievalFile = byteArrayToLong(cleanedOffset, offset);
-      retrievalOffset = byteArrayToLong(cleanedOffset, 0);
-    }
-
-    if ((retrievalFile > flushedFileCounter)) {
-      skipFile = retrievalFile;
-      skipOffset = retrievalOffset;
-      retrievalFile = -1;
-      return null;
-    }
-    if ((retrievalFile == flushedFileCounter && retrievalOffset >= flushedFileWriteOffset)) {
-      skipFile = retrievalFile;
-      skipOffset = retrievalOffset - flushedFileWriteOffset;
-      retrievalFile = -1;
-      return null;
-    }
-
-    try {
-      if (readStream != null) {
-        readStream.close();
-        readStream = null;
-      }
-      Path path = new Path(basePath, String.valueOf(retrievalFile));
-      if (!fs.exists(path)) {
-        retrievalFile = -1;
-        closeFs();
-        throw new RuntimeException(String.format("File %s does not exist", path.toString()));
-      }
-
-      byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
-      flushedLong = Server.readLong(flushedOffset, 0);
-      while (retrievalOffset >= flushedLong && retrievalFile < flushedFileCounter) {
-        retrievalOffset -= flushedLong;
-        retrievalFile++;
-        flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
-        flushedLong = Server.readLong(flushedOffset, 0);
-      }
-
-      if (retrievalOffset >= flushedLong) {
-        logger.warn("data not flushed for the given identifier");
-        retrievalFile = -1;
-        return null;
-      }
-      synchronized (HDFSStorage.this) {
-        if (nextReadStream != null) {
-          nextReadStream.close();
-          nextReadStream = null;
-        }
-      }
-      currentData = null;
-      path = new Path(basePath, String.valueOf(retrievalFile));
-      //readStream = new FSDataInputStream(fs.open(path));
-      currentData = readData(path);
-      //readStream.seek(retrievalOffset);
-      storageExecutor.submit(getNextStream());
-      return retrieveHelper();
-    } catch (IOException e) {
-      closeFs();
-      throw new RuntimeException(e);
-    }
-  }
-
-  private byte[] retrieveHelper() throws IOException
-  {
-    int tempRetrievalOffset = (int)retrievalOffset;
-    int length = Ints.fromBytes(currentData[tempRetrievalOffset], currentData[tempRetrievalOffset + 1],
-        currentData[tempRetrievalOffset + 2], currentData[tempRetrievalOffset + 3]);
-    byte[] data = new byte[length + IDENTIFIER_SIZE];
-    System.arraycopy(currentData, tempRetrievalOffset + 4, data, IDENTIFIER_SIZE, length);
-    retrievalOffset += length + DATA_LENGTH_BYTE_SIZE;
-    if (retrievalOffset >= flushedLong) {
-      Server.writeLong(data, 0, calculateOffset(0, retrievalFile + 1));
-    } else {
-      Server.writeLong(data, 0, calculateOffset(retrievalOffset, retrievalFile));
-    }
-    return data;
-  }
-
-  @Override
-  public byte[] retrieveNext()
-  {
-    if (retrievalFile == -1) {
-      closeFs();
-      throw new RuntimeException("Call retrieve first");
-    }
-
-    if (retrievalFile > flushedFileCounter) {
-      logger.warn("data is not flushed");
-      return null;
-    }
-
-    try {
-      if (currentData == null) {
-        synchronized (HDFSStorage.this) {
-          if (nextRetrievalData != null && (retrievalFile == nextRetrievalFile)) {
-            currentData = nextRetrievalData;
-            flushedLong = nextFlushedLong;
-            nextRetrievalData = null;
-          } else {
-            currentData = null;
-            currentData = readData(new Path(basePath, String.valueOf(retrievalFile)));
-            byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
-            flushedLong = Server.readLong(flushedOffset, 0);
-          }
-        }
-        storageExecutor.submit(getNextStream());
-      }
-
-      if (retrievalOffset >= flushedLong) {
-        retrievalFile++;
-        retrievalOffset = 0;
-
-        if (retrievalFile > flushedFileCounter) {
-          logger.warn("data is not flushed");
-          return null;
-        }
-
-        //readStream.close();
-        // readStream = new FSDataInputStream(fs.open(new Path(basePath, String.valueOf(retrievalFile))));
-        // byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
-        // flushedLong = Server.readLong(flushedOffset, 0);
-
-        synchronized (HDFSStorage.this) {
-          if (nextRetrievalData != null && (retrievalFile == nextRetrievalFile)) {
-            currentData = nextRetrievalData;
-            flushedLong = nextFlushedLong;
-            nextRetrievalData = null;
-          } else {
-            currentData = null;
-            currentData = readData(new Path(basePath, String.valueOf(retrievalFile)));
-            byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
-            flushedLong = Server.readLong(flushedOffset, 0);
-          }
-        }
-        storageExecutor.submit(getNextStream());
-      }
-      //readStream.seek(retrievalOffset);
-      return retrieveHelper();
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  @Override
-  @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")
-  public void clean(byte[] identifier)
-  {
-    logger.info("clean {}", Arrays.toString(identifier));
-    long cleanFileIndex = byteArrayToLong(identifier, offset);
-
-    long cleanFileOffset = byteArrayToLong(identifier, 0);
-    if (flushedFileCounter == -1) {
-      identifier = new byte[8];
-    } else if (cleanFileIndex > flushedFileCounter ||
-        (cleanFileIndex == flushedFileCounter && cleanFileOffset >= flushedFileWriteOffset)) {
-      // This is to make sure that we clean only the data that is flushed
-      cleanFileIndex = flushedFileCounter;
-      cleanFileOffset = flushedFileWriteOffset;
-      Server.writeLong(identifier, 0, calculateOffset(cleanFileOffset, cleanFileIndex));
-    }
-    cleanedOffset = identifier;
-
-    try {
-      writeData(cleanFileOffsetFileTemp, identifier).close();
-      fs.rename(cleanFileOffsetFileTemp, cleanFileOffsetFile);
-      if (cleanedFileCounter >= cleanFileIndex) {
-        return;
-      }
-      do {
-        Path path = new Path(basePath, String.valueOf(cleanedFileCounter));
-        if (fs.exists(path) && fs.isFile(path)) {
-          fs.delete(path, false);
-        }
-        path = new Path(basePath, cleanedFileCounter + OFFSET_SUFFIX);
-        if (fs.exists(path) && fs.isFile(path)) {
-          fs.delete(path, false);
-        }
-        path = new Path(basePath, cleanedFileCounter + BOOK_KEEPING_FILE_OFFSET);
-        if (fs.exists(path) && fs.isFile(path)) {
-          fs.delete(path, false);
-        }
-        logger.info("deleted file {}", cleanedFileCounter);
-        ++cleanedFileCounter;
-      } while (cleanedFileCounter < cleanFileIndex);
-      // writeData(cleanFileCounterFile, String.valueOf(cleanedFileCounter).getBytes()).close();
-
-    } catch (IOException e) {
-      logger.warn("not able to close the streams {}", e.getMessage());
-      closeFs();
-      throw new RuntimeException(e);
-    }
-  }
-
-  /**
-   * This is mainly used for cleaning up the counter files that were created
-   */
-  void cleanHelperFiles()
-  {
-    try {
-      fs.delete(basePath, true);
-    } catch (IOException e) {
-      logger.warn(e.getMessage());
-    }
-  }
-
-  private void closeUnflushedFiles()
-  {
-    try {
-      files2Commit.clear();
-      // closing the stream
-      if (dataStream != null) {
-        dataStream.close();
-        dataStream = null;
-        // currentWrittenFile++;
-        // fileWriteOffset = 0;
-      }
-
-      if (!fs.exists(new Path(basePath, currentWrittenFile + OFFSET_SUFFIX))) {
-        fs.delete(new Path(basePath, String.valueOf(currentWrittenFile)), false);
-      }
-
-      if (fs.exists(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX))) {
-        // This means that flush was called
-        flushedFileWriteOffset = getFlushedFileWriteOffset(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
-        bookKeepingFileOffset = getFlushedFileWriteOffset(
-            new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
-      }
-
-      if (flushedFileCounter != -1) {
-        currentWrittenFile = flushedFileCounter;
-        fileWriteOffset = flushedFileWriteOffset;
-      } else {
-        currentWrittenFile = 0;
-        fileWriteOffset = 0;
-      }
-
-      flushedLong = 0;
-
-    } catch (IOException e) {
-      closeFs();
-      throw new RuntimeException(e);
-    }
-  }
-
-  @Override
-  public void flush()
-  {
-    nextReadStream = null;
-    StringBuilder builder = new StringBuilder();
-    Iterator<DataBlock> itr = files2Commit.iterator();
-    DataBlock db;
-    try {
-      while (itr.hasNext()) {
-        db = itr.next();
-        db.updateOffsets();
-        builder.append(db.fileName).append(", ");
-      }
-      files2Commit.clear();
-
-      if (dataStream != null) {
-        dataStream.hflush();
-        writeData(flushedCounterFileTemp, String.valueOf(currentWrittenFile).getBytes()).close();
-        fs.rename(flushedCounterFileTemp, flushedCounterFile);
-        updateFlushedOffset(new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), fileWriteOffset);
-        flushedFileWriteOffset = fileWriteOffset;
-        builder.append(currentWrittenFile);
-      }
-      logger.debug("flushed files {}", builder.toString());
-    } catch (IOException ex) {
-      logger.warn("not able to close the stream {}", ex.getMessage());
-      closeFs();
-      throw new RuntimeException(ex);
-    }
-    flushedFileCounter = currentWrittenFile;
-    // logger.debug("flushedFileCounter in flush {}",flushedFileCounter);
-  }
-
-  /**
-   * This updates the flushed offset
-   */
-  private void updateFlushedOffset(Path file, long bytesWritten)
-  {
-    byte[] lastStoredOffset = new byte[IDENTIFIER_SIZE];
-    Server.writeLong(lastStoredOffset, 0, bytesWritten);
-    try {
-      writeData(file, lastStoredOffset).close();
-    } catch (IOException e) {
-      try {
-        if (!Arrays.equals(readData(file), lastStoredOffset)) {
-          closeFs();
-          throw new RuntimeException(e);
-        }
-      } catch (Exception e1) {
-        closeFs();
-        throw new RuntimeException(e1);
-      }
-    }
-  }
-
-  public int getBlockSizeMultiple()
-  {
-    return blockSizeMultiple;
-  }
-
-  public void setBlockSizeMultiple(int blockSizeMultiple)
-  {
-    this.blockSizeMultiple = blockSizeMultiple;
-  }
-
-  /**
-   * @return the baseDir
-   */
-  public String getBaseDir()
-  {
-    return baseDir;
-  }
-
-  /**
-   * @param baseDir the baseDir to set
-   */
-  public void setBaseDir(String baseDir)
-  {
-    this.baseDir = baseDir;
-  }
-
-  /**
-   * @return the id
-   */
-  public String getId()
-  {
-    return id;
-  }
-
-  /**
-   * @param id the id to set
-   */
-  public void setId(String id)
-  {
-    this.id = id;
-  }
-
-  /**
-   * @return the blockSize
-   */
-  public long getBlockSize()
-  {
-    return blockSize;
-  }
-
-  /**
-   * @param blockSize the blockSize to set
-   */
-  public void setBlockSize(long blockSize)
-  {
-    this.blockSize = blockSize;
-  }
-
-  /**
-   * @return the restore
-   */
-  public boolean isRestore()
-  {
-    return restore;
-  }
-
-  /**
-   * @param restore the restore to set
-   */
-  public void setRestore(boolean restore)
-  {
-    this.restore = restore;
-  }
-
-  class DataBlock
-  {
-    FSDataOutputStream dataStream;
-    long dataOffset;
-    Path path2FlushedData;
-    long fileName;
-    private Path bookKeepingPath;
-
-    DataBlock(FSDataOutputStream stream, long bytesWritten, Path path2FlushedData, long fileName)
-    {
-      this.dataStream = stream;
-      this.dataOffset = bytesWritten;
-      this.path2FlushedData = path2FlushedData;
-      this.fileName = fileName;
-    }
-
-    public void close()
-    {
-      if (dataStream != null) {
-        try {
-          dataStream.close();
-          bookKeepingPath = new Path(basePath, fileName + BOOK_KEEPING_FILE_OFFSET);
-          updateFlushedOffset(bookKeepingPath, dataOffset);
-        } catch (IOException ex) {
-          logger.warn("not able to close the stream {}", ex.getMessage());
-          closeFs();
-          throw new RuntimeException(ex);
-        }
-      }
-    }
-
-    public void updateOffsets() throws IOException
-    {
-      updateFlushedOffset(path2FlushedData, dataOffset);
-      if (bookKeepingPath != null && fs.exists(bookKeepingPath)) {
-        fs.delete(bookKeepingPath, false);
-      }
-    }
-
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(HDFSStorage.class);
-
-  @Override
-  public void setup(com.datatorrent.api.Context context)
-  {
-    Configuration conf = new Configuration();
-    if (baseDir == null) {
-      baseDir = conf.get("hadoop.tmp.dir");
-      if (baseDir == null || baseDir.isEmpty()) {
-        throw new IllegalArgumentException("baseDir cannot be null.");
-      }
-    }
-    offset = 4;
-    skipOffset = -1;
-    skipFile = -1;
-    int tempRetryCount = 0;
-    while (tempRetryCount < retryCount && fs == null) {
-      try {
-        fs = FileSystem.newInstance(conf);
-        tempRetryCount++;
-      } catch (Throwable throwable) {
-        logger.warn("Not able to get file system ", throwable);
-      }
-    }
-
-    try {
-      Path path = new Path(baseDir);
-      basePath = new Path(path, id);
-      if (fs == null) {
-        fs = FileSystem.newInstance(conf);
-      }
-      if (!fs.exists(path)) {
-        closeFs();
-        throw new RuntimeException(String.format("baseDir passed (%s) doesn't exist.", baseDir));
-      }
-      if (!fs.isDirectory(path)) {
-        closeFs();
-        throw new RuntimeException(String.format("baseDir passed (%s) is not a directory.", baseDir));
-      }
-      if (!restore) {
-        fs.delete(basePath, true);
-      }
-      if (!fs.exists(basePath) || !fs.isDirectory(basePath)) {
-        fs.mkdirs(basePath);
-      }
-
-      if (blockSize == 0) {
-        blockSize = fs.getDefaultBlockSize(new Path(basePath, "tempData"));
-      }
-      if (blockSize == 0) {
-        blockSize = DEFAULT_BLOCK_SIZE;
-      }
-
-      blockSize = blockSizeMultiple * blockSize;
-
-      currentWrittenFile = 0;
-      cleanedFileCounter = -1;
-      retrievalFile = -1;
-      // fileCounterFile = new Path(basePath, IDENTITY_FILE);
-      flushedFileCounter = -1;
-      // cleanFileCounterFile = new Path(basePath, CLEAN_FILE);
-      cleanFileOffsetFile = new Path(basePath, CLEAN_OFFSET_FILE);
-      cleanFileOffsetFileTemp = new Path(basePath, CLEAN_OFFSET_FILE_TEMP);
-      flushedCounterFile = new Path(basePath, FLUSHED_IDENTITY_FILE);
-      flushedCounterFileTemp = new Path(basePath, FLUSHED_IDENTITY_FILE_TEMP);
-
-      if (restore) {
-        //
-        // if (fs.exists(fileCounterFile) && fs.isFile(fileCounterFile)) {
-        // //currentWrittenFile = Long.valueOf(new String(readData(fileCounterFile)));
-        // }
-
-        if (fs.exists(cleanFileOffsetFile) && fs.isFile(cleanFileOffsetFile)) {
-          cleanedOffset = readData(cleanFileOffsetFile);
-        }
-
-        if (fs.exists(flushedCounterFile) && fs.isFile(flushedCounterFile)) {
-          String strFlushedFileCounter = new String(readData(flushedCounterFile));
-          if (strFlushedFileCounter.isEmpty()) {
-            logger.warn("empty flushed file");
-          } else {
-            flushedFileCounter = Long.valueOf(strFlushedFileCounter);
-            flushedFileWriteOffset = getFlushedFileWriteOffset(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
-            bookKeepingFileOffset = getFlushedFileWriteOffset(
-                new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
-          }
-
-        }
-      }
-      fileWriteOffset = flushedFileWriteOffset;
-      currentWrittenFile = flushedFileCounter;
-      cleanedFileCounter = byteArrayToLong(cleanedOffset, offset) - 1;
-      if (currentWrittenFile == -1) {
-        ++currentWrittenFile;
-        fileWriteOffset = 0;
-      }
-
-    } catch (IOException io) {
-
-      throw new RuntimeException(io);
-    }
-    storageExecutor = Executors.newSingleThreadExecutor(new NameableThreadFactory("StorageHelper"));
-  }
-
-  private void closeFs()
-  {
-    if (fs != null) {
-      try {
-        fs.close();
-        fs = null;
-      } catch (IOException e) {
-        logger.debug(e.getMessage());
-      }
-    }
-  }
-
-  private long getFlushedFileWriteOffset(Path filePath) throws IOException
-  {
-    if (flushedFileCounter != -1 && fs.exists(filePath)) {
-      byte[] flushedFileOffsetByte = readData(filePath);
-      if (flushedFileOffsetByte != null && flushedFileOffsetByte.length == 8) {
-        return Server.readLong(flushedFileOffsetByte, 0);
-      }
-    }
-    return 0;
-  }
-
-  @Override
-  public void teardown()
-  {
-    logger.debug("called teardown");
-    try {
-      if (readStream != null) {
-        readStream.close();
-      }
-      synchronized (HDFSStorage.this) {
-        if (nextReadStream != null) {
-          nextReadStream.close();
-        }
-      }
-
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    } finally {
-      closeUnflushedFiles();
-      storageExecutor.shutdown();
-    }
-
-  }
-
-  private Runnable getNextStream()
-  {
-    return new Runnable()
-    {
-      @Override
-      public void run()
-      {
-        try {
-          synchronized (HDFSStorage.this) {
-            nextRetrievalFile = retrievalFile + 1;
-            if (nextRetrievalFile > flushedFileCounter) {
-              nextRetrievalData = null;
-              return;
-            }
-            Path path = new Path(basePath, String.valueOf(nextRetrievalFile));
-            Path offsetPath = new Path(basePath, nextRetrievalFile + OFFSET_SUFFIX);
-            nextRetrievalData = null;
-            nextRetrievalData = readData(path);
-            byte[] flushedOffset = readData(offsetPath);
-            nextFlushedLong = Server.readLong(flushedOffset, 0);
-          }
-        } catch (Throwable e) {
-          logger.warn("in storage executor ", e);
-
-        }
-      }
-    };
-  }
-
-}
-
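
For reference, the byte layout produced by retrieveHelper above can be decoded as in the
following sketch. It assumes IDENTIFIER_SIZE is 8 and DATA_LENGTH_BYTE_SIZE is 4, values
inferred from how the method reads the length field and advances the offset rather than
stated explicitly in this diff.

    import java.util.Arrays;

    // Illustrative only: splits a byte[] as returned by HDFSStorage.retrieve()/retrieveNext().
    // The first 8 bytes encode the address of the next record (file number and offset packed
    // by calculateOffset); the remaining bytes are the stored payload.
    class RetrievedRecord
    {
      static byte[] nextAddress(byte[] retrieved)
      {
        return Arrays.copyOfRange(retrieved, 0, 8);   // can be passed back to retrieve() or clean()
      }

      static byte[] payload(byte[] retrieved)
      {
        return Arrays.copyOfRange(retrieved, 8, retrieved.length);
      }
    }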

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/Storage.java b/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
deleted file mode 100644
index 5130f3c..0000000
--- a/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * <p>Storage interface.</p>
- *
- * @since 0.9.2
- */
-public interface Storage
-{
-  /**
-   * Key in the context for the unique identifier of the storage, which may be used to recover from failure.
-   */
-  String ID = "id";
-
-  /**
-   * This stores the bytes and returns the unique identifier to retrieve these bytes
-   *
-   * @param bytes
-   * @return
-   */
-  byte[] store(Slice bytes);
-
-  /**
-   * This returns the data bytes for the current identifier and the identifier for the next data bytes. <br/>
-   * The first eight bytes contain the identifier and the remaining bytes contain the data
-   *
-   * @param identifier
-   * @return
-   */
-  byte[] retrieve(byte[] identifier);
-
-  /**
-   * This returns data bytes and the identifier for the next data bytes. The identifier for the current data bytes is
-   * determined by the preceding retrieve call and the number of retrieveNext calls made since then. <br/>
-   * The first eight bytes contain the identifier and the remaining bytes contain the data
-   *
-   * @return
-   */
-  byte[] retrieveNext();
-
-  /**
-   * This is used to clean up the files identified by identifier
-   *
-   * @param identifier
-   */
-  void clean(byte[] identifier);
-
-  /**
-   * This flushes the pending data in the stream to storage
-   *
-   */
-  void flush();
-
-}
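
To make the contract above concrete, here is a hedged usage sketch; the concrete Storage
implementation (for example HDFSStorage) and its configuration are assumed to exist
elsewhere, and null checks on the retrieved arrays are omitted.

    import java.util.Arrays;

    import com.datatorrent.netlet.util.Slice;

    class StorageUsageSketch
    {
      static void roundTrip(Storage storage)
      {
        byte[] address = storage.store(new Slice("event-1".getBytes()));
        storage.flush();                                  // retrieve() only sees flushed data

        byte[] first = storage.retrieve(address);         // bytes 0..7: identifier of the next record
        byte[] second = storage.retrieveNext();           // continues where retrieve() left off

        // illustrative: release everything up to the identifier returned with the last record
        storage.clean(Arrays.copyOfRange(second, 0, 8));
      }
    }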

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
new file mode 100644
index 0000000..619a625
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/Discovery.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.discovery;
+
+import java.util.Collection;
+
+/**
+ * When a DTFlumeSink server instance binds to the network interface, it can publish
+ * its whereabouts by invoking the advertise method on the Discovery object. Similarly,
+ * when it ceases to accept any more connections, it can publish its intent to do
+ * so by invoking unadvertise.<p />
+ * Interested parties can call the discover method to get the list of addresses where
+ * they can find an available DTFlumeSink server instance.
+ *
+ * @param <T> - Type of the objects which can be discovered
+ * @since 0.9.3
+ */
+public interface Discovery<T>
+{
+  /**
+   * Recall the previously published address as it's no longer valid.
+   *
+   * @param service
+   */
+  void unadvertise(Service<T> service);
+
+  /**
+   * Advertise the host/port address where DTFlumeSink is accepting a client connection.
+   *
+   * @param service
+   */
+  void advertise(Service<T> service);
+
+  /**
+   * Discover all the addresses which are actively accepting the client connections.
+   *
+   * @return - Active server addresses which can accept connections.
+   */
+  Collection<Service<T>> discover();
+
+  interface Service<T>
+  {
+    String getHost();
+
+    int getPort();
+
+    T getPayload();
+
+    String getId();
+
+  }
+
+}
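
A minimal sketch of the advertise/discover/unadvertise cycle follows. The Discovery
implementation (for example ZKAssistedDiscovery below) and the host, port, and id values
are assumptions; only the Discovery and Service signatures come from the interface above.

    class DiscoverySketch
    {
      static void announce(Discovery<byte[]> discovery)
      {
        Discovery.Service<byte[]> service = new Discovery.Service<byte[]>()
        {
          @Override public String getHost()    { return "flume-host-1"; }
          @Override public int getPort()       { return 8080; }
          @Override public byte[] getPayload() { return null; }
          @Override public String getId()      { return "sink-1"; }
        };

        discovery.advertise(service);     // publish the address once the sink is listening
        for (Discovery.Service<byte[]> s : discovery.discover()) {
          System.out.println(s.getId() + " => " + s.getHost() + ':' + s.getPort());
        }
        discovery.unadvertise(service);   // withdraw it once no more connections are accepted
      }
    }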

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
new file mode 100644
index 0000000..9a7dd3c
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscovery.java
@@ -0,0 +1,430 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.discovery;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+
+import javax.validation.constraints.NotNull;
+
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.ObjectReader;
+import org.codehaus.jackson.map.ObjectWriter;
+import org.codehaus.jackson.type.TypeReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.CuratorFrameworkFactory;
+import org.apache.curator.retry.RetryNTimes;
+import org.apache.curator.utils.EnsurePath;
+import org.apache.curator.x.discovery.ServiceDiscovery;
+import org.apache.curator.x.discovery.ServiceDiscoveryBuilder;
+import org.apache.curator.x.discovery.ServiceInstance;
+import org.apache.curator.x.discovery.details.InstanceSerializer;
+import org.apache.flume.conf.Configurable;
+
+import com.google.common.base.Throwables;
+
+import com.datatorrent.api.Component;
+
+/**
+ * <p>ZKAssistedDiscovery class.</p>
+ *
+ * @since 0.9.3
+ */
+public class ZKAssistedDiscovery implements Discovery<byte[]>,
+    Component<com.datatorrent.api.Context>, Configurable, Serializable
+{
+  @NotNull
+  private String serviceName;
+  @NotNull
+  private String connectionString;
+  @NotNull
+  private String basePath;
+  private int connectionTimeoutMillis;
+  private int connectionRetryCount;
+  private int conntectionRetrySleepMillis;
+  private transient InstanceSerializerFactory instanceSerializerFactory;
+  private transient CuratorFramework curatorFramework;
+  private transient ServiceDiscovery<byte[]> discovery;
+
+  public ZKAssistedDiscovery()
+  {
+    this.serviceName = "DTFlume";
+    this.conntectionRetrySleepMillis = 500;
+    this.connectionRetryCount = 10;
+    this.connectionTimeoutMillis = 1000;
+  }
+
+  @Override
+  public void unadvertise(Service<byte[]> service)
+  {
+    doAdvertise(service, false);
+  }
+
+  @Override
+  public void advertise(Service<byte[]> service)
+  {
+    doAdvertise(service, true);
+  }
+
+  public void doAdvertise(Service<byte[]> service, boolean flag)
+  {
+    try {
+      new EnsurePath(basePath).ensure(curatorFramework.getZookeeperClient());
+
+      ServiceInstance<byte[]> instance = getInstance(service);
+      if (flag) {
+        discovery.registerService(instance);
+      } else {
+        discovery.unregisterService(instance);
+      }
+    } catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  @Override
+  public Collection<Service<byte[]>> discover()
+  {
+    try {
+      new EnsurePath(basePath).ensure(curatorFramework.getZookeeperClient());
+
+      Collection<ServiceInstance<byte[]>> services = discovery.queryForInstances(serviceName);
+      ArrayList<Service<byte[]>> returnable = new ArrayList<Service<byte[]>>(services.size());
+      for (final ServiceInstance<byte[]> service : services) {
+        returnable.add(new Service<byte[]>()
+        {
+          @Override
+          public String getHost()
+          {
+            return service.getAddress();
+          }
+
+          @Override
+          public int getPort()
+          {
+            return service.getPort();
+          }
+
+          @Override
+          public byte[] getPayload()
+          {
+            return service.getPayload();
+          }
+
+          @Override
+          public String getId()
+          {
+            return service.getId();
+          }
+
+          @Override
+          public String toString()
+          {
+            return "{" + getId() + " => " + getHost() + ':' + getPort() + '}';
+          }
+
+        });
+      }
+      return returnable;
+    } catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  @Override
+  public String toString()
+  {
+    return "ZKAssistedDiscovery{" + "serviceName=" + serviceName + ", connectionString=" + connectionString +
+        ", basePath=" + basePath + ", connectionTimeoutMillis=" + connectionTimeoutMillis + ", connectionRetryCount=" +
+        connectionRetryCount + ", conntectionRetrySleepMillis=" + conntectionRetrySleepMillis + '}';
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int hash = 7;
+    hash = 47 * hash + this.serviceName.hashCode();
+    hash = 47 * hash + this.connectionString.hashCode();
+    hash = 47 * hash + this.basePath.hashCode();
+    hash = 47 * hash + this.connectionTimeoutMillis;
+    hash = 47 * hash + this.connectionRetryCount;
+    hash = 47 * hash + this.conntectionRetrySleepMillis;
+    return hash;
+  }
+
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    final ZKAssistedDiscovery other = (ZKAssistedDiscovery)obj;
+    if (!this.serviceName.equals(other.serviceName)) {
+      return false;
+    }
+    if (!this.connectionString.equals(other.connectionString)) {
+      return false;
+    }
+    if (!this.basePath.equals(other.basePath)) {
+      return false;
+    }
+    if (this.connectionTimeoutMillis != other.connectionTimeoutMillis) {
+      return false;
+    }
+    if (this.connectionRetryCount != other.connectionRetryCount) {
+      return false;
+    }
+    if (this.conntectionRetrySleepMillis != other.conntectionRetrySleepMillis) {
+      return false;
+    }
+    return true;
+  }
+
+  ServiceInstance<byte[]> getInstance(Service<byte[]> service) throws Exception
+  {
+    return ServiceInstance.<byte[]>builder()
+            .name(serviceName)
+            .address(service.getHost())
+            .port(service.getPort())
+            .id(service.getId())
+            .payload(service.getPayload())
+            .build();
+  }
+
+  private ServiceDiscovery<byte[]> getDiscovery(CuratorFramework curatorFramework)
+  {
+    return ServiceDiscoveryBuilder.builder(byte[].class)
+            .basePath(basePath)
+            .client(curatorFramework)
+            .serializer(instanceSerializerFactory.getInstanceSerializer(
+            new TypeReference<ServiceInstance<byte[]>>()
+              {})).build();
+  }
+
+  /**
+   * @return the instanceSerializerFactory
+   */
+  InstanceSerializerFactory getInstanceSerializerFactory()
+  {
+    return instanceSerializerFactory;
+  }
+
+  /**
+   * @return the connectionString
+   */
+  public String getConnectionString()
+  {
+    return connectionString;
+  }
+
+  /**
+   * @param connectionString the connectionString to set
+   */
+  public void setConnectionString(String connectionString)
+  {
+    this.connectionString = connectionString;
+  }
+
+  /**
+   * @return the basePath
+   */
+  public String getBasePath()
+  {
+    return basePath;
+  }
+
+  /**
+   * @param basePath the basePath to set
+   */
+  public void setBasePath(String basePath)
+  {
+    this.basePath = basePath;
+  }
+
+  /**
+   * @return the connectionTimeoutMillis
+   */
+  public int getConnectionTimeoutMillis()
+  {
+    return connectionTimeoutMillis;
+  }
+
+  /**
+   * @param connectionTimeoutMillis the connectionTimeoutMillis to set
+   */
+  public void setConnectionTimeoutMillis(int connectionTimeoutMillis)
+  {
+    this.connectionTimeoutMillis = connectionTimeoutMillis;
+  }
+
+  /**
+   * @return the connectionRetryCount
+   */
+  public int getConnectionRetryCount()
+  {
+    return connectionRetryCount;
+  }
+
+  /**
+   * @param connectionRetryCount the connectionRetryCount to set
+   */
+  public void setConnectionRetryCount(int connectionRetryCount)
+  {
+    this.connectionRetryCount = connectionRetryCount;
+  }
+
+  /**
+   * @return the conntectionRetrySleepMillis
+   */
+  public int getConntectionRetrySleepMillis()
+  {
+    return conntectionRetrySleepMillis;
+  }
+
+  /**
+   * @param conntectionRetrySleepMillis the conntectionRetrySleepMillis to set
+   */
+  public void setConntectionRetrySleepMillis(int conntectionRetrySleepMillis)
+  {
+    this.conntectionRetrySleepMillis = conntectionRetrySleepMillis;
+  }
+
+  /**
+   * @return the serviceName
+   */
+  public String getServiceName()
+  {
+    return serviceName;
+  }
+
+  /**
+   * @param serviceName the serviceName to set
+   */
+  public void setServiceName(String serviceName)
+  {
+    this.serviceName = serviceName;
+  }
+
+  @Override
+  public void configure(org.apache.flume.Context context)
+  {
+    serviceName = context.getString("serviceName", "DTFlume");
+    connectionString = context.getString("connectionString");
+    basePath = context.getString("basePath");
+
+    connectionTimeoutMillis = context.getInteger("connectionTimeoutMillis", 1000);
+    connectionRetryCount = context.getInteger("connectionRetryCount", 10);
+    conntectionRetrySleepMillis = context.getInteger("connectionRetrySleepMillis", 500);
+  }
+
+  @Override
+  public void setup(com.datatorrent.api.Context context)
+  {
+    ObjectMapper om = new ObjectMapper();
+    instanceSerializerFactory = new InstanceSerializerFactory(om.reader(), om.writer());
+
+    curatorFramework = CuratorFrameworkFactory.builder()
+            .connectionTimeoutMs(connectionTimeoutMillis)
+            .retryPolicy(new RetryNTimes(connectionRetryCount, conntectionRetrySleepMillis))
+            .connectString(connectionString)
+            .build();
+    curatorFramework.start();
+
+    discovery = getDiscovery(curatorFramework);
+    try {
+      discovery.start();
+    } catch (Exception ex) {
+      Throwables.propagate(ex);
+    }
+  }
+
+  @Override
+  public void teardown()
+  {
+    try {
+      discovery.close();
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    } finally {
+      curatorFramework.close();
+      curatorFramework = null;
+    }
+  }
+
+  public class InstanceSerializerFactory
+  {
+    private final ObjectReader objectReader;
+    private final ObjectWriter objectWriter;
+
+    InstanceSerializerFactory(ObjectReader objectReader, ObjectWriter objectWriter)
+    {
+      this.objectReader = objectReader;
+      this.objectWriter = objectWriter;
+    }
+
+    public <T> InstanceSerializer<T> getInstanceSerializer(
+        TypeReference<ServiceInstance<T>> typeReference)
+    {
+      return new JacksonInstanceSerializer<T>(objectReader, objectWriter, typeReference);
+    }
+
+    final class JacksonInstanceSerializer<T> implements InstanceSerializer<T>
+    {
+      private final TypeReference<ServiceInstance<T>> typeRef;
+      private final ObjectWriter objectWriter;
+      private final ObjectReader objectReader;
+
+      JacksonInstanceSerializer(ObjectReader objectReader, ObjectWriter objectWriter,
+          TypeReference<ServiceInstance<T>> typeRef)
+      {
+        this.objectReader = objectReader;
+        this.objectWriter = objectWriter;
+        this.typeRef = typeRef;
+      }
+
+      @Override
+      public ServiceInstance<T> deserialize(byte[] bytes) throws Exception
+      {
+        return objectReader.withType(typeRef).readValue(bytes);
+      }
+
+      @Override
+      public byte[] serialize(ServiceInstance<T> serviceInstance) throws Exception
+      {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        objectWriter.writeValue(out, serviceInstance);
+        return out.toByteArray();
+      }
+
+    }
+
+  }
+
+  private static final long serialVersionUID = 201401221145L;
+  private static final Logger logger = LoggerFactory.getLogger(ZKAssistedDiscovery.class);
+}
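
A hedged sketch of wiring up ZKAssistedDiscovery outside of a Flume agent follows; the
ZooKeeper connect string and base path are placeholders. Inside a Flume agent the
equivalent values come from the agent properties via configure() above (keys:
connectionString, basePath, serviceName, connectionTimeoutMillis, connectionRetryCount,
connectionRetrySleepMillis).

    import org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery;

    class ZKDiscoverySketch
    {
      static void run()
      {
        ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
        discovery.setConnectionString("zk-host:2181");   // placeholder ZooKeeper quorum
        discovery.setBasePath("/flume/discovery");       // placeholder znode path
        discovery.setup(null);   // setup() above does not use its context argument
        try {
          System.out.println(discovery.discover());
        } finally {
          discovery.teardown();
        }
      }
    }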


[05/13] apex-malhar git commit: Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.

Posted by th...@apache.org.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
new file mode 100644
index 0000000..bd7e5e0
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptor.java
@@ -0,0 +1,227 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.interceptor;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Ints;
+
+import static org.apache.apex.malhar.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER;
+
+/**
+ * <p>ColumnFilteringFormattingInterceptor class.</p>
+ *
+ * @since 0.9.4
+ */
+public class ColumnFilteringFormattingInterceptor implements Interceptor
+{
+  private final byte srcSeparator;
+  private final byte[][] dstSeparators;
+  private final byte[] prefix;
+  private final int maxIndex;
+  private final int maxColumn;
+  private final int[] columns;
+  private final int[] positions;
+
+  private ColumnFilteringFormattingInterceptor(int[] columns, byte srcSeparator, byte[][] dstSeparators, byte[] prefix)
+  {
+    this.columns = columns;
+
+    int tempMaxColumn = Integer.MIN_VALUE;
+    for (int column : columns) {
+      if (column > tempMaxColumn) {
+        tempMaxColumn = column;
+      }
+    }
+    maxIndex = tempMaxColumn;
+    maxColumn = tempMaxColumn + 1;
+    positions = new int[maxColumn + 1];
+    this.srcSeparator = srcSeparator;
+    this.dstSeparators = dstSeparators;
+    this.prefix = prefix;
+  }
+
+  @Override
+  public void initialize()
+  {
+    /* no-op */
+  }
+
+  @Override
+  public Event intercept(Event event)
+  {
+    byte[] body = event.getBody();
+    if (body == null) {
+      return event;
+    }
+
+    final int length = body.length;
+
+    /* store positions of character after the separators */
+    int i = 0;
+    int index = 0;
+    while (i < length) {
+      if (body[i++] == srcSeparator) {
+        positions[++index] = i;
+        if (index >= maxIndex) {
+          break;
+        }
+      }
+    }
+
+    int nextVirginIndex;
+    boolean separatorAtEnd = true;
+    if (i == length && index < maxColumn) {
+      nextVirginIndex = index + 2;
+      positions[nextVirginIndex - 1] = length;
+      separatorAtEnd = length > 0 ? body[length - 1] == srcSeparator : false;
+    } else {
+      nextVirginIndex = index + 1;
+    }
+
+    int newArrayLen = prefix.length;
+    for (i = columns.length; i-- > 0; ) {
+      int column = columns[i];
+      int len = positions[column + 1] - positions[column];
+      if (len > 0) {
+        if (positions[column + 1] == length && !separatorAtEnd) {
+          newArrayLen += len;
+        } else {
+          newArrayLen += len - 1;
+        }
+      }
+      newArrayLen += dstSeparators[i].length;
+    }
+
+    byte[] newBody = new byte[newArrayLen];
+    int newOffset = 0;
+    if (prefix.length > 0) {
+      System.arraycopy(prefix, 0, newBody, 0, prefix.length);
+      newOffset += prefix.length;
+    }
+    int dstSeparatorsIdx = 0;
+    for (int column : columns) {
+      int len = positions[column + 1] - positions[column];
+      byte[] separator = dstSeparators[dstSeparatorsIdx++];
+      if (len > 0) {
+        System.arraycopy(body, positions[column], newBody, newOffset, len);
+        newOffset += len;
+        if (newBody[newOffset - 1] == srcSeparator) {
+          newOffset--;
+        }
+      }
+      System.arraycopy(separator, 0, newBody, newOffset, separator.length);
+      newOffset += separator.length;
+    }
+    event.setBody(newBody);
+    Arrays.fill(positions, 1, nextVirginIndex, 0);
+    return event;
+  }
+
+  @Override
+  public List<Event> intercept(List<Event> events)
+  {
+    for (Event event : events) {
+      intercept(event);
+    }
+    return events;
+  }
+
+  @Override
+  public void close()
+  {
+  }
+
+  public static class Builder implements Interceptor.Builder
+  {
+    private int[] columns;
+    private byte srcSeparator;
+    private byte[][] dstSeparators;
+    private byte[] prefix;
+
+    @Override
+    public Interceptor build()
+    {
+      return new ColumnFilteringFormattingInterceptor(columns, srcSeparator, dstSeparators, prefix);
+    }
+
+    @Override
+    public void configure(Context context)
+    {
+      String formatter = context.getString(COLUMNS_FORMATTER);
+      if (Strings.isNullOrEmpty(formatter)) {
+        throw new IllegalArgumentException("This interceptor requires columns format to be specified!");
+      }
+      List<String> lSeparators = Lists.newArrayList();
+      List<Integer> lColumns = Lists.newArrayList();
+      Pattern colPat = Pattern.compile("\\{\\d+?\\}");
+      Matcher matcher = colPat.matcher(formatter);
+      int separatorStart = 0;
+      String lPrefix = "";
+      while (matcher.find()) {
+        String col = matcher.group();
+        lColumns.add(Integer.parseInt(col.substring(1, col.length() - 1)));
+        if (separatorStart == 0 && matcher.start() > 0) {
+          lPrefix = formatter.substring(0, matcher.start());
+        } else if (separatorStart > 0) {
+          lSeparators.add(formatter.substring(separatorStart, matcher.start()));
+        }
+
+        separatorStart = matcher.end();
+      }
+      if (separatorStart < formatter.length()) {
+        lSeparators.add(formatter.substring(separatorStart, formatter.length()));
+      }
+      columns = Ints.toArray(lColumns);
+      byte[] emptyStringBytes = "".getBytes();
+
+      dstSeparators = new byte[columns.length][];
+
+      for (int i = 0; i < columns.length; i++) {
+        if (i < lSeparators.size()) {
+          dstSeparators[i] = lSeparators.get(i).getBytes();
+        } else {
+          dstSeparators[i] = emptyStringBytes;
+        }
+      }
+      srcSeparator = context.getInteger(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, (int) ColumnFilteringInterceptor.Constants.SRC_SEPARATOR_DFLT).byteValue();
+      this.prefix = lPrefix.getBytes();
+    }
+  }
+
+  public static class Constants extends ColumnFilteringInterceptor.Constants
+  {
+    public static final String COLUMNS_FORMATTER = "columnsFormatter";
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ColumnFilteringFormattingInterceptor.class);
+
+}
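
The columnsFormatter value drives everything in the Builder above: for example, a formatter
of "{1}|{2}\n" is parsed into columns [1, 2] and separators ["|", "\n"] with an empty prefix,
so each event body is rebuilt from those columns of the srcSeparator-delimited input. Below
is a hedged sketch of building the interceptor programmatically; in a Flume agent the same
keys come from the interceptor's properties, and the values shown are illustrative.

    import org.apache.flume.Context;
    import org.apache.flume.interceptor.Interceptor;

    class FormattingInterceptorSketch
    {
      static Interceptor build()
      {
        Context context = new Context();
        context.put("columnsFormatter", "{1}|{2}\n");
        context.put("srcSeparator", "2");   // byte value of the source field separator (default 2)
        Interceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
        builder.configure(context);
        return builder.build();
      }
    }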

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptor.java b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptor.java
new file mode 100644
index 0000000..f0de5e0
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptor.java
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.interceptor;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+import static org.apache.apex.malhar.flume.interceptor.ColumnFilteringInterceptor.Constants.COLUMNS;
+import static org.apache.apex.malhar.flume.interceptor.ColumnFilteringInterceptor.Constants.DST_SEPARATOR;
+import static org.apache.apex.malhar.flume.interceptor.ColumnFilteringInterceptor.Constants.DST_SEPARATOR_DFLT;
+import static org.apache.apex.malhar.flume.interceptor.ColumnFilteringInterceptor.Constants.SRC_SEPARATOR;
+import static org.apache.apex.malhar.flume.interceptor.ColumnFilteringInterceptor.Constants.SRC_SEPARATOR_DFLT;
+
+/**
+ * <p>ColumnFilteringInterceptor class.</p>
+ *
+ * @since 0.9.4
+ */
+public class ColumnFilteringInterceptor implements Interceptor
+{
+  private final byte srcSeparator;
+  private final byte dstSeparator;
+
+  private final int maxIndex;
+  private final int maxColumn;
+  private final int[] columns;
+  private final int[] positions;
+
+  private ColumnFilteringInterceptor(int[] columns, byte srcSeparator, byte dstSeparator)
+  {
+    this.columns = columns;
+
+    int tempMaxColumn = Integer.MIN_VALUE;
+    for (int column: columns) {
+      if (column > tempMaxColumn) {
+        tempMaxColumn = column;
+      }
+    }
+    maxIndex = tempMaxColumn;
+    maxColumn = tempMaxColumn + 1;
+    positions = new int[maxColumn + 1];
+
+    this.srcSeparator = srcSeparator;
+    this.dstSeparator = dstSeparator;
+  }
+
+  @Override
+  public void initialize()
+  {
+    /* no-op */
+  }
+
+  @Override
+  public Event intercept(Event event)
+  {
+    byte[] body = event.getBody();
+    if (body == null) {
+      return event;
+    }
+
+    final int length = body.length;
+
+    /* store positions of character after the separators */
+    int i = 0;
+    int index = 0;
+    while (i < length) {
+      if (body[i++] == srcSeparator) {
+        positions[++index] = i;
+        if (index >= maxIndex) {
+          break;
+        }
+      }
+    }
+
+    int nextVirginIndex;
+    boolean separatorTerminated;
+    if (i == length && index < maxColumn) {
+      nextVirginIndex = index + 2;
+      positions[nextVirginIndex - 1] = length;
+      separatorTerminated = length > 0 ? body[length - 1]  != srcSeparator : false;
+    } else {
+      nextVirginIndex = index + 1;
+      separatorTerminated = true;
+    }
+
+    int newArrayLen = 0;
+    for (i = columns.length; i-- > 0;) {
+      int column = columns[i];
+      int len = positions[column + 1] - positions[column];
+      if (len <= 0) {
+        newArrayLen++;
+      } else {
+        if (separatorTerminated && positions[column + 1] == length) {
+          newArrayLen++;
+        }
+        newArrayLen += len;
+      }
+    }
+
+    byte[] newbody = new byte[newArrayLen];
+    int newoffset = 0;
+    for (int column: columns) {
+      int len = positions[column + 1] - positions[column];
+      if (len > 0) {
+        System.arraycopy(body, positions[column], newbody, newoffset, len);
+        newoffset += len;
+        if (newbody[newoffset - 1] == srcSeparator) {
+          newbody[newoffset - 1] = dstSeparator;
+        } else {
+          newbody[newoffset++] = dstSeparator;
+        }
+      } else {
+        newbody[newoffset++] = dstSeparator;
+      }
+    }
+
+    event.setBody(newbody);
+    Arrays.fill(positions, 1, nextVirginIndex, 0);
+    return event;
+  }
+
+  @Override
+  public List<Event> intercept(List<Event> events)
+  {
+    for (Event event: events) {
+      intercept(event);
+    }
+    return events;
+  }
+
+  @Override
+  public void close()
+  {
+  }
+
+  public static class Builder implements Interceptor.Builder
+  {
+    private int[] columns;
+    private byte srcSeparator;
+    private byte dstSeparator;
+
+    @Override
+    public Interceptor build()
+    {
+      return new ColumnFilteringInterceptor(columns, srcSeparator, dstSeparator);
+    }
+
+    @Override
+    public void configure(Context context)
+    {
+      String sColumns = context.getString(COLUMNS);
+      if (sColumns == null || sColumns.trim().isEmpty()) {
+        throw new Error("This interceptor requires filtered columns to be specified!");
+      }
+
+      String[] parts = sColumns.split(" ");
+      columns = new int[parts.length];
+      for (int i = parts.length; i-- > 0;) {
+        columns[i] = Integer.parseInt(parts[i]);
+      }
+
+      srcSeparator = context.getInteger(SRC_SEPARATOR, (int)SRC_SEPARATOR_DFLT).byteValue();
+      dstSeparator = context.getInteger(DST_SEPARATOR, (int)DST_SEPARATOR_DFLT).byteValue();
+    }
+
+  }
+
+  @SuppressWarnings("ClassMayBeInterface") /* adhering to flume until i understand it completely */
+
+  public static class Constants
+  {
+    public static final String SRC_SEPARATOR = "srcSeparator";
+    public static final byte SRC_SEPARATOR_DFLT = 2;
+
+    public static final String DST_SEPARATOR = "dstSeparator";
+    public static final byte DST_SEPARATOR_DFLT = 1;
+
+    public static final String COLUMNS = "columns";
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ColumnFilteringInterceptor.class);
+}
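
The plain filtering variant above is configured the same way, with a space-separated
"columns" list plus single-byte source and destination separators (defaults 2 and 1).
A hedged sketch with illustrative values:

    import org.apache.flume.Context;
    import org.apache.flume.interceptor.Interceptor;

    class FilteringInterceptorSketch
    {
      static Interceptor build()
      {
        Context context = new Context();
        context.put("columns", "0 3 5");      // keep the 1st, 4th and 6th fields
        context.put("dstSeparator", "9");     // e.g. re-join the kept fields with a tab
        Interceptor.Builder builder = new ColumnFilteringInterceptor.Builder();
        builder.configure(context);
        return builder.build();
      }
    }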

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
new file mode 100644
index 0000000..da1a8aa
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperator.java
@@ -0,0 +1,759 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.operator;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotNull;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.apache.apex.malhar.flume.sink.Server;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+
+import com.datatorrent.api.Context;
+import com.datatorrent.api.Context.OperatorContext;
+import com.datatorrent.api.DefaultOutputPort;
+import com.datatorrent.api.DefaultPartition;
+import com.datatorrent.api.InputOperator;
+import com.datatorrent.api.Operator;
+import com.datatorrent.api.Partitioner;
+import com.datatorrent.api.Stats.OperatorStats;
+import com.datatorrent.api.StreamCodec;
+import org.apache.apex.malhar.flume.discovery.ZKAssistedDiscovery;
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+import static java.lang.Thread.sleep;
+
+/**
+ * <p>
+ * Abstract AbstractFlumeInputOperator class.</p>
+ *
+ * @param <T> Type of the output payload.
+ * @since 0.9.2
+ */
+public abstract class AbstractFlumeInputOperator<T>
+    implements InputOperator, Operator.ActivationListener<OperatorContext>, Operator.IdleTimeHandler,
+    Operator.CheckpointListener, Partitioner<AbstractFlumeInputOperator<T>>
+{
+  public final transient DefaultOutputPort<T> output = new DefaultOutputPort<T>();
+  public final transient DefaultOutputPort<Slice> drop = new DefaultOutputPort<Slice>();
+  @NotNull
+  private String[] connectionSpecs;
+  @NotNull
+  private StreamCodec<Event> codec;
+  private final ArrayList<RecoveryAddress> recoveryAddresses;
+  @SuppressWarnings("FieldMayBeFinal") // it's not final because that mucks with the serialization somehow
+  private transient ArrayBlockingQueue<Slice> handoverBuffer;
+  private transient int idleCounter;
+  private transient int eventCounter;
+  private transient DefaultEventLoop eventloop;
+  private transient volatile boolean connected;
+  private transient OperatorContext context;
+  private transient Client client;
+  private transient long windowId;
+  private transient byte[] address;
+  @Min(0)
+  private long maxEventsPerSecond;
+  //This is calculated from maxEventsPerSecond, App window count and streaming window size
+  private transient long maxEventsPerWindow;
+
+  public AbstractFlumeInputOperator()
+  {
+    handoverBuffer = new ArrayBlockingQueue<Slice>(1024 * 5);
+    connectionSpecs = new String[0];
+    recoveryAddresses = new ArrayList<RecoveryAddress>();
+    maxEventsPerSecond = Long.MAX_VALUE;
+  }
+
+  @Override
+  public void setup(OperatorContext context)
+  {
+    long windowDurationMillis = context.getValue(OperatorContext.APPLICATION_WINDOW_COUNT) *
+        context.getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS);
+    maxEventsPerWindow = (long)(windowDurationMillis / 1000.0 * maxEventsPerSecond);
+    logger.debug("max-events per-second {} per-window {}", maxEventsPerSecond, maxEventsPerWindow);
+
+    try {
+      eventloop = new DefaultEventLoop("EventLoop-" + context.getId());
+      eventloop.start();
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  @Override
+  @SuppressWarnings({"unchecked"})
+  public void activate(OperatorContext ctx)
+  {
+    if (connectionSpecs.length == 0) {
+      logger.info("Discovered zero DTFlumeSink");
+    } else if (connectionSpecs.length == 1) {
+      for (String connectAddresse: connectionSpecs) {
+        logger.debug("Connection spec is {}", connectAddresse);
+        String[] parts = connectAddresse.split(":");
+        eventloop.connect(new InetSocketAddress(parts[1], Integer.parseInt(parts[2])), client = new Client(parts[0]));
+      }
+    } else {
+      throw new IllegalArgumentException(
+          String.format("A physical %s operator cannot connect to more than 1 addresses!",
+              this.getClass().getSimpleName()));
+    }
+
+    context = ctx;
+  }
+
+  @Override
+  public void beginWindow(long windowId)
+  {
+    this.windowId = windowId;
+    idleCounter = 0;
+    eventCounter = 0;
+  }
+
+  @Override
+  public void emitTuples()
+  {
+    int i = handoverBuffer.size();
+    if (i > 0 && eventCounter < maxEventsPerWindow) {
+
+      while (--i > 0 && eventCounter < maxEventsPerWindow - 1) {
+        final Slice slice = handoverBuffer.poll();
+        slice.offset += 8;
+        slice.length -= 8;
+        T convert = convert((Event)codec.fromByteArray(slice));
+        if (convert == null) {
+          drop.emit(slice);
+        } else {
+          output.emit(convert);
+        }
+        eventCounter++;
+      }
+
+      final Slice slice = handoverBuffer.poll();
+      slice.offset += 8;
+      slice.length -= 8;
+      T convert = convert((Event)codec.fromByteArray(slice));
+      if (convert == null) {
+        drop.emit(slice);
+      } else {
+        output.emit(convert);
+      }
+      eventCounter++;
+
+      address = Arrays.copyOfRange(slice.buffer, slice.offset - 8, slice.offset);
+    }
+  }
+
+  @Override
+  public void endWindow()
+  {
+    if (connected) {
+      byte[] array = new byte[Server.Request.FIXED_SIZE];
+
+      array[0] = Server.Command.WINDOWED.getOrdinal();
+      Server.writeInt(array, 1, eventCounter);
+      Server.writeInt(array, 5, idleCounter);
+      Server.writeLong(array, Server.Request.TIME_OFFSET, System.currentTimeMillis());
+
+      logger.debug("wrote {} with eventCounter = {} and idleCounter = {}", Server.Command.WINDOWED, eventCounter, idleCounter);
+      client.write(array);
+    }
+
+    if (address != null) {
+      RecoveryAddress rAddress = new RecoveryAddress();
+      rAddress.address = address;
+      address = null;
+      rAddress.windowId = windowId;
+      recoveryAddresses.add(rAddress);
+    }
+  }
+
+  @Override
+  public void deactivate()
+  {
+    if (connected) {
+      eventloop.disconnect(client);
+    }
+    context = null;
+  }
+
+  @Override
+  public void teardown()
+  {
+    eventloop.stop();
+    eventloop = null;
+  }
+
+  @Override
+  public void handleIdleTime()
+  {
+    idleCounter++;
+    try {
+      sleep(context.getValue(OperatorContext.SPIN_MILLIS));
+    } catch (InterruptedException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  public abstract T convert(Event event);
+
+  /**
+   * @return the connectAddress
+   */
+  public String[] getConnectAddresses()
+  {
+    return connectionSpecs.clone();
+  }
+
+  /**
+   * @param specs - sinkid:host:port specification of all the sinks.
+   */
+  public void setConnectAddresses(String[] specs)
+  {
+    this.connectionSpecs = specs.clone();
+  }
+
+  /**
+   * @return the codec
+   */
+  public StreamCodec<Event> getCodec()
+  {
+    return codec;
+  }
+
+  /**
+   * @param codec the codec to set
+   */
+  public void setCodec(StreamCodec<Event> codec)
+  {
+    this.codec = codec;
+  }
+
+  private static class RecoveryAddress implements Serializable
+  {
+    long windowId;
+    byte[] address;
+
+    @Override
+    public String toString()
+    {
+      return "RecoveryAddress{" + "windowId=" + windowId + ", address=" + Arrays.toString(address) + '}';
+    }
+
+    @Override
+    public boolean equals(Object o)
+    {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof RecoveryAddress)) {
+        return false;
+      }
+
+      RecoveryAddress that = (RecoveryAddress)o;
+
+      if (windowId != that.windowId) {
+        return false;
+      }
+      return Arrays.equals(address, that.address);
+    }
+
+    @Override
+    public int hashCode()
+    {
+      int result = (int)(windowId ^ (windowId >>> 32));
+      result = 31 * result + (address != null ? Arrays.hashCode(address) : 0);
+      return result;
+    }
+
+    private static final long serialVersionUID = 201312021432L;
+  }
+
+  @Override
+  public void checkpointed(long windowId)
+  {
+    /* don't do anything */
+  }
+
+  @Override
+  public void committed(long windowId)
+  {
+    if (!connected) {
+      return;
+    }
+
+    synchronized (recoveryAddresses) {
+      byte[] addr = null;
+
+      Iterator<RecoveryAddress> iterator = recoveryAddresses.iterator();
+      while (iterator.hasNext()) {
+        RecoveryAddress ra = iterator.next();
+        if (ra.windowId > windowId) {
+          break;
+        }
+
+        iterator.remove();
+        if (ra.address != null) {
+          addr = ra.address;
+        }
+      }
+
+      if (addr != null) {
+        /*
+         * Make sure that we store the last valid address processed
+         */
+        if (recoveryAddresses.isEmpty()) {
+          RecoveryAddress ra = new RecoveryAddress();
+          ra.address = addr;
+          recoveryAddresses.add(ra);
+        }
+
+        int arraySize = 1/* for the type of the message */
+            + 8 /* for the location to commit */
+            + 8 /* for storing the current time stamp*/;
+        byte[] array = new byte[arraySize];
+
+        array[0] = Server.Command.COMMITTED.getOrdinal();
+        System.arraycopy(addr, 0, array, 1, 8);
+        Server.writeLong(array, Server.Request.TIME_OFFSET, System.currentTimeMillis());
+        logger.debug("wrote {} with recoveryOffset = {}", Server.Command.COMMITTED, Arrays.toString(addr));
+        client.write(array);
+      }
+    }
+  }
+
+  @Override
+  public Collection<Partition<AbstractFlumeInputOperator<T>>> definePartitions(
+      Collection<Partition<AbstractFlumeInputOperator<T>>> partitions, PartitioningContext context)
+  {
+    Collection<Discovery.Service<byte[]>> discovered = discoveredFlumeSinks.get();
+    if (discovered == null) {
+      return partitions;
+    }
+
+    HashMap<String, ArrayList<RecoveryAddress>> allRecoveryAddresses = abandonedRecoveryAddresses.get();
+    ArrayList<String> allConnectAddresses = new ArrayList<String>(partitions.size());
+    for (Partition<AbstractFlumeInputOperator<T>> partition: partitions) {
+      String[] lAddresses = partition.getPartitionedInstance().connectionSpecs;
+      allConnectAddresses.addAll(Arrays.asList(lAddresses));
+      for (int i = lAddresses.length; i-- > 0;) {
+        String[] parts = lAddresses[i].split(":", 2);
+        allRecoveryAddresses.put(parts[0], partition.getPartitionedInstance().recoveryAddresses);
+      }
+    }
+
+    HashMap<String, String> connections = new HashMap<String, String>(discovered.size());
+    for (Discovery.Service<byte[]> service: discovered) {
+      String previousSpec = connections.get(service.getId());
+      String newspec = service.getId() + ':' + service.getHost() + ':' + service.getPort();
+      if (previousSpec == null) {
+        connections.put(service.getId(), newspec);
+      } else {
+        boolean found = false;
+        for (ConnectionStatus cs: partitionedInstanceStatus.get().values()) {
+          if (previousSpec.equals(cs.spec) && !cs.connected) {
+            connections.put(service.getId(), newspec);
+            found = true;
+            break;
+          }
+        }
+
+        if (!found) {
+          logger.warn("2 sinks found with the same id: {} and {}... Ignoring previous.", previousSpec, newspec);
+          connections.put(service.getId(), newspec);
+        }
+      }
+    }
+
+    for (int i = allConnectAddresses.size(); i-- > 0;) {
+      String[] parts = allConnectAddresses.get(i).split(":");
+      String connection = connections.remove(parts[0]);
+      if (connection == null) {
+        allConnectAddresses.remove(i);
+      } else {
+        allConnectAddresses.set(i, connection);
+      }
+    }
+
+    allConnectAddresses.addAll(connections.values());
+
+    partitions.clear();
+    try {
+      if (allConnectAddresses.isEmpty()) {
+        /* return at least one of them; otherwise stram becomes grumpy */
+        @SuppressWarnings("unchecked")
+        AbstractFlumeInputOperator<T> operator = getClass().newInstance();
+        operator.setCodec(codec);
+        operator.setMaxEventsPerSecond(maxEventsPerSecond);
+        for (ArrayList<RecoveryAddress> lRecoveryAddresses: allRecoveryAddresses.values()) {
+          operator.recoveryAddresses.addAll(lRecoveryAddresses);
+        }
+        operator.connectionSpecs = new String[allConnectAddresses.size()];
+        for (int i = operator.connectionSpecs.length; i-- > 0;) {
+          operator.connectionSpecs[i] = allConnectAddresses.get(i);
+        }
+
+        partitions.add(new DefaultPartition<AbstractFlumeInputOperator<T>>(operator));
+      } else {
+        long maxEventsPerSecondPerOperator = maxEventsPerSecond / allConnectAddresses.size();
+        for (int i = allConnectAddresses.size(); i-- > 0;) {
+          @SuppressWarnings("unchecked")
+          AbstractFlumeInputOperator<T> operator = getClass().newInstance();
+          operator.setCodec(codec);
+          operator.setMaxEventsPerSecond(maxEventsPerSecondPerOperator);
+          String connectAddress = allConnectAddresses.get(i);
+          operator.connectionSpecs = new String[] {connectAddress};
+
+          String[] parts = connectAddress.split(":", 2);
+          ArrayList<RecoveryAddress> remove = allRecoveryAddresses.remove(parts[0]);
+          if (remove != null) {
+            operator.recoveryAddresses.addAll(remove);
+          }
+
+          partitions.add(new DefaultPartition<AbstractFlumeInputOperator<T>>(operator));
+        }
+      }
+    } catch (IllegalAccessException ex) {
+      throw new RuntimeException(ex);
+    } catch (InstantiationException ex) {
+      throw new RuntimeException(ex);
+    }
+
+    logger.debug("Requesting partitions: {}", partitions);
+    return partitions;
+  }
+
+  @Override
+  public void partitioned(Map<Integer, Partition<AbstractFlumeInputOperator<T>>> partitions)
+  {
+    logger.debug("Partitioned Map: {}", partitions);
+    HashMap<Integer, ConnectionStatus> map = partitionedInstanceStatus.get();
+    map.clear();
+    for (Entry<Integer, Partition<AbstractFlumeInputOperator<T>>> entry: partitions.entrySet()) {
+      if (map.containsKey(entry.getKey())) {
+        // what can be done here?
+      } else {
+        map.put(entry.getKey(), null);
+      }
+    }
+  }
+
+  @Override
+  public String toString()
+  {
+    return "AbstractFlumeInputOperator{" + "connected=" + connected + ", connectionSpecs=" +
+        (connectionSpecs.length == 0 ? "empty" : connectionSpecs[0]) + ", recoveryAddresses=" + recoveryAddresses + '}';
+  }
+
+  class Client extends AbstractLengthPrependerClient
+  {
+    private final String id;
+
+    Client(String id)
+    {
+      this.id = id;
+    }
+
+    @Override
+    public void onMessage(byte[] buffer, int offset, int size)
+    {
+      try {
+        handoverBuffer.put(new Slice(buffer, offset, size));
+      } catch (InterruptedException ex) {
+        handleException(ex, eventloop);
+      }
+    }
+
+    @Override
+    public void connected()
+    {
+      super.connected();
+
+      byte[] address;
+      synchronized (recoveryAddresses) {
+        if (recoveryAddresses.size() > 0) {
+          address = recoveryAddresses.get(recoveryAddresses.size() - 1).address;
+        } else {
+          address = new byte[8];
+        }
+      }
+
+      int len = 1 /* for the message type SEEK */
+          + 8 /* for the address */
+          + 8 /* for storing the current time stamp*/;
+
+      byte[] array = new byte[len];
+      array[0] = Server.Command.SEEK.getOrdinal();
+      System.arraycopy(address, 0, array, 1, 8);
+      Server.writeLong(array, 9, System.currentTimeMillis());
+      write(array);
+
+      connected = true;
+      ConnectionStatus connectionStatus = new ConnectionStatus();
+      connectionStatus.connected = true;
+      connectionStatus.spec = connectionSpecs[0];
+      OperatorContext ctx = context;
+      synchronized (ctx) {
+        logger.debug("{} Submitting ConnectionStatus = {}", AbstractFlumeInputOperator.this, connectionStatus);
+        context.setCounters(connectionStatus);
+      }
+    }
+
+    @Override
+    public void disconnected()
+    {
+      connected = false;
+      ConnectionStatus connectionStatus = new ConnectionStatus();
+      connectionStatus.connected = false;
+      connectionStatus.spec = connectionSpecs[0];
+      OperatorContext ctx = context;
+      synchronized (ctx) {
+        logger.debug("{} Submitting ConnectionStatus = {}", AbstractFlumeInputOperator.this, connectionStatus);
+        context.setCounters(connectionStatus);
+      }
+      super.disconnected();
+    }
+
+  }
+
+  public static class ZKStatsListner extends ZKAssistedDiscovery implements com.datatorrent.api.StatsListener,
+      Serializable
+  {
+    /*
+     * In the current design, one input operator is able to connect
+     * to only one flume adapter. Sometime in the future, we should support
+     * any number of input operators connecting to any number of flume
+     * sinks and vice versa.
+     *
+     * Until that happens, the following map should be sufficient to
+     * keep track of which input operator is connected to which flume sink.
+     */
+    long intervalMillis;
+    private final Response response;
+    private transient long nextMillis;
+
+    public ZKStatsListner()
+    {
+      intervalMillis = 60 * 1000L;
+      response = new Response();
+    }
+
+    @Override
+    public Response processStats(BatchedOperatorStats stats)
+    {
+      final HashMap<Integer, ConnectionStatus> map = partitionedInstanceStatus.get();
+      response.repartitionRequired = false;
+
+      Object lastStat = null;
+      List<OperatorStats> lastWindowedStats = stats.getLastWindowedStats();
+      for (OperatorStats os: lastWindowedStats) {
+        if (os.counters != null) {
+          lastStat = os.counters;
+          logger.debug("Received custom stats = {}", lastStat);
+        }
+      }
+
+      if (lastStat instanceof ConnectionStatus) {
+        ConnectionStatus cs = (ConnectionStatus)lastStat;
+        map.put(stats.getOperatorId(), cs);
+        if (!cs.connected) {
+          logger.debug("setting repatitioned = true because of lastStat = {}", lastStat);
+          response.repartitionRequired = true;
+        }
+      }
+
+      if (System.currentTimeMillis() >= nextMillis) {
+        logger.debug("nextMillis = {}", nextMillis);
+        try {
+          super.setup(null);
+          Collection<Discovery.Service<byte[]>> addresses;
+          try {
+            addresses = discover();
+          } finally {
+            super.teardown();
+          }
+          AbstractFlumeInputOperator.discoveredFlumeSinks.set(addresses);
+          logger.debug("\ncurrent map = {}\ndiscovered sinks = {}", map, addresses);
+          switch (addresses.size()) {
+            case 0:
+              response.repartitionRequired = map.size() != 1;
+              break;
+
+            default:
+              if (addresses.size() == map.size()) {
+                for (ConnectionStatus value: map.values()) {
+                  if (value == null || !value.connected) {
+                    response.repartitionRequired = true;
+                    break;
+                  }
+                }
+              } else {
+                response.repartitionRequired = true;
+              }
+              break;
+          }
+        } catch (Error er) {
+          throw er;
+        } catch (Throwable cause) {
+          logger.warn("Unable to discover services, using values from last successful discovery", cause);
+        } finally {
+          nextMillis = System.currentTimeMillis() + intervalMillis;
+          logger.debug("Proposed NextMillis = {}", nextMillis);
+        }
+      }
+
+      return response;
+    }
+
+    /**
+     * @return the intervalMillis
+     */
+    public long getIntervalMillis()
+    {
+      return intervalMillis;
+    }
+
+    /**
+     * @param intervalMillis the intervalMillis to set
+     */
+    public void setIntervalMillis(long intervalMillis)
+    {
+      this.intervalMillis = intervalMillis;
+    }
+
+    private static final long serialVersionUID = 201312241646L;
+  }
+
+  public static class ConnectionStatus implements Serializable
+  {
+    int id;
+    String spec;
+    boolean connected;
+
+    @Override
+    public int hashCode()
+    {
+      return spec.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj)
+    {
+      if (obj == null) {
+        return false;
+      }
+      if (getClass() != obj.getClass()) {
+        return false;
+      }
+      final ConnectionStatus other = (ConnectionStatus)obj;
+      return spec == null ? other.spec == null : spec.equals(other.spec);
+    }
+
+    @Override
+    public String toString()
+    {
+      return "ConnectionStatus{" + "id=" + id + ", spec=" + spec + ", connected=" + connected + '}';
+    }
+
+    private static final long serialVersionUID = 201312261615L;
+  }
+
+  private static final transient ThreadLocal<HashMap<Integer, ConnectionStatus>> partitionedInstanceStatus =
+      new ThreadLocal<HashMap<Integer, ConnectionStatus>>()
+    {
+      @Override
+      protected HashMap<Integer, ConnectionStatus> initialValue()
+      {
+        return new HashMap<Integer, ConnectionStatus>();
+      }
+
+    };
+  /**
+   * When a sink goes away and a replacement sink is not found, we stash the recovery addresses associated
+   * with the sink in the hope that the new sink may show up in the near future.
+   */
+  private static final transient ThreadLocal<HashMap<String, ArrayList<RecoveryAddress>>> abandonedRecoveryAddresses =
+      new ThreadLocal<HashMap<String, ArrayList<RecoveryAddress>>>()
+  {
+    @Override
+    protected HashMap<String, ArrayList<RecoveryAddress>> initialValue()
+    {
+      return new HashMap<String, ArrayList<RecoveryAddress>>();
+    }
+
+  };
+  private static final transient ThreadLocal<Collection<Discovery.Service<byte[]>>> discoveredFlumeSinks =
+      new ThreadLocal<Collection<Discovery.Service<byte[]>>>();
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof AbstractFlumeInputOperator)) {
+      return false;
+    }
+
+    AbstractFlumeInputOperator<?> that = (AbstractFlumeInputOperator<?>)o;
+
+    if (!Arrays.equals(connectionSpecs, that.connectionSpecs)) {
+      return false;
+    }
+    return recoveryAddresses.equals(that.recoveryAddresses);
+
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int result = connectionSpecs != null ? Arrays.hashCode(connectionSpecs) : 0;
+    result = 31 * result + (recoveryAddresses.hashCode());
+    return result;
+  }
+
+  public void setMaxEventsPerSecond(long maxEventsPerSecond)
+  {
+    this.maxEventsPerSecond = maxEventsPerSecond;
+  }
+
+  public long getMaxEventsPerSecond()
+  {
+    return maxEventsPerSecond;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(AbstractFlumeInputOperator.class);
+}

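For reference, a minimal sketch of how the abstract operator above might be subclassed and pointed at a
sink; the subclass name and the address value are hypothetical, while convert(Event),
setConnectAddresses(String[]) and the sinkid:host:port spec format come from the code above:

    // Hypothetical concrete operator that emits the raw Flume event body as a String.
    public class StringFlumeInputOperator extends AbstractFlumeInputOperator<String>
    {
      @Override
      public String convert(Event event)
      {
        return new String(event.getBody());
      }
    }

    // During application setup, each connect address follows the "sinkid:host:port"
    // specification documented on setConnectAddresses().
    StringFlumeInputOperator flumeInput = new StringFlumeInputOperator();
    flumeInput.setConnectAddresses(new String[] {"sink1:flume-host.example.com:8080"});
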
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java b/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
new file mode 100644
index 0000000..306ce13
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/sink/DTFlumeSink.java
@@ -0,0 +1,572 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.sink;
+
+import java.io.IOError;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.ServiceConfigurationError;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.apache.apex.malhar.flume.storage.EventCodec;
+import org.apache.apex.malhar.flume.storage.Storage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.sink.AbstractSink;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.api.StreamCodec;
+import org.apache.apex.malhar.flume.sink.Server.Client;
+import org.apache.apex.malhar.flume.sink.Server.Request;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.NetletThrowable;
+import com.datatorrent.netlet.NetletThrowable.NetletRuntimeException;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * DTFlumeSink is a flume sink developed to ingest data into a DataTorrent DAG
+ * from Flume. It is essentially a flume sink which acts as a server capable of
+ * talking to one client at a time. The client for this server is AbstractFlumeInputOperator.
+ * <p />
+ * &lt;experimental&gt;DTFlumeSink auto-adjusts the rate at which it consumes data from the channel to
+ * match the throughput of the DAG.&lt;/experimental&gt;
+ * <p />
+ * The properties you can set on the DTFlumeSink are: <br />
+ * id - unique string value identifying this sink <br />
+ * hostname - string value indicating the fqdn or ip address of the interface on which the server should listen <br />
+ * port - integer value indicating the numeric port to which the server should bind <br />
+ * sleepMillis - integer value indicating the number of milliseconds the process should sleep when there are no events
+ * before checking for the next event again <br />
+ * throughputAdjustmentPercent - integer value indicating by what percentage the flume transaction size should be
+ * adjusted upward or downward at a time <br />
+ * minimumEventsPerTransaction - integer value indicating the minimum number of events per transaction <br />
+ * maximumEventsPerTransaction - integer value indicating the maximum number of events per transaction. This value
+ * cannot be more than the channel's transaction capacity.<br />
+ *
+ * @since 0.9.2
+ */
+public class DTFlumeSink extends AbstractSink implements Configurable
+{
+  private static final String HOSTNAME_STRING = "hostname";
+  private static final String HOSTNAME_DEFAULT = "locahost";
+  private static final long ACCEPTED_TOLERANCE = 20000;
+  private DefaultEventLoop eventloop;
+  private Server server;
+  private int outstandingEventsCount;
+  private int lastConsumedEventsCount;
+  private int idleCount;
+  private byte[] playback;
+  private Client client;
+  private String hostname;
+  private int port;
+  private String id;
+  private long acceptedTolerance;
+  private long sleepMillis;
+  private double throughputAdjustmentFactor;
+  private int minimumEventsPerTransaction;
+  private int maximumEventsPerTransaction;
+  private long commitEventTimeoutMillis;
+  private transient long lastCommitEventTimeMillis;
+  private Storage storage;
+  Discovery<byte[]> discovery;
+  StreamCodec<Event> codec;
+  /* Begin implementing Flume Sink interface */
+
+  @Override
+  @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch", "UseSpecificCatch", "SleepWhileInLoop"})
+  public Status process() throws EventDeliveryException
+  {
+    Slice slice;
+    synchronized (server.requests) {
+      for (Request r : server.requests) {
+        logger.debug("found {}", r);
+        switch (r.type) {
+          case SEEK:
+            lastCommitEventTimeMillis = System.currentTimeMillis();
+            slice = r.getAddress();
+            playback = storage.retrieve(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
+            client = r.client;
+            break;
+
+          case COMMITTED:
+            lastCommitEventTimeMillis = System.currentTimeMillis();
+            slice = r.getAddress();
+            storage.clean(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
+            break;
+
+          case CONNECTED:
+            logger.debug("Connected received, ignoring it!");
+            break;
+
+          case DISCONNECTED:
+            if (r.client == client) {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+            break;
+
+          case WINDOWED:
+            lastConsumedEventsCount = r.getEventCount();
+            idleCount = r.getIdleCount();
+            outstandingEventsCount -= lastConsumedEventsCount;
+            break;
+
+          case SERVER_ERROR:
+            throw new IOError(null);
+
+          default:
+            logger.debug("Cannot understand the request {}", r);
+            break;
+        }
+      }
+
+      server.requests.clear();
+    }
+
+    if (client == null) {
+      logger.info("No client expressed interest yet to consume the events.");
+      return Status.BACKOFF;
+    } else if (System.currentTimeMillis() - lastCommitEventTimeMillis > commitEventTimeoutMillis) {
+      logger.info("Client has not processed the workload given for the last {} milliseconds, so backing off.",
+          System.currentTimeMillis() - lastCommitEventTimeMillis);
+      return Status.BACKOFF;
+    }
+
+    int maxTuples;
+    // the following logic needs to be fixed... this is a quick first pass.
+    if (outstandingEventsCount < 0) {
+      if (idleCount > 1) {
+        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
+      } else {
+        maxTuples = (int)((1 + throughputAdjustmentFactor) * lastConsumedEventsCount);
+      }
+    } else if (outstandingEventsCount > lastConsumedEventsCount) {
+      maxTuples = (int)((1 - throughputAdjustmentFactor) * lastConsumedEventsCount);
+    } else {
+      if (idleCount > 0) {
+        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
+        if (maxTuples <= 0) {
+          maxTuples = minimumEventsPerTransaction;
+        }
+      } else {
+        maxTuples = lastConsumedEventsCount;
+      }
+    }
+
+    if (maxTuples >= maximumEventsPerTransaction) {
+      maxTuples = maximumEventsPerTransaction;
+    } else if (maxTuples <= 0) {
+      maxTuples = minimumEventsPerTransaction;
+    }
+
+    if (maxTuples > 0) {
+      if (playback != null) {
+        try {
+          int i = 0;
+          do {
+            if (!client.write(playback)) {
+              retryWrite(playback, null);
+            }
+            outstandingEventsCount++;
+            playback = storage.retrieveNext();
+          }
+          while (++i < maxTuples && playback != null);
+        } catch (Exception ex) {
+          logger.warn("Playback Failed", ex);
+          if (ex instanceof NetletThrowable) {
+            try {
+              eventloop.disconnect(client);
+            } finally {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+          }
+          return Status.BACKOFF;
+        }
+      } else {
+        int storedTuples = 0;
+
+        Transaction t = getChannel().getTransaction();
+        try {
+          t.begin();
+
+          Event e;
+          while (storedTuples < maxTuples && (e = getChannel().take()) != null) {
+            Slice event = codec.toByteArray(e);
+            byte[] address = storage.store(event);
+            if (address != null) {
+              if (!client.write(address, event)) {
+                retryWrite(address, event);
+              }
+              outstandingEventsCount++;
+            } else {
+              logger.debug("Detected the condition of recovery from flume crash!");
+            }
+            storedTuples++;
+          }
+
+          if (storedTuples > 0) {
+            storage.flush();
+          }
+
+          t.commit();
+
+          if (storedTuples > 0) { /* log less frequently */
+            logger.debug("Transaction details maxTuples = {}, storedTuples = {}, outstanding = {}",
+                maxTuples, storedTuples, outstandingEventsCount);
+          }
+        } catch (Error er) {
+          t.rollback();
+          throw er;
+        } catch (Exception ex) {
+          logger.error("Transaction Failed", ex);
+          if (ex instanceof NetletRuntimeException && client != null) {
+            try {
+              eventloop.disconnect(client);
+            } finally {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+          }
+          t.rollback();
+          return Status.BACKOFF;
+        } finally {
+          t.close();
+        }
+
+        if (storedTuples == 0) {
+          sleep();
+        }
+      }
+    }
+
+    return Status.READY;
+  }
+
+  private void sleep()
+  {
+    try {
+      Thread.sleep(sleepMillis);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+    }
+  }
+
+  @Override
+  public void start()
+  {
+    try {
+      if (storage instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
+        component.setup(null);
+      }
+      if (discovery instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
+        component.setup(null);
+      }
+      if (codec instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
+        component.setup(null);
+      }
+      eventloop = new DefaultEventLoop("EventLoop-" + id);
+      server = new Server(id, discovery, acceptedTolerance);
+    } catch (Error error) {
+      throw error;
+    } catch (RuntimeException re) {
+      throw re;
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
+
+    eventloop.start();
+    eventloop.start(hostname, port, server);
+    super.start();
+  }
+
+  @Override
+  public void stop()
+  {
+    try {
+      super.stop();
+    } finally {
+      try {
+        if (client != null) {
+          eventloop.disconnect(client);
+          client = null;
+        }
+
+        eventloop.stop(server);
+        eventloop.stop();
+
+        if (codec instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
+          component.teardown();
+        }
+        if (discovery instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
+          component.teardown();
+        }
+        if (storage instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
+          component.teardown();
+        }
+      } catch (Throwable cause) {
+        throw new ServiceConfigurationError("Failed Stop", cause);
+      }
+    }
+  }
+
+  /* End implementing Flume Sink interface */
+
+  /* Begin Configurable Interface */
+  @Override
+  public void configure(Context context)
+  {
+    hostname = context.getString(HOSTNAME_STRING, HOSTNAME_DEFAULT);
+    port = context.getInteger("port", 0);
+    id = context.getString("id");
+    if (id == null) {
+      id = getName();
+    }
+    acceptedTolerance = context.getLong("acceptedTolerance", ACCEPTED_TOLERANCE);
+    sleepMillis = context.getLong("sleepMillis", 5L);
+    throughputAdjustmentFactor = context.getInteger("throughputAdjustmentPercent", 5) / 100.0;
+    maximumEventsPerTransaction = context.getInteger("maximumEventsPerTransaction", 10000);
+    minimumEventsPerTransaction = context.getInteger("minimumEventsPerTransaction", 100);
+    commitEventTimeoutMillis = context.getLong("commitEventTimeoutMillis", Long.MAX_VALUE);
+
+    @SuppressWarnings("unchecked")
+    Discovery<byte[]> ldiscovery = configure("discovery", Discovery.class, context);
+    if (ldiscovery == null) {
+      logger.warn("Discovery agent not configured for the sink!");
+      discovery = new Discovery<byte[]>()
+      {
+        @Override
+        public void unadvertise(Service<byte[]> service)
+        {
+          logger.debug("Sink {} stopped listening on {}:{}", service.getId(), service.getHost(), service.getPort());
+        }
+
+        @Override
+        public void advertise(Service<byte[]> service)
+        {
+          logger.debug("Sink {} started listening on {}:{}", service.getId(), service.getHost(), service.getPort());
+        }
+
+        @Override
+        @SuppressWarnings("unchecked")
+        public Collection<Service<byte[]>> discover()
+        {
+          return Collections.EMPTY_SET;
+        }
+
+      };
+    } else {
+      discovery = ldiscovery;
+    }
+
+    storage = configure("storage", Storage.class, context);
+    if (storage == null) {
+      logger.warn("storage key missing... DTFlumeSink may lose data!");
+      storage = new Storage()
+      {
+        @Override
+        public byte[] store(Slice slice)
+        {
+          return null;
+        }
+
+        @Override
+        public byte[] retrieve(byte[] identifier)
+        {
+          return null;
+        }
+
+        @Override
+        public byte[] retrieveNext()
+        {
+          return null;
+        }
+
+        @Override
+        public void clean(byte[] identifier)
+        {
+        }
+
+        @Override
+        public void flush()
+        {
+        }
+
+      };
+    }
+
+    @SuppressWarnings("unchecked")
+    StreamCodec<Event> lCodec = configure("codec", StreamCodec.class, context);
+    if (lCodec == null) {
+      codec = new EventCodec();
+    } else {
+      codec = lCodec;
+    }
+
+  }
+
+  /* End Configurable Interface */
+
+  @SuppressWarnings({"UseSpecificCatch", "BroadCatchBlock", "TooBroadCatch"})
+  private static <T> T configure(String key, Class<T> clazz, Context context)
+  {
+    String classname = context.getString(key);
+    if (classname == null) {
+      return null;
+    }
+
+    try {
+      Class<?> loadClass = Thread.currentThread().getContextClassLoader().loadClass(classname);
+      if (clazz.isAssignableFrom(loadClass)) {
+        @SuppressWarnings("unchecked")
+        T object = (T)loadClass.newInstance();
+        if (object instanceof Configurable) {
+          Context context1 = new Context(context.getSubProperties(key + '.'));
+          String id = context1.getString(Storage.ID);
+          if (id == null) {
+            id = context.getString(Storage.ID);
+            logger.debug("{} inherited id={} from sink", key, id);
+            context1.put(Storage.ID, id);
+          }
+          ((Configurable)object).configure(context1);
+        }
+
+        return object;
+      } else {
+        logger.error("key class {} does not implement {} interface", classname, Storage.class.getCanonicalName());
+        throw new Error("Invalid storage " + classname);
+      }
+    } catch (Error error) {
+      throw error;
+    } catch (RuntimeException re) {
+      throw re;
+    } catch (Throwable t) {
+      throw new RuntimeException(t);
+    }
+  }
+
+  /**
+   * @return the hostname
+   */
+  String getHostname()
+  {
+    return hostname;
+  }
+
+  /**
+   * @param hostname the hostname to set
+   */
+  void setHostname(String hostname)
+  {
+    this.hostname = hostname;
+  }
+
+  /**
+   * @return the port
+   */
+  int getPort()
+  {
+    return port;
+  }
+
+  public long getAcceptedTolerance()
+  {
+    return acceptedTolerance;
+  }
+
+  public void setAcceptedTolerance(long acceptedTolerance)
+  {
+    this.acceptedTolerance = acceptedTolerance;
+  }
+
+  /**
+   * @param port the port to set
+   */
+  void setPort(int port)
+  {
+    this.port = port;
+  }
+
+  /**
+   * @return the discovery
+   */
+  Discovery<byte[]> getDiscovery()
+  {
+    return discovery;
+  }
+
+  /**
+   * @param discovery the discovery to set
+   */
+  void setDiscovery(Discovery<byte[]> discovery)
+  {
+    this.discovery = discovery;
+  }
+
+  /**
+   * Retry the write, sleeping between attempts, for as long as the client stays connected.
+   * If the client disconnects before a write succeeds, give up and report the failure.
+   *
+   * @param address storage address of the event (during playback it already carries the serialized event)
+   * @param event serialized event to write, or null during playback
+   * @throws IOException if the client disconnects before the write succeeds
+   */
+  private void retryWrite(byte[] address, Slice event) throws IOException
+  {
+    if (event == null) {  /* this happens for playback where address and event are sent as single object */
+      while (client.isConnected()) {
+        sleep();
+        if (client.write(address)) {
+          return;
+        }
+      }
+    } else {  /* this happens when the events are taken from the flume channel and writing first time failed */
+      while (client.isConnected()) {
+        sleep();
+        if (client.write(address, event)) {
+          return;
+        }
+      }
+    }
+
+    throw new IOException("Client disconnected!");
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSink.class);
+}

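For reference, the keys read in configure() above map to a Flume agent configuration roughly like the
following sketch; the agent, sink and channel names are placeholders and the values are illustrative,
while the property keys and the sink class name come from the code above:

    agent1.sinks.apexSink.type = org.apache.apex.malhar.flume.sink.DTFlumeSink
    agent1.sinks.apexSink.id = sink1
    agent1.sinks.apexSink.hostname = flume-host.example.com
    agent1.sinks.apexSink.port = 8080
    agent1.sinks.apexSink.sleepMillis = 5
    agent1.sinks.apexSink.throughputAdjustmentPercent = 5
    agent1.sinks.apexSink.minimumEventsPerTransaction = 100
    agent1.sinks.apexSink.maximumEventsPerTransaction = 10000
    agent1.sinks.apexSink.channel = channel1
    # storage, codec and discovery implementations are configured through the
    # storage, codec and discovery keys handled by configure() above
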
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java b/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
new file mode 100644
index 0000000..a771cb3
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/sink/Server.java
@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.sink;
+
+import java.net.InetSocketAddress;
+import java.nio.channels.SelectionKey;
+import java.nio.channels.ServerSocketChannel;
+import java.nio.channels.SocketChannel;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.AbstractServer;
+import com.datatorrent.netlet.EventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>
+ * Server class.</p>
+ *
+ * @since 0.9.2
+ */
+public class Server extends AbstractServer
+{
+  private final String id;
+  private final Discovery<byte[]> discovery;
+  private final long acceptedTolerance;
+
+  public Server(String id, Discovery<byte[]> discovery, long acceptedTolerance)
+  {
+    this.id = id;
+    this.discovery = discovery;
+    this.acceptedTolerance = acceptedTolerance;
+  }
+
+  @Override
+  public void handleException(Exception cce, EventLoop el)
+  {
+    logger.error("Server Error", cce);
+    Request r = new Request(Command.SERVER_ERROR, null)
+    {
+      @Override
+      public Slice getAddress()
+      {
+        throw new UnsupportedOperationException("Not supported yet.");
+      }
+
+      @Override
+      public int getEventCount()
+      {
+        throw new UnsupportedOperationException("Not supported yet.");
+      }
+
+      @Override
+      public int getIdleCount()
+      {
+        throw new UnsupportedOperationException("Not supported yet.");
+      }
+
+    };
+    synchronized (requests) {
+      requests.add(r);
+    }
+  }
+
+  private final Discovery.Service<byte[]> service = new Discovery.Service<byte[]>()
+  {
+    @Override
+    public String getHost()
+    {
+      return ((InetSocketAddress)getServerAddress()).getHostName();
+    }
+
+    @Override
+    public int getPort()
+    {
+      return ((InetSocketAddress)getServerAddress()).getPort();
+    }
+
+    @Override
+    public byte[] getPayload()
+    {
+      return null;
+    }
+
+    @Override
+    public String getId()
+    {
+      return id;
+    }
+
+    @Override
+    public String toString()
+    {
+      return "Server.Service{id=" + id + ", host=" + getHost() + ", port=" + getPort() + ", payload=" +
+          Arrays.toString(getPayload()) + '}';
+    }
+
+  };
+
+  @Override
+  public void unregistered(final SelectionKey key)
+  {
+    discovery.unadvertise(service);
+    super.unregistered(key);
+  }
+
+  @Override
+  public void registered(final SelectionKey key)
+  {
+    super.registered(key);
+    discovery.advertise(service);
+  }
+
+  public enum Command
+  {
+    ECHO((byte)0),
+    SEEK((byte)1),
+    COMMITTED((byte)2),
+    CHECKPOINTED((byte)3),
+    CONNECTED((byte)4),
+    DISCONNECTED((byte)5),
+    WINDOWED((byte)6),
+    SERVER_ERROR((byte)7);
+
+    Command(byte b)
+    {
+      this.ord = b;
+    }
+
+    public byte getOrdinal()
+    {
+      return ord;
+    }
+
+    public static Command getCommand(byte b)
+    {
+      Command c;
+      switch (b) {
+        case 0:
+          c = ECHO;
+          break;
+
+        case 1:
+          c = SEEK;
+          break;
+
+        case 2:
+          c = COMMITTED;
+          break;
+
+        case 3:
+          c = CHECKPOINTED;
+          break;
+
+        case 4:
+          c = CONNECTED;
+          break;
+
+        case 5:
+          c = DISCONNECTED;
+          break;
+
+        case 6:
+          c = WINDOWED;
+          break;
+
+        case 7:
+          c = SERVER_ERROR;
+          break;
+
+        default:
+          throw new IllegalArgumentException(String.format("No Command defined for ordinal %b", b));
+      }
+
+      assert (b == c.ord);
+      return c;
+    }
+
+    private final byte ord;
+  }
+
+  public final ArrayList<Request> requests = new ArrayList<Request>(4);
+
+  @Override
+  public ClientListener getClientConnection(SocketChannel sc, ServerSocketChannel ssc)
+  {
+    Client lClient = new Client();
+    lClient.connected();
+    return lClient;
+  }
+
+  public class Client extends AbstractLengthPrependerClient
+  {
+
+    @Override
+    public void onMessage(byte[] buffer, int offset, int size)
+    {
+      if (size != Request.FIXED_SIZE) {
+        logger.warn("Invalid Request Received: {} from {}", Arrays.copyOfRange(buffer, offset, offset + size),
+            key.channel());
+        return;
+      }
+
+      long requestTime = Server.readLong(buffer, offset + Request.TIME_OFFSET);
+      if (System.currentTimeMillis() > (requestTime + acceptedTolerance)) {
+        logger.warn("Expired Request Received: {} from {}", Arrays.copyOfRange(buffer, offset, offset + size),
+            key.channel());
+        return;
+      }
+
+      try {
+        if (Command.getCommand(buffer[offset]) == Command.ECHO) {
+          write(buffer, offset, size);
+          return;
+        }
+      } catch (IllegalArgumentException ex) {
+        logger.warn("Invalid Request Received: {} from {}!", Arrays.copyOfRange(buffer, offset, offset + size),
+            key.channel(), ex);
+        return;
+      }
+
+      Request r = Request.getRequest(buffer, offset, this);
+      synchronized (requests) {
+        requests.add(r);
+      }
+    }
+
+    @Override
+    public void disconnected()
+    {
+      synchronized (requests) {
+        requests.add(Request.getRequest(
+            new byte[] {Command.DISCONNECTED.getOrdinal(), 0, 0, 0, 0, 0, 0, 0, 0}, 0, this));
+      }
+      super.disconnected();
+    }
+
+    public boolean write(byte[] address, Slice event)
+    {
+      if (event.offset == 0 && event.length == event.buffer.length) {
+        return write(address, event.buffer);
+      }
+
+      // a better method would be to replace the write implementation and allow it to natively support writing slices
+      return write(address, event.toByteArray());
+    }
+
+  }
+
+  public abstract static class Request
+  {
+    public static final int FIXED_SIZE = 17;
+    public static final int TIME_OFFSET = 9;
+    public final Command type;
+    public final Client client;
+
+    public Request(Command type, Client client)
+    {
+      this.type = type;
+      this.client = client;
+    }
+
+    public abstract Slice getAddress();
+
+    public abstract int getEventCount();
+
+    public abstract int getIdleCount();
+
+    @Override
+    public String toString()
+    {
+      return "Request{" + "type=" + type + '}';
+    }
+
+    public static Request getRequest(final byte[] buffer, final int offset, Client client)
+    {
+      Command command = Command.getCommand(buffer[offset]);
+      switch (command) {
+        case WINDOWED:
+          return new Request(Command.WINDOWED, client)
+          {
+            final int eventCount;
+            final int idleCount;
+
+            {
+              eventCount = Server.readInt(buffer, offset + 1);
+              idleCount = Server.readInt(buffer, offset + 5);
+            }
+
+            @Override
+            public Slice getAddress()
+            {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public int getEventCount()
+            {
+              return eventCount;
+            }
+
+            @Override
+            public int getIdleCount()
+            {
+              return idleCount;
+            }
+
+            @Override
+            public String toString()
+            {
+              return "Request{" + "type=" + type + ", eventCount=" + eventCount + ", idleCount=" + idleCount + '}';
+            }
+
+          };
+
+        default:
+          return new Request(command, client)
+          {
+            final Slice address;
+
+            {
+              address = new Slice(buffer, offset + 1, 8);
+            }
+
+            @Override
+            public Slice getAddress()
+            {
+              return address;
+            }
+
+            @Override
+            public int getEventCount()
+            {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public int getIdleCount()
+            {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public String toString()
+            {
+              return "Request{" + "type=" + type + ", address=" + address + '}';
+            }
+
+          };
+
+      }
+
+    }
+
+  }
+
+  public static int readInt(byte[] buffer, int offset)
+  {
+    return buffer[offset++] & 0xff
+           | (buffer[offset++] & 0xff) << 8
+           | (buffer[offset++] & 0xff) << 16
+           | (buffer[offset++] & 0xff) << 24;
+  }
+
+  public static void writeInt(byte[] buffer, int offset, int i)
+  {
+    buffer[offset++] = (byte)i;
+    buffer[offset++] = (byte)(i >>> 8);
+    buffer[offset++] = (byte)(i >>> 16);
+    buffer[offset++] = (byte)(i >>> 24);
+  }
+
+  public static long readLong(byte[] buffer, int offset)
+  {
+    return (long)buffer[offset++] & 0xff
+           | (long)(buffer[offset++] & 0xff) << 8
+           | (long)(buffer[offset++] & 0xff) << 16
+           | (long)(buffer[offset++] & 0xff) << 24
+           | (long)(buffer[offset++] & 0xff) << 32
+           | (long)(buffer[offset++] & 0xff) << 40
+           | (long)(buffer[offset++] & 0xff) << 48
+           | (long)(buffer[offset++] & 0xff) << 56;
+  }
+
+  public static void writeLong(byte[] buffer, int offset, long l)
+  {
+    buffer[offset++] = (byte)l;
+    buffer[offset++] = (byte)(l >>> 8);
+    buffer[offset++] = (byte)(l >>> 16);
+    buffer[offset++] = (byte)(l >>> 24);
+    buffer[offset++] = (byte)(l >>> 32);
+    buffer[offset++] = (byte)(l >>> 40);
+    buffer[offset++] = (byte)(l >>> 48);
+    buffer[offset++] = (byte)(l >>> 56);
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(Server.class);
+}

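For reference, a short sketch of the fixed-size request layout the Server and its clients exchange
(one command byte, an 8-byte address, then an 8-byte timestamp at Request.TIME_OFFSET); the address
value here is illustrative, everything else comes from the code above:

    // Build a SEEK request the same way AbstractFlumeInputOperator.Client.connected() does.
    byte[] request = new byte[Server.Request.FIXED_SIZE];           // 17 bytes in total
    request[0] = Server.Command.SEEK.getOrdinal();                  // command byte
    System.arraycopy(new byte[8], 0, request, 1, 8);                // 8-byte recovery address
    Server.writeLong(request, Server.Request.TIME_OFFSET, System.currentTimeMillis());

    // The sink side decodes it with the matching helpers.
    Server.Command command = Server.Command.getCommand(request[0]);
    long requestTime = Server.readLong(request, Server.Request.TIME_OFFSET);
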
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/source/HdfsTestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/source/HdfsTestSource.java b/flume/src/main/java/org/apache/apex/malhar/flume/source/HdfsTestSource.java
new file mode 100644
index 0000000..6160bd5
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/source/HdfsTestSource.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.source;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import javax.annotation.Nonnull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDrivenSource;
+import org.apache.flume.channel.ChannelProcessor;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.source.AbstractSource;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
+/**
+ * <p>HdfsTestSource class.</p>
+ *
+ * @since 0.9.4
+ */
+public class HdfsTestSource extends AbstractSource implements EventDrivenSource, Configurable
+{
+  public static final String SOURCE_DIR = "sourceDir";
+  public static final String RATE = "rate";
+  public static final String INIT_DATE = "initDate";
+
+  static byte FIELD_SEPARATOR = 2;
+  public Timer emitTimer;
+  @Nonnull
+  String directory;
+  Path directoryPath;
+  int rate;
+  String initDate;
+  long initTime;
+  List<String> dataFiles;
+  long oneDayBack;
+
+  private transient BufferedReader br = null;
+  protected transient FileSystem fs;
+  private transient Configuration configuration;
+
+  private transient int currentFile = 0;
+  private transient boolean finished;
+  private List<Event> events;
+
+  public HdfsTestSource()
+  {
+    super();
+    this.rate = 2500;
+    dataFiles = Lists.newArrayList();
+    Calendar calendar = Calendar.getInstance();
+    calendar.add(Calendar.DATE, -1);
+    oneDayBack = calendar.getTimeInMillis();
+    configuration = new Configuration();
+    events = Lists.newArrayList();
+  }
+
+  @Override
+  public void configure(Context context)
+  {
+    directory = context.getString(SOURCE_DIR);
+    rate = context.getInteger(RATE, rate);
+    initDate = context.getString(INIT_DATE);
+
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(directory));
+    directoryPath = new Path(directory);
+
+    String[] parts = initDate.split("-");
+    Preconditions.checkArgument(parts.length == 3);
+    Calendar calendar = Calendar.getInstance();
+    calendar.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]) - 1, Integer.parseInt(parts[2]), 0, 0, 0);
+    initTime = calendar.getTimeInMillis();
+
+    try {
+      List<String> files = findFiles();
+      for (String file : files) {
+        dataFiles.add(file);
+      }
+      if (logger.isDebugEnabled()) {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+        logger.debug("settings {} {} {} {} {}", directory, rate, dateFormat.format(oneDayBack),
+            dateFormat.format(new Date(initTime)), currentFile);
+        for (String file : dataFiles) {
+          logger.debug("settings add file {}", file);
+        }
+      }
+
+      fs = FileSystem.newInstance(new Path(directory).toUri(), configuration);
+      Path filePath = new Path(dataFiles.get(currentFile));
+      br = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(fs.open(filePath))));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    finished = true;
+
+  }
+
+  private List<String> findFiles() throws IOException
+  {
+    List<String> files = Lists.newArrayList();
+    Path directoryPath = new Path(directory);
+    FileSystem lfs = FileSystem.newInstance(directoryPath.toUri(), configuration);
+    try {
+      logger.debug("checking for new files in {}", directoryPath);
+      RemoteIterator<LocatedFileStatus> statuses = lfs.listFiles(directoryPath, true);
+      while (statuses.hasNext()) {
+        FileStatus status = statuses.next();
+        Path path = status.getPath();
+        String filePathStr = path.toString();
+        if (!filePathStr.endsWith(".gz")) {
+          continue;
+        }
+        logger.debug("new file {}", filePathStr);
+        files.add(path.toString());
+      }
+    } catch (FileNotFoundException e) {
+      logger.warn("Failed to list directory {}", directoryPath, e);
+      throw new RuntimeException(e);
+    } finally {
+      lfs.close();
+    }
+    return files;
+  }
+
+  @Override
+  public void start()
+  {
+    super.start();
+    emitTimer = new Timer();
+
+    final ChannelProcessor channelProcessor = getChannelProcessor();
+    emitTimer.scheduleAtFixedRate(new TimerTask()
+    {
+      @Override
+      public void run()
+      {
+        int lineCount = 0;
+        events.clear();
+        try {
+          while (lineCount < rate && !finished) {
+            String line = br.readLine();
+
+            if (line == null) {
+              logger.debug("completed file {}", currentFile);
+              br.close();
+              currentFile++;
+              if (currentFile == dataFiles.size()) {
+                logger.info("finished all files");
+                finished = true;
+                break;
+              }
+              Path filePath = new Path(dataFiles.get(currentFile));
+              br = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(fs.open(filePath))));
+              logger.info("opening file {}. {}", currentFile, filePath);
+              continue;
+            }
+            lineCount++;
+            Event flumeEvent = EventBuilder.withBody(line.getBytes());
+            events.add(flumeEvent);
+          }
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+        if (events.size() > 0) {
+          channelProcessor.processEventBatch(events);
+        }
+        if (finished) {
+          emitTimer.cancel();
+        }
+      }
+
+    }, 0, 1000);
+  }
+
+  @Override
+  public void stop()
+  {
+    emitTimer.cancel();
+    super.stop();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HdfsTestSource.class);
+}

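For reference, the keys read in HdfsTestSource.configure() above map to a Flume source configuration
roughly like the following sketch; the agent, source and channel names as well as the directory and
date values are placeholders, while the property keys and the class name come from the code above:

    agent1.sources.hdfsTestSource.type = org.apache.apex.malhar.flume.source.HdfsTestSource
    agent1.sources.hdfsTestSource.sourceDir = /user/flume/sample-data
    agent1.sources.hdfsTestSource.rate = 2500
    agent1.sources.hdfsTestSource.initDate = 2017-05-01
    agent1.sources.hdfsTestSource.channels = channel1
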
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/source/TestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/source/TestSource.java b/flume/src/main/java/org/apache/apex/malhar/flume/source/TestSource.java
new file mode 100644
index 0000000..87c118f
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/source/TestSource.java
@@ -0,0 +1,250 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.source;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import javax.annotation.Nonnull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDrivenSource;
+import org.apache.flume.channel.ChannelProcessor;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.source.AbstractSource;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+/**
+ * <p>TestSource class.</p>
+ *
+ * @since 0.9.4
+ */
+public class TestSource extends AbstractSource implements EventDrivenSource, Configurable
+{
+  public static final String SOURCE_FILE = "sourceFile";
+  public static final String LINE_NUMBER = "lineNumber";
+  public static final String RATE = "rate";
+  public static final String PERCENT_PAST_EVENTS = "percentPastEvents";
+  static byte FIELD_SEPARATOR = 1;
+  static int DEF_PERCENT_PAST_EVENTS = 5;
+  public Timer emitTimer;
+  @Nonnull
+  String filePath;
+  int rate;
+  int numberOfPastEvents;
+  transient List<Row> cache;
+  private transient int startIndex;
+  private transient Random random;
+  private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+  private SimpleDateFormat timeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+
+  public TestSource()
+  {
+    super();
+    this.rate = 2500;
+    this.numberOfPastEvents = DEF_PERCENT_PAST_EVENTS * 25;
+    this.random = new Random();
+
+  }
+
+  @Override
+  public void configure(Context context)
+  {
+    filePath = context.getString(SOURCE_FILE);
+    rate = context.getInteger(RATE, rate);
+    int percentPastEvents = context.getInteger(PERCENT_PAST_EVENTS, DEF_PERCENT_PAST_EVENTS);
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(filePath));
+    try {
+      BufferedReader lineReader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)));
+      try {
+        buildCache(lineReader);
+      } finally {
+        lineReader.close();
+      }
+    } catch (FileNotFoundException e) {
+      throw new RuntimeException(e);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    if (DEF_PERCENT_PAST_EVENTS != percentPastEvents) {
+      numberOfPastEvents = (int)(percentPastEvents / 100.0 * cache.size());
+    }
+  }
+
+  @Override
+  public void start()
+  {
+    super.start();
+    emitTimer = new Timer();
+
+    final ChannelProcessor channel = getChannelProcessor();
+    final int cacheSize = cache.size();
+    emitTimer.scheduleAtFixedRate(new TimerTask()
+    {
+      @Override
+      public void run()
+      {
+        int lastIndex = startIndex + rate;
+        if (lastIndex > cacheSize) {
+          lastIndex -= cacheSize;
+          processBatch(channel, cache.subList(startIndex, cacheSize));
+          startIndex = 0;
+          while (lastIndex > cacheSize) {
+            processBatch(channel, cache);
+            lastIndex -= cacheSize;
+          }
+          processBatch(channel, cache.subList(0, lastIndex));
+        } else {
+          processBatch(channel, cache.subList(startIndex, lastIndex));
+        }
+        startIndex = lastIndex;
+      }
+
+    }, 0, 1000);
+  }
+
+  private void processBatch(ChannelProcessor channelProcessor, List<Row> rows)
+  {
+    if (rows.isEmpty()) {
+      return;
+    }
+
+    int noise = random.nextInt(numberOfPastEvents + 1);
+    Set<Integer> pastIndices = Sets.newHashSet();
+    for (int i = 0; i < noise; i++) {
+      pastIndices.add(random.nextInt(rows.size()));
+    }
+
+    Calendar calendar = Calendar.getInstance();
+    long high = calendar.getTimeInMillis();
+    calendar.add(Calendar.DATE, -2);
+    long low = calendar.getTimeInMillis();
+
+
+
+    List<Event> events = Lists.newArrayList();
+    for (int i = 0; i < rows.size(); i++) {
+      Row eventRow = rows.get(i);
+      if (pastIndices.contains(i)) {
+        long pastTime = (long)((Math.random() * (high - low)) + low);
+        byte[] pastDateField = dateFormat.format(pastTime).getBytes();
+        byte[] pastTimeField = timeFormat.format(pastTime).getBytes();
+
+        System.arraycopy(pastDateField, 0, eventRow.bytes, eventRow.dateFieldStart, pastDateField.length);
+        System.arraycopy(pastTimeField, 0, eventRow.bytes, eventRow.timeFieldStart, pastTimeField.length);
+      } else {
+        calendar.setTimeInMillis(System.currentTimeMillis());
+        byte[] currentDateField = dateFormat.format(calendar.getTime()).getBytes();
+        byte[] currentTimeField = timeFormat.format(calendar.getTime()).getBytes();
+
+        System.arraycopy(currentDateField, 0, eventRow.bytes, eventRow.dateFieldStart, currentDateField.length);
+        System.arraycopy(currentTimeField, 0, eventRow.bytes, eventRow.timeFieldStart, currentTimeField.length);
+      }
+
+      HashMap<String, String> headers = new HashMap<String, String>(2);
+      headers.put(SOURCE_FILE, filePath);
+      headers.put(LINE_NUMBER, String.valueOf(startIndex + i));
+      events.add(EventBuilder.withBody(eventRow.bytes, headers));
+    }
+    channelProcessor.processEventBatch(events);
+  }
+
+  @Override
+  public void stop()
+  {
+    emitTimer.cancel();
+    super.stop();
+  }
+
+  private void buildCache(BufferedReader lineReader) throws IOException
+  {
+    cache = Lists.newArrayListWithCapacity(rate);
+
+    String line;
+    while ((line = lineReader.readLine()) != null) {
+      byte[] row = line.getBytes();
+      Row eventRow = new Row(row);
+      final int rowsize = row.length;
+
+      /* skip past the leading guid field */
+      int sliceLength = -1;
+      while (++sliceLength < rowsize) {
+        if (row[sliceLength] == FIELD_SEPARATOR) {
+          break;
+        }
+      }
+      int recordStart = sliceLength + 1;
+      int pointer = sliceLength + 1;
+      while (pointer < rowsize) {
+        if (row[pointer++] == FIELD_SEPARATOR) {
+          eventRow.dateFieldStart = recordStart;
+          break;
+        }
+      }
+
+      /* the field after the date holds the time; remember where it starts */
+      int timeStart = pointer;
+      while (pointer < rowsize) {
+        if (row[pointer++] == FIELD_SEPARATOR) {
+          eventRow.timeFieldStart = timeStart;
+          break;
+        }
+      }
+
+      cache.add(eventRow);
+    }
+  }
+
+  private static class Row
+  {
+    final byte[] bytes;
+    int dateFieldStart;
+    int timeFieldStart;
+//    boolean past;
+
+    Row(byte[] bytes)
+    {
+      this.bytes = bytes;
+    }
+
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(TestSource.class);
+}
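
For illustration only (this is not part of the commit): TestSource reads the whole source file into memory once and then replays "rate" lines per second, backdating a random subset of them (sized from percentPastEvents) by up to two days. A minimal agent definition that exercises it might look like the sketch below; the agent, channel, and file names are placeholders.

  agent1.sources = testSource
  agent1.channels = memChannel

  agent1.sources.testSource.type = org.apache.apex.malhar.flume.source.TestSource
  # placeholder path; any file with guid<SEP>date<SEP>time<SEP>... rows works
  agent1.sources.testSource.sourceFile = /tmp/test_data/2013121500
  agent1.sources.testSource.rate = 2500
  agent1.sources.testSource.percentPastEvents = 5
  agent1.sources.testSource.channels = memChannel

  agent1.channels.memChannel.type = memory
  agent1.channels.memChannel.capacity = 10000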

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/main/java/org/apache/apex/malhar/flume/storage/DebugWrapper.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/org/apache/apex/malhar/flume/storage/DebugWrapper.java b/flume/src/main/java/org/apache/apex/malhar/flume/storage/DebugWrapper.java
new file mode 100644
index 0000000..ae1ed23
--- /dev/null
+++ b/flume/src/main/java/org/apache/apex/malhar/flume/storage/DebugWrapper.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.conf.Configurable;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>DebugWrapper class: a Storage implementation that delegates every call to an embedded HDFSStorage and logs each invocation at debug level.</p>
+ *
+ * @since 0.9.4
+ */
+public class DebugWrapper implements Storage, Configurable, Component<com.datatorrent.api.Context>
+{
+  HDFSStorage storage = new HDFSStorage();
+
+  @Override
+  public byte[] store(Slice bytes)
+  {
+    byte[] ret = null;
+
+    try {
+      ret = storage.store(bytes);
+    } finally {
+      logger.debug("storage.store(new byte[]{{}});", bytes);
+    }
+
+    return ret;
+  }
+
+  @Override
+  public byte[] retrieve(byte[] identifier)
+  {
+    byte[] ret = null;
+
+    try {
+      ret = storage.retrieve(identifier);
+    } finally {
+      logger.debug("storage.retrieve(new byte[]{{}});", identifier);
+    }
+
+    return ret;
+  }
+
+  @Override
+  public byte[] retrieveNext()
+  {
+    byte[] ret = null;
+    try {
+      ret = storage.retrieveNext();
+    } finally {
+      logger.debug("storage.retrieveNext();");
+    }
+
+    return ret;
+  }
+
+  @Override
+  public void clean(byte[] identifier)
+  {
+    try {
+      storage.clean(identifier);
+    } finally {
+      logger.debug("storage.clean(new byte[]{{}});", identifier);
+    }
+  }
+
+  @Override
+  public void flush()
+  {
+    try {
+      storage.flush();
+    } finally {
+      logger.debug("storage.flush();");
+    }
+  }
+
+  @Override
+  public void configure(Context cntxt)
+  {
+    try {
+      storage.configure(cntxt);
+    } finally {
+      logger.debug("storage.configure({});", cntxt);
+    }
+  }
+
+  @Override
+  public void setup(com.datatorrent.api.Context t1)
+  {
+    try {
+      storage.setup(t1);
+    } finally {
+      logger.debug("storage.setup({});", t1);
+    }
+
+  }
+
+  @Override
+  public void teardown()
+  {
+    try {
+      storage.teardown();
+    } finally {
+      logger.debug("storage.teardown();");
+    }
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(DebugWrapper.class);
+}
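
A usage note that is not part of the commit: because DebugWrapper forwards configure(), setup() and every Storage call to the HDFSStorage it embeds, it can be swapped in wherever HDFSStorage would otherwise be configured in order to get a debug-level trace of each store/retrieve/clean call. Assuming the sink is configured with a storage class and a baseDir as described in the module README, the swap is just a change of class name; the property prefix below is a placeholder.

  # placeholder sink name; only the storage class changes, baseDir is passed through unchanged
  agent1.sinks.apexSink.storage = org.apache.apex.malhar.flume.storage.DebugWrapper
  agent1.sinks.apexSink.storage.baseDir = /flume/baseDir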


[10/13] apex-malhar git commit: Flume source

Posted by th...@apache.org.
Flume source


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/bbdab0e8
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/bbdab0e8
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/bbdab0e8

Branch: refs/heads/master
Commit: bbdab0e8a417dd15813d947ef16bcc65bb01c7d7
Parents: c84a2c8
Author: Chetan Narsude <ch...@datatorrent.com>
Authored: Sun Feb 19 21:27:29 2017 +0530
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 flume/pom.xml                                   | 275 +++++++
 .../datatorrent/flume/discovery/Discovery.java  |  68 ++
 .../flume/discovery/ZKAssistedDiscovery.java    | 429 +++++++++++
 .../interceptor/ColumnFilteringInterceptor.java | 204 +++++
 .../operator/AbstractFlumeInputOperator.java    | 760 +++++++++++++++++++
 .../com/datatorrent/flume/sink/DTFlumeSink.java | 571 ++++++++++++++
 .../java/com/datatorrent/flume/sink/Server.java | 419 ++++++++++
 .../datatorrent/flume/source/TestSource.java    | 248 ++++++
 .../datatorrent/flume/storage/DebugWrapper.java | 131 ++++
 .../flume/storage/ErrorMaskingEventCodec.java   |  61 ++
 .../datatorrent/flume/storage/EventCodec.java   |  91 +++
 .../flume-conf/flume-conf.sample.properties     |  45 ++
 .../resources/flume-conf/flume-env.sample.sh    |  36 +
 .../discovery/ZKAssistedDiscoveryTest.java      | 142 ++++
 .../flume/integration/ApplicationTest.java      | 116 +++
 .../ColumnFilteringInterceptorTest.java         |  85 +++
 .../interceptor/InterceptorTestHelper.java      | 214 ++++++
 .../datatorrent/flume/interceptor/RawEvent.java | 119 +++
 .../AbstractFlumeInputOperatorTest.java         |  56 ++
 .../datatorrent/flume/sink/DTFlumeSinkTest.java | 143 ++++
 .../com/datatorrent/flume/sink/ServerTest.java  |  92 +++
 .../resources/flume/conf/flume-conf.properties  |  85 +++
 .../src/test/resources/flume/conf/flume-env.sh  |  36 +
 flume/src/test/resources/log4j.properties       |  38 +
 .../test/resources/test_data/gentxns/2013121500 | Bin 0 -> 225010 bytes
 .../test/resources/test_data/gentxns/2013121501 | Bin 0 -> 224956 bytes
 .../test/resources/test_data/gentxns/2013121502 | Bin 0 -> 225028 bytes
 .../test/resources/test_data/gentxns/2013121503 | Bin 0 -> 225068 bytes
 .../test/resources/test_data/gentxns/2013121504 | Bin 0 -> 224845 bytes
 .../test/resources/test_data/gentxns/2013121505 | Bin 0 -> 225004 bytes
 .../test/resources/test_data/gentxns/2013121506 | Bin 0 -> 224929 bytes
 .../test/resources/test_data/gentxns/2013121507 | Bin 0 -> 224879 bytes
 .../test/resources/test_data/gentxns/2013121508 | Bin 0 -> 224963 bytes
 .../test/resources/test_data/gentxns/2013121509 | Bin 0 -> 224963 bytes
 .../test/resources/test_data/gentxns/2013121510 | Bin 0 -> 225007 bytes
 .../test/resources/test_data/gentxns/2013121511 | Bin 0 -> 224913 bytes
 .../test/resources/test_data/gentxns/2013121512 | Bin 0 -> 224929 bytes
 .../test/resources/test_data/gentxns/2013121513 | Bin 0 -> 225078 bytes
 .../test/resources/test_data/gentxns/2013121514 | Bin 0 -> 224882 bytes
 .../test/resources/test_data/gentxns/2013121515 | Bin 0 -> 224958 bytes
 .../test/resources/test_data/gentxns/2013121516 | Bin 0 -> 225032 bytes
 .../test/resources/test_data/gentxns/2013121517 | Bin 0 -> 225059 bytes
 .../test/resources/test_data/gentxns/2013121518 | Bin 0 -> 224890 bytes
 .../test/resources/test_data/gentxns/2013121519 | Bin 0 -> 225000 bytes
 .../test/resources/test_data/gentxns/2013121520 | Bin 0 -> 225064 bytes
 .../test/resources/test_data/gentxns/2013121521 | Bin 0 -> 225091 bytes
 46 files changed, 4464 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/pom.xml
----------------------------------------------------------------------
diff --git a/flume/pom.xml b/flume/pom.xml
new file mode 100644
index 0000000..ade05a0
--- /dev/null
+++ b/flume/pom.xml
@@ -0,0 +1,275 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>dt-megh</artifactId>
+    <groupId>com.datatorrent</groupId>
+    <version>3.6.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>dt-flume</artifactId>
+  <packaging>jar</packaging>
+  <name>DataTorrent Flume Integration</name>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <properties>
+        <package.username>flume</package.username>
+        <rpm.skip>package</rpm.skip>
+        <rpm.phase>${rpm.skip}</rpm.phase>
+      </properties>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>rpm-maven-plugin</artifactId>
+            <version>2.1-alpha-4</version>
+            <executions>
+              <execution>
+                <phase>${rpm.phase}</phase>
+                <id>generate-sink-rpm</id>
+                <goals>
+                  <goal>attached-rpm</goal>
+                </goals>
+                <configuration>
+                  <license>Copyright &copy; 2014 DataTorrent, Inc.</license>
+                  <version>${parsedVersion.majorVersion}.${parsedVersion.minorVersion}.${parsedVersion.incrementalVersion}</version>
+                  <release>${parsedVersion.qualifier}${parsedVersion.buildNumber}</release>
+                  <workarea>target/sink-rpm</workarea>
+                  <classifier>sink</classifier>
+                  <name>datatorrent-flume-sink</name>
+                  <distribution>DataTorrent Enterprise ${project.version}</distribution>
+                  <group>Messaging Client Support</group>
+                  <icon>src/main/resources/logo.gif</icon>
+                  <packager>DataTorrent Build System</packager>
+                  <prefix>${package.prefix}</prefix>
+                  <changelogFile>src/changelog</changelogFile>
+                  <defineStatements>
+                    <defineStatement>_unpackaged_files_terminate_build 0</defineStatement>
+                  </defineStatements>
+                  <mappings>
+                    <mapping>
+                      <directory>${package.prefix}/flume-${project.version}/lib</directory>
+                      <filemode>750</filemode>
+                      <username>${package.username}</username>
+                      <groupname>${package.groupname}</groupname>
+                      <artifact></artifact>
+                      <dependency>
+                        <includes>
+                          <include>org.apache.apex:apex-api:jar:${apex.core.version}</include>
+                          <include>com.datatorrent:dt-netlet:jar:1.2.0</include>
+                          <include>org.apache.apex:apex-common:jar:${apex.core.version}</include>
+                          <include>com.esotericsoftware.kryo:kryo:jar:2.24.0</include>
+                          <include>com.esotericsoftware.minlog:minlog:jar:1.2</include>
+                          <include>org.objenesis:objenesis:jar:2.1</include>
+                          <include>org.apache.curator:curator-client:jar:2.3.0</include>
+                          <include>org.apache.curator:curator-x-discovery:jar:2.3.0</include>
+                          <include>org.apache.curator:curator-framework:jar:2.3.0</include>
+                        </includes>
+                      </dependency>
+                    </mapping>
+                    <mapping>
+                      <directory>${package.prefix}/flume-${project.version}/conf</directory>
+                      <configuration>true</configuration>
+                      <filemode>640</filemode>
+                      <username>${package.username}</username>
+                      <groupname>${package.groupname}</groupname>
+                      <sources>
+                        <source>
+                          <location>src/main/resources/flume-conf</location>
+                        </source>
+                      </sources>
+                    </mapping>
+                  </mappings>
+                  <preinstallScriptlet>
+                    <script>groupadd -f ${package.groupname} &amp;&amp; id ${package.username} &gt;/dev/null 2&gt;&amp;1 &amp;&amp; usermod -aG ${package.groupname} ${package.username} || useradd -g ${package.groupname} ${package.username}</script>
+                  </preinstallScriptlet>
+                </configuration>
+              </execution>
+
+              <execution>
+                <phase>${rpm.phase}</phase>
+                <id>generate-operator-rpm</id>
+                <goals>
+                  <goal>attached-rpm</goal>
+                </goals>
+                <configuration>
+                  <version>${parsedVersion.majorVersion}.${parsedVersion.minorVersion}.${parsedVersion.incrementalVersion}</version>
+                  <license>Copyright &copy; 2014 DataTorrent, Inc.</license>
+                  <release>${parsedVersion.qualifier}${parsedVersion.buildNumber}</release>
+                  <workarea>target/operator-rpm</workarea>
+                  <classifier>operator</classifier>
+                  <name>datatorrent-flume-operator</name>
+                  <distribution>DataTorrent Enterprise ${project.version}</distribution>
+                  <group>Messaging Client Support</group>
+                  <icon>src/main/resources/logo.gif</icon>
+                  <packager>DataTorrent Build System</packager>
+                  <prefix>${package.prefix}</prefix>
+                  <changelogFile>src/changelog</changelogFile>
+                  <description>${rpm.release}</description>
+                  <defineStatements>
+                    <defineStatement>_unpackaged_files_terminate_build 0</defineStatement>
+                  </defineStatements>
+                  <mappings>
+                    <mapping>
+                      <directory>${package.prefix}/flume-operator-${project.version}/lib</directory>
+                      <filemode>640</filemode>
+                      <username>${package.username}</username>
+                      <groupname>${package.groupname}</groupname>
+                      <artifact></artifact>
+                      <dependency>
+                        <includes>
+                          <include>org.apache.curator:curator-client:jar:2.3.0</include>
+                          <include>org.apache.curator:curator-x-discovery:jar:2.3.0</include>
+                          <include>org.apache.curator:curator-framework:jar:2.3.0</include>
+                          <include>org.apache.flume:flume-ng-sdk:jar:1.5.0</include>
+                          <include>org.apache.flume:flume-ng-core:jar:1.5.0</include>
+                          <include>org.apache.flume:flume-ng-configuration:jar:1.5.0</include>
+                        </includes>
+                      </dependency>
+                    </mapping>
+                  </mappings>
+                  <preinstallScriptlet>
+                    <script>groupadd -f ${package.groupname} &amp;&amp; id ${package.username} &gt;/dev/null 2&gt;&amp;1 &amp;&amp; usermod -aG ${package.groupname} ${package.username} || useradd -g ${package.groupname} ${package.username}</script>
+                  </preinstallScriptlet>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.16</version>
+        <configuration>
+          <argLine>-Xmx5000M</argLine>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.7.2</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-common</artifactId>
+      <version>${apex.core.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.flume</groupId>
+      <artifactId>flume-ng-core</artifactId>
+      <version>1.5.0</version>
+      <exclusions>
+        <exclusion>
+          <!-- Curator requires later version of Guava -->
+          <artifactId>guava</artifactId>
+          <groupId>com.google.guava</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>jackson-core-asl</artifactId>
+          <groupId>org.codehaus.jackson</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>jackson-mapper-asl</artifactId>
+          <groupId>org.codehaus.jackson</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>jetty</artifactId>
+          <groupId>org.mortbay.jetty</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>jetty-util</artifactId>
+          <groupId>org.mortbay.jetty</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>commons-codec</artifactId>
+          <groupId>commons-codec</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>commons-io</artifactId>
+          <groupId>commons-io</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>commons-lang</artifactId>
+          <groupId>commons-lang</groupId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-api</artifactId>
+      <version>${apex.core.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.datatorrent</groupId>
+      <artifactId>netlet</artifactId>
+      <version>1.2.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.curator</groupId>
+      <artifactId>curator-x-discovery</artifactId>
+      <version>2.3.0</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-mapper-asl</artifactId>
+      <version>${jackson.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-core-asl</artifactId>
+      <version>${jackson.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>11.0.2</version>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java b/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
new file mode 100644
index 0000000..d802002
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/discovery/Discovery.java
@@ -0,0 +1,68 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.discovery;
+
+import java.util.Collection;
+
+/**
+ * When a DTFlumeSink server instance binds to a network interface, it can publish
+ * its whereabouts by invoking the advertise method on the Discovery object. Similarly,
+ * when it ceases accepting any more connections, it can publish its intent to do
+ * so by invoking unadvertise.<p />
+ * Interested parties can call the discover method to get the list of addresses where
+ * they can find an available DTFlumeSink server instance.
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @param <T> - Type of the objects which can be discovered
+ * @since 0.9.3
+ */
+public interface Discovery<T>
+{
+  /**
+   * Recall the previously published address as it's no longer valid.
+   *
+   * @param service
+   */
+  void unadvertise(Service<T> service);
+
+  /**
+   * Advertise the host/port address where DTFlumeSink is accepting a client connection.
+   *
+   * @param service
+   */
+  void advertise(Service<T> service);
+
+  /**
+   * Discover all the addresses which are actively accepting the client connections.
+   *
+   * @return - Active server addresses which can accept the connections.
+   */
+  Collection<Service<T>> discover();
+
+  interface Service<T>
+  {
+    String getHost();
+
+    int getPort();
+
+    T getPayload();
+
+    String getId();
+
+  }
+
+}
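
As an aside (not in the commit), the contract above is small enough that an in-memory test double fits in a handful of lines; a sketch such as the following hypothetical helper class can stand in for ZooKeeper-backed discovery in unit tests:

  import java.util.ArrayList;
  import java.util.Collection;
  import java.util.Set;
  import java.util.concurrent.CopyOnWriteArraySet;

  import com.datatorrent.flume.discovery.Discovery;

  /** Hypothetical in-memory Discovery; relies on advertise/unadvertise receiving the same Service instance. */
  public class InMemoryDiscovery<T> implements Discovery<T>
  {
    private final Set<Service<T>> services = new CopyOnWriteArraySet<Service<T>>();

    @Override
    public void advertise(Service<T> service)
    {
      services.add(service);      // a sink announces itself once its server socket is bound
    }

    @Override
    public void unadvertise(Service<T> service)
    {
      services.remove(service);   // and withdraws before it stops accepting connections
    }

    @Override
    public Collection<Service<T>> discover()
    {
      return new ArrayList<Service<T>>(services);  // operators poll this to locate live sinks
    }
  }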

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java b/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
new file mode 100644
index 0000000..460a478
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/discovery/ZKAssistedDiscovery.java
@@ -0,0 +1,429 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.discovery;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+
+import javax.validation.constraints.NotNull;
+
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.ObjectReader;
+import org.codehaus.jackson.map.ObjectWriter;
+import org.codehaus.jackson.type.TypeReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.CuratorFrameworkFactory;
+import org.apache.curator.retry.RetryNTimes;
+import org.apache.curator.utils.EnsurePath;
+import org.apache.curator.x.discovery.ServiceDiscovery;
+import org.apache.curator.x.discovery.ServiceDiscoveryBuilder;
+import org.apache.curator.x.discovery.ServiceInstance;
+import org.apache.curator.x.discovery.details.InstanceSerializer;
+import org.apache.flume.conf.Configurable;
+
+import com.google.common.base.Throwables;
+
+import com.datatorrent.api.Component;
+
+/**
+ * <p>ZKAssistedDiscovery class: a ZooKeeper-backed Discovery implementation that registers and looks up DTFlumeSink instances through Curator's service discovery.</p>
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.3
+ */
+public class ZKAssistedDiscovery implements Discovery<byte[]>,
+    Component<com.datatorrent.api.Context>, Configurable, Serializable
+{
+  @NotNull
+  private String serviceName;
+  @NotNull
+  private String connectionString;
+  @NotNull
+  private String basePath;
+  private int connectionTimeoutMillis;
+  private int connectionRetryCount;
+  private int conntectionRetrySleepMillis;
+  private transient InstanceSerializerFactory instanceSerializerFactory;
+  private transient CuratorFramework curatorFramework;
+  private transient ServiceDiscovery<byte[]> discovery;
+
+  public ZKAssistedDiscovery()
+  {
+    this.serviceName = "DTFlume";
+    this.conntectionRetrySleepMillis = 500;
+    this.connectionRetryCount = 10;
+    this.connectionTimeoutMillis = 1000;
+  }
+
+  @Override
+  public void unadvertise(Service<byte[]> service)
+  {
+    doAdvertise(service, false);
+  }
+
+  @Override
+  public void advertise(Service<byte[]> service)
+  {
+    doAdvertise(service, true);
+  }
+
+  public void doAdvertise(Service<byte[]> service, boolean flag)
+  {
+    try {
+      new EnsurePath(basePath).ensure(curatorFramework.getZookeeperClient());
+
+      ServiceInstance<byte[]> instance = getInstance(service);
+      if (flag) {
+        discovery.registerService(instance);
+      } else {
+        discovery.unregisterService(instance);
+      }
+    } catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  @Override
+  public Collection<Service<byte[]>> discover()
+  {
+    try {
+      new EnsurePath(basePath).ensure(curatorFramework.getZookeeperClient());
+
+      Collection<ServiceInstance<byte[]>> services = discovery.queryForInstances(serviceName);
+      ArrayList<Service<byte[]>> returnable = new ArrayList<Service<byte[]>>(services.size());
+      for (final ServiceInstance<byte[]> service : services) {
+        returnable.add(new Service<byte[]>()
+        {
+          @Override
+          public String getHost()
+          {
+            return service.getAddress();
+          }
+
+          @Override
+          public int getPort()
+          {
+            return service.getPort();
+          }
+
+          @Override
+          public byte[] getPayload()
+          {
+            return service.getPayload();
+          }
+
+          @Override
+          public String getId()
+          {
+            return service.getId();
+          }
+
+          @Override
+          public String toString()
+          {
+            return "{" + getId() + " => " + getHost() + ':' + getPort() + '}';
+          }
+
+        });
+      }
+      return returnable;
+    } catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  @Override
+  public String toString()
+  {
+    return "ZKAssistedDiscovery{" + "serviceName=" + serviceName + ", connectionString=" + connectionString +
+        ", basePath=" + basePath + ", connectionTimeoutMillis=" + connectionTimeoutMillis + ", connectionRetryCount=" +
+        connectionRetryCount + ", conntectionRetrySleepMillis=" + conntectionRetrySleepMillis + '}';
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int hash = 7;
+    hash = 47 * hash + this.serviceName.hashCode();
+    hash = 47 * hash + this.connectionString.hashCode();
+    hash = 47 * hash + this.basePath.hashCode();
+    hash = 47 * hash + this.connectionTimeoutMillis;
+    hash = 47 * hash + this.connectionRetryCount;
+    hash = 47 * hash + this.conntectionRetrySleepMillis;
+    return hash;
+  }
+
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    final ZKAssistedDiscovery other = (ZKAssistedDiscovery)obj;
+    if (!this.serviceName.equals(other.serviceName)) {
+      return false;
+    }
+    if (!this.connectionString.equals(other.connectionString)) {
+      return false;
+    }
+    if (!this.basePath.equals(other.basePath)) {
+      return false;
+    }
+    if (this.connectionTimeoutMillis != other.connectionTimeoutMillis) {
+      return false;
+    }
+    if (this.connectionRetryCount != other.connectionRetryCount) {
+      return false;
+    }
+    if (this.conntectionRetrySleepMillis != other.conntectionRetrySleepMillis) {
+      return false;
+    }
+    return true;
+  }
+
+  ServiceInstance<byte[]> getInstance(Service<byte[]> service) throws Exception
+  {
+    return ServiceInstance.<byte[]>builder()
+            .name(serviceName)
+            .address(service.getHost())
+            .port(service.getPort())
+            .id(service.getId())
+            .payload(service.getPayload())
+            .build();
+  }
+
+  private ServiceDiscovery<byte[]> getDiscovery(CuratorFramework curatorFramework)
+  {
+    return ServiceDiscoveryBuilder.builder(byte[].class)
+            .basePath(basePath)
+            .client(curatorFramework)
+            .serializer(instanceSerializerFactory.getInstanceSerializer(
+            new TypeReference<ServiceInstance<byte[]>>()
+              {})).build();
+  }
+
+  /**
+   * @return the instanceSerializerFactory
+   */
+  InstanceSerializerFactory getInstanceSerializerFactory()
+  {
+    return instanceSerializerFactory;
+  }
+
+  /**
+   * @return the connectionString
+   */
+  public String getConnectionString()
+  {
+    return connectionString;
+  }
+
+  /**
+   * @param connectionString the connectionString to set
+   */
+  public void setConnectionString(String connectionString)
+  {
+    this.connectionString = connectionString;
+  }
+
+  /**
+   * @return the basePath
+   */
+  public String getBasePath()
+  {
+    return basePath;
+  }
+
+  /**
+   * @param basePath the basePath to set
+   */
+  public void setBasePath(String basePath)
+  {
+    this.basePath = basePath;
+  }
+
+  /**
+   * @return the connectionTimeoutMillis
+   */
+  public int getConnectionTimeoutMillis()
+  {
+    return connectionTimeoutMillis;
+  }
+
+  /**
+   * @param connectionTimeoutMillis the connectionTimeoutMillis to set
+   */
+  public void setConnectionTimeoutMillis(int connectionTimeoutMillis)
+  {
+    this.connectionTimeoutMillis = connectionTimeoutMillis;
+  }
+
+  /**
+   * @return the connectionRetryCount
+   */
+  public int getConnectionRetryCount()
+  {
+    return connectionRetryCount;
+  }
+
+  /**
+   * @param connectionRetryCount the connectionRetryCount to set
+   */
+  public void setConnectionRetryCount(int connectionRetryCount)
+  {
+    this.connectionRetryCount = connectionRetryCount;
+  }
+
+  /**
+   * @return the conntectionRetrySleepMillis
+   */
+  public int getConntectionRetrySleepMillis()
+  {
+    return conntectionRetrySleepMillis;
+  }
+
+  /**
+   * @param conntectionRetrySleepMillis the conntectionRetrySleepMillis to set
+   */
+  public void setConntectionRetrySleepMillis(int conntectionRetrySleepMillis)
+  {
+    this.conntectionRetrySleepMillis = conntectionRetrySleepMillis;
+  }
+
+  /**
+   * @return the serviceName
+   */
+  public String getServiceName()
+  {
+    return serviceName;
+  }
+
+  /**
+   * @param serviceName the serviceName to set
+   */
+  public void setServiceName(String serviceName)
+  {
+    this.serviceName = serviceName;
+  }
+
+  @Override
+  public void configure(org.apache.flume.Context context)
+  {
+    serviceName = context.getString("serviceName", "DTFlume");
+    connectionString = context.getString("connectionString");
+    basePath = context.getString("basePath");
+
+    connectionTimeoutMillis = context.getInteger("connectionTimeoutMillis", 1000);
+    connectionRetryCount = context.getInteger("connectionRetryCount", 10);
+    conntectionRetrySleepMillis = context.getInteger("connectionRetrySleepMillis", 500);
+  }
+
+  @Override
+  public void setup(com.datatorrent.api.Context context)
+  {
+    ObjectMapper om = new ObjectMapper();
+    instanceSerializerFactory = new InstanceSerializerFactory(om.reader(), om.writer());
+
+    curatorFramework = CuratorFrameworkFactory.builder()
+            .connectionTimeoutMs(connectionTimeoutMillis)
+            .retryPolicy(new RetryNTimes(connectionRetryCount, conntectionRetrySleepMillis))
+            .connectString(connectionString)
+            .build();
+    curatorFramework.start();
+
+    discovery = getDiscovery(curatorFramework);
+    try {
+      discovery.start();
+    } catch (Exception ex) {
+      throw Throwables.propagate(ex);
+    }
+  }
+
+  @Override
+  public void teardown()
+  {
+    try {
+      discovery.close();
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    } finally {
+      curatorFramework.close();
+      curatorFramework = null;
+    }
+  }
+
+  public class InstanceSerializerFactory
+  {
+    private final ObjectReader objectReader;
+    private final ObjectWriter objectWriter;
+
+    InstanceSerializerFactory(ObjectReader objectReader, ObjectWriter objectWriter)
+    {
+      this.objectReader = objectReader;
+      this.objectWriter = objectWriter;
+    }
+
+    public <T> InstanceSerializer<T> getInstanceSerializer(
+        TypeReference<ServiceInstance<T>> typeReference)
+    {
+      return new JacksonInstanceSerializer<T>(objectReader, objectWriter, typeReference);
+    }
+
+    final class JacksonInstanceSerializer<T> implements InstanceSerializer<T>
+    {
+      private final TypeReference<ServiceInstance<T>> typeRef;
+      private final ObjectWriter objectWriter;
+      private final ObjectReader objectReader;
+
+      JacksonInstanceSerializer(ObjectReader objectReader, ObjectWriter objectWriter,
+          TypeReference<ServiceInstance<T>> typeRef)
+      {
+        this.objectReader = objectReader;
+        this.objectWriter = objectWriter;
+        this.typeRef = typeRef;
+      }
+
+      @Override
+      public ServiceInstance<T> deserialize(byte[] bytes) throws Exception
+      {
+        return objectReader.withType(typeRef).readValue(bytes);
+      }
+
+      @Override
+      public byte[] serialize(ServiceInstance<T> serviceInstance) throws Exception
+      {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        objectWriter.writeValue(out, serviceInstance);
+        return out.toByteArray();
+      }
+
+    }
+
+  }
+
+  private static final long serialVersionUID = 201401221145L;
+  private static final Logger logger = LoggerFactory.getLogger(ZKAssistedDiscovery.class);
+}
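
A quick usage sketch that is not part of the commit: outside of a Flume agent the same class can be driven through its setters, assuming a ZooKeeper ensemble is reachable at the given connect string (the address and base path below are placeholders). Inside an agent, configure() reads the equivalent serviceName, connectionString and basePath keys from the Flume context instead.

  import com.datatorrent.flume.discovery.Discovery;
  import com.datatorrent.flume.discovery.ZKAssistedDiscovery;

  public class DiscoveryProbe
  {
    public static void main(String[] args)
    {
      ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
      discovery.setConnectionString("localhost:2181");   // placeholder ZooKeeper address
      discovery.setBasePath("/flume/discovery");         // placeholder registration path
      discovery.setServiceName("DTFlume");                // the default service name used above
      discovery.setup(null);                              // setup() does not use the operator context
      try {
        for (Discovery.Service<byte[]> service : discovery.discover()) {
          System.out.println(service.getId() + " => " + service.getHost() + ':' + service.getPort());
        }
      } finally {
        discovery.teardown();
      }
    }
  }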

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
new file mode 100644
index 0000000..90c3a04
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringInterceptor.java
@@ -0,0 +1,204 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.interceptor;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.COLUMNS;
+import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.DST_SEPARATOR;
+import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.DST_SEPARATOR_DFLT;
+import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.SRC_SEPARATOR;
+import static com.datatorrent.flume.interceptor.ColumnFilteringInterceptor.Constants.SRC_SEPARATOR_DFLT;
+
+/**
+ * <p>ColumnFilteringInterceptor class: a Flume interceptor that keeps only the configured columns of an event body, splitting on a source separator byte and re-joining the kept columns with a destination separator byte.</p>
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.4
+ */
+public class ColumnFilteringInterceptor implements Interceptor
+{
+  private final byte srcSeparator;
+  private final byte dstSeparator;
+
+  private final int maxIndex;
+  private final int maxColumn;
+  private final int[] columns;
+  private final int[] positions;
+
+  private ColumnFilteringInterceptor(int[] columns, byte srcSeparator, byte dstSeparator)
+  {
+    this.columns = columns;
+
+    int tempMaxColumn = Integer.MIN_VALUE;
+    for (int column: columns) {
+      if (column > tempMaxColumn) {
+        tempMaxColumn = column;
+      }
+    }
+    maxIndex = tempMaxColumn;
+    maxColumn = tempMaxColumn + 1;
+    positions = new int[maxColumn + 1];
+
+    this.srcSeparator = srcSeparator;
+    this.dstSeparator = dstSeparator;
+  }
+
+  @Override
+  public void initialize()
+  {
+    /* no-op */
+  }
+
+  @Override
+  public Event intercept(Event event)
+  {
+    byte[] body = event.getBody();
+    if (body == null) {
+      return event;
+    }
+
+    final int length = body.length;
+
+    /* store positions of character after the separators */
+    int i = 0;
+    int index = 0;
+    while (i < length) {
+      if (body[i++] == srcSeparator) {
+        positions[++index] = i;
+        if (index >= maxIndex) {
+          break;
+        }
+      }
+    }
+
+    int nextVirginIndex;
+    boolean separatorTerminated;
+    if (i == length && index < maxColumn) {
+      nextVirginIndex = index + 2;
+      positions[nextVirginIndex - 1] = length;
+      separatorTerminated = length > 0 && body[length - 1] != srcSeparator;
+    } else {
+      nextVirginIndex = index + 1;
+      separatorTerminated = true;
+    }
+
+    int newArrayLen = 0;
+    for (i = columns.length; i-- > 0;) {
+      int column = columns[i];
+      int len = positions[column + 1] - positions[column];
+      if (len <= 0) {
+        newArrayLen++;
+      } else {
+        if (separatorTerminated && positions[column + 1] == length) {
+          newArrayLen++;
+        }
+        newArrayLen += len;
+      }
+    }
+
+    byte[] newbody = new byte[newArrayLen];
+    int newoffset = 0;
+    for (int column: columns) {
+      int len = positions[column + 1] - positions[column];
+      if (len > 0) {
+        System.arraycopy(body, positions[column], newbody, newoffset, len);
+        newoffset += len;
+        if (newbody[newoffset - 1] == srcSeparator) {
+          newbody[newoffset - 1] = dstSeparator;
+        } else {
+          newbody[newoffset++] = dstSeparator;
+        }
+      } else {
+        newbody[newoffset++] = dstSeparator;
+      }
+    }
+
+    event.setBody(newbody);
+    Arrays.fill(positions, 1, nextVirginIndex, 0);
+    return event;
+  }
+
+  @Override
+  public List<Event> intercept(List<Event> events)
+  {
+    for (Event event: events) {
+      intercept(event);
+    }
+    return events;
+  }
+
+  @Override
+  public void close()
+  {
+  }
+
+  public static class Builder implements Interceptor.Builder
+  {
+    private int[] columns;
+    private byte srcSeparator;
+    private byte dstSeparator;
+
+    @Override
+    public Interceptor build()
+    {
+      return new ColumnFilteringInterceptor(columns, srcSeparator, dstSeparator);
+    }
+
+    @Override
+    public void configure(Context context)
+    {
+      String sColumns = context.getString(COLUMNS);
+      if (sColumns == null || sColumns.trim().isEmpty()) {
+        throw new Error("This interceptor requires filtered columns to be specified!");
+      }
+
+      String[] parts = sColumns.split(" ");
+      columns = new int[parts.length];
+      for (int i = parts.length; i-- > 0;) {
+        columns[i] = Integer.parseInt(parts[i]);
+      }
+
+      srcSeparator = context.getInteger(SRC_SEPARATOR, (int)SRC_SEPARATOR_DFLT).byteValue();
+      dstSeparator = context.getInteger(DST_SEPARATOR, (int)DST_SEPARATOR_DFLT).byteValue();
+    }
+
+  }
+
+  @SuppressWarnings("ClassMayBeInterface") /* adhering to Flume until I understand it completely */
+
+  public static class Constants
+  {
+    public static final String SRC_SEPARATOR = "srcSeparator";
+    public static final byte SRC_SEPARATOR_DFLT = 2;
+
+    public static final String DST_SEPARATOR = "dstSeparator";
+    public static final byte DST_SEPARATOR_DFLT = 1;
+
+    public static final String COLUMNS = "columns";
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ColumnFilteringInterceptor.class);
+}
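
For illustration (not part of the commit), the interceptor is wired up entirely through the three context keys defined in Constants; the sketch below keeps columns 1 and 3 of a body whose fields are separated by byte 2 and re-joins the kept columns with byte 1. The demo class and sample body are made up for the example.

  import org.apache.flume.Context;
  import org.apache.flume.Event;
  import org.apache.flume.event.EventBuilder;
  import org.apache.flume.interceptor.Interceptor;

  import com.datatorrent.flume.interceptor.ColumnFilteringInterceptor;

  public class ColumnFilteringDemo
  {
    public static void main(String[] args)
    {
      Context context = new Context();
      context.put(ColumnFilteringInterceptor.Constants.COLUMNS, "1 3");      // 0-based column indices to keep
      context.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, "2");  // byte separating input fields
      context.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, "1");  // byte joining output fields

      Interceptor.Builder builder = new ColumnFilteringInterceptor.Builder();
      builder.configure(context);
      Interceptor interceptor = builder.build();
      interceptor.initialize();

      Event event = EventBuilder.withBody("f0\u0002f1\u0002f2\u0002f3\u0002f4".getBytes());
      interceptor.intercept(event);  // the event body is rewritten in place to the selected columns
      interceptor.close();
    }
  }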

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java b/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
new file mode 100644
index 0000000..1ab7182
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/operator/AbstractFlumeInputOperator.java
@@ -0,0 +1,760 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.operator;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotNull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+
+import com.datatorrent.api.Context;
+import com.datatorrent.api.Context.OperatorContext;
+import com.datatorrent.api.DefaultOutputPort;
+import com.datatorrent.api.DefaultPartition;
+import com.datatorrent.api.InputOperator;
+import com.datatorrent.api.Operator;
+import com.datatorrent.api.Partitioner;
+import com.datatorrent.api.Stats.OperatorStats;
+import com.datatorrent.api.StreamCodec;
+import com.datatorrent.flume.discovery.Discovery.Service;
+import com.datatorrent.flume.discovery.ZKAssistedDiscovery;
+import com.datatorrent.flume.sink.Server;
+import com.datatorrent.flume.sink.Server.Command;
+import com.datatorrent.flume.sink.Server.Request;
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+import static java.lang.Thread.sleep;
+
+/**
+ * <p>
+ * AbstractFlumeInputOperator class: an abstract input operator that receives Flume events from a DTFlumeSink.</p>
+ *
+ * @param <T> Type of the output payload.
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.2
+ */
+public abstract class AbstractFlumeInputOperator<T>
+    implements InputOperator, Operator.ActivationListener<OperatorContext>, Operator.IdleTimeHandler,
+    Operator.CheckpointListener, Partitioner<AbstractFlumeInputOperator<T>>
+{
+  public final transient DefaultOutputPort<T> output = new DefaultOutputPort<T>();
+  public final transient DefaultOutputPort<Slice> drop = new DefaultOutputPort<Slice>();
+  @NotNull
+  private String[] connectionSpecs;
+  @NotNull
+  private StreamCodec<Event> codec;
+  private final ArrayList<RecoveryAddress> recoveryAddresses;
+  @SuppressWarnings("FieldMayBeFinal") // it's not final because that mucks with the serialization somehow
+  private transient ArrayBlockingQueue<Slice> handoverBuffer;
+  private transient int idleCounter;
+  private transient int eventCounter;
+  private transient DefaultEventLoop eventloop;
+  private transient volatile boolean connected;
+  private transient OperatorContext context;
+  private transient Client client;
+  private transient long windowId;
+  private transient byte[] address;
+  @Min(0)
+  private long maxEventsPerSecond;
+  //This is calculated from maxEventsPerSecond, App window count and streaming window size
+  private transient long maxEventsPerWindow;
+
+  public AbstractFlumeInputOperator()
+  {
+    handoverBuffer = new ArrayBlockingQueue<Slice>(1024 * 5);
+    connectionSpecs = new String[0];
+    recoveryAddresses = new ArrayList<RecoveryAddress>();
+    maxEventsPerSecond = Long.MAX_VALUE;
+  }
+
+  @Override
+  public void setup(OperatorContext context)
+  {
+    long windowDurationMillis = context.getValue(OperatorContext.APPLICATION_WINDOW_COUNT) *
+        context.getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS);
+    maxEventsPerWindow = (long)(windowDurationMillis / 1000.0 * maxEventsPerSecond);
+    logger.debug("max-events per-second {} per-window {}", maxEventsPerSecond, maxEventsPerWindow);
+
+    try {
+      eventloop = new DefaultEventLoop("EventLoop-" + context.getId());
+      eventloop.start();
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  @Override
+  @SuppressWarnings({"unchecked"})
+  public void activate(OperatorContext ctx)
+  {
+    if (connectionSpecs.length == 0) {
+      logger.info("Discovered zero DTFlumeSink");
+    } else if (connectionSpecs.length == 1) {
+      for (String connectAddress: connectionSpecs) {
+        logger.debug("Connection spec is {}", connectAddress);
+        String[] parts = connectAddress.split(":");
+        eventloop.connect(new InetSocketAddress(parts[1], Integer.parseInt(parts[2])), client = new Client(parts[0]));
+      }
+    } else {
+      throw new IllegalArgumentException(
+          String.format("A physical %s operator cannot connect to more than one address!",
+              this.getClass().getSimpleName()));
+    }
+
+    context = ctx;
+  }
+
+  @Override
+  public void beginWindow(long windowId)
+  {
+    this.windowId = windowId;
+    idleCounter = 0;
+    eventCounter = 0;
+  }
+
+  @Override
+  public void emitTuples()
+  {
+    int i = handoverBuffer.size();
+    if (i > 0 && eventCounter < maxEventsPerWindow) {
+
+      while (--i > 0 && eventCounter < maxEventsPerWindow - 1) {
+        final Slice slice = handoverBuffer.poll();
+        slice.offset += 8;
+        slice.length -= 8;
+        T convert = convert((Event)codec.fromByteArray(slice));
+        if (convert == null) {
+          drop.emit(slice);
+        } else {
+          output.emit(convert);
+        }
+        eventCounter++;
+      }
+
+      final Slice slice = handoverBuffer.poll();
+      slice.offset += 8;
+      slice.length -= 8;
+      T convert = convert((Event)codec.fromByteArray(slice));
+      if (convert == null) {
+        drop.emit(slice);
+      } else {
+        output.emit(convert);
+      }
+      eventCounter++;
+
+      address = Arrays.copyOfRange(slice.buffer, slice.offset - 8, slice.offset);
+    }
+  }
+
+  @Override
+  public void endWindow()
+  {
+    if (connected) {
+      byte[] array = new byte[Request.FIXED_SIZE];
+
+      array[0] = Command.WINDOWED.getOrdinal();
+      Server.writeInt(array, 1, eventCounter);
+      Server.writeInt(array, 5, idleCounter);
+      Server.writeLong(array, Request.TIME_OFFSET, System.currentTimeMillis());
+
+      logger.debug("wrote {} with eventCounter = {} and idleCounter = {}", Command.WINDOWED, eventCounter, idleCounter);
+      client.write(array);
+    }
+
+    if (address != null) {
+      RecoveryAddress rAddress = new RecoveryAddress();
+      rAddress.address = address;
+      address = null;
+      rAddress.windowId = windowId;
+      recoveryAddresses.add(rAddress);
+    }
+  }
+
+  @Override
+  public void deactivate()
+  {
+    if (connected) {
+      eventloop.disconnect(client);
+    }
+    context = null;
+  }
+
+  @Override
+  public void teardown()
+  {
+    eventloop.stop();
+    eventloop = null;
+  }
+
+  @Override
+  public void handleIdleTime()
+  {
+    idleCounter++;
+    try {
+      sleep(context.getValue(OperatorContext.SPIN_MILLIS));
+    } catch (InterruptedException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  public abstract T convert(Event event);
+
+  /**
+   * @return the connectAddress
+   */
+  public String[] getConnectAddresses()
+  {
+    return connectionSpecs.clone();
+  }
+
+  /**
+   * @param specs - sinkid:host:port specification of all the sinks.
+   */
+  public void setConnectAddresses(String[] specs)
+  {
+    this.connectionSpecs = specs.clone();
+  }
+
+  /**
+   * @return the codec
+   */
+  public StreamCodec<Event> getCodec()
+  {
+    return codec;
+  }
+
+  /**
+   * @param codec the codec to set
+   */
+  public void setCodec(StreamCodec<Event> codec)
+  {
+    this.codec = codec;
+  }
+
+  private static class RecoveryAddress implements Serializable
+  {
+    long windowId;
+    byte[] address;
+
+    @Override
+    public String toString()
+    {
+      return "RecoveryAddress{" + "windowId=" + windowId + ", address=" + Arrays.toString(address) + '}';
+    }
+
+    @Override
+    public boolean equals(Object o)
+    {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof RecoveryAddress)) {
+        return false;
+      }
+
+      RecoveryAddress that = (RecoveryAddress)o;
+
+      if (windowId != that.windowId) {
+        return false;
+      }
+      return Arrays.equals(address, that.address);
+    }
+
+    @Override
+    public int hashCode()
+    {
+      int result = (int)(windowId ^ (windowId >>> 32));
+      result = 31 * result + (address != null ? Arrays.hashCode(address) : 0);
+      return result;
+    }
+
+    private static final long serialVersionUID = 201312021432L;
+  }
+
+  @Override
+  public void checkpointed(long windowId)
+  {
+    /* don't do anything */
+  }
+
+  @Override
+  public void committed(long windowId)
+  {
+    if (!connected) {
+      return;
+    }
+
+    synchronized (recoveryAddresses) {
+      byte[] addr = null;
+
+      Iterator<RecoveryAddress> iterator = recoveryAddresses.iterator();
+      while (iterator.hasNext()) {
+        RecoveryAddress ra = iterator.next();
+        if (ra.windowId > windowId) {
+          break;
+        }
+
+        iterator.remove();
+        if (ra.address != null) {
+          addr = ra.address;
+        }
+      }
+
+      if (addr != null) {
+        /*
+         * Make sure that we store the last valid address processed
+         */
+        if (recoveryAddresses.isEmpty()) {
+          RecoveryAddress ra = new RecoveryAddress();
+          ra.address = addr;
+          recoveryAddresses.add(ra);
+        }
+
+        int arraySize = 1/* for the type of the message */
+            + 8 /* for the location to commit */
+            + 8 /* for storing the current time stamp*/;
+        byte[] array = new byte[arraySize];
+
+        array[0] = Command.COMMITTED.getOrdinal();
+        System.arraycopy(addr, 0, array, 1, 8);
+        Server.writeLong(array, Request.TIME_OFFSET, System.currentTimeMillis());
+        logger.debug("wrote {} with recoveryOffset = {}", Command.COMMITTED, Arrays.toString(addr));
+        client.write(array);
+      }
+    }
+  }
+
+  @Override
+  public Collection<Partition<AbstractFlumeInputOperator<T>>> definePartitions(
+      Collection<Partition<AbstractFlumeInputOperator<T>>> partitions, PartitioningContext context)
+  {
+    Collection<Service<byte[]>> discovered = discoveredFlumeSinks.get();
+    if (discovered == null) {
+      return partitions;
+    }
+
+    HashMap<String, ArrayList<RecoveryAddress>> allRecoveryAddresses = abandonedRecoveryAddresses.get();
+    ArrayList<String> allConnectAddresses = new ArrayList<String>(partitions.size());
+    for (Partition<AbstractFlumeInputOperator<T>> partition: partitions) {
+      String[] lAddresses = partition.getPartitionedInstance().connectionSpecs;
+      allConnectAddresses.addAll(Arrays.asList(lAddresses));
+      for (int i = lAddresses.length; i-- > 0;) {
+        String[] parts = lAddresses[i].split(":", 2);
+        allRecoveryAddresses.put(parts[0], partition.getPartitionedInstance().recoveryAddresses);
+      }
+    }
+
+    HashMap<String, String> connections = new HashMap<String, String>(discovered.size());
+    for (Service<byte[]> service: discovered) {
+      String previousSpec = connections.get(service.getId());
+      String newspec = service.getId() + ':' + service.getHost() + ':' + service.getPort();
+      if (previousSpec == null) {
+        connections.put(service.getId(), newspec);
+      } else {
+        boolean found = false;
+        for (ConnectionStatus cs: partitionedInstanceStatus.get().values()) {
+          if (previousSpec.equals(cs.spec) && !cs.connected) {
+            connections.put(service.getId(), newspec);
+            found = true;
+            break;
+          }
+        }
+
+        if (!found) {
+          logger.warn("2 sinks found with the same id: {} and {}... Ignoring previous.", previousSpec, newspec);
+          connections.put(service.getId(), newspec);
+        }
+      }
+    }
+
+    for (int i = allConnectAddresses.size(); i-- > 0;) {
+      String[] parts = allConnectAddresses.get(i).split(":");
+      String connection = connections.remove(parts[0]);
+      if (connection == null) {
+        allConnectAddresses.remove(i);
+      } else {
+        allConnectAddresses.set(i, connection);
+      }
+    }
+
+    allConnectAddresses.addAll(connections.values());
+
+    partitions.clear();
+    try {
+      if (allConnectAddresses.isEmpty()) {
+        /* return at least one partition; otherwise stram (the app master) becomes grumpy */
+        @SuppressWarnings("unchecked")
+        AbstractFlumeInputOperator<T> operator = getClass().newInstance();
+        operator.setCodec(codec);
+        operator.setMaxEventsPerSecond(maxEventsPerSecond);
+        for (ArrayList<RecoveryAddress> lRecoveryAddresses: allRecoveryAddresses.values()) {
+          operator.recoveryAddresses.addAll(lRecoveryAddresses);
+        }
+        operator.connectionSpecs = new String[allConnectAddresses.size()];
+        for (int i = operator.connectionSpecs.length; i-- > 0;) {
+          operator.connectionSpecs[i] = allConnectAddresses.get(i);
+        }
+
+        partitions.add(new DefaultPartition<AbstractFlumeInputOperator<T>>(operator));
+      } else {
+        long maxEventsPerSecondPerOperator = maxEventsPerSecond / allConnectAddresses.size();
+        for (int i = allConnectAddresses.size(); i-- > 0;) {
+          @SuppressWarnings("unchecked")
+          AbstractFlumeInputOperator<T> operator = getClass().newInstance();
+          operator.setCodec(codec);
+          operator.setMaxEventsPerSecond(maxEventsPerSecondPerOperator);
+          String connectAddress = allConnectAddresses.get(i);
+          operator.connectionSpecs = new String[] {connectAddress};
+
+          String[] parts = connectAddress.split(":", 2);
+          ArrayList<RecoveryAddress> remove = allRecoveryAddresses.remove(parts[0]);
+          if (remove != null) {
+            operator.recoveryAddresses.addAll(remove);
+          }
+
+          partitions.add(new DefaultPartition<AbstractFlumeInputOperator<T>>(operator));
+        }
+      }
+    } catch (IllegalAccessException ex) {
+      throw new RuntimeException(ex);
+    } catch (InstantiationException ex) {
+      throw new RuntimeException(ex);
+    }
+
+    logger.debug("Requesting partitions: {}", partitions);
+    return partitions;
+  }
+
+  @Override
+  public void partitioned(Map<Integer, Partition<AbstractFlumeInputOperator<T>>> partitions)
+  {
+    logger.debug("Partitioned Map: {}", partitions);
+    HashMap<Integer, ConnectionStatus> map = partitionedInstanceStatus.get();
+    map.clear();
+    for (Entry<Integer, Partition<AbstractFlumeInputOperator<T>>> entry: partitions.entrySet()) {
+      if (map.containsKey(entry.getKey())) {
+        // what can be done here?
+      } else {
+        map.put(entry.getKey(), null);
+      }
+    }
+  }
+
+  @Override
+  public String toString()
+  {
+    return "AbstractFlumeInputOperator{" + "connected=" + connected + ", connectionSpecs=" +
+        (connectionSpecs.length == 0 ? "empty" : connectionSpecs[0]) + ", recoveryAddresses=" + recoveryAddresses + '}';
+  }
+
+  class Client extends AbstractLengthPrependerClient
+  {
+    private final String id;
+
+    Client(String id)
+    {
+      this.id = id;
+    }
+
+    @Override
+    public void onMessage(byte[] buffer, int offset, int size)
+    {
+      try {
+        handoverBuffer.put(new Slice(buffer, offset, size));
+      } catch (InterruptedException ex) {
+        handleException(ex, eventloop);
+      }
+    }
+
+    @Override
+    public void connected()
+    {
+      super.connected();
+
+      byte[] address;
+      synchronized (recoveryAddresses) {
+        if (recoveryAddresses.size() > 0) {
+          address = recoveryAddresses.get(recoveryAddresses.size() - 1).address;
+        } else {
+          address = new byte[8];
+        }
+      }
+
+      int len = 1 /* for the message type SEEK */
+          + 8 /* for the address */
+          + 8 /* for storing the current time stamp*/;
+
+      byte[] array = new byte[len];
+      array[0] = Command.SEEK.getOrdinal();
+      System.arraycopy(address, 0, array, 1, 8);
+      Server.writeLong(array, 9, System.currentTimeMillis());
+      write(array);
+
+      connected = true;
+      ConnectionStatus connectionStatus = new ConnectionStatus();
+      connectionStatus.connected = true;
+      connectionStatus.spec = connectionSpecs[0];
+      OperatorContext ctx = context;
+      synchronized (ctx) {
+        logger.debug("{} Submitting ConnectionStatus = {}", AbstractFlumeInputOperator.this, connectionStatus);
+        context.setCounters(connectionStatus);
+      }
+    }
+
+    @Override
+    public void disconnected()
+    {
+      connected = false;
+      ConnectionStatus connectionStatus = new ConnectionStatus();
+      connectionStatus.connected = false;
+      connectionStatus.spec = connectionSpecs[0];
+      OperatorContext ctx = context;
+      synchronized (ctx) {
+        logger.debug("{} Submitting ConnectionStatus = {}", AbstractFlumeInputOperator.this, connectionStatus);
+        context.setCounters(connectionStatus);
+      }
+      super.disconnected();
+    }
+
+  }
+
+  public static class ZKStatsListner extends ZKAssistedDiscovery implements com.datatorrent.api.StatsListener,
+      Serializable
+  {
+    /*
+     * In the current design, one input operator is able to connect
+     * to only one flume adapter. Sometime in the future, we should support
+     * any number of input operators connecting to any number of flume
+     * sinks and vice versa.
+     *
+     * Until that happens, the following map should be sufficient to
+     * keep track of which input operator is connected to which flume sink.
+     */
+    long intervalMillis;
+    private final Response response;
+    private transient long nextMillis;
+
+    public ZKStatsListner()
+    {
+      intervalMillis = 60 * 1000L;
+      response = new Response();
+    }
+
+    @Override
+    public Response processStats(BatchedOperatorStats stats)
+    {
+      final HashMap<Integer, ConnectionStatus> map = partitionedInstanceStatus.get();
+      response.repartitionRequired = false;
+
+      Object lastStat = null;
+      List<OperatorStats> lastWindowedStats = stats.getLastWindowedStats();
+      for (OperatorStats os: lastWindowedStats) {
+        if (os.counters != null) {
+          lastStat = os.counters;
+          logger.debug("Received custom stats = {}", lastStat);
+        }
+      }
+
+      if (lastStat instanceof ConnectionStatus) {
+        ConnectionStatus cs = (ConnectionStatus)lastStat;
+        map.put(stats.getOperatorId(), cs);
+        if (!cs.connected) {
+          logger.debug("setting repatitioned = true because of lastStat = {}", lastStat);
+          response.repartitionRequired = true;
+        }
+      }
+
+      if (System.currentTimeMillis() >= nextMillis) {
+        logger.debug("nextMillis = {}", nextMillis);
+        try {
+          super.setup(null);
+          Collection<Service<byte[]>> addresses;
+          try {
+            addresses = discover();
+          } finally {
+            super.teardown();
+          }
+          AbstractFlumeInputOperator.discoveredFlumeSinks.set(addresses);
+          logger.debug("\ncurrent map = {}\ndiscovered sinks = {}", map, addresses);
+          switch (addresses.size()) {
+            case 0:
+              response.repartitionRequired = map.size() != 1;
+              break;
+
+            default:
+              if (addresses.size() == map.size()) {
+                for (ConnectionStatus value: map.values()) {
+                  if (value == null || !value.connected) {
+                    response.repartitionRequired = true;
+                    break;
+                  }
+                }
+              } else {
+                response.repartitionRequired = true;
+              }
+              break;
+          }
+        } catch (Error er) {
+          throw er;
+        } catch (Throwable cause) {
+          logger.warn("Unable to discover services, using values from last successful discovery", cause);
+        } finally {
+          nextMillis = System.currentTimeMillis() + intervalMillis;
+          logger.debug("Proposed NextMillis = {}", nextMillis);
+        }
+      }
+
+      return response;
+    }
+
+    /**
+     * @return the intervalMillis
+     */
+    public long getIntervalMillis()
+    {
+      return intervalMillis;
+    }
+
+    /**
+     * @param intervalMillis the intervalMillis to set
+     */
+    public void setIntervalMillis(long intervalMillis)
+    {
+      this.intervalMillis = intervalMillis;
+    }
+
+    private static final long serialVersionUID = 201312241646L;
+  }
+
+  public static class ConnectionStatus implements Serializable
+  {
+    int id;
+    String spec;
+    boolean connected;
+
+    @Override
+    public int hashCode()
+    {
+      return spec.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj)
+    {
+      if (obj == null) {
+        return false;
+      }
+      if (getClass() != obj.getClass()) {
+        return false;
+      }
+      final ConnectionStatus other = (ConnectionStatus)obj;
+      return spec == null ? other.spec == null : spec.equals(other.spec);
+    }
+
+    @Override
+    public String toString()
+    {
+      return "ConnectionStatus{" + "id=" + id + ", spec=" + spec + ", connected=" + connected + '}';
+    }
+
+    private static final long serialVersionUID = 201312261615L;
+  }
+
+  private static final transient ThreadLocal<HashMap<Integer, ConnectionStatus>> partitionedInstanceStatus =
+      new ThreadLocal<HashMap<Integer, ConnectionStatus>>()
+    {
+      @Override
+      protected HashMap<Integer, ConnectionStatus> initialValue()
+      {
+        return new HashMap<Integer, ConnectionStatus>();
+      }
+
+    };
+  /**
+   * When a sink goes away and a replacement sink is not found, we stash the recovery addresses associated
+   * with the sink in the hope that a new sink may show up in the near future.
+   */
+  private static final transient ThreadLocal<HashMap<String, ArrayList<RecoveryAddress>>> abandonedRecoveryAddresses =
+      new ThreadLocal<HashMap<String, ArrayList<RecoveryAddress>>>()
+  {
+    @Override
+    protected HashMap<String, ArrayList<RecoveryAddress>> initialValue()
+    {
+      return new HashMap<String, ArrayList<RecoveryAddress>>();
+    }
+
+  };
+  private static final transient ThreadLocal<Collection<Service<byte[]>>> discoveredFlumeSinks =
+      new ThreadLocal<Collection<Service<byte[]>>>();
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof AbstractFlumeInputOperator)) {
+      return false;
+    }
+
+    AbstractFlumeInputOperator<?> that = (AbstractFlumeInputOperator<?>)o;
+
+    if (!Arrays.equals(connectionSpecs, that.connectionSpecs)) {
+      return false;
+    }
+    return recoveryAddresses.equals(that.recoveryAddresses);
+
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int result = connectionSpecs != null ? Arrays.hashCode(connectionSpecs) : 0;
+    result = 31 * result + (recoveryAddresses.hashCode());
+    return result;
+  }
+
+  public void setMaxEventsPerSecond(long maxEventsPerSecond)
+  {
+    this.maxEventsPerSecond = maxEventsPerSecond;
+  }
+
+  public long getMaxEventsPerSecond()
+  {
+    return maxEventsPerSecond;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(AbstractFlumeInputOperator.class);
+}
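
Since the operator above is abstract, the following is a minimal usage sketch only; it is not part of the commit, the class name, sink id, host and port are made-up values, and the operator's package path in the import is assumed from this module's layout.

    import org.apache.flume.Event;

    import com.datatorrent.flume.operator.AbstractFlumeInputOperator;  // package path assumed

    // Hypothetical concrete operator: emits the raw Flume event body as a byte[] tuple.
    public class RawFlumeInputOperator extends AbstractFlumeInputOperator<byte[]>
    {
      @Override
      public byte[] convert(Event event)
      {
        return event.getBody();
      }

      // Typical wiring while populating a DAG; the connect address follows the
      // sinkid:host:port format documented on setConnectAddresses(), with made-up values.
      public static RawFlumeInputOperator newConfiguredInstance()
      {
        RawFlumeInputOperator operator = new RawFlumeInputOperator();
        operator.setConnectAddresses(new String[] {"sink1:flume-host.example.com:8080"});
        operator.setMaxEventsPerSecond(10000);
        return operator;
      }
    }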

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/bbdab0e8/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java b/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
new file mode 100644
index 0000000..35d0c5f
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/sink/DTFlumeSink.java
@@ -0,0 +1,571 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.sink;
+
+import java.io.IOError;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.ServiceConfigurationError;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.sink.AbstractSink;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.api.StreamCodec;
+import com.datatorrent.flume.discovery.Discovery;
+import com.datatorrent.flume.sink.Server.Client;
+import com.datatorrent.flume.sink.Server.Request;
+import com.datatorrent.flume.storage.EventCodec;
+import com.datatorrent.flume.storage.Storage;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.NetletThrowable;
+import com.datatorrent.netlet.NetletThrowable.NetletRuntimeException;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * DTFlumeSink is a flume sink developed to ingest data into a DataTorrent DAG
+ * from flume. It's essentially a flume sink which acts as a server capable of
+ * talking to one client at a time. The client for this server is AbstractFlumeInputOperator.
+ * <p />
+ * &lt;experimental&gt;DTFlumeSink auto-adjusts the rate at which it consumes data from the channel to
+ * match the throughput of the DAG.&lt;/experimental&gt;
+ * <p />
+ * The properties you can set on the DTFlumeSink are: <br />
+ * id - string unique value identifying this sink <br />
+ * hostname - string value indicating the fqdn or ip address of the interface on which the server should listen <br />
+ * port - integer value indicating the numeric port to which the server should bind <br />
+ * sleepMillis - integer value indicating the number of milliseconds the process should sleep when there are no events
+ * before checking for the next event again <br />
+ * throughputAdjustmentPercent - integer value indicating by what percentage the flume transaction size should be
+ * adjusted upward or downward at a time <br />
+ * minimumEventsPerTransaction - integer value indicating the minimum number of events per transaction <br />
+ * maximumEventsPerTransaction - integer value indicating the maximum number of events per transaction. This value
+ * cannot be more than the channel's transaction capacity.<br />
+ *
+ * @author Chetan Narsude <ch...@datatorrent.com>
+ * @since 0.9.2
+ */
+public class DTFlumeSink extends AbstractSink implements Configurable
+{
+  private static final String HOSTNAME_STRING = "hostname";
+  private static final String HOSTNAME_DEFAULT = "localhost";
+  private static final long ACCEPTED_TOLERANCE = 20000;
+  private DefaultEventLoop eventloop;
+  private Server server;
+  private int outstandingEventsCount;
+  private int lastConsumedEventsCount;
+  private int idleCount;
+  private byte[] playback;
+  private Client client;
+  private String hostname;
+  private int port;
+  private String id;
+  private long acceptedTolerance;
+  private long sleepMillis;
+  private double throughputAdjustmentFactor;
+  private int minimumEventsPerTransaction;
+  private int maximumEventsPerTransaction;
+  private long commitEventTimeoutMillis;
+  private transient long lastCommitEventTimeMillis;
+  private Storage storage;
+  Discovery<byte[]> discovery;
+  StreamCodec<Event> codec;
+  /* Begin implementing Flume Sink interface */
+
+  @Override
+  @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch", "UseSpecificCatch", "SleepWhileInLoop"})
+  public Status process() throws EventDeliveryException
+  {
+    Slice slice;
+    synchronized (server.requests) {
+      for (Request r : server.requests) {
+        logger.debug("found {}", r);
+        switch (r.type) {
+          case SEEK:
+            lastCommitEventTimeMillis = System.currentTimeMillis();
+            slice = r.getAddress();
+            playback = storage.retrieve(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
+            client = r.client;
+            break;
+
+          case COMMITTED:
+            lastCommitEventTimeMillis = System.currentTimeMillis();
+            slice = r.getAddress();
+            storage.clean(Arrays.copyOfRange(slice.buffer, slice.offset, slice.offset + slice.length));
+            break;
+
+          case CONNECTED:
+            logger.debug("Connected received, ignoring it!");
+            break;
+
+          case DISCONNECTED:
+            if (r.client == client) {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+            break;
+
+          case WINDOWED:
+            lastConsumedEventsCount = r.getEventCount();
+            idleCount = r.getIdleCount();
+            outstandingEventsCount -= lastConsumedEventsCount;
+            break;
+
+          case SERVER_ERROR:
+            throw new IOError(null);
+
+          default:
+            logger.debug("Cannot understand the request {}", r);
+            break;
+        }
+      }
+
+      server.requests.clear();
+    }
+
+    if (client == null) {
+      logger.info("No client expressed interest yet to consume the events.");
+      return Status.BACKOFF;
+    } else if (System.currentTimeMillis() - lastCommitEventTimeMillis > commitEventTimeoutMillis) {
+      logger.info("Client has not processed the workload given for the last {} milliseconds, so backing off.",
+          System.currentTimeMillis() - lastCommitEventTimeMillis);
+      return Status.BACKOFF;
+    }
+
+    int maxTuples;
+    // the following logic needs to be fixed... it was put together quickly.
+    if (outstandingEventsCount < 0) {
+      if (idleCount > 1) {
+        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
+      } else {
+        maxTuples = (int)((1 + throughputAdjustmentFactor) * lastConsumedEventsCount);
+      }
+    } else if (outstandingEventsCount > lastConsumedEventsCount) {
+      maxTuples = (int)((1 - throughputAdjustmentFactor) * lastConsumedEventsCount);
+    } else {
+      if (idleCount > 0) {
+        maxTuples = (int)((1 + throughputAdjustmentFactor * idleCount) * lastConsumedEventsCount);
+        if (maxTuples <= 0) {
+          maxTuples = minimumEventsPerTransaction;
+        }
+      } else {
+        maxTuples = lastConsumedEventsCount;
+      }
+    }
+
+    if (maxTuples >= maximumEventsPerTransaction) {
+      maxTuples = maximumEventsPerTransaction;
+    } else if (maxTuples <= 0) {
+      maxTuples = minimumEventsPerTransaction;
+    }
+
+    if (maxTuples > 0) {
+      if (playback != null) {
+        try {
+          int i = 0;
+          do {
+            if (!client.write(playback)) {
+              retryWrite(playback, null);
+            }
+            outstandingEventsCount++;
+            playback = storage.retrieveNext();
+          }
+          while (++i < maxTuples && playback != null);
+        } catch (Exception ex) {
+          logger.warn("Playback Failed", ex);
+          if (ex instanceof NetletThrowable) {
+            try {
+              eventloop.disconnect(client);
+            } finally {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+          }
+          return Status.BACKOFF;
+        }
+      } else {
+        int storedTuples = 0;
+
+        Transaction t = getChannel().getTransaction();
+        try {
+          t.begin();
+
+          Event e;
+          while (storedTuples < maxTuples && (e = getChannel().take()) != null) {
+            Slice event = codec.toByteArray(e);
+            byte[] address = storage.store(event);
+            if (address != null) {
+              if (!client.write(address, event)) {
+                retryWrite(address, event);
+              }
+              outstandingEventsCount++;
+            } else {
+              logger.debug("Detected the condition of recovery from flume crash!");
+            }
+            storedTuples++;
+          }
+
+          if (storedTuples > 0) {
+            storage.flush();
+          }
+
+          t.commit();
+
+          if (storedTuples > 0) { /* log less frequently */
+            logger.debug("Transaction details maxTuples = {}, storedTuples = {}, outstanding = {}",
+                maxTuples, storedTuples, outstandingEventsCount);
+          }
+        } catch (Error er) {
+          t.rollback();
+          throw er;
+        } catch (Exception ex) {
+          logger.error("Transaction Failed", ex);
+          if (ex instanceof NetletRuntimeException && client != null) {
+            try {
+              eventloop.disconnect(client);
+            } finally {
+              client = null;
+              outstandingEventsCount = 0;
+            }
+          }
+          t.rollback();
+          return Status.BACKOFF;
+        } finally {
+          t.close();
+        }
+
+        if (storedTuples == 0) {
+          sleep();
+        }
+      }
+    }
+
+    return Status.READY;
+  }
+
+  private void sleep()
+  {
+    try {
+      Thread.sleep(sleepMillis);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+    }
+  }
+
+  @Override
+  public void start()
+  {
+    try {
+      if (storage instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
+        component.setup(null);
+      }
+      if (discovery instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
+        component.setup(null);
+      }
+      if (codec instanceof Component) {
+        @SuppressWarnings("unchecked")
+        Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
+        component.setup(null);
+      }
+      eventloop = new DefaultEventLoop("EventLoop-" + id);
+      server = new Server(id, discovery, acceptedTolerance);
+    } catch (Error error) {
+      throw error;
+    } catch (RuntimeException re) {
+      throw re;
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
+
+    eventloop.start();
+    eventloop.start(hostname, port, server);
+    super.start();
+  }
+
+  @Override
+  public void stop()
+  {
+    try {
+      super.stop();
+    } finally {
+      try {
+        if (client != null) {
+          eventloop.disconnect(client);
+          client = null;
+        }
+
+        eventloop.stop(server);
+        eventloop.stop();
+
+        if (codec instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)codec;
+          component.teardown();
+        }
+        if (discovery instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)discovery;
+          component.teardown();
+        }
+        if (storage instanceof Component) {
+          @SuppressWarnings("unchecked")
+          Component<com.datatorrent.api.Context> component = (Component<com.datatorrent.api.Context>)storage;
+          component.teardown();
+        }
+      } catch (Throwable cause) {
+        throw new ServiceConfigurationError("Failed Stop", cause);
+      }
+    }
+  }
+
+  /* End implementing Flume Sink interface */
+
+  /* Begin Configurable Interface */
+  @Override
+  public void configure(Context context)
+  {
+    hostname = context.getString(HOSTNAME_STRING, HOSTNAME_DEFAULT);
+    port = context.getInteger("port", 0);
+    id = context.getString("id");
+    if (id == null) {
+      id = getName();
+    }
+    acceptedTolerance = context.getLong("acceptedTolerance", ACCEPTED_TOLERANCE);
+    sleepMillis = context.getLong("sleepMillis", 5L);
+    throughputAdjustmentFactor = context.getInteger("throughputAdjustmentPercent", 5) / 100.0;
+    maximumEventsPerTransaction = context.getInteger("maximumEventsPerTransaction", 10000);
+    minimumEventsPerTransaction = context.getInteger("minimumEventsPerTransaction", 100);
+    commitEventTimeoutMillis = context.getLong("commitEventTimeoutMillis", Long.MAX_VALUE);
+
+    @SuppressWarnings("unchecked")
+    Discovery<byte[]> ldiscovery = configure("discovery", Discovery.class, context);
+    if (ldiscovery == null) {
+      logger.warn("Discovery agent not configured for the sink!");
+      discovery = new Discovery<byte[]>()
+      {
+        @Override
+        public void unadvertise(Service<byte[]> service)
+        {
+          logger.debug("Sink {} stopped listening on {}:{}", service.getId(), service.getHost(), service.getPort());
+        }
+
+        @Override
+        public void advertise(Service<byte[]> service)
+        {
+          logger.debug("Sink {} started listening on {}:{}", service.getId(), service.getHost(), service.getPort());
+        }
+
+        @Override
+        @SuppressWarnings("unchecked")
+        public Collection<Service<byte[]>> discover()
+        {
+          return Collections.EMPTY_SET;
+        }
+
+      };
+    } else {
+      discovery = ldiscovery;
+    }
+
+    storage = configure("storage", Storage.class, context);
+    if (storage == null) {
+      logger.warn("storage key missing... DTFlumeSink may lose data!");
+      storage = new Storage()
+      {
+        @Override
+        public byte[] store(Slice slice)
+        {
+          return null;
+        }
+
+        @Override
+        public byte[] retrieve(byte[] identifier)
+        {
+          return null;
+        }
+
+        @Override
+        public byte[] retrieveNext()
+        {
+          return null;
+        }
+
+        @Override
+        public void clean(byte[] identifier)
+        {
+        }
+
+        @Override
+        public void flush()
+        {
+        }
+
+      };
+    }
+
+    @SuppressWarnings("unchecked")
+    StreamCodec<Event> lCodec = configure("codec", StreamCodec.class, context);
+    if (lCodec == null) {
+      codec = new EventCodec();
+    } else {
+      codec = lCodec;
+    }
+
+  }
+
+  /* End Configurable Interface */
+
+  @SuppressWarnings({"UseSpecificCatch", "BroadCatchBlock", "TooBroadCatch"})
+  private static <T> T configure(String key, Class<T> clazz, Context context)
+  {
+    String classname = context.getString(key);
+    if (classname == null) {
+      return null;
+    }
+
+    try {
+      Class<?> loadClass = Thread.currentThread().getContextClassLoader().loadClass(classname);
+      if (clazz.isAssignableFrom(loadClass)) {
+        @SuppressWarnings("unchecked")
+        T object = (T)loadClass.newInstance();
+        if (object instanceof Configurable) {
+          Context context1 = new Context(context.getSubProperties(key + '.'));
+          String id = context1.getString(Storage.ID);
+          if (id == null) {
+            id = context.getString(Storage.ID);
+            logger.debug("{} inherited id={} from sink", key, id);
+            context1.put(Storage.ID, id);
+          }
+          ((Configurable)object).configure(context1);
+        }
+
+        return object;
+      } else {
+        logger.error("key class {} does not implement {} interface", classname, Storage.class.getCanonicalName());
+        throw new Error("Invalid storage " + classname);
+      }
+    } catch (Error error) {
+      throw error;
+    } catch (RuntimeException re) {
+      throw re;
+    } catch (Throwable t) {
+      throw new RuntimeException(t);
+    }
+  }
+
+  /**
+   * @return the hostname
+   */
+  String getHostname()
+  {
+    return hostname;
+  }
+
+  /**
+   * @param hostname the hostname to set
+   */
+  void setHostname(String hostname)
+  {
+    this.hostname = hostname;
+  }
+
+  /**
+   * @return the port
+   */
+  int getPort()
+  {
+    return port;
+  }
+
+  public long getAcceptedTolerance()
+  {
+    return acceptedTolerance;
+  }
+
+  public void setAcceptedTolerance(long acceptedTolerance)
+  {
+    this.acceptedTolerance = acceptedTolerance;
+  }
+
+  /**
+   * @param port the port to set
+   */
+  void setPort(int port)
+  {
+    this.port = port;
+  }
+
+  /**
+   * @return the discovery
+   */
+  Discovery<byte[]> getDiscovery()
+  {
+    return discovery;
+  }
+
+  /**
+   * @param discovery the discovery to set
+   */
+  void setDiscovery(Discovery<byte[]> discovery)
+  {
+    this.discovery = discovery;
+  }
+
+  /**
+   * Retry the write, sleeping between attempts, for as long as the client stays connected.
+   * If the client disconnects before a write succeeds, give up and report the failure.
+   *
+   * @param address address of the event in storage
+   * @param event the serialized event, or null during playback when address and event are sent as a single buffer
+   * @throws IOException
+   */
+  private void retryWrite(byte[] address, Slice event) throws IOException
+  {
+    if (event == null) {  /* this happens for playback where address and event are sent as single object */
+      while (client.isConnected()) {
+        sleep();
+        if (client.write(address)) {
+          return;
+        }
+      }
+    } else {  /* this happens when the events are taken from the flume channel and writing first time failed */
+      while (client.isConnected()) {
+        sleep();
+        if (client.write(address, event)) {
+          return;
+        }
+      }
+    }
+
+    throw new IOException("Client disconnected!");
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSink.class);
+}
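
For reference, here is a minimal sketch of how the properties listed in the javadoc above reach configure(Context) when the sink is set up programmatically (for example in a test); the keys match the configure() method above, every value is made up, and a real agent would normally supply the same keys through the flume properties file. Without discovery, storage and codec keys the sink falls back to the defaults shown in configure(), so a real deployment would also configure storage.

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.flume.Context;

    import com.datatorrent.flume.sink.DTFlumeSink;

    public class DTFlumeSinkConfigSketch
    {
      // Returns a sink configured the same way a flume properties file would; values are illustrative.
      public static DTFlumeSink newConfiguredSink()
      {
        Map<String, String> props = new HashMap<String, String>();
        props.put("id", "sink1");                        // unique id of this sink
        props.put("hostname", "127.0.0.1");              // interface the embedded server listens on
        props.put("port", "8080");                       // port the server binds to
        props.put("sleepMillis", "5");                   // back-off when the channel has no events
        props.put("throughputAdjustmentPercent", "5");   // step used to grow/shrink the transaction size
        props.put("minimumEventsPerTransaction", "100");
        props.put("maximumEventsPerTransaction", "10000");

        DTFlumeSink sink = new DTFlumeSink();
        sink.configure(new Context(props));
        return sink;
      }
    }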


[11/13] apex-malhar git commit: Interceptor and hdfs test source

Posted by th...@apache.org.
Interceptor and hdfs test source


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/2cfe153c
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/2cfe153c
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/2cfe153c

Branch: refs/heads/master
Commit: 2cfe153c98dd05b8185beaf58d6db8c4b7ec9408
Parents: 4432651
Author: Chandni Singh <ch...@datatorrent.com>
Authored: Sun Feb 19 21:34:49 2017 +0530
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 .../ColumnFilteringFormattingInterceptor.java   | 228 +++++++++++++++++++
 .../flume/source/HdfsTestSource.java            | 222 ++++++++++++++++++
 ...olumnFilteringFormattingInterceptorTest.java | 134 +++++++++++
 3 files changed, 584 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2cfe153c/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
new file mode 100644
index 0000000..ce92f6d
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptor.java
@@ -0,0 +1,228 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.interceptor;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Ints;
+
+import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER;
+import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.SRC_SEPARATOR;
+import static com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor.Constants.SRC_SEPARATOR_DFLT;
+
+/**
+ * <p>ColumnFilteringFormattingInterceptor class.</p>
+ *
+ * @author Chandni Singh <ch...@datatorrent.com>
+ * @since 0.9.4
+ */
+public class ColumnFilteringFormattingInterceptor implements Interceptor
+{
+  private final byte srcSeparator;
+  private final byte[][] dstSeparators;
+  private final byte[] prefix;
+  private final int maxIndex;
+  private final int maxColumn;
+  private final int[] columns;
+  private final int[] positions;
+
+  private ColumnFilteringFormattingInterceptor(int[] columns, byte srcSeparator, byte[][] dstSeparators, byte[] prefix)
+  {
+    this.columns = columns;
+
+    int tempMaxColumn = Integer.MIN_VALUE;
+    for (int column : columns) {
+      if (column > tempMaxColumn) {
+        tempMaxColumn = column;
+      }
+    }
+    maxIndex = tempMaxColumn;
+    maxColumn = tempMaxColumn + 1;
+    positions = new int[maxColumn + 1];
+    this.srcSeparator = srcSeparator;
+    this.dstSeparators = dstSeparators;
+    this.prefix = prefix;
+  }
+
+  @Override
+  public void initialize()
+  {
+    /* no-op */
+  }
+
+  @Override
+  public Event intercept(Event event)
+  {
+    byte[] body = event.getBody();
+    if (body == null) {
+      return event;
+    }
+
+    final int length = body.length;
+
+    /* store positions of character after the separators */
+    int i = 0;
+    int index = 0;
+    while (i < length) {
+      if (body[i++] == srcSeparator) {
+        positions[++index] = i;
+        if (index >= maxIndex) {
+          break;
+        }
+      }
+    }
+
+    int nextVirginIndex;
+    boolean separatorAtEnd = true;
+    if (i == length && index < maxColumn) {
+      nextVirginIndex = index + 2;
+      positions[nextVirginIndex - 1] = length;
+      separatorAtEnd = length > 0 && body[length - 1] == srcSeparator;
+    } else {
+      nextVirginIndex = index + 1;
+    }
+
+    int newArrayLen = prefix.length;
+    for (i = columns.length; i-- > 0; ) {
+      int column = columns[i];
+      int len = positions[column + 1] - positions[column];
+      if (len > 0) {
+        if (positions[column + 1] == length && !separatorAtEnd) {
+          newArrayLen += len;
+        } else {
+          newArrayLen += len - 1;
+        }
+      }
+      newArrayLen += dstSeparators[i].length;
+    }
+
+    byte[] newBody = new byte[newArrayLen];
+    int newOffset = 0;
+    if (prefix.length > 0) {
+      System.arraycopy(prefix, 0, newBody, 0, prefix.length);
+      newOffset += prefix.length;
+    }
+    int dstSeparatorsIdx = 0;
+    for (int column : columns) {
+      int len = positions[column + 1] - positions[column];
+      byte[] separator = dstSeparators[dstSeparatorsIdx++];
+      if (len > 0) {
+        System.arraycopy(body, positions[column], newBody, newOffset, len);
+        newOffset += len;
+        if (newBody[newOffset - 1] == srcSeparator) {
+          newOffset--;
+        }
+      }
+      System.arraycopy(separator, 0, newBody, newOffset, separator.length);
+      newOffset += separator.length;
+    }
+    event.setBody(newBody);
+    Arrays.fill(positions, 1, nextVirginIndex, 0);
+    return event;
+  }
+
+  @Override
+  public List<Event> intercept(List<Event> events)
+  {
+    for (Event event : events) {
+      intercept(event);
+    }
+    return events;
+  }
+
+  @Override
+  public void close()
+  {
+  }
+
+  public static class Builder implements Interceptor.Builder
+  {
+    private int[] columns;
+    private byte srcSeparator;
+    private byte[][] dstSeparators;
+    private byte[] prefix;
+
+    @Override
+    public Interceptor build()
+    {
+      return new ColumnFilteringFormattingInterceptor(columns, srcSeparator, dstSeparators, prefix);
+    }
+
+    @Override
+    public void configure(Context context)
+    {
+      String formatter = context.getString(COLUMNS_FORMATTER);
+      if (Strings.isNullOrEmpty(formatter)) {
+        throw new IllegalArgumentException("This interceptor requires columns format to be specified!");
+      }
+      List<String> lSeparators = Lists.newArrayList();
+      List<Integer> lColumns = Lists.newArrayList();
+      Pattern colPat = Pattern.compile("\\{\\d+?\\}");
+      Matcher matcher = colPat.matcher(formatter);
+      int separatorStart = 0;
+      String lPrefix = "";
+      while (matcher.find()) {
+        String col = matcher.group();
+        lColumns.add(Integer.parseInt(col.substring(1, col.length() - 1)));
+        if (separatorStart == 0 && matcher.start() > 0) {
+          lPrefix = formatter.substring(0, matcher.start());
+        } else if (separatorStart > 0) {
+          lSeparators.add(formatter.substring(separatorStart, matcher.start()));
+        }
+
+        separatorStart = matcher.end();
+      }
+      if (separatorStart < formatter.length()) {
+        lSeparators.add(formatter.substring(separatorStart, formatter.length()));
+      }
+      columns = Ints.toArray(lColumns);
+      byte[] emptyStringBytes = "".getBytes();
+
+      dstSeparators = new byte[columns.length][];
+
+      for (int i = 0; i < columns.length; i++) {
+        if (i < lSeparators.size()) {
+          dstSeparators[i] = lSeparators.get(i).getBytes();
+        } else {
+          dstSeparators[i] = emptyStringBytes;
+        }
+      }
+      srcSeparator = context.getInteger(SRC_SEPARATOR, (int)SRC_SEPARATOR_DFLT).byteValue();
+      this.prefix = lPrefix.getBytes();
+    }
+  }
+
+  public static class Constants extends ColumnFilteringInterceptor.Constants
+  {
+    public static final String COLUMNS_FORMATTER = "columnsFormatter";
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ColumnFilteringFormattingInterceptor.class);
+
+}
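
A minimal sketch of building the interceptor programmatically, in the same style as the tests that appear later in this series; the separator byte and formatter string are illustrative, and a flume agent would normally create the Builder from the interceptor type configured in its properties file.

    import java.util.HashMap;

    import org.apache.flume.Context;
    import org.apache.flume.interceptor.Interceptor;

    import com.datatorrent.flume.interceptor.ColumnFilteringFormattingInterceptor;
    import com.datatorrent.flume.interceptor.ColumnFilteringInterceptor;

    public class FormattingInterceptorSketch
    {
      // Builds the interceptor with a 0x02 source separator and a formatter that keeps
      // columns 1, 2 and 3, each followed by a 0x01 byte; both values are illustrative.
      public static Interceptor newInterceptor()
      {
        HashMap<String, String> props = new HashMap<String, String>();
        props.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
        props.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{1}\001{2}\001{3}\001");

        ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
        builder.configure(new Context(props));
        return builder.build();   // intercept(event) then rewrites the event body in place
      }
    }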

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2cfe153c/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java b/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
new file mode 100644
index 0000000..18aac37
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/source/HdfsTestSource.java
@@ -0,0 +1,222 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.source;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import javax.annotation.Nonnull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDrivenSource;
+import org.apache.flume.channel.ChannelProcessor;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.source.AbstractSource;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
+/**
+ * <p>HdfsTestSource class: a test source that replays gzipped HDFS files as Flume events.</p>
+ *
+ * @since 0.9.4
+ */
+public class HdfsTestSource extends AbstractSource implements EventDrivenSource, Configurable
+{
+  public static final String SOURCE_DIR = "sourceDir";
+  public static final String RATE = "rate";
+  public static final String INIT_DATE = "initDate";
+
+  static byte FIELD_SEPARATOR = 2;
+  public Timer emitTimer;
+  @Nonnull
+  String directory;
+  Path directoryPath;
+  int rate;
+  String initDate;
+  long initTime;
+  List<String> dataFiles;
+  long oneDayBack;
+
+  private transient BufferedReader br = null;
+  protected transient FileSystem fs;
+  private transient Configuration configuration;
+
+  private transient int currentFile = 0;
+  private transient boolean finished;
+  private List<Event> events;
+
+  public HdfsTestSource()
+  {
+    super();
+    this.rate = 2500;
+    dataFiles = Lists.newArrayList();
+    Calendar calendar = Calendar.getInstance();
+    calendar.add(Calendar.DATE, -1);
+    oneDayBack = calendar.getTimeInMillis();
+    configuration = new Configuration();
+    events = Lists.newArrayList();
+  }
+
+  @Override
+  public void configure(Context context)
+  {
+    directory = context.getString(SOURCE_DIR);
+    rate = context.getInteger(RATE, rate);
+    initDate = context.getString(INIT_DATE);
+
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(directory));
+    directoryPath = new Path(directory);
+
+    String[] parts = initDate.split("-");
+    Preconditions.checkArgument(parts.length == 3);
+    Calendar calendar = Calendar.getInstance();
+    calendar.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]) - 1, Integer.parseInt(parts[2]), 0, 0, 0);
+    initTime = calendar.getTimeInMillis();
+
+    try {
+      List<String> files = findFiles();
+      for (String file : files) {
+        dataFiles.add(file);
+      }
+      if (logger.isDebugEnabled()) {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+        logger.debug("settings {} {} {} {} {}", directory, rate, dateFormat.format(oneDayBack),
+            dateFormat.format(new Date(initTime)), currentFile);
+        for (String file : dataFiles) {
+          logger.debug("settings add file {}", file);
+        }
+      }
+
+      fs = FileSystem.newInstance(new Path(directory).toUri(), configuration);
+      Path filePath = new Path(dataFiles.get(currentFile));
+      br = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(fs.open(filePath))));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    finished = false; // flipped back to true once all data files have been replayed
+
+  }
+
+  private List<String> findFiles() throws IOException
+  {
+    List<String> files = Lists.newArrayList();
+    Path directoryPath = new Path(directory);
+    FileSystem lfs = FileSystem.newInstance(directoryPath.toUri(), configuration);
+    try {
+      logger.debug("checking for new files in {}", directoryPath);
+      RemoteIterator<LocatedFileStatus> statuses = lfs.listFiles(directoryPath, true);
+      while (statuses.hasNext()) {
+        FileStatus status = statuses.next();
+        Path path = status.getPath();
+        String filePathStr = path.toString();
+        if (!filePathStr.endsWith(".gz")) {
+          continue;
+        }
+        logger.debug("new file {}", filePathStr);
+        files.add(path.toString());
+      }
+    } catch (FileNotFoundException e) {
+      logger.warn("Failed to list directory {}", directoryPath, e);
+      throw new RuntimeException(e);
+    } finally {
+      lfs.close();
+    }
+    return files;
+  }
+
+  @Override
+  public void start()
+  {
+    super.start();
+    emitTimer = new Timer();
+
+    final ChannelProcessor channelProcessor = getChannelProcessor();
+    emitTimer.scheduleAtFixedRate(new TimerTask()
+    {
+      @Override
+      public void run()
+      {
+        int lineCount = 0;
+        events.clear();
+        try {
+          while (lineCount < rate && !finished) {
+            String line = br.readLine();
+
+            if (line == null) {
+              logger.debug("completed file {}", currentFile);
+              br.close();
+              currentFile++;
+              if (currentFile == dataFiles.size()) {
+                logger.info("finished all files");
+                finished = true;
+                break;
+              }
+              Path filePath = new Path(dataFiles.get(currentFile));
+              br = new BufferedReader(new InputStreamReader(new GzipCompressorInputStream(fs.open(filePath))));
+              logger.info("opening file {}. {}", currentFile, filePath);
+              continue;
+            }
+            lineCount++;
+            Event flumeEvent = EventBuilder.withBody(line.getBytes());
+            events.add(flumeEvent);
+          }
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+        if (events.size() > 0) {
+          channelProcessor.processEventBatch(events);
+        }
+        if (finished) {
+          emitTimer.cancel();
+        }
+      }
+
+    }, 0, 1000);
+  }
+
+  @Override
+  public void stop()
+  {
+    emitTimer.cancel();
+    super.stop();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HdfsTestSource.class);
+}
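
A minimal sketch of configuring the test source programmatically; the keys match the configure() method above, while the directory path and date are made-up values and the directory must already contain the .gz files the source replays, since configure() opens the first file eagerly.

    import java.util.HashMap;

    import org.apache.flume.Context;

    import com.datatorrent.flume.source.HdfsTestSource;

    public class HdfsTestSourceConfigSketch
    {
      public static HdfsTestSource newConfiguredSource()
      {
        HashMap<String, String> props = new HashMap<String, String>();
        props.put("sourceDir", "/tmp/flume-test-data"); // HDFS directory with gzipped input files (illustrative)
        props.put("rate", "2500");                      // events emitted per one-second timer tick
        props.put("initDate", "2014-01-01");            // yyyy-MM-dd, parsed into initTime

        HdfsTestSource source = new HdfsTestSource();
        source.configure(new Context(props));
        return source;
      }
    }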

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/2cfe153c/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
new file mode 100644
index 0000000..aca99c3
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.interceptor;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.flume.Context;
+import org.apache.flume.interceptor.Interceptor;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ * Tests for {@link ColumnFilteringFormattingInterceptor}
+ */
+public class ColumnFilteringFormattingInterceptorTest
+{
+  private static InterceptorTestHelper helper;
+
+  @BeforeClass
+  public static void startUp()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{1}\001{2}\001{3}\001");
+
+    helper = new InterceptorTestHelper(new ColumnFilteringFormattingInterceptor.Builder(), contextMap);
+  }
+
+  @Test
+  public void testInterceptEvent()
+  {
+    helper.testIntercept_Event();
+  }
+
+  @Test
+  public void testFiles() throws IOException, URISyntaxException
+  {
+    helper.testFiles();
+  }
+
+  @Test
+  public void testInterceptEventWithPrefix()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "\001{1}\001{2}\001{3}\001");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Six Fields",
+        "\001\001Second\001\001".getBytes(),
+        interceptor.intercept(
+        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody());
+  }
+
+  @Test
+  public void testInterceptEventWithLongSeparator()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "a{1}bc{2}def{3}ghi");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+    byte[] body = interceptor.intercept(
+        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody();
+
+    assertArrayEquals("Six Fields, " + new String(body), "abcSeconddefghi".getBytes(), body);
+  }
+
+  @Test
+  public void testInterceptEventWithTerminatingSeparator()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "a{1}bc{2}def{3}");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+    byte[] body = interceptor.intercept(
+        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody();
+
+    assertArrayEquals("Six Fields, " + new String(body), "abcSeconddef".getBytes(), body);
+  }
+
+  @Test
+  public void testInterceptEventWithColumnZero()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{0}\001");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Empty Bytes",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("".getBytes())).getBody());
+
+    assertArrayEquals("One Field",
+        "First\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("\002First".getBytes())).getBody());
+  }
+}


[08/13] apex-malhar git commit: Storage

Posted by th...@apache.org.
Storage


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/44326514
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/44326514
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/44326514

Branch: refs/heads/master
Commit: 4432651437e8aac8b442c0f86a6775adc47e962c
Parents: bbdab0e
Author: gaurav <ga...@datatorrent.com>
Authored: Sun Feb 19 21:29:56 2017 +0530
Committer: Pramod Immaneni <pr...@datatorrent.com>
Committed: Mon May 22 16:47:34 2017 -0700

----------------------------------------------------------------------
 .../datatorrent/flume/storage/HDFSStorage.java  | 946 +++++++++++++++++++
 .../com/datatorrent/flume/storage/Storage.java  |  73 ++
 .../flume/storage/HDFSStorageMatching.java      | 109 +++
 .../flume/storage/HDFSStoragePerformance.java   |  85 ++
 .../storage/HDFSStoragePerformanceTest.java     | 112 +++
 .../flume/storage/HDFSStorageTest.java          | 693 ++++++++++++++
 6 files changed, 2018 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/44326514/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java b/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
new file mode 100644
index 0000000..74849e9
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/storage/HDFSStorage.java
@@ -0,0 +1,946 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import java.io.DataInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import javax.validation.constraints.NotNull;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.google.common.primitives.Ints;
+import com.google.common.primitives.Longs;
+
+import com.datatorrent.api.Component;
+import com.datatorrent.common.util.NameableThreadFactory;
+import com.datatorrent.flume.sink.Server;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * HDFSStorage is developed to store and retrieve the data from HDFS
+ * <p />
+ * The properties that can be set on HDFSStorage are: <br />
+ * baseDir - The base directory where the data is going to be stored <br />
+ * restore - This is used to restore the application from previous failure <br />
+ * blockSize - The maximum size of each file to be created. <br />
+ *
+ * @author Gaurav Gupta <ga...@datatorrent.com>
+ * @since 0.9.3
+ */
+public class HDFSStorage implements Storage, Configurable, Component<com.datatorrent.api.Context>
+{
+  public static final int DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
+  public static final String BASE_DIR_KEY = "baseDir";
+  public static final String RESTORE_KEY = "restore";
+  public static final String BLOCKSIZE = "blockSize";
+  public static final String BLOCK_SIZE_MULTIPLE = "blockSizeMultiple";
+  public static final String NUMBER_RETRY = "retryCount";
+
+  private static final String OFFSET_SUFFIX = "-offsetFile";
+  private static final String BOOK_KEEPING_FILE_OFFSET = "-bookKeepingOffsetFile";
+  private static final String FLUSHED_IDENTITY_FILE = "flushedCounter";
+  private static final String CLEAN_OFFSET_FILE = "cleanoffsetFile";
+  private static final String FLUSHED_IDENTITY_FILE_TEMP = "flushedCounter.tmp";
+  private static final String CLEAN_OFFSET_FILE_TEMP = "cleanoffsetFile.tmp";
+  private static final int IDENTIFIER_SIZE = 8;
+  private static final int DATA_LENGTH_BYTE_SIZE = 4;
+
+  /**
+   * Number of times the storage will try to get the filesystem
+   */
+  private int retryCount = 3;
+  /**
+   * The multiple of block size
+   */
+  private int blockSizeMultiple = 1;
+  /**
+   * Identifier for this storage.
+   */
+  @NotNull
+  private String id;
+  /**
+   * The baseDir where the storage facility is going to create files.
+   */
+  @NotNull
+  private String baseDir;
+  /**
+   * The block size to be used to create the storage files
+   */
+  private long blockSize;
+  /**
+   * Whether the state from a previous run should be restored; when false the storage directory is cleared on setup
+   */
+  private boolean restore;
+  /**
+   * This identifies the current file number
+   */
+  private long currentWrittenFile;
+  /**
+   * This identifies the file number that has been flushed
+   */
+  private long flushedFileCounter;
+  /**
+   * The file that stores the fileCounter information
+   */
+  // private Path fileCounterFile;
+  /**
+   * The file that stores the flushed fileCounter information
+   */
+  private Path flushedCounterFile;
+  private Path flushedCounterFileTemp;
+  /**
+   * This identifies the last cleaned file number
+   */
+  private long cleanedFileCounter;
+  /**
+   * The file that stores the clean file counter information
+   */
+  // private Path cleanFileCounterFile;
+  /**
+   * The file that stores the clean file offset information
+   */
+  private Path cleanFileOffsetFile;
+  private Path cleanFileOffsetFileTemp;
+  private FileSystem fs;
+  private FSDataOutputStream dataStream;
+  ArrayList<DataBlock> files2Commit = new ArrayList<DataBlock>();
+  /**
+   * The offset in the current opened file
+   */
+  private long fileWriteOffset;
+  private FSDataInputStream readStream;
+  private long retrievalOffset;
+  private long retrievalFile;
+  private int offset;
+  private long flushedLong;
+  private long flushedFileWriteOffset;
+  private long bookKeepingFileOffset;
+  private byte[] cleanedOffset = new byte[8];
+  private long skipOffset;
+  private long skipFile;
+  private transient Path basePath;
+  private ExecutorService storageExecutor;
+  private byte[] currentData;
+  private FSDataInputStream nextReadStream;
+  private long nextFlushedLong;
+  private long nextRetrievalFile;
+  private byte[] nextRetrievalData;
+
+  public HDFSStorage()
+  {
+    this.restore = true;
+  }
+
+  /**
+   * Reads the storage configuration (id, baseDir, restore, blockSize, blockSizeMultiple, retryCount) from the context
+   *
+   * @param ctx the Flume context carrying the configuration
+   */
+  @Override
+  public void configure(Context ctx)
+  {
+    String tempId = ctx.getString(ID);
+    if (tempId == null) {
+      if (id == null) {
+        throw new IllegalArgumentException("id can't be  null.");
+      }
+    } else {
+      id = tempId;
+    }
+
+    String tempBaseDir = ctx.getString(BASE_DIR_KEY);
+    if (tempBaseDir != null) {
+      baseDir = tempBaseDir;
+    }
+
+    restore = ctx.getBoolean(RESTORE_KEY, restore);
+    Long tempBlockSize = ctx.getLong(BLOCKSIZE);
+    if (tempBlockSize != null) {
+      blockSize = tempBlockSize;
+    }
+    blockSizeMultiple = ctx.getInteger(BLOCK_SIZE_MULTIPLE, blockSizeMultiple);
+    retryCount = ctx.getInteger(NUMBER_RETRY, retryCount);
+  }
+
+  /**
+   * This function reads the file at a location and returns the bytes stored in the file
+   *
+   * @param path - the location of the file
+   * @return the bytes read from the file
+   * @throws IOException
+   */
+  byte[] readData(Path path) throws IOException
+  {
+    DataInputStream is = new DataInputStream(fs.open(path));
+    byte[] bytes = new byte[is.available()];
+    is.readFully(bytes);
+    is.close();
+    return bytes;
+  }
+
+  /**
+   * This function writes the bytes to a file specified by the path
+   *
+   * @param path the file location
+   * @param data the data to be written to the file
+   * @return the output stream used to write the data; the caller is responsible for closing it
+   * @throws IOException
+   */
+  private FSDataOutputStream writeData(Path path, byte[] data) throws IOException
+  {
+    FSDataOutputStream fsOutputStream;
+    if (fs.getScheme().equals("file")) {
+      // local FS does not support hflush and does not flush native stream
+      fsOutputStream = new FSDataOutputStream(
+          new FileOutputStream(Path.getPathWithoutSchemeAndAuthority(path).toString()), null);
+    } else {
+      fsOutputStream = fs.create(path);
+    }
+    fsOutputStream.write(data);
+    return fsOutputStream;
+  }
+
+  private long calculateOffset(long fileOffset, long fileCounter)
+  {
+    return ((fileCounter << 32) | (fileOffset & 0xffffffffL));
+  }
+
+  @Override
+  public byte[] store(Slice slice)
+  {
+    // logger.debug("store message ");
+    int bytesToWrite = slice.length + DATA_LENGTH_BYTE_SIZE;
+    if (currentWrittenFile < skipFile) {
+      fileWriteOffset += bytesToWrite;
+      if (fileWriteOffset >= bookKeepingFileOffset) {
+        files2Commit.add(new DataBlock(null, bookKeepingFileOffset,
+            new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), currentWrittenFile));
+        currentWrittenFile++;
+        if (fileWriteOffset > bookKeepingFileOffset) {
+          fileWriteOffset = bytesToWrite;
+        } else {
+          fileWriteOffset = 0;
+        }
+        try {
+          bookKeepingFileOffset = getFlushedFileWriteOffset(
+              new Path(basePath, currentWrittenFile + BOOK_KEEPING_FILE_OFFSET));
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      return null;
+    }
+
+    if (flushedFileCounter == currentWrittenFile && dataStream == null) {
+      currentWrittenFile++;
+      fileWriteOffset = 0;
+    }
+
+    if (flushedFileCounter == skipFile && skipFile != -1) {
+      skipFile++;
+    }
+
+    if (fileWriteOffset + bytesToWrite < blockSize) {
+      try {
+        /* write length and the actual data to the file */
+        if (fileWriteOffset == 0) {
+          // writeData(flushedCounterFile, String.valueOf(currentWrittenFile).getBytes()).close();
+          dataStream = writeData(new Path(basePath, String.valueOf(currentWrittenFile)),
+              Ints.toByteArray(slice.length));
+          dataStream.write(slice.buffer, slice.offset, slice.length);
+        } else {
+          dataStream.write(Ints.toByteArray(slice.length));
+          dataStream.write(slice.buffer, slice.offset, slice.length);
+        }
+        fileWriteOffset += bytesToWrite;
+
+        byte[] fileOffset = null;
+        if ((currentWrittenFile > skipFile) || (currentWrittenFile == skipFile && fileWriteOffset > skipOffset)) {
+          skipFile = -1;
+          fileOffset = new byte[IDENTIFIER_SIZE];
+          Server.writeLong(fileOffset, 0, calculateOffset(fileWriteOffset, currentWrittenFile));
+        }
+        return fileOffset;
+      } catch (IOException ex) {
+        logger.warn("Error while storing the bytes {}", ex.getMessage());
+        closeFs();
+        throw new RuntimeException(ex);
+      }
+    }
+    DataBlock db = new DataBlock(dataStream, fileWriteOffset,
+        new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), currentWrittenFile);
+    db.close();
+    files2Commit.add(db);
+    fileWriteOffset = 0;
+    ++currentWrittenFile;
+    return store(slice);
+  }
+
+  /**
+   * @param b the byte array to read from
+   * @param startIndex the index of the first of the four bytes to read
+   * @return the four bytes starting at startIndex interpreted as an unsigned little-endian integer
+   */
+  long byteArrayToLong(byte[] b, int startIndex)
+  {
+    final byte b1 = 0;
+    return Longs.fromBytes(b1, b1, b1, b1, b[3 + startIndex], b[2 + startIndex], b[1 + startIndex], b[startIndex]);
+  }
+
+  @Override
+  public byte[] retrieve(byte[] identifier)
+  {
+    skipFile = -1;
+    skipOffset = 0;
+    logger.debug("retrieve with address {}", Arrays.toString(identifier));
+    // flushing the last incomplete flushed file
+    closeUnflushedFiles();
+
+    retrievalOffset = byteArrayToLong(identifier, 0);
+    retrievalFile = byteArrayToLong(identifier, offset);
+
+    if (retrievalFile == 0 && retrievalOffset == 0 && currentWrittenFile == 0 && fileWriteOffset == 0) {
+      skipOffset = 0;
+      return null;
+    }
+
+    // making sure that the deleted address is not requested again
+    if (retrievalFile != 0 || retrievalOffset != 0) {
+      long cleanedFile = byteArrayToLong(cleanedOffset, offset);
+      if (retrievalFile < cleanedFile || (retrievalFile == cleanedFile &&
+          retrievalOffset < byteArrayToLong(cleanedOffset, 0))) {
+        logger.warn("The address asked has been deleted retrievalFile={}, cleanedFile={}, retrievalOffset={}, " +
+            "cleanedOffset={}", retrievalFile, cleanedFile, retrievalOffset, byteArrayToLong(cleanedOffset, 0));
+        closeFs();
+        throw new IllegalArgumentException(String.format("The data for address %s has already been deleted",
+            Arrays.toString(identifier)));
+      }
+    }
+
+    // we have just started
+    if (retrievalFile == 0 && retrievalOffset == 0) {
+      retrievalFile = byteArrayToLong(cleanedOffset, offset);
+      retrievalOffset = byteArrayToLong(cleanedOffset, 0);
+    }
+
+    if ((retrievalFile > flushedFileCounter)) {
+      skipFile = retrievalFile;
+      skipOffset = retrievalOffset;
+      retrievalFile = -1;
+      return null;
+    }
+    if ((retrievalFile == flushedFileCounter && retrievalOffset >= flushedFileWriteOffset)) {
+      skipFile = retrievalFile;
+      skipOffset = retrievalOffset - flushedFileWriteOffset;
+      retrievalFile = -1;
+      return null;
+    }
+
+    try {
+      if (readStream != null) {
+        readStream.close();
+        readStream = null;
+      }
+      Path path = new Path(basePath, String.valueOf(retrievalFile));
+      if (!fs.exists(path)) {
+        retrievalFile = -1;
+        closeFs();
+        throw new RuntimeException(String.format("File %s does not exist", path.toString()));
+      }
+
+      byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+      flushedLong = Server.readLong(flushedOffset, 0);
+      while (retrievalOffset >= flushedLong && retrievalFile < flushedFileCounter) {
+        retrievalOffset -= flushedLong;
+        retrievalFile++;
+        flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+        flushedLong = Server.readLong(flushedOffset, 0);
+      }
+
+      if (retrievalOffset >= flushedLong) {
+        logger.warn("data not flushed for the given identifier");
+        retrievalFile = -1;
+        return null;
+      }
+      synchronized (HDFSStorage.this) {
+        if (nextReadStream != null) {
+          nextReadStream.close();
+          nextReadStream = null;
+        }
+      }
+      currentData = null;
+      path = new Path(basePath, String.valueOf(retrievalFile));
+      //readStream = new FSDataInputStream(fs.open(path));
+      currentData = readData(path);
+      //readStream.seek(retrievalOffset);
+      storageExecutor.submit(getNextStream());
+      return retrieveHelper();
+    } catch (IOException e) {
+      closeFs();
+      throw new RuntimeException(e);
+    }
+  }
+
+  private byte[] retrieveHelper() throws IOException
+  {
+    int tempRetrievalOffset = (int)retrievalOffset;
+    int length = Ints.fromBytes(currentData[tempRetrievalOffset], currentData[tempRetrievalOffset + 1],
+        currentData[tempRetrievalOffset + 2], currentData[tempRetrievalOffset + 3]);
+    byte[] data = new byte[length + IDENTIFIER_SIZE];
+    System.arraycopy(currentData, tempRetrievalOffset + 4, data, IDENTIFIER_SIZE, length);
+    retrievalOffset += length + DATA_LENGTH_BYTE_SIZE;
+    if (retrievalOffset >= flushedLong) {
+      Server.writeLong(data, 0, calculateOffset(0, retrievalFile + 1));
+    } else {
+      Server.writeLong(data, 0, calculateOffset(retrievalOffset, retrievalFile));
+    }
+    return data;
+  }
+
+  @Override
+  public byte[] retrieveNext()
+  {
+    if (retrievalFile == -1) {
+      closeFs();
+      throw new RuntimeException("Call retrieve first");
+    }
+
+    if (retrievalFile > flushedFileCounter) {
+      logger.warn("data is not flushed");
+      return null;
+    }
+
+    try {
+      if (currentData == null) {
+        synchronized (HDFSStorage.this) {
+          if (nextRetrievalData != null && (retrievalFile == nextRetrievalFile)) {
+            currentData = nextRetrievalData;
+            flushedLong = nextFlushedLong;
+            nextRetrievalData = null;
+          } else {
+            currentData = null;
+            currentData = readData(new Path(basePath, String.valueOf(retrievalFile)));
+            byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+            flushedLong = Server.readLong(flushedOffset, 0);
+          }
+        }
+        storageExecutor.submit(getNextStream());
+      }
+
+      if (retrievalOffset >= flushedLong) {
+        retrievalFile++;
+        retrievalOffset = 0;
+
+        if (retrievalFile > flushedFileCounter) {
+          logger.warn("data is not flushed");
+          return null;
+        }
+
+        //readStream.close();
+        // readStream = new FSDataInputStream(fs.open(new Path(basePath, String.valueOf(retrievalFile))));
+        // byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+        // flushedLong = Server.readLong(flushedOffset, 0);
+
+        synchronized (HDFSStorage.this) {
+          if (nextRetrievalData != null && (retrievalFile == nextRetrievalFile)) {
+            currentData = nextRetrievalData;
+            flushedLong = nextFlushedLong;
+            nextRetrievalData = null;
+          } else {
+            currentData = null;
+            currentData = readData(new Path(basePath, String.valueOf(retrievalFile)));
+            byte[] flushedOffset = readData(new Path(basePath, retrievalFile + OFFSET_SUFFIX));
+            flushedLong = Server.readLong(flushedOffset, 0);
+          }
+        }
+        storageExecutor.submit(getNextStream());
+      }
+      //readStream.seek(retrievalOffset);
+      return retrieveHelper();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")
+  public void clean(byte[] identifier)
+  {
+    logger.info("clean {}", Arrays.toString(identifier));
+    long cleanFileIndex = byteArrayToLong(identifier, offset);
+
+    long cleanFileOffset = byteArrayToLong(identifier, 0);
+    if (flushedFileCounter == -1) {
+      identifier = new byte[8];
+    } else if (cleanFileIndex > flushedFileCounter ||
+        (cleanFileIndex == flushedFileCounter && cleanFileOffset >= flushedFileWriteOffset)) {
+      // This is to make sure that we clean only the data that is flushed
+      cleanFileIndex = flushedFileCounter;
+      cleanFileOffset = flushedFileWriteOffset;
+      Server.writeLong(identifier, 0, calculateOffset(cleanFileOffset, cleanFileIndex));
+    }
+    cleanedOffset = identifier;
+
+    try {
+      writeData(cleanFileOffsetFileTemp, identifier).close();
+      fs.rename(cleanFileOffsetFileTemp, cleanFileOffsetFile);
+      if (cleanedFileCounter >= cleanFileIndex) {
+        return;
+      }
+      do {
+        Path path = new Path(basePath, String.valueOf(cleanedFileCounter));
+        if (fs.exists(path) && fs.isFile(path)) {
+          fs.delete(path, false);
+        }
+        path = new Path(basePath, cleanedFileCounter + OFFSET_SUFFIX);
+        if (fs.exists(path) && fs.isFile(path)) {
+          fs.delete(path, false);
+        }
+        path = new Path(basePath, cleanedFileCounter + BOOK_KEEPING_FILE_OFFSET);
+        if (fs.exists(path) && fs.isFile(path)) {
+          fs.delete(path, false);
+        }
+        logger.info("deleted file {}", cleanedFileCounter);
+        ++cleanedFileCounter;
+      } while (cleanedFileCounter < cleanFileIndex);
+      // writeData(cleanFileCounterFile, String.valueOf(cleanedFileCounter).getBytes()).close();
+
+    } catch (IOException e) {
+      logger.warn("not able to close the streams {}", e.getMessage());
+      closeFs();
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * This is used mainly for cleaning up the counter files created
+   */
+  void cleanHelperFiles()
+  {
+    try {
+      fs.delete(basePath, true);
+    } catch (IOException e) {
+      logger.warn(e.getMessage());
+    }
+  }
+
+  private void closeUnflushedFiles()
+  {
+    try {
+      files2Commit.clear();
+      // closing the stream
+      if (dataStream != null) {
+        dataStream.close();
+        dataStream = null;
+        // currentWrittenFile++;
+        // fileWriteOffset = 0;
+      }
+
+      if (!fs.exists(new Path(basePath, currentWrittenFile + OFFSET_SUFFIX))) {
+        fs.delete(new Path(basePath, String.valueOf(currentWrittenFile)), false);
+      }
+
+      if (fs.exists(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX))) {
+        // This means that flush was called
+        flushedFileWriteOffset = getFlushedFileWriteOffset(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
+        bookKeepingFileOffset = getFlushedFileWriteOffset(
+            new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
+      }
+
+      if (flushedFileCounter != -1) {
+        currentWrittenFile = flushedFileCounter;
+        fileWriteOffset = flushedFileWriteOffset;
+      } else {
+        currentWrittenFile = 0;
+        fileWriteOffset = 0;
+      }
+
+      flushedLong = 0;
+
+    } catch (IOException e) {
+      closeFs();
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public void flush()
+  {
+    nextReadStream = null;
+    StringBuilder builder = new StringBuilder();
+    Iterator<DataBlock> itr = files2Commit.iterator();
+    DataBlock db;
+    try {
+      while (itr.hasNext()) {
+        db = itr.next();
+        db.updateOffsets();
+        builder.append(db.fileName).append(", ");
+      }
+      files2Commit.clear();
+
+      if (dataStream != null) {
+        dataStream.hflush();
+        writeData(flushedCounterFileTemp, String.valueOf(currentWrittenFile).getBytes()).close();
+        fs.rename(flushedCounterFileTemp, flushedCounterFile);
+        updateFlushedOffset(new Path(basePath, currentWrittenFile + OFFSET_SUFFIX), fileWriteOffset);
+        flushedFileWriteOffset = fileWriteOffset;
+        builder.append(currentWrittenFile);
+      }
+      logger.debug("flushed files {}", builder.toString());
+    } catch (IOException ex) {
+      logger.warn("not able to close the stream {}", ex.getMessage());
+      closeFs();
+      throw new RuntimeException(ex);
+    }
+    flushedFileCounter = currentWrittenFile;
+    // logger.debug("flushedFileCounter in flush {}",flushedFileCounter);
+  }
+
+  /**
+   * This updates the flushed offset
+   */
+  private void updateFlushedOffset(Path file, long bytesWritten)
+  {
+    byte[] lastStoredOffset = new byte[IDENTIFIER_SIZE];
+    Server.writeLong(lastStoredOffset, 0, bytesWritten);
+    try {
+      writeData(file, lastStoredOffset).close();
+    } catch (IOException e) {
+      try {
+        if (!Arrays.equals(readData(file), lastStoredOffset)) {
+          closeFs();
+          throw new RuntimeException(e);
+        }
+      } catch (Exception e1) {
+        closeFs();
+        throw new RuntimeException(e1);
+      }
+    }
+  }
+
+  public int getBlockSizeMultiple()
+  {
+    return blockSizeMultiple;
+  }
+
+  public void setBlockSizeMultiple(int blockSizeMultiple)
+  {
+    this.blockSizeMultiple = blockSizeMultiple;
+  }
+
+  /**
+   * @return the baseDir
+   */
+  public String getBaseDir()
+  {
+    return baseDir;
+  }
+
+  /**
+   * @param baseDir the baseDir to set
+   */
+  public void setBaseDir(String baseDir)
+  {
+    this.baseDir = baseDir;
+  }
+
+  /**
+   * @return the id
+   */
+  public String getId()
+  {
+    return id;
+  }
+
+  /**
+   * @param id the id to set
+   */
+  public void setId(String id)
+  {
+    this.id = id;
+  }
+
+  /**
+   * @return the blockSize
+   */
+  public long getBlockSize()
+  {
+    return blockSize;
+  }
+
+  /**
+   * @param blockSize the blockSize to set
+   */
+  public void setBlockSize(long blockSize)
+  {
+    this.blockSize = blockSize;
+  }
+
+  /**
+   * @return the restore
+   */
+  public boolean isRestore()
+  {
+    return restore;
+  }
+
+  /**
+   * @param restore the restore to set
+   */
+  public void setRestore(boolean restore)
+  {
+    this.restore = restore;
+  }
+
+  class DataBlock
+  {
+    FSDataOutputStream dataStream;
+    long dataOffset;
+    Path path2FlushedData;
+    long fileName;
+    private Path bookKeepingPath;
+
+    DataBlock(FSDataOutputStream stream, long bytesWritten, Path path2FlushedData, long fileName)
+    {
+      this.dataStream = stream;
+      this.dataOffset = bytesWritten;
+      this.path2FlushedData = path2FlushedData;
+      this.fileName = fileName;
+    }
+
+    public void close()
+    {
+      if (dataStream != null) {
+        try {
+          dataStream.close();
+          bookKeepingPath = new Path(basePath, fileName + BOOK_KEEPING_FILE_OFFSET);
+          updateFlushedOffset(bookKeepingPath, dataOffset);
+        } catch (IOException ex) {
+          logger.warn("not able to close the stream {}", ex.getMessage());
+          closeFs();
+          throw new RuntimeException(ex);
+        }
+      }
+    }
+
+    public void updateOffsets() throws IOException
+    {
+      updateFlushedOffset(path2FlushedData, dataOffset);
+      if (bookKeepingPath != null && fs.exists(bookKeepingPath)) {
+        fs.delete(bookKeepingPath, false);
+      }
+    }
+
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStorage.class);
+
+  @Override
+  public void setup(com.datatorrent.api.Context context)
+  {
+    Configuration conf = new Configuration();
+    if (baseDir == null) {
+      baseDir = conf.get("hadoop.tmp.dir");
+      if (baseDir == null || baseDir.isEmpty()) {
+        throw new IllegalArgumentException("baseDir cannot be null.");
+      }
+    }
+    offset = 4;
+    skipOffset = -1;
+    skipFile = -1;
+    int tempRetryCount = 0;
+    while (tempRetryCount < retryCount && fs == null) {
+      try {
+        fs = FileSystem.newInstance(conf);
+        tempRetryCount++;
+      } catch (Throwable throwable) {
+        logger.warn("Not able to get file system ", throwable);
+      }
+    }
+
+    try {
+      Path path = new Path(baseDir);
+      basePath = new Path(path, id);
+      if (fs == null) {
+        fs = FileSystem.newInstance(conf);
+      }
+      if (!fs.exists(path)) {
+        closeFs();
+        throw new RuntimeException(String.format("baseDir passed (%s) doesn't exist.", baseDir));
+      }
+      if (!fs.isDirectory(path)) {
+        closeFs();
+        throw new RuntimeException(String.format("baseDir passed (%s) is not a directory.", baseDir));
+      }
+      if (!restore) {
+        fs.delete(basePath, true);
+      }
+      if (!fs.exists(basePath) || !fs.isDirectory(basePath)) {
+        fs.mkdirs(basePath);
+      }
+
+      if (blockSize == 0) {
+        blockSize = fs.getDefaultBlockSize(new Path(basePath, "tempData"));
+      }
+      if (blockSize == 0) {
+        blockSize = DEFAULT_BLOCK_SIZE;
+      }
+
+      blockSize = blockSizeMultiple * blockSize;
+
+      currentWrittenFile = 0;
+      cleanedFileCounter = -1;
+      retrievalFile = -1;
+      // fileCounterFile = new Path(basePath, IDENTITY_FILE);
+      flushedFileCounter = -1;
+      // cleanFileCounterFile = new Path(basePath, CLEAN_FILE);
+      cleanFileOffsetFile = new Path(basePath, CLEAN_OFFSET_FILE);
+      cleanFileOffsetFileTemp = new Path(basePath, CLEAN_OFFSET_FILE_TEMP);
+      flushedCounterFile = new Path(basePath, FLUSHED_IDENTITY_FILE);
+      flushedCounterFileTemp = new Path(basePath, FLUSHED_IDENTITY_FILE_TEMP);
+
+      if (restore) {
+        //
+        // if (fs.exists(fileCounterFile) && fs.isFile(fileCounterFile)) {
+        // //currentWrittenFile = Long.valueOf(new String(readData(fileCounterFile)));
+        // }
+
+        if (fs.exists(cleanFileOffsetFile) && fs.isFile(cleanFileOffsetFile)) {
+          cleanedOffset = readData(cleanFileOffsetFile);
+        }
+
+        if (fs.exists(flushedCounterFile) && fs.isFile(flushedCounterFile)) {
+          String strFlushedFileCounter = new String(readData(flushedCounterFile));
+          if (strFlushedFileCounter.isEmpty()) {
+            logger.warn("empty flushed file");
+          } else {
+            flushedFileCounter = Long.valueOf(strFlushedFileCounter);
+            flushedFileWriteOffset = getFlushedFileWriteOffset(new Path(basePath, flushedFileCounter + OFFSET_SUFFIX));
+            bookKeepingFileOffset = getFlushedFileWriteOffset(
+                new Path(basePath, flushedFileCounter + BOOK_KEEPING_FILE_OFFSET));
+          }
+
+        }
+      }
+      fileWriteOffset = flushedFileWriteOffset;
+      currentWrittenFile = flushedFileCounter;
+      cleanedFileCounter = byteArrayToLong(cleanedOffset, offset) - 1;
+      if (currentWrittenFile == -1) {
+        ++currentWrittenFile;
+        fileWriteOffset = 0;
+      }
+
+    } catch (IOException io) {
+
+      throw new RuntimeException(io);
+    }
+    storageExecutor = Executors.newSingleThreadExecutor(new NameableThreadFactory("StorageHelper"));
+  }
+
+  private void closeFs()
+  {
+    if (fs != null) {
+      try {
+        fs.close();
+        fs = null;
+      } catch (IOException e) {
+        logger.debug(e.getMessage());
+      }
+    }
+  }
+
+  private long getFlushedFileWriteOffset(Path filePath) throws IOException
+  {
+    if (flushedFileCounter != -1 && fs.exists(filePath)) {
+      byte[] flushedFileOffsetByte = readData(filePath);
+      if (flushedFileOffsetByte != null && flushedFileOffsetByte.length == 8) {
+        return Server.readLong(flushedFileOffsetByte, 0);
+      }
+    }
+    return 0;
+  }
+
+  @Override
+  public void teardown()
+  {
+    logger.debug("called teardown");
+    try {
+      if (readStream != null) {
+        readStream.close();
+      }
+      synchronized (HDFSStorage.this) {
+        if (nextReadStream != null) {
+          nextReadStream.close();
+        }
+      }
+
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    } finally {
+      closeUnflushedFiles();
+      storageExecutor.shutdown();
+    }
+
+  }
+
+  private Runnable getNextStream()
+  {
+    return new Runnable()
+    {
+      @Override
+      public void run()
+      {
+        try {
+          synchronized (HDFSStorage.this) {
+            nextRetrievalFile = retrievalFile + 1;
+            if (nextRetrievalFile > flushedFileCounter) {
+              nextRetrievalData = null;
+              return;
+            }
+            Path path = new Path(basePath, String.valueOf(nextRetrievalFile));
+            Path offsetPath = new Path(basePath, nextRetrievalFile + OFFSET_SUFFIX);
+            nextRetrievalData = null;
+            nextRetrievalData = readData(path);
+            byte[] flushedOffset = readData(offsetPath);
+            nextFlushedLong = Server.readLong(flushedOffset, 0);
+          }
+        } catch (Throwable e) {
+          logger.warn("in storage executor ", e);
+
+        }
+      }
+    };
+  }
+
+}
+
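
For reference, here is a minimal sketch (not part of this commit) of configuring HDFSStorage through a Flume
Context, using only the keys that configure() above actually reads; the base directory and id values are
placeholders. Note that store() returns an 8-byte address whose upper 32 bits carry the file counter and whose
lower 32 bits carry the write offset (see calculateOffset above).

    import org.apache.flume.Context;

    import com.datatorrent.flume.storage.HDFSStorage;

    public class HDFSStorageConfigSketch
    {
      public static void main(String[] args)
      {
        Context ctx = new Context();
        ctx.put(HDFSStorage.ID, "storage-1");                    // required identifier (Storage.ID)
        ctx.put(HDFSStorage.BASE_DIR_KEY, "/tmp/flume-storage"); // must already exist on the file system
        ctx.put(HDFSStorage.RESTORE_KEY, "true");                // restore counters/offsets from a previous run
        ctx.put(HDFSStorage.BLOCKSIZE, "1048576");               // roll data files at roughly 1 MB
        ctx.put(HDFSStorage.BLOCK_SIZE_MULTIPLE, "1");
        ctx.put(HDFSStorage.NUMBER_RETRY, "3");                  // attempts to obtain the FileSystem in setup()

        HDFSStorage storage = new HDFSStorage();
        storage.configure(ctx);
        storage.setup(null); // the tests below also pass null for the context
      }
    }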

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/44326514/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
----------------------------------------------------------------------
diff --git a/flume/src/main/java/com/datatorrent/flume/storage/Storage.java b/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
new file mode 100644
index 0000000..9f3a010
--- /dev/null
+++ b/flume/src/main/java/com/datatorrent/flume/storage/Storage.java
@@ -0,0 +1,73 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>Storage interface.</p>
+ *
+ * @author Gaurav Gupta  <ga...@datatorrent.com>
+ * @since 0.9.2
+ */
+public interface Storage
+{
+  /**
+   * Key in the context for the unique identifier of the storage, which may be used to recover from failure.
+   */
+  String ID = "id";
+
+  /**
+   * This stores the bytes and returns the unique identifier to retrieve these bytes
+   *
+   * @param bytes the bytes to store
+   * @return the identifier (address) for the stored bytes; may be null if no address can be returned yet
+   */
+  byte[] store(Slice bytes);
+
+  /**
+   * This returns the data bytes for the given identifier and the identifier for the next data bytes. <br/>
+   * The first eight bytes contain the identifier and the remaining bytes contain the data
+   *
+   * @param identifier the address returned by an earlier call, or eight zero bytes to read from the beginning
+   * @return the identifier for the next data bytes followed by the data bytes; null if the data is not flushed yet
+   */
+  byte[] retrieve(byte[] identifier);
+
+  /**
+   * This returns data bytes and the identifier for the next data bytes. The identifier for current data bytes is based
+   * on the retrieve method call and number of retrieveNext method calls after retrieve method call. <br/>
+   * The first eight bytes contain the identifier and the remaining bytes contain the data
+   *
+   * @return the identifier for the next data bytes followed by the data bytes; null if no more flushed data is available
+   */
+  byte[] retrieveNext();
+
+  /**
+   * This is used to clean up the files identified by identifier
+   *
+   * @param identifier the address up to which the stored data can be cleaned up
+   */
+  void clean(byte[] identifier);
+
+  /**
+   * This flushes the pending data to the stream so that it can be retrieved
+   *
+   */
+  void flush();
+
+}
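
As a usage illustration (not part of this commit), a minimal sketch of driving a Storage implementation through
the contract documented above: data only becomes retrievable after flush(), retrieve(new byte[8]) starts reading
from the beginning, and every returned buffer carries the address of the next item in its first eight bytes
followed by the payload. The HDFSStorage setters mirror the matching/performance drivers further down; the path
and id are placeholders.

    import java.util.Arrays;

    import com.datatorrent.flume.storage.HDFSStorage;
    import com.datatorrent.flume.storage.Storage;
    import com.datatorrent.netlet.util.Slice;

    public class StorageUsageSketch
    {
      public static void main(String[] args)
      {
        HDFSStorage hdfsStorage = new HDFSStorage();
        hdfsStorage.setBaseDir("/tmp/flume-storage"); // existing directory
        hdfsStorage.setId("storage-1");
        hdfsStorage.setRestore(true);
        hdfsStorage.setup(null);
        Storage storage = hdfsStorage;

        byte[] payload = "hello".getBytes();
        byte[] address = storage.store(new Slice(payload, 0, payload.length));
        storage.flush(); // without flush() the stored data cannot be retrieved yet

        byte[] first = storage.retrieve(new byte[8]);             // all-zero address = start from the beginning
        byte[] data = Arrays.copyOfRange(first, 8, first.length); // strip the 8-byte address prefix
        System.out.println(new String(data));                     // prints "hello"

        byte[] next = storage.retrieveNext(); // null here, nothing more has been flushed
        storage.clean(address);               // allow data up to this address to be removed
        hdfsStorage.teardown();
      }
    }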

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/44326514/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
new file mode 100644
index 0000000..05eeb4e
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageMatching.java
@@ -0,0 +1,109 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.primitives.Ints;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * @author Gaurav Gupta  <ga...@datatorrent.com>
+ */
+public class HDFSStorageMatching
+{
+
+  public static void main(String[] args)
+  {
+    HDFSStorage storage = new HDFSStorage();
+    storage.setBaseDir(args[0]);
+    storage.setId(args[1]);
+    storage.setRestore(true);
+    storage.setup(null);
+    int count = 100000000;
+
+    logger.debug(" start time {}", System.currentTimeMillis());
+    int index = 10000;
+    byte[] b = Ints.toByteArray(index);
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    logger.debug(" end time {}", System.currentTimeMillis());
+    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
+    b = storage.retrieve(new byte[8]);
+    int org_index = index;
+    index = 10000;
+    match(b, index);
+    while (true) {
+      index++;
+      b = storage.retrieveNext();
+      if (b == null) {
+        logger.debug(" end time for retrieve {}/{}/{}", System.currentTimeMillis(), index, org_index);
+        return;
+      } else {
+        if (!match(b, index)) {
+          throw new RuntimeException("failed : " + index);
+        }
+      }
+    }
+
+  }
+
+  public static boolean match(byte[] data, int match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    int dataR = Ints.fromByteArray(tempData);
+    //logger.debug("input: {}, output: {}",match,dataR);
+    if (match == dataR) {
+      return true;
+    }
+    return false;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStorageMatching.class);
+}
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/44326514/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
new file mode 100644
index 0000000..394ce0e
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformance.java
@@ -0,0 +1,85 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * @author Gaurav Gupta  <ga...@datatorrent.com>
+ */
+public class HDFSStoragePerformance
+{
+
+  public static void main(String[] args)
+  {
+    HDFSStorage storage = new HDFSStorage();
+    storage.setBaseDir(".");
+    storage.setId("gaurav_flume_1");
+    storage.setRestore(true);
+    storage.setup(null);
+    int count = 1000000;
+
+    logger.debug(" start time {}", System.currentTimeMillis());
+    int index = 10000;
+    byte[] b = new byte[1024];
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    logger.debug(" end time {}", System.currentTimeMillis());
+    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
+    storage.retrieve(new byte[8]);
+    String inputData = new String(b);
+    index = 1;
+    while (true) {
+      b = storage.retrieveNext();
+      if (b == null) {
+        logger.debug(" end time for retrieve {}", System.currentTimeMillis());
+        return;
+      } else {
+        if (!match(b, inputData)) {
+          throw new RuntimeException("failed : " + index);
+        }
+      }
+
+      index++;
+    }
+
+  }
+
+  public static boolean match(byte[] data, String match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+//    logger.debug("input: {}, output: {}",match,new String(tempData));
+    return (match.equals(new String(tempData)));
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStoragePerformance.class);
+}
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/44326514/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
new file mode 100644
index 0000000..08476c2
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
@@ -0,0 +1,112 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.primitives.Ints;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>HDFSStoragePerformanceTest class.</p>
+ *
+ * @author Gaurav Gupta  <ga...@datatorrent.com>
+ * @since 1.0.1
+ */
+public class HDFSStoragePerformanceTest
+{
+
+  public static void main(String[] args)
+  {
+    HDFSStorage storage = new HDFSStorage();
+    storage.setBaseDir(args[0]);
+    storage.setId(args[1]);
+    storage.setRestore(true);
+    storage.setup(null);
+    int count = 100000000;
+
+    logger.debug(" start time {}", System.currentTimeMillis());
+    int index = 10000;
+    byte[] b = Ints.toByteArray(index);
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    logger.debug(" end time {}", System.currentTimeMillis());
+    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
+    b = storage.retrieve(new byte[8]);
+    int org_index = index;
+    index = 10000;
+    match(b, index);
+    while (true) {
+      index++;
+      b = storage.retrieveNext();
+      if (b == null) {
+        logger.debug(" end time for retrieve {}/{}/{}", System.currentTimeMillis(), index, org_index);
+        return;
+      } else {
+        if (!match(b, index)) {
+          throw new RuntimeException("failed : " + index);
+        }
+      }
+    }
+
+  }
+
+  public static boolean match(byte[] data, int match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    int dataR = Ints.fromByteArray(tempData);
+    //logger.debug("input: {}, output: {}",match,dataR);
+    if (match == dataR) {
+      return true;
+    }
+    return false;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStoragePerformanceTest.class);
+}
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/44326514/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
new file mode 100644
index 0000000..b348c8f
--- /dev/null
+++ b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
@@ -0,0 +1,693 @@
+/**
+ * Copyright (c) 2016 DataTorrent, Inc. ALL Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.flume.storage;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestWatcher;
+import org.junit.runner.Description;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.flume.Context;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * @author Gaurav Gupta <ga...@datatorrent.com>
+ */
+public class HDFSStorageTest
+{
+  public static class TestMeta extends TestWatcher
+  {
+    public String baseDir;
+    public String testFile;
+    private String testData = "No and yes. There is also IdleTimeHandler that allows the operator to emit tuples. " +
+        "There is overlap, why not have a single interface. \n" +
+        "Also consider the possibility of an operator that does other processing and not consume nor emit tuples,";
+
+    @Override
+    protected void starting(org.junit.runner.Description description)
+    {
+      String className = description.getClassName();
+      baseDir = "target/" + className;
+      try {
+        baseDir = (new File(baseDir)).getAbsolutePath();
+        FileUtils.forceMkdir(new File(baseDir));
+        testFile = baseDir + "/testInput.txt";
+        FileOutputStream outputStream = FileUtils.openOutputStream(new File(testFile));
+        outputStream.write(testData.getBytes());
+        outputStream.close();
+
+      } catch (IOException ex) {
+        throw new RuntimeException(ex);
+      }
+    }
+
+    @Override
+    protected void finished(Description description)
+    {
+      try {
+        FileUtils.deleteDirectory(new File(baseDir));
+      } catch (IOException ex) {
+        throw new RuntimeException(ex);
+      }
+    }
+  }
+
+  @Rule
+  public TestMeta testMeta = new TestMeta();
+
+  private String STORAGE_DIRECTORY;
+
+  private HDFSStorage getStorage(String id, boolean restore)
+  {
+    Context ctx = new Context();
+    STORAGE_DIRECTORY = testMeta.baseDir;
+    ctx.put(HDFSStorage.BASE_DIR_KEY, testMeta.baseDir);
+    ctx.put(HDFSStorage.RESTORE_KEY, Boolean.toString(restore));
+    ctx.put(HDFSStorage.ID, id);
+    ctx.put(HDFSStorage.BLOCKSIZE, "256");
+    HDFSStorage lstorage = new HDFSStorage();
+    lstorage.configure(ctx);
+    lstorage.setup(null);
+    return lstorage;
+  }
+
+  private HDFSStorage storage;
+
+  @Before
+  public void setup()
+  {
+    storage = getStorage("1", false);
+  }
+
+  @After
+  public void teardown()
+  {
+    storage.teardown();
+    try {
+      Thread.sleep(100);
+    } catch (InterruptedException e) {
+      throw new RuntimeException(e);
+    }
+    storage.cleanHelperFiles();
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored. 2. The file is flushed but not closed. 3. Some
+   * more data is stored but the file does not roll over. 4. Retrieve is called for the last returned address and it
+   * returns null. 5. Some more data is stored again but the returned address is null because of the previous
+   * retrieve call.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlush() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    match(storage.retrieve(new byte[8]), "ab");
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(address), "cb");
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored to make sure that there is no roll over. 2. The
+   * file is flushed but not closed. 3. Some more data is stored; the data stored is enough to make the file roll
+   * over. 4. Retrieve is called for the last returned address and it returns null as the data is not flushed.
+   * 5. Some more data is stored again but the returned address is null because of the previous retrieve call.
+   * 6. The data is flushed to make sure that the data is committed. 7. Now the data is retrieved from the start and
+   * the data returned matches the data stored.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushRollOver() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    match(storage.retrieve(new byte[8]), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored to make sure that there is no roll over. 2. The
+   * file is flushed but not closed. 3. Some more data is stored; the data stored is enough to make the file roll
+   * over. 4. The storage crashes and a new storage is instantiated. 5. Retrieve is called for the last returned
+   * address and it returns null as the data is not flushed. 6. Some more data is stored again but the returned
+   * address is null because of the previous retrieve call. 7. The data is flushed to make sure that the data is
+   * committed. 8. Now the data is retrieved from the start and the data returned matches the data stored.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushRollOverWithFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    match(storage.retrieve(new byte[8]), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This tests clean when the file doesn't roll over
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithClean() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(new byte[8]), "cb");
+    match(storage.retrieve(address), "cb");
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+  }
+
+  /**
+   * This tests clean when the file doesn't roll over and the storage is restarted after a failure
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithCleanAndFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(new byte[8]), "cb");
+    match(storage.retrieve(address), "cb");
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+  }
+
+  /**
+   * This test covers the following use case: 1. Some data is stored to make sure that there is no roll over. 2. The
+   * file is flushed but not closed. 3. The data is cleaned up to the last returned address. 4. Some more data is
+   * stored; the data stored is enough to make the file roll over. 5. Retrieve is called for the last returned
+   * address and it returns null as the data is not flushed. 6. Some more data is stored again but the returned
+   * address is null because of the previous retrieve call. 7. The data is flushed to make sure that the data is
+   * committed. 8. Now the data is retrieved from the start and the data returned matches the data stored.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithCleanAndRollOver() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(new byte[8]), new String(b_org));
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * Tests clean when the files roll over and the storage fails.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithCleanAndRollOverAndFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
+        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
+        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
+        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
+        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
+        48, 46, 48, 1, 48, 46, 48};
+    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    storage.clean(address);
+    byte[] addr = null;
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      addr = storage.store(new Slice(b, 0, b.length));
+    }
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    for (int i = 0; i < 5; i++) {
+      b[0] = (byte)(b[0] + 1);
+      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    }
+    storage.flush();
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieve(address), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+    b_org[0] = (byte)(b_org[0] + 1);
+    match(storage.retrieveNext(), new String(b_org));
+
+  }
+
+  /**
+   * This test covers the following use case: the file is flushed and then more data is written to the same file, but
+   * the new data is not flushed, the file does not roll over, and the storage fails. The new storage instance comes
+   * up and the client asks for data at the last address returned by the earlier storage instance; the new storage
+   * returns null. The client stores the data again, but the address returned this time is null, and retrieval of the
+   * earlier address now returns the data.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testPartialFlushWithFailure() throws Exception
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = "ab".getBytes();
+    byte[] address = storage.store(new Slice(b, 0, b.length));
+    Assert.assertNotNull(address);
+    storage.flush();
+    b = "cb".getBytes();
+    byte[] addr = storage.store(new Slice(b, 0, b.length));
+    storage = getStorage("1", true);
+    Assert.assertNull(storage.retrieve(addr));
+    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    match(storage.retrieve(address), "cb");
+  }
+
+  private void match(byte[] data, String match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", match, new String(tempData));
+  }
+
+  @Test
+  public void testStorage() throws IOException
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[200];
+    byte[] identifier;
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    byte[] data = storage.retrieve(new byte[8]);
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    identifier = storage.store(new Slice(b, 0, b.length));
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+    Assert.assertNull(storage.retrieve(identifier));
+  }
+
+  @Test
+  public void testStorageWithRestore() throws IOException
+  {
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = new byte[200];
+    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
+    storage.flush();
+    storage.teardown();
+
+    storage = getStorage("1", true);
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    boolean exists = fs.exists(new Path(STORAGE_DIRECTORY + "/1/" + "1"));
+    Assert.assertEquals("file should exist", true, exists);
+  }
+
+  @Test
+  public void testCleanup() throws IOException
+  {
+    RandomAccessFile r = new RandomAccessFile(testMeta.testFile, "r");
+    r.seek(0);
+    byte[] b = r.readLine().getBytes();
+    storage.store(new Slice(b, 0, b.length));
+    byte[] val = storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    storage.clean(val);
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    boolean exists = fs.exists(new Path(STORAGE_DIRECTORY + "/" + "0"));
+    Assert.assertEquals("file should not exist", false, exists);
+    r.close();
+  }
+
+  @Test
+  public void testNext() throws IOException
+  {
+    RandomAccessFile r = new RandomAccessFile(testMeta.testFile, "r");
+    r.seek(0);
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    byte[] b = r.readLine().getBytes();
+    storage.store(new Slice(b, 0, b.length));
+    byte[] b1 = r.readLine().getBytes();
+    storage.store(new Slice(b1, 0, b1.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    storage.store(new Slice(b1, 0, b1.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    byte[] data = storage.retrieve(new byte[8]);
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+    data = storage.retrieveNext();
+    tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b1), new String(tempData));
+    data = storage.retrieveNext();
+    tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+    r.close();
+  }
+
+  @Test
+  public void testFailure() throws IOException
+  {
+    byte[] address;
+    byte[] b = new byte[200];
+    storage.retrieve(new byte[8]);
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      address = storage.store(new Slice(b, 0, b.length));
+      storage.flush();
+      storage.clean(address);
+    }
+    storage.teardown();
+
+    byte[] identifier = new byte[8];
+    storage = getStorage("1", true);
+
+    storage.retrieve(identifier);
+
+    storage.store(new Slice(b, 0, b.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    byte[] data = storage.retrieve(identifier);
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
+  }
+
+  /**
+   * This test case tests the clean call before any flush is called.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testCleanUnflushedData() throws IOException
+  {
+    for (int i = 0; i < 5; i++) {
+      final byte[] bytes = (i + "").getBytes();
+      storage.store(new Slice(bytes, 0, bytes.length));
+    }
+    storage.clean(new byte[8]);
+    storage.flush();
+    match(storage.retrieve(new byte[8]), "0");
+    match(storage.retrieveNext(), "1");
+  }
+
+  @Test
+  public void testCleanForUnflushedData() throws IOException
+  {
+    byte[] address = null;
+    byte[] b = new byte[200];
+    storage.retrieve(new byte[8]);
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      address = storage.store(new Slice(b, 0, b.length));
+      storage.flush();
+      // storage.clean(address);
+    }
+    byte[] lastWrittenAddress = null;
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
+    }
+    storage.clean(lastWrittenAddress);
+    byte[] cleanedOffset = storage.readData(new Path(STORAGE_DIRECTORY + "/1/cleanoffsetFile"));
+    Assert.assertArrayEquals(address, cleanedOffset);
+
+  }
+
+  @Test
+  public void testCleanForFlushedData() throws IOException
+  {
+    byte[] b = new byte[200];
+    storage.retrieve(new byte[8]);
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      storage.store(new Slice(b, 0, b.length));
+      storage.flush();
+      // storage.clean(address);
+    }
+    byte[] lastWrittenAddress = null;
+    for (int i = 0; i < 5; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    storage.clean(lastWrittenAddress);
+    byte[] cleanedOffset = storage.readData(new Path(STORAGE_DIRECTORY + "/1/cleanoffsetFile"));
+    Assert.assertArrayEquals(lastWrittenAddress, cleanedOffset);
+
+  }
+
+  @Test
+  public void testCleanForPartialFlushedData() throws IOException
+  {
+    byte[] b = new byte[8];
+    storage.retrieve(new byte[8]);
+
+    storage.store(new Slice(b, 0, b.length));
+    byte[] bytes = "1a".getBytes();
+    byte[] address = storage.store(new Slice(bytes, 0, bytes.length));
+    storage.flush();
+    storage.clean(address);
+
+    byte[] lastWrittenAddress = null;
+    for (int i = 0; i < 5; i++) {
+      final byte[] bytes1 = (i + "").getBytes();
+      storage.store(new Slice(bytes1, 0, bytes1.length));
+      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
+    }
+    Assert.assertNull(storage.retrieve(new byte[8]));
+    Assert.assertNull(storage.retrieve(lastWrittenAddress));
+    storage.store(new Slice(b, 0, b.length));
+    storage.flush();
+    Assert.assertNull(storage.retrieve(lastWrittenAddress));
+  }
+
+  @Test
+  public void testRandomSequence() throws IOException
+  {
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    byte[] bytes = new byte[]{48, 48, 48, 51, 101, 100, 55, 56, 55, 49, 53, 99, 52, 101, 55, 50, 97, 52, 48, 49, 51,
+        99, 97, 54, 102, 57, 55, 53, 57, 100, 49, 99, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51,
+        45, 49, 49, 45, 48, 55, 32, 48, 48, 58, 48, 48, 58, 52, 54, 1, 52, 50, 49, 50, 51, 1, 50, 1, 49, 53, 49, 49,
+        52, 50, 54, 53, 1, 49, 53, 49, 49, 57, 51, 53, 49, 1, 49, 53, 49, 50, 57, 56, 50, 52, 1, 49, 53, 49, 50, 49,
+        55, 48, 55, 1, 49, 48, 48, 55, 55, 51, 57, 51, 1, 49, 57, 49, 52, 55, 50, 53, 52, 54, 49, 1, 49, 1, 48, 1, 48,
+        46, 48, 1, 48, 46, 48, 1, 48, 46, 48};
+    storage.store(new Slice(bytes, 0, bytes.length));
+    storage.flush();
+    storage.clean(new byte[]{-109, 0, 0, 0, 0, 0, 0, 0});
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2555; i++) {
+      byte[] bytes1 = new byte[]{48, 48, 48, 55, 56, 51, 98, 101, 50, 54, 50, 98, 52, 102, 50, 54, 56, 97, 55, 56, 102,
+          48, 54, 54, 50, 49, 49, 54, 99, 98, 101, 99, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51,
+          45, 49, 49, 45, 48, 55, 32, 48, 48, 58, 48, 48, 58, 53, 49, 1, 49, 49, 49, 49, 54, 51, 57, 1, 50, 1, 49, 53,
+          49, 48, 57, 57, 56, 51, 1, 49, 53, 49, 49, 49, 55, 48, 52, 1, 49, 53, 49, 50, 49, 51, 55, 49, 1, 49, 53, 49,
+          49, 52, 56, 51, 49, 1, 49, 48, 48, 55, 49, 57, 56, 49, 1, 49, 50, 48, 50, 55, 54, 49, 54, 56, 53, 1, 49, 1,
+          48, 1, 48, 46, 48, 1, 48, 46, 48, 1, 48, 46, 48};
+      storage.store(new Slice(bytes1, 0, bytes1.length));
+      storage.flush();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 1297; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 1302; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 1317; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2007; i++) {
+      storage.retrieveNext();
+    }
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2556; i++) {
+      storage.retrieveNext();
+    }
+    byte[] bytes1 = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
+        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
+        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
+        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
+        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
+        1, 48, 46, 48, 1, 48, 46, 48};
+    storage.store(new Slice(bytes1, 0, bytes1.length));
+    storage.flush();
+    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
+    for (int i = 0; i < 2062; i++) {
+      storage.retrieveNext();
+
+    }
+  }
+
+  @SuppressWarnings("unused")
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStorageTest.class);
+}
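
For readers skimming this diff, the tests above exercise HDFSStorage as a small write-ahead log: store() buffers a Slice and returns an 8-byte address (or null once a prior retrieve call has pinned the read position), flush() commits buffered records, retrieve()/retrieveNext() return records prefixed with an 8-byte address header, and clean() discards everything up to a given address. Below is a minimal sketch of that lifecycle, not part of the commit; the class name, base directory and id are illustrative, the setters mirror those used in HDFSStoragePerformanceTest, and the import for HDFSStorage follows whichever package path (com.datatorrent.flume.storage or org.apache.apex.malhar.flume.storage) the applied commit uses.

    // Minimal sketch (not part of this commit) of the store/flush/retrieve/clean cycle
    // exercised by HDFSStorageTest above. Paths and ids are illustrative only.
    import com.datatorrent.netlet.util.Slice;
    // import ...flume.storage.HDFSStorage;  // package path depends on the applied commit

    public class HDFSStorageSketch
    {
      public static void main(String[] args)
      {
        HDFSStorage storage = new HDFSStorage();
        storage.setBaseDir("/tmp/flume-storage");  // illustrative HDFS base directory
        storage.setId("1");                        // records land under <baseDir>/<id>
        storage.setRestore(false);
        storage.setup(null);

        byte[] payload = "hello".getBytes();
        byte[] address = storage.store(new Slice(payload, 0, payload.length));  // address of this record
        storage.flush();                           // records become retrievable only after flush

        byte[] record = storage.retrieve(new byte[8]);  // read from the start; first 8 bytes are the address header
        byte[] body = new byte[record.length - 8];
        System.arraycopy(record, 8, body, 0, body.length);
        System.out.println(new String(body));      // prints "hello"

        storage.clean(address);                    // discard everything up to and including this address
        storage.teardown();
      }
    }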


[03/13] apex-malhar git commit: Changed package path for files to be included under malhar. Modifications to build files for project to build under malhar.

Posted by th...@apache.org.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
deleted file mode 100644
index 97e9aa8..0000000
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStoragePerformanceTest.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.primitives.Ints;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- * <p>HDFSStoragePerformanceTest class.</p>
- *
- * @since 1.0.1
- */
-public class HDFSStoragePerformanceTest
-{
-
-  public static void main(String[] args)
-  {
-    HDFSStorage storage = new HDFSStorage();
-    storage.setBaseDir(args[0]);
-    storage.setId(args[1]);
-    storage.setRestore(true);
-    storage.setup(null);
-    int count = 100000000;
-
-    logger.debug(" start time {}", System.currentTimeMillis());
-    int index = 10000;
-    byte[] b = Ints.toByteArray(index);
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    for (int i = 0; i < count; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      index++;
-      b = Ints.toByteArray(index);
-    }
-    storage.flush();
-    logger.debug(" end time {}", System.currentTimeMillis());
-    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
-    b = storage.retrieve(new byte[8]);
-    int org_index = index;
-    index = 10000;
-    match(b, index);
-    while (true) {
-      index++;
-      b = storage.retrieveNext();
-      if (b == null) {
-        logger.debug(" end time for retrieve {}/{}/{}", System.currentTimeMillis(), index, org_index);
-        return;
-      } else {
-        if (!match(b, index)) {
-          throw new RuntimeException("failed : " + index);
-        }
-      }
-    }
-
-  }
-
-  public static boolean match(byte[] data, int match)
-  {
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    int dataR = Ints.fromByteArray(tempData);
-    //logger.debug("input: {}, output: {}",match,dataR);
-    if (match == dataR) {
-      return true;
-    }
-    return false;
-  }
-
-  private static final Logger logger = LoggerFactory.getLogger(HDFSStoragePerformanceTest.class);
-}
-

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java b/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
deleted file mode 100644
index 0cb9935..0000000
--- a/flume/src/test/java/com/datatorrent/flume/storage/HDFSStorageTest.java
+++ /dev/null
@@ -1,695 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package com.datatorrent.flume.storage;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestWatcher;
-import org.junit.runner.Description;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.flume.Context;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.datatorrent.netlet.util.Slice;
-
-/**
- *
- */
-public class HDFSStorageTest
-{
-  public static class TestMeta extends TestWatcher
-  {
-    public String baseDir;
-    public String testFile;
-    private String testData = "No and yes. There is also IdleTimeHandler that allows the operator to emit tuples. " +
-        "There is overlap, why not have a single interface. \n" +
-        "Also consider the possibility of an operator that does other processing and not consume nor emit tuples,";
-
-    @Override
-    protected void starting(org.junit.runner.Description description)
-    {
-      String className = description.getClassName();
-      baseDir = "target/" + className;
-      try {
-        baseDir = (new File(baseDir)).getAbsolutePath();
-        FileUtils.forceMkdir(new File(baseDir));
-        testFile = baseDir + "/testInput.txt";
-        FileOutputStream outputStream = FileUtils.openOutputStream(new File(testFile));
-        outputStream.write(testData.getBytes());
-        outputStream.close();
-
-      } catch (IOException ex) {
-        throw new RuntimeException(ex);
-      }
-    }
-
-    @Override
-    protected void finished(Description description)
-    {
-      try {
-        FileUtils.deleteDirectory(new File(baseDir));
-      } catch (IOException ex) {
-        throw new RuntimeException(ex);
-      }
-    }
-  }
-
-  @Rule
-  public TestMeta testMeta = new TestMeta();
-
-  private String STORAGE_DIRECTORY;
-
-  private HDFSStorage getStorage(String id, boolean restore)
-  {
-    Context ctx = new Context();
-    STORAGE_DIRECTORY = testMeta.baseDir;
-    ctx.put(HDFSStorage.BASE_DIR_KEY, testMeta.baseDir);
-    ctx.put(HDFSStorage.RESTORE_KEY, Boolean.toString(restore));
-    ctx.put(HDFSStorage.ID, id);
-    ctx.put(HDFSStorage.BLOCKSIZE, "256");
-    HDFSStorage lstorage = new HDFSStorage();
-    lstorage.configure(ctx);
-    lstorage.setup(null);
-    return lstorage;
-  }
-
-  private HDFSStorage storage;
-
-  @Before
-  public void setup()
-  {
-    storage = getStorage("1", false);
-  }
-
-  @After
-  public void teardown()
-  {
-    storage.teardown();
-    try {
-      Thread.sleep(100);
-    } catch (InterruptedException e) {
-      throw new RuntimeException(e);
-    }
-    storage.cleanHelperFiles();
-  }
-
-  /**
-   * This test covers following use case 1. Some data is stored 2. File is flush but the file is not close 3. Some more
-   * data is stored but the file doesn't roll-overs 4. Retrieve is called for the last returned address and it return
-   * nulls 5. Some more data is stored again but the address is returned null because of previous retrieve call
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlush() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = "ab".getBytes();
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    b = "cb".getBytes();
-    byte[] addr = storage.store(new Slice(b, 0, b.length));
-    match(storage.retrieve(new byte[8]), "ab");
-    Assert.assertNull(storage.retrieve(addr));
-    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    storage.flush();
-    match(storage.retrieve(address), "cb");
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-  }
-
-  /**
-   * This test covers following use case 1. Some data is stored to make sure that there is no roll over 2. File is
-   * flushed but the file is not closed 3. Some more data is stored. The data stored is enough to make the file roll
-   * over 4. Retrieve is called for the last returned address and it return nulls as the data is not flushed 5. Some
-   * more data is stored again but the address is returned null because of previous retrieve call 6. The data is flushed
-   * to make sure that the data is committed. 7. Now the data is retrieved from the starting and data returned matches
-   * the data stored
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushRollOver() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
-        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
-        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
-        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
-        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
-        48, 46, 48, 1, 48, 46, 48};
-    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
-        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
-        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
-        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
-        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
-        1, 48, 46, 48, 1, 48, 46, 48};
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    byte[] addr = null;
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      addr = storage.store(new Slice(b, 0, b.length));
-    }
-    Assert.assertNull(storage.retrieve(addr));
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    }
-    storage.flush();
-    match(storage.retrieve(new byte[8]), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieve(address), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-
-  }
-
-  /**
-   * This test covers following use case 1. Some data is stored to make sure that there is no roll over 2. File is
-   * flushed but the file is not closed 3. Some more data is stored. The data stored is enough to make the file roll
-   * over 4. The storage crashes and new storage is instiated. 5. Retrieve is called for the last returned address and
-   * it return nulls as the data is not flushed 6. Some more data is stored again but the address is returned null
-   * because of previous retrieve call 7. The data is flushed to make sure that the data is committed. 8. Now the data
-   * is retrieved from the starting and data returned matches the data stored
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushRollOverWithFailure() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
-        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
-        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
-        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
-        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
-        48, 46, 48, 1, 48, 46, 48};
-    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
-        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
-        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
-        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
-        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
-        1, 48, 46, 48, 1, 48, 46, 48};
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    byte[] addr = null;
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      addr = storage.store(new Slice(b, 0, b.length));
-    }
-    storage = getStorage("1", true);
-    Assert.assertNull(storage.retrieve(addr));
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    }
-    storage.flush();
-    match(storage.retrieve(new byte[8]), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieve(address), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-
-  }
-
-  /**
-   * This tests clean when the file doesn't roll over
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushWithClean() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = "ab".getBytes();
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    storage.clean(address);
-    b = "cb".getBytes();
-    byte[] addr = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNull(storage.retrieve(addr));
-    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    storage.flush();
-    match(storage.retrieve(new byte[8]), "cb");
-    match(storage.retrieve(address), "cb");
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-  }
-
-  /**
-   * This tests clean when the file doesn't roll over
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushWithCleanAndFailure() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = "ab".getBytes();
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    storage.clean(address);
-    b = "cb".getBytes();
-    byte[] addr = storage.store(new Slice(b, 0, b.length));
-    storage = getStorage("1", true);
-    Assert.assertNull(storage.retrieve(addr));
-    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    storage.flush();
-    match(storage.retrieve(new byte[8]), "cb");
-    match(storage.retrieve(address), "cb");
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-  }
-
-  /**
-   * This test covers following use case 1. Some data is stored to make sure that there is no roll over 2. File is
-   * flushed but the file is not closed 3. The data is cleaned till the last returned address 4. Some more data is
-   * stored. The data stored is enough to make the file roll over 5. Retrieve is called for the last returned address
-   * and it return nulls as the data is not flushed 6. Some more data is stored again but the address is returned null
-   * because of previous retrieve call 7. The data is flushed to make sure that the data is committed. 8. Now the data
-   * is retrieved from the starting and data returned matches the data stored
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushWithCleanAndRollOver() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
-        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
-        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
-        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
-        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
-        48, 46, 48, 1, 48, 46, 48};
-    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
-        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
-        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
-        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
-        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
-        1, 48, 46, 48, 1, 48, 46, 48};
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    storage.clean(address);
-
-    byte[] addr = null;
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      addr = storage.store(new Slice(b, 0, b.length));
-    }
-    Assert.assertNull(storage.retrieve(addr));
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    }
-    storage.flush();
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieve(new byte[8]), new String(b_org));
-    match(storage.retrieve(address), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-
-  }
-
-  /**
-   * This tests the clean when the files are roll-over and the storage fails
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushWithCleanAndRollOverAndFailure() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53, 52,
-        51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49, 49,
-        45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49, 54,
-        49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49, 53,
-        52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48, 1,
-        48, 46, 48, 1, 48, 46, 48};
-    byte[] b_org = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
-        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
-        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
-        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
-        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
-        1, 48, 46, 48, 1, 48, 46, 48};
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    storage.clean(address);
-    byte[] addr = null;
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      addr = storage.store(new Slice(b, 0, b.length));
-    }
-    storage = getStorage("1", true);
-    Assert.assertNull(storage.retrieve(addr));
-    for (int i = 0; i < 5; i++) {
-      b[0] = (byte)(b[0] + 1);
-      Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    }
-    storage.flush();
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieve(address), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-    b_org[0] = (byte)(b_org[0] + 1);
-    match(storage.retrieveNext(), new String(b_org));
-
-  }
-
-  /**
-   * This test covers following use case The file is flushed and then more data is written to the same file, but the new
-   * data is not flushed and file is not roll over and storage fails The new storage comes up and client asks for data
-   * at the last returned address from earlier storage instance. The new storage returns null. Client stores the data
-   * again but the address returned this time is null and the retrieval of the earlier address now returns data
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testPartialFlushWithFailure() throws Exception
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = "ab".getBytes();
-    byte[] address = storage.store(new Slice(b, 0, b.length));
-    Assert.assertNotNull(address);
-    storage.flush();
-    b = "cb".getBytes();
-    byte[] addr = storage.store(new Slice(b, 0, b.length));
-    storage = getStorage("1", true);
-    Assert.assertNull(storage.retrieve(addr));
-    Assert.assertNull(storage.store(new Slice(b, 0, b.length)));
-    storage.flush();
-    match(storage.retrieve(address), "cb");
-  }
-
-  private void match(byte[] data, String match)
-  {
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    Assert.assertEquals("matched the stored value with retrieved value", match, new String(tempData));
-  }
-
-  @Test
-  public void testStorage() throws IOException
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = new byte[200];
-    byte[] identifier;
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-    storage.flush();
-    byte[] data = storage.retrieve(new byte[8]);
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-    identifier = storage.store(new Slice(b, 0, b.length));
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
-    Assert.assertNull(storage.retrieve(identifier));
-  }
-
-  @Test
-  public void testStorageWithRestore() throws IOException
-  {
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = new byte[200];
-    Assert.assertNotNull(storage.store(new Slice(b, 0, b.length)));
-    storage.flush();
-    storage.teardown();
-
-    storage = getStorage("1", true);
-    storage.store(new Slice(b, 0, b.length));
-    storage.flush();
-    Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
-    boolean exists = fs.exists(new Path(STORAGE_DIRECTORY + "/1/" + "1"));
-    Assert.assertEquals("file should exist", true, exists);
-  }
-
-  @Test
-  public void testCleanup() throws IOException
-  {
-    RandomAccessFile r = new RandomAccessFile(testMeta.testFile, "r");
-    r.seek(0);
-    byte[] b = r.readLine().getBytes();
-    storage.store(new Slice(b, 0, b.length));
-    byte[] val = storage.store(new Slice(b, 0, b.length));
-    storage.flush();
-    storage.clean(val);
-    Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
-    boolean exists = fs.exists(new Path(STORAGE_DIRECTORY + "/" + "0"));
-    Assert.assertEquals("file should not exist", false, exists);
-    r.close();
-  }
-
-  @Test
-  public void testNext() throws IOException
-  {
-    RandomAccessFile r = new RandomAccessFile(testMeta.testFile, "r");
-    r.seek(0);
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    byte[] b = r.readLine().getBytes();
-    storage.store(new Slice(b, 0, b.length));
-    byte[] b1 = r.readLine().getBytes();
-    storage.store(new Slice(b1, 0, b1.length));
-    storage.store(new Slice(b, 0, b.length));
-    storage.flush();
-    storage.store(new Slice(b1, 0, b1.length));
-    storage.store(new Slice(b, 0, b.length));
-    storage.flush();
-    byte[] data = storage.retrieve(new byte[8]);
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
-    data = storage.retrieveNext();
-    tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    Assert.assertEquals("matched the stored value with retrieved value", new String(b1), new String(tempData));
-    data = storage.retrieveNext();
-    tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
-    r.close();
-  }
-
-  @Test
-  public void testFailure() throws IOException
-  {
-    byte[] address;
-    byte[] b = new byte[200];
-    storage.retrieve(new byte[8]);
-    for (int i = 0; i < 5; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      address = storage.store(new Slice(b, 0, b.length));
-      storage.flush();
-      storage.clean(address);
-    }
-    storage.teardown();
-
-    byte[] identifier = new byte[8];
-    storage = getStorage("1", true);
-
-    storage.retrieve(identifier);
-
-    storage.store(new Slice(b, 0, b.length));
-    storage.store(new Slice(b, 0, b.length));
-    storage.store(new Slice(b, 0, b.length));
-    storage.flush();
-    byte[] data = storage.retrieve(identifier);
-    byte[] tempData = new byte[data.length - 8];
-    System.arraycopy(data, 8, tempData, 0, tempData.length);
-    Assert.assertEquals("matched the stored value with retrieved value", new String(b), new String(tempData));
-  }
-
-  /**
-   * This test case tests the clean call before any flush is called.
-   *
-   * @throws IOException
-   */
-  @Test
-  public void testCleanUnflushedData() throws IOException
-  {
-    for (int i = 0; i < 5; i++) {
-      final byte[] bytes = (i + "").getBytes();
-      storage.store(new Slice(bytes, 0, bytes.length));
-    }
-    storage.clean(new byte[8]);
-    storage.flush();
-    match(storage.retrieve(new byte[8]), "0");
-    match(storage.retrieveNext(), "1");
-  }
-
-  @Test
-  public void testCleanForUnflushedData() throws IOException
-  {
-    byte[] address = null;
-    byte[] b = new byte[200];
-    storage.retrieve(new byte[8]);
-    for (int i = 0; i < 5; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      address = storage.store(new Slice(b, 0, b.length));
-      storage.flush();
-      // storage.clean(address);
-    }
-    byte[] lastWrittenAddress = null;
-    for (int i = 0; i < 5; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
-    }
-    storage.clean(lastWrittenAddress);
-    byte[] cleanedOffset = storage.readData(new Path(STORAGE_DIRECTORY + "/1/cleanoffsetFile"));
-    Assert.assertArrayEquals(address, cleanedOffset);
-
-  }
-
-  @Test
-  public void testCleanForFlushedData() throws IOException
-  {
-    byte[] b = new byte[200];
-    storage.retrieve(new byte[8]);
-    for (int i = 0; i < 5; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      storage.store(new Slice(b, 0, b.length));
-      storage.flush();
-      // storage.clean(address);
-    }
-    byte[] lastWrittenAddress = null;
-    for (int i = 0; i < 5; i++) {
-      storage.store(new Slice(b, 0, b.length));
-      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
-    }
-    storage.flush();
-    storage.clean(lastWrittenAddress);
-    byte[] cleanedOffset = storage.readData(new Path(STORAGE_DIRECTORY + "/1/cleanoffsetFile"));
-    Assert.assertArrayEquals(lastWrittenAddress, cleanedOffset);
-
-  }
-
-  @Test
-  public void testCleanForPartialFlushedData() throws IOException
-  {
-    byte[] b = new byte[8];
-    storage.retrieve(new byte[8]);
-
-    storage.store(new Slice(b, 0, b.length));
-    byte[] bytes = "1a".getBytes();
-    byte[] address = storage.store(new Slice(bytes, 0, bytes.length));
-    storage.flush();
-    storage.clean(address);
-
-    byte[] lastWrittenAddress = null;
-    for (int i = 0; i < 5; i++) {
-      final byte[] bytes1 = (i + "").getBytes();
-      storage.store(new Slice(bytes1, 0, bytes1.length));
-      lastWrittenAddress = storage.store(new Slice(b, 0, b.length));
-    }
-    Assert.assertNull(storage.retrieve(new byte[8]));
-    Assert.assertNull(storage.retrieve(lastWrittenAddress));
-    storage.store(new Slice(b, 0, b.length));
-    storage.flush();
-    Assert.assertNull(storage.retrieve(lastWrittenAddress));
-  }
-
-  @Test
-  public void testRandomSequence() throws IOException
-  {
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    byte[] bytes = new byte[]{48, 48, 48, 51, 101, 100, 55, 56, 55, 49, 53, 99, 52, 101, 55, 50, 97, 52, 48, 49, 51,
-        99, 97, 54, 102, 57, 55, 53, 57, 100, 49, 99, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51,
-        45, 49, 49, 45, 48, 55, 32, 48, 48, 58, 48, 48, 58, 52, 54, 1, 52, 50, 49, 50, 51, 1, 50, 1, 49, 53, 49, 49,
-        52, 50, 54, 53, 1, 49, 53, 49, 49, 57, 51, 53, 49, 1, 49, 53, 49, 50, 57, 56, 50, 52, 1, 49, 53, 49, 50, 49,
-        55, 48, 55, 1, 49, 48, 48, 55, 55, 51, 57, 51, 1, 49, 57, 49, 52, 55, 50, 53, 52, 54, 49, 1, 49, 1, 48, 1, 48,
-        46, 48, 1, 48, 46, 48, 1, 48, 46, 48};
-    storage.store(new Slice(bytes, 0, bytes.length));
-    storage.flush();
-    storage.clean(new byte[]{-109, 0, 0, 0, 0, 0, 0, 0});
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 2555; i++) {
-      byte[] bytes1 = new byte[]{48, 48, 48, 55, 56, 51, 98, 101, 50, 54, 50, 98, 52, 102, 50, 54, 56, 97, 55, 56, 102,
-          48, 54, 54, 50, 49, 49, 54, 99, 98, 101, 99, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51,
-          45, 49, 49, 45, 48, 55, 32, 48, 48, 58, 48, 48, 58, 53, 49, 1, 49, 49, 49, 49, 54, 51, 57, 1, 50, 1, 49, 53,
-          49, 48, 57, 57, 56, 51, 1, 49, 53, 49, 49, 49, 55, 48, 52, 1, 49, 53, 49, 50, 49, 51, 55, 49, 1, 49, 53, 49,
-          49, 52, 56, 51, 49, 1, 49, 48, 48, 55, 49, 57, 56, 49, 1, 49, 50, 48, 50, 55, 54, 49, 54, 56, 53, 1, 49, 1,
-          48, 1, 48, 46, 48, 1, 48, 46, 48, 1, 48, 46, 48};
-      storage.store(new Slice(bytes1, 0, bytes1.length));
-      storage.flush();
-    }
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 1297; i++) {
-      storage.retrieveNext();
-    }
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 1302; i++) {
-      storage.retrieveNext();
-    }
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 1317; i++) {
-      storage.retrieveNext();
-    }
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 2007; i++) {
-      storage.retrieveNext();
-    }
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 2556; i++) {
-      storage.retrieveNext();
-    }
-    byte[] bytes1 = new byte[]{48, 48, 48, 48, 98, 48, 52, 54, 49, 57, 55, 51, 52, 97, 53, 101, 56, 56, 97, 55, 98, 53,
-        52, 51, 98, 50, 102, 51, 49, 97, 97, 54, 1, 50, 48, 49, 51, 45, 49, 49, 45, 48, 55, 1, 50, 48, 49, 51, 45, 49,
-        49, 45, 48, 55, 32, 48, 48, 58, 51, 49, 58, 52, 56, 1, 49, 48, 53, 53, 57, 52, 50, 1, 50, 1, 49, 53, 49, 49,
-        54, 49, 56, 52, 1, 49, 53, 49, 49, 57, 50, 49, 49, 1, 49, 53, 49, 50, 57, 54, 54, 53, 1, 49, 53, 49, 50, 49,
-        53, 52, 56, 1, 49, 48, 48, 56, 48, 51, 52, 50, 1, 55, 56, 56, 50, 54, 53, 52, 56, 1, 49, 1, 48, 1, 48, 46, 48,
-        1, 48, 46, 48, 1, 48, 46, 48};
-    storage.store(new Slice(bytes1, 0, bytes1.length));
-    storage.flush();
-    storage.retrieve(new byte[]{0, 0, 0, 0, 0, 0, 0, 0});
-    for (int i = 0; i < 2062; i++) {
-      storage.retrieveNext();
-
-    }
-  }
-
-  @SuppressWarnings("unused")
-  private static final Logger logger = LoggerFactory.getLogger(HDFSStorageTest.class);
-}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
new file mode 100644
index 0000000..6503357
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/discovery/ZKAssistedDiscoveryTest.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.discovery;
+
+import org.codehaus.jackson.type.TypeReference;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.curator.x.discovery.ServiceInstance;
+import org.apache.curator.x.discovery.details.InstanceSerializer;
+
+import com.datatorrent.flume.discovery.Discovery.Service;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ *
+ */
+@Ignore
+public class ZKAssistedDiscoveryTest
+{
+  public ZKAssistedDiscoveryTest()
+  {
+  }
+
+  @Test
+  public void testSerialization() throws Exception
+  {
+    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
+    discovery.setServiceName("DTFlumeTest");
+    discovery.setConnectionString("localhost:2181");
+    discovery.setBasePath("/HelloDT");
+    discovery.setup(null);
+    ServiceInstance<byte[]> instance = discovery.getInstance(new Service<byte[]>()
+    {
+      @Override
+      public String getHost()
+      {
+        return "localhost";
+      }
+
+      @Override
+      public int getPort()
+      {
+        return 8080;
+      }
+
+      @Override
+      public byte[] getPayload()
+      {
+        return null;
+      }
+
+      @Override
+      public String getId()
+      {
+        return "localhost8080";
+      }
+
+    });
+    InstanceSerializer<byte[]> instanceSerializer =
+        discovery.getInstanceSerializerFactory().getInstanceSerializer(new TypeReference<ServiceInstance<byte[]>>()
+        {
+        });
+    byte[] serialize = instanceSerializer.serialize(instance);
+    logger.debug("serialized json = {}", new String(serialize));
+    ServiceInstance<byte[]> deserialize = instanceSerializer.deserialize(serialize);
+    assertArrayEquals("Metadata", instance.getPayload(), deserialize.getPayload());
+  }
+
+  @Test
+  public void testDiscover()
+  {
+    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
+    discovery.setServiceName("DTFlumeTest");
+    discovery.setConnectionString("localhost:2181");
+    discovery.setBasePath("/HelloDT");
+    discovery.setup(null);
+    assertNotNull("Discovered Sinks", discovery.discover());
+    discovery.teardown();
+  }
+
+  @Test
+  public void testAdvertize()
+  {
+    ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
+    discovery.setServiceName("DTFlumeTest");
+    discovery.setConnectionString("localhost:2181");
+    discovery.setBasePath("/HelloDT");
+    discovery.setup(null);
+
+    Service<byte[]> service = new Service<byte[]>()
+    {
+      @Override
+      public String getHost()
+      {
+        return "chetan";
+      }
+
+      @Override
+      public int getPort()
+      {
+        return 5033;
+      }
+
+      @Override
+      public byte[] getPayload()
+      {
+        return new byte[] {3, 2, 1};
+      }
+
+      @Override
+      public String getId()
+      {
+        return "uniqueId";
+      }
+
+    };
+    discovery.advertise(service);
+    discovery.teardown();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ZKAssistedDiscoveryTest.class);
+}
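
Both tests above are ignored by default because they need a live ZooKeeper at localhost:2181. As a rough sketch, not part of the commit, of how the advertise/discover round trip fits together (the class name, service name, base path, port and id below are illustrative; import paths for ZKAssistedDiscovery and Discovery.Service follow whichever package the applied commit uses):

    // Sketch only (not part of this commit); assumes a reachable ZooKeeper ensemble.
    public class DiscoverySketch
    {
      public static void main(String[] args)
      {
        ZKAssistedDiscovery discovery = new ZKAssistedDiscovery();
        discovery.setServiceName("FlumeSinkDemo");      // illustrative service name
        discovery.setConnectionString("localhost:2181");
        discovery.setBasePath("/flume-sinks");          // illustrative base path
        discovery.setup(null);

        // register one sink endpoint, mirroring the anonymous Service used in testAdvertize
        discovery.advertise(new Discovery.Service<byte[]>()
        {
          @Override public String getHost()    { return "127.0.0.1"; }
          @Override public int getPort()       { return 5033; }
          @Override public byte[] getPayload() { return null; }
          @Override public String getId()      { return "sink-1"; }
        });

        // discover() returns the services currently registered under the base path
        System.out.println("discovered sinks: " + discovery.discover());

        discovery.teardown();
      }
    }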

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
new file mode 100644
index 0000000..10153bc
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/integration/ApplicationTest.java
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.integration;
+
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Event;
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.Context.OperatorContext;
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DAG.Locality;
+import com.datatorrent.api.DefaultInputPort;
+import com.datatorrent.api.LocalMode;
+import com.datatorrent.api.Operator;
+import com.datatorrent.api.StreamingApplication;
+import org.apache.apex.malhar.flume.operator.AbstractFlumeInputOperator;
+import org.apache.apex.malhar.flume.storage.EventCodec;
+
+/**
+ *
+ */
+@Ignore
+public class ApplicationTest implements StreamingApplication
+{
+  public static class FlumeInputOperator extends AbstractFlumeInputOperator<Event>
+  {
+    @Override
+    public Event convert(Event event)
+    {
+      return event;
+    }
+  }
+
+  public static class Counter implements Operator
+  {
+    private int count;
+    private transient Event event;
+    public final transient DefaultInputPort<Event> input = new DefaultInputPort<Event>()
+    {
+      @Override
+      public void process(Event tuple)
+      {
+        count++;
+        event = tuple;
+      }
+
+    };
+
+    @Override
+    public void beginWindow(long windowId)
+    {
+    }
+
+    @Override
+    public void endWindow()
+    {
+      logger.debug("total count = {}, tuple = {}", count, event);
+    }
+
+    @Override
+    public void setup(OperatorContext context)
+    {
+    }
+
+    @Override
+    public void teardown()
+    {
+    }
+
+    private static final Logger logger = LoggerFactory.getLogger(Counter.class);
+  }
+
+  @Override
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+    dag.setAttribute(com.datatorrent.api.Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS, 1000);
+    FlumeInputOperator flume = dag.addOperator("FlumeOperator", new FlumeInputOperator());
+    flume.setConnectAddresses(new String[]{"test:127.0.0.1:8080"});
+    flume.setCodec(new EventCodec());
+    Counter counter = dag.addOperator("Counter", new Counter());
+
+    dag.addStream("Slices", flume.output, counter.input).setLocality(Locality.CONTAINER_LOCAL);
+  }
+
+  @Test
+  public void test()
+  {
+    try {
+      LocalMode.runApp(this, Integer.MAX_VALUE);
+    } catch (Exception ex) {
+      logger.warn("The dag seems to be not testable yet, if it's - remove this exception handling", ex);
+    }
+
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(ApplicationTest.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
new file mode 100644
index 0000000..47bcacf
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringFormattingInterceptorTest.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.interceptor;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.flume.Context;
+import org.apache.flume.interceptor.Interceptor;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ * Tests for {@link ColumnFilteringFormattingInterceptor}
+ */
+public class ColumnFilteringFormattingInterceptorTest
+{
+  private static InterceptorTestHelper helper;
+
+  @BeforeClass
+  public static void startUp()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{1}\001{2}\001{3}\001");
+
+    helper = new InterceptorTestHelper(new ColumnFilteringFormattingInterceptor.Builder(), contextMap);
+  }
+
+  @Test
+  public void testInterceptEvent()
+  {
+    helper.testIntercept_Event();
+  }
+
+  @Test
+  public void testFiles() throws IOException, URISyntaxException
+  {
+    helper.testFiles();
+  }
+
+  @Test
+  public void testInterceptEventWithPrefix()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "\001{1}\001{2}\001{3}\001");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Six Fields",
+        "\001\001Second\001\001".getBytes(),
+        interceptor.intercept(
+        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody());
+  }
+
+  @Test
+  public void testInterceptEventWithLongSeparator()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "a{1}bc{2}def{3}ghi");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+    byte[] body = interceptor.intercept(
+        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody();
+
+    assertArrayEquals("Six Fields, " + new String(body), "abcSeconddefghi".getBytes(), body);
+  }
+
+  @Test
+  public void testInterceptEventWithTerminatingSeparator()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "a{1}bc{2}def{3}");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+    byte[] body = interceptor.intercept(
+        new InterceptorTestHelper.MyEvent("First\002\002Second\002\002\002".getBytes())).getBody();
+
+    assertArrayEquals("Six Fields, " + new String(body), "abcSeconddef".getBytes(), body);
+  }
+
+  @Test
+  public void testInterceptEventWithColumnZero()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringFormattingInterceptor.Constants.COLUMNS_FORMATTER, "{0}\001");
+
+    ColumnFilteringFormattingInterceptor.Builder builder = new ColumnFilteringFormattingInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Empty Bytes",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("".getBytes())).getBody());
+
+    assertArrayEquals("One Field",
+        "First\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("\002First".getBytes())).getBody());
+  }
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptorTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptorTest.java
new file mode 100644
index 0000000..b001b21
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/ColumnFilteringInterceptorTest.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.interceptor;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.flume.Context;
+import org.apache.flume.interceptor.Interceptor;
+
+import static org.junit.Assert.assertArrayEquals;
+
+/**
+ *
+ */
+public class ColumnFilteringInterceptorTest
+{
+  private static InterceptorTestHelper helper;
+
+  @BeforeClass
+  public static void startUp()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, Byte.toString((byte)1));
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringInterceptor.Constants.COLUMNS, "1 2 3");
+
+    helper = new InterceptorTestHelper(new ColumnFilteringInterceptor.Builder(), contextMap);
+  }
+
+  @Test
+  public void testInterceptEvent()
+  {
+    helper.testIntercept_Event();
+  }
+
+  @Test
+  public void testFiles() throws IOException, URISyntaxException
+  {
+    helper.testFiles();
+  }
+
+  @Test
+  public void testInterceptEventWithColumnZero()
+  {
+    HashMap<String, String> contextMap = new HashMap<String, String>();
+    contextMap.put(ColumnFilteringInterceptor.Constants.DST_SEPARATOR, Byte.toString((byte)1));
+    contextMap.put(ColumnFilteringInterceptor.Constants.SRC_SEPARATOR, Byte.toString((byte)2));
+    contextMap.put(ColumnFilteringInterceptor.Constants.COLUMNS, "0");
+
+    ColumnFilteringInterceptor.Builder builder = new ColumnFilteringInterceptor.Builder();
+    builder.configure(new Context(contextMap));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Empty Bytes",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("".getBytes())).getBody());
+
+    assertArrayEquals("One Field",
+        "First\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "\001".getBytes(),
+        interceptor.intercept(new InterceptorTestHelper.MyEvent("\002First".getBytes())).getBody());
+  }
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/InterceptorTestHelper.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/InterceptorTestHelper.java b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/InterceptorTestHelper.java
new file mode 100644
index 0000000..b8dfbe0
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/InterceptorTestHelper.java
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.interceptor;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.junit.Assert;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+import com.datatorrent.netlet.util.Slice;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ *
+ */
+public class InterceptorTestHelper
+{
+  private static final byte FIELD_SEPARATOR = 1;
+
+  static class MyEvent implements Event
+  {
+    byte[] body;
+
+    MyEvent(byte[] bytes)
+    {
+      body = bytes;
+    }
+
+    @Override
+    public Map<String, String> getHeaders()
+    {
+      return null;
+    }
+
+    @Override
+    public void setHeaders(Map<String, String> map)
+    {
+    }
+
+    @Override
+    @SuppressWarnings("ReturnOfCollectionOrArrayField")
+    public byte[] getBody()
+    {
+      return body;
+    }
+
+    @Override
+    @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")
+    public void setBody(byte[] bytes)
+    {
+      body = bytes;
+    }
+  }
+
+  private final Interceptor.Builder builder;
+  private final Map<String, String> context;
+
+  InterceptorTestHelper(Interceptor.Builder builder, Map<String, String> context)
+  {
+    this.builder = builder;
+    this.context = context;
+  }
+
+  public void testIntercept_Event()
+  {
+    builder.configure(new Context(context));
+    Interceptor interceptor = builder.build();
+
+    assertArrayEquals("Empty Bytes",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("".getBytes())).getBody());
+
+    assertArrayEquals("One Separator",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("\002".getBytes())).getBody());
+
+    assertArrayEquals("Two Separators",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("\002\002".getBytes())).getBody());
+
+    assertArrayEquals("One Field",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "First\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("\002First".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\001".getBytes())).getBody());
+
+    assertArrayEquals("Two Fields",
+        "Second\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002Second".getBytes())).getBody());
+
+    assertArrayEquals("Three Fields",
+        "Second\001\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002Second\002".getBytes())).getBody());
+
+    assertArrayEquals("Three Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second".getBytes())).getBody());
+
+    assertArrayEquals("Four Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second\002".getBytes())).getBody());
+
+    assertArrayEquals("Five Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second\002\002".getBytes())).getBody());
+
+    assertArrayEquals("Six Fields",
+        "\001Second\001\001".getBytes(),
+        interceptor.intercept(new MyEvent("First\002\002Second\002\002\002".getBytes())).getBody());
+  }
+
+  public void testFiles() throws IOException, URISyntaxException
+  {
+    Properties properties = new Properties();
+    properties.load(getClass().getResourceAsStream("/flume/conf/flume-conf.properties"));
+
+    String interceptor = null;
+    for (Entry<Object, Object> entry : properties.entrySet()) {
+      logger.debug("{} => {}", entry.getKey(), entry.getValue());
+
+      if (builder.getClass().getName().equals(entry.getValue().toString())) {
+        String key = entry.getKey().toString();
+        if (key.endsWith(".type")) {
+          interceptor = key.substring(0, key.length() - "type".length());
+          break;
+        }
+      }
+    }
+
+    assertNotNull(builder.getClass().getName(), interceptor);
+    @SuppressWarnings({"null", "ConstantConditions"})
+    final int interceptorLength = interceptor.length();
+
+    HashMap<String, String> map = new HashMap<String, String>();
+    for (Entry<Object, Object> entry : properties.entrySet()) {
+      String key = entry.getKey().toString();
+      if (key.startsWith(interceptor)) {
+        map.put(key.substring(interceptorLength), entry.getValue().toString());
+      }
+    }
+
+    builder.configure(new Context(map));
+    Interceptor interceptorInstance = builder.build();
+
+    URL url = getClass().getResource("/test_data/gentxns/");
+    assertNotNull("Generated Transactions", url);
+
+    int records = 0;
+    File dir = new File(url.toURI());
+    for (File file : dir.listFiles()) {
+      records += processFile(file, interceptorInstance);
+    }
+
+    Assert.assertEquals("Total Records", 2200, records);
+  }
+
+  private int processFile(File file, Interceptor interceptor) throws IOException
+  {
+    InputStream stream = getClass().getResourceAsStream("/test_data/gentxns/" + file.getName());
+    BufferedReader br = new BufferedReader(new InputStreamReader(stream));
+
+    String line;
+    int i = 0;
+    while ((line = br.readLine()) != null) {
+      byte[] body = interceptor.intercept(new MyEvent(line.getBytes())).getBody();
+      RawEvent event = RawEvent.from(body, FIELD_SEPARATOR);
+      Assert.assertEquals("GUID", new Slice(line.getBytes(), 0, 32), event.guid);
+      logger.debug("guid = {}, time = {}", event.guid, event.time);
+      i++;
+    }
+
+    br.close();
+    return i;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(InterceptorTestHelper.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/RawEvent.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/RawEvent.java b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/RawEvent.java
new file mode 100644
index 0000000..cf6a823
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/interceptor/RawEvent.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.interceptor;
+
+import java.io.Serializable;
+
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ */
+public class RawEvent implements Serializable
+{
+  public Slice guid;
+  public long time;
+  public int dimensionsOffset;
+
+  public Slice getGUID()
+  {
+    return guid;
+  }
+
+  public long getTime()
+  {
+    return time;
+  }
+
+  RawEvent()
+  {
+    /* needed for Kryo serialization */
+  }
+
+  public static RawEvent from(byte[] row, byte separator)
+  {
+    final int rowsize = row.length;
+
+    /*
+     * Let's get the guid out of the current record
+     */
+    int sliceLengh = -1;
+    while (++sliceLengh < rowsize) {
+      if (row[sliceLengh] == separator) {
+        break;
+      }
+    }
+
+    int i = sliceLengh + 1;
+
+    /* let's parse the date */
+    int dateStart = i;
+    while (i < rowsize) {
+      if (row[i++] == separator) {
+        long time = DATE_PARSER.parseMillis(new String(row, dateStart, i - dateStart - 1));
+        RawEvent event = new RawEvent();
+        event.guid = new Slice(row, 0, sliceLengh);
+        event.time = time;
+        event.dimensionsOffset = i;
+        return event;
+      }
+    }
+
+    return null;
+  }
+
+  @Override
+  public int hashCode()
+  {
+    int hash = 5;
+    hash = 61 * hash + (this.guid != null ? this.guid.hashCode() : 0);
+    hash = 61 * hash + (int)(this.time ^ (this.time >>> 32));
+    return hash;
+  }
+
+  @Override
+  public String toString()
+  {
+    return "RawEvent{" + "guid=" + guid + ", time=" + time + '}';
+  }
+
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (obj == null) {
+      return false;
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    final RawEvent other = (RawEvent)obj;
+    if (this.guid != other.guid && (this.guid == null || !this.guid.equals(other.guid))) {
+      return false;
+    }
+    return this.time == other.time;
+  }
+
+  private static final DateTimeFormatter DATE_PARSER = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
+  private static final Logger logger = LoggerFactory.getLogger(RawEvent.class);
+  private static final long serialVersionUID = 201312191312L;
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperatorTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperatorTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperatorTest.java
new file mode 100644
index 0000000..d7c4c30
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/operator/AbstractFlumeInputOperatorTest.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.operator;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ *
+ */
+public class AbstractFlumeInputOperatorTest
+{
+  public AbstractFlumeInputOperatorTest()
+  {
+  }
+
+  @Test
+  public void testThreadLocal()
+  {
+    ThreadLocal<Set<Integer>> tl = new ThreadLocal<Set<Integer>>()
+    {
+      @Override
+      protected Set<Integer> initialValue()
+      {
+        return new HashSet<Integer>();
+      }
+
+    };
+    Set<Integer> get1 = tl.get();
+    get1.add(1);
+    assertTrue("Just Added Value", get1.contains(1));
+
+    Set<Integer> get2 = tl.get();
+    assertTrue("Previously added value", get2.contains(1));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
new file mode 100644
index 0000000..9bc69e8
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/sink/DTFlumeSinkTest.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.sink;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.flume.channel.MemoryChannel;
+
+import org.apache.apex.malhar.flume.discovery.Discovery;
+import com.datatorrent.netlet.AbstractLengthPrependerClient;
+import com.datatorrent.netlet.DefaultEventLoop;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ */
+public class DTFlumeSinkTest
+{
+  static final String hostname = "localhost";
+  int port = 0;
+
+  @Test
+  @SuppressWarnings("SleepWhileInLoop")
+  public void testServer() throws InterruptedException, IOException
+  {
+    Discovery<byte[]> discovery = new Discovery<byte[]>()
+    {
+      @Override
+      public synchronized void unadvertise(Service<byte[]> service)
+      {
+        notify();
+      }
+
+      @Override
+      public synchronized void advertise(Service<byte[]> service)
+      {
+        port = service.getPort();
+        logger.debug("listening at {}", service);
+        notify();
+      }
+
+      @Override
+      @SuppressWarnings("unchecked")
+      public synchronized Collection<Service<byte[]>> discover()
+      {
+        try {
+          wait();
+        } catch (InterruptedException ie) {
+          throw new RuntimeException(ie);
+        }
+        return Collections.EMPTY_LIST;
+      }
+
+    };
+    DTFlumeSink sink = new DTFlumeSink();
+    sink.setName("TeskSink");
+    sink.setHostname(hostname);
+    sink.setPort(0);
+    sink.setAcceptedTolerance(2000);
+    sink.setChannel(new MemoryChannel());
+    sink.setDiscovery(discovery);
+    sink.start();
+    AbstractLengthPrependerClient client = new AbstractLengthPrependerClient()
+    {
+      private byte[] array;
+      private int offset = 2;
+
+      @Override
+      public void onMessage(byte[] buffer, int offset, int size)
+      {
+        Slice received = new Slice(buffer, offset, size);
+        logger.debug("Client Received = {}", received);
+        Assert.assertEquals(received,
+            new Slice(Arrays.copyOfRange(array, this.offset, array.length), 0, Server.Request.FIXED_SIZE));
+        synchronized (DTFlumeSinkTest.this) {
+          DTFlumeSinkTest.this.notify();
+        }
+      }
+
+      @Override
+      public void connected()
+      {
+        super.connected();
+        array = new byte[Server.Request.FIXED_SIZE + offset];
+        array[offset] = Server.Command.ECHO.getOrdinal();
+        array[offset + 1] = 1;
+        array[offset + 2] = 2;
+        array[offset + 3] = 3;
+        array[offset + 4] = 4;
+        array[offset + 5] = 5;
+        array[offset + 6] = 6;
+        array[offset + 7] = 7;
+        array[offset + 8] = 8;
+        Server.writeLong(array, offset + Server.Request.TIME_OFFSET, System.currentTimeMillis());
+        write(array, offset, Server.Request.FIXED_SIZE);
+      }
+
+    };
+
+    DefaultEventLoop eventloop = new DefaultEventLoop("Eventloop-TestClient");
+    eventloop.start();
+    discovery.discover();
+    try {
+      eventloop.connect(new InetSocketAddress(hostname, port), client);
+      try {
+        synchronized (this) {
+          this.wait();
+        }
+      } finally {
+        eventloop.disconnect(client);
+      }
+    } finally {
+      eventloop.stop();
+    }
+
+    sink.stop();
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(DTFlumeSinkTest.class);
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/sink/ServerTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/sink/ServerTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/sink/ServerTest.java
new file mode 100644
index 0000000..a893ebd
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/sink/ServerTest.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.sink;
+
+import java.util.Random;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class ServerTest
+{
+  byte[] array;
+
+  public ServerTest()
+  {
+    array = new byte[1024];
+  }
+
+  @Test
+  public void testInt()
+  {
+    Server.writeInt(array, 0, Integer.MAX_VALUE);
+    Assert.assertEquals("Max Integer", Integer.MAX_VALUE, Server.readInt(array, 0));
+
+    Server.writeInt(array, 0, Integer.MIN_VALUE);
+    Assert.assertEquals("Min Integer", Integer.MIN_VALUE, Server.readInt(array, 0));
+
+    Server.writeInt(array, 0, 0);
+    Assert.assertEquals("Zero Integer", 0, Server.readInt(array, 0));
+
+    Random rand = new Random();
+    for (int i = 0; i < 128; i++) {
+      int n = rand.nextInt();
+      if (rand.nextBoolean()) {
+        n = -n;
+      }
+      Server.writeInt(array, 0, n);
+      Assert.assertEquals("Random Integer", n, Server.readInt(array, 0));
+    }
+  }
+
+  @Test
+  public void testLong()
+  {
+    Server.writeLong(array, 0, Integer.MAX_VALUE);
+    Assert.assertEquals("Max Integer", Integer.MAX_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, Integer.MIN_VALUE);
+    Assert.assertEquals("Min Integer", Integer.MIN_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, 0);
+    Assert.assertEquals("Zero Integer", 0L, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, Long.MAX_VALUE);
+    Assert.assertEquals("Max Long", Long.MAX_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, Long.MIN_VALUE);
+    Assert.assertEquals("Min Long", Long.MIN_VALUE, Server.readLong(array, 0));
+
+    Server.writeLong(array, 0, 0L);
+    Assert.assertEquals("Zero Long", 0L, Server.readLong(array, 0));
+
+    Random rand = new Random();
+    for (int i = 0; i < 128; i++) {
+      long n = rand.nextLong();
+      if (rand.nextBoolean()) {
+        n = -n;
+      }
+      Server.writeLong(array, 0, n);
+      Assert.assertEquals("Random Long", n, Server.readLong(array, 0));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageMatching.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageMatching.java b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageMatching.java
new file mode 100644
index 0000000..4a714fe
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStorageMatching.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.primitives.Ints;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ */
+public class HDFSStorageMatching
+{
+
+  public static void main(String[] args)
+  {
+    HDFSStorage storage = new HDFSStorage();
+    storage.setBaseDir(args[0]);
+    storage.setId(args[1]);
+    storage.setRestore(true);
+    storage.setup(null);
+    int count = 100000000;
+
+    logger.debug(" start time {}", System.currentTimeMillis());
+    int index = 10000;
+    byte[] b = Ints.toByteArray(index);
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    logger.debug(" end time {}", System.currentTimeMillis());
+    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
+    b = storage.retrieve(new byte[8]);
+    int org_index = index;
+    index = 10000;
+    match(b, index);
+    while (true) {
+      index++;
+      b = storage.retrieveNext();
+      if (b == null) {
+        logger.debug(" end time for retrieve {}/{}/{}", System.currentTimeMillis(), index, org_index);
+        return;
+      } else {
+        if (!match(b, index)) {
+          throw new RuntimeException("failed : " + index);
+        }
+      }
+    }
+
+  }
+
+  public static boolean match(byte[] data, int match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    int dataR = Ints.fromByteArray(tempData);
+    //logger.debug("input: {}, output: {}",match,dataR);
+    if (match == dataR) {
+      return true;
+    }
+    return false;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStorageMatching.class);
+}
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformance.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformance.java b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformance.java
new file mode 100644
index 0000000..6ed3892
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformance.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ *
+ */
+public class HDFSStoragePerformance
+{
+
+  public static void main(String[] args)
+  {
+    HDFSStorage storage = new HDFSStorage();
+    storage.setBaseDir(".");
+    storage.setId("gaurav_flume_1");
+    storage.setRestore(true);
+    storage.setup(null);
+    int count = 1000000;
+
+    logger.debug(" start time {}", System.currentTimeMillis());
+    int index = 10000;
+    byte[] b = new byte[1024];
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+    }
+    storage.flush();
+    logger.debug(" end time {}", System.currentTimeMillis());
+    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
+    storage.retrieve(new byte[8]);
+    String inputData = new String(b);
+    index = 1;
+    while (true) {
+      b = storage.retrieveNext();
+      if (b == null) {
+        logger.debug(" end time for retrieve {}", System.currentTimeMillis());
+        return;
+      } else {
+        if (!match(b, inputData)) {
+          throw new RuntimeException("failed : " + index);
+        }
+      }
+
+      index++;
+    }
+
+  }
+
+  public static boolean match(byte[] data, String match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+//    logger.debug("input: {}, output: {}",match,new String(tempData));
+    return (match.equals(new String(tempData)));
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStoragePerformance.class);
+}
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/d200737b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformanceTest.java
----------------------------------------------------------------------
diff --git a/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformanceTest.java b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformanceTest.java
new file mode 100644
index 0000000..72f03fc
--- /dev/null
+++ b/flume/src/test/java/org/apache/apex/malhar/flume/storage/HDFSStoragePerformanceTest.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.apex.malhar.flume.storage;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.primitives.Ints;
+
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * <p>HDFSStoragePerformanceTest class.</p>
+ *
+ * @since 1.0.1
+ */
+public class HDFSStoragePerformanceTest
+{
+
+  public static void main(String[] args)
+  {
+    HDFSStorage storage = new HDFSStorage();
+    storage.setBaseDir(args[0]);
+    storage.setId(args[1]);
+    storage.setRestore(true);
+    storage.setup(null);
+    int count = 100000000;
+
+    logger.debug(" start time {}", System.currentTimeMillis());
+    int index = 10000;
+    byte[] b = Ints.toByteArray(index);
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    for (int i = 0; i < count; i++) {
+      storage.store(new Slice(b, 0, b.length));
+      index++;
+      b = Ints.toByteArray(index);
+    }
+    storage.flush();
+    logger.debug(" end time {}", System.currentTimeMillis());
+    logger.debug(" start time for retrieve {}", System.currentTimeMillis());
+    b = storage.retrieve(new byte[8]);
+    int org_index = index;
+    index = 10000;
+    match(b, index);
+    while (true) {
+      index++;
+      b = storage.retrieveNext();
+      if (b == null) {
+        logger.debug(" end time for retrieve {}/{}/{}", System.currentTimeMillis(), index, org_index);
+        return;
+      } else {
+        if (!match(b, index)) {
+          throw new RuntimeException("failed : " + index);
+        }
+      }
+    }
+
+  }
+
+  public static boolean match(byte[] data, int match)
+  {
+    byte[] tempData = new byte[data.length - 8];
+    System.arraycopy(data, 8, tempData, 0, tempData.length);
+    int dataR = Ints.fromByteArray(tempData);
+    //logger.debug("input: {}, output: {}",match,dataR);
+    if (match == dataR) {
+      return true;
+    }
+    return false;
+  }
+
+  private static final Logger logger = LoggerFactory.getLogger(HDFSStoragePerformanceTest.class);
+}
+