You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@apex.apache.org by th...@apache.org on 2017/03/27 15:19:08 UTC
[07/19] apex-malhar git commit: Added deduper example. Updated malhar
version to 3.5.0
Added deduper example.
Updated malhar version to 3.5.0
Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/45735280
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/45735280
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/45735280
Branch: refs/heads/master
Commit: 4573528050751bc4d9e440bcd7e7a08560956106
Parents: 63df474
Author: bhupeshchawda <bh...@gmail.com>
Authored: Mon Jul 25 19:02:17 2016 +0530
Committer: Lakshmi Prasanna Velineni <la...@datatorrent.com>
Committed: Sun Mar 26 11:43:48 2017 -0700
----------------------------------------------------------------------
examples/dedup/README.md | 16 ++
examples/dedup/XmlJavadocCommentsExtractor.xsl | 44 +++
examples/dedup/pom.xml | 280 +++++++++++++++++++
examples/dedup/src/assemble/appPackage.xml | 43 +++
.../java/com/example/dedup/Application.java | 123 ++++++++
.../src/main/resources/META-INF/properties.xml | 40 +++
.../java/com/example/dedup/ApplicationTest.java | 38 +++
.../dedup/src/test/resources/log4j.properties | 22 ++
8 files changed, 606 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/README.md
----------------------------------------------------------------------
diff --git a/examples/dedup/README.md b/examples/dedup/README.md
new file mode 100644
index 0000000..207a0c4
--- /dev/null
+++ b/examples/dedup/README.md
@@ -0,0 +1,16 @@
+This sample application shows how to use the Dedup operator to de-duplicate a stream of incoming data.
+The operators in the application are as follows:
+1. Random data generator (RandomGenerator) which emits POJO tuples as records
+2. Dedup operator (Deduper) which accepts the POJO tuples and identifies unique and duplicate tuples.
+3. Console operator (ConsoleUnique) for unique tuples
+4. Console operator (ConsoleDuplicate) for duplicate tuples
+5. Console operator (ConsoleExpired) for expired tuples
+
+The following properties are configured for using the Application:
+1. ```dt.application.DedupExample.operator.RandomGenerator.prop.tuplesPerWindow``` - This is a limit on the number of tuples that will be generated by the Random Generator operator.
+2. ```dt.application.DedupExample.operator.Deduper.prop.keyExpression``` - This is the pseudo java expression for deriving the key fields from the incoming POJO.
+3. ```dt.application.DedupExample.operator.Deduper.prop.timeExpression``` - This is the pseudo java expression for deriving the time field in the incoming POJO. If ```timeExpression``` is not specified, the system time is used to compute the expiration for the tuples.
+4. ```dt.application.DedupExample.operator.Deduper.prop.expireBefore``` - The expiry time for incoming tuples in seconds. The keys in the system expire after every ```expireBefore``` seconds.
+5. ```dt.application.DedupExample.operator.Deduper.prop.bucketSpan``` - The span of a single expiry bucket. When an expiry time elapses, the bucket as a whole is discarded from the system. This can be set keeping in mind the largest unit that can be discarded. For example, if ```expireBefore``` is set to 1 hour, and we are getting new data per minute, it would make sense to set the ```bucketSpan``` to 1 minute or 5 minutes.
+
+Example values for these parameters have been specified in src/main/resources/META-INF/properties.xml.
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/XmlJavadocCommentsExtractor.xsl
----------------------------------------------------------------------
diff --git a/examples/dedup/XmlJavadocCommentsExtractor.xsl b/examples/dedup/XmlJavadocCommentsExtractor.xsl
new file mode 100644
index 0000000..08075a9
--- /dev/null
+++ b/examples/dedup/XmlJavadocCommentsExtractor.xsl
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+<!--
+ Document : XmlJavadocCommentsExtractor.xsl
+ Created on : September 16, 2014, 11:30 AM
+ Description:
+ The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet.
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+ <xsl:output method="xml" standalone="yes"/>
+
+ <!-- copy xml by selecting only the following nodes, attributes and text -->
+ <xsl:template match="node()|text()|@*">
+ <xsl:copy>
+ <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/>
+ </xsl:copy>
+ </xsl:template>
+
+ <!-- Strip off the following paths from the selected xml -->
+ <xsl:template match="//root/package/interface/interface
+ |//root/package/interface/method/@qualified
+ |//root/package/class/interface
+ |//root/package/class/class
+ |//root/package/class/method/@qualified
+ |//root/package/class/field/@qualified" />
+
+ <xsl:strip-space elements="*"/>
+</xsl:stylesheet>
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/pom.xml
----------------------------------------------------------------------
diff --git a/examples/dedup/pom.xml b/examples/dedup/pom.xml
new file mode 100644
index 0000000..f777784
--- /dev/null
+++ b/examples/dedup/pom.xml
@@ -0,0 +1,280 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <groupId>com.example</groupId>
+ <version>1.0-SNAPSHOT</version>
+ <artifactId>dedup</artifactId>
+ <packaging>jar</packaging>
+
+ <!-- change these to the appropriate values -->
+ <name>My Apex Application</name>
+ <description>My Apex Application Description</description>
+
+ <properties>
+ <!-- change this if you desire to use a different version of Apex Core -->
+ <apex.version>3.5.0</apex.version>
+ <malhar.version>3.6.0</malhar.version>
+ <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-eclipse-plugin</artifactId>
+ <version>2.9</version>
+ <configuration>
+ <downloadSources>true</downloadSources>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.3</version>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <source>1.7</source>
+ <target>1.7</target>
+ <debug>true</debug>
+ <optimize>false</optimize>
+ <showDeprecation>true</showDeprecation>
+ <showWarnings>true</showWarnings>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>2.8</version>
+ <executions>
+ <execution>
+ <id>copy-dependencies</id>
+ <phase>prepare-package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/deps</outputDirectory>
+ <includeScope>runtime</includeScope>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>app-package-assembly</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <finalName>${project.artifactId}-${project.version}-apexapp</finalName>
+ <appendAssemblyId>false</appendAssemblyId>
+ <descriptors>
+ <descriptor>src/assemble/appPackage.xml</descriptor>
+ </descriptors>
+ <archiverConfig>
+ <defaultDirectoryMode>0755</defaultDirectoryMode>
+ </archiverConfig>
+ <archive>
+ <manifestEntries>
+ <Class-Path>${apex.apppackage.classpath}</Class-Path>
+ <DT-Engine-Version>${apex.version}</DT-Engine-Version>
+ <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id>
+ <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name>
+ <DT-App-Package-Version>${project.version}</DT-App-Package-Version>
+ <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name>
+ <DT-App-Package-Description>${project.description}</DT-App-Package-Description>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <version>1.7</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <configuration>
+ <target>
+ <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar"
+ tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" />
+ </target>
+ </configuration>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ </execution>
+ <execution>
+ <!-- create resource directory for xml javadoc-->
+ <id>createJavadocDirectory</id>
+ <phase>generate-resources</phase>
+ <configuration>
+ <tasks>
+ <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+ <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+ </tasks>
+ </configuration>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>1.9.1</version>
+ <executions>
+ <execution>
+ <id>attach-artifacts</id>
+ <phase>package</phase>
+ <goals>
+ <goal>attach-artifact</goal>
+ </goals>
+ <configuration>
+ <artifacts>
+ <artifact>
+ <file>target/${project.artifactId}-${project.version}.apa</file>
+ <type>apa</type>
+ </artifact>
+ </artifacts>
+ <skipAttach>false</skipAttach>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <!-- generate javadoc -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <executions>
+ <!-- generate xml javadoc -->
+ <execution>
+ <id>xml-doclet</id>
+ <phase>generate-resources</phase>
+ <goals>
+ <goal>javadoc</goal>
+ </goals>
+ <configuration>
+ <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet>
+ <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam>
+ <useStandardDocletOptions>false</useStandardDocletOptions>
+ <docletArtifact>
+ <groupId>com.github.markusbernhardt</groupId>
+ <artifactId>xml-doclet</artifactId>
+ <version>1.0.4</version>
+ </docletArtifact>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags-->
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>xml-maven-plugin</artifactId>
+ <version>1.0</version>
+ <executions>
+ <execution>
+ <id>transform-xmljavadoc</id>
+ <phase>generate-resources</phase>
+ <goals>
+ <goal>transform</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <transformationSets>
+ <transformationSet>
+ <dir>${project.build.directory}/generated-resources/xml-javadoc</dir>
+ <includes>
+ <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+ </includes>
+ <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet>
+ <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir>
+ </transformationSet>
+ </transformationSets>
+ </configuration>
+ </plugin>
+ <!-- copy xml javadoc to class jar -->
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.6</version>
+ <executions>
+ <execution>
+ <id>copy-resources</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${basedir}/target/classes</outputDirectory>
+ <resources>
+ <resource>
+ <directory>${project.build.directory}/generated-resources/xml-javadoc</directory>
+ <includes>
+ <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+ </includes>
+ <filtering>true</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+
+ </build>
+
+ <dependencies>
+ <!-- add your dependencies here -->
+ <dependency>
+ <groupId>org.apache.apex</groupId>
+ <artifactId>malhar-library</artifactId>
+ <version>${malhar.version}</version>
+ <!--
+ If you know that your application does not need transitive dependencies pulled in by malhar-library,
+ uncomment the following to reduce the size of your app package.
+ -->
+ <!--
+ <exclusions>
+ <exclusion>
+ <groupId>*</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ -->
+ </dependency>
+ <dependency>
+ <groupId>org.apache.apex</groupId>
+ <artifactId>apex-common</artifactId>
+ <version>${apex.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.10</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.apex</groupId>
+ <artifactId>apex-engine</artifactId>
+ <version>${apex.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.janino</groupId>
+ <artifactId>janino</artifactId>
+ <version>2.7.8</version>
+ </dependency>
+ </dependencies>
+
+</project>
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/src/assemble/appPackage.xml
----------------------------------------------------------------------
diff --git a/examples/dedup/src/assemble/appPackage.xml b/examples/dedup/src/assemble/appPackage.xml
new file mode 100644
index 0000000..7ad071c
--- /dev/null
+++ b/examples/dedup/src/assemble/appPackage.xml
@@ -0,0 +1,43 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+ <id>appPackage</id>
+ <formats>
+ <format>jar</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>${basedir}/target/</directory>
+ <outputDirectory>/app</outputDirectory>
+ <includes>
+ <include>${project.artifactId}-${project.version}.jar</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>${basedir}/target/deps</directory>
+ <outputDirectory>/lib</outputDirectory>
+ </fileSet>
+ <fileSet>
+ <directory>${basedir}/src/site/conf</directory>
+ <outputDirectory>/conf</outputDirectory>
+ <includes>
+ <include>*.xml</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <directory>${basedir}/src/main/resources/META-INF</directory>
+ <outputDirectory>/META-INF</outputDirectory>
+ </fileSet>
+ <fileSet>
+ <directory>${basedir}/src/main/resources/app</directory>
+ <outputDirectory>/app</outputDirectory>
+ </fileSet>
+ <fileSet>
+ <directory>${basedir}/src/main/resources/resources</directory>
+ <outputDirectory>/resources</outputDirectory>
+ </fileSet>
+ </fileSets>
+
+</assembly>
+
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/src/main/java/com/example/dedup/Application.java
----------------------------------------------------------------------
diff --git a/examples/dedup/src/main/java/com/example/dedup/Application.java b/examples/dedup/src/main/java/com/example/dedup/Application.java
new file mode 100644
index 0000000..cabdce2
--- /dev/null
+++ b/examples/dedup/src/main/java/com/example/dedup/Application.java
@@ -0,0 +1,123 @@
+/**
+ * Put your copyright and license info here.
+ */
+package com.example.dedup;
+
+import java.util.Date;
+import java.util.Random;
+
+import org.apache.apex.malhar.lib.dedup.TimeBasedDedupOperator;
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.annotation.ApplicationAnnotation;
+import com.datatorrent.common.partitioner.StatelessPartitioner;
+import com.datatorrent.common.util.BaseOperator;
+import com.datatorrent.lib.io.ConsoleOutputOperator;
+import com.datatorrent.api.StreamingApplication;
+import com.datatorrent.api.Context;
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DefaultOutputPort;
+import com.datatorrent.api.InputOperator;
+
+@ApplicationAnnotation(name="DedupExample")
+public class Application implements StreamingApplication
+{
+
+ /**
+  * Builds the example DAG: a random generator feeds the dedup operator, and each of the
+  * dedup operator's unique, duplicate and expired output ports is connected to its own
+  * console output operator.
+  *
+  * @param dag  the DAG to which operators and streams are added
+  * @param conf the launch configuration (not read by this method)
+  */
+ @Override
+ public void populateDAG(DAG dag, Configuration conf)
+ {
+   // Source of random TestEvent tuples
+   RandomDataGeneratorOperator generator = dag.addOperator("RandomGenerator", new RandomDataGeneratorOperator());
+
+   // De-duplication operator; it is configured through resources/META-INF/properties.xml
+   TimeBasedDedupOperator deduper = dag.addOperator("Deduper", new TimeBasedDedupOperator());
+
+   // One console sink per dedup output: unique, duplicate and expired tuples
+   ConsoleOutputOperator uniqueSink = dag.addOperator("ConsoleUnique", new ConsoleOutputOperator());
+   ConsoleOutputOperator duplicateSink = dag.addOperator("ConsoleDuplicate", new ConsoleOutputOperator());
+   ConsoleOutputOperator expiredSink = dag.addOperator("ConsoleExpired", new ConsoleOutputOperator());
+
+   // Generator -> Dedup
+   dag.addStream("Generator to Dedup", generator.output, deduper.input);
+
+   // Dedup -> console sinks, one stream per output port
+   dag.addStream("Dedup Unique to Console", deduper.unique, uniqueSink.input);
+   dag.addStream("Dedup Duplicate to Console", deduper.duplicate, duplicateSink.input);
+   dag.addStream("Dedup Expired to Console", deduper.expired, expiredSink.input);
+
+   // TUPLE_CLASS supplies the schema (the POJO class) of the tuples arriving on the dedup input port
+   dag.setInputPortAttribute(deduper.input, Context.PortContext.TUPLE_CLASS, TestEvent.class);
+
+   // Uncomment the following line to create multiple partitions for Dedup operator. In this case: 2
+   // dag.setAttribute(deduper, Context.OperatorContext.PARTITIONER, new StatelessPartitioner<TimeBasedDedupOperator>(2));
+ }
+
+ /**
+  * Input operator that generates random {@link TestEvent} tuples for the dedup example.
+  * <p>
+  * Every call to {@link #emitTuples()} emits at most one event, and at most
+  * {@code tuplesPerWindow} events are emitted between two {@link #beginWindow(long)} calls.
+  */
+ public static class RandomDataGeneratorOperator extends BaseOperator implements InputOperator
+ {
+
+   public final transient DefaultOutputPort<TestEvent> output = new DefaultOutputPort<>();
+   private final transient Random r = new Random();
+   // Upper bound on the number of tuples emitted per window
+   private int tuplesPerWindow = 100;
+   // Number of tuples emitted so far in the current window; reset in beginWindow()
+   private transient int count = 0;
+
+   /**
+    * Resets the per-window tuple counter.
+    *
+    * @param windowId id of the window that is starting (not used)
+    */
+   @Override
+   public void beginWindow(long windowId)
+   {
+     count = 0;
+   }
+
+   /**
+    * Emits one random event unless the per-window limit has already been reached.
+    * The event id is drawn from [0, 100) and the event time lies up to 60 seconds
+    * before the current system time.
+    */
+   @Override
+   public void emitTuples()
+   {
+     // Compare before incrementing so that exactly tuplesPerWindow tuples are emitted per window
+     if (count >= tuplesPerWindow) {
+       return;
+     }
+     count++;
+     TestEvent event = new TestEvent();
+     event.id = r.nextInt(100);
+     event.eventTime = new Date(System.currentTimeMillis() - (r.nextInt(60 * 1000)));
+     output.emit(event);
+   }
+
+   /**
+    * @return the maximum number of tuples emitted per window
+    */
+   public int getTuplesPerWindow()
+   {
+     return tuplesPerWindow;
+   }
+
+   /**
+    * Sets the maximum number of tuples emitted per window.
+    * NOTE(review): added so the {@code tuplesPerWindow} operator property configured in
+    * META-INF/properties.xml has a bean setter to bind to - confirm against the platform's
+    * property injection rules.
+    *
+    * @param tuplesPerWindow the per-window tuple limit
+    */
+   public void setTuplesPerWindow(int tuplesPerWindow)
+   {
+     this.tuplesPerWindow = tuplesPerWindow;
+   }
+ }
+
+ /**
+  * Plain POJO tuple used by the example: an integer id together with an event timestamp.
+  */
+ public static class TestEvent
+ {
+   private int id;
+   private Date eventTime;
+
+   /** Creates an event with id 0 and no event time. */
+   public TestEvent()
+   {
+   }
+
+   /**
+    * @return the id of this event
+    */
+   public int getId()
+   {
+     return this.id;
+   }
+
+   /**
+    * @return the time of this event, or null if it has not been set
+    */
+   public Date getEventTime()
+   {
+     return this.eventTime;
+   }
+
+   /**
+    * @param id the id to assign to this event
+    */
+   public void setId(int id)
+   {
+     this.id = id;
+   }
+
+   /**
+    * @param eventTime the time to assign to this event
+    */
+   public void setEventTime(Date eventTime)
+   {
+     this.eventTime = eventTime;
+   }
+
+   /**
+    * @return a text form containing the id and the event time
+    */
+   @Override
+   public String toString()
+   {
+     StringBuilder text = new StringBuilder("TestEvent [id=");
+     text.append(id).append(", eventTime=").append(eventTime).append("]");
+     return text.toString();
+   }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/src/main/resources/META-INF/properties.xml
----------------------------------------------------------------------
diff --git a/examples/dedup/src/main/resources/META-INF/properties.xml b/examples/dedup/src/main/resources/META-INF/properties.xml
new file mode 100644
index 0000000..ffe3350
--- /dev/null
+++ b/examples/dedup/src/main/resources/META-INF/properties.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<configuration>
+ <property>
+ <name>dt.application.DedupExample.operator.RandomGenerator.prop.tuplesPerWindow</name>
+ <value>100</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.Deduper.prop.keyExpression</name>
+ <value>id</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.Deduper.prop.timeExpression</name>
+ <value>eventTime.getTime()</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.Deduper.prop.bucketSpan</name>
+ <value>10</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.Deduper.prop.expireBefore</name>
+ <value>60</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.ConsoleUnique.prop.stringFormat</name>
+ <value>Unique: %s</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.ConsoleDuplicate.prop.stringFormat</name>
+ <value>Duplicate: %s</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.ConsoleExpired.prop.stringFormat</name>
+ <value>Expired: %s</value>
+ </property>
+ <property>
+ <name>dt.application.DedupExample.operator.*.attr.MEMORY_MB</name>
+ <value>512</value>
+ </property>
+</configuration>
+
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/src/test/java/com/example/dedup/ApplicationTest.java
----------------------------------------------------------------------
diff --git a/examples/dedup/src/test/java/com/example/dedup/ApplicationTest.java b/examples/dedup/src/test/java/com/example/dedup/ApplicationTest.java
new file mode 100644
index 0000000..9c9f17c
--- /dev/null
+++ b/examples/dedup/src/test/java/com/example/dedup/ApplicationTest.java
@@ -0,0 +1,38 @@
+/**
+ * Put your copyright and license info here.
+ */
+package com.example.dedup;
+
+import java.io.IOException;
+
+import javax.validation.ConstraintViolationException;
+
+import org.junit.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import com.datatorrent.api.LocalMode;
+import com.example.dedup.Application;
+
+/**
+ * Test the DAG declaration in local mode.
+ */
+public class ApplicationTest {
+
+ @Test
+ public void testApplication() throws IOException, Exception {
+ try {
+ LocalMode lma = LocalMode.newInstance();
+ Configuration conf = new Configuration(false);
+ conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
+ lma.prepareDAG(new Application(), conf);
+ LocalMode.Controller lc = lma.getController();
+ lc.runAsync();
+ Thread.sleep(10 * 1000);
+ lc.shutdown();
+ } catch (ConstraintViolationException e) {
+ Assert.fail("constraint violations: " + e.getConstraintViolations());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/45735280/examples/dedup/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/examples/dedup/src/test/resources/log4j.properties b/examples/dedup/src/test/resources/log4j.properties
new file mode 100644
index 0000000..98544e8
--- /dev/null
+++ b/examples/dedup/src/test/resources/log4j.properties
@@ -0,0 +1,22 @@
+log4j.rootLogger=DEBUG,CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+log4j.appender.RFA.File=/tmp/app.log
+
+# to enable, add SYSLOG to rootLogger
+log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender
+log4j.appender.SYSLOG.syslogHost=127.0.0.1
+log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout
+log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n
+log4j.appender.SYSLOG.Facility=LOCAL1
+
+#log4j.logger.org.apache.commons.beanutils=warn
+log4j.logger.com.datatorrent=debug
+log4j.logger.org.apache.apex=debug
+log4j.logger.org=info