You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@apex.apache.org by th...@apache.org on 2017/05/19 18:09:21 UTC

[2/4] apex-malhar git commit: SPOI-8848 S3 to HDFS example app. SPOI-10927 Example app s3 tuple output module.

SPOI-8848 S3 to HDFS example app.
SPOI-10927 Example app s3 tuple output module.


Project: http://git-wip-us.apache.org/repos/asf/apex-malhar/repo
Commit: http://git-wip-us.apache.org/repos/asf/apex-malhar/commit/6a617f9b
Tree: http://git-wip-us.apache.org/repos/asf/apex-malhar/tree/6a617f9b
Diff: http://git-wip-us.apache.org/repos/asf/apex-malhar/diff/6a617f9b

Branch: refs/heads/master
Commit: 6a617f9ba7d6bebb74dbe95964729a74bdb542e5
Parents: 118a75f
Author: yogidevendra <de...@datatorrent.com>
Authored: Tue Apr 18 21:48:43 2017 -0700
Committer: Lakshmi Prasanna Velineni <la...@datatorrent.com>
Committed: Thu May 18 16:53:50 2017 -0700

----------------------------------------------------------------------
 examples/s3-to-hdfs-sync/README.md              |   5 +
 .../XmlJavadocCommentsExtractor.xsl             |  28 ++
 examples/s3-to-hdfs-sync/pom.xml                | 279 +++++++++++++++++++
 .../s3-to-hdfs-sync/src/assemble/appPackage.xml |  43 +++
 .../s3input/S3ToHDFSSyncApplication.java        |  32 +++
 .../src/main/resources/META-INF/properties.xml  |  12 +
 .../src/test/resources/log4j.properties         |  21 ++
 examples/s3-tuple-output/README.md              |  19 ++
 .../XmlJavadocCommentsExtractor.xsl             |  28 ++
 examples/s3-tuple-output/pom.xml                | 276 ++++++++++++++++++
 .../s3-tuple-output/src/assemble/appPackage.xml |  43 +++
 .../tutorials/s3output/Application.java         |  27 ++
 .../src/main/resources/META-INF/properties.xml  |  47 ++++
 .../src/test/resources/log4j.properties         |  21 ++
 14 files changed, 881 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/README.md
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/README.md b/examples/s3-to-hdfs-sync/README.md
new file mode 100644
index 0000000..cee5baf
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/README.md
@@ -0,0 +1,5 @@
+# Amazon S3 to HDFS sync application
+Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. 
+  This application transfers files from the configured S3 location to the destination path in HDFS.
+  The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync
+  Send feedback or feature requests to feedback@datatorrent.com
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl
new file mode 100644
index 0000000..1ddbbcc
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/XmlJavadocCommentsExtractor.xsl
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Document   : XmlJavadocCommentsExtractor.xsl
+    Created on : September 16, 2014, 11:30 AM
+    Description:
+        The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet.
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <xsl:output method="xml" standalone="yes"/>
+
+  <!-- copy xml by selecting only the following nodes, attributes and text -->
+  <xsl:template match="node()|text()|@*">
+    <xsl:copy>
+      <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/>
+    </xsl:copy>
+  </xsl:template>
+
+  <!-- Strip off the following paths from the selected xml -->
+  <xsl:template match="//root/package/interface/interface
+                      |//root/package/interface/method/@qualified
+                      |//root/package/class/interface
+                      |//root/package/class/class
+                      |//root/package/class/method/@qualified
+                      |//root/package/class/field/@qualified" />
+
+  <xsl:strip-space elements="*"/>
+</xsl:stylesheet>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/pom.xml
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/pom.xml b/examples/s3-to-hdfs-sync/pom.xml
new file mode 100644
index 0000000..55d98de
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/pom.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  
+  <groupId>com.datatorrent.apps</groupId>
+  <version>0.8</version>
+  <artifactId>s3-to-hdfs-sync</artifactId>
+  <packaging>jar</packaging>
+
+  <!-- change these to the appropriate values -->
+  <name>S3 to HDFS Sync App</name>
+  <description>Ingest and backup Amazon S3 data to Hadoop HDFS for data download from Amazon to hadoop. 
+  This application transfers files from the configured S3 location to the destination path in HDFS.
+  The source code is available at: https://github.com/DataTorrent/examples/tree/master/tutorials/s3-to-hdfs-sync
+  Send feedback or feature requests to feedback@datatorrent.com</description>
+
+  <properties>
+    <!-- skip tests by default as they depend on external setup -->
+    <skipTests>true</skipTests>
+    <!-- change this if you desire to use a different version of Apex Core -->
+    <apex.version>3.5.0</apex.version>
+    <malhar.version>3.6.0</malhar.version>
+    <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath>
+  </properties>
+
+  <build>
+    <plugins>
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-eclipse-plugin</artifactId>
+         <version>2.9</version>
+         <configuration>
+           <downloadSources>true</downloadSources>
+         </configuration>
+       </plugin>
+       <plugin>
+         <artifactId>maven-compiler-plugin</artifactId>
+         <version>3.3</version>
+         <configuration>
+           <encoding>UTF-8</encoding>
+           <source>1.7</source>
+           <target>1.7</target>
+           <debug>true</debug>
+           <optimize>false</optimize>
+           <showDeprecation>true</showDeprecation>
+           <showWarnings>true</showWarnings>
+         </configuration>
+       </plugin>
+       <plugin>
+         <artifactId>maven-dependency-plugin</artifactId>
+         <version>2.8</version>
+         <executions>
+           <execution>
+             <id>copy-dependencies</id>
+             <phase>prepare-package</phase>
+             <goals>
+               <goal>copy-dependencies</goal>
+             </goals>
+             <configuration>
+               <outputDirectory>target/deps</outputDirectory>
+               <includeScope>runtime</includeScope>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <artifactId>maven-assembly-plugin</artifactId>
+         <executions>
+           <execution>
+             <id>app-package-assembly</id>
+             <phase>package</phase>
+             <goals>
+               <goal>single</goal>
+             </goals>
+             <configuration>
+               <finalName>${project.artifactId}-${project.version}-apexapp</finalName>
+               <appendAssemblyId>false</appendAssemblyId>
+               <descriptors>
+                 <descriptor>src/assemble/appPackage.xml</descriptor>
+               </descriptors>
+               <archiverConfig>
+                 <defaultDirectoryMode>0755</defaultDirectoryMode>
+               </archiverConfig>                  
+               <archive>
+                 <manifestEntries>
+                   <Class-Path>${apex.apppackage.classpath}</Class-Path>
+                   <DT-Engine-Version>${apex.version}</DT-Engine-Version>
+                   <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id>
+                   <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name>
+                   <DT-App-Package-Version>${project.version}</DT-App-Package-Version>
+                   <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name>
+                   <DT-App-Package-Description>${project.description}</DT-App-Package-Description>
+                 </manifestEntries>
+               </archive>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <artifactId>maven-antrun-plugin</artifactId>
+         <version>1.7</version>
+         <executions>
+           <execution>
+             <phase>package</phase>
+             <configuration>
+               <target>
+                 <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar"
+                       tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" />
+               </target>
+             </configuration>
+             <goals>
+               <goal>run</goal>
+             </goals>
+           </execution>
+           <execution>
+             <!-- create resource directory for xml javadoc-->
+             <id>createJavadocDirectory</id>
+             <phase>generate-resources</phase>
+             <configuration>
+               <tasks>
+                 <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+                 <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+               </tasks>
+             </configuration>
+             <goals>
+               <goal>run</goal>
+             </goals>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <groupId>org.codehaus.mojo</groupId>
+         <artifactId>build-helper-maven-plugin</artifactId>
+         <version>1.9.1</version>
+         <executions>
+           <execution>
+             <id>attach-artifacts</id>
+             <phase>package</phase>
+             <goals>
+               <goal>attach-artifact</goal>
+             </goals>
+             <configuration>
+               <artifacts>
+                 <artifact>
+                   <file>target/${project.artifactId}-${project.version}.apa</file>
+                   <type>apa</type>
+                 </artifact>
+               </artifacts>
+               <skipAttach>false</skipAttach>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+      <!-- generate javdoc -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <executions>
+          <!-- generate xml javadoc -->
+          <execution>
+            <id>xml-doclet</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>javadoc</goal>
+            </goals>
+            <configuration>
+              <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet>
+              <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam>
+              <useStandardDocletOptions>false</useStandardDocletOptions>
+              <docletArtifact>
+                <groupId>com.github.markusbernhardt</groupId>
+                <artifactId>xml-doclet</artifactId>
+                <version>1.0.4</version>
+              </docletArtifact>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags-->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>xml-maven-plugin</artifactId>
+        <version>1.0</version>
+        <executions>
+          <execution>
+            <id>transform-xmljavadoc</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>transform</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <transformationSets>
+            <transformationSet>
+              <dir>${project.build.directory}/generated-resources/xml-javadoc</dir>
+              <includes>
+                <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+              </includes>
+              <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet>
+              <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir>
+            </transformationSet>
+          </transformationSets>
+        </configuration>
+      </plugin>
+      <!-- copy xml javadoc to class jar -->
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>2.6</version>
+        <executions>
+          <execution>
+            <id>copy-resources</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${basedir}/target/classes</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>${project.build.directory}/generated-resources/xml-javadoc</directory>
+                  <includes>
+                    <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+                  </includes>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+
+  </build>
+
+  <dependencies>
+    <!-- add your dependencies here -->
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>malhar-library</artifactId>
+      <version>${malhar.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>*</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>aws-java-sdk-s3</artifactId>
+      <version>1.10.73</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-common</artifactId>
+      <version>${apex.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.10</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-engine</artifactId>
+      <version>${apex.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml
new file mode 100644
index 0000000..7ad071c
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/src/assemble/appPackage.xml
@@ -0,0 +1,43 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+  <id>appPackage</id>
+  <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <fileSets>
+    <fileSet>
+      <directory>${basedir}/target/</directory>
+      <outputDirectory>/app</outputDirectory>
+      <includes>
+        <include>${project.artifactId}-${project.version}.jar</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/target/deps</directory>
+      <outputDirectory>/lib</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/site/conf</directory>
+      <outputDirectory>/conf</outputDirectory>
+      <includes>
+        <include>*.xml</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/META-INF</directory>
+      <outputDirectory>/META-INF</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/app</directory>
+      <outputDirectory>/app</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/resources</directory>
+      <outputDirectory>/resources</outputDirectory>
+    </fileSet>
+  </fileSets>
+
+</assembly>
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java
new file mode 100644
index 0000000..dae9c4a
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/src/main/java/com/datatorrent/tutorial/s3input/S3ToHDFSSyncApplication.java
@@ -0,0 +1,32 @@
+package com.datatorrent.tutorial.s3input;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.DAG.Locality;
+import com.datatorrent.api.StreamingApplication;
+import com.datatorrent.api.annotation.ApplicationAnnotation;
+import com.datatorrent.lib.io.fs.HDFSFileCopyModule;
+import com.datatorrent.lib.io.fs.S3InputModule;
+
+/**
+ * Simple application illustrating file copy from S3
+ */
+@ApplicationAnnotation(name="S3-to-HDFS-Sync")
+public class S3ToHDFSSyncApplication implements StreamingApplication
+{
+
+  @Override
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+
+    S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule());
+    HDFSFileCopyModule outputModule = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule());
+
+    dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput);
+    dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput)
+        .setLocality(Locality.THREAD_LOCAL);
+    dag.addStream("BlocksData", inputModule.messages, outputModule.blockData).setLocality(Locality.THREAD_LOCAL);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml
new file mode 100644
index 0000000..6d8ecea
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/src/main/resources/META-INF/properties.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<configuration>
+  <property>
+    <name>dt.operator.S3InputModule.prop.files</name>
+     <value>s3n://ACCESS_KEY_ID:SECRET_KEY@BUCKET_NAME/DIRECTORY</value>
+  </property>
+  <property>
+    <name>dt.operator.HDFSFileCopyModule.prop.outputDirectoryPath</name>
+    <value>hdfs://destination-namenode-service:port/path-to-output-directory</value>
+  </property>
+</configuration>
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties
new file mode 100644
index 0000000..3bfcdc5
--- /dev/null
+++ b/examples/s3-to-hdfs-sync/src/test/resources/log4j.properties
@@ -0,0 +1,21 @@
+log4j.rootLogger=DEBUG,CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+log4j.appender.RFA.File=/tmp/app.log
+
+# to enable, add SYSLOG to rootLogger
+log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender
+log4j.appender.SYSLOG.syslogHost=127.0.0.1
+log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout
+log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n
+log4j.appender.SYSLOG.Facility=LOCAL1
+
+log4j.logger.org=info
+#log4j.logger.org.apache.commons.beanutils=warn
+log4j.logger.com.datatorrent=debug

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/README.md
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/README.md b/examples/s3-tuple-output/README.md
new file mode 100644
index 0000000..e8a98f4
--- /dev/null
+++ b/examples/s3-tuple-output/README.md
@@ -0,0 +1,19 @@
+# S3 tuple output example
+
+Sample application to show how to use the S3 tuple output module.
+
+The application reads records from HDFS using `FSRecordReaderModule`. 
+These records are then written to Amazon S3 using `S3BytesOutputModule`.
+
+### How to configure
+The properties file META-INF/properties.xml shows how to configure the respective operators.
+
+### How to compile
+`shell> mvn clean package`
+
+This will generate application package s3-tuple-output-1.0-SNAPSHOT.apa inside target directory.
+
+### How to run
+Use the application package generated above to launch the application from UI console(if available) or apex command line interface.
+
+`apex> launch target/s3-tuple-output-1.0-SNAPSHOT.apa`

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl
new file mode 100644
index 0000000..1ddbbcc
--- /dev/null
+++ b/examples/s3-tuple-output/XmlJavadocCommentsExtractor.xsl
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Document   : XmlJavadocCommentsExtractor.xsl
+    Created on : September 16, 2014, 11:30 AM
+    Description:
+        The transformation strips off all information except for comments and tags from xml javadoc generated by xml-doclet.
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <xsl:output method="xml" standalone="yes"/>
+
+  <!-- copy xml by selecting only the following nodes, attributes and text -->
+  <xsl:template match="node()|text()|@*">
+    <xsl:copy>
+      <xsl:apply-templates select="root|package|class|interface|method|field|type|comment|tag|text()|@name|@qualified|@text"/>
+    </xsl:copy>
+  </xsl:template>
+
+  <!-- Strip off the following paths from the selected xml -->
+  <xsl:template match="//root/package/interface/interface
+                      |//root/package/interface/method/@qualified
+                      |//root/package/class/interface
+                      |//root/package/class/class
+                      |//root/package/class/method/@qualified
+                      |//root/package/class/field/@qualified" />
+
+  <xsl:strip-space elements="*"/>
+</xsl:stylesheet>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/pom.xml
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/pom.xml b/examples/s3-tuple-output/pom.xml
new file mode 100644
index 0000000..3a57dab
--- /dev/null
+++ b/examples/s3-tuple-output/pom.xml
@@ -0,0 +1,276 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  
+  <groupId>com.datatorrent.apps</groupId>
+  <version>1.0-SNAPSHOT</version>
+  <artifactId>s3-tuple-output</artifactId>
+  <packaging>jar</packaging>
+
+  <!-- change these to the appropriate values -->
+  <name>S3 Tuple output example</name>
+  <description>Example application for S3 Tuple output module</description>
+
+  <properties>
+    <!-- skip tests by default as they depend on external setup -->
+    <skipTests>true</skipTests>
+    <!-- change this if you desire to use a different version of Apex Core -->
+    <apex.version>3.5.0</apex.version>
+    <malhar.version>3.7.0-SNAPSHOT</malhar.version>
+    <apex.apppackage.classpath>lib/*.jar</apex.apppackage.classpath>
+  </properties>
+
+  <build>
+    <plugins>
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-eclipse-plugin</artifactId>
+         <version>2.9</version>
+         <configuration>
+           <downloadSources>true</downloadSources>
+         </configuration>
+       </plugin>
+       <plugin>
+         <artifactId>maven-compiler-plugin</artifactId>
+         <version>3.3</version>
+         <configuration>
+           <encoding>UTF-8</encoding>
+           <source>1.7</source>
+           <target>1.7</target>
+           <debug>true</debug>
+           <optimize>false</optimize>
+           <showDeprecation>true</showDeprecation>
+           <showWarnings>true</showWarnings>
+         </configuration>
+       </plugin>
+       <plugin>
+         <artifactId>maven-dependency-plugin</artifactId>
+         <version>2.8</version>
+         <executions>
+           <execution>
+             <id>copy-dependencies</id>
+             <phase>prepare-package</phase>
+             <goals>
+               <goal>copy-dependencies</goal>
+             </goals>
+             <configuration>
+               <outputDirectory>target/deps</outputDirectory>
+               <includeScope>runtime</includeScope>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <artifactId>maven-assembly-plugin</artifactId>
+         <executions>
+           <execution>
+             <id>app-package-assembly</id>
+             <phase>package</phase>
+             <goals>
+               <goal>single</goal>
+             </goals>
+             <configuration>
+               <finalName>${project.artifactId}-${project.version}-apexapp</finalName>
+               <appendAssemblyId>false</appendAssemblyId>
+               <descriptors>
+                 <descriptor>src/assemble/appPackage.xml</descriptor>
+               </descriptors>
+               <archiverConfig>
+                 <defaultDirectoryMode>0755</defaultDirectoryMode>
+               </archiverConfig>                  
+               <archive>
+                 <manifestEntries>
+                   <Class-Path>${apex.apppackage.classpath}</Class-Path>
+                   <DT-Engine-Version>${apex.version}</DT-Engine-Version>
+                   <DT-App-Package-Group-Id>${project.groupId}</DT-App-Package-Group-Id>
+                   <DT-App-Package-Name>${project.artifactId}</DT-App-Package-Name>
+                   <DT-App-Package-Version>${project.version}</DT-App-Package-Version>
+                   <DT-App-Package-Display-Name>${project.name}</DT-App-Package-Display-Name>
+                   <DT-App-Package-Description>${project.description}</DT-App-Package-Description>
+                 </manifestEntries>
+               </archive>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <artifactId>maven-antrun-plugin</artifactId>
+         <version>1.7</version>
+         <executions>
+           <execution>
+             <phase>package</phase>
+             <configuration>
+               <target>
+                 <move file="${project.build.directory}/${project.artifactId}-${project.version}-apexapp.jar"
+                       tofile="${project.build.directory}/${project.artifactId}-${project.version}.apa" />
+               </target>
+             </configuration>
+             <goals>
+               <goal>run</goal>
+             </goals>
+           </execution>
+           <execution>
+             <!-- create resource directory for xml javadoc-->
+             <id>createJavadocDirectory</id>
+             <phase>generate-resources</phase>
+             <configuration>
+               <tasks>
+                 <delete dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+                 <mkdir dir="${project.build.directory}/generated-resources/xml-javadoc"/>
+               </tasks>
+             </configuration>
+             <goals>
+               <goal>run</goal>
+             </goals>
+           </execution>
+         </executions>
+       </plugin>
+
+       <plugin>
+         <groupId>org.codehaus.mojo</groupId>
+         <artifactId>build-helper-maven-plugin</artifactId>
+         <version>1.9.1</version>
+         <executions>
+           <execution>
+             <id>attach-artifacts</id>
+             <phase>package</phase>
+             <goals>
+               <goal>attach-artifact</goal>
+             </goals>
+             <configuration>
+               <artifacts>
+                 <artifact>
+                   <file>target/${project.artifactId}-${project.version}.apa</file>
+                   <type>apa</type>
+                 </artifact>
+               </artifacts>
+               <skipAttach>false</skipAttach>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+
+      <!-- generate javdoc -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <executions>
+          <!-- generate xml javadoc -->
+          <execution>
+            <id>xml-doclet</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>javadoc</goal>
+            </goals>
+            <configuration>
+              <doclet>com.github.markusbernhardt.xmldoclet.XmlDoclet</doclet>
+              <additionalparam>-d ${project.build.directory}/generated-resources/xml-javadoc -filename ${project.artifactId}-${project.version}-javadoc.xml</additionalparam>
+              <useStandardDocletOptions>false</useStandardDocletOptions>
+              <docletArtifact>
+                <groupId>com.github.markusbernhardt</groupId>
+                <artifactId>xml-doclet</artifactId>
+                <version>1.0.4</version>
+              </docletArtifact>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <!-- Transform xml javadoc to stripped down version containing only class/interface comments and tags-->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>xml-maven-plugin</artifactId>
+        <version>1.0</version>
+        <executions>
+          <execution>
+            <id>transform-xmljavadoc</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>transform</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <transformationSets>
+            <transformationSet>
+              <dir>${project.build.directory}/generated-resources/xml-javadoc</dir>
+              <includes>
+                <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+              </includes>
+              <stylesheet>XmlJavadocCommentsExtractor.xsl</stylesheet>
+              <outputDir>${project.build.directory}/generated-resources/xml-javadoc</outputDir>
+            </transformationSet>
+          </transformationSets>
+        </configuration>
+      </plugin>
+      <!-- copy xml javadoc to class jar -->
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>2.6</version>
+        <executions>
+          <execution>
+            <id>copy-resources</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${basedir}/target/classes</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>${project.build.directory}/generated-resources/xml-javadoc</directory>
+                  <includes>
+                    <include>${project.artifactId}-${project.version}-javadoc.xml</include>
+                  </includes>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+
+  </build>
+
+  <dependencies>
+    <!-- add your dependencies here -->
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>malhar-library</artifactId>
+      <version>${malhar.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>*</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>aws-java-sdk-s3</artifactId>
+      <version>1.10.73</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-common</artifactId>
+      <version>${apex.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.10</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.apex</groupId>
+      <artifactId>apex-engine</artifactId>
+      <version>${apex.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/assemble/appPackage.xml
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/src/assemble/appPackage.xml b/examples/s3-tuple-output/src/assemble/appPackage.xml
new file mode 100644
index 0000000..7ad071c
--- /dev/null
+++ b/examples/s3-tuple-output/src/assemble/appPackage.xml
@@ -0,0 +1,43 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+  <id>appPackage</id>
+  <formats>
+    <format>jar</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <fileSets>
+    <fileSet>
+      <directory>${basedir}/target/</directory>
+      <outputDirectory>/app</outputDirectory>
+      <includes>
+        <include>${project.artifactId}-${project.version}.jar</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/target/deps</directory>
+      <outputDirectory>/lib</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/site/conf</directory>
+      <outputDirectory>/conf</outputDirectory>
+      <includes>
+        <include>*.xml</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/META-INF</directory>
+      <outputDirectory>/META-INF</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/app</directory>
+      <outputDirectory>/app</outputDirectory>
+    </fileSet>
+    <fileSet>
+      <directory>${basedir}/src/main/resources/resources</directory>
+      <outputDirectory>/resources</outputDirectory>
+    </fileSet>
+  </fileSets>
+
+</assembly>
+

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java
new file mode 100644
index 0000000..a4f487d
--- /dev/null
+++ b/examples/s3-tuple-output/src/main/java/com/datatorrent/tutorials/s3output/Application.java
@@ -0,0 +1,27 @@
+package com.datatorrent.tutorials.s3output;
+
+import org.apache.apex.malhar.lib.fs.FSRecordReaderModule;
+import org.apache.apex.malhar.lib.fs.s3.S3TupleOutputModule.S3BytesOutputModule;
+import org.apache.hadoop.conf.Configuration;
+
+import com.datatorrent.api.Context.PortContext;
+import com.datatorrent.api.DAG;
+import com.datatorrent.api.StreamingApplication;
+import com.datatorrent.api.annotation.ApplicationAnnotation;
+
+/**
+ * Simple application illustrating file copy from S3
+ */
+@ApplicationAnnotation(name="s3-output-line")
+public class Application implements StreamingApplication
+{
+
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+    FSRecordReaderModule recordReader = dag.addModule("lineInput", FSRecordReaderModule.class);
+    S3BytesOutputModule s3StringOutputModule = dag.addModule("s3output", S3BytesOutputModule.class);
+    dag.addStream("data", recordReader.records, s3StringOutputModule.input);
+    
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml
new file mode 100644
index 0000000..c1365bd
--- /dev/null
+++ b/examples/s3-tuple-output/src/main/resources/META-INF/properties.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<configuration>
+  <property>
+    <name>dt.operator.lineInput.prop.files</name>
+     <value>/user/appuser/test</value>
+  </property>
+  <property>
+    <name>dt.operator.*.attr.MEMORY_MB</name>
+     <value>5000</value>
+  </property>
+  <property>
+    <name>dt.operator.s3output.prop.accessKey</name>
+    <value>ACCESS_KEY_ID</value>
+  </property>
+  <property>
+    <name>dt.operator.s3output.prop.secretAccessKey</name>
+    <value>SECRET_ACCESS_KEY</value>
+  </property>
+  <property>
+    <name>dt.operator.s3output.prop.bucketName</name>
+    <value>BUCKET_NAME</value>
+  </property>
+  <property>
+    <name>dt.operator.s3output.prop.outputDirectoryPath</name>
+    <value>test</value>
+  </property>
+  <property>
+    <name>dt.operator.s3output.prop.maxTuplesPerSecPerPartition</name>
+    <value>300000</value>
+  </property>
+  <property>
+    <name>dt.operator.s3output.prop.maxS3UploadPartitions</name>
+    <value>8</value>
+  </property>
+  <property>
+    <name>dt.operator.lineInput.prop.maxReaders</name>
+     <value>8</value>
+  </property>
+  <property>
+    <name>dt.operator.lineInput.prop.minReaders</name>
+     <value>1</value>
+  </property>
+  <property>
+    <name>dt.loggers.level</name>
+    <value>org.apache.apex.malhar.lib.fs.s3.*:DEBUG,org.apache.apex.*:DEBUG,com.datatorrent.stram.plan.physical.*:DEBUG,com.datatorrent.lib.*:DEBUG</value>
+  </property>
+</configuration>

http://git-wip-us.apache.org/repos/asf/apex-malhar/blob/6a617f9b/examples/s3-tuple-output/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/examples/s3-tuple-output/src/test/resources/log4j.properties b/examples/s3-tuple-output/src/test/resources/log4j.properties
new file mode 100644
index 0000000..3bfcdc5
--- /dev/null
+++ b/examples/s3-tuple-output/src/test/resources/log4j.properties
@@ -0,0 +1,21 @@
+log4j.rootLogger=DEBUG,CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} %M - %m%n
+log4j.appender.RFA.File=/tmp/app.log
+
+# to enable, add SYSLOG to rootLogger
+log4j.appender.SYSLOG=org.apache.log4j.net.SyslogAppender
+log4j.appender.SYSLOG.syslogHost=127.0.0.1
+log4j.appender.SYSLOG.layout=org.apache.log4j.PatternLayout
+log4j.appender.SYSLOG.layout.conversionPattern=${dt.cid} %-5p [%t] %c{2} %x - %m%n
+log4j.appender.SYSLOG.Facility=LOCAL1
+
+log4j.logger.org=info
+#log4j.logger.org.apache.commons.beanutils=warn
+log4j.logger.com.datatorrent=debug