You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@storm.apache.org by pt...@apache.org on 2015/06/03 19:35:30 UTC

[16/50] [abbrv] storm git commit: update examples and docs for HDFS example

update examples and docs for HDFS example


Project: http://git-wip-us.apache.org/repos/asf/storm/repo
Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/0c1e0aa8
Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/0c1e0aa8
Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/0c1e0aa8

Branch: refs/heads/master
Commit: 0c1e0aa81f39e473bcf3482448813d78065e1212
Parents: 3411bc7
Author: P. Taylor Goetz <pt...@gmail.com>
Authored: Mon Apr 6 23:48:38 2015 -0400
Committer: P. Taylor Goetz <pt...@gmail.com>
Committed: Mon Apr 6 23:48:38 2015 -0400

----------------------------------------------------------------------
 .../src/test/resources/configs/hdfs_test.yaml   |  97 -----------------
 flux-examples/README.md                         |  30 +++++-
 flux-examples/pom.xml                           |   6 ++
 .../src/main/resources/hdfs_bolt.properties     |   9 ++
 flux-examples/src/main/resources/multilang.yaml |  89 ++++++++++++++++
 flux-examples/src/main/resources/shell.yaml     |  89 ----------------
 .../src/main/resources/simple_hdfs.yaml         | 105 +++++++++++++++++++
 7 files changed, 238 insertions(+), 187 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-core/src/test/resources/configs/hdfs_test.yaml
----------------------------------------------------------------------
diff --git a/flux-core/src/test/resources/configs/hdfs_test.yaml b/flux-core/src/test/resources/configs/hdfs_test.yaml
deleted file mode 100644
index c1d28d2..0000000
--- a/flux-core/src/test/resources/configs/hdfs_test.yaml
+++ /dev/null
@@ -1,97 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Test ability to wire together shell spouts/bolts
----
-
-# topology definition
-# name to be used when submitting
-name: "hdfs-topology"
-
-# Components
-# Components are analagous to Spring beans. They are meant to be used as constructor,
-# property(setter), and builder arguments.
-#
-# for the time being, components must be declared in the order they are referenced
-components:
-  - id: "syncPolicy"
-    className: "org.apache.storm.hdfs.bolt.sync.CountSyncPolicy"
-    constructorArgs:
-      - 1000
-  - id: "rotationPolicy"
-    className: "org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy"
-    constructorArgs:
-      - 5.0
-      - MB
-
-  - id: "fileNameFormat"
-    className: "org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat"
-    configMethods:
-      - name: "withPath"
-        args: ["/tmp/foo/"]
-      - name: "withExtension"
-        args: [".txt"]
-
-  - id: "recordFormat"
-    className: "org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat"
-    configMethods:
-      - name: "withFieldDelimiter"
-        args: ["|"]
-
-  - id: "rotationAction"
-    className: "org.apache.storm.hdfs.common.rotation.MoveFileAction"
-    configMethods:
-      - name: "toDestination"
-        args: ["/tmp/dest2"]
-
-# spout definitions
-spouts:
-  - id: "spout-1"
-    className: "backtype.storm.testing.TestWordSpout"
-    parallelism: 1
-    # ...
-
-# bolt definitions
-
-#        HdfsBolt bolt = new HdfsBolt()
-#                .withConfigKey("hdfs.config")
-#                .withFsUrl(args[0])
-#                .withFileNameFormat(fileNameFormat)
-#                .withRecordFormat(format)
-#                .withRotationPolicy(rotationPolicy)
-#                .withSyncPolicy(syncPolicy)
-#                .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));
-bolts:
-  - id: "bolt-1"
-    className: "org.apache.storm.hdfs.bolt.HdfsBolt"
-    configMethods:
-      - name: "withConfigKey"
-        args: ["hdfs.config"]
-      - name: "withFsUrl"
-        args: ["hdfs://localhost:1234"]
-      - name: "withFileNameFormat"
-        args: [ref: "fileNameFormat"]
-      - name: "withRecordFormat"
-        args: [ref: "recordFormat"]
-      - name: "withRotationPolicy"
-        args: [ref: "rotationPolicy"]
-      - name: "withSyncPolicy"
-        args: [ref: "syncPolicy"]
-      - name: "addRotationAction"
-        args: [ref: "rotationAction"]
-    parallelism: 1
-    # ...
-

http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/README.md
----------------------------------------------------------------------
diff --git a/flux-examples/README.md b/flux-examples/README.md
index 2f107e7..9f5682e 100644
--- a/flux-examples/README.md
+++ b/flux-examples/README.md
@@ -23,6 +23,34 @@ The example YAML files are also packaged in the examples jar, so they can also b
 command line switch:
 
 ```bash
-storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local --resource /sime_wordcount.yaml
+storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local --resource /simple_wordcount.yaml
+```
+
+## Available Examples
+
+### simple_wordcount.yaml
+
+This is a very basic wordcount example using Java spouts and bolts. It simply logs the running count of each word
+received.
+
+### multilang.yaml
+
+Another wordcount example that uses a spout written in JavaScript (node.js), a bolt written in Python, and two bolts
+written in java.
+
+### kafka_spout.yaml
+This example illustrates how to configure Storm's `storm-kafka` spout using Flux YAML DSL `components`, `references`,
+and `constructor arguments` constructs.
+
+### simple_hdfs.yaml
+
+This example demonstrates using Flux to setup a storm-hdfs bolt to write to an HDFS cluster. It also demonstrates Flux's
+variable substitution/filtering feature.
+
+To run the `simple_hdfs.yaml` example, copy the `hdfs_bolt.properties` file to a convenient location and change, at
+least, the property `hdfs.url` to point to a HDFS cluster. Then you can run the example something like:
+
+```bash
+storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local ./src/main/resources/simple_hdfs.yaml --filter my_hdfs_bolt.properties
 ```
 

http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/pom.xml
----------------------------------------------------------------------
diff --git a/flux-examples/pom.xml b/flux-examples/pom.xml
index 63bc312..09db717 100644
--- a/flux-examples/pom.xml
+++ b/flux-examples/pom.xml
@@ -44,6 +44,12 @@
             <artifactId>flux-wrappers</artifactId>
             <version>${project.version}</version>
         </dependency>
+
+        <dependency>
+            <groupId>org.apache.storm</groupId>
+            <artifactId>storm-hdfs</artifactId>
+            <version>${storm.version}</version>
+        </dependency>
     </dependencies>
 
     <build>

http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/hdfs_bolt.properties
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/hdfs_bolt.properties b/flux-examples/src/main/resources/hdfs_bolt.properties
new file mode 100644
index 0000000..34a7a23
--- /dev/null
+++ b/flux-examples/src/main/resources/hdfs_bolt.properties
@@ -0,0 +1,9 @@
+# The HDFS url
+hdfs.url="hdfs://hadoop:54310"
+
+# The HDFS directory where the bolt will write incoming data
+hdfs.write.dir="/incoming"
+
+# The HDFS directory where files will be moved once the bolt has
+# finished writing to it.
+hdfs.dest.dir="/complete"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/multilang.yaml
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/multilang.yaml b/flux-examples/src/main/resources/multilang.yaml
new file mode 100644
index 0000000..4f80667
--- /dev/null
+++ b/flux-examples/src/main/resources/multilang.yaml
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test ability to wire together shell spouts/bolts
+---
+
+# topology definition
+# name to be used when submitting
+name: "shell-topology"
+
+# topology configuration
+# this will be passed to the submitter as a map of config options
+#
+config:
+  topology.workers: 1
+  # ...
+
+# spout definitions
+spouts:
+  - id: "sentence-spout"
+    className: "org.apache.storm.flux.wrappers.spouts.FluxShellSpout"
+    # shell spout constructor takes 2 arguments: String[], String[]
+    constructorArgs:
+      # command line
+      - ["node", "randomsentence.js"]
+      # output fields
+      - ["word"]
+    parallelism: 1
+    # ...
+
+# bolt definitions
+bolts:
+  - id: "splitsentence"
+    className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt"
+    constructorArgs:
+      # command line
+      - ["python", "splitsentence.py"]
+      # output fields
+      - ["word"]
+    parallelism: 1
+    # ...
+
+  - id: "log"
+    className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
+    parallelism: 1
+    # ...
+
+  - id: "count"
+    className: "backtype.storm.testing.TestWordCounter"
+    parallelism: 1
+    # ...
+
+#stream definitions
+# stream definitions define connections between spouts and bolts.
+# note that such connections can be cyclical
+# custom stream groupings are also supported
+
+streams:
+  - name: "spout --> split" # name isn't used (placeholder for logging, UI, etc.)
+    from: "sentence-spout"
+    to: "splitsentence"
+    grouping:
+      type: SHUFFLE
+
+  - name: "split --> count"
+    from: "splitsentence"
+    to: "count"
+    grouping:
+      type: FIELDS
+      args: ["word"]
+
+  - name: "count --> log"
+    from: "count"
+    to: "log"
+    grouping:
+      type: SHUFFLE

http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/shell.yaml
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/shell.yaml b/flux-examples/src/main/resources/shell.yaml
deleted file mode 100644
index 4f80667..0000000
--- a/flux-examples/src/main/resources/shell.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Test ability to wire together shell spouts/bolts
----
-
-# topology definition
-# name to be used when submitting
-name: "shell-topology"
-
-# topology configuration
-# this will be passed to the submitter as a map of config options
-#
-config:
-  topology.workers: 1
-  # ...
-
-# spout definitions
-spouts:
-  - id: "sentence-spout"
-    className: "org.apache.storm.flux.wrappers.spouts.FluxShellSpout"
-    # shell spout constructor takes 2 arguments: String[], String[]
-    constructorArgs:
-      # command line
-      - ["node", "randomsentence.js"]
-      # output fields
-      - ["word"]
-    parallelism: 1
-    # ...
-
-# bolt definitions
-bolts:
-  - id: "splitsentence"
-    className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt"
-    constructorArgs:
-      # command line
-      - ["python", "splitsentence.py"]
-      # output fields
-      - ["word"]
-    parallelism: 1
-    # ...
-
-  - id: "log"
-    className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
-    parallelism: 1
-    # ...
-
-  - id: "count"
-    className: "backtype.storm.testing.TestWordCounter"
-    parallelism: 1
-    # ...
-
-#stream definitions
-# stream definitions define connections between spouts and bolts.
-# note that such connections can be cyclical
-# custom stream groupings are also supported
-
-streams:
-  - name: "spout --> split" # name isn't used (placeholder for logging, UI, etc.)
-    from: "sentence-spout"
-    to: "splitsentence"
-    grouping:
-      type: SHUFFLE
-
-  - name: "split --> count"
-    from: "splitsentence"
-    to: "count"
-    grouping:
-      type: FIELDS
-      args: ["word"]
-
-  - name: "count --> log"
-    from: "count"
-    to: "log"
-    grouping:
-      type: SHUFFLE

http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/simple_hdfs.yaml
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/simple_hdfs.yaml b/flux-examples/src/main/resources/simple_hdfs.yaml
new file mode 100644
index 0000000..ea7721d
--- /dev/null
+++ b/flux-examples/src/main/resources/simple_hdfs.yaml
@@ -0,0 +1,105 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test ability to wire together shell spouts/bolts
+---
+
+# topology definition
+# name to be used when submitting
+name: "hdfs-topology"
+
+# Components
+# Components are analagous to Spring beans. They are meant to be used as constructor,
+# property(setter), and builder arguments.
+#
+# for the time being, components must be declared in the order they are referenced
+components:
+  - id: "syncPolicy"
+    className: "org.apache.storm.hdfs.bolt.sync.CountSyncPolicy"
+    constructorArgs:
+      - 1000
+  - id: "rotationPolicy"
+    className: "org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy"
+    constructorArgs:
+      - 30
+      - SECONDS
+
+  - id: "fileNameFormat"
+    className: "org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat"
+    configMethods:
+      - name: "withPath"
+        args: [${hdfs.write.dir}]
+      - name: "withExtension"
+        args: [".txt"]
+
+  - id: "recordFormat"
+    className: "org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat"
+    configMethods:
+      - name: "withFieldDelimiter"
+        args: ["|"]
+
+  - id: "rotationAction"
+    className: "org.apache.storm.hdfs.common.rotation.MoveFileAction"
+    configMethods:
+      - name: "toDestination"
+        args: [${hdfs.dest.dir}]
+
+# spout definitions
+spouts:
+  - id: "spout-1"
+    className: "backtype.storm.testing.TestWordSpout"
+    parallelism: 1
+    # ...
+
+# bolt definitions
+
+bolts:
+  - id: "bolt-1"
+    className: "org.apache.storm.hdfs.bolt.HdfsBolt"
+    configMethods:
+      - name: "withConfigKey"
+        args: ["hdfs.config"]
+      - name: "withFsUrl"
+        args: [${hdfs.url}]
+      - name: "withFileNameFormat"
+        args: [ref: "fileNameFormat"]
+      - name: "withRecordFormat"
+        args: [ref: "recordFormat"]
+      - name: "withRotationPolicy"
+        args: [ref: "rotationPolicy"]
+      - name: "withSyncPolicy"
+        args: [ref: "syncPolicy"]
+      - name: "addRotationAction"
+        args: [ref: "rotationAction"]
+    parallelism: 1
+    # ...
+
+  - id: "bolt-2"
+    className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
+    parallelism: 1
+
+streams:
+  - name: "" # name isn't used (placeholder for logging, UI, etc.)
+    from: "spout-1"
+    to: "bolt-1"
+    grouping:
+      type: SHUFFLE
+
+  - name: "" # name isn't used (placeholder for logging, UI, etc.)
+    from: "spout-1"
+    to: "bolt-2"
+    grouping:
+      type: SHUFFLE
\ No newline at end of file