You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@storm.apache.org by pt...@apache.org on 2015/06/04 04:05:50 UTC
[08/50] [abbrv] storm git commit: update examples and docs for HDFS
example
update examples and docs for HDFS example
Project: http://git-wip-us.apache.org/repos/asf/storm/repo
Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/0c1e0aa8
Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/0c1e0aa8
Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/0c1e0aa8
Branch: refs/heads/0.10.x-branch
Commit: 0c1e0aa81f39e473bcf3482448813d78065e1212
Parents: 3411bc7
Author: P. Taylor Goetz <pt...@gmail.com>
Authored: Mon Apr 6 23:48:38 2015 -0400
Committer: P. Taylor Goetz <pt...@gmail.com>
Committed: Mon Apr 6 23:48:38 2015 -0400
----------------------------------------------------------------------
.../src/test/resources/configs/hdfs_test.yaml | 97 -----------------
flux-examples/README.md | 30 +++++-
flux-examples/pom.xml | 6 ++
.../src/main/resources/hdfs_bolt.properties | 9 ++
flux-examples/src/main/resources/multilang.yaml | 89 ++++++++++++++++
flux-examples/src/main/resources/shell.yaml | 89 ----------------
.../src/main/resources/simple_hdfs.yaml | 105 +++++++++++++++++++
7 files changed, 238 insertions(+), 187 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-core/src/test/resources/configs/hdfs_test.yaml
----------------------------------------------------------------------
diff --git a/flux-core/src/test/resources/configs/hdfs_test.yaml b/flux-core/src/test/resources/configs/hdfs_test.yaml
deleted file mode 100644
index c1d28d2..0000000
--- a/flux-core/src/test/resources/configs/hdfs_test.yaml
+++ /dev/null
@@ -1,97 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Test ability to wire together shell spouts/bolts
----
-
-# topology definition
-# name to be used when submitting
-name: "hdfs-topology"
-
-# Components
-# Components are analagous to Spring beans. They are meant to be used as constructor,
-# property(setter), and builder arguments.
-#
-# for the time being, components must be declared in the order they are referenced
-components:
- - id: "syncPolicy"
- className: "org.apache.storm.hdfs.bolt.sync.CountSyncPolicy"
- constructorArgs:
- - 1000
- - id: "rotationPolicy"
- className: "org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy"
- constructorArgs:
- - 5.0
- - MB
-
- - id: "fileNameFormat"
- className: "org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat"
- configMethods:
- - name: "withPath"
- args: ["/tmp/foo/"]
- - name: "withExtension"
- args: [".txt"]
-
- - id: "recordFormat"
- className: "org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat"
- configMethods:
- - name: "withFieldDelimiter"
- args: ["|"]
-
- - id: "rotationAction"
- className: "org.apache.storm.hdfs.common.rotation.MoveFileAction"
- configMethods:
- - name: "toDestination"
- args: ["/tmp/dest2"]
-
-# spout definitions
-spouts:
- - id: "spout-1"
- className: "backtype.storm.testing.TestWordSpout"
- parallelism: 1
- # ...
-
-# bolt definitions
-
-# HdfsBolt bolt = new HdfsBolt()
-# .withConfigKey("hdfs.config")
-# .withFsUrl(args[0])
-# .withFileNameFormat(fileNameFormat)
-# .withRecordFormat(format)
-# .withRotationPolicy(rotationPolicy)
-# .withSyncPolicy(syncPolicy)
-# .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));
-bolts:
- - id: "bolt-1"
- className: "org.apache.storm.hdfs.bolt.HdfsBolt"
- configMethods:
- - name: "withConfigKey"
- args: ["hdfs.config"]
- - name: "withFsUrl"
- args: ["hdfs://localhost:1234"]
- - name: "withFileNameFormat"
- args: [ref: "fileNameFormat"]
- - name: "withRecordFormat"
- args: [ref: "recordFormat"]
- - name: "withRotationPolicy"
- args: [ref: "rotationPolicy"]
- - name: "withSyncPolicy"
- args: [ref: "syncPolicy"]
- - name: "addRotationAction"
- args: [ref: "rotationAction"]
- parallelism: 1
- # ...
-
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/README.md
----------------------------------------------------------------------
diff --git a/flux-examples/README.md b/flux-examples/README.md
index 2f107e7..9f5682e 100644
--- a/flux-examples/README.md
+++ b/flux-examples/README.md
@@ -23,6 +23,34 @@ The example YAML files are also packaged in the examples jar, so they can also b
command line switch:
```bash
-storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local --resource /sime_wordcount.yaml
+storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local --resource /simple_wordcount.yaml
+```
+
+## Available Examples
+
+### simple_wordcount.yaml
+
+This is a very basic wordcount example using Java spouts and bolts. It simply logs the running count of each word
+received.
+
+### multilang.yaml
+
+Another wordcount example that uses a spout written in JavaScript (node.js), a bolt written in Python, and two bolts
+written in java.
+
+### kafka_spout.yaml
+This example illustrates how to configure Storm's `storm-kafka` spout using Flux YAML DSL `components`, `references`,
+and `constructor arguments` constructs.
+
+### simple_hdfs.yaml
+
+This example demonstrates using Flux to setup a storm-hdfs bolt to write to an HDFS cluster. It also demonstrates Flux's
+variable substitution/filtering feature.
+
+To run the `simple_hdfs.yaml` example, copy the `hdfs_bolt.properties` file to a convenient location and change, at
+least, the property `hdfs.url` to point to a HDFS cluster. Then you can run the example something like:
+
+```bash
+storm jar ./target/flux-examples-0.2.3-SNAPSHOT.jar org.apache.storm.flux.Flux --local ./src/main/resources/simple_hdfs.yaml --filter my_hdfs_bolt.properties
```
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/pom.xml
----------------------------------------------------------------------
diff --git a/flux-examples/pom.xml b/flux-examples/pom.xml
index 63bc312..09db717 100644
--- a/flux-examples/pom.xml
+++ b/flux-examples/pom.xml
@@ -44,6 +44,12 @@
<artifactId>flux-wrappers</artifactId>
<version>${project.version}</version>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.storm</groupId>
+ <artifactId>storm-hdfs</artifactId>
+ <version>${storm.version}</version>
+ </dependency>
</dependencies>
<build>
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/hdfs_bolt.properties
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/hdfs_bolt.properties b/flux-examples/src/main/resources/hdfs_bolt.properties
new file mode 100644
index 0000000..34a7a23
--- /dev/null
+++ b/flux-examples/src/main/resources/hdfs_bolt.properties
@@ -0,0 +1,9 @@
+# The HDFS url
+hdfs.url="hdfs://hadoop:54310"
+
+# The HDFS directory where the bolt will write incoming data
+hdfs.write.dir="/incoming"
+
+# The HDFS directory where files will be moved once the bolt has
+# finished writing to it.
+hdfs.dest.dir="/complete"
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/multilang.yaml
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/multilang.yaml b/flux-examples/src/main/resources/multilang.yaml
new file mode 100644
index 0000000..4f80667
--- /dev/null
+++ b/flux-examples/src/main/resources/multilang.yaml
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test ability to wire together shell spouts/bolts
+---
+
+# topology definition
+# name to be used when submitting
+name: "shell-topology"
+
+# topology configuration
+# this will be passed to the submitter as a map of config options
+#
+config:
+ topology.workers: 1
+ # ...
+
+# spout definitions
+spouts:
+ - id: "sentence-spout"
+ className: "org.apache.storm.flux.wrappers.spouts.FluxShellSpout"
+ # shell spout constructor takes 2 arguments: String[], String[]
+ constructorArgs:
+ # command line
+ - ["node", "randomsentence.js"]
+ # output fields
+ - ["word"]
+ parallelism: 1
+ # ...
+
+# bolt definitions
+bolts:
+ - id: "splitsentence"
+ className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt"
+ constructorArgs:
+ # command line
+ - ["python", "splitsentence.py"]
+ # output fields
+ - ["word"]
+ parallelism: 1
+ # ...
+
+ - id: "log"
+ className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
+ parallelism: 1
+ # ...
+
+ - id: "count"
+ className: "backtype.storm.testing.TestWordCounter"
+ parallelism: 1
+ # ...
+
+#stream definitions
+# stream definitions define connections between spouts and bolts.
+# note that such connections can be cyclical
+# custom stream groupings are also supported
+
+streams:
+ - name: "spout --> split" # name isn't used (placeholder for logging, UI, etc.)
+ from: "sentence-spout"
+ to: "splitsentence"
+ grouping:
+ type: SHUFFLE
+
+ - name: "split --> count"
+ from: "splitsentence"
+ to: "count"
+ grouping:
+ type: FIELDS
+ args: ["word"]
+
+ - name: "count --> log"
+ from: "count"
+ to: "log"
+ grouping:
+ type: SHUFFLE
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/shell.yaml
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/shell.yaml b/flux-examples/src/main/resources/shell.yaml
deleted file mode 100644
index 4f80667..0000000
--- a/flux-examples/src/main/resources/shell.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Test ability to wire together shell spouts/bolts
----
-
-# topology definition
-# name to be used when submitting
-name: "shell-topology"
-
-# topology configuration
-# this will be passed to the submitter as a map of config options
-#
-config:
- topology.workers: 1
- # ...
-
-# spout definitions
-spouts:
- - id: "sentence-spout"
- className: "org.apache.storm.flux.wrappers.spouts.FluxShellSpout"
- # shell spout constructor takes 2 arguments: String[], String[]
- constructorArgs:
- # command line
- - ["node", "randomsentence.js"]
- # output fields
- - ["word"]
- parallelism: 1
- # ...
-
-# bolt definitions
-bolts:
- - id: "splitsentence"
- className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt"
- constructorArgs:
- # command line
- - ["python", "splitsentence.py"]
- # output fields
- - ["word"]
- parallelism: 1
- # ...
-
- - id: "log"
- className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
- parallelism: 1
- # ...
-
- - id: "count"
- className: "backtype.storm.testing.TestWordCounter"
- parallelism: 1
- # ...
-
-#stream definitions
-# stream definitions define connections between spouts and bolts.
-# note that such connections can be cyclical
-# custom stream groupings are also supported
-
-streams:
- - name: "spout --> split" # name isn't used (placeholder for logging, UI, etc.)
- from: "sentence-spout"
- to: "splitsentence"
- grouping:
- type: SHUFFLE
-
- - name: "split --> count"
- from: "splitsentence"
- to: "count"
- grouping:
- type: FIELDS
- args: ["word"]
-
- - name: "count --> log"
- from: "count"
- to: "log"
- grouping:
- type: SHUFFLE
http://git-wip-us.apache.org/repos/asf/storm/blob/0c1e0aa8/flux-examples/src/main/resources/simple_hdfs.yaml
----------------------------------------------------------------------
diff --git a/flux-examples/src/main/resources/simple_hdfs.yaml b/flux-examples/src/main/resources/simple_hdfs.yaml
new file mode 100644
index 0000000..ea7721d
--- /dev/null
+++ b/flux-examples/src/main/resources/simple_hdfs.yaml
@@ -0,0 +1,105 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test ability to wire together shell spouts/bolts
+---
+
+# topology definition
+# name to be used when submitting
+name: "hdfs-topology"
+
+# Components
+# Components are analagous to Spring beans. They are meant to be used as constructor,
+# property(setter), and builder arguments.
+#
+# for the time being, components must be declared in the order they are referenced
+components:
+ - id: "syncPolicy"
+ className: "org.apache.storm.hdfs.bolt.sync.CountSyncPolicy"
+ constructorArgs:
+ - 1000
+ - id: "rotationPolicy"
+ className: "org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy"
+ constructorArgs:
+ - 30
+ - SECONDS
+
+ - id: "fileNameFormat"
+ className: "org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat"
+ configMethods:
+ - name: "withPath"
+ args: [${hdfs.write.dir}]
+ - name: "withExtension"
+ args: [".txt"]
+
+ - id: "recordFormat"
+ className: "org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat"
+ configMethods:
+ - name: "withFieldDelimiter"
+ args: ["|"]
+
+ - id: "rotationAction"
+ className: "org.apache.storm.hdfs.common.rotation.MoveFileAction"
+ configMethods:
+ - name: "toDestination"
+ args: [${hdfs.dest.dir}]
+
+# spout definitions
+spouts:
+ - id: "spout-1"
+ className: "backtype.storm.testing.TestWordSpout"
+ parallelism: 1
+ # ...
+
+# bolt definitions
+
+bolts:
+ - id: "bolt-1"
+ className: "org.apache.storm.hdfs.bolt.HdfsBolt"
+ configMethods:
+ - name: "withConfigKey"
+ args: ["hdfs.config"]
+ - name: "withFsUrl"
+ args: [${hdfs.url}]
+ - name: "withFileNameFormat"
+ args: [ref: "fileNameFormat"]
+ - name: "withRecordFormat"
+ args: [ref: "recordFormat"]
+ - name: "withRotationPolicy"
+ args: [ref: "rotationPolicy"]
+ - name: "withSyncPolicy"
+ args: [ref: "syncPolicy"]
+ - name: "addRotationAction"
+ args: [ref: "rotationAction"]
+ parallelism: 1
+ # ...
+
+ - id: "bolt-2"
+ className: "org.apache.storm.flux.wrappers.bolts.LogInfoBolt"
+ parallelism: 1
+
+streams:
+ - name: "" # name isn't used (placeholder for logging, UI, etc.)
+ from: "spout-1"
+ to: "bolt-1"
+ grouping:
+ type: SHUFFLE
+
+ - name: "" # name isn't used (placeholder for logging, UI, etc.)
+ from: "spout-1"
+ to: "bolt-2"
+ grouping:
+ type: SHUFFLE
\ No newline at end of file