You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@storm.apache.org by ka...@apache.org on 2016/06/24 09:46:09 UTC

[1/3] storm git commit: STORM-1902: add a simple & flexible FileNameFormat for storm-hdfs

Repository: storm
Updated Branches:
  refs/heads/master 9773f0538 -> dcae610e4


STORM-1902: add a simple & flexible FileNameFormat for storm-hdfs

Closes #1489


Project: http://git-wip-us.apache.org/repos/asf/storm/repo
Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/7a144357
Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/7a144357
Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/7a144357

Branch: refs/heads/master
Commit: 7a144357f3d1bbcbd0be674535cfed32dfd6c20e
Parents: 9773f05
Author: vesense <be...@163.com>
Authored: Thu Jun 16 10:57:36 2016 +0800
Committer: Jungtaek Lim <ka...@gmail.com>
Committed: Fri Jun 24 18:40:48 2016 +0900

----------------------------------------------------------------------
 external/storm-hdfs/README.md                   |  14 +++
 .../hdfs/bolt/format/SimpleFileNameFormat.java  | 102 +++++++++++++++++++
 .../trident/format/SimpleFileNameFormat.java    |  97 ++++++++++++++++++
 .../bolt/format/TestSimpleFileNameFormat.java   |  79 ++++++++++++++
 .../format/TestSimpleFileNameFormat.java        |  70 +++++++++++++
 5 files changed, 362 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/storm/blob/7a144357/external/storm-hdfs/README.md
----------------------------------------------------------------------
diff --git a/external/storm-hdfs/README.md b/external/storm-hdfs/README.md
index 101e511..e6c07ef 100644
--- a/external/storm-hdfs/README.md
+++ b/external/storm-hdfs/README.md
@@ -158,6 +158,20 @@ For example:
 
 By default, prefix is empty and extenstion is ".txt".
 
+**New FileNameFormat:**
+
+The newly provided `org.apache.storm.hdfs.bolt.format.SimpleFileNameFormat` and `org.apache.storm.hdfs.trident.format.SimpleFileNameFormat` are more flexible; their `withName` method supports the following parameters:
+
+* $TIME - current time; use `withTimeFormat` to set its format
+* $NUM - rotation number
+* $HOST - local host name
+* $PARTITION - partition index (`org.apache.storm.hdfs.trident.format.SimpleFileNameFormat` only)
+* $COMPONENT - component id (`org.apache.storm.hdfs.bolt.format.SimpleFileNameFormat` only)
+* $TASK - task id (`org.apache.storm.hdfs.bolt.format.SimpleFileNameFormat` only)
+
+e.g. `seq.$TIME.$HOST.$COMPONENT.$NUM.dat`
+
+The default file `name` is `$TIME.$NUM.txt`, and the default `timeFormat` is `yyyyMMddHHmmss`.
 
 
 ### Sync Policies

http://git-wip-us.apache.org/repos/asf/storm/blob/7a144357/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/bolt/format/SimpleFileNameFormat.java
----------------------------------------------------------------------
diff --git a/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/bolt/format/SimpleFileNameFormat.java b/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/bolt/format/SimpleFileNameFormat.java
new file mode 100644
index 0000000..43273f6
--- /dev/null
+++ b/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/bolt/format/SimpleFileNameFormat.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.storm.hdfs.bolt.format;
+
+import java.net.UnknownHostException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Map;
+
+import org.apache.storm.task.TopologyContext;
+import org.apache.storm.utils.Utils;
+
+/**
+ * A {@link FileNameFormat} for HDFS bolts that builds file names from a template
+ * containing <code>$</code>-prefixed placeholders.
+ *
+ * <p>Supported placeholders are <code>$TIME</code>, <code>$NUM</code>, <code>$HOST</code>,
+ * <code>$COMPONENT</code> and <code>$TASK</code>. The defaults are path <code>/storm</code>,
+ * name <code>$TIME.$NUM.txt</code> and time format <code>yyyyMMddHHmmss</code>.
+ */
+public class SimpleFileNameFormat implements FileNameFormat {
+
+    private static final long serialVersionUID = 1L;
+
+    // Runtime values, populated by prepare().
+    private String componentId;
+    private int taskId;
+    private String host;
+
+    // Configuration, set through the with* methods.
+    private String path = "/storm";
+    private String name = "$TIME.$NUM.txt";
+    private String timeFormat = "yyyyMMddHHmmss";
+
+    /**
+     * Expands every supported placeholder in the configured name template.
+     *
+     * @param rotation
+     *            rotation number substituted for <code>$NUM</code>
+     * @param timeStamp
+     *            epoch milliseconds, rendered with the configured time format and
+     *            substituted for <code>$TIME</code>
+     * @return the resulting file name
+     * @throws IllegalStateException
+     *             if called before <code>prepare</code>
+     */
+    @Override
+    public String getName(long rotation, long timeStamp) {
+        if (host == null) {
+            // host is only assigned in prepare(); without this guard String.replace
+            // fails on the null replacement with a bare NullPointerException.
+            throw new IllegalStateException("prepare() must be called before getName()");
+        }
+        // A new formatter per call avoids sharing SimpleDateFormat, which is not thread-safe.
+        SimpleDateFormat dateFormat = new SimpleDateFormat(timeFormat);
+        return name
+                .replace("$TIME", dateFormat.format(new Date(timeStamp)))
+                .replace("$NUM", String.valueOf(rotation))
+                .replace("$HOST", host)
+                .replace("$COMPONENT", componentId)
+                .replace("$TASK", String.valueOf(taskId));
+    }
+
+    /**
+     * @return the configured directory path
+     */
+    @Override
+    public String getPath() {
+        return path;
+    }
+
+    /**
+     * Captures the component id and task id from the topology context and resolves
+     * the local host name; these back the <code>$COMPONENT</code>, <code>$TASK</code>
+     * and <code>$HOST</code> placeholders.
+     *
+     * @param conf
+     *            topology configuration (not used by this implementation)
+     * @param topologyContext
+     *            context of the task this format is used in
+     * @throws RuntimeException
+     *             wrapping the {@link UnknownHostException} if the local host name
+     *             cannot be resolved
+     */
+    @SuppressWarnings("unchecked")
+    @Override
+    public void prepare(Map conf, TopologyContext topologyContext) {
+        this.componentId = topologyContext.getThisComponentId();
+        this.taskId = topologyContext.getThisTaskId();
+        try {
+            this.host = Utils.localHostname();
+        } catch (UnknownHostException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Sets the directory path returned by {@link #getPath()}.
+     *
+     * @param path
+     *            directory path
+     * @return this format, for call chaining
+     */
+    public SimpleFileNameFormat withPath(String path) {
+        this.path = path;
+        return this;
+    }
+
+    /**
+     * Sets the file name template. Supported placeholders:
+     * <ul>
+     * <li>$TIME - current time. use <code>withTimeFormat</code> to format.</li>
+     * <li>$NUM - rotation number</li>
+     * <li>$HOST - local host name</li>
+     * <li>$COMPONENT - component id</li>
+     * <li>$TASK - task id</li>
+     * </ul>
+     *
+     * @param name
+     *            file name template
+     * @return this format, for call chaining
+     */
+    public SimpleFileNameFormat withName(String name) {
+        this.name = name;
+        return this;
+    }
+
+    /**
+     * Sets the {@link SimpleDateFormat} pattern used to render <code>$TIME</code>.
+     *
+     * @param timeFormat
+     *            date pattern, for example <code>yyyyMMddHHmmss</code>
+     * @return this format, for call chaining
+     * @throws IllegalArgumentException
+     *             if the pattern is <code>null</code> or not a valid pattern
+     */
+    public SimpleFileNameFormat withTimeFormat(String timeFormat) {
+        // Validate eagerly so a bad pattern fails at configuration time, not in getName().
+        try {
+            new SimpleDateFormat(timeFormat);
+        } catch (RuntimeException e) {
+            // Chain the original exception so its stack trace is preserved.
+            throw new IllegalArgumentException("invalid timeFormat: " + e.getMessage(), e);
+        }
+        this.timeFormat = timeFormat;
+        return this;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/storm/blob/7a144357/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/trident/format/SimpleFileNameFormat.java
----------------------------------------------------------------------
diff --git a/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/trident/format/SimpleFileNameFormat.java b/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/trident/format/SimpleFileNameFormat.java
new file mode 100644
index 0000000..239c6ca
--- /dev/null
+++ b/external/storm-hdfs/src/main/java/org/apache/storm/hdfs/trident/format/SimpleFileNameFormat.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.storm.hdfs.trident.format;
+
+import java.net.UnknownHostException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Map;
+
+import org.apache.storm.utils.Utils;
+
+/**
+ * A {@link FileNameFormat} for Trident HDFS state that builds file names from a
+ * template containing <code>$</code>-prefixed placeholders.
+ *
+ * <p>Supported placeholders are <code>$TIME</code>, <code>$NUM</code>, <code>$HOST</code>
+ * and <code>$PARTITION</code>. The defaults are path <code>/storm</code>, name
+ * <code>$TIME.$NUM.txt</code> and time format <code>yyyyMMddHHmmss</code>.
+ */
+public class SimpleFileNameFormat implements FileNameFormat {
+
+    private static final long serialVersionUID = 1L;
+
+    // Runtime values, populated by prepare().
+    private int partitionIndex;
+    private String host;
+
+    // Configuration, set through the with* methods.
+    private String path = "/storm";
+    private String name = "$TIME.$NUM.txt";
+    private String timeFormat = "yyyyMMddHHmmss";
+
+    /**
+     * Expands every supported placeholder in the configured name template.
+     *
+     * @param rotation
+     *            rotation number substituted for <code>$NUM</code>
+     * @param timeStamp
+     *            epoch milliseconds, rendered with the configured time format and
+     *            substituted for <code>$TIME</code>
+     * @return the resulting file name
+     * @throws IllegalStateException
+     *             if called before <code>prepare</code>
+     */
+    @Override
+    public String getName(long rotation, long timeStamp) {
+        if (host == null) {
+            // host is only assigned in prepare(); without this guard String.replace
+            // fails on the null replacement with a bare NullPointerException.
+            throw new IllegalStateException("prepare() must be called before getName()");
+        }
+        // A new formatter per call avoids sharing SimpleDateFormat, which is not thread-safe.
+        SimpleDateFormat dateFormat = new SimpleDateFormat(timeFormat);
+        return name
+                .replace("$TIME", dateFormat.format(new Date(timeStamp)))
+                .replace("$NUM", String.valueOf(rotation))
+                .replace("$HOST", host)
+                .replace("$PARTITION", String.valueOf(partitionIndex));
+    }
+
+    /**
+     * @return the configured directory path
+     */
+    @Override
+    public String getPath() {
+        return path;
+    }
+
+    /**
+     * Stores the partition index and resolves the local host name; these back the
+     * <code>$PARTITION</code> and <code>$HOST</code> placeholders.
+     *
+     * @param conf
+     *            configuration map (not used by this implementation)
+     * @param partitionIndex
+     *            index of the partition this format is used for
+     * @param numPartitions
+     *            total number of partitions (not used by this implementation)
+     * @throws RuntimeException
+     *             wrapping the {@link UnknownHostException} if the local host name
+     *             cannot be resolved
+     */
+    @SuppressWarnings("unchecked")
+    @Override
+    public void prepare(Map conf, int partitionIndex, int numPartitions) {
+        this.partitionIndex = partitionIndex;
+        try {
+            this.host = Utils.localHostname();
+        } catch (UnknownHostException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Sets the directory path returned by {@link #getPath()}.
+     *
+     * @param path
+     *            directory path
+     * @return this format, for call chaining
+     */
+    public SimpleFileNameFormat withPath(String path) {
+        this.path = path;
+        return this;
+    }
+
+    /**
+     * Sets the file name template. Supported placeholders:
+     * <ul>
+     * <li>$TIME - current time. use <code>withTimeFormat</code> to format.</li>
+     * <li>$NUM - rotation number</li>
+     * <li>$HOST - local host name</li>
+     * <li>$PARTITION - partition index</li>
+     * </ul>
+     *
+     * @param name
+     *            file name template
+     * @return this format, for call chaining
+     */
+    public SimpleFileNameFormat withName(String name) {
+        this.name = name;
+        return this;
+    }
+
+    /**
+     * Sets the {@link SimpleDateFormat} pattern used to render <code>$TIME</code>.
+     *
+     * @param timeFormat
+     *            date pattern, for example <code>yyyyMMddHHmmss</code>
+     * @return this format, for call chaining
+     * @throws IllegalArgumentException
+     *             if the pattern is <code>null</code> or not a valid pattern
+     */
+    public SimpleFileNameFormat withTimeFormat(String timeFormat) {
+        // Validate eagerly so a bad pattern fails at configuration time, not in getName().
+        try {
+            new SimpleDateFormat(timeFormat);
+        } catch (RuntimeException e) {
+            // Chain the original exception so its stack trace is preserved.
+            throw new IllegalArgumentException("invalid timeFormat: " + e.getMessage(), e);
+        }
+        this.timeFormat = timeFormat;
+        return this;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/storm/blob/7a144357/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/bolt/format/TestSimpleFileNameFormat.java
----------------------------------------------------------------------
diff --git a/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/bolt/format/TestSimpleFileNameFormat.java b/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/bolt/format/TestSimpleFileNameFormat.java
new file mode 100644
index 0000000..4c90295
--- /dev/null
+++ b/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/bolt/format/TestSimpleFileNameFormat.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.storm.hdfs.bolt.format;
+
+import java.net.UnknownHostException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.storm.task.TopologyContext;
+import org.apache.storm.utils.Utils;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for the bolt {@link SimpleFileNameFormat}.
+ */
+public class TestSimpleFileNameFormat {
+
+    /** With no configuration, the path is "/storm" and the name is "$TIME.$NUM.txt". */
+    @Test
+    public void testDefaults() {
+        SimpleFileNameFormat format = new SimpleFileNameFormat();
+        format.prepare(null, createTopologyContext());
+        long now = System.currentTimeMillis();
+        String path = format.getPath();
+        String name = format.getName(1, now);
+
+        Assert.assertEquals("/storm", path);
+        String time = new SimpleDateFormat("yyyyMMddHHmmss").format(now);
+        Assert.assertEquals(time+".1.txt", name);
+    }
+
+    /** Every placeholder supported by the bolt format is substituted. */
+    @Test
+    public void testParameters() throws UnknownHostException {
+        SimpleFileNameFormat format = new SimpleFileNameFormat()
+            .withName("$TIME.$HOST.$COMPONENT.$TASK.$NUM.txt")
+            .withPath("/mypath")
+            .withTimeFormat("yyyy-MM-dd HH:mm:ss");
+        format.prepare(null, createTopologyContext());
+        long now = System.currentTimeMillis();
+        String path = format.getPath();
+        String name = format.getName(1, now);
+
+        Assert.assertEquals("/mypath", path);
+        String time = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(now);
+        // Let a host lookup failure fail the test directly instead of comparing against null.
+        String host = Utils.localHostname();
+        Assert.assertEquals(time+"."+host+".Xcom.7.1.txt", name);
+    }
+
+    /** An invalid date pattern is rejected when it is configured. */
+    @Test(expected=IllegalArgumentException.class)
+    public void testTimeFormat() {
+        // 'x' is not a SimpleDateFormat pattern letter, so withTimeFormat throws here.
+        new SimpleFileNameFormat().withTimeFormat("xyz");
+    }
+
+    /** Builds a context whose current task is task 7 of component "Xcom". */
+    private TopologyContext createTopologyContext(){
+        Map<Integer, String> taskToComponent = new HashMap<Integer, String>();
+        taskToComponent.put(7, "Xcom");
+        return new TopologyContext(null, null, taskToComponent, null, null, null, null, null, 7, 6703, null, null, null, null, null, null);
+    }
+}

http://git-wip-us.apache.org/repos/asf/storm/blob/7a144357/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/trident/format/TestSimpleFileNameFormat.java
----------------------------------------------------------------------
diff --git a/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/trident/format/TestSimpleFileNameFormat.java b/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/trident/format/TestSimpleFileNameFormat.java
new file mode 100644
index 0000000..232b4cf
--- /dev/null
+++ b/external/storm-hdfs/src/test/java/org/apache/storm/hdfs/trident/format/TestSimpleFileNameFormat.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.storm.hdfs.trident.format;
+
+import java.net.UnknownHostException;
+import java.text.SimpleDateFormat;
+
+import org.apache.storm.utils.Utils;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for the Trident {@link SimpleFileNameFormat}.
+ */
+public class TestSimpleFileNameFormat {
+
+    /** With no configuration, the path is "/storm" and the name is "$TIME.$NUM.txt". */
+    @Test
+    public void testDefaults() {
+        SimpleFileNameFormat format = new SimpleFileNameFormat();
+        format.prepare(null, 3, 5);
+        long now = System.currentTimeMillis();
+        String path = format.getPath();
+        String name = format.getName(1, now);
+
+        Assert.assertEquals("/storm", path);
+        String time = new SimpleDateFormat("yyyyMMddHHmmss").format(now);
+        Assert.assertEquals(time+".1.txt", name);
+    }
+
+    /** Every placeholder supported by the Trident format is substituted. */
+    @Test
+    public void testParameters() throws UnknownHostException {
+        SimpleFileNameFormat format = new SimpleFileNameFormat()
+            .withName("$TIME.$HOST.$PARTITION.$NUM.txt")
+            .withPath("/mypath")
+            .withTimeFormat("yyyy-MM-dd HH:mm:ss");
+        format.prepare(null, 3, 5);
+        long now = System.currentTimeMillis();
+        String path = format.getPath();
+        String name = format.getName(1, now);
+
+        Assert.assertEquals("/mypath", path);
+        String time = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(now);
+        // Let a host lookup failure fail the test directly instead of comparing against null.
+        String host = Utils.localHostname();
+        Assert.assertEquals(time+"."+host+".3.1.txt", name);
+    }
+
+    /** An invalid date pattern is rejected when it is configured. */
+    @Test(expected=IllegalArgumentException.class)
+    public void testTimeFormat() {
+        // 'x' is not a SimpleDateFormat pattern letter, so withTimeFormat throws here.
+        new SimpleFileNameFormat().withTimeFormat("xyz");
+    }
+}


[2/3] storm git commit: Merge branch 'STORM-1902'

Posted by ka...@apache.org.
Merge branch 'STORM-1902'


Project: http://git-wip-us.apache.org/repos/asf/storm/repo
Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/65aae49d
Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/65aae49d
Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/65aae49d

Branch: refs/heads/master
Commit: 65aae49dd7528ba2c41076e8a5ec57b94686623e
Parents: 9773f05 7a14435
Author: Jungtaek Lim <ka...@gmail.com>
Authored: Fri Jun 24 18:42:13 2016 +0900
Committer: Jungtaek Lim <ka...@gmail.com>
Committed: Fri Jun 24 18:42:13 2016 +0900

----------------------------------------------------------------------
 external/storm-hdfs/README.md                   |  14 +++
 .../hdfs/bolt/format/SimpleFileNameFormat.java  | 102 +++++++++++++++++++
 .../trident/format/SimpleFileNameFormat.java    |  97 ++++++++++++++++++
 .../bolt/format/TestSimpleFileNameFormat.java   |  79 ++++++++++++++
 .../format/TestSimpleFileNameFormat.java        |  70 +++++++++++++
 5 files changed, 362 insertions(+)
----------------------------------------------------------------------



[3/3] storm git commit: add STORM-1902 to CHANGELOG

Posted by ka...@apache.org.
add STORM-1902 to CHANGELOG


Project: http://git-wip-us.apache.org/repos/asf/storm/repo
Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/dcae610e
Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/dcae610e
Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/dcae610e

Branch: refs/heads/master
Commit: dcae610e4d12db884110b72e8b9db691b26346b4
Parents: 65aae49
Author: Jungtaek Lim <ka...@gmail.com>
Authored: Fri Jun 24 18:45:53 2016 +0900
Committer: Jungtaek Lim <ka...@gmail.com>
Committed: Fri Jun 24 18:45:53 2016 +0900

----------------------------------------------------------------------
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/storm/blob/dcae610e/CHANGELOG.md
----------------------------------------------------------------------
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 15b417c..4366a2f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -108,6 +108,7 @@
  * STORM-1769: Added a test to check local nimbus with notifier plugin
 
 ## 1.1.0 
+ * STORM-1902: add a simple & flexible FileNameFormat for storm-hdfs
  * STORM-1914: Storm Kafka Field Topic Selector
  * STORM-1925: Remove Nimbus thrift call from Nimbus itself
  * STORM-1907: PartitionedTridentSpoutExecutor has incompatible types that cause ClassCastException