You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@streams.apache.org by sb...@apache.org on 2014/02/10 21:23:26 UTC

svn commit: r1566730 - in /incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch: ./ pom.xml

Author: sblackmon
Date: Mon Feb 10 20:23:26 2014
New Revision: 1566730

URL: http://svn.apache.org/r1566730
Log:
Scaffold for streams-persist-elasticsearch


Added:
    incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/
    incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml

Added: incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
URL: http://svn.apache.org/viewvc/incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml?rev=1566730&view=auto
==============================================================================
--- incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml (added)
+++ incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml Mon Feb 10 20:23:26 2014
@@ -0,0 +1,93 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>streams-contrib</artifactId>
+        <groupId>org.apache.streams</groupId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>streams-persist-elasticsearch</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.streams</groupId>
+            <artifactId>streams-config</artifactId>
+            <version>0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.streams</groupId>
+            <artifactId>streams-core</artifactId>
+            <version>0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.streams</groupId>
+            <artifactId>streams-pojo</artifactId>
+            <version>0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.streams</groupId>
+            <artifactId>streams-util</artifactId>
+            <version>0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.elasticsearch</groupId>
+            <artifactId>elasticsearch</artifactId>
+            <version>0.90.5</version>
+            <scope>compile</scope>
+            <type>jar</type>
+        </dependency>
+        <dependency>
+            <groupId>org.json</groupId>
+            <artifactId>json</artifactId>
+            <version>20090211</version>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <version>1.8</version>
+                <executions>
+                    <execution>
+                        <id>add-source</id>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>add-source</goal>
+                        </goals>
+                        <configuration>
+                            <sources>
+                                <source>target/generated-sources/jsonschema2pojo</source>
+                            </sources>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.jsonschema2pojo</groupId>
+                <artifactId>jsonschema2pojo-maven-plugin</artifactId>
+                <configuration>
+                    <addCompileSourceRoot>true</addCompileSourceRoot>
+                    <generateBuilders>true</generateBuilders>
+                    <sourcePaths>
+                        <sourcePath>src/main/jsonschema/org/apache/streams/elasticsearch/ElasticsearchConfiguration.json</sourcePath>
+                    </sourcePaths>
+                    <outputDirectory>target/generated-sources/jsonschema2pojo</outputDirectory>
+                    <targetPackage>org.apache.streams.elasticsearch.pojo</targetPackage>
+                    <useLongIntegers>true</useLongIntegers>
+                    <useJodaDates>false</useJodaDates>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>generate</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file



Re: svn commit: r1566730 - in /incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch: ./ pom.xml

Posted by Steve Blackmon <sb...@apache.org>.
This evening I committed an elasticsearch persister.  Special thanks
to @smashew who contributed heavily to the code in the module.

https://github.com/w2ogroup/streams-examples has two modules that
demonstrate how to use it.

At a high level:
1) mvn install streams 0.1-SNAPSHOT
2) mvn package twitter-history-elasticsearch
3) install elasticsearch 1.0.0.RC2
4) install http://www.elasticsearch.org/overview/marvel/
5) start elasticsearch: bin/elasticsearch
6) for each json file in twitter-history-elasticsearch/src/main/resources:
       curl -XPUT ${eshost}:9200/_template/${templateid} <-- where
templateid is the name of the file before '['
7) java -cp twitter-history-elasticsearch/target/*.jar
       (edit twitter-history-elasticsearch/src/main/resources/application.conf
to change which Twitter accounts to archive)
8) visit ${eshost}:9200/_plugin/marvel; you should be able to confirm
that the specified index exists and contains documents

Then review and run elasticsearch-reindex, which uses
elasticsearch-reindex/src/main/resources/application.conf

Concurrency management and constructor proliferation are starting to
become issues.  I'm thinking of dealing with intra-process concurrency
by subclassing from a new class with a public static final
ForkJoinPool.  I'm thinking of dealing with constructor proliferation
by switching to 'chaining' setters, and coding all providers and
persisters to initialize primitives and lists directly from their
corresponding typesafe Configs.

Please let me know if you find some of the recent contributions
useful.  I'm absolutely open to suggestions on how to improve their
general code quality and want to find opportunities to integrate them
with other streams modules.

Steve

On Mon, Feb 10, 2014 at 12:25 PM, Jason Letourneau
<jl...@gmail.com> wrote:
> Awesome... I was just thinking Elasticsearch persistence would be a
> great contribution...
>
> On Mon, Feb 10, 2014 at 3:23 PM,  <sb...@apache.org> wrote:
>> Author: sblackmon
>> Date: Mon Feb 10 20:23:26 2014
>> New Revision: 1566730
>>
>> URL: http://svn.apache.org/r1566730
>> Log:
>> Scaffold for streams-persist-elasticsearch
>>
>>
>> Added:
>>     incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/
>>     incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
>>
>> Added: incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
>> URL: http://svn.apache.org/viewvc/incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml?rev=1566730&view=auto
>> ==============================================================================
>> --- incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml (added)
>> +++ incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml Mon Feb 10 20:23:26 2014
>> @@ -0,0 +1,93 @@
>> +<?xml version="1.0" encoding="UTF-8"?>
>> +<project xmlns="http://maven.apache.org/POM/4.0.0"
>> +         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>> +         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
>> +    <parent>
>> +        <artifactId>streams-contrib</artifactId>
>> +        <groupId>org.apache.streams</groupId>
>> +        <version>0.1-SNAPSHOT</version>
>> +    </parent>
>> +    <modelVersion>4.0.0</modelVersion>
>> +
>> +    <artifactId>streams-persist-elasticsearch</artifactId>
>> +
>> +    <dependencies>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-config</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-core</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-pojo</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-util</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.elasticsearch</groupId>
>> +            <artifactId>elasticsearch</artifactId>
>> +            <version>0.90.5</version>
>> +            <scope>compile</scope>
>> +            <type>jar</type>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.json</groupId>
>> +            <artifactId>json</artifactId>
>> +            <version>20090211</version>
>> +        </dependency>
>> +    </dependencies>
>> +    <build>
>> +        <plugins>
>> +            <plugin>
>> +                <groupId>org.codehaus.mojo</groupId>
>> +                <artifactId>build-helper-maven-plugin</artifactId>
>> +                <version>1.8</version>
>> +                <executions>
>> +                    <execution>
>> +                        <id>add-source</id>
>> +                        <phase>generate-sources</phase>
>> +                        <goals>
>> +                            <goal>add-source</goal>
>> +                        </goals>
>> +                        <configuration>
>> +                            <sources>
>> +                                <source>target/generated-sources/jsonschema2pojo</source>
>> +                            </sources>
>> +                        </configuration>
>> +                    </execution>
>> +                </executions>
>> +            </plugin>
>> +            <plugin>
>> +                <groupId>org.jsonschema2pojo</groupId>
>> +                <artifactId>jsonschema2pojo-maven-plugin</artifactId>
>> +                <configuration>
>> +                    <addCompileSourceRoot>true</addCompileSourceRoot>
>> +                    <generateBuilders>true</generateBuilders>
>> +                    <sourcePaths>
>> +                        <sourcePath>src/main/jsonschema/org/apache/streams/elasticsearch/ElasticsearchConfiguration.json</sourcePath>
>> +                    </sourcePaths>
>> +                    <outputDirectory>target/generated-sources/jsonschema2pojo</outputDirectory>
>> +                    <targetPackage>org.apache.streams.elasticsearch.pojo</targetPackage>
>> +                    <useLongIntegers>true</useLongIntegers>
>> +                    <useJodaDates>false</useJodaDates>
>> +                </configuration>
>> +                <executions>
>> +                    <execution>
>> +                        <goals>
>> +                            <goal>generate</goal>
>> +                        </goals>
>> +                    </execution>
>> +                </executions>
>> +            </plugin>
>> +        </plugins>
>> +    </build>
>> +</project>
>> \ No newline at end of file
>>
>>

Re: svn commit: r1566730 - in /incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch: ./ pom.xml

Posted by Steve Blackmon <st...@blackmon.org>.
This evening I committed an elasticsearch persister.  Special thanks
to @smashew who contributed heavily to the code in the module.

https://github.com/w2ogroup/streams-examples has two modules that
demonstrate how to use it.

At a high level:
1) mvn install streams 0.1-SNAPSHOT
2) mvn package twitter-history-elasticsearch
3) install elasticsearch 1.0.0.RC2
4) install http://www.elasticsearch.org/overview/marvel/
5) start elasticsearch: bin/elasticsearch
6) for each json file in twitter-history-elasticsearch/src/main/resources:
       curl -XPUT ${eshost}:9200/_template/${templateid} <-- where
templateid is the name of the file before '['
7) java -cp twitter-history-elasticsearch/target/*.jar
       (edit twitter-history-elasticsearch/src/main/resources/application.conf
to change which Twitter accounts to archive)
8) visit ${eshost}:9200/_plugin/marvel; you should be able to confirm
that the specified index exists and contains documents

Then review and run elasticsearch-reindex, which uses
elasticsearch-reindex/src/main/resources/application.conf

Concurrency management and constructor proliferation are starting to
become issues.  I'm thinking of dealing with intra-process concurrency
by subclassing from a new class with a public static final
ForkJoinPool.  I'm thinking of dealing with constructor proliferation
by switching to 'chaining' setters, and coding all providers and
persisters to initialize primitives and lists directly from their
corresponding typesafe Configs.

Please let me know if you find some of the recent contributions
useful.  I'm absolutely open to suggestions on how to improve their
general code quality and want to find opportunities to integrate them
with other streams modules.

Steve

On Mon, Feb 10, 2014 at 12:25 PM, Jason Letourneau
<jl...@gmail.com> wrote:
> Awesome... I was just thinking Elasticsearch persistence would be a
> great contribution...
>
> On Mon, Feb 10, 2014 at 3:23 PM,  <sb...@apache.org> wrote:
>> Author: sblackmon
>> Date: Mon Feb 10 20:23:26 2014
>> New Revision: 1566730
>>
>> URL: http://svn.apache.org/r1566730
>> Log:
>> Scaffold for streams-persist-elasticsearch
>>
>>
>> Added:
>>     incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/
>>     incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
>>
>> Added: incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
>> URL: http://svn.apache.org/viewvc/incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml?rev=1566730&view=auto
>> ==============================================================================
>> --- incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml (added)
>> +++ incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml Mon Feb 10 20:23:26 2014
>> @@ -0,0 +1,93 @@
>> +<?xml version="1.0" encoding="UTF-8"?>
>> +<project xmlns="http://maven.apache.org/POM/4.0.0"
>> +         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>> +         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
>> +    <parent>
>> +        <artifactId>streams-contrib</artifactId>
>> +        <groupId>org.apache.streams</groupId>
>> +        <version>0.1-SNAPSHOT</version>
>> +    </parent>
>> +    <modelVersion>4.0.0</modelVersion>
>> +
>> +    <artifactId>streams-persist-elasticsearch</artifactId>
>> +
>> +    <dependencies>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-config</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-core</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-pojo</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.apache.streams</groupId>
>> +            <artifactId>streams-util</artifactId>
>> +            <version>0.1-SNAPSHOT</version>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.elasticsearch</groupId>
>> +            <artifactId>elasticsearch</artifactId>
>> +            <version>0.90.5</version>
>> +            <scope>compile</scope>
>> +            <type>jar</type>
>> +        </dependency>
>> +        <dependency>
>> +            <groupId>org.json</groupId>
>> +            <artifactId>json</artifactId>
>> +            <version>20090211</version>
>> +        </dependency>
>> +    </dependencies>
>> +    <build>
>> +        <plugins>
>> +            <plugin>
>> +                <groupId>org.codehaus.mojo</groupId>
>> +                <artifactId>build-helper-maven-plugin</artifactId>
>> +                <version>1.8</version>
>> +                <executions>
>> +                    <execution>
>> +                        <id>add-source</id>
>> +                        <phase>generate-sources</phase>
>> +                        <goals>
>> +                            <goal>add-source</goal>
>> +                        </goals>
>> +                        <configuration>
>> +                            <sources>
>> +                                <source>target/generated-sources/jsonschema2pojo</source>
>> +                            </sources>
>> +                        </configuration>
>> +                    </execution>
>> +                </executions>
>> +            </plugin>
>> +            <plugin>
>> +                <groupId>org.jsonschema2pojo</groupId>
>> +                <artifactId>jsonschema2pojo-maven-plugin</artifactId>
>> +                <configuration>
>> +                    <addCompileSourceRoot>true</addCompileSourceRoot>
>> +                    <generateBuilders>true</generateBuilders>
>> +                    <sourcePaths>
>> +                        <sourcePath>src/main/jsonschema/org/apache/streams/elasticsearch/ElasticsearchConfiguration.json</sourcePath>
>> +                    </sourcePaths>
>> +                    <outputDirectory>target/generated-sources/jsonschema2pojo</outputDirectory>
>> +                    <targetPackage>org.apache.streams.elasticsearch.pojo</targetPackage>
>> +                    <useLongIntegers>true</useLongIntegers>
>> +                    <useJodaDates>false</useJodaDates>
>> +                </configuration>
>> +                <executions>
>> +                    <execution>
>> +                        <goals>
>> +                            <goal>generate</goal>
>> +                        </goals>
>> +                    </execution>
>> +                </executions>
>> +            </plugin>
>> +        </plugins>
>> +    </build>
>> +</project>
>> \ No newline at end of file
>>
>>

Re: svn commit: r1566730 - in /incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch: ./ pom.xml

Posted by Jason Letourneau <jl...@gmail.com>.
Awesome... I was just thinking Elasticsearch persistence would be a
great contribution...

On Mon, Feb 10, 2014 at 3:23 PM,  <sb...@apache.org> wrote:
> Author: sblackmon
> Date: Mon Feb 10 20:23:26 2014
> New Revision: 1566730
>
> URL: http://svn.apache.org/r1566730
> Log:
> Scaffold for streams-persist-elasticsearch
>
>
> Added:
>     incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/
>     incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
>
> Added: incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml
> URL: http://svn.apache.org/viewvc/incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml?rev=1566730&view=auto
> ==============================================================================
> --- incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml (added)
> +++ incubator/streams/trunk/streams-contrib/streams-persist-elasticsearch/pom.xml Mon Feb 10 20:23:26 2014
> @@ -0,0 +1,93 @@
> +<?xml version="1.0" encoding="UTF-8"?>
> +<project xmlns="http://maven.apache.org/POM/4.0.0"
> +         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> +         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
> +    <parent>
> +        <artifactId>streams-contrib</artifactId>
> +        <groupId>org.apache.streams</groupId>
> +        <version>0.1-SNAPSHOT</version>
> +    </parent>
> +    <modelVersion>4.0.0</modelVersion>
> +
> +    <artifactId>streams-persist-elasticsearch</artifactId>
> +
> +    <dependencies>
> +        <dependency>
> +            <groupId>org.apache.streams</groupId>
> +            <artifactId>streams-config</artifactId>
> +            <version>0.1-SNAPSHOT</version>
> +        </dependency>
> +        <dependency>
> +            <groupId>org.apache.streams</groupId>
> +            <artifactId>streams-core</artifactId>
> +            <version>0.1-SNAPSHOT</version>
> +        </dependency>
> +        <dependency>
> +            <groupId>org.apache.streams</groupId>
> +            <artifactId>streams-pojo</artifactId>
> +            <version>0.1-SNAPSHOT</version>
> +        </dependency>
> +        <dependency>
> +            <groupId>org.apache.streams</groupId>
> +            <artifactId>streams-util</artifactId>
> +            <version>0.1-SNAPSHOT</version>
> +        </dependency>
> +        <dependency>
> +            <groupId>org.elasticsearch</groupId>
> +            <artifactId>elasticsearch</artifactId>
> +            <version>0.90.5</version>
> +            <scope>compile</scope>
> +            <type>jar</type>
> +        </dependency>
> +        <dependency>
> +            <groupId>org.json</groupId>
> +            <artifactId>json</artifactId>
> +            <version>20090211</version>
> +        </dependency>
> +    </dependencies>
> +    <build>
> +        <plugins>
> +            <plugin>
> +                <groupId>org.codehaus.mojo</groupId>
> +                <artifactId>build-helper-maven-plugin</artifactId>
> +                <version>1.8</version>
> +                <executions>
> +                    <execution>
> +                        <id>add-source</id>
> +                        <phase>generate-sources</phase>
> +                        <goals>
> +                            <goal>add-source</goal>
> +                        </goals>
> +                        <configuration>
> +                            <sources>
> +                                <source>target/generated-sources/jsonschema2pojo</source>
> +                            </sources>
> +                        </configuration>
> +                    </execution>
> +                </executions>
> +            </plugin>
> +            <plugin>
> +                <groupId>org.jsonschema2pojo</groupId>
> +                <artifactId>jsonschema2pojo-maven-plugin</artifactId>
> +                <configuration>
> +                    <addCompileSourceRoot>true</addCompileSourceRoot>
> +                    <generateBuilders>true</generateBuilders>
> +                    <sourcePaths>
> +                        <sourcePath>src/main/jsonschema/org/apache/streams/elasticsearch/ElasticsearchConfiguration.json</sourcePath>
> +                    </sourcePaths>
> +                    <outputDirectory>target/generated-sources/jsonschema2pojo</outputDirectory>
> +                    <targetPackage>org.apache.streams.elasticsearch.pojo</targetPackage>
> +                    <useLongIntegers>true</useLongIntegers>
> +                    <useJodaDates>false</useJodaDates>
> +                </configuration>
> +                <executions>
> +                    <execution>
> +                        <goals>
> +                            <goal>generate</goal>
> +                        </goals>
> +                    </execution>
> +                </executions>
> +            </plugin>
> +        </plugins>
> +    </build>
> +</project>
> \ No newline at end of file
>
>