You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by cl...@apache.org on 2014/01/20 19:50:44 UTC
[1/2] GIRAPH-803
Updated Branches:
refs/heads/trunk 26d31606b -> 55e22de81
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/src/site/xdoc/rexster.xml
----------------------------------------------------------------------
diff --git a/src/site/xdoc/rexster.xml b/src/site/xdoc/rexster.xml
index efd3ea3..cc55eab 100644
--- a/src/site/xdoc/rexster.xml
+++ b/src/site/xdoc/rexster.xml
@@ -30,30 +30,138 @@
<body>
<section name="Overview">
Giraph can use the <a href="http://rexster.tinkerpop.com">Rexster</a>
- REST API to load graphs into the cluster. In this manner it is possible
- to load graphs from all the graph databases that
- <a href="http://blueprints.tinkerpop.com">Blueprints</a> supports.
- Additionally, a subset of the input graph can be injected by means of
- <a href="http://rexster.tinkerpop.com">Gramlin</a> scripts. This page
- is intended to get you started with the Giraph API for Rexster.
+ REST API to load and store graphs from graph databases like
+ <a href="http://www.neo4j.org/">Neo4j</a>,
+ <a href="http://www.orientdb.org/">OrientDB</a> and others to perform a
+ computation. Graph databases that are supported by
+ <a href="http://blueprints.tinkerpop.com">Blueprints</a> are also
+ available via Rexster. Additionally, a subset of the input graph can
+ be injected by means of <a href="http://rexster.tinkerpop.com">
+ Gremlin</a> scripts. This page is intended to get you started with the
+ Giraph API for Rexster I/O.
</section>
+
+ <section name="Quick Start For Inpatients">
+ Since not everyone is interested in the whole story, here you can find
+ some easy steps to get quickly started using the Rexster I/O API. We are
+ assuming you already have a working Hadoop/Giraph setup. If it is not
+ so, start <a href="/quick_start.html">here</a> and then come back. This
+ is important since the <code>OutputFormat</code> example is based on the
+ same example provided by the Quick Start guide.<br />
+ Below you can find a single script to prepare the environment and a
+ small example to use the <code>OutputFormat</code>. The only step required
+ to make the example work is to adjust the configuration variables to your
+ environment settings. For more details, read the rest of the
+ document :)<br/>
+ The script below also assumes that Hadoop is up and running
+ based on the Quick Start guide and the <code>tiny_graph.txt</code>
+ input graph is in-place in the input directory.
+ <div class="source"><pre class="prettyprint">
+#!/bin/bash
+# Configuration
+export REXSTER_VERSION=2.4.0
+export HADOOP_VERSION=1.0.2
+export GIRAPH_VERSION=1.1.0-SNAPSHOT
+export GIRAPH_DIR=/path/to/giraph
+export REXSTER_DIR=/path/to/rexster
+export HADOOP_DIR=/path/to/hadoop
+
+# Constants
+export GIRAPH_REXSTER=${GIRAPH_DIR}/giraph-rexster/giraph-rexster-io
+export GIRAPH_CORE=${GIRAPH_DIR}/giraph-core
+export GIRAPH_EXAMPLES=${GIRAPH_DIR}/giraph-examples
+export GIRAPH_KIBBLE=${GIRAPH_DIR}/giraph-rexster/giraph-kibble
+
+export GIRAPH_REXSTER_JAR=${GIRAPH_REXSTER}/target/giraph-rexster-io-${GIRAPH_VERSION}.jar
+export GIRAPH_CORE_JAR=${GIRAPH_CORE}/target/giraph-${GIRAPH_VERSION}-for-hadoop-${HADOOP_VERSION}-jar-with-dependencies.jar
+export GIRAPH_EXAMPLES_JAR=${GIRAPH_EXAMPLES}/target/giraph-examples-${GIRAPH_VERSION}-for-hadoop-${HADOOP_VERSION}-jar-with-dependencies.jar
+export GIRAPH_KIBBLE_JAR=${GIRAPH_KIBBLE}/target/giraph-kibble-${GIRAPH_VERSION}.jar
+
+export HADOOP_CLASSPATH=${GIRAPH_REXSTER_JAR}:${GIRAPH_EXAMPLES_JAR}:${GIRAPH_CORE_JAR}
+
+# Main
+# prepare rexster
+mkdir ${REXSTER_DIR}
+cd ${REXSTER_DIR}
+wget http://tinkerpop.com/downloads/rexster/rexster-server-${REXSTER_VERSION}.zip
+unzip rexster-server-${REXSTER_VERSION}.zip
+REXSTER_DIR=${REXSTER_DIR}/rexster-server-${REXSTER_VERSION}
+
+# copy the compiled kibble, prepare the rexster configuration, and start rexster
+cp ${GIRAPH_KIBBLE_JAR} ${REXSTER_DIR}/ext/
+lines=$(wc -l ${REXSTER_DIR}/config/rexster.xml | cut -d" " -f1)
+head -n +$(( lines - 2 )) ${REXSTER_DIR}/config/rexster.xml >\
+ ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <graph>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <graph-name>giraphgraph</graph-name>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <graph-location>/tmp/giraphgraph</graph-location>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <graph-type>tinkergraph</graph-type>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <graph-storage>graphson</graph-storage>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <extensions>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <allows>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <allow>tp:gremlin</allow>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " <allow>tp:giraph</allow>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " </allows>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " </extensions>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " </graph>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo " </graphs>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+echo "</rexster>" >> ${REXSTER_DIR}/config/rexster.giraph.xml
+${REXSTER_DIR}/bin/rexster.sh -s -c ${REXSTER_DIR}/config/rexster.giraph.xml 
+
+# start a Giraph Job
+su - hduser
+${HADOOP_DIR}/bin/hadoop jar ${GIRAPH_EXAMPLES_JAR} org.apache.giraph.GiraphRunner \
+ -Dgiraph.rexster.output.graph=giraphgraph \
+ -Dgiraph.rexster.hostname=127.0.0.1 \
+ -libjars ${GIRAPH_REXSTER_JAR},${GIRAPH_CORE_JAR} \
+ org.apache.giraph.examples.SimpleShortestPathsComputation \
+ -vif org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexInputFormat \
+ -vip input/ \
+ -vof org.apache.giraph.rexster.io.formats.RexsterLongDoubleFloatVertexOutputFormat \
+ -eof org.apache.giraph.rexster.io.formats.RexsterLongDoubleFloatEdgeOutputFormat \
+ -w 1
+exit
+ </pre></div>
+ </section>
+
+ <section name="Architecture">
+ The Rexster I/O Format is composed of three main components, namely
+ the <b>Rexster Input Format</b> and the <b>Rexster Output
+ Format</b> which are part of the Giraph code. Both components are
+ split into <b>Vertex</b> and <b>Edge</b> interfaces. Additionally,
+ the architecture provides the <b>Giraph Kibble</b>, which is a Rexster
+ extension to provide the needed facilities to load and store the data
+ from and to the graph databases. The figure below shows the architecture
+ in a high level fashion.<br/>
+
+ <p style="text-align: center">
+ <img style="align: center" src="images/RexsterIO.svg" />
+ </p>
+ </section>
+
<section name="The API">
Because of how the
<a href="https://github.com/tinkerpop/rexster/wiki/Basic-REST-API">Basic
Rexster API</a> is organized, the Giraph API requires the user to specify
- both an <code>VertexInputFormat</code> and a <code>EdgeInputFormat</code>.
- Even though such a step is required, the user does not have to deal with
- the Rexster connection, which can be easily configured using the Giraph
- options provided.<br />
- The two classes which need to be extended are
- <code>RexsterVertexInputFormat</code> and
- <code>RexsterEdgeInputFormat</code>. These two classes only require the
- user to specify the way in which a JSON object is translated to a Giraph
- object.
+ both a <b>Vertex</b> and an <b>Edge</b> format in both the input and
+ the output format. Even though such a step is required, the user does
+ not have to deal with the Rexster connection, which can be easily
+ configured using the Giraph options provided.<br />
+ In the next sections, you will be guided through the peculiarities of the
+ API, starting from the
+ <a href="#Configuration_Options">configurations</a>. Afterwards, we
+ will provide you with a short description of how to prepare Rexster to be
+ used with Giraph. Finally, we will walk you through the Input and Output
+ format APIs and we will conclude by presenting some caveats related to the
+ system.
</section>
+
<section name="Configuration Options">
The configuration options which can be specified by the user of the
- Rexster input format are the following.<br />
+ Rexster input format are the following. The configurations are grouped
+ into three different categories: <b>General Configurations</b>, <b>Input
+ Format Configurations</b>, and <b>Output Format Configurations</b>.<br />
+ <h3>General Configurations</h3>
<table border='0'>
<tr>
<th>label</th>
@@ -62,19 +170,19 @@
<th>description</th>
</tr>
<tr>
- <td>giraph.input.rexster.hostname</td>
+ <td>giraph.rexster.hostname</td>
<td>string</td>
<td>127.0.0.1</td>
<td>Rexster hostname which provides the REST API - required</td>
</tr>
<tr>
- <td>giraph.input.rexster.port</td>
+ <td>giraph.rexster.port</td>
<td>integer</td>
<td>8182</td>
<td>Rexster port where to contact the REST API.</td>
</tr>
<tr>
- <td>giraph.input.rexster.ssl</td>
+ <td>giraph.rexster.ssl</td>
<td>boolean</td>
<td>false</td>
<td>
@@ -82,13 +190,37 @@
</td>
</tr>
<tr>
- <td>giraph.input.rexster.graph</td>
+ <td>giraph.rexster.username</td>
+ <td>string</td>
+ <td></td>
+ <td>Rexster username to access the REST API.</td>
+ </tr>
+ <tr>
+ <td>giraph.rexster.password</td>
<td>string</td>
+ <td></td>
+ <td>Rexster password to access the REST API.</td>
+ </tr>
+ </table><br/><br/>
+
+ <h3>Input Format Configurations</h3>
+ <table border='0'>
+ <tr>
+ <th>label</th>
+ <th>type</th>
+ <th>default value</th>
+ <th>description</th>
+ </tr>
+ <tr>
+ <td>giraph.rexster.input.graph</td>
<td>graphdb</td>
- <td>Rexster graph.</td>
+ <td>string</td>
+ <td>
+ Rexster input graph.
+ </td>
</tr>
<tr>
- <td>giraph.input.rexster.vertices</td>
+ <td>giraph.rexster.input.vertex</td>
<td>integer</td>
<td>1000</td>
<td>
@@ -96,7 +228,7 @@
</td>
</tr>
<tr>
- <td>giraph.input.rexster.edges</td>
+ <td>giraph.rexster.input.edge</td>
<td>integer</td>
<td>1000</td>
<td>
@@ -104,114 +236,415 @@
</td>
</tr>
<tr>
- <td>giraph.input.rexster.username</td>
+ <td>giraph.input.rexster.vertices.gremlinScript</td>
<td>string</td>
<td></td>
- <td>Rexster username to access the REST API.</td>
+ <td>
+ If the database is Gremlin enabled, the script will be used to
+ retrieve the vertices from the Rexster exposed database.
+ </td>
</tr>
<tr>
- <td>giraph.input.rexster.password</td>
+ <td>giraph.input.rexster.edges.gremlinScript</td>
<td>string</td>
<td></td>
- <td>Rexster password to access the REST API.</td>
+ <td>
+ If the database is Gremlin enabled, the script will be used to
+ retrieve the edges from the Rexster exposed database.
+ </td>
</tr>
+ </table>
+
+
+ <h3>Output Format Configurations</h3>
+ <table border='0'>
<tr>
- <td>giraph.input.rexster.hasGramlin</td>
- <td>boolean</td>
- <td>false</td>
+ <th>label</th>
+ <th>type</th>
+ <th>default value</th>
+ <th>description</th>
+ </tr>
+ <tr>
+ <td>giraph.rexster.output.graph</td>
+ <td>string</td>
+ <td>graphdb</td>
<td>
- Gramlin enabled option for Rexster. If the database to which to
- connect has gramlin extension enabled, it will be possible to
- provide a Gramlin scriipt.
+ Rexster output graph.
</td>
</tr>
<tr>
- <td>giraph.input.rexster.vertices.gramlinScript</td>
+ <td>giraph.rexster.output.vlabel</td>
<td>string</td>
- <td></td>
+ <td>_vid</td>
<td>
- If the database is Gramlin enabled, the script will be used to
- retrieve the vertices from the Rexster exposed database.
+ Rexster Vertex ID label for the JSON format.
</td>
</tr>
<tr>
- <td>"giraph.input.rexster.edges.gramlinScript"</td>
- <td>string</td>
- <td></td>
+ <td>giraph.rexster.output.backoffDelay</td>
+ <td>integer</td>
+ <td>5</td>
<td>
- If the database is Gramlin enabled, the script will be used to
- retrieve the edges from the Rexster exposed database.
+ Rexster back-off delay in milliseconds which is multiplied by an
+ exponentially increasing counter. Needed to deal with deadlocks and
+ consistency issues raised by the graph database.
+ </td>
+ </tr>
+ <tr>
+ <td>giraph.rexster.output.backoffRetry</td>
+ <td>integer</td>
+ <td>20</td>
+ <td>
+ Rexster back-off retry count. This is the number of times a failed
+ transaction is retried by the exponential back-off strategy when
+ deadlocks or consistency issues are raised by the graph database.
+ </td>
+ </tr>
+ <tr>
+ <td>giraph.rexster.output.timeout</td>
+ <td>integer</td>
+ <td>10</td>
+ <td>
+ Rexster output format wait timeout (seconds). This is
+ used to wake up the thread to call progress every x
+ seconds if no progress from the ZooKeeper is
+ detected.
+ </td>
+ </tr>
+ <tr>
+ <td>giraph.rexster.output.vertex.txsize</td>
+ <td>integer</td>
+ <td>1000</td>
+ <td>
+ Rexster Output format transaction size. This parameter
+ defines how many vertexes are sent for each
+ transaction.
+ </td>
+ </tr>
+ <tr>
+ <td>giraph.rexster.output.edge.txsize</td>
+ <td>integer</td>
+ <td>1000</td>
+ <td>
+ Rexster Output format transaction size. This parameter
+ defines how many edges are sent for each
+ transaction.
</td>
</tr>
</table>
</section>
- <section name="Preparation">
- To be able to test the Rexster API the user needs to prepare the Rexster
- environment. A guide to set-up a Rexster Server together with a database
- can be found at the
- <a href="https://github.com/tinkerpop/rexster/wiki/Getting-Started">
- Rexster Wiki</a> page.<br />
- In this brief guide, it will be assumed that an available graph database
- will be reachable at <a>http://127.0.0.1:8182/graphs/shortest-path/</a>.
- For the tests, the following database structure was used:<br />
- <code>
- {<br />
- "graph": {<br />
- "mode": "NORMAL",<br />
- "vertices": [<br />
- { "_id": 1, "_type": "vertex" },<br />
- { "_id": 2, "_type": "vertex" },<br />
- { "_id": 3, "_type": "vertex" },<br />
- { "_id": 4, "_type": "vertex" },<br />
- { "_id": 5, "_type": "vertex" }],<br />
- "edges": [<br />
- { "weight": 1, "_id": 0, "_type": "edge", "_outV": 1, "_inV": 2, "_label": "_default" },<br />
- { "weight": 3, "_id": 1, "_type": "edge", "_outV": 1, "_inV": 4, "_label": "_default" },<br />
- { "weight": 1, "_id": 2, "_type": "edge", "_outV": 2, "_inV": 1, "_label": "_default" },<br />
- { "weight": 2, "_id": 3, "_type": "edge", "_outV": 2, "_inV": 3, "_label": "_default" },<br />
- { "weight": 1, "_id": 4, "_type": "edge", "_outV": 2, "_inV": 4, "_label": "_default" },<br />
- { "weight": 2, "_id": 5, "_type": "edge", "_outV": 3, "_inV": 2, "_label": "_default" },<br />
- { "weight": 4, "_id": 6, "_type": "edge", "_outV": 3, "_inV": 5, "_label": "_default" },<br />
- { "weight": 3, "_id": 7, "_type": "edge", "_outV": 4, "_inV": 1, "_label": "_default" },<br />
- { "weight": 1, "_id": 8, "_type": "edge", "_outV": 4, "_inV": 2, "_label": "_default" },<br />
- { "weight": 4, "_id": 9, "_type": "edge", "_outV": 4, "_inV": 5, "_label": "_default" },<br />
- { "weight": 4, "_id": 10, "_type": "edge", "_outV": 5, "_inV": 4, "_label": "_default" },<br />
- { "weight": 4, "_id": 11, "_type": "edge", "_outV": 5, "_inV": 3, "_label": "_default" } ]<br />
- }<br />
+
+ <section name="Prepare The Environment">
+ In this section we will briefly explain how to prepare a Rexster server
+ for your computation. For additional information about Rexster and
+ the configuration of the server, you can take a look at the
+ <a href="https://github.com/tinkerpop/rexster/wiki" target="_new">
+ Rexster Wiki</a>.<br />
+ As shown in the <a href="#Quick_Start_For_Inpatients">quick
+ start</a> above, starting a new Rexster server is extremely easy.
+ First of all, you need to download one of the versions available on the
+ Tinkerpop repository. We suggest that you get the most recent version, as we
+ will explain later when talking about <a href="#Cavet">caveats</a>. So, the
+ first step is to download Rexster and unzip it.
+
+ <div class="source"><pre class="prettyprint">
+$ wget http://tinkerpop.com/downloads/rexster/rexster-server-2.4.0.zip
+$ unzip rexster-server-2.4.0.zip
+ </pre></div>
+
+ At this point, it is important to prepare the database you are going to
+ use, allowing the Giraph Kibble to be available for the database. This is
+ done by adding the entry <code>&lt;allow&gt;tp:giraph&lt;/allow&gt;</code>
+ for the desired graph under the <code>&lt;extensions&gt;</code> tag scope.
+ Moreover, you will need to copy the Giraph Kibble into the <code>ext/</code>
+ directory of rexster.
+
+ <br/><br/>
+ <div class="source"><pre class="prettyprint">
+$ cp /path/to/giraph/giraph-rexster/giraph-kibble/target/giraph-kibble-*.jar rexster-server-2.4.0/ext/
+ </pre></div>
+
+ At this point, just enter the rexster directory and start the server.<br/>
+ <div class="source"><pre class="prettyprint">
+$ cd rexster-server-2.4.0
+$ ./bin/rexster.sh -s
+ </pre></div>
+
+ This command will automatically locate the configuration file in the
+ <code>config/</code> directory and will automatically provide you with
+ some initial databases. To test that the server is working properly, open
+ a browser and type the following URL.
+
+ <div class="source"><pre class="prettyprint">
+ http://localhost:8182/graphs/
+ </pre></div>
+
+ This will provide you with a JSON listing the available loaded graphs.
+ </section>
+
+ <section name="Example explained: Input Format">
+ The first part of the API that we are presenting is the
+ <b>Rexster Input Format</b>. This API allows a Giraph computation to load
+ the graph from one database exposed by an existing Rexster server.
+
+ <h4>Vertex Input Format</h4>
+ As anticipated earlier, the input API provides two required abstract
+ classes, namely <code>RexsterVertexInputFormat</code> and
+ <code>RexsterEdgeInputFormat</code>. This is required, since the Giraph
+ Kibble provides two different URIs to load the vertices and the edges.<br/>
+ NB: you also need to make sure that the Rexster hostname is provided to
+ Giraph, since this is a mandatory parameter.<br/>
+ The two classes below are directly extracted from the Giraph source code
+ repository and exemplify how to implement custom
+ <code>RexsterVertexInputFormat</code> and
+ <code>RexsterEdgeInputFormat</code>.<br/>
+ <div class="source"><pre class="prettyprint">
+public class RexsterLongDoubleFloatVertexInputFormat
+ extends RexsterVertexInputFormat<LongWritable, DoubleWritable,
+ FloatWritable> {
+
+ @Override
+ public RexsterVertexReader createVertexReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+
+ return new RexsterLongDoubleFloatVertexReader();
+ }
+
+ /**
+ * Rexster vertex reader
+ */
+ protected class RexsterLongDoubleFloatVertexReader
+ extends RexsterVertexReader {
+
+ @Override
+ protected Vertex<LongWritable, DoubleWritable, FloatWritable> parseVertex(
+ JSONObject jsonVertex) throws JSONException {
+
+ /* create the actual vertex */
+ Vertex<LongWritable, DoubleWritable, FloatWritable> vertex =
+ getConf().createVertex();
+
+ Long id;
+ try {
+ id = jsonVertex.getLong("_id");
+ } catch (JSONException ex) {
+ /* OrientDB compatibility; try to transform it as long */
+ String idString = jsonVertex.getString("_id");
+ String[] splits = idString.split(":");
+ id = Long.parseLong(splits[1]);
}
- </code><br />
+ vertex.initialize(new LongWritable(id), new DoubleWritable(0));
+ return vertex;
+ }
+ }
+}
+ </pre></div>
+
+ <h4>Edge Input Format</h4>
+ <div class="source"><pre class="prettyprint">
+public class RexsterLongFloatEdgeInputFormat
+ extends RexsterEdgeInputFormat<LongWritable, FloatWritable> {
+
+ @Override
+ public RexsterEdgeReader createEdgeReader(
+ InputSplit split, TaskAttemptContext context) throws IOException {
+
+ return new RexsterLongFloatEdgeReader();
+ }
+
+ protected class RexsterLongFloatEdgeReader extends RexsterEdgeReader {
+
+ /** source vertex of the edge */
+ private LongWritable sourceId;
+
+ @Override
+ public LongWritable getCurrentSourceId()
+ throws IOException, InterruptedException {
+
+ return this.sourceId;
+ }
+
+ @Override
+ protected Edge<LongWritable, FloatWritable> parseEdge(JSONObject jsonEdge)
+ throws JSONException {
+
+ Long value = jsonEdge.getLong("weight");
+ Long dest;
+ try {
+ dest = jsonEdge.getLong("_outV");
+ } catch (JSONException ex) {
+ /* OrientDB compatibility; try to transform it as long */
+ String idString = jsonEdge.getString("_outV");
+ String[] splits = idString.split(":");
+ dest = Long.parseLong(splits[1]);
+ }
+ Edge<LongWritable, FloatWritable> edge =
+ EdgeFactory.create(new LongWritable(dest), new FloatWritable(value));
+
+ Long sid;
+ try {
+ sid = jsonEdge.getLong("_inV");
+ } catch (JSONException ex) {
+ /* OrientDB compatibility; try to transform it as long */
+ String sidString = jsonEdge.getString("_inV");
+ String[] splits = sidString.split(":");
+ sid = Long.parseLong(splits[1]);
+ }
+ this.sourceId = new LongWritable(sid);
+ return edge;
+ }
+ }
+}
+ </pre></div>
+
+ <h4>Usage</h4>
+ Using these classes is simple and does not require any particular
+ effort. To provide you with an example, below you can find the Hadoop
+ command issued to start a Shortest Path computation by loading the
+ graph from Rexster.
+
+ <div class="source"><pre class="prettyprint">
+hadoop jar /path/to/giraph/giraph-examples/target/giraph-examples-*-jar-with-dependencies.jar \
+ org.apache.giraph.GiraphRunner \
+ -libjars /path/to/giraph/giraph-rexster/giraph-rexster-io/target/giraph-rexster-io*-jar-with-dependencies.jar \
+ org.apache.giraph.examples.SimpleShortestPathsComputation \
+ -vif org.apache.giraph.rexster.io.formats.RexsterLongDoubleFloatVertexInputFormat \
+ -eif org.apache.giraph.rexster.io.formats.RexsterLongFloatEdgeInputFormat \
+ -vof org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexOutputFormat \
+ -op output \
+ -w 1
+ </pre></div>
</section>
- <section name="Input Example">
- As anticipated previously, to make use of the Giraph API available for
- Rexster, it is required to extend the classes
- <code>RexsterVertexInputFormat</code> and
- <code>RexsterEdgeInputFormat</code>. In the first class, the only method
- that has to be implemented is <code>parseVertex</code> to transform a
- <code>JSONObject</code> object into a <code>Vertex</code> object.
- Similarly, for the second class the methods that have to be implemented
- are <code>parseEdge</code>, to extract the <code>Edge</code> object, and
- the <code>getCurrentSourceId</code> which has to point to the id of
- the source vertex of the current edge. Examples of such implementations
- are the classes <code>RexsterLongDoubleFloatVertexInputFormat</code> and
- <code>RexsterLongFloatEdgeInputFormat</code>.<br />
- An example that shows how to use these classes to compute the shortest
- bath algorithm onto the graph database shown previously is provided below.
- <br />
- <code>
- export GIRAPH_CORE_JAR=$GIRAPH_CORE_TARGET_DIR/giraph-$GIRAPH_VERSION-for-$HADOOP_VERSION-jar-with-dependencies.jar<br />
- export GIRAPH_EXAMPLES_JAR=$GIRAPH_EXAMPLES_TARGET_DIR/giraph-examples-$GIRAPH_VERSION-for-$HADOOP_VERSION-jar-with-dependencies.jar<br />
- export GIRAPH_REXSTER_JAR=$GIRAPH_REXSTER_TARGET_DIR/giraph-rexster-$GIRAPH_VERSION.jar<br />
- export HADOOP_CLASSPATH=$GIRAPH_CORE_JAR:$GIRAPH_EXAMPLES_JAR:$GIRAPH_REXSTER_JAR<br /><br />
- hadoop jar $GIRAPH_EXAMPLES_JAR org.apache.giraph.GiraphRunner -libjars GIRAPH_REXSTER_JAR,$(GIRAPH_CORE_JAR) org.apache.giraph.examples.SimpleShortestPathsComputation -vif org.apache.giraph.rexster.io.RexsterVertexInputFormat -eif org.apache.giraph.rexster.io.RexsterEdgeInputFormat -of org.apache.giraph.io.formats.IdWithValueTextOutputFormat -op outShortestPath -w 1
- </code><br />
- The result of this computation is<br />
- <code>
- 1 --> 1 = 0 <br />
- 1 --> 2 = 1 <br />
- 1 --> 3 = 3 <br />
- 1 --> 4 = 2 <br />
- 1 --> 5 = 6
- </code><br />
+
+ <section name="Example explained: Output Format">
+ <h4>Vertex Output Format</h4>
+ Also in this case, the output API provides two required
+ classes, namely <code>RexsterVertexOutputFormat</code> and
+ <code>RexsterEdgeOutputFormat</code>. Both are required,
+ due to the way the Giraph Kibble manages the storing of the
+ edges.<br/>
+ NB: to deal with database deadlocks and consistency issues, the
+ Kibble uses the Exponential Backoff strategy to complete the transaction.
+ Make sure that the parameters for the time delay and number of retries suit
+ your needs. Moreover, to reduce the quantity of memory used by Rexster,
+ the size of each transaction is also configurable. Make sure that this
+ parameter also suits your environment.<br/>
+ Differently from the Input format presented above, in this case you can
+ directly make use of the <code>RexsterVertexOutputFormat</code> and
+ <code>RexsterEdgeOutputFormat</code> classes without the need to implement
+ your own. However, in some cases it is still reasonable to use your
+ own.<br/>
+ The two classes below are directly extracted from the Giraph source code
+ repository and exemplify how to implement custom
+ <code>RexsterVertexOutputFormat</code> and
+ <code>RexsterEdgeOutputFormat</code>.<br/>
+ <div class="source"><pre class="prettyprint">
+public class RexsterLongDoubleFloatVertexOutputFormat
+ extends RexsterVertexOutputFormat<LongWritable, DoubleWritable,
+ FloatWritable> {
+
+ @Override
+ public RexsterVertexWriter createVertexWriter(
+ TaskAttemptContext context) throws IOException,
+ InterruptedException {
+
+ return new RexsterLongDoubleFloatVertexWriter();
+ }
+
+ /**
+ * Rexster vertex writer.
+ */
+ protected class RexsterLongDoubleFloatVertexWriter
+ extends RexsterVertexWriter {
+
+ /** current vertex ID */
+ private LongWritable vertexId;
+
+ @Override
+ protected JSONObject getVertex(
+ Vertex<LongWritable, DoubleWritable, FloatWritable> vertex)
+ throws JSONException {
+
+ vertexId = vertex.getId();
+
+ double value = vertex.getValue().get();
+ JSONObject jsonVertex = new JSONObject();
+ jsonVertex.accumulate("value", value);
+
+ return jsonVertex;
+ }
+
+ @Override
+ protected LongWritable getVertexId() {
+ return vertexId;
+ }
+ }
+}
+ </pre></div>
+
+ <h4>Edge Output Format</h4>
+<div class="source"><pre class="prettyprint">
+public class RexsterLongDoubleFloatEdgeOutputFormat
+ extends RexsterEdgeOutputFormat<LongWritable, DoubleWritable,
+ FloatWritable> {
+
+ @Override
+ public RexsterEdgeWriter createEdgeWriter(
+ TaskAttemptContext context) throws IOException,
+ InterruptedException {
+
+ return new RexsterLongDoubleFloatEdgeWriter();
+ }
+
+ /**
+ * Rexster edge writer.
+ */
+ protected class RexsterLongDoubleFloatEdgeWriter
+ extends RexsterEdgeWriter {
+
+ @Override
+ protected JSONObject getEdge(LongWritable srcId, DoubleWritable srcValue,
+ Edge<LongWritable, FloatWritable> edge) throws JSONException {
+
+ long outId = srcId.get();
+ long inId = edge.getTargetVertexId().get();
+ float value = edge.getValue().get();
+ JSONObject jsonEdge = new JSONObject();
+ jsonEdge.accumulate("_outV", outId);
+ jsonEdge.accumulate("_inV", inId);
+ jsonEdge.accumulate("value", value);
+
+ return jsonEdge;
+ }
+ }
+}
+ </pre></div>
+
+ <h4>Usage</h4>
+ Also in this case, we provide you with an example of how to use these
+ classes.
+
+ <div class="source"><pre class="prettyprint">
+hadoop jar /path/to/giraph/giraph-examples/target/giraph-examples-*-jar-with-dependencies.jar \
+ org.apache.giraph.GiraphRunner \
+ -libjars /path/to/giraph/giraph-rexster/giraph-rexster-io/target/giraph-rexster-io*-jar-with-dependencies.jar \
+ org.apache.giraph.examples.SimpleShortestPathsComputation \
+ -vif org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexInputFormat \
+ -vof org.apache.giraph.rexster.io.formats.RexsterVertexOutputFormat \
+ -eof org.apache.giraph.rexster.io.formats.RexsterEdgeOutputFormat \
+ -vip input/ \
+ -w 1
+ </pre></div>
+ </section>
+
+ <section name="Cavet">
+ <h4>OrientDB</h4>
+ One of the most important details that you must be aware of is that
+ only using Rexster with a version equal to or greater than 2.5.0 you will be
+ able to work with OrientDB. Unfortunately, the previous versions of
+ Rexster include the buggy OrientDB API, which causes issues that are very
+ difficult to handle. With newer versions of OrientDB, the API has been
+ improved and the system works as expected.
</section>
</body>
</document>
[2/2] git commit: updated refs/heads/trunk to 55e22de
Posted by cl...@apache.org.
GIRAPH-803
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/55e22de8
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/55e22de8
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/55e22de8
Branch: refs/heads/trunk
Commit: 55e22de8114dbbae83ffc2ec713de798d2a05349
Parents: 26d3160
Author: Claudio Martella <cl...@gmail.com>
Authored: Mon Jan 20 19:50:23 2014 +0100
Committer: Claudio Martella <cl...@gmail.com>
Committed: Mon Jan 20 19:50:23 2014 +0100
----------------------------------------------------------------------
CHANGELOG | 2 +
giraph-rexster/pom.xml | 135 +---
giraph-rexster/src/main/assembly/compile.xml | 39 --
.../rexster/conf/GiraphRexsterConstants.java | 81 ---
.../giraph/rexster/conf/package-info.java | 21 -
.../rexster/io/RexsterEdgeInputFormat.java | 180 ------
.../giraph/rexster/io/RexsterInputSplit.java | 112 ----
.../rexster/io/RexsterVertexInputFormat.java | 193 ------
...RexsterLongDoubleFloatVertexInputFormat.java | 68 --
.../RexsterLongFloatEdgeInputFormat.java | 78 ---
.../giraph/rexster/io/formats/package-info.java | 21 -
.../apache/giraph/rexster/io/package-info.java | 21 -
.../giraph/rexster/utils/RexsterUtils.java | 361 -----------
.../giraph/rexster/utils/package-info.java | 21 -
.../formats/TestAbstractRexsterInputFormat.java | 129 ----
...RexsterLongDoubleFloatVertexInputFormat.java | 155 -----
.../rexster/io/formats/empty-db.input.json | 8 -
.../giraph/rexster/io/formats/rexster.xml | 52 --
.../rexster/io/formats/test-db.input.json | 126 ----
.../rexster/io/formats/test-db.output.json | 5 -
pom.xml | 70 +-
src/site/site.xml | 2 +-
src/site/xdoc/rexster.xml | 647 ++++++++++++++++---
23 files changed, 619 insertions(+), 1908 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index dd86f1c..3862d07 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
Giraph Change Log
Release 1.1.0 - unreleased
+ GIRAPH-803: Rexster I/O new complete API (armax00 via claudio)
+
GIRAPH-805: getZookeeperList can return null (armax00 via claudio)
GIRAPH-823: upgrade hiveio to version 0.21 from olderversion 0.20 (pavanka via majakabiljo)
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-rexster/pom.xml b/giraph-rexster/pom.xml
index 8e98c21..260d3de 100644
--- a/giraph-rexster/pom.xml
+++ b/giraph-rexster/pom.xml
@@ -17,7 +17,8 @@ specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
@@ -26,133 +27,15 @@ under the License.
<artifactId>giraph-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
</parent>
- <artifactId>giraph-rexster</artifactId>
- <packaging>jar</packaging>
+ <artifactId>giraph-rexster</artifactId>
+ <packaging>pom</packaging>
<name>Apache Giraph Rexster I/O</name>
<url>http://giraph.apache.org/giraph-rexster/</url>
- <description>Giraph Rexster input/output classes</description>
-
- <properties>
- <top.dir>${project.basedir}/..</top.dir>
- </properties>
-
- <build>
- <testResources>
- <testResource>
- <directory>${project.basedir}/src/test/resources</directory>
- </testResource>
- </testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-assembly-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-javadoc-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-site-plugin</artifactId>
- <configuration>
- <siteDirectory>${project.basedir}/src/site</siteDirectory>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.6</version>
- <configuration>
- <systemProperties>
- <property>
- <name>prop.jarLocation</name>
- <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
- </property>
- </systemProperties>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>findbugs-maven-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
-
- <dependencies>
- <!-- compile dependencies. sorted lexicographically. -->
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </dependency>
- <dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.giraph</groupId>
- <artifactId>giraph-core</artifactId>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-configuration</groupId>
- <artifactId>commons-configuration</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </dependency>
+ <description>Giraph Rexster I/O classes and kibble</description>
- <!-- test dependencies. sorted lexicographically. -->
- <dependency>
- <groupId>com.tinkerpop.blueprints</groupId>
- <artifactId>blueprints-core</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>com.tinkerpop.rexster</groupId>
- <artifactId>rexster-core</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>com.tinkerpop.rexster</groupId>
- <artifactId>rexster-protocol</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>com.tinkerpop.rexster</groupId>
- <artifactId>rexster-server</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.mockito</groupId>
- <artifactId>mockito-core</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
+ <modules>
+ <module>giraph-kibble</module>
+ <module>giraph-rexster-io</module>
+ </modules>
</project>
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/assembly/compile.xml
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/assembly/compile.xml b/giraph-rexster/src/main/assembly/compile.xml
deleted file mode 100644
index fcaffa6..0000000
--- a/giraph-rexster/src/main/assembly/compile.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
- <id>jar-with-dependencies</id>
- <formats>
- <format>jar</format>
- </formats>
- <includeBaseDirectory>false</includeBaseDirectory>
-
- <dependencySets>
- <dependencySet>
- <useProjectArtifact>true</useProjectArtifact>
- <outputDirectory>/</outputDirectory>
- <unpackOptions>
- <excludes>
- <exclude>META-INF/LICENSE</exclude>
- </excludes>
- </unpackOptions>
- <unpack>true</unpack>
- <scope>runtime</scope>
- </dependencySet>
- </dependencySets>
-</assembly>
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java
deleted file mode 100644
index c4b4655..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/GiraphRexsterConstants.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.conf;
-
-import org.apache.giraph.conf.StrConfOption;
-import org.apache.giraph.conf.IntConfOption;
-import org.apache.giraph.conf.BooleanConfOption;
-
-/**
- * Constants used all over Giraph for configuration specific for Rexster
- * REST API.
- */
-// CHECKSTYLE: stop InterfaceIsTypeCheck
-public interface GiraphRexsterConstants {
- /** Rexster hostname which provides the REST API. */
- StrConfOption GIRAPH_REXSTER_HOSTNAME =
- new StrConfOption("giraph.input.rexster.hostname", null,
- "Rexster hostname which provides the REST API. " +
- "- required");
- /** Rexster port where to contact the REST API. */
- IntConfOption GIRAPH_REXSTER_PORT =
- new IntConfOption("giraph.input.rexster.port", 8182,
- "Rexster port where to contact the REST API.");
- /** Rexster flag to set the connection over SSL instaed of clear-text. */
- BooleanConfOption GIRAPH_REXSTER_USES_SSL =
- new BooleanConfOption("giraph.input.rexster.ssl", false,
- "Rexster flag to set the connection over SSL " +
- "instaed of clear-text.");
- /** Rexster graph. */
- StrConfOption GIRAPH_REXSTER_GRAPH =
- new StrConfOption("giraph.input.rexster.graph", "graphdb",
- "Rexster graph.");
- /** Rexster number of estimated vertices in the graph to be loaded. */
- IntConfOption GIRAPH_REXSTER_V_ESTIMATE =
- new IntConfOption("giraph.input.rexster.vertices", 1000,
- "Rexster number of estimated vertices in the " +
- "graph to be loaded.");
- /** Rexster number of estimated edges in the graph to be loaded. */
- IntConfOption GIRAPH_REXSTER_E_ESTIMATE =
- new IntConfOption("giraph.input.rexster.edges", 1000,
- "Rexster number of estimated vertices in the " +
- "graph to be loaded.");
- /** Rexster username to access the REST API. */
- StrConfOption GIRAPH_REXSTER_USERNAME =
- new StrConfOption("giraph.input.rexster.username", "",
- "Rexster username to access the REST API.");
- /** Rexster password to access the REST API. */
- StrConfOption GIRAPH_REXSTER_PASSWORD =
- new StrConfOption("giraph.input.rexster.password", "",
- "Rexster password to access the REST API.");
- /** If the database is Gremlin enabled, the script will be used to retrieve
- the vertices from the Rexster exposed database. */
- StrConfOption GIRAPH_REXSTER_GREMLIN_V_SCRIPT =
- new StrConfOption("giraph.input.rexster.vertices.gremlinScript", "",
- "If the database is Gremlin enabled, the script will " +
- "be used to retrieve the vertices from the Rexster " +
- "exposed database.");
- /** If the database is Gremlin enabled, the script will be used to retrieve
- the edges from the Rexster exposed database. */
- StrConfOption GIRAPH_REXSTER_GREMLIN_E_SCRIPT =
- new StrConfOption("giraph.input.rexster.edges.gremlinScript", "",
- "If the database is Gremlin enabled, the script will " +
- "be used to retrieve the edges from the Rexster " +
- "exposed database.");
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java
deleted file mode 100644
index b4917bc..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/conf/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Package of configuration information for Rexster in Giraph.
- */
-package org.apache.giraph.rexster.conf;
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java
deleted file mode 100644
index 4dee078..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterEdgeInputFormat.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io;
-
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_E_ESTIMATE;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.giraph.edge.Edge;
-import org.apache.giraph.io.EdgeInputFormat;
-import org.apache.giraph.io.EdgeReader;
-import org.apache.giraph.rexster.utils.RexsterUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-import org.json.JSONException;
-import org.json.JSONObject;
-import org.json.JSONTokener;
-
-/**
- * Abstract class that users should subclass to use their own Rexster based
- * vertex input format. This class was inspired by the Rexster Input format
- * available in Faunus authored by Stephen Mallette.
- * @param <I> Vertex id
- * @param <E> Edge data
- */
-public abstract class RexsterEdgeInputFormat<I extends WritableComparable,
- E extends Writable> extends EdgeInputFormat<I, E> {
-
- /** Class logger. */
- private static final Logger LOG =
- Logger.getLogger(RexsterEdgeInputFormat.class);
-
- /**
- * @param conf configuration parameters
- */
- public void checkInputSpecs(Configuration conf) { }
-
- @Override
- public List<InputSplit> getSplits(JobContext context, int minSplitCountHint)
- throws IOException, InterruptedException {
-
- return RexsterUtils.getSplits(context,
- GIRAPH_REXSTER_E_ESTIMATE.get(getConf()));
- }
-
- @Override
- public abstract RexsterEdgeReader createEdgeReader(InputSplit split,
- TaskAttemptContext context) throws IOException;
-
- /**
- * Abstract class to be implemented by the user based on their specific
- * vertex input. Easiest to ignore the key value separator and only use
- * key instead.
- */
- protected abstract class RexsterEdgeReader extends EdgeReader<I, E> {
-
- /** Input stream from the HTTP connection to the REST endpoint */
- private BufferedReader rexsterBufferedStream;
- /** JSON parser/tokenizer object */
- private JSONTokener tokener;
- /** start index of the Rexster paging */
- private long splitStart;
- /** end index of the Rexster paging */
- private long splitEnd;
- /** number of iterated items */
- private long itemsIterated = 0;
- /** current edge obtained from Rexster */
- private Edge<I, E> edge;
-
- @Override
- public void initialize(InputSplit inputSplit, TaskAttemptContext context)
- throws IOException, InterruptedException {
-
- final RexsterInputSplit rexsterInputSplit =
- (RexsterInputSplit) inputSplit;
-
- this.splitEnd = rexsterInputSplit.getEnd();
- this.splitStart = rexsterInputSplit.getStart();
-
- this.rexsterBufferedStream =
- RexsterUtils.Edge.openRexsterStream(getConf(),
- this.splitStart, this.splitEnd);
-
- this.tokener = RexsterUtils.parseJSONEnvelope(this.rexsterBufferedStream);
- }
-
- @Override
- public void close() throws IOException {
- this.rexsterBufferedStream.close();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- final float estimated = GIRAPH_REXSTER_E_ESTIMATE.get(getConf());
-
- if (this.splitStart == this.splitEnd) {
- return 0.0f;
- } else {
- /* assuming you got the estimate right this progress should be
- pretty close; */
- return Math.min(1.0f, this.itemsIterated / (float) estimated);
- }
- }
-
- @Override
- public Edge<I, E> getCurrentEdge()
- throws IOException, InterruptedException {
-
- return this.edge;
- }
-
- @Override
- public boolean nextEdge() throws IOException, InterruptedException {
- try {
- JSONObject obj;
- char c;
-
- /* if the tokener was not set, no objects are in fact available */
- if (this.tokener == null) {
- return false;
- }
-
- obj = new JSONObject(this.tokener);
- this.edge = parseEdge(obj);
-
- c = tokener.nextClean();
- if (c == RexsterUtils.ARRAY_SEPARATOR) {
- itemsIterated += 1;
- return true;
- } else if (c == RexsterUtils.END_ARRAY) {
- return false;
- } else {
- LOG.error(String.format("Expected a '%c' at the end of the array",
- RexsterUtils.END_ARRAY));
- throw new InterruptedException();
- }
-
- } catch (JSONException e) {
- LOG.error(e.toString());
- return false;
- }
- }
-
- /**
- * Parser for a single edge JSON object
- *
- * @param jsonEdge edge represented as JSON object
- * @return The edge object associated with the JSON object
- */
- protected abstract Edge<I, E> parseEdge(JSONObject jsonEdge)
- throws JSONException;
-
- @Override
- public abstract I getCurrentSourceId()
- throws IOException, InterruptedException;
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java
deleted file mode 100644
index b6da8de..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterInputSplit.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-/**
- * A InputSplit that spans a set of vertices. This code is taken from the
- * Faunus project and was originally authored by Stephen Mallette.
- */
-public class RexsterInputSplit extends InputSplit implements Writable {
- /** End index for the Rexster paging */
- private long end = 0;
- /** Start index for the Rexster paging */
- private long start = 0;
-
- /**
- * Default constructor.
- */
- public RexsterInputSplit() {
- }
-
- /**
- * Overloaded constructor
- * @param start start of the paging provided by Rexster
- * @param end end of the paging provided by Rexster
- */
- public RexsterInputSplit(long start, long end) {
- this.start = start;
- this.end = end;
- }
-
- /**
- * Stub function returning empty list of locations
- * @return String[] array of locations
- * @throws IOException
- */
- public String[] getLocations() {
- return new String[]{};
- }
-
- /**
- * Get the start of the paging.
- * @return long start of the paging
- */
- public long getStart() {
- return start;
- }
-
- /**
- * Get the end of the paging.
- * @return long end of the paging
- */
- public long getEnd() {
- return end;
- }
-
- /**
- * Get the length of the paging
- * @return long length of the page
- */
- public long getLength() {
- return end - start;
- }
-
- /**
- *
- * @param input data input from where to unserialize
- * @throws IOException
- */
- public void readFields(DataInput input) throws IOException {
- start = input.readLong();
- end = input.readLong();
- }
-
- /**
- *
- * @param output data output where to serialize
- * @throws IOException
- */
- public void write(DataOutput output) throws IOException {
- output.writeLong(start);
- output.writeLong(end);
- }
-
- @Override
- public String toString() {
- return String.format("Split at [%s to %s]", this.start,
- this.end == Long.MAX_VALUE ? "END" : this.end - 1);
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java
deleted file mode 100644
index da45b5b..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/RexsterVertexInputFormat.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.io.VertexInputFormat;
-import org.apache.giraph.io.VertexReader;
-import org.apache.giraph.rexster.utils.RexsterUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-import org.json.JSONException;
-import org.json.JSONObject;
-import org.json.JSONTokener;
-
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_V_ESTIMATE;
-
-/**
- * Abstract class that users should subclass to use their own Rexster based
- * vertex input format. This class was inspired by the Rexster Input format
- * available in Faunus authored by Stephen Mallette.
- *
- * @param <I>
- * @param <V>
- * @param <E>
- */
-public abstract class RexsterVertexInputFormat<I extends WritableComparable,
- V extends Writable, E extends Writable>
- extends VertexInputFormat<I, V, E> {
-
- /** Class logger. */
- private static final Logger LOG =
- Logger.getLogger(RexsterVertexInputFormat.class);
-
- /**
- * @param conf configuration parameters
- */
- public void checkInputSpecs(Configuration conf) { }
-
- /**
- * Create a vertex reader for a given split. Guaranteed to have been
- * configured with setConf() prior to use. The framework will also call
- * {@link VertexReader#initialize(InputSplit, TaskAttemptContext)} before
- * the split is used.
- *
- * @param split the split to be read
- * @param context the information about the task
- * @return a new record reader
- * @throws IOException
- */
- public abstract RexsterVertexReader createVertexReader(InputSplit split,
- TaskAttemptContext context) throws IOException;
-
- @Override
- public List<InputSplit> getSplits(JobContext context, int minSplitCountHint)
- throws IOException, InterruptedException {
-
- return RexsterUtils.getSplits(context,
- GIRAPH_REXSTER_V_ESTIMATE.get(getConf()));
- }
-
- /**
- * Abstract class to be implemented by the user based on their specific
- * vertex input. Easiest to ignore the key value separator and only use
- * key instead.
- */
- protected abstract class RexsterVertexReader extends VertexReader<I, V, E> {
-
- /** Input stream from the HTTP connection to the REST endpoint */
- private BufferedReader rexsterBufferedStream;
- /** JSON parser/tokenizer object */
- private JSONTokener tokener;
- /** start index of the Rexster paging */
- private long splitStart;
- /** end index of the Rexster paging */
- private long splitEnd;
- /** index to access the iterated vertices */
- private long itemsIterated = 0;
- /** current vertex */
- private Vertex<I, V, E> vertex;
-
- @Override
- public void initialize(InputSplit inputSplit, TaskAttemptContext context)
- throws IOException, InterruptedException {
-
- final RexsterInputSplit rexsterInputSplit =
- (RexsterInputSplit) inputSplit;
-
- this.splitEnd = rexsterInputSplit.getEnd();
- this.splitStart = rexsterInputSplit.getStart();
-
- this.rexsterBufferedStream =
- RexsterUtils.Vertex.openRexsterStream(getConf(),
- this.splitStart, this.splitEnd);
-
- this.tokener = RexsterUtils.parseJSONEnvelope(this.rexsterBufferedStream);
- }
-
- @Override
- public boolean nextVertex()
- throws IOException, InterruptedException {
-
- try {
- JSONObject obj;
- char c;
-
- /* if the tokener was not set, no objects are in fact available */
- if (this.tokener == null) {
- return false;
- }
-
- obj = new JSONObject(this.tokener);
- this.vertex = parseVertex(obj);
-
- c = this.tokener.nextClean();
- if (c == RexsterUtils.ARRAY_SEPARATOR) {
- itemsIterated += 1;
- return true;
- } else if (c == RexsterUtils.END_ARRAY) {
- return false;
- } else {
- LOG.error(String.format("Expected a '%c' at the end of the array",
- RexsterUtils.END_ARRAY));
- throw new InterruptedException(
- String.format("Expected a '%c' at the end of the array",
- RexsterUtils.END_ARRAY));
- }
- } catch (JSONException e) {
- /* this in case of empty results */
- LOG.error(e.toString());
- return false;
- }
- }
-
- @Override
- public void close() throws IOException {
- this.rexsterBufferedStream.close();
- }
-
- @Override
- public float getProgress() throws IOException, InterruptedException {
- final float vestimated = GIRAPH_REXSTER_V_ESTIMATE.get(getConf());
-
- if (this.splitStart == this.splitEnd) {
- return 0.0f;
- } else {
- // assuming you got the estimate right this progress should be
- // pretty close;
- return Math.min(1.0f, this.itemsIterated / (float) vestimated);
- }
- }
-
- @Override
- public Vertex<I, V, E> getCurrentVertex()
- throws IOException, InterruptedException {
-
- return this.vertex;
- }
-
- /**
- * Parser for a single vertex JSON object
- *
- * @param jsonVertex vertex represented as JSON object
- * @return The vertex object represented by the JSON object
- */
- protected abstract Vertex<I, V, E> parseVertex(JSONObject jsonVertex)
- throws JSONException;
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java
deleted file mode 100644
index 03681c1..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongDoubleFloatVertexInputFormat.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io.formats;
-
-import java.io.IOException;
-
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.rexster.io.RexsterVertexInputFormat;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-/**
- * Rexster Edge Input Format for Long vertex ID's and Float edge values
- */
-public class RexsterLongDoubleFloatVertexInputFormat
- extends RexsterVertexInputFormat<LongWritable, DoubleWritable,
- FloatWritable> {
-
- @Override
- public RexsterVertexReader createVertexReader(
- InputSplit split, TaskAttemptContext context) throws IOException {
-
- return new RexsterLongDoubleFloatVertexReader();
- }
-
- /**
- * Rexster vertex reader
- */
- protected class RexsterLongDoubleFloatVertexReader
- extends RexsterVertexReader {
-
- @Override
- protected Vertex<LongWritable, DoubleWritable, FloatWritable> parseVertex(
- JSONObject jsonVertex) throws JSONException {
-
- Vertex<LongWritable, DoubleWritable, FloatWritable> vertex;
- Long id;
-
- /* create the actual vertex */
- vertex = getConf().createVertex();
- id = jsonVertex.getLong("_id");
- vertex.initialize(new LongWritable(id),
- new DoubleWritable(0));
- return vertex;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java
deleted file mode 100644
index b2d43af..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/RexsterLongFloatEdgeInputFormat.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io.formats;
-
-import java.io.IOException;
-
-import org.apache.giraph.edge.Edge;
-import org.apache.giraph.edge.EdgeFactory;
-import org.apache.giraph.rexster.io.RexsterEdgeInputFormat;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-/**
- * Rexster Edge Input Format for Long vertex ID's and Float edge values
- */
-public class RexsterLongFloatEdgeInputFormat
- extends RexsterEdgeInputFormat<LongWritable, FloatWritable> {
-
- @Override
- public RexsterEdgeReader createEdgeReader(
- InputSplit split, TaskAttemptContext context) throws IOException {
-
- return new RexsterLongFloatEdgeReader();
- }
-
- /**
- * Rexster edge reader
- */
- protected class RexsterLongFloatEdgeReader extends RexsterEdgeReader {
-
- /** source vertex of the edge */
- private LongWritable sourceId;
-
- @Override
- public LongWritable getCurrentSourceId() throws IOException,
- InterruptedException {
-
- return this.sourceId;
- }
-
- @Override
- protected Edge<LongWritable, FloatWritable> parseEdge(JSONObject jsonEdge)
- throws JSONException {
-
- Edge<LongWritable, FloatWritable> edge = null;
- Long dest;
- Long value;
-
- value = jsonEdge.getLong("weight");
- dest = jsonEdge.getLong("_outV");
- edge = EdgeFactory.create(new LongWritable(dest),
- new FloatWritable(value));
- this.sourceId = new LongWritable(jsonEdge.getLong("_inV"));
-
- return edge;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java
deleted file mode 100644
index b5ae44f..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/formats/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Package of reusable Input/Output formats for Rexster in Giraph.
- */
-package org.apache.giraph.rexster.io.formats;
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java
deleted file mode 100644
index bbd5a7f..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/io/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Package of reusable Input/Output formats for Rexster in Giraph.
- */
-package org.apache.giraph.rexster.io;
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java
deleted file mode 100644
index e669ca9..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/RexsterUtils.java
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.utils;
-
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GRAPH;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_E_SCRIPT;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_V_SCRIPT;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_HOSTNAME;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PASSWORD;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PORT;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_USERNAME;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_USES_SSL;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
-import org.apache.giraph.rexster.io.RexsterInputSplit;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.log4j.Logger;
-import org.json.JSONException;
-import org.json.JSONTokener;
-
-/**
- * Utility functions for the Rexster REST interface
- */
-public abstract class RexsterUtils {
- /** start object symbol for JSON */
- public static final char KEY_VALUE_SEPARATOR = ':';
- /** start object symbol for JSON */
- public static final char START_OBJECT = '{';
- /** end object symbol for JSON */
- public static final char END_OBJECT = '}';
- /** start array symbol for JSON */
- public static final char START_ARRAY = '[';
- /** end array symbol for JSON */
- public static final char END_ARRAY = ']';
- /** array elements separator symbol for JSON */
- public static final char ARRAY_SEPARATOR = ',';
- /** Class logger. */
- private static final Logger LOG = Logger.getLogger(RexsterUtils.class);
-
- /**
- * The default constructor is set to be private by default so that the
- * class is not instantiated.
- */
- private RexsterUtils() { /* private constructor */ }
-
- /**
- * Parse all the vertices from the JSON retreived from Rexster. Inspired
- * by the implementation of the JSONObject class.
- *
- * @param br buffer over the HTTP response content
- * @return JSONTokener tokener over the HTTP JSON. Null in case the results
- * array is empty.
- */
- public static JSONTokener parseJSONEnvelope(BufferedReader br)
- throws InterruptedException {
-
- JSONTokener tokener = null;
-
- try {
- char c;
- String key = null;
-
- tokener = new JSONTokener(br);
- /* check that the JSON is well-formed by starting with a '{' */
- if (tokener.nextClean() != START_OBJECT) {
- LOG.error(
- String.format("A JSONObject text must begin with '%c'",
- START_OBJECT));
- }
-
- /* loop on the whole array */
- for (;;) {
- c = tokener.nextClean();
- switch (c) {
- case 0:
- LOG.error(String.format("A JSONObject text must end with '%c'",
- END_OBJECT));
- break;
- case END_OBJECT:
- return tokener;
- default:
- tokener.back();
- key = tokener.nextValue().toString();
- }
-
- c = tokener.nextClean();
-
- if (c != KEY_VALUE_SEPARATOR) {
- LOG.error(String.format("Expected a %c after a key", c));
- }
-
- if (key != null && !key.equals("results")) {
- tokener.nextValue();
- } else {
- /* starting array */
- c = tokener.nextClean();
- if (c != START_ARRAY) {
- LOG.error("'results' is expected to be an array");
- }
-
- /* check if the array is emty. If so, return null to signal that
- no objects are available in the array, otherwise return the
- tokener. */
- c = tokener.nextClean();
- if (c == END_ARRAY) {
- return null;
- } else {
- tokener.back();
- return tokener;
- }
- }
-
- switch (tokener.nextClean()) {
- case ';':
- case ',':
- if (tokener.nextClean() == '}') {
- return tokener;
- }
- tokener.back();
- break;
- case '}':
- return tokener;
- default:
- LOG.error("Expected a ',' or '}'");
- }
- }
-
- } catch (JSONException e) {
- LOG.error("Unable to parse the JSON with the vertices.\n" +
- e.getMessage());
- throw new InterruptedException(e.toString());
- }
- }
-
- /**
- * Splitter used by both Vertex and Edge Input Format.
- *
- * @param context The job context
- * @param estimation Number of estimated objects
- * @return splits to be generated to read the input
- */
- public static List<InputSplit> getSplits(JobContext context,
- long estimation) throws IOException, InterruptedException {
-
- final int chunks = context.getConfiguration().getInt("mapred.map.tasks", 1);
- final long chunkSize = estimation / chunks;
- final List<InputSplit> splits = new ArrayList<InputSplit>();
-
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Estimated objects: %d", estimation));
- LOG.debug(String.format("Number of chunks: %d", chunks));
- }
-
- for (int i = 0; i < chunks; ++i) {
- final RexsterInputSplit split;
- final long start;
- final long end;
-
- start = i * chunkSize;
- end = ((i + 1) == chunks) ? Long.MAX_VALUE :
- (i * chunkSize) + chunkSize;
- split = new RexsterInputSplit(start, end);
- splits.add(split);
-
- if (LOG.isDebugEnabled()) {
- LOG.debug(String.format("Chunk: start %d; end %d;", start, end));
- LOG.debug(String.format("Chunk: size %d;", chunkSize));
- LOG.debug(split);
- }
- }
-
- return splits;
- }
-
- /**
- * Opens an HTTP connection to the specified Rexster server.
- *
- * @param conf giraph configuration
- * @param start start index of the Rexster page split
- * @param end end index of the Rexster page split
- * @param urlSuffix stream type (vertices or edges) needed for the
- * REST Url
- * @param gremlinScript gremlin script. If set to null, will be ignored.
- * @return BufferedReader the object used to retrieve the HTTP response
- * content
- */
- // CHECKSTYLE: stop IllegalCatch
- protected static BufferedReader openRexsterStream(
- ImmutableClassesGiraphConfiguration conf,
- long start, long end, String urlSuffix, String gremlinScript)
- throws InterruptedException {
-
- final String uriScriptFormat =
- "/graphs/%s/tp/gremlin?script=%s" +
- "&rexster.offset.start=%s&rexster.offset.end=%s";
- final String uriFormat =
- "/graphs/%s/%s/" +
- "?rexster.offset.start=%s&rexster.offset.end=%s";
-
- final String endpoint = GIRAPH_REXSTER_HOSTNAME.get(conf);
-
- if (endpoint == null) {
- throw new InterruptedException(GIRAPH_REXSTER_HOSTNAME.getKey() +
- " is a mandatory ");
- }
-
- final boolean isSsl = GIRAPH_REXSTER_USES_SSL.get(conf);
- final int port = GIRAPH_REXSTER_PORT.get(conf);
- final String graph = GIRAPH_REXSTER_GRAPH.get(conf);
-
-
- try {
- URL url;
- /*final String url;*/
- final String auth;
- final String username;
- final String password;
- final HttpURLConnection connection;
- final InputStream is;
- final InputStreamReader isr;
-
- if (gremlinScript != null && !gremlinScript.isEmpty()) {
- url = new URL(isSsl ? "https" : "http",
- endpoint, port,
- String.format(uriScriptFormat, graph, gremlinScript,
- start, end));
- } else {
- url = new URL(isSsl ? "https" : "http",
- endpoint, port,
- String.format(uriFormat, graph, urlSuffix, start, end));
- }
-
- LOG.info(url);
-
- username = GIRAPH_REXSTER_USERNAME.get(conf);
- password = GIRAPH_REXSTER_PASSWORD.get(conf);
- byte[] authBytes = (username + ":" + password).getBytes(
- Charset.defaultCharset());
- auth = "Basic " + Base64.encodeBase64URLSafeString(authBytes);
-
- connection = createConnection(url, auth);
- connection.setDoOutput(true);
- is = connection.getInputStream();
- isr = new InputStreamReader(is, Charset.defaultCharset());
-
- return new BufferedReader(isr);
-
- } catch (Exception e) {
- throw new RuntimeException(e.getMessage(), e);
- }
- }
- // CHECKSTYLE: resume IllegalCatch
-
- /**
- * Creates a new HTTP connection to the specified server.
- *
- * @param url URI to connec to
- * @param authValue authetication value if available
- * @return a new HTTP connection
- */
- private static HttpURLConnection createConnection(final URL url,
- final String authValue) throws Exception {
-
- final HttpURLConnection connection =
- (HttpURLConnection) url.openConnection();
-
- connection.setConnectTimeout(0);
- connection.setReadTimeout(0);
- connection.setRequestMethod("GET");
- connection.setRequestProperty("Authorization", authValue);
- connection.setDoOutput(true);
-
- return connection;
- }
-
- /**
- * Specific Rexster utility functions for vertices
- */
- public static class Vertex {
- /**
- * Empty private constructor. This class should not be instantiated.
- */
- private Vertex() { /* private constructor */ }
-
- /**
- * Opens an HTTP connection to the specified Rexster server for vertices.
- *
- * @param conf giraph configuration
- * @param start start index of the Rexster page split
- * @param end end index of the Rexster page split
- * @return BufferedReader the object used to retrieve the HTTP response
- */
- public static BufferedReader openRexsterStream(
- ImmutableClassesGiraphConfiguration conf, long start, long end)
- throws InterruptedException {
-
- String gremlinScript = null;
-
- gremlinScript = GIRAPH_REXSTER_GREMLIN_V_SCRIPT.get(conf);
- return RexsterUtils.openRexsterStream(conf, start, end, "vertices",
- gremlinScript);
- }
- }
-
- /**
- * Specific Rexster utility functions for edges
- */
- public static class Edge {
- /**
- * Empty private constructor. This class should not be instantiated.
- */
- private Edge() { /* private constructor */ }
-
- /**
- * Opens an HTTP connection to the specified Rexster server for edges.
- *
- * @param conf giraph configuration
- * @param start start index of the Rexster page split
- * @param end end index of the Rexster page split
- * @return BufferedReader the object used to retrieve the HTTP response
- */
- public static BufferedReader openRexsterStream(
- ImmutableClassesGiraphConfiguration conf, long start, long end)
- throws InterruptedException {
-
- String gremlinScript = null;
- gremlinScript = GIRAPH_REXSTER_GREMLIN_E_SCRIPT.get(conf);
-
- return RexsterUtils.openRexsterStream(conf, start, end, "edges",
- gremlinScript);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java b/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java
deleted file mode 100644
index 3f6810f..0000000
--- a/giraph-rexster/src/main/java/org/apache/giraph/rexster/utils/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Package of reusable utils for Rexster in Giraph.
- */
-package org.apache.giraph.rexster.utils;
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java b/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java
deleted file mode 100644
index 09b27d3..0000000
--- a/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestAbstractRexsterInputFormat.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io.formats;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.HashSet;
-import java.util.List;
-
-import org.apache.commons.configuration.HierarchicalConfiguration;
-import org.apache.commons.configuration.XMLConfiguration;
-import org.apache.commons.io.FileUtils;
-import org.junit.After;
-import org.junit.Before;
-
-import com.tinkerpop.blueprints.Graph;
-import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
-import com.tinkerpop.blueprints.util.io.graphson.GraphSONReader;
-import com.tinkerpop.rexster.Tokens;
-import com.tinkerpop.rexster.protocol.EngineController;
-import com.tinkerpop.rexster.server.HttpRexsterServer;
-import com.tinkerpop.rexster.server.RexsterApplication;
-import com.tinkerpop.rexster.server.RexsterServer;
-import com.tinkerpop.rexster.server.XmlRexsterApplication;
-
-public abstract class TestAbstractRexsterInputFormat {
- /** temporary directory */
- private final String TMP_DIR = "/tmp/";
- /** input JSON extension */
- private final String INPUT_JSON_EXT = ".input.json";
- /** output JSON extension */
- protected final String OUTPUT_JSON_EXT = ".output.json";
- /** rexster XML configuration file */
- private final String REXSTER_CONF = "rexster.xml";
- /** string databases */
- protected final String DATABASES[] =
- {
- "empty-db",
- "test-db"
- };
- /** Rexster server instance */
- protected RexsterServer server;
-
- @Before
- @SuppressWarnings("unchecked")
- public void setUp() throws Exception {
- final XMLConfiguration properties = new XMLConfiguration();
- final RexsterApplication application;
- final List<HierarchicalConfiguration> graphConfigs;
- final InputStream rexsterConf;
- final int scriptEngineThreshold;
- final String scriptEngineInitFile;
- final List<String> scriptEngineNames;
-
- /* prepare all databases */
- for (int i = 0; i < DATABASES.length; ++i) {
- prepareDb(DATABASES[i]);
- }
-
- /* start the Rexster HTTP server using the prepared rexster configuration */
- rexsterConf =
- this.getClass().getResourceAsStream(REXSTER_CONF);
- properties.load(rexsterConf);
- rexsterConf.close();
-
- graphConfigs = properties.configurationsAt(Tokens.REXSTER_GRAPH_PATH);
- application = new XmlRexsterApplication(graphConfigs);
- this.server = new HttpRexsterServer(properties);
-
- scriptEngineThreshold =
- properties.getInt("script-engine-reset-threshold",
- EngineController.RESET_NEVER);
- scriptEngineInitFile = properties.getString("script-engine-init", "");
-
- /* allow scriptengines to be configured so that folks can drop in
- different gremlin flavors. */
- scriptEngineNames = properties.getList("script-engines");
-
- if (scriptEngineNames == null) {
- // configure to default with gremlin-groovy
- EngineController.configure(scriptEngineThreshold, scriptEngineInitFile);
- } else {
- EngineController.configure(scriptEngineThreshold, scriptEngineInitFile,
- new HashSet<String>(scriptEngineNames));
- }
-
- this.server.start(application);
- }
-
- @After
- public void tearDown() throws IOException {
- for (int i = 0; i < DATABASES.length; ++i) {
- FileUtils.deleteDirectory(new File(TMP_DIR + DATABASES[i]));
- }
-
- try {
- this.server.stop();
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- private void prepareDb(String dbName) throws IOException {
- final InputStream db;
- final Graph tinkergraph;
-
- db = this.getClass().getResourceAsStream(dbName + INPUT_JSON_EXT);
- tinkergraph = new TinkerGraph(TMP_DIR + dbName);
- GraphSONReader.inputGraph(tinkergraph, db);
- tinkergraph.shutdown();
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java b/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java
deleted file mode 100644
index a68db67..0000000
--- a/giraph-rexster/src/test/java/org/apache/giraph/rexster/io/formats/TestRexsterLongDoubleFloatVertexInputFormat.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.rexster.io.formats;
-
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_E_SCRIPT;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GREMLIN_V_SCRIPT;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_HOSTNAME;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_PORT;
-import static org.apache.giraph.rexster.conf.GiraphRexsterConstants.GIRAPH_REXSTER_GRAPH;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.Iterator;
-
-import org.apache.giraph.conf.GiraphConfiguration;
-import org.apache.giraph.graph.BasicComputation;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexOutputFormat;
-import org.apache.giraph.utils.InternalVertexRunner;
-import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.junit.Assert;
-import org.junit.Test;
-
-import com.google.common.base.Charsets;
-import com.google.common.io.Files;
-
-public class TestRexsterLongDoubleFloatVertexInputFormat
- extends TestAbstractRexsterInputFormat {
-
- @Test
- public void getEmptyDb() throws Exception {
- Iterable<String> results;
- GiraphConfiguration conf = new GiraphConfiguration();
- final String dbName = super.DATABASES[0];
- Iterator<String> result;
-
- GIRAPH_REXSTER_HOSTNAME.set(conf, "127.0.0.1");
- GIRAPH_REXSTER_PORT.set(conf, 18182);
- GIRAPH_REXSTER_GRAPH.set(conf, dbName);
- conf.setComputationClass(EmptyComputation.class);
- conf.setVertexInputFormatClass(RexsterLongDoubleFloatVertexInputFormat.class);
- conf.setEdgeInputFormatClass(RexsterLongFloatEdgeInputFormat.class);
- conf.setVertexOutputFormatClass(JsonLongDoubleFloatDoubleVertexOutputFormat.class);
- results = InternalVertexRunner.run(conf, new String[0], new String[0]);
- Assert.assertNotNull(results);
-
- result = results.iterator();
- Assert.assertFalse(result.hasNext());
- }
-
- @Test
- public void getTestDb() throws Exception {
- Iterable<String> results;
- GiraphConfiguration conf = new GiraphConfiguration();
- final String dbName = super.DATABASES[1];
- Iterator<String> result;
- Iterator<String> expected;
- final File expectedFile;
- final URL expectedFileUrl;
-
- GIRAPH_REXSTER_HOSTNAME.set(conf, "127.0.0.1");
- GIRAPH_REXSTER_PORT.set(conf, 18182);
- GIRAPH_REXSTER_GRAPH.set(conf, dbName);
- conf.setComputationClass(EmptyComputation.class);
- conf.setVertexInputFormatClass(RexsterLongDoubleFloatVertexInputFormat.class);
- conf.setEdgeInputFormatClass(RexsterLongFloatEdgeInputFormat.class);
- conf.setVertexOutputFormatClass(JsonLongDoubleFloatDoubleVertexOutputFormat.class);
-
- results = InternalVertexRunner.run(conf, new String[0], new String[0]);
- Assert.assertNotNull(results);
-
- expectedFileUrl =
- this.getClass().getResource(dbName + super.OUTPUT_JSON_EXT);
- expectedFile = new File(expectedFileUrl.toURI());
- expected = Files.readLines(expectedFile, Charsets.UTF_8).iterator();
- result = results.iterator();
-
- while(expected.hasNext() && result.hasNext()) {
- String resultLine = (String) result.next();
- String expectedLine = (String) expected.next();
-
- Assert.assertTrue(expectedLine.equals(resultLine));
- }
- }
-
- @Test
- public void getGremlinDb() throws Exception {
- Iterable<String> results;
- GiraphConfiguration conf = new GiraphConfiguration();
- final String dbName = super.DATABASES[1];
- Iterator<String> result;
- Iterator<String> expected;
- final File expectedFile;
- final URL expectedFileUrl;
-
- GIRAPH_REXSTER_HOSTNAME.set(conf, "127.0.0.1");
- GIRAPH_REXSTER_PORT.set(conf, 18182);
- GIRAPH_REXSTER_GRAPH.set(conf, dbName);
- GIRAPH_REXSTER_GREMLIN_V_SCRIPT.set(conf, "g.V");
- GIRAPH_REXSTER_GREMLIN_E_SCRIPT.set(conf, "g.E");
- conf.setComputationClass(EmptyComputation.class);
- conf.setVertexInputFormatClass(RexsterLongDoubleFloatVertexInputFormat.class);
- conf.setEdgeInputFormatClass(RexsterLongFloatEdgeInputFormat.class);
- conf.setVertexOutputFormatClass(JsonLongDoubleFloatDoubleVertexOutputFormat.class);
-
- results = InternalVertexRunner.run(conf, new String[0], new String[0]);
- Assert.assertNotNull(results);
-
- expectedFileUrl =
- this.getClass().getResource(dbName + super.OUTPUT_JSON_EXT);
- expectedFile = new File(expectedFileUrl.toURI());
- expected = Files.readLines(expectedFile, Charsets.UTF_8).iterator();
- result = results.iterator();
-
- while(expected.hasNext() && result.hasNext()) {
- String resultLine = (String) result.next();
- String expectedLine = (String) expected.next();
-
- Assert.assertTrue(expectedLine.equals(resultLine));
- }
- }
-
- /*
- Test compute method that sends each edge a notification of its parents.
- The test set only has a 1-1 parent-to-child ratio for this unit test.
- */
- public static class EmptyComputation
- extends BasicComputation<LongWritable, DoubleWritable, FloatWritable, LongWritable> {
-
- @Override
- public void compute(Vertex<LongWritable, DoubleWritable, FloatWritable> vertex,
- Iterable<LongWritable> messages) throws IOException {
- vertex.voteToHalt();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json
deleted file mode 100644
index 66d6c5d..0000000
--- a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/empty-db.input.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- "graph":
- {
- "mode": "NORMAL",
- "vertices": [],
- "edges": []
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml
deleted file mode 100644
index e7de484..0000000
--- a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/rexster.xml
+++ /dev/null
@@ -1,52 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<rexster>
- <http>
- <server-port>18182</server-port>
- <server-host>127.0.0.1</server-host>
- <base-uri>http://127.0.0.1</base-uri>
- <web-root>public</web-root>
- <character-set>UTF-8</character-set>
- <enable-jmx>false</enable-jmx>
- <enable-doghouse>false</enable-doghouse>
- <thread-pool>
- <worker>
- <core-size>8</core-size>
- <max-size>8</max-size>
- </worker>
- <kernal>
- <core-size>4</core-size>
- <max-size>4</max-size>
- </kernal>
- </thread-pool>
- <io-strategy>leader-follower</io-strategy>
- </http>
- <script-engine-reset-threshold>-1</script-engine-reset-threshold>
- <script-engine-init>data/init.groovy</script-engine-init>
- <script-engines>gremlin-groovy</script-engines>
- <graphs>
- <graph>
- <graph-name>empty-db</graph-name>
- <graph-type>tinkergraph</graph-type>
- <graph-location>/tmp/empty-db</graph-location>
- </graph>
- <graph>
- <graph-name>test-db</graph-name>
- <graph-type>tinkergraph</graph-type>
- <graph-location>/tmp/test-db</graph-location>
- <extensions>
- <allows>
- <allow>tp:gremlin</allow>
- </allows>
- <extension>
- <namespace>tp</namespace>
- <name>gremlin</name>
- <configuration>
- <scripts>script-directory</scripts>
- <allow-client-script>true</allow-client-script>
- <cache-scripts>true</cache-scripts>
- </configuration>
- </extension>
- </extensions>
- </graph>
- </graphs>
-</rexster>
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json
deleted file mode 100644
index 955af91..0000000
--- a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.input.json
+++ /dev/null
@@ -1,126 +0,0 @@
-{
- "graph":
- {
- "mode": "NORMAL",
- "vertices": [
- {
- "_id": 1,
- "_type": "vertex"
- },
- {
- "_id": 2,
- "_type":"vertex"
- },
- {
- "_id": 3,
- "_type":"vertex"
- },
- {
- "_id": 4,
- "_type":"vertex"
- },
- {
- "_id": 5,
- "_type":"vertex"
- }
- ],
- "edges": [
- {
- "weight": 1,
- "_id": 0,
- "_type": "edge",
- "_outV": 1,
- "_inV": 2,
- "_label": "_default"
- },
- {
- "weight": 3,
- "_id": 1,
- "_type": "edge",
- "_outV": 1,
- "_inV": 4,
- "_label": "_default"
- },
- {
- "weight": 1,
- "_id": 2,
- "_type": "edge",
- "_outV": 2,
- "_inV": 1,
- "_label": "_default"
- },
- {
- "weight": 2,
- "_id": 3,
- "_type": "edge",
- "_outV": 2,
- "_inV": 3,
- "_label": "_default"
- },
- {
- "weight": 1,
- "_id": 4,
- "_type": "edge",
- "_outV": 2,
- "_inV": 4,
- "_label": "_default"
- },
- {
- "weight": 2,
- "_id": 5,
- "_type": "edge",
- "_outV": 3,
- "_inV": 2,
- "_label": "_default"
- },
- {
- "weight": 4,
- "_id": 6,
- "_type": "edge",
- "_outV": 3,
- "_inV": 5,
- "_label": "_default"
- },
- {
- "weight": 3,
- "_id": 7,
- "_type": "edge",
- "_outV": 4,
- "_inV": 1,
- "_label": "_default"
- },
- {
- "weight": 1,
- "_id": 8,
- "_type": "edge",
- "_outV": 4,
- "_inV": 2,
- "_label": "_default"
- },
- {
- "weight": 4,
- "_id": 9,
- "_type": "edge",
- "_outV": 4,
- "_inV": 5,
- "_label": "_default"
- },
- {
- "weight": 4,
- "_id": 10,
- "_type": "edge",
- "_outV": 5,
- "_inV": 4,
- "_label": "_default"
- },
- {
- "weight": 4,
- "_id": 11,
- "_type": "edge",
- "_outV": 5,
- "_inV": 3,
- "_label": "_default"
- }
- ]
- }
-}
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json
----------------------------------------------------------------------
diff --git a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json b/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json
deleted file mode 100644
index f1198df..0000000
--- a/giraph-rexster/src/test/resources/org/apache/giraph/rexster/io/formats/test-db.output.json
+++ /dev/null
@@ -1,5 +0,0 @@
-[5,0,[[3,4],[4,4]]]
-[2,0,[[1,1],[3,2],[4,1]]]
-[1,0,[[2,1],[4,3]]]
-[3,0,[[2,2]]]
-[4,0,[[1,3],[5,4],[2,1]]]
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index da25a06..b4de73e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -271,7 +271,7 @@ under the License.
<dep.commons-collections.version>3.2.1</dep.commons-collections.version>
<dep.commons-configuration.version>1.6</dep.commons-configuration.version>
<dep.commons-httpclient.version>3.0.1</dep.commons-httpclient.version>
- <dep.commons-logging.version>1.0.4</dep.commons-logging.version>
+ <dep.commons-logging.version>1.1.1</dep.commons-logging.version>
<dep.commons-io.version>2.1</dep.commons-io.version>
<dep.commons-net.version>3.1</dep.commons-net.version>
<dep.fasterxml-jackson.version>2.1.2</dep.fasterxml-jackson.version>
@@ -290,7 +290,7 @@ under the License.
<dep.mockito.version>1.9.5</dep.mockito.version>
<dep.netty.version>3.5.3.Final</dep.netty.version>
<dep.slf4j.version>1.7.5</dep.slf4j.version>
- <dep.tinkerpop.rexter.version>2.3.0</dep.tinkerpop.rexter.version>
+ <dep.tinkerpop.rexter.version>2.4.0</dep.tinkerpop.rexter.version>
<dep.typetools.version>0.2.1</dep.typetools.version>
<dep.yammer-metrics.version>2.2.0</dep.yammer-metrics.version>
<dep.yourkit-api.version>9.5.6</dep.yourkit-api.version>
@@ -324,7 +324,7 @@ under the License.
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
- <version>2.7</version>
+ <version>2.8</version>
<configuration>
<skip>${giraph.maven.dependency.plugin.skip}</skip>
<failOnWarning>true</failOnWarning>
@@ -1318,6 +1318,38 @@ under the License.
<groupId>com.tinkerpop.blueprints</groupId>
<artifactId>blueprints-core</artifactId>
<version>${dep.tinkerpop.rexter.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.tinkerpop.blueprints</groupId>
+ <artifactId>blueprints-orient-graph</artifactId>
+ <version>${dep.tinkerpop.rexter.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.tinkerpop.blueprints</groupId>
+ <artifactId>blueprints-neo4j-graph</artifactId>
+ <version>${dep.tinkerpop.rexter.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>com.tinkerpop.rexster</groupId>
@@ -1344,9 +1376,21 @@ under the License.
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
+ <groupId>com.tinkerpop.gremlin</groupId>
+ <artifactId>gremlin-groovy</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.ant</groupId>
+ <artifactId>ant</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>org.glassfish.grizzly</groupId>
<artifactId>grizzly-framework</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
@@ -1355,10 +1399,30 @@ under the License.
<version>${dep.tinkerpop.rexter.version}</version>
<exclusions>
<exclusion>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.tinkerpop.gremlin</groupId>
+ <artifactId>gremlin-groovy</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>org.glassfish.grizzly</groupId>
<artifactId>grizzly-framework</artifactId>
</exclusion>
http://git-wip-us.apache.org/repos/asf/giraph/blob/55e22de8/src/site/site.xml
----------------------------------------------------------------------
diff --git a/src/site/site.xml b/src/site/site.xml
index a5931ea..9fc9810 100644
--- a/src/site/site.xml
+++ b/src/site/site.xml
@@ -79,7 +79,7 @@
<item name="Input/Output in Giraph" href="io.html"/>
<item name="Hive" href="hive.html"/>
<item name="Gora" href="gora.html"/>
- <item name="Rexster" href="rexster.html"/>
+ <item name="Rexster I/O" href="rexster.html"/>
<item name="Aggregators" href="aggregators.html"/>
<item name="Out-of-core" href="ooc.html"/>
<item name="Javadoc" href="javadoc_modules.html"/>