You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@rya.apache.org by ca...@apache.org on 2017/09/29 18:52:08 UTC

incubator-rya git commit: RYA-255 Provide an example class that demonstrates the Prospector. Closes #144.

Repository: incubator-rya
Updated Branches:
  refs/heads/master 7949baa67 -> 33ef52cbb


RYA-255 Provide an example class that demonstrates the Prospector. Closes #144.


Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/33ef52cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/33ef52cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/33ef52cb

Branch: refs/heads/master
Commit: 33ef52cbbb585b78c33878c41421ef07761f9032
Parents: 7949baa
Author: Kevin Chilton <ke...@parsons.com>
Authored: Tue Mar 7 16:02:16 2017 -0500
Committer: Caleb Meier <ca...@parsons.com>
Committed: Fri Sep 29 11:50:46 2017 -0700

----------------------------------------------------------------------
 .../rya/accumulo/AccumuloRdfConfiguration.java  |  40 ++++
 .../src/main/java/ProspectorExample.java        | 193 +++++++++++++++++++
 .../src/main/resources/stats_cluster_config.xml |  93 +++++++++
 3 files changed, 326 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
----------------------------------------------------------------------
diff --git a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
index 0200cf7..ed76b4a 100644
--- a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
+++ b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
@@ -205,6 +205,13 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
     
 
     /**
+     * @param enabled - {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}.
+     */
+    public void useMockInstance(boolean enabled) {
+        super.setBooleanIfUnset(USE_MOCK_INSTANCE, enabled); // NOTE(review): "IfUnset" - presumably leaves an already configured value untouched; confirm this is intended.
+    }
+
+    /**
      * Indicates that a Mock instance of Accumulo is being used to back the Rya instance.
      *
      * @return {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}.
@@ -214,6 +221,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
     }
 
     /**
+     * Sets the Accumulo username that is meant to be used when connecting a {@link Connector} to Accumulo.
+     * @param username - The Accumulo username to store within this configuration object.
+     */
+    public void setUsername(String username) {
+        super.set(CLOUDBASE_USER, username);
+    }
+
+    /**
      * Get the Accumulo username from the configuration object that is meant to
      * be used when connecting a {@link Connector} to Accumulo.
      *
@@ -224,6 +239,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
     }
 
     /**
+     * Sets the Accumulo password that is meant to be used when connecting a {@link Connector} to Accumulo.
+     * @param password - The Accumulo password to store within this configuration object.
+     */
+    public void setPassword(String password) {
+        super.set(CLOUDBASE_PASSWORD, password);
+    }
+
+    /**
      * Get the Accumulo password from the configuration object that is meant to
      * be used when connecting a {@link Connector} to Accumulo.
      *
@@ -234,6 +257,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
     }
 
     /**
+     * Sets the Accumulo instance name that is meant to be used when connecting a {@link Connector} to Accumulo.
+     * @param instanceName - The Accumulo instance name to store within this configuration object.
+     */
+    public void setInstanceName(String instanceName) {
+        super.set(CLOUDBASE_INSTANCE, instanceName);
+    }
+
+    /**
      * Get the Accumulo instance name from the configuration object that is
      * meant to be used when connecting a {@link Connector} to Accumulo.
      *
@@ -244,6 +275,15 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
     }
 
     /**
+     * Sets the comma delimited list of the names of the Zookeeper servers that is
+     * meant to be used when connecting a {@link Connector} to Accumulo.
+     * @param zookeepers - The comma delimited list of Zookeeper server names to store within this configuration object.
+     */
+    public void setZookeepers(String zookeepers) {
+        super.set(CLOUDBASE_ZOOKEEPERS, zookeepers);
+    }
+
+    /**
      * Get a comma delimited list of the names of the Zookeeper servers from
      * the configuration object that is meant to be used when connecting a
      * {@link Connector} to Accumulo.

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/extras/indexingExample/src/main/java/ProspectorExample.java
----------------------------------------------------------------------
diff --git a/extras/indexingExample/src/main/java/ProspectorExample.java b/extras/indexingExample/src/main/java/ProspectorExample.java
new file mode 100644
index 0000000..fd0e4f8
--- /dev/null
+++ b/extras/indexingExample/src/main/java/ProspectorExample.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.List;
+
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Level;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+import org.apache.rya.accumulo.AccumuloRdfConfiguration;
+import org.apache.rya.accumulo.utils.ConnectorFactory;
+import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF;
+import org.apache.rya.prospector.mr.Prospector;
+import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO;
+import org.apache.rya.sail.config.RyaSailFactory;
+import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.ValueFactoryImpl;
+import org.openrdf.sail.Sail;
+import org.openrdf.sail.SailConnection;
+
+import com.beust.jcommander.internal.Lists;
+
+/**
+ * Demonstrates how you can use the {@link Prospector} to count values that appear within an instance of Rya and
+ * then use the {@link ProspectorServiceEvalStatsDAO} to fetch those counts.
+ */
+public class ProspectorExample {
+    private static final Logger log = Logger.getLogger(RyaClientExample.class);
+
+    private static final ValueFactory VALUE_FACTORY = new ValueFactoryImpl();
+
+    private static final URI ALICE = VALUE_FACTORY.createURI("urn:alice");
+    private static final URI BOB = VALUE_FACTORY.createURI("urn:bob");
+    private static final URI CHARLIE = VALUE_FACTORY.createURI("urn:charlie");
+
+    private static final URI WORKS_AT = VALUE_FACTORY.createURI("urn:worksAt");
+    private static final URI ADMIRES = VALUE_FACTORY.createURI("urn:admires");
+    private static final URI LIVES_WITH = VALUE_FACTORY.createURI("urn:livesWith");
+
+    private static final URI BURGER_JOINT = VALUE_FACTORY.createURI("urn:burgerJoint");
+    private static final URI DONUT_SHOP= VALUE_FACTORY.createURI("urn:donutShop");
+
+    public static void main(final String[] args) throws Exception {
+        setupLogging();
+
+        // Configure Rya to use a mock instance.
+        final AccumuloRdfConfiguration config = new AccumuloRdfConfiguration();
+        config.useMockInstance(true);
+        config.setTablePrefix("rya_");
+        config.setUsername("user");
+        config.setPassword("pass");
+        config.setInstanceName("accumulo");
+
+        // Load some data into Rya.
+        final List<Statement> statements = Lists.newArrayList(
+                VALUE_FACTORY.createStatement(ALICE, WORKS_AT, BURGER_JOINT),
+                VALUE_FACTORY.createStatement(ALICE, ADMIRES, BOB),
+                VALUE_FACTORY.createStatement(BOB, WORKS_AT, DONUT_SHOP),
+                VALUE_FACTORY.createStatement(CHARLIE, WORKS_AT, DONUT_SHOP),
+                VALUE_FACTORY.createStatement(CHARLIE, LIVES_WITH, BOB),
+                VALUE_FACTORY.createStatement(BOB, LIVES_WITH, CHARLIE),
+                VALUE_FACTORY.createStatement(BOB, LIVES_WITH, ALICE));
+
+        final Sail sail = RyaSailFactory.getInstance(config);
+        final SailConnection conn = sail.getConnection();
+        log.info("Loading the following statements into a Mock instance of Accumulo Rya:");
+        conn.begin();
+        for(final Statement statement : statements) {
+            log.info("    " + statement.toString());
+            conn.addStatement(statement.getSubject(), statement.getPredicate(), statement.getObject());
+        }
+        conn.commit();
+        conn.close();
+
+        // Create the table that the Prospector's results will be written to.
+        ConnectorFactory.connect(config)
+                .tableOperations()
+                .create("rya_prospects");
+
+        // Run the Prospector using the configuration file that is in the resources directory.
+        log.info("");
+        log.info("Running the Map Reduce job that computes the Prospector results.");
+        ToolRunner.run(new Prospector(), new String[]{ "src/main/resources/stats_cluster_config.xml" });
+
+        // Print the table that was created by the Prospector.
+        log.info("");
+        log.info("The following cardinalities were written to the Prospector table:");
+        final ProspectorServiceEvalStatsDAO dao = ProspectorServiceEvalStatsDAO.make(config);
+
+        // Do each of the Subjects.
+        double cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(ALICE));
+        log.info("    subject: " + ALICE + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(BOB));
+        log.info("    subject: " + BOB + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(CHARLIE));
+        log.info("    subject: " + CHARLIE + ", cardinality: " + cardinality);
+
+        // Do each of the Predicates.
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(WORKS_AT));
+        log.info("    predicate: " + WORKS_AT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(ADMIRES));
+        log.info("    predicate: " + ADMIRES + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(LIVES_WITH));
+        log.info("    predicate: " + LIVES_WITH + ", cardinality: " + cardinality);
+
+        // Do each of the Objects.
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BURGER_JOINT));
+        log.info("    object: " + BURGER_JOINT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(DONUT_SHOP));
+        log.info("    object: " + DONUT_SHOP + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(ALICE));
+        log.info("    object: " + ALICE + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BOB));
+        log.info("    object: " + BOB + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(CHARLIE));
+        log.info("    object: " + CHARLIE + ", cardinality: " + cardinality);
+
+        // Do each of the Subject/Predicate pairs.
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, WORKS_AT));
+        log.info("    subject/predicate: " + ALICE + "/" + WORKS_AT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, ADMIRES));
+        log.info("    subject/predicate: " + ALICE + "/" + ADMIRES + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, WORKS_AT));
+        log.info("    subject/predicate: " + BOB + "/" + WORKS_AT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, WORKS_AT));
+        log.info("    subject/predicate: " + CHARLIE + "/" + WORKS_AT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, LIVES_WITH));
+        log.info("    subject/predicate: " + CHARLIE + "/" + LIVES_WITH + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, LIVES_WITH));
+        log.info("    subject/predicate: " + BOB + "/" + LIVES_WITH + ", cardinality: " + cardinality);
+
+        // Do each of the Subject/Object pairs.
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BURGER_JOINT));
+        log.info("    subject/object: " + ALICE + "/" + BURGER_JOINT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BOB));
+        log.info("    subject/object: " + ALICE + "/" + BOB + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, DONUT_SHOP));
+        log.info("    subject/object: " + ALICE + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, DONUT_SHOP));
+        log.info("    subject/object: " + CHARLIE + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, BOB));
+        log.info("    subject/object: " + CHARLIE + "/" + BOB + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, CHARLIE));
+        log.info("    subject/object: " + BOB + "/" + CHARLIE + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, ALICE));
+        log.info("    subject/object: " + BOB + "/" + ALICE + ", cardinality: " + cardinality);
+
+        // Do each of the Predicate/Object pairs.
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, BURGER_JOINT));
+        log.info("    predicate/object: " + WORKS_AT + "/" + BURGER_JOINT + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(ADMIRES, BOB));
+        log.info("    predicate/object: " + ADMIRES + "/" + BOB + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, DONUT_SHOP));
+        log.info("    predicate/object: " + WORKS_AT + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, BOB));
+        log.info("    predicate/object: " + LIVES_WITH + "/" + BOB + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, CHARLIE));
+        log.info("    predicate/object: " + LIVES_WITH + "/" + CHARLIE + ", cardinality: " + cardinality);
+        cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, ALICE));
+        log.info("    predicate/object: " + LIVES_WITH + "/" + ALICE + ", cardinality: " + cardinality);
+    }
+
+    private static void setupLogging() {
+        // Turn off all the loggers and customize how they write to the console.
+        final Logger rootLogger = LogManager.getRootLogger();
+        rootLogger.setLevel(Level.OFF);
+        final ConsoleAppender ca = (ConsoleAppender) rootLogger.getAppender("stdout");
+        ca.setLayout(new PatternLayout("%-5p - %m%n"));
+
+        // Turn the logger used by the demo back on.
+        log.setLevel(Level.INFO);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/extras/indexingExample/src/main/resources/stats_cluster_config.xml
----------------------------------------------------------------------
diff --git a/extras/indexingExample/src/main/resources/stats_cluster_config.xml b/extras/indexingExample/src/main/resources/stats_cluster_config.xml
new file mode 100644
index 0000000..170a0c2
--- /dev/null
+++ b/extras/indexingExample/src/main/resources/stats_cluster_config.xml
@@ -0,0 +1,93 @@
+<?xml version="1.0"?>
+
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+   <!-- Accumulo info. These values match the mock instance that ProspectorExample configures. -->
+   
+    <property>
+        <name>mock</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>instance</name>
+        <value>accumulo</value>
+    </property>
+
+   <!-- User info. These are the same credentials that ProspectorExample sets on its configuration. -->
+    <property>
+        <name>username</name>
+        <value>user</value>
+    </property>
+    <property>
+        <name>password</name>
+        <value>pass</value>
+    </property>
+
+   <!-- Rya info. The table names start with the "rya_" table prefix that ProspectorExample configures. -->
+   <property>
+        <name>spo.table</name>
+        <value>rya_spo</value>
+    </property>
+    <property>
+        <name>prospects.table</name>
+        <value>rya_prospects</value>
+    </property>
+    <property>
+        <name>selectivity.table</name>
+        <value>rya_selectivity</value>
+    </property>
+    <property>
+        <name>auths</name>
+        <value>U</value>
+    </property>
+    <property>
+        <name>prospector.auths</name>
+        <value>U</value>
+    </property>
+    
+    <property>
+        <name>prospector.intable</name>
+        <value>rya_spo</value>
+    </property>
+    <property>
+        <name>prospector.outtable</name>
+        <value>rya_prospects</value>
+    </property>
+    
+    <property>
+        <name>inputpath</name>
+        <value>/tmp/RyaStats/JoinSelectStatisticsSumInput</value>
+    </property>
+    <property>
+        <name>outputpath</name>
+        <value>/tmp/RyaStats/JoinSelectStatisticsSumInput</value>
+    </property>
+     <property>
+        <name>prospects.outputpath</name>
+        <value>/tmp/RyaStats/ProspectsOutput</value>
+    </property>
+    <property>
+        <name>spo.outputpath</name>
+        <value>/tmp/RyaStats/SpoOutput</value>
+    </property>
+</configuration>