You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@rya.apache.org by ca...@apache.org on 2017/09/29 18:52:08 UTC
incubator-rya git commit: RYA-255 Provide an example class that
demonstrates the Prospector. Closes #144.
Repository: incubator-rya
Updated Branches:
refs/heads/master 7949baa67 -> 33ef52cbb
RYA-255 Provide an example class that demonstrates the Prospector. Closes #144.
Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/33ef52cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/33ef52cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/33ef52cb
Branch: refs/heads/master
Commit: 33ef52cbbb585b78c33878c41421ef07761f9032
Parents: 7949baa
Author: Kevin Chilton <ke...@parsons.com>
Authored: Tue Mar 7 16:02:16 2017 -0500
Committer: Caleb Meier <ca...@parsons.com>
Committed: Fri Sep 29 11:50:46 2017 -0700
----------------------------------------------------------------------
.../rya/accumulo/AccumuloRdfConfiguration.java | 40 ++++
.../src/main/java/ProspectorExample.java | 193 +++++++++++++++++++
.../src/main/resources/stats_cluster_config.xml | 93 +++++++++
3 files changed, 326 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
----------------------------------------------------------------------
diff --git a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
index 0200cf7..ed76b4a 100644
--- a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
+++ b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/AccumuloRdfConfiguration.java
@@ -205,6 +205,13 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
/**
+ * Records in this configuration whether a mock instance of Accumulo backs the Rya instance.
+ * <p>
+ * NOTE(review): the flag is written through {@code setBooleanIfUnset}, so presumably a value
+ * that is already present for {@code USE_MOCK_INSTANCE} is left untouched and a later call
+ * with a different value has no effect. Confirm that this is the intended behavior.
+ *
+ * @param enabled - {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}.
+ */
+ public void useMockInstance(boolean enabled) {
+ super.setBooleanIfUnset(USE_MOCK_INSTANCE, enabled);
+ }
+
+ /**
* Indicates that a Mock instance of Accumulo is being used to back the Rya instance.
*
* @return {@code true} if the Rya instance is backed by a mock Accumulo; otherwise {@code false}.
@@ -214,6 +221,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
}
/**
+ * Stores the Accumulo username under the {@code CLOUDBASE_USER} property of this configuration.
+ *
+ * @param username - The Accumulo username that is meant to be used when connecting
+ * a {@link Connector} to Accumulo.
+ */
+ public void setUsername(String username) {
+ super.set(CLOUDBASE_USER, username);
+ }
+
+ /**
* Get the Accumulo username from the configuration object that is meant to
* be used when connecting a {@link Connector} to Accumulo.
*
@@ -224,6 +239,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
}
/**
+ * Stores the Accumulo password under the {@code CLOUDBASE_PASSWORD} property of this configuration.
+ *
+ * @param password - The Accumulo password that is meant to be used when connecting
+ * a {@link Connector} to Accumulo.
+ */
+ public void setPassword(String password) {
+ super.set(CLOUDBASE_PASSWORD, password);
+ }
+
+ /**
* Get the Accumulo password from the configuration object that is meant to
* be used when connecting a {@link Connector} to Accumulo.
*
@@ -234,6 +257,14 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
}
/**
+ * Stores the Accumulo instance name under the {@code CLOUDBASE_INSTANCE} property of this configuration.
+ *
+ * @param instanceName - The Accumulo instance name that is meant to be used when
+ * connecting a {@link Connector} to Accumulo.
+ */
+ public void setInstanceName(String instanceName) {
+ super.set(CLOUDBASE_INSTANCE, instanceName);
+ }
+
+ /**
* Get the Accumulo instance name from the configuration object that is
* meant to be used when connecting a {@link Connector} to Accumulo.
*
@@ -244,6 +275,15 @@ public class AccumuloRdfConfiguration extends RdfCloudTripleStoreConfiguration {
}
/**
+ * Stores the Zookeeper servers under the {@code CLOUDBASE_ZOOKEEPERS} property of this configuration.
+ *
+ * @param zookeepers - A comma delimited list of the names of the Zookeeper servers
+ * that is meant to be used when connecting a {@link Connector} to Accumulo.
+ */
+ public void setZookeepers(String zookeepers) {
+ super.set(CLOUDBASE_ZOOKEEPERS, zookeepers);
+ }
+
+ /**
* Get a comma delimited list of the names of the Zookeeper servers from
* the configuration object that is meant to be used when connecting a
* {@link Connector} to Accumulo.
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/extras/indexingExample/src/main/java/ProspectorExample.java
----------------------------------------------------------------------
diff --git a/extras/indexingExample/src/main/java/ProspectorExample.java b/extras/indexingExample/src/main/java/ProspectorExample.java
new file mode 100644
index 0000000..fd0e4f8
--- /dev/null
+++ b/extras/indexingExample/src/main/java/ProspectorExample.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.List;
+
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Level;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+import org.apache.rya.accumulo.AccumuloRdfConfiguration;
+import org.apache.rya.accumulo.utils.ConnectorFactory;
+import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF;
+import org.apache.rya.prospector.mr.Prospector;
+import org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO;
+import org.apache.rya.sail.config.RyaSailFactory;
+import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.ValueFactoryImpl;
+import org.openrdf.sail.Sail;
+import org.openrdf.sail.SailConnection;
+
+import com.beust.jcommander.internal.Lists;
+
+/**
+ * Demonstrates how you can use the {@link Prospector} to count values that appear within an instance of Rya and
+ * then use the {@link ProspectorServiceEvalStatsDAO} to fetch those counts.
+ */
+public class ProspectorExample {
+ private static final Logger log = Logger.getLogger(RyaClientExample.class);
+
+ private static final ValueFactory VALUE_FACTORY = new ValueFactoryImpl();
+
+ private static final URI ALICE = VALUE_FACTORY.createURI("urn:alice");
+ private static final URI BOB = VALUE_FACTORY.createURI("urn:bob");
+ private static final URI CHARLIE = VALUE_FACTORY.createURI("urn:charlie");
+
+ private static final URI WORKS_AT = VALUE_FACTORY.createURI("urn:worksAt");
+ private static final URI ADMIRES = VALUE_FACTORY.createURI("urn:admires");
+ private static final URI LIVES_WITH = VALUE_FACTORY.createURI("urn:livesWith");
+
+ private static final URI BURGER_JOINT = VALUE_FACTORY.createURI("urn:burgerJoint");
+ private static final URI DONUT_SHOP= VALUE_FACTORY.createURI("urn:donutShop");
+
+ public static void main(final String[] args) throws Exception {
+ setupLogging();
+
+ // Configure Rya to use a mock instance.
+ final AccumuloRdfConfiguration config = new AccumuloRdfConfiguration();
+ config.useMockInstance(true);
+ config.setTablePrefix("rya_");
+ config.setUsername("user");
+ config.setPassword("pass");
+ config.setInstanceName("accumulo");
+
+ // Load some data into Rya.
+ final List<Statement> statements = Lists.newArrayList(
+ VALUE_FACTORY.createStatement(ALICE, WORKS_AT, BURGER_JOINT),
+ VALUE_FACTORY.createStatement(ALICE, ADMIRES, BOB),
+ VALUE_FACTORY.createStatement(BOB, WORKS_AT, DONUT_SHOP),
+ VALUE_FACTORY.createStatement(CHARLIE, WORKS_AT, DONUT_SHOP),
+ VALUE_FACTORY.createStatement(CHARLIE, LIVES_WITH, BOB),
+ VALUE_FACTORY.createStatement(BOB, LIVES_WITH, CHARLIE),
+ VALUE_FACTORY.createStatement(BOB, LIVES_WITH, ALICE));
+
+ final Sail sail = RyaSailFactory.getInstance(config);
+ final SailConnection conn = sail.getConnection();
+ log.info("Loading the following statements into a Mock instance of Accumulo Rya:");
+ conn.begin();
+ for(final Statement statement : statements) {
+ log.info(" " + statement.toString());
+ conn.addStatement(statement.getSubject(), statement.getPredicate(), statement.getObject());
+ }
+ conn.commit();
+ conn.close();
+
+ // Create the table that the Prospector's results will be written to.
+ ConnectorFactory.connect(config)
+ .tableOperations()
+ .create("rya_prospects");
+
+ // Run the Prospector using the configuration file that is in the resources directory.
+ log.info("");
+ log.info("Running the Map Reduce job that computes the Prospector results.");
+ ToolRunner.run(new Prospector(), new String[]{ "src/main/resources/stats_cluster_config.xml" });
+
+ // Print the table that was created by the Prospector.
+ log.info("");
+ log.info("The following cardinalities were written to the Prospector table:");
+ final ProspectorServiceEvalStatsDAO dao = ProspectorServiceEvalStatsDAO.make(config);
+
+ // Do each of the Subjects.
+ double cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(ALICE));
+ log.info(" subject: " + ALICE + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(BOB));
+ log.info(" subject: " + BOB + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(CHARLIE));
+ log.info(" subject: " + CHARLIE + ", cardinality: " + cardinality);
+
+ // Do each of the Predicates.
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(WORKS_AT));
+ log.info(" predicate: " + WORKS_AT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(ADMIRES));
+ log.info(" predicate: " + ADMIRES + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(LIVES_WITH));
+ log.info(" predicate: " + LIVES_WITH + ", cardinality: " + cardinality);
+
+ // Do each of the Objects.
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BURGER_JOINT));
+ log.info(" object: " + BURGER_JOINT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(DONUT_SHOP));
+ log.info(" object: " + DONUT_SHOP + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(ALICE));
+ log.info(" object: " + ALICE + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BOB));
+ log.info(" object: " + BOB + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(CHARLIE));
+ log.info(" object: " + CHARLIE + ", cardinality: " + cardinality);
+
+ // Do each of the Subject/Predicate pairs.
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, WORKS_AT));
+ log.info(" subject/predicate: " + ALICE + "/" + WORKS_AT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, ADMIRES));
+ log.info(" subject/predicate: " + ALICE + "/" + ADMIRES + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, WORKS_AT));
+ log.info(" subject/predicate: " + BOB + "/" + WORKS_AT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, WORKS_AT));
+ log.info(" subject/predicate: " + CHARLIE + "/" + WORKS_AT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, LIVES_WITH));
+ log.info(" subject/predicate: " + CHARLIE + "/" + LIVES_WITH + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, LIVES_WITH));
+ log.info(" subject/predicate: " + BOB + "/" + LIVES_WITH + ", cardinality: " + cardinality);
+
+ // Do each of the Subject/Object pairs.
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BURGER_JOINT));
+ log.info(" subject/object: " + ALICE + "/" + BURGER_JOINT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BOB));
+ log.info(" subject/object: " + ALICE + "/" + BOB + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, DONUT_SHOP));
+ log.info(" subject/object: " + ALICE + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, DONUT_SHOP));
+ log.info(" subject/object: " + CHARLIE + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, BOB));
+ log.info(" subject/object: " + CHARLIE + "/" + BOB + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, CHARLIE));
+ log.info(" subject/object: " + BOB + "/" + CHARLIE + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, ALICE));
+ log.info(" subject/object: " + BOB + "/" + ALICE + ", cardinality: " + cardinality);
+
+ // Do each of the Predicate/Object pairs.
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, BURGER_JOINT));
+ log.info(" predicate/object: " + WORKS_AT + "/" + BURGER_JOINT + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(ADMIRES, BOB));
+ log.info(" predicate/object: " + ADMIRES + "/" + BOB + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, DONUT_SHOP));
+ log.info(" predicate/object: " + WORKS_AT + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, BOB));
+ log.info(" predicate/object: " + LIVES_WITH + "/" + BOB + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, CHARLIE));
+ log.info(" predicate/object: " + LIVES_WITH + "/" + CHARLIE + ", cardinality: " + cardinality);
+ cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, ALICE));
+ log.info(" predicate/object: " + LIVES_WITH + "/" + ALICE + ", cardinality: " + cardinality);
+ }
+
+ private static void setupLogging() {
+ // Turn off all the loggers and customize how they write to the console.
+ final Logger rootLogger = LogManager.getRootLogger();
+ rootLogger.setLevel(Level.OFF);
+ final ConsoleAppender ca = (ConsoleAppender) rootLogger.getAppender("stdout");
+ ca.setLayout(new PatternLayout("%-5p - %m%n"));
+
+ // Turn the logger used by the demo back on.
+ log.setLevel(Level.INFO);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/33ef52cb/extras/indexingExample/src/main/resources/stats_cluster_config.xml
----------------------------------------------------------------------
diff --git a/extras/indexingExample/src/main/resources/stats_cluster_config.xml b/extras/indexingExample/src/main/resources/stats_cluster_config.xml
new file mode 100644
index 0000000..170a0c2
--- /dev/null
+++ b/extras/indexingExample/src/main/resources/stats_cluster_config.xml
@@ -0,0 +1,93 @@
+<?xml version="1.0"?>
+
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <!-- Accumulo info: selects the Accumulo instance to use. "mock" is true and "instance" is
+ "accumulo", the same values ProspectorExample sets on its AccumuloRdfConfiguration. -->
+
+ <property>
+ <name>mock</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>instance</name>
+ <value>accumulo</value>
+ </property>
+
+ <!-- User info: the credentials to connect with. They match the username and password
+ that ProspectorExample sets on its AccumuloRdfConfiguration. -->
+ <property>
+ <name>username</name>
+ <value>user</value>
+ </property>
+ <property>
+ <name>password</name>
+ <value>pass</value>
+ </property>
+
+ <!-- Rya info: table names and authorizations. Every table name starts with "rya_", the
+ table prefix ProspectorExample configures. -->
+ <property>
+ <name>spo.table</name>
+ <value>rya_spo</value>
+ </property>
+ <property>
+ <name>prospects.table</name>
+ <value>rya_prospects</value>
+ </property>
+ <property>
+ <name>selectivity.table</name>
+ <value>rya_selectivity</value>
+ </property>
+ <property>
+ <name>auths</name>
+ <value>U</value>
+ </property>
+ <property>
+ <name>prospector.auths</name>
+ <value>U</value>
+ </property>
+
+ <!-- Tables the Prospector reads from and writes to. The output table, rya_prospects, is
+ the table ProspectorExample creates before it runs the job.
+ NOTE(review): presumably rya_spo is where the statements loaded by the example end up;
+ confirm against the Rya table layout. -->
+ <property>
+ <name>prospector.intable</name>
+ <value>rya_spo</value>
+ </property>
+ <property>
+ <name>prospector.outtable</name>
+ <value>rya_prospects</value>
+ </property>
+
+ <!-- File system paths.
+ NOTE(review): "inputpath" and "outputpath" hold the same value; confirm whether
+ "outputpath" was meant to name a different directory. It is not visible from this
+ example whether the Prospector job reads any of these path properties. -->
+ <property>
+ <name>inputpath</name>
+ <value>/tmp/RyaStats/JoinSelectStatisticsSumInput</value>
+ </property>
+ <property>
+ <name>outputpath</name>
+ <value>/tmp/RyaStats/JoinSelectStatisticsSumInput</value>
+ </property>
+ <property>
+ <name>prospects.outputpath</name>
+ <value>/tmp/RyaStats/ProspectsOutput</value>
+ </property>
+ <property>
+ <name>spo.outputpath</name>
+ <value>/tmp/RyaStats/SpoOutput</value>
+ </property>
+</configuration>