You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@rya.apache.org by pu...@apache.org on 2016/07/21 12:50:29 UTC
[4/6] incubator-rya git commit: Consolidated MapReduce API and
applications into toplevel project.
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java
----------------------------------------------------------------------
diff --git a/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java b/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java
deleted file mode 100644
index b138292..0000000
--- a/dao/accumulo.rya/src/test/java/mvm/rya/accumulo/mr/upgrade/UpgradeObjectSerializationTest.java
+++ /dev/null
@@ -1,119 +0,0 @@
-package mvm.rya.accumulo.mr.upgrade;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import mvm.rya.api.resolver.impl.*;
-import org.junit.Test;
-
-import static mvm.rya.accumulo.mr.upgrade.Upgrade322Tool.UpgradeObjectSerialization;
-import static org.junit.Assert.*;
-
-public class UpgradeObjectSerializationTest {
-
- @Test
- public void testBooleanUpgrade() throws Exception {
- String object = "true";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, BooleanRyaTypeResolver.BOOLEAN_LITERAL_MARKER);
-
- assertEquals("1", upgrade);
- }
-
- @Test
- public void testBooleanUpgradeFalse() throws Exception {
- String object = "false";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, BooleanRyaTypeResolver.BOOLEAN_LITERAL_MARKER);
-
- assertEquals("0", upgrade);
- }
-
- @Test
- public void testByteUpgradeLowest() throws Exception {
- String object = "-127";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, ByteRyaTypeResolver.LITERAL_MARKER);
-
- assertEquals("81", upgrade);
- }
-
- @Test
- public void testByteUpgradeHighest() throws Exception {
- String object = "127";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, ByteRyaTypeResolver.LITERAL_MARKER);
-
- assertEquals("7f", upgrade);
- }
-
- @Test
- public void testLongUpgrade() throws Exception {
- String object = "00000000000000000010";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, LongRyaTypeResolver.LONG_LITERAL_MARKER);
-
- assertEquals("800000000000000a", upgrade);
- }
-
- @Test
- public void testIntUpgrade() throws Exception {
- String object = "00000000010";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, IntegerRyaTypeResolver.INTEGER_LITERAL_MARKER);
-
- assertEquals("8000000a", upgrade);
- }
-
- @Test
- public void testDateTimeUpgrade() throws Exception {
- String object = "9223370726404375807";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, DateTimeRyaTypeResolver.DATETIME_LITERAL_MARKER);
-
- assertEquals("800001311cee3b00", upgrade);
- }
-
- @Test
- public void testDoubleUpgrade() throws Exception {
- String object = "00001 1.0";
- final UpgradeObjectSerialization upgradeObjectSerialization
- = new UpgradeObjectSerialization();
- final String upgrade = upgradeObjectSerialization
- .upgrade(object, DoubleRyaTypeResolver.DOUBLE_LITERAL_MARKER);
-
- assertEquals("c024000000000000", upgrade);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/dao/accumulo.rya/src/test/resources/namedgraphs.trig
----------------------------------------------------------------------
diff --git a/dao/accumulo.rya/src/test/resources/namedgraphs.trig b/dao/accumulo.rya/src/test/resources/namedgraphs.trig
deleted file mode 100644
index b647632..0000000
--- a/dao/accumulo.rya/src/test/resources/namedgraphs.trig
+++ /dev/null
@@ -1,7 +0,0 @@
-@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
-@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
-@prefix swp: <http://www.w3.org/2004/03/trix/swp-1/> .
-@prefix dc: <http://purl.org/dc/elements/1.1/> .
-@prefix ex: <http://www.example.org/vocabulary#> .
-@prefix : <http://www.example.org/exampleDocument#> .
-:G1 { :Monica ex:name "Monica Murphy" . }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/dao/accumulo.rya/src/test/resources/test.ntriples
----------------------------------------------------------------------
diff --git a/dao/accumulo.rya/src/test/resources/test.ntriples b/dao/accumulo.rya/src/test/resources/test.ntriples
deleted file mode 100644
index 26a0a17..0000000
--- a/dao/accumulo.rya/src/test/resources/test.ntriples
+++ /dev/null
@@ -1 +0,0 @@
-<urn:lubm:rdfts#GraduateStudent01> <urn:lubm:rdfts#hasFriend> <urn:lubm:rdfts#GraduateStudent02> .
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java
deleted file mode 100644
index 147050f..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullFreeTextIndexer.java
+++ /dev/null
@@ -1,102 +0,0 @@
-package mvm.rya.accumulo.mr;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import info.aduna.iteration.CloseableIteration;
-
-import java.io.IOException;
-import java.util.Set;
-
-import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer;
-import mvm.rya.api.RdfCloudTripleStoreConfiguration;
-import mvm.rya.api.domain.RyaStatement;
-import mvm.rya.indexing.FreeTextIndexer;
-import mvm.rya.indexing.StatementConstraints;
-
-import org.apache.accumulo.core.client.Connector;
-import org.apache.hadoop.conf.Configuration;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.query.QueryEvaluationException;
-
-public class NullFreeTextIndexer extends AbstractAccumuloIndexer implements FreeTextIndexer {
-
- @Override
- public String getTableName() {
- return null;
- }
-
- @Override
- public void storeStatement(RyaStatement statement) throws IOException {
- }
-
- @Override
- public Configuration getConf() {
- return null;
- }
-
- @Override
- public void setConf(Configuration arg0) {
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryText(String query, StatementConstraints contraints)
- throws IOException {
- return null;
- }
-
- @Override
- public Set<URI> getIndexablePredicates() {
- return null;
- }
-
- @Override
- public void init() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void setConnector(Connector connector) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void destroy() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void purge(RdfCloudTripleStoreConfiguration configuration) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void dropAndDestroy() {
- // TODO Auto-generated method stub
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java
deleted file mode 100644
index fe26f6f..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullGeoIndexer.java
+++ /dev/null
@@ -1,153 +0,0 @@
-package mvm.rya.accumulo.mr;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import info.aduna.iteration.CloseableIteration;
-
-import java.io.IOException;
-import java.util.Set;
-
-import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer;
-import mvm.rya.api.RdfCloudTripleStoreConfiguration;
-import mvm.rya.api.domain.RyaStatement;
-import mvm.rya.indexing.GeoIndexer;
-import mvm.rya.indexing.StatementConstraints;
-
-import org.apache.accumulo.core.client.Connector;
-import org.apache.hadoop.conf.Configuration;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.query.QueryEvaluationException;
-
-import com.vividsolutions.jts.geom.Geometry;
-
-public class NullGeoIndexer extends AbstractAccumuloIndexer implements GeoIndexer {
-
- @Override
- public String getTableName() {
-
- return null;
- }
-
- @Override
- public void storeStatement(RyaStatement statement) throws IOException {
-
-
- }
-
- @Override
- public Configuration getConf() {
-
- return null;
- }
-
- @Override
- public void setConf(Configuration arg0) {
-
-
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryEquals(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryDisjoint(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryIntersects(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryTouches(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryCrosses(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryWithin(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryContains(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryOverlaps(Geometry query, StatementConstraints contraints) {
-
- return null;
- }
-
- @Override
- public Set<URI> getIndexablePredicates() {
-
- return null;
- }
-
- @Override
- public void init() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void setConnector(Connector connector) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void destroy() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void purge(RdfCloudTripleStoreConfiguration configuration) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void dropAndDestroy() {
- // TODO Auto-generated method stub
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java
deleted file mode 100644
index cbe36b8..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/NullTemporalIndexer.java
+++ /dev/null
@@ -1,186 +0,0 @@
-package mvm.rya.accumulo.mr;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-import info.aduna.iteration.CloseableIteration;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Set;
-
-import mvm.rya.accumulo.experimental.AbstractAccumuloIndexer;
-import mvm.rya.api.RdfCloudTripleStoreConfiguration;
-import mvm.rya.api.domain.RyaStatement;
-import mvm.rya.api.domain.RyaURI;
-import mvm.rya.indexing.StatementConstraints;
-import mvm.rya.indexing.TemporalIndexer;
-import mvm.rya.indexing.TemporalInstant;
-import mvm.rya.indexing.TemporalInterval;
-
-import org.apache.accumulo.core.client.Connector;
-import org.apache.hadoop.conf.Configuration;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.query.QueryEvaluationException;
-
-/**
- * Temporal Indexer that does nothing, like when disabled.
- *
- */
-public class NullTemporalIndexer extends AbstractAccumuloIndexer implements TemporalIndexer {
-
- @Override
- public String getTableName() {
-
- return null;
- }
-
- @Override
- public void storeStatement(RyaStatement statement) throws IOException {
-
-
- }
-
- @Override
- public Configuration getConf() {
-
- return null;
- }
-
- @Override
- public void setConf(Configuration arg0) {
-
-
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantEqualsInstant(TemporalInstant queryInstant,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInstant(TemporalInstant queryInstant,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInstant(TemporalInstant queryInstant,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantBeforeInterval(TemporalInterval givenInterval,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInterval(TemporalInterval givenInterval,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantInsideInterval(TemporalInterval givenInterval,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantHasBeginningInterval(TemporalInterval queryInterval,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryInstantHasEndInterval(TemporalInterval queryInterval,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryIntervalEquals(TemporalInterval query,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryIntervalBefore(TemporalInterval query,
- StatementConstraints contraints) throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public CloseableIteration<Statement, QueryEvaluationException> queryIntervalAfter(TemporalInterval query, StatementConstraints contraints)
- throws QueryEvaluationException {
-
- return null;
- }
-
- @Override
- public Set<URI> getIndexablePredicates() {
-
- return null;
- }
-
- @Override
- public void init() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void setConnector(Connector connector) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void destroy() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void purge(RdfCloudTripleStoreConfiguration configuration) {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void dropAndDestroy() {
- // TODO Auto-generated method stub
-
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java
deleted file mode 100644
index 7e690f4..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/RyaOutputFormat.java
+++ /dev/null
@@ -1,329 +0,0 @@
-package mvm.rya.accumulo.mr;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import java.io.Closeable;
-import java.io.Flushable;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import mvm.rya.accumulo.AccumuloRdfConfiguration;
-import mvm.rya.accumulo.AccumuloRyaDAO;
-import mvm.rya.accumulo.mr.utils.MRUtils;
-import mvm.rya.api.domain.RyaStatement;
-import mvm.rya.api.persist.RyaDAOException;
-import mvm.rya.api.resolver.RdfToRyaConversions;
-import mvm.rya.indexing.FreeTextIndexer;
-import mvm.rya.indexing.GeoIndexer;
-import mvm.rya.indexing.StatementSerializer;
-import mvm.rya.indexing.TemporalIndexer;
-import mvm.rya.indexing.accumulo.ConfigUtils;
-import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer;
-import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer;
-import mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.TableExistsException;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.log4j.Logger;
-import org.geotools.feature.SchemaException;
-import org.openrdf.model.Statement;
-
-/**
- * Hadoop Map/Reduce class to use Rya, the {@link GeoIndexer}, the {@link FreeTextIndexer}, and the {@link TemporalIndexer} as the sink of {@link Statement} data.
- * wrapped in an {@link StatementWritable} objects. This {@link OutputFormat} ignores the Keys and only writes the Values to Rya.
- *
- * The user must specify connection parameters for Rya, {@link GeoIndexer}, {@link FreeTextIndexer}, and {@link TemporalIndexer}.
- */
-public class RyaOutputFormat extends OutputFormat<Writable, StatementWritable> {
- private static final Logger logger = Logger.getLogger(RyaOutputFormat.class);
-
- private static final String PREFIX = RyaOutputFormat.class.getSimpleName();
- private static final String MAX_MUTATION_BUFFER_SIZE = PREFIX + ".maxmemory";
- private static final String ENABLE_FREETEXT = PREFIX + ".freetext.enable";
- private static final String ENABLE_GEO = PREFIX + ".geo.enable";
- private static final String ENABLE_TEMPORAL = PREFIX + ".temporal.enable";;
-
-
- @Override
- public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException {
- Configuration conf = jobContext.getConfiguration();
-
- // make sure that all of the indexers can connect
- getGeoIndexer(conf);
- getFreeTextIndexer(conf);
- getTemporalIndexer(conf);
- getRyaIndexer(conf);
- }
-
- @Override
- public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
- // copied from AccumuloOutputFormat
- return new NullOutputFormat<Text, Mutation>().getOutputCommitter(context);
- }
-
- @Override
- public RecordWriter<Writable, StatementWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
- return new RyaRecordWriter(context);
- }
-
- private static GeoIndexer getGeoIndexer(Configuration conf) throws IOException {
- if (!conf.getBoolean(ENABLE_GEO, true)) {
- return new NullGeoIndexer();
- }
-
- GeoMesaGeoIndexer geo = new GeoMesaGeoIndexer();
- geo.setConf(conf);
- return geo;
-
- }
-
- private static FreeTextIndexer getFreeTextIndexer(Configuration conf) throws IOException {
- if (!conf.getBoolean(ENABLE_FREETEXT, true)) {
- return new NullFreeTextIndexer();
- }
-
- AccumuloFreeTextIndexer freeText = new AccumuloFreeTextIndexer();
- freeText.setConf(conf);
- return freeText;
-
- }
-
- private static TemporalIndexer getTemporalIndexer(Configuration conf) throws IOException {
- if (!conf.getBoolean(ENABLE_TEMPORAL, true)) {
- return new NullTemporalIndexer();
- }
- AccumuloTemporalIndexer temporal = new AccumuloTemporalIndexer();
- temporal.setConf(conf);
- return temporal;
- }
-
- private static AccumuloRyaDAO getRyaIndexer(Configuration conf) throws IOException {
- try {
- AccumuloRyaDAO ryaIndexer = new AccumuloRyaDAO();
- Connector conn = ConfigUtils.getConnector(conf);
- ryaIndexer.setConnector(conn);
-
- AccumuloRdfConfiguration ryaConf = new AccumuloRdfConfiguration();
-
- String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
- if (tablePrefix != null) {
- ryaConf.setTablePrefix(tablePrefix);
- }
- ryaConf.setDisplayQueryPlan(false);
- ryaIndexer.setConf(ryaConf);
- ryaIndexer.init();
- return ryaIndexer;
- } catch (AccumuloException e) {
- logger.error("Cannot create RyaIndexer", e);
- throw new IOException(e);
- } catch (AccumuloSecurityException e) {
- logger.error("Cannot create RyaIndexer", e);
- throw new IOException(e);
- } catch (RyaDAOException e) {
- logger.error("Cannot create RyaIndexer", e);
- throw new IOException(e);
- }
- }
-
- public static class RyaRecordWriter extends RecordWriter<Writable, StatementWritable> implements Closeable, Flushable {
- private static final Logger logger = Logger.getLogger(RyaRecordWriter.class);
-
- private FreeTextIndexer freeTextIndexer;
- private GeoIndexer geoIndexer;
- private TemporalIndexer temporalIndexer;
- private AccumuloRyaDAO ryaIndexer;
-
- private static final long ONE_MEGABYTE = 1024L * 1024L;
- private static final long AVE_STATEMENT_SIZE = 100L;
-
- private long bufferSizeLimit;
- private long bufferCurrentSize = 0;
-
- private ArrayList<RyaStatement> buffer;
-
- public RyaRecordWriter(TaskAttemptContext context) throws IOException {
- this(context.getConfiguration());
- }
-
- public RyaRecordWriter(Configuration conf) throws IOException {
- // set up the buffer
- bufferSizeLimit = conf.getLong(MAX_MUTATION_BUFFER_SIZE, ONE_MEGABYTE);
- int bufferCapacity = (int) (bufferSizeLimit / AVE_STATEMENT_SIZE);
- buffer = new ArrayList<RyaStatement>(bufferCapacity);
-
- // set up the indexers
- freeTextIndexer = getFreeTextIndexer(conf);
- geoIndexer = getGeoIndexer(conf);
- temporalIndexer = getTemporalIndexer(conf);
- ryaIndexer = getRyaIndexer(conf);
-
- // update fields used for metrics
- startTime = System.currentTimeMillis();
- lastCommitFinishTime = startTime;
- }
-
- @Override
- public void flush() throws IOException {
- flushBuffer();
- }
-
- @Override
- public void close() throws IOException {
- close(null);
- }
-
- @Override
- public void close(TaskAttemptContext paramTaskAttemptContext) throws IOException {
- // close everything. log errors
- try {
- flush();
- } catch (IOException e) {
- logger.error("Error flushing the buffer on RyaOutputFormat Close", e);
- }
- try {
- if (geoIndexer != null)
- geoIndexer.close();
- } catch (IOException e) {
- logger.error("Error closing the geoIndexer on RyaOutputFormat Close", e);
- }
- try {
- if (freeTextIndexer != null)
- freeTextIndexer.close();
- } catch (IOException e) {
- logger.error("Error closing the freetextIndexer on RyaOutputFormat Close", e);
- }
- try {
- if (temporalIndexer != null)
- temporalIndexer.close();
- } catch (IOException e) {
- logger.error("Error closing the temporalIndexer on RyaOutputFormat Close", e);
- }
- try {
- ryaIndexer.destroy();
- } catch (RyaDAOException e) {
- logger.error("Error closing RyaDAO on RyaOutputFormat Close", e);
- }
- }
-
- public void write(Statement statement) throws IOException, InterruptedException {
- write(null, new StatementWritable(statement));
- }
-
- @Override
- public void write(Writable key, StatementWritable value) throws IOException, InterruptedException {
- buffer.add(RdfToRyaConversions.convertStatement(value));
-
- bufferCurrentSize += StatementSerializer.writeStatement(value).length();
-
- if (bufferCurrentSize >= bufferSizeLimit) {
- flushBuffer();
- }
- }
-
- // fields for storing metrics
- private long startTime = 0;
- private long lastCommitFinishTime = 0;
- private long totalCommitRecords = 0;
-
- private double totalReadDuration = 0;
- private double totalWriteDuration = 0;
-
- private long commitCount = 0;
-
- private void flushBuffer() throws IOException {
- totalCommitRecords += buffer.size();
- commitCount++;
-
- long startCommitTime = System.currentTimeMillis();
-
- logger.info(String.format("(C-%d) Flushing buffer with %,d objects and %,d bytes", commitCount, buffer.size(),
- bufferCurrentSize));
-
- double readingDuration = (startCommitTime - lastCommitFinishTime) / 1000.;
- totalReadDuration += readingDuration;
- double currentReadRate = buffer.size() / readingDuration;
- double totalReadRate = totalCommitRecords / totalReadDuration;
-
- // Print "reading" metrics
- logger.info(String.format("(C-%d) (Reading) Duration, Current Rate, Total Rate: %.2f %.2f %.2f ", commitCount, readingDuration,
- currentReadRate, totalReadRate));
-
- // write to geo
- geoIndexer.storeStatements(buffer);
- geoIndexer.flush();
-
- // write to free text
- freeTextIndexer.storeStatements(buffer);
- freeTextIndexer.flush();
-
- // write to temporal
- temporalIndexer.storeStatements(buffer);
- temporalIndexer.flush();
-
- // write to rya
- try {
- ryaIndexer.add(buffer.iterator());
- } catch (RyaDAOException e) {
- logger.error("Cannot writing statement to Rya", e);
- throw new IOException(e);
- }
-
- lastCommitFinishTime = System.currentTimeMillis();
-
- double writingDuration = (lastCommitFinishTime - startCommitTime) / 1000.;
- totalWriteDuration += writingDuration;
- double currentWriteRate = buffer.size() / writingDuration;
- double totalWriteRate = totalCommitRecords / totalWriteDuration;
-
- // Print "writing" stats
- logger.info(String.format("(C-%d) (Writing) Duration, Current Rate, Total Rate: %.2f %.2f %.2f ", commitCount, writingDuration,
- currentWriteRate, totalWriteRate));
-
- double processDuration = writingDuration + readingDuration;
- double totalProcessDuration = totalWriteDuration + totalReadDuration;
- double currentProcessRate = buffer.size() / processDuration;
- double totalProcessRate = totalCommitRecords / (totalProcessDuration);
-
- // Print "total" stats
- logger.info(String.format("(C-%d) (Total) Duration, Current Rate, Total Rate: %.2f %.2f %.2f ", commitCount, processDuration,
- currentProcessRate, totalProcessRate));
-
- // clear the buffer
- buffer.clear();
- bufferCurrentSize = 0L;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java
deleted file mode 100644
index 629baf2..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/StatementWritable.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package mvm.rya.accumulo.mr;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Writable;
-import org.openrdf.model.Resource;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-
-import mvm.rya.indexing.StatementSerializer;
-
-/**
- * A {@link Writable} wrapper for {@link Statement} objects.
- */
-@SuppressWarnings("serial")
-public class StatementWritable implements Statement, Writable {
-
- private Statement statement;
-
- public StatementWritable(Statement statement) {
- setStatement(statement);
- }
-
- public void setStatement(Statement statement) {
- this.statement = statement;
- }
-
- public Statement getStatement() {
- return statement;
- }
-
- @Override
- public void readFields(DataInput paramDataInput) throws IOException {
- statement = StatementSerializer.readStatement(paramDataInput.readUTF());
- }
-
- @Override
- public void write(DataOutput paramDataOutput) throws IOException {
- paramDataOutput.writeUTF(StatementSerializer.writeStatement(statement));
- }
-
- @Override
- public Resource getSubject() {
- return statement.getSubject();
- }
-
- @Override
- public URI getPredicate() {
- return statement.getPredicate();
- }
-
- @Override
- public Value getObject() {
- return statement.getObject();
- }
-
- @Override
- public Resource getContext() {
- return statement.getContext();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java
deleted file mode 100644
index ecc2354..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/BulkNtripsInputToolIndexing.java
+++ /dev/null
@@ -1,227 +0,0 @@
-package mvm.rya.accumulo.mr.fileinput;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import mvm.rya.accumulo.AccumuloRdfConfiguration;
-import mvm.rya.accumulo.mr.utils.MRUtils;
-import mvm.rya.api.resolver.RdfToRyaConversions;
-import mvm.rya.indexing.FreeTextIndexer;
-import mvm.rya.indexing.GeoIndexer;
-import mvm.rya.indexing.accumulo.ConfigUtils;
-import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer;
-import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer;
-
-import org.apache.accumulo.core.client.AccumuloException;
-import org.apache.accumulo.core.client.AccumuloSecurityException;
-import org.apache.accumulo.core.client.TableExistsException;
-import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.log4j.Logger;
-import org.geotools.feature.SchemaException;
-import org.openrdf.model.Resource;
-import org.openrdf.model.Statement;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ContextStatementImpl;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.rio.ParserConfig;
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.Rio;
-import org.openrdf.rio.helpers.RDFHandlerBase;
-
-import com.google.common.base.Preconditions;
-
-/**
- * Take large ntrips files and use MapReduce to ingest into other indexing
- */
-public class BulkNtripsInputToolIndexing extends Configured implements Tool {
-
- private String userName = null;
- private String pwd = null;
- private String instance = null;
- private String zk = null;
-
- private String format = RDFFormat.NTRIPLES.getName();
-
- @Override
- public int run(final String[] args) throws Exception {
- final Configuration conf = getConf();
- // conf
- zk = conf.get(MRUtils.AC_ZK_PROP, zk);
- instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
- userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
- pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
- format = conf.get(MRUtils.FORMAT_PROP, format);
-
- String auths = conf.get(MRUtils.AC_CV_PROP, "");
-
- conf.set(MRUtils.FORMAT_PROP, format);
- Preconditions.checkNotNull(zk, MRUtils.AC_ZK_PROP + " not set");
- Preconditions.checkNotNull(instance, MRUtils.AC_INSTANCE_PROP + " not set");
- Preconditions.checkNotNull(userName, MRUtils.AC_USERNAME_PROP + " not set");
- Preconditions.checkNotNull(pwd, MRUtils.AC_PWD_PROP + " not set");
-
- // map the config values to free text configu values
- conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk);
- conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
- conf.set(ConfigUtils.CLOUDBASE_USER, userName);
- conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
- conf.set(ConfigUtils.CLOUDBASE_AUTHS, auths);
-
- final String inputDir = args[0];
-
- String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
- Preconditions.checkNotNull(tablePrefix, MRUtils.TABLE_PREFIX_PROPERTY + " not set");
-
- String docTextTable = tablePrefix + "text";
- conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, docTextTable);
-
- String docTermTable = tablePrefix + "terms";
- conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, docTermTable);
-
- String geoTable = tablePrefix + "geo";
- conf.set(ConfigUtils.GEO_TABLENAME, geoTable);
-
- System.out.println("Loading data into tables[freetext, geo]");
- System.out.println("Loading data into tables[" + docTermTable + " " + docTextTable + " " + geoTable + "]");
-
- Job job = new Job(new Configuration(conf), "Bulk Ingest load data into Indexing Tables");
- job.setJarByClass(this.getClass());
-
- // setting long job
- Configuration jobConf = job.getConfiguration();
- jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
- jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
- jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256"));
- jobConf.setBoolean("mapred.compress.map.output", true);
-
- job.setInputFormatClass(TextInputFormat.class);
-
- job.setMapperClass(ParseNtripsMapper.class);
-
- // I'm not actually going to write output.
- job.setOutputFormatClass(NullOutputFormat.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(Text.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Text.class);
-
- TextInputFormat.setInputPaths(job, new Path(inputDir));
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new BulkNtripsInputToolIndexing(), args);
- }
-
- public static class ParseNtripsMapper extends Mapper<LongWritable, Text, Text, Text> {
- private static final Logger logger = Logger.getLogger(ParseNtripsMapper.class);
-
- public static final String TABLE_PROPERTY = "parsentripsmapper.table";
-
- private RDFParser parser;
- private FreeTextIndexer freeTextIndexer;
- private GeoIndexer geoIndexer;
- private String rdfFormat;
-
- @Override
- protected void setup(final Context context) throws IOException, InterruptedException {
- super.setup(context);
- Configuration conf = context.getConfiguration();
-
- freeTextIndexer = new AccumuloFreeTextIndexer();
- freeTextIndexer.setConf(conf);
- geoIndexer = new GeoMesaGeoIndexer();
- geoIndexer.setConf(conf);
- final ValueFactory vf = new ValueFactoryImpl();
-
- rdfFormat = conf.get(MRUtils.FORMAT_PROP);
- checkNotNull(rdfFormat, "Rdf format cannot be null");
-
- String namedGraphString = conf.get(MRUtils.NAMED_GRAPH_PROP);
- checkNotNull(namedGraphString, MRUtils.NAMED_GRAPH_PROP + " cannot be null");
-
- final Resource namedGraph = vf.createURI(namedGraphString);
-
- parser = Rio.createParser(RDFFormat.valueOf(rdfFormat));
- parser.setParserConfig(new ParserConfig(true, true, true, RDFParser.DatatypeHandling.VERIFY));
- parser.setRDFHandler(new RDFHandlerBase() {
-
- @Override
- public void handleStatement(Statement statement) throws RDFHandlerException {
- Statement contextStatement = new ContextStatementImpl(statement.getSubject(), statement
- .getPredicate(), statement.getObject(), namedGraph);
- try {
- freeTextIndexer.storeStatement(RdfToRyaConversions.convertStatement(contextStatement));
- geoIndexer.storeStatement(RdfToRyaConversions.convertStatement(contextStatement));
- } catch (IOException e) {
- logger.error("Error creating indexers", e);
- }
- }
- });
- }
-
- @Override
- public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException {
- String rdf = value.toString();
- try {
- parser.parse(new StringReader(rdf), "");
- } catch (RDFParseException e) {
- System.out.println("Line[" + rdf + "] cannot be formatted with format[" + rdfFormat + "]. Exception[" + e.getMessage()
- + "]");
- } catch (Exception e) {
- logger.error("error during map", e);
- throw new IOException("Exception occurred parsing triple[" + rdf + "]");
- }
- }
-
- @Override
- public void cleanup(Context context) {
- IOUtils.closeStream(freeTextIndexer);
- IOUtils.closeStream(geoIndexer);
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java b/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java
deleted file mode 100644
index fb80804..0000000
--- a/extras/indexing/src/main/java/mvm/rya/accumulo/mr/fileinput/RyaBatchWriterInputTool.java
+++ /dev/null
@@ -1,243 +0,0 @@
-package mvm.rya.accumulo.mr.fileinput;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-import mvm.rya.accumulo.mr.RyaOutputFormat;
-import mvm.rya.accumulo.mr.StatementWritable;
-import mvm.rya.accumulo.mr.utils.MRUtils;
-import mvm.rya.indexing.accumulo.ConfigUtils;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.log4j.Logger;
-import org.openrdf.model.Resource;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.rio.ParserConfig;
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.Rio;
-import org.openrdf.rio.helpers.RDFHandlerBase;
-
-import com.google.common.base.Preconditions;
-
-/**
- * Take large ntrips files and use MapReduce to ingest into other indexing
- */
-public class RyaBatchWriterInputTool extends Configured implements Tool {
- private static final Logger logger = Logger.getLogger(RyaBatchWriterInputTool.class);
-
- @Override
- public int run(final String[] args) throws Exception {
- String userName = null;
- String pwd = null;
- String instance = null;
- String zk = null;
- String format = null;
-
- final Configuration conf = getConf();
- // conf
- zk = conf.get(MRUtils.AC_ZK_PROP, zk);
- instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
- userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
- pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
- format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.NTRIPLES.getName());
-
- String auths = conf.get(MRUtils.AC_CV_PROP, "");
-
- conf.set(MRUtils.FORMAT_PROP, format);
- Preconditions.checkNotNull(zk, MRUtils.AC_ZK_PROP + " not set");
- Preconditions.checkNotNull(instance, MRUtils.AC_INSTANCE_PROP + " not set");
- Preconditions.checkNotNull(userName, MRUtils.AC_USERNAME_PROP + " not set");
- Preconditions.checkNotNull(pwd, MRUtils.AC_PWD_PROP + " not set");
-
- // map the config values to free text configure values
- conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk);
- conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
- conf.set(ConfigUtils.CLOUDBASE_USER, userName);
- conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
- conf.set(ConfigUtils.CLOUDBASE_AUTHS, auths);
-
- final String inputDir = args[0];
-
- String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
- Preconditions.checkNotNull(tablePrefix, MRUtils.TABLE_PREFIX_PROPERTY + " not set");
-
- String docTextTable = tablePrefix + "text";
- conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, docTextTable);
-
- String docTermTable = tablePrefix + "terms";
- conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, docTermTable);
-
- String geoTable = tablePrefix + "geo";
- conf.set(ConfigUtils.GEO_TABLENAME, geoTable);
-
- logger.info("Loading data into tables[rya, freetext, geo]");
- logger.info("Loading data into tables[" + docTermTable + " " + docTextTable + " " + geoTable + "]");
-
- Job job = new Job(new Configuration(conf), "Batch Writer load data into Rya Core and Indexing Tables");
- job.setJarByClass(this.getClass());
-
- // setting long job
- Configuration jobConf = job.getConfiguration();
- jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
-
- jobConf.setInt("mapred.task.timeout", 1000 * 60 * 60 * 24); // timeout after 1 day
-
- job.setInputFormatClass(TextInputFormat.class);
-
- job.setMapperClass(ParseNtripsMapper.class);
-
- job.setNumReduceTasks(0);
-
- // Use Rya Output Format
- job.setOutputFormatClass(RyaOutputFormat.class);
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(StatementWritable.class);
- job.setMapOutputKeyClass(NullWritable.class);
- job.setMapOutputValueClass(StatementWritable.class);
-
- TextInputFormat.setInputPaths(job, new Path(inputDir));
-
- job.waitForCompletion(true);
-
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- ToolRunner.run(new Configuration(), new RyaBatchWriterInputTool(), args);
- }
-
- public static class ParseNtripsMapper extends Mapper<LongWritable, Text, Writable, Statement> {
- private static final Logger logger = Logger.getLogger(ParseNtripsMapper.class);
-
- private RDFParser parser;
- private RDFFormat rdfFormat;
-
- @Override
- protected void setup(final Context context) throws IOException, InterruptedException {
- super.setup(context);
- Configuration conf = context.getConfiguration();
-
- final ValueFactory vf = new ValueFactoryImpl();
-
- String rdfFormatName = conf.get(MRUtils.FORMAT_PROP);
- checkNotNull(rdfFormatName, "Rdf format cannot be null");
- rdfFormat = RDFFormat.valueOf(rdfFormatName);
-
- String namedGraphString = conf.get(MRUtils.NAMED_GRAPH_PROP);
- checkNotNull(namedGraphString, MRUtils.NAMED_GRAPH_PROP + " cannot be null");
-
- final Resource namedGraph = vf.createURI(namedGraphString);
-
- parser = Rio.createParser(rdfFormat);
- parser.setParserConfig(new ParserConfig(true, true, true, RDFParser.DatatypeHandling.VERIFY));
- parser.setRDFHandler(new RDFHandlerBase() {
- @Override
- public void handleStatement(Statement statement) throws RDFHandlerException {
- Statement output;
- if (rdfFormat.equals(RDFFormat.NTRIPLES)) {
- output = new ConextStatementWrapper(statement, namedGraph);
- } else {
- output = statement;
- }
- try {
- context.write(NullWritable.get(), new StatementWritable(output));
- } catch (IOException e) {
- logger.error("Error writing statement", e);
- throw new RDFHandlerException(e);
- } catch (InterruptedException e) {
- logger.error("Error writing statement", e);
- throw new RDFHandlerException(e);
- }
- }
-
- });
- }
-
- @Override
- public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException {
- String rdf = value.toString();
- try {
- parser.parse(new StringReader(rdf), "");
- } catch (RDFParseException e) {
- logger.error("Line[" + rdf + "] cannot be formatted with format[" + rdfFormat + "]. Exception[" + e.getMessage()
- + "]", e);
- } catch (Exception e) {
- logger.error("error during map", e);
- throw new IOException("Exception occurred parsing triple[" + rdf + "]", e);
- }
- }
- }
-
- @SuppressWarnings("serial")
- private static class ConextStatementWrapper implements Statement {
- private Statement statementWithoutConext;
- private Resource context;
-
- public ConextStatementWrapper(Statement statementWithoutConext, Resource context) {
- this.statementWithoutConext = statementWithoutConext;
- this.context = context;
- }
-
- @Override
- public Resource getSubject() {
- return statementWithoutConext.getSubject();
- }
-
- @Override
- public URI getPredicate() {
- return statementWithoutConext.getPredicate();
- }
-
- @Override
- public Value getObject() {
- return statementWithoutConext.getObject();
- }
-
- @Override
- public Resource getContext() {
- return context;
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/_index.md
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/site/markdown/_index.md b/extras/rya.manual/src/site/markdown/_index.md
index bf030a3..6e45779 100644
--- a/extras/rya.manual/src/site/markdown/_index.md
+++ b/extras/rya.manual/src/site/markdown/_index.md
@@ -28,6 +28,7 @@
- [Evaluation Table](eval.md)
- [Pre-computed Joins](loadPrecomputedJoin.md)
- [Inferencing](infer.md)
+- [MapReduce Interface](mapreduce.md)
# Samples
- [Typical First Steps](sm-firststeps.md)
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/index.md
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/site/markdown/index.md b/extras/rya.manual/src/site/markdown/index.md
index 0748284..2840d10 100644
--- a/extras/rya.manual/src/site/markdown/index.md
+++ b/extras/rya.manual/src/site/markdown/index.md
@@ -30,6 +30,7 @@ This project contains documentation about the Rya, a scalable RDF triple store o
- [Evaluation Table](eval.md)
- [Pre-computed Joins](loadPrecomputedJoin.md)
- [Inferencing](infer.md)
+- [MapReduce Interface](mapreduce.md)
# Samples
- [Typical First Steps](sm-firststeps.md)
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/loaddata.md
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/site/markdown/loaddata.md b/extras/rya.manual/src/site/markdown/loaddata.md
index 2c6bc00..74fb90b 100644
--- a/extras/rya.manual/src/site/markdown/loaddata.md
+++ b/extras/rya.manual/src/site/markdown/loaddata.md
@@ -96,10 +96,10 @@ Bulk loading data is done through Map Reduce jobs
### Bulk Load RDF data
-This Map Reduce job will read a full file into memory and parse it into statements. The statements are saved into the store. Here is an example for storing in Accumulo:
+This Map Reduce job will read files into memory and parse them into statements. The statements are saved into the store. Here is an example for storing in Accumulo:
```
-hadoop jar target/accumulo.rya-3.0.4-SNAPSHOT-shaded.jar mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool -Dac.zk=localhost:2181 -Dac.instance=accumulo -Dac.username=root -Dac.pwd=secret -Drdf.tablePrefix=triplestore_ -Dio.sort.mb=64 /tmp/temp.ntrips
+hadoop jar target/rya.mapreduce-3.2.10-SNAPSHOT-shaded.jar mvm.rya.accumulo.mr.RdfFileInputTool -Dac.zk=localhost:2181 -Dac.instance=accumulo -Dac.username=root -Dac.pwd=secret -Drdf.tablePrefix=triplestore_ -Drdf.format=N-Triples /tmp/temp.ntrips
```
Options:
@@ -107,9 +107,14 @@ Options:
- rdf.tablePrefix : The tables (spo, po, osp) are prefixed with this qualifier. The tables become: (rdf.tablePrefix)spo,(rdf.tablePrefix)po,(rdf.tablePrefix)osp
- ac.* : Accumulo connection parameters
- rdf.format : See RDFFormat from openrdf, samples include (Trig, N-Triples, RDF/XML)
-- io.sort.mb : Higher the value, the faster the job goes. Just remember that you will need this much ram at least per mapper
+- sc.use_freetext, sc.use_geo, sc.use_temporal, sc.use_entity : If any of these are set to true, statements will also be
+ added to the enabled secondary indices.
+- sc.freetext.predicates, sc.geo.predicates, sc.temporal.predicates: If the associated indexer is enabled, these options specify
+ which statements should be sent to that indexer (based on the predicate). If not given, all indexers will attempt to index
+ all statements.
-The argument is the directory/file to load. This file needs to be loaded into HDFS before running.
+The argument is the directory/file to load. This file needs to be loaded into HDFS before running. If loading a directory, all files should have the same RDF
+format.
## Direct OpenRDF API
@@ -139,4 +144,4 @@ conn.commit();
conn.close();
myRepository.shutDown();
-```
\ No newline at end of file
+```
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/mapreduce.md
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/site/markdown/mapreduce.md b/extras/rya.manual/src/site/markdown/mapreduce.md
new file mode 100644
index 0000000..fde2231
--- /dev/null
+++ b/extras/rya.manual/src/site/markdown/mapreduce.md
@@ -0,0 +1,107 @@
+<!--
+
+[comment]: # Licensed to the Apache Software Foundation (ASF) under one
+[comment]: # or more contributor license agreements. See the NOTICE file
+[comment]: # distributed with this work for additional information
+[comment]: # regarding copyright ownership. The ASF licenses this file
+[comment]: # to you under the Apache License, Version 2.0 (the
+[comment]: # "License"); you may not use this file except in compliance
+[comment]: # with the License. You may obtain a copy of the License at
+[comment]: #
+[comment]: # http://www.apache.org/licenses/LICENSE-2.0
+[comment]: #
+[comment]: # Unless required by applicable law or agreed to in writing,
+[comment]: # software distributed under the License is distributed on an
+[comment]: # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+[comment]: # KIND, either express or implied. See the License for the
+[comment]: # specific language governing permissions and limitations
+[comment]: # under the License.
+
+-->
+# MapReduce Interface
+
+The rya.mapreduce project contains a set of classes facilitating the use of
+Accumulo-backed Rya as the input source or output destination of Hadoop
+MapReduce jobs.
+
+## Writable
+
+*RyaStatementWritable* wraps a statement in a WritableComparable object, so
+triples can be used as keys or values in MapReduce tasks. Statements are
+considered equal if they contain equivalent triples and equivalent Accumulo
+metadata (visibility, timestamp, etc.).
+
+## Statement Input
+
+Input formats are provided for reading triple data from Rya or from RDF files:
+
+- *RdfFileInputFormat* will read and parse RDF files of any format. Format must
+ be explicitly specified. Reading and parsing are done asynchronously, so large
+ input files can be handled; the practical limit depends on how much information
+ the openrdf parser itself needs to hold in memory to parse the file. (For example, large
+ N-Triples files can be handled easily, but large XML files might require you
+ to allocate more memory for the Map task.) Handles multiple files if given a
+ directory as input, as long as all files are the specified format. Files will
+ only be split if the format is set to N-Triples or N-Quads; otherwise, the
+ number of input files will be the number of splits. Output pairs are
+ `<LongWritable, RyaStatementWritable>`, where the former is the number of the
+ statement within the input split and the latter is the statement itself.
+
+- *RyaInputFormat* will read statements directly from a Rya table in Accumulo.
+ Extends Accumulo's AbstractInputFormat and uses that class's configuration
+ methods to configure the connection to Accumulo. The table scanned should be
+ one of the Rya core tables (spo, po, or osp), and whichever is used should be
+ specified using `RyaInputFormat.setTableLayout`, so the input format can
+ deserialize the statements correctly. Choice of table may influence
+ parallelization if the tables are split differently in Accumulo. (The number
+ of splits in Accumulo will be the number of input splits in Hadoop and
+ therefore the number of Mappers.) Output pairs are
+ `<Text, RyaStatementWritable>`, where the former is the Accumulo row ID and
+ the latter is the statement itself.
+
+## Statement Output
+
+An output format is provided for writing triple data to Rya:
+
+- *RyaOutputFormat* will insert statements into the Rya core tables and/or any
+ configured secondary indexers. Configuration options include:
+ * Table prefix: identifies Rya instance
+ * Default visibility: any statement without a visibility set will be written
+ with this visibility
+ * Default context: any statement without a context (named graph) set will be
+ written with this context
+ * Enable freetext index, geo index, temporal index, entity index, and core
+ tables: separate options for configuring exactly which indexers to use.
+ If using secondary indexers, consider providing configuration variables
+ "sc.freetext.predicates", "sc.geo.predicates", and "sc.temporal.predicates"
+ as appropriate; otherwise each indexer will attempt to index every
+ statement.
+ Expects input pairs `<Writable, RyaStatementWritable>`. Keys are ignored and
+ values are written to Rya.
+
+## Configuration
+
+*MRUtils* defines constant configuration parameter names used for passing
+Accumulo connection information, Rya prefix and table layout, RDF format,
+etc., as well as some convenience methods for getting and setting these
+values with respect to a given Configuration.
+
+## Base Tool
+
+*AbstractAccumuloMRTool* can be used as a base class for Rya MapReduce Tools
+using the ToolRunner API. It extracts necessary parameters from the
+configuration and provides methods for setting input and/or output formats and
+configuring them accordingly. To use, extend this class and implement `run`.
+In the run method, call `init` to extract and validate configuration values from
+the Hadoop Configuration. Then use `setup*(Input/Output)` methods as needed to
+configure input and output for MapReduce jobs using the stored parameters.
+(Input and output formats can then be configured directly, if necessary.)
+
+Expects parameters to be specified in the configuration using the names defined
+in MRUtils, or for secondary indexers, the names in
+`mvm.rya.indexing.accumulo.ConfigUtils`.
+
+## Examples
+
+See the `examples` subpackage for examples of how to use the interface, and the
+`tools` subpackage for some individual MapReduce applications.
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/markdown/sm-firststeps.md
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/site/markdown/sm-firststeps.md b/extras/rya.manual/src/site/markdown/sm-firststeps.md
index 34f995b..431292a 100644
--- a/extras/rya.manual/src/site/markdown/sm-firststeps.md
+++ b/extras/rya.manual/src/site/markdown/sm-firststeps.md
@@ -42,11 +42,11 @@ See the [Build From Source Section](build-source.md) to get the appropriate arti
I find that the best way to load the data is through the Bulk Load Map Reduce job.
* Save the RDF Data above onto HDFS. From now on we will refer to this location as `<RDF_HDFS_LOCATION>`
-* Move the `accumulo.rya-<version>-job.jar` onto the hadoop cluster
+* Move the `rya.mapreduce-<version>-job.jar` onto the hadoop cluster
* Bulk load the data. Here is a sample command line:
```
-hadoop jar ../accumulo.rya-2.0.0-SNAPSHOT-job.jar BulkNtripsInputTool -Drdf.tablePrefix=lubm_ -Dcb.username=user -Dcb.pwd=cbpwd -Dcb.instance=instance -Dcb.zk=zookeeperLocation -Drdf.format=N-Triples <RDF_HDFS_LOCATION>
+hadoop jar ../rya.mapreduce-3.2.10-SNAPSHOT-job.jar mvm.rya.accumulo.mr.RdfFileInputTool -Drdf.tablePrefix=lubm_ -Dac.username=user -Dac.pwd=cbpwd -Dac.instance=instance -Dac.zk=zookeeperLocation -Drdf.format=N-Triples <RDF_HDFS_LOCATION>
```
Once the data is loaded, it is actually a good practice to compact your tables. You can do this by opening the accumulo shell `shell` and running the `compact` command on the generated tables. Remember the generated tables will be prefixed by the `rdf.tablePrefix` property you assigned above. The default tablePrefix is `rts`.
@@ -77,4 +77,4 @@ This page provides a very simple text box for running queries against the store
Remember to update the connection information in the WAR: `WEB-INF/spring/spring-accumulo.xml`
-See the [Query data section](querydata.md) for more information.
\ No newline at end of file
+See the [Query data section](querydata.md) for more information.
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.manual/src/site/site.xml
----------------------------------------------------------------------
diff --git a/extras/rya.manual/src/site/site.xml b/extras/rya.manual/src/site/site.xml
index a671d3d..a5fab57 100644
--- a/extras/rya.manual/src/site/site.xml
+++ b/extras/rya.manual/src/site/site.xml
@@ -45,6 +45,7 @@ under the License.
<item name="Evaluation Table" href="eval.html"/>
<item name="Pre-computed Joins" href="loadPrecomputedJoin.html"/>
<item name="Inferencing" href="infer.html"/>
+ <item name="MapReduce Interface" href="mapreduce.html"/>
</menu>
<menu name="Samples">
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/pom.xml
----------------------------------------------------------------------
diff --git a/extras/rya.reasoning/pom.xml b/extras/rya.reasoning/pom.xml
index b7b7293..bc00404 100644
--- a/extras/rya.reasoning/pom.xml
+++ b/extras/rya.reasoning/pom.xml
@@ -43,6 +43,10 @@ under the License.
<groupId>org.apache.rya</groupId>
<artifactId>rya.sail</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.rya</groupId>
+ <artifactId>rya.mapreduce</artifactId>
+ </dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java
----------------------------------------------------------------------
diff --git a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java
index dde83c6..09b4a16 100644
--- a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java
+++ b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/AbstractReasoningTool.java
@@ -22,8 +22,8 @@ package mvm.rya.reasoning.mr;
import java.io.IOException;
import mvm.rya.accumulo.mr.RyaStatementWritable;
-import mvm.rya.accumulo.mr.fileinput.RdfFileInputFormat;
-import mvm.rya.accumulo.mr.utils.MRUtils;
+import mvm.rya.accumulo.mr.RdfFileInputFormat;
+import mvm.rya.accumulo.mr.MRUtils;
import mvm.rya.reasoning.Derivation;
import mvm.rya.reasoning.Fact;
import mvm.rya.reasoning.Schema;
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java
----------------------------------------------------------------------
diff --git a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java
index 02cce66..0209eff 100644
--- a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java
+++ b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/ConformanceTest.java
@@ -33,7 +33,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import mvm.rya.accumulo.mr.utils.MRUtils;
+import mvm.rya.accumulo.mr.MRUtils;
import mvm.rya.reasoning.Fact;
import mvm.rya.reasoning.Schema;
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java
----------------------------------------------------------------------
diff --git a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java
index 3bed4ca..b306ee8 100644
--- a/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java
+++ b/extras/rya.reasoning/src/main/java/mvm/rya/reasoning/mr/MRReasoningUtils.java
@@ -25,7 +25,7 @@ import java.io.IOException;
import mvm.rya.accumulo.AccumuloRdfConfiguration;
import mvm.rya.accumulo.AccumuloRdfConstants;
import mvm.rya.accumulo.AccumuloRyaDAO;
-import mvm.rya.accumulo.mr.utils.MRUtils;
+import mvm.rya.accumulo.mr.MRUtils;
import mvm.rya.api.RdfCloudTripleStoreConstants;
import mvm.rya.api.RdfCloudTripleStoreUtils;
import mvm.rya.api.domain.RyaStatement;
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/pom.xml
----------------------------------------------------------------------
diff --git a/extras/tinkerpop.rya/pom.xml b/extras/tinkerpop.rya/pom.xml
index 9ccb4b1..b92eb96 100644
--- a/extras/tinkerpop.rya/pom.xml
+++ b/extras/tinkerpop.rya/pom.xml
@@ -39,6 +39,10 @@ under the License.
<groupId>org.apache.rya</groupId>
<artifactId>accumulo.rya</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.rya</groupId>
+ <artifactId>rya.mapreduce</artifactId>
+ </dependency>
<dependency>
<groupId>com.tinkerpop.gremlin</groupId>
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy
----------------------------------------------------------------------
diff --git a/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy b/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy
index fc3419d..a6e906c 100644
--- a/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy
+++ b/extras/tinkerpop.rya/src/main/groovy/mvm/rya/blueprints/config/RyaGraphConfiguration.groovy
@@ -29,7 +29,7 @@ import mvm.rya.blueprints.sail.RyaSailGraph
import mvm.rya.rdftriplestore.RdfCloudTripleStore
import mvm.rya.rdftriplestore.inference.InferenceEngine
import org.apache.commons.configuration.Configuration
-import static mvm.rya.accumulo.mr.utils.MRUtils.*
+import static mvm.rya.accumulo.mr.MRUtils.*
import org.apache.commons.configuration.MapConfiguration
import mvm.rya.blueprints.sail.RyaSailEdge
import mvm.rya.blueprints.sail.RyaSailVertex
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy
----------------------------------------------------------------------
diff --git a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy
index fe0f4e0..ec8beb8 100644
--- a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy
+++ b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/TstGremlinRya.groovy
@@ -30,7 +30,7 @@
//import mvm.rya.rdftriplestore.RdfCloudTripleStore
//import mvm.rya.rdftriplestore.inference.InferenceEngine
//import org.apache.accumulo.core.client.ZooKeeperInstance
-//import static mvm.rya.accumulo.mr.utils.MRUtils.*
+//import static mvm.rya.accumulo.mr.MRUtils.*
//import static mvm.rya.api.RdfCloudTripleStoreConfiguration.CONF_QUERYPLAN_FLAG
//import static mvm.rya.api.RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX
//
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy
----------------------------------------------------------------------
diff --git a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy
index 9dd0627..c4f5dbb 100644
--- a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy
+++ b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/config/RyaGraphConfigurationTest.groovy
@@ -30,7 +30,7 @@ import org.openrdf.model.impl.StatementImpl
import org.openrdf.model.impl.ValueFactoryImpl
import static mvm.rya.api.RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX
-import static mvm.rya.accumulo.mr.utils.MRUtils.*
+import static mvm.rya.accumulo.mr.MRUtils.*
import org.apache.accumulo.core.security.Authorizations
import org.apache.accumulo.core.client.Connector
import mvm.rya.accumulo.AccumuloRyaDAO
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy
----------------------------------------------------------------------
diff --git a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy
index c661350..f0d2481 100644
--- a/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy
+++ b/extras/tinkerpop.rya/src/test/groovy/mvm/rya/blueprints/sail/RyaSailVertexSequenceTest.groovy
@@ -23,7 +23,7 @@ import mvm.rya.api.utils.IteratorWrapper
import junit.framework.TestCase
import mvm.rya.blueprints.config.RyaGraphConfiguration
import org.openrdf.model.Statement
-import static mvm.rya.accumulo.mr.utils.MRUtils.*
+import static mvm.rya.accumulo.mr.MRUtils.*
import static mvm.rya.api.RdfCloudTripleStoreConstants.VALUE_FACTORY
/**
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/42895eac/mapreduce/pom.xml
----------------------------------------------------------------------
diff --git a/mapreduce/pom.xml b/mapreduce/pom.xml
new file mode 100644
index 0000000..40dd1df
--- /dev/null
+++ b/mapreduce/pom.xml
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.rya</groupId>
+ <artifactId>rya-project</artifactId>
+ <version>3.2.10-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>rya.mapreduce</artifactId>
+ <name>Apache Rya MapReduce Tools</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.rya</groupId>
+ <artifactId>rya.api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.rya</groupId>
+ <artifactId>accumulo.rya</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.rya</groupId>
+ <artifactId>rya.indexing</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.accumulo</groupId>
+ <artifactId>accumulo-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-rio-ntriples</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-rio-nquads</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.openrdf.sesame</groupId>
+ <artifactId>sesame-rio-trig</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.mrunit</groupId>
+ <artifactId>mrunit</artifactId>
+ <classifier>hadoop2</classifier>
+ <version>1.1.0</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- RDF data Files -->
+ <exclude>**/*.ntriples</exclude>
+ <exclude>**/*.trig</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>mr</id>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <executions>
+ <execution>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+</project>