You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@rya.apache.org by pu...@apache.org on 2016/06/16 14:04:17 UTC
[06/10] incubator-rya git commit: Added OPTIONAL support for
Precomputed-Joins,
including support for matching PCJs with OPTIONALs and evaluation of query
plans containing PCJs with OPTIONALs.
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java
index 5396926..2ca8f4a 100644
--- a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java
@@ -18,6 +18,8 @@
*/
package mvm.rya.indexing.external.tupleSet;
+import info.aduna.iteration.CloseableIteration;
+
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -28,51 +30,62 @@ import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
+import mvm.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator;
+import mvm.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator.HashJoinType;
+import mvm.rya.accumulo.pcj.iterators.IteratorCombiner;
+import mvm.rya.accumulo.pcj.iterators.PCJKeyToCrossProductBindingSetIterator;
+import mvm.rya.accumulo.pcj.iterators.PCJKeyToJoinBindingSetIterator;
+import mvm.rya.api.utils.IteratorWrapper;
+import mvm.rya.indexing.pcj.matching.PCJOptimizerUtilities;
+import mvm.rya.rdftriplestore.evaluation.ExternalBatchingIterator;
+
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;
import org.apache.rya.indexing.pcj.storage.PcjException;
import org.apache.rya.indexing.pcj.storage.PcjMetadata;
+import org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjSerializer;
+import org.apache.rya.indexing.pcj.storage.accumulo.BindingSetConverter.BindingSetConversionException;
import org.apache.rya.indexing.pcj.storage.accumulo.PcjTables;
import org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder;
+import org.openrdf.model.Value;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.algebra.Projection;
import org.openrdf.query.algebra.TupleExpr;
-import org.openrdf.query.algebra.Var;
-import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
+import org.openrdf.query.algebra.evaluation.QueryBindingSet;
+import org.openrdf.query.impl.BindingImpl;
import org.openrdf.query.parser.ParsedTupleQuery;
import org.openrdf.query.parser.sparql.SPARQLParser;
import org.openrdf.sail.SailException;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
+import com.google.common.base.Preconditions;
import com.google.common.collect.HashBiMap;
+import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
-import info.aduna.iteration.CloseableIteration;
-import mvm.rya.accumulo.precompQuery.AccumuloPcjQuery;
-import mvm.rya.api.utils.IteratorWrapper;
-import mvm.rya.indexing.PcjQuery;
-import mvm.rya.rdftriplestore.evaluation.ExternalBatchingIterator;
-
/**
* During query planning, this node is inserted into the parsed query to
* represent part of the original query (a sub-query). This sub-query is the
* value returned by {@link ExternalTupleSet#getTupleExpr()}. The results
* associated with this sub-query are stored in an external Accumulo table,
* where accCon and tablename are the associated {@link Connector} and table
- * name. During evaluation, the portion of the query in
- * {@link AccumuloIndexSet} is evaluated by scanning the external Accumulo
- * table. This class is extremely useful for caching queries and reusing results
- * from previous SPARQL queries.
+ * name. During evaluation, the portion of the query in {@link AccumuloIndexSet}
+ * is evaluated by scanning the external Accumulo table. This class is extremely
+ * useful for caching queries and reusing results from previous SPARQL queries.
* <p>
*
* The the {@link TupleExpr} returned by {@link ExternalTupleSet#getTupleExpr()}
@@ -89,73 +102,89 @@ import mvm.rya.rdftriplestore.evaluation.ExternalBatchingIterator;
* of sub-queries.
*
*/
-public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchingIterator {
-
- private final Connector accCon; //connector to Accumulo table where results are stored
- private final String tablename; //name of Accumulo table
- private List<String> varOrder = null; // orders in which results are written to table
- private final PcjTables pcj = new PcjTables();
-
- @Override
- public Map<String, Set<String>> getSupportedVariableOrders() {
- return this.getSupportedVariableOrderMap();
- }
-
- @Override
- public String getSignature() {
- return "AccumuloIndexSet(" + tablename + ") : " + Joiner.on(", ").join(this.getTupleExpr().getBindingNames());
- }
-
- /**
- *
- * @param sparql - name of sparql query whose results will be stored in PCJ table
- * @param accCon - connection to a valid Accumulo instance
- * @param tablename - name of an existing PCJ table
- * @throws MalformedQueryException
- * @throws SailException
- * @throws QueryEvaluationException
- * @throws MutationsRejectedException
- * @throws TableNotFoundException
- */
- public AccumuloIndexSet(final String sparql, final Connector accCon, final String tablename) throws MalformedQueryException, SailException, QueryEvaluationException,
- MutationsRejectedException, TableNotFoundException {
- this.tablename = tablename;
- this.accCon = accCon;
- final SPARQLParser sp = new SPARQLParser();
- final ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(sparql, null);
-
- final Optional<Projection> projection = new ParsedQueryUtil().findProjection(pq);
- if(!projection.isPresent()) {
- throw new MalformedQueryException("SPARQL query '" + sparql + "' does not contain a Projection.");
- }
- setProjectionExpr(projection.get());
-
- Set<VariableOrder> orders = null;
- try {
+public class AccumuloIndexSet extends ExternalTupleSet implements
+ ExternalBatchingIterator {
+
+ private final Connector accCon; // connector to Accumulo table where results
+ // are stored
+ private final String tablename; // name of Accumulo table
+ private List<String> varOrder = null; // orders in which results are written
+ // to table
+ private PcjTables pcj = new PcjTables();
+
+ @Override
+ public Map<String, Set<String>> getSupportedVariableOrders() {
+ return this.getSupportedVariableOrderMap();
+ }
+
+ @Override
+ public String getSignature() {
+ return "AccumuloIndexSet(" + tablename + ") : "
+ + Joiner.on(", ").join(this.getTupleExpr().getBindingNames());
+ }
+
+ /**
+ *
+ * @param sparql
+ * - name of sparql query whose results will be stored in PCJ
+ * table
+ * @param accCon
+ * - connection to a valid Accumulo instance
+ * @param tablename
+ * - name of an existing PCJ table
+ * @throws MalformedQueryException
+ * @throws SailException
+ * @throws QueryEvaluationException
+ * @throws MutationsRejectedException
+ * @throws TableNotFoundException
+ */
+ public AccumuloIndexSet(String sparql, Connector accCon,
+ String tablename) throws MalformedQueryException, SailException,
+ QueryEvaluationException, MutationsRejectedException,
+ TableNotFoundException {
+ this.tablename = tablename;
+ this.accCon = accCon;
+ SPARQLParser sp = new SPARQLParser();
+ ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(sparql, null);
+ TupleExpr te = pq.getTupleExpr();
+ Preconditions.checkArgument(PCJOptimizerUtilities.isPCJValid(te),
+ "TupleExpr is an invalid PCJ.");
+
+ Optional<Projection> projection = new ParsedQueryUtil()
+ .findProjection(pq);
+ if (!projection.isPresent()) {
+ throw new MalformedQueryException("SPARQL query '" + sparql
+ + "' does not contain a Projection.");
+ }
+ setProjectionExpr(projection.get());
+ Set<VariableOrder> orders = null;
+ try {
orders = pcj.getPcjMetadata(accCon, tablename).getVarOrders();
} catch (final PcjException e) {
e.printStackTrace();
}
- varOrder = Lists.newArrayList();
- for(final VariableOrder var: orders) {
- varOrder.add(var.toString());
- }
- setLocalityGroups(tablename, accCon, varOrder);
- this.setSupportedVariableOrderMap(varOrder);
- }
-
- /**
- *
- * @param accCon - connection to a valid Accumulo instance
- * @param tablename - name of an existing PCJ table
- * @throws MalformedQueryException
- * @throws SailException
- * @throws QueryEvaluationException
- * @throws MutationsRejectedException
- * @throws TableNotFoundException
- */
- public AccumuloIndexSet(final Connector accCon, final String tablename)
+ varOrder = Lists.newArrayList();
+ for (final VariableOrder var : orders) {
+ varOrder.add(var.toString());
+ }
+ setLocalityGroups(tablename, accCon, varOrder);
+ this.setSupportedVariableOrderMap(varOrder);
+ }
+
+ /**
+ *
+ * @param accCon
+ * - connection to a valid Accumulo instance
+ * @param tablename
+ * - name of an existing PCJ table
+ * @throws MalformedQueryException
+ * @throws SailException
+ * @throws QueryEvaluationException
+ * @throws MutationsRejectedException
+ * @throws TableNotFoundException
+ */
+ public AccumuloIndexSet(Connector accCon, String tablename)
throws MalformedQueryException, SailException,
QueryEvaluationException, MutationsRejectedException,
TableNotFoundException {
@@ -168,11 +197,12 @@ public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchi
this.tablename = tablename;
this.accCon = accCon;
- final SPARQLParser sp = new SPARQLParser();
- final ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(meta.getSparql(),
- null);
+ SPARQLParser sp = new SPARQLParser();
+ ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(
+ meta.getSparql(), null);
+
setProjectionExpr((Projection) pq.getTupleExpr());
- final Set<VariableOrder> orders = meta.getVarOrders();
+ Set<VariableOrder> orders = meta.getVarOrders();
varOrder = Lists.newArrayList();
for (final VariableOrder var : orders) {
@@ -185,34 +215,37 @@ public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchi
/**
* returns size of table for query planning
*/
- @Override
- public double cardinality() {
- double cardinality = 0;
- try {
- cardinality = pcj.getPcjMetadata(accCon, tablename).getCardinality();
- } catch (final PcjException e) {
+ @Override
+ public double cardinality() {
+ double cardinality = 0;
+ try {
+ cardinality = pcj.getPcjMetadata(accCon, tablename)
+ .getCardinality();
+ } catch (PcjException e) {
e.printStackTrace();
}
- return cardinality;
- }
-
+ return cardinality;
+ }
- /**
- *
- * @param tableName
- * @param conn
- * @param groups - locality groups to be created
- *
- * Sets locality groups for more efficient scans - these are usually the variable
- * orders in the table so that scans for specific orders are more efficient
- */
- private void setLocalityGroups(final String tableName, final Connector conn, final List<String> groups) {
+ /**
+ *
+ * @param tableName
+ * @param conn
+ * @param groups
+ * - locality groups to be created
+ *
+ * Sets locality groups for more efficient scans - these are
+ * usually the variable orders in the table so that scans for
+ * specific orders are more efficient
+ */
+ private void setLocalityGroups(String tableName, Connector conn,
+ List<String> groups) {
final HashMap<String, Set<Text>> localityGroups = new HashMap<String, Set<Text>>();
for (int i = 0; i < groups.size(); i++) {
final HashSet<Text> tempColumn = new HashSet<Text>();
tempColumn.add(new Text(groups.get(i)));
- final String groupName = groups.get(i).replace(VAR_ORDER_DELIM, "");
+ final String groupName = groups.get(i).replace(VALUE_DELIM, "");
localityGroups.put(groupName, tempColumn);
}
@@ -225,173 +258,344 @@ public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchi
}
+ @Override
+ public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(
+ BindingSet bindingset) throws QueryEvaluationException {
+ return this.evaluate(Collections.singleton(bindingset));
+ }
+ /**
+ * Core evaluation method used during query evaluation - given a collection
+ * of binding set constraints, this method finds common binding labels
+ * between the constraints and table, uses those to build a prefix scan of
+ * the Accumulo table, and creates a solution binding set by iterating of
+ * the scan results.
+ * @param bindingset - collection of {@link BindingSet}s to be joined with PCJ
+ * @return - CloseableIteration over joined results
+ */
+ @Override
+ public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(
+ final Collection<BindingSet> bindingset)
+ throws QueryEvaluationException {
+
+ if (bindingset.isEmpty()) {
+ return new IteratorWrapper<BindingSet, QueryEvaluationException>(
+ new HashSet<BindingSet>().iterator());
+ }
- @Override
- public CloseableIteration<BindingSet,QueryEvaluationException> evaluate(final BindingSet bindingset) throws QueryEvaluationException {
- return this.evaluate(Collections.singleton(bindingset));
- }
-
- /**
- * Core evaluation method used during query evaluation - given a collection of binding set constraints, this
- * method finds common binding labels between the constraints and table, uses those to build a prefix scan
- * of the Accumulo table, and creates a solution binding set by iterating of the scan results.
- */
- @Override
- public CloseableIteration<BindingSet,QueryEvaluationException> evaluate(final Collection<BindingSet> bindingset) throws QueryEvaluationException {
- String localityGroup = "";
- final Set<String> commonVars = Sets.newHashSet();
- // if bindingset is empty, there are no results, so return empty iterator
- if (bindingset.isEmpty()) {
- return new IteratorWrapper<BindingSet, QueryEvaluationException>(new HashSet<BindingSet>().iterator());
- }
- //to build range prefix, find common vars of bindingset and PCJ bindings
- else {
- final BindingSet bs = bindingset.iterator().next();
- for (final String b : this.getTupleExpr().getAssuredBindingNames()) {
- final Binding v = bs.getBinding(b);
- if (v != null) {
- commonVars.add(b);
- }
- }
- }
- //add any constant constraints to common vars to be used in range prefix
- commonVars.addAll(getConstantConstraints());
- PcjQuery apq = null;
- List<String> fullVarOrder = null;
- String commonVarOrder = null;
- try {
- if (commonVars.size() > 0) {
- commonVarOrder = getVarOrder(commonVars);
- if(commonVarOrder == null) {
- throw new IllegalStateException("Index does not support binding set!");
- }
- fullVarOrder = Lists.newArrayList(prefixToOrder(commonVarOrder).split(VAR_ORDER_DELIM));
- //use varOrder and tableVarMap to set correct scan column
- localityGroup = orderToLocGroup(fullVarOrder);
- } else {
- localityGroup = varOrder.get(0);
- }
- apq = new AccumuloPcjQuery(accCon, tablename);
- final ValueMapVisitor vmv = new ValueMapVisitor();
- this.getTupleExpr().visit(vmv);
-
- List<String> commonVarOrderList = null;
- if(commonVarOrder != null) {
- commonVarOrderList = Lists.newArrayList(commonVarOrder.split(VAR_ORDER_DELIM));
- } else {
- commonVarOrderList = new ArrayList<>();
- }
-
- return apq.queryPrecompJoin(commonVarOrderList, localityGroup, vmv.getValMap(),
- HashBiMap.create(this.getTableVarMap()).inverse(), bindingset);
- } catch(final TableNotFoundException e) {
- throw new QueryEvaluationException(e);
- }
- }
-
- /**
- *
- * @param order - variable order as indicated by query
- * @return - locality group or column family used in scan - this
- * is just the variable order expressed in terms of the variables stored
- * in the table
- */
- private String orderToLocGroup(final List<String> order) {
- String localityGroup = "";
- for (final String s : order) {
- if (localityGroup.length() == 0) {
- localityGroup = this.getTableVarMap().get(s);
- } else {
- localityGroup = localityGroup + VAR_ORDER_DELIM + this.getTableVarMap().get(s);
- }
- }
- return localityGroup;
- }
-
- /**
- *
- * @param order - prefix of a full variable order
- * @return - full variable order that includes all variables whose values
- * are stored in the table - used to obtain the locality group
- */
- //given partial order of query vars, convert to PCJ vars and determine
- //if converted partial order is a substring of a full var order of PCJ variables.
- //if converted partial order is a prefix, convert corresponding full PCJ var order to query vars
- private String prefixToOrder(String order) {
- final Map<String, String> invMap = HashBiMap.create(this.getTableVarMap()).inverse();
- String[] temp = order.split(VAR_ORDER_DELIM);
- //get order in terms of PCJ variables
- for (int i = 0; i < temp.length; i++) {
- temp[i] = this.getTableVarMap().get(temp[i]);
- }
- order = Joiner.on(VAR_ORDER_DELIM).join(temp);
- for (final String s : varOrder) {
- //verify that partial order is prefix of a PCJ varOrder
- if (s.startsWith(order)) {
- temp = s.split(VAR_ORDER_DELIM);
- //convert full PCJ varOrder back to query varOrder
- for (int i = 0; i < temp.length; i++) {
- temp[i] = invMap.get(temp[i]);
- }
- return Joiner.on(VAR_ORDER_DELIM).join(temp);
- }
- }
- throw new NoSuchElementException("Order is not a prefix of any locality group value!");
- }
-
- /**
- *
- * @param variables
- * @return - string representation of the Set variables, in an order that is in the
- * table
- */
- private String getVarOrder(final Set<String> variables) {
- final Map<String, Set<String>> varOrderMap = this.getSupportedVariableOrders();
- final Set<Map.Entry<String, Set<String>>> entries = varOrderMap.entrySet();
- for (final Map.Entry<String, Set<String>> e : entries) {
- if (e.getValue().equals(variables)) {
- return e.getKey();
- }
- }
- return null;
- }
-
- /**
- * @return - all constraints which correspond to variables
- * in {@link AccumuloIndexSet#getTupleExpr()} which are set
- * equal to a constant, but are non-constant in Accumulo table
- */
- private Set<String> getConstantConstraints() {
- final Map<String, String> tableMap = this.getTableVarMap();
- final Set<String> keys = tableMap.keySet();
- final Set<String> constants = Sets.newHashSet();
- for (final String s : keys) {
- if (s.startsWith("-const-")) {
- constants.add(s);
- }
- }
- return constants;
- }
-
- /**
- *
- * Extracts the values associated with constant labels in the query
- * Used to create binding sets from range scan
- */
- public class ValueMapVisitor extends QueryModelVisitorBase<RuntimeException> {
- Map<String, org.openrdf.model.Value> valMap = Maps.newHashMap();
- public Map<String, org.openrdf.model.Value> getValMap() {
- return valMap;
- }
- @Override
- public void meet(final Var node) {
- if (node.getName().startsWith("-const-")) {
- valMap.put(node.getName(), node.getValue());
- }
- }
- }
+ List<BindingSet> crossProductBs = new ArrayList<>();
+ Map<String, org.openrdf.model.Value> constantConstraints = new HashMap<>();
+ Set<Range> hashJoinRanges = new HashSet<>();
+ final Range EMPTY_RANGE = new Range("", true, "~", false);
+ Range crossProductRange = EMPTY_RANGE;
+ String localityGroupOrder = varOrder.get(0);
+ int maxPrefixLen = Integer.MIN_VALUE;
+ int prefixLen = 0;
+ int oldPrefixLen = 0;
+ Multimap<String, BindingSet> bindingSetHashMap = HashMultimap.create();
+ HashJoinType joinType = HashJoinType.CONSTANT_JOIN_VAR;
+ Set<String> unAssuredVariables = Sets.difference(getTupleExpr().getBindingNames(), getTupleExpr().getAssuredBindingNames());
+ boolean useColumnScan = false;
+ boolean isCrossProd = false;
+ boolean containsConstantConstraints = false;
+ BindingSet constants = getConstantConstraints();
+ containsConstantConstraints = constants.size() > 0;
-}
+ try {
+ for (BindingSet bs : bindingset) {
+ if (bindingset.size() == 1 && bs.size() == 0) {
+ // in this case, only single, empty bindingset, pcj node is
+ // first node in query plan - use full Range scan with
+ // column
+ // family set
+ useColumnScan = true;
+ }
+ // get common vars for PCJ - only use variables associated
+ // with assured Bindings
+ QueryBindingSet commonVars = new QueryBindingSet();
+ for (String b : getTupleExpr().getAssuredBindingNames()) {
+ Binding v = bs.getBinding(b);
+ if (v != null) {
+ commonVars.addBinding(v);
+ }
+ }
+ // no common vars implies cross product
+ if (commonVars.size() == 0 && bs.size() != 0) {
+ crossProductBs.add(bs);
+ isCrossProd = true;
+ }
+ //get a varOrder from orders in PCJ table - use at least
+ //one common variable
+ BindingSetVariableOrder varOrder = getVarOrder(
+ commonVars.getBindingNames(),
+ constants.getBindingNames());
+
+ // update constant constraints not used in varOrder and
+ // update Bindings used to form range by removing unused
+ // variables
+ commonVars.addAll(constants);
+ if (commonVars.size() > varOrder.varOrderLen) {
+ Map<String, Value> valMap = getConstantValueMap();
+ for (String s : new HashSet<String>(varOrder.unusedVars)) {
+ if (valMap.containsKey(s)
+ && !constantConstraints.containsKey(s)) {
+ constantConstraints.put(s, valMap.get(s));
+ }
+ commonVars.removeBinding(s);
+ }
+ }
+
+ if (containsConstantConstraints
+ && (useColumnScan || isCrossProd)) {
+ // only one range required in event of a cross product or
+ // empty BindingSet
+ // Range will either be full table Range or determined by
+ // constant constraints
+ if (crossProductRange == EMPTY_RANGE) {
+ crossProductRange = getRange(varOrder.varOrder,
+ commonVars);
+ localityGroupOrder = prefixToOrder(varOrder.varOrder);
+ }
+ } else if (!useColumnScan && !isCrossProd) {
+ // update ranges and add BindingSet to HashJoinMap if not a
+ // cross product
+ hashJoinRanges.add(getRange(varOrder.varOrder, commonVars));
+
+ prefixLen = varOrder.varOrderLen;
+ // check if common Variable Orders are changing between
+ // BindingSets (happens in case
+ // of Optional). If common variable set length changes from
+ // BindingSet to BindingSet
+ // update the HashJoinType to be VARIABLE_JOIN_VAR.
+ if (oldPrefixLen == 0) {
+ oldPrefixLen = prefixLen;
+ } else {
+ if (oldPrefixLen != prefixLen
+ && joinType == HashJoinType.CONSTANT_JOIN_VAR) {
+ joinType = HashJoinType.VARIABLE_JOIN_VAR;
+ }
+ oldPrefixLen = prefixLen;
+ }
+ // update max prefix len
+ if (prefixLen > maxPrefixLen) {
+ maxPrefixLen = prefixLen;
+ }
+ String key = getHashJoinKey(varOrder.varOrder, commonVars);
+ bindingSetHashMap.put(key, bs);
+ }
+
+ isCrossProd = false;
+ }
+
+ // create full Range scan iterator and set column family if empty
+ // collection or if cross product BindingSet exists and no hash join
+ // BindingSets
+ if ((useColumnScan || crossProductBs.size() > 0)
+ && bindingSetHashMap.size() == 0) {
+ // TODO doesn't use user specified Authorizations
+ Scanner scanner = accCon.createScanner(tablename,
+ new Authorizations());
+ // cross product with no cross product constraints here
+ scanner.setRange(crossProductRange);
+ scanner.fetchColumnFamily(new Text(localityGroupOrder));
+ return new PCJKeyToCrossProductBindingSetIterator(scanner,
+ crossProductBs, constantConstraints, unAssuredVariables, getTableVarMap());
+ } else if ((useColumnScan || crossProductBs.size() > 0)
+ && bindingSetHashMap.size() > 0) {
+
+ // in this case, both hash join BindingSets and cross product
+ // BindingSets exist
+ // create an iterator to evaluate cross product and an iterator
+ // for hash join, then combine
+
+ List<CloseableIteration<BindingSet, QueryEvaluationException>> iteratorList = new ArrayList<>();
+
+ // create cross product iterator
+ // TODO doesn't use user specified Authorizations
+ Scanner scanner1 = accCon.createScanner(tablename,
+ new Authorizations());
+ scanner1.setRange(crossProductRange);
+ scanner1.fetchColumnFamily(new Text(localityGroupOrder));
+ iteratorList.add(new PCJKeyToCrossProductBindingSetIterator(
+ scanner1, crossProductBs, constantConstraints, unAssuredVariables,
+ getTableVarMap()));
+
+ // create hash join iterator
+ // TODO doesn't use user specified Authorizations
+ BatchScanner scanner2 = accCon.createBatchScanner(tablename,
+ new Authorizations(), 10);
+ scanner2.setRanges(hashJoinRanges);
+ PCJKeyToJoinBindingSetIterator iterator = new PCJKeyToJoinBindingSetIterator(
+ scanner2, getTableVarMap(), maxPrefixLen);
+ iteratorList.add(new BindingSetHashJoinIterator(
+ bindingSetHashMap, iterator, unAssuredVariables, joinType));
+
+ // combine iterators
+ return new IteratorCombiner(iteratorList);
+
+ } else {
+ // only hash join BindingSets exist
+ // TODO doesn't use user specified auths
+ BatchScanner scanner = accCon.createBatchScanner(tablename,
+ new Authorizations(), 10);
+ // only need to create hash join iterator
+ scanner.setRanges(hashJoinRanges);
+ PCJKeyToJoinBindingSetIterator iterator = new PCJKeyToJoinBindingSetIterator(
+ scanner, getTableVarMap(), maxPrefixLen);
+ return new BindingSetHashJoinIterator(bindingSetHashMap,
+ iterator, unAssuredVariables, joinType);
+ }
+ } catch (Exception e) {
+ throw new QueryEvaluationException(e);
+ }
+ }
+
+ private String getHashJoinKey(String commonVarOrder, BindingSet bs) {
+ String[] commonVarArray = commonVarOrder.split(VAR_ORDER_DELIM);
+ String key = bs.getValue(commonVarArray[0]).toString();
+ for (int i = 1; i < commonVarArray.length; i++) {
+ key = key + VALUE_DELIM + bs.getValue(commonVarArray[i]).toString();
+ }
+ return key;
+ }
+
+ private Range getRange(String commonVarOrder, BindingSet bs)
+ throws BindingSetConversionException {
+ AccumuloPcjSerializer converter = new AccumuloPcjSerializer();
+ byte[] rangePrefix = new byte[0];
+ rangePrefix = converter.convert(bs, new VariableOrder(commonVarOrder));
+ return Range.prefix(new Text(rangePrefix));
+ }
+ /**
+ *
+ * @param variableBindingNames
+ * - names corresponding to variables
+ * @param constantBindingNames
+ * - names corresponding to constant constraints
+ * @return - {@link BindingSetVariableOrder} object containing largest
+ * possible supported variable order built from variableBindingNames
+ * and constantBindingNames
+ */
+ private BindingSetVariableOrder getVarOrder(
+ Set<String> variableBindingNames, Set<String> constantBindingNames) {
+ Map<String, Set<String>> varOrderMap = this
+ .getSupportedVariableOrders();
+ Set<Map.Entry<String, Set<String>>> entries = varOrderMap.entrySet();
+
+ Set<String> variables;
+ if (variableBindingNames.size() == 0
+ && constantBindingNames.size() == 0) {
+ return new BindingSetVariableOrder("", 0, new HashSet<String>());
+ } else if (variableBindingNames.size() > 0
+ && constantBindingNames.size() == 0) {
+ variables = variableBindingNames;
+ } else if (variableBindingNames.size() == 0
+ && constantBindingNames.size() > 0) {
+ variables = constantBindingNames;
+ } else {
+ variables = Sets.union(variableBindingNames, constantBindingNames);
+
+ String maxPrefix = null;
+ int maxPrefixLen = 0;
+ Set<String> minUnusedVariables = null;
+
+ for (Map.Entry<String, Set<String>> e : entries) {
+ Set<String> value = e.getValue();
+ if (maxPrefixLen < value.size()
+ && variables.containsAll(value)
+ && Sets.intersection(value, variableBindingNames)
+ .size() > 0) {
+ maxPrefixLen = value.size();
+ maxPrefix = e.getKey();
+ minUnusedVariables = Sets.difference(variables, value);
+ if (maxPrefixLen == variables.size()) {
+ break;
+ }
+ }
+ }
+ return new BindingSetVariableOrder(maxPrefix, maxPrefixLen,
+ minUnusedVariables);
+ }
+ String maxPrefix = null;
+ int maxPrefixLen = 0;
+ Set<String> minUnusedVariables = null;
+
+ for (Map.Entry<String, Set<String>> e : entries) {
+ Set<String> value = e.getValue();
+ if (maxPrefixLen < value.size() && variables.containsAll(value)) {
+ maxPrefixLen = value.size();
+ maxPrefix = e.getKey();
+ minUnusedVariables = Sets.difference(variables, value);
+ if (maxPrefixLen == variables.size()) {
+ break;
+ }
+ }
+ }
+ return new BindingSetVariableOrder(maxPrefix, maxPrefixLen,
+ minUnusedVariables);
+ }
+
+ /**
+ * @return - all constraints which correspond to variables in
+ * {@link AccumuloIndexSet#getTupleExpr()} which are set equal to a
+ * constant, but are non-constant in Accumulo table
+ */
+ private BindingSet getConstantConstraints() {
+ Map<String, String> tableMap = this.getTableVarMap();
+ Set<String> keys = tableMap.keySet();
+
+ QueryBindingSet constants = new QueryBindingSet();
+ for (String s : keys) {
+ if (s.startsWith("-const-")) {
+ constants.addBinding(new BindingImpl(s, getConstantValueMap()
+ .get(s)));
+ }
+ }
+ return constants;
+ }
+
+ /**
+ *
+ * @param order - prefix of a full variable order
+ * @return - full variable order that includes all variables whose values
+ * are stored in the table - used to obtain the locality group
+ */
+ //given partial order of query vars, convert to PCJ vars and determine
+ //if converted partial order is a substring of a full var order of PCJ variables.
+ //if converted partial order is a prefix, convert corresponding full PCJ var order to query vars
+ private String prefixToOrder(String order) {
+ final Map<String, String> invMap = HashBiMap.create(this.getTableVarMap()).inverse();
+ String[] temp = order.split(VAR_ORDER_DELIM);
+ //get order in terms of PCJ variables
+ for (int i = 0; i < temp.length; i++) {
+ temp[i] = this.getTableVarMap().get(temp[i]);
+ }
+ order = Joiner.on(VAR_ORDER_DELIM).join(temp);
+ for (final String s : varOrder) {
+ //verify that partial order is prefix of a PCJ varOrder
+ if (s.startsWith(order)) {
+ return s;
+ }
+ }
+ throw new NoSuchElementException("Order is not a prefix of any locality group value!");
+ }
+
+
+ private class BindingSetVariableOrder {
+
+ Set<String> unusedVars;
+ int varOrderLen = 0;
+ String varOrder;
+
+ public BindingSetVariableOrder(String varOrder, int len,
+ Set<String> unused) {
+ this.varOrder = varOrder;
+ this.varOrderLen = len;
+ this.unusedVars = unused;
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java
index ddf691d..b53dd66 100644
--- a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java
@@ -33,7 +33,9 @@ import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.algebra.Projection;
import org.openrdf.query.algebra.TupleExpr;
+import org.openrdf.query.algebra.Var;
import org.openrdf.query.algebra.evaluation.impl.ExternalSet;
+import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
import com.beust.jcommander.internal.Sets;
import com.google.common.base.Joiner;
@@ -57,9 +59,11 @@ public abstract class ExternalTupleSet extends ExternalSet {
public static final String VAR_ORDER_DELIM = ";";
public static final String CONST_PREFIX = "-const-";
+ public static final String VALUE_DELIM = "\u0000";
private Projection tupleExpr;
private Map<String, String> tableVarMap = Maps.newHashMap(); //maps vars in tupleExpr to var in stored binding sets
private Map<String, Set<String>> supportedVarOrders = Maps.newHashMap(); //indicates supported var orders
+ private Map<String, org.openrdf.model.Value> valMap;
public ExternalTupleSet() {
}
@@ -67,6 +71,7 @@ public abstract class ExternalTupleSet extends ExternalSet {
public ExternalTupleSet(Projection tupleExpr) {
Preconditions.checkNotNull(tupleExpr);
this.tupleExpr = tupleExpr;
+ valMap = getValMap();
updateTableVarMap(tupleExpr, tupleExpr);
}
@@ -100,6 +105,7 @@ public abstract class ExternalTupleSet extends ExternalSet {
updateTableVarMap(tupleExpr, this.tupleExpr);
}
this.tupleExpr = tupleExpr;
+ valMap = getValMap();
if (supportedVarOrders.size() != 0) {
updateSupportedVarOrderMap();
}
@@ -128,10 +134,14 @@ public abstract class ExternalTupleSet extends ExternalSet {
return supportedVarOrders;
}
+ public Map<String, org.openrdf.model.Value> getConstantValueMap() {
+ return valMap;
+ }
+
@Override
public ExternalSet clone() {
final ExternalTupleSet clone = (ExternalTupleSet) super.clone();
- clone.tupleExpr = this.tupleExpr.clone();
+ clone.setProjectionExpr(this.tupleExpr.clone());
clone.tableVarMap = Maps.newHashMap();
for(final String s: this.tableVarMap.keySet()) {
clone.tableVarMap.put(s,this.tableVarMap.get(s));
@@ -152,7 +162,7 @@ public abstract class ExternalTupleSet extends ExternalSet {
final Set<String> bNames = Sets.newHashSet();
final Set<String> bNamesWithConstants = Sets.newHashSet();
- for (final String s : this.getTupleExpr().getAssuredBindingNames()) {
+ for (final String s : this.getTupleExpr().getBindingNames()) {
if (bindingNames.contains(s)) {
bNames.add(s);
bNamesWithConstants.add(s);
@@ -267,4 +277,33 @@ public abstract class ExternalTupleSet extends ExternalSet {
return result;
}
+ private Map<String, org.openrdf.model.Value> getValMap() {
+ ValueMapVisitor valMapVis = new ValueMapVisitor();
+ tupleExpr.visit(valMapVis);
+ return valMapVis.getValMap();
+ }
+
+
+ /**
+ *
+ * Extracts the values associated with constant labels in the query Used to
+ * create binding sets from range scan
+ */
+ private class ValueMapVisitor extends
+ QueryModelVisitorBase<RuntimeException> {
+ Map<String, org.openrdf.model.Value> valMap = Maps.newHashMap();
+
+ public Map<String, org.openrdf.model.Value> getValMap() {
+ return valMap;
+ }
+
+ @Override
+ public void meet(Var node) {
+ if (node.getName().startsWith("-const-")) {
+ valMap.put(node.getName(), node.getValue());
+ }
+ }
+ }
+
+
}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java
index ca97014..2c5ef44 100644
--- a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java
@@ -26,8 +26,6 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import mvm.rya.indexing.external.PrecompJoinOptimizer;
-
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.algebra.Projection;
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java
new file mode 100644
index 0000000..fdd9ccb
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java
@@ -0,0 +1,126 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import mvm.rya.indexing.external.tupleSet.ExternalTupleSet;
+import mvm.rya.indexing.pcj.matching.QueryNodesToTupleExpr.TupleExprAndNodes;
+
+import org.openrdf.query.algebra.BinaryTupleOperator;
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.TupleExpr;
+import org.openrdf.query.algebra.UnaryTupleOperator;
+
+/**
+ * This class provides implementations of methods common to all implementations of
+ * the {@link PCJMatcher} interface.
+ *
+ */
+public abstract class AbstractPCJMatcher implements PCJMatcher {
+
+ protected QuerySegment segment;
+ protected List<QueryModelNode> segmentNodeList;
+ protected boolean tupleAndNodesUpToDate = false;
+ protected TupleExpr tuple;
+ protected Set<TupleExpr> unmatched;
+ protected PCJToSegment pcjToSegment;
+ protected Set<Filter> filters;
+
+ /**
+ * @param - pcj - PremomputedJoin to be matched to a subset of segment
+ * @return - true if match occurs and false otherwise
+ */
+ @Override
+ public boolean matchPCJ(ExternalTupleSet pcj) {
+ QuerySegment sgmnt = pcjToSegment.getSegment(pcj);
+ if(sgmnt == null) {
+ throw new IllegalArgumentException("PCJ must contain at east one Join or Left Join");
+ }
+ return matchPCJ(sgmnt, pcj);
+ }
+
+ /**
+ * In following method, order is determined by the order in which the
+ * node appear in the query.
+ * @return - an ordered view of the QueryModelNodes appearing tuple
+ *
+ */
+ @Override
+ public List<QueryModelNode> getOrderedNodes() {
+ return Collections.unmodifiableList(segmentNodeList);
+ }
+
+
+ @Override
+ public Set<Filter> getFilters() {
+ if (!tupleAndNodesUpToDate) {
+ updateTupleAndNodes();
+ }
+ return filters;
+ }
+
+ @Override
+ public TupleExpr getQuery() {
+ if (!tupleAndNodesUpToDate) {
+ updateTupleAndNodes();
+ }
+ return tuple;
+ }
+
+ @Override
+ public Set<TupleExpr> getUnmatchedArgs() {
+ if (!tupleAndNodesUpToDate) {
+ updateTupleAndNodes();
+ }
+ return unmatched;
+ }
+
+
+ private void updateTupleAndNodes() {
+ TupleExprAndNodes tupAndNodes = segment.getQuery();
+ tuple = tupAndNodes.getTupleExpr();
+ filters = tupAndNodes.getFilters();
+ unmatched = new HashSet<>();
+ List<QueryModelNode> nodes = tupAndNodes.getNodes();
+ for (QueryModelNode q : nodes) {
+ if (q instanceof UnaryTupleOperator
+ || q instanceof BinaryTupleOperator) {
+ unmatched.add((TupleExpr) q);
+ }
+ }
+ tupleAndNodesUpToDate = true;
+ }
+
+
+ /**
+ * Interface for converting an {@link ExternalTupleSet} (PCJ) into a
+ * {@link QuerySegment}.
+ *
+ */
+ interface PCJToSegment {
+ public QuerySegment getSegment(ExternalTupleSet pcj);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java
new file mode 100644
index 0000000..2f1e749
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java
@@ -0,0 +1,122 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import mvm.rya.indexing.pcj.matching.QueryNodesToTupleExpr.TupleExprAndNodes;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.ValueExpr;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * This class provides implementations of methods common to implementations
+ * of the {@link QuerySegment} interface.
+ *
+ */
+public abstract class AbstractQuerySegment implements QuerySegment {
+
+
+ protected List<QueryModelNode> orderedNodes = new ArrayList<>();
+ protected Set<QueryModelNode> unorderedNodes;
+ protected Map<ValueExpr, Filter> conditionMap = Maps.newHashMap();
+
+ /**
+ * Returns set view of nodes contained in the segment
+ */
+ @Override
+ public Set<QueryModelNode> getUnOrderedNodes() {
+ return Collections.unmodifiableSet(unorderedNodes);
+ }
+
+ /**
+ * Returns a list view of nodes contained in this segment, where order is
+ * determined by the getJoinArgs method
+ *
+ * @param TupleExpr
+ * from top to bottom.
+ */
+ @Override
+ public List<QueryModelNode> getOrderedNodes() {
+ return Collections.unmodifiableList(orderedNodes);
+ }
+
+ /**
+ * Allows nodes to be reordered using {@link PCJMatcher} and set
+ * @param nodes - reordering of orderedNodes
+ */
+ @Override
+ public void setNodes(List<QueryModelNode> nodes) {
+ Set<QueryModelNode> nodeSet = Sets.newHashSet(nodes);
+ Preconditions.checkArgument(nodeSet.equals(unorderedNodes));
+ orderedNodes = nodes;
+ unorderedNodes = nodeSet;
+ }
+
+ /**
+ * @param query
+ * - QuerySegment that this method checks for in this
+ * JoinSegment
+ */
+ @Override
+ public boolean containsQuerySegment(QuerySegment query) {
+ return unorderedNodes.containsAll(query.getUnOrderedNodes());
+ }
+
+ /**
+ * @return - a TupleExpr representing this JoinSegment
+ */
+ @Override
+ public TupleExprAndNodes getQuery() {
+ List<QueryModelNode> nodeCopy = new ArrayList<>();
+ for (QueryModelNode q : orderedNodes) {
+ if (!(q instanceof ValueExpr)) {
+ nodeCopy.add(q.clone());
+ }
+ }
+ QueryNodesToTupleExpr qnt = new QueryNodesToTupleExpr(nodeCopy, getFilters());
+ return qnt.getTupleAndNodes();
+ }
+
+ @Override
+ public Set<Filter> getFilters() {
+ Collection<Filter> filters = conditionMap.values();
+ Set<Filter> filterSet = new HashSet<>();
+ for (Filter filter : filters) {
+ filterSet.add(filter.clone());
+ }
+
+ return filterSet;
+
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java
new file mode 100644
index 0000000..b4f8f28
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java
@@ -0,0 +1,331 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import mvm.rya.rdftriplestore.inference.DoNotExpandSP;
+import mvm.rya.rdftriplestore.utils.FixedStatementPattern;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.Join;
+import org.openrdf.query.algebra.LeftJoin;
+import org.openrdf.query.algebra.QueryModelNodeBase;
+import org.openrdf.query.algebra.QueryModelVisitor;
+import org.openrdf.query.algebra.TupleExpr;
+import org.openrdf.query.algebra.ValueExpr;
+import org.openrdf.query.algebra.Var;
+
+import com.google.common.collect.Sets;
+
+/**
+ * This class is essentially a wrapper for {@link LeftJoin}. It provides a
+ * flattened view of a LeftJoin that is useful for matching {@AccumuloIndexSet
+ * } nodes to sub-queries to use for Precomputed Joins.
+ * Because LeftJoins cannot automatically be interchanged with {@link Join}s and
+ * other LeftJoins in the query plan, this class has utility methods to check
+ * when nodes can be interchanged in the query plan. These methods track which
+ * variables returned by {@link LeftJoin#getRightArg()} are bound. A variable is
+ * bound if it also contained in the set returned by
+ * {@link LeftJoin#getLeftArg()}. Nodes can be interchanged with a LeftJoin (and
+ * hence a FlattenedOptional) so long as the bound and unbound variables do not
+ * change.
+ *
+ */
+public class FlattenedOptional extends QueryModelNodeBase implements TupleExpr {
+
+ private Set<TupleExpr> rightArgs;
+ private Set<String> boundVars;
+ private Set<String> unboundVars;
+ private Map<String, Integer> leftArgVarCounts = new HashMap<String, Integer>();
+ private ValueExpr condition;
+ private TupleExpr rightArg;
+ private Set<String> bindingNames;
+ private Set<String> assuredBindingNames;
+
+ public FlattenedOptional(LeftJoin node) {
+ rightArgs = getJoinArgs(node.getRightArg(), new HashSet<TupleExpr>());
+ boundVars = setWithOutConstants(Sets
+ .intersection(node.getLeftArg().getAssuredBindingNames(), node
+ .getRightArg().getBindingNames()));
+ unboundVars = setWithOutConstants(Sets.difference(node.getRightArg()
+ .getBindingNames(), boundVars));
+ condition = node.getCondition();
+ rightArg = node.getRightArg();
+ getVarCounts(node);
+ assuredBindingNames = new HashSet<>(leftArgVarCounts.keySet());
+ bindingNames = new HashSet<>(Sets.union(assuredBindingNames,
+ unboundVars));
+ }
+
+ public FlattenedOptional(FlattenedOptional optional) {
+ this.rightArgs = optional.rightArgs;
+ this.boundVars = optional.boundVars;
+ this.unboundVars = optional.unboundVars;
+ this.condition = optional.condition;
+ this.rightArg = optional.rightArg;
+ this.leftArgVarCounts = optional.leftArgVarCounts;
+ this.bindingNames = optional.bindingNames;
+ this.assuredBindingNames = optional.assuredBindingNames;
+ }
+
+ public Set<TupleExpr> getRightArgs() {
+ return rightArgs;
+ }
+
+ public TupleExpr getRightArg() {
+ return rightArg;
+ }
+
+ /**
+ *
+ * @param te
+ * - TupleExpr to be added to leftarg of {@link LeftJoin}
+ */
+ public void addArg(TupleExpr te) {
+ if (te instanceof FlattenedOptional) {
+ return;
+ }
+ incrementVarCounts(te.getBindingNames());
+ }
+
+ public void removeArg(TupleExpr te) {
+ if (te instanceof FlattenedOptional) {
+ return;
+ }
+ decrementVarCounts(te.getBindingNames());
+ }
+
+ /**
+ *
+ * @param te
+ * - {@link TupleExpr} to be added to leftArg of LeftJoin
+ * @return - true if adding TupleExpr does not affect unbound variables and
+ * returns false otherwise
+ */
+ public boolean canAddTuple(TupleExpr te) {
+ // can only add LeftJoin if rightArg varNames do not intersect
+ // unbound vars
+ if (te instanceof FlattenedOptional) {
+ FlattenedOptional lj = (FlattenedOptional) te;
+ if (Sets.intersection(lj.rightArg.getBindingNames(), unboundVars)
+ .size() > 0) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ return Sets.intersection(te.getBindingNames(), unboundVars).size() == 0;
+ }
+
+ /**
+ *
+ * @param te
+ * - {@link TupleExpr} to be removed from leftArg of LeftJoin
+ * @return - true if removing TupleExpr does not affect bound variables and
+ * returns false otherwise
+ */
+ public boolean canRemoveTuple(TupleExpr te) {
+ return canRemove(te);
+ }
+
+ @Override
+ public Set<String> getBindingNames() {
+ return bindingNames;
+ }
+
+ @Override
+ public Set<String> getAssuredBindingNames() {
+ return assuredBindingNames;
+ }
+
+ public ValueExpr getCondition() {
+ return condition;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other instanceof FlattenedOptional) {
+ FlattenedOptional ljDec = (FlattenedOptional) other;
+ ValueExpr oCond = ljDec.getCondition();
+ return nullEquals(condition, oCond)
+ && ljDec.getRightArgs().equals(rightArgs);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = prime + (rightArgs == null ? 0 : rightArgs.hashCode());
+ result = prime * result
+ + (condition == null ? 0 : condition.hashCode());
+ return result;
+ }
+
+ /**
+ * This method is used to retrieve a set view of all descendants of the
+ * rightArg of the LeftJoin (the optional part)
+ *
+ * @param tupleExpr
+ * - tupleExpr whose args are being retrieved
+ * @param joinArgs
+ * - set view of all non-join args that are descendants of
+ * tupleExpr
+ * @return joinArgs
+ */
+ private Set<TupleExpr> getJoinArgs(TupleExpr tupleExpr,
+ Set<TupleExpr> joinArgs) {
+ if (tupleExpr instanceof Join) {
+ if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern)
+ && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) {
+ Join join = (Join) tupleExpr;
+ getJoinArgs(join.getLeftArg(), joinArgs);
+ getJoinArgs(join.getRightArg(), joinArgs);
+ }
+ } else if (tupleExpr instanceof LeftJoin) { // TODO probably not
+ // necessary if not
+ // including leftarg
+ LeftJoin lj = (LeftJoin) tupleExpr;
+ joinArgs.add(new FlattenedOptional(lj));
+ getJoinArgs(lj.getLeftArg(), joinArgs);
+ } else if (tupleExpr instanceof Filter) {
+ getJoinArgs(((Filter) tupleExpr).getArg(), joinArgs);
+ } else {
+ joinArgs.add(tupleExpr);
+ }
+
+ return joinArgs;
+ }
+
+ /**
+ * This method counts the number of times each variable appears in the
+ * leftArg of the LeftJoin defining this FlattenedOptional. This information
+ * is used to whether nodes can be moved out of the leftarg above the
+ * LeftJoin in the query.
+ *
+ * @param tupleExpr
+ */
+ private void getVarCounts(TupleExpr tupleExpr) {
+ if (tupleExpr instanceof Join) {
+ Join join = (Join) tupleExpr;
+ getVarCounts(join.getLeftArg());
+ getVarCounts(join.getRightArg());
+ } else if (tupleExpr instanceof LeftJoin) {
+ LeftJoin lj = (LeftJoin) tupleExpr;
+ getVarCounts(lj.getLeftArg());
+ } else if (tupleExpr instanceof Filter) {
+ getVarCounts(((Filter) tupleExpr).getArg());
+ } else {
+ incrementVarCounts(tupleExpr.getBindingNames());
+ }
+ }
+
+ /**
+ *
+ * @param te
+ * - {@link TupleExpr} to be removed from leftArg of LeftJoin
+ * @return - true if removing te doesn't affect bounded variables of
+ * LeftJoin and false otherwise
+ */
+ private boolean canRemove(TupleExpr te) {
+ // can only remove LeftJoin if right varNames do not intersect
+ // unbound vars
+ if (te instanceof FlattenedOptional) {
+ FlattenedOptional lj = (FlattenedOptional) te;
+ if (Sets.intersection(lj.getRightArg().getBindingNames(),
+ unboundVars).size() > 0) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+ Set<String> vars = te.getBindingNames();
+ Set<String> intersection = Sets.intersection(vars, boundVars);
+ if (intersection.size() == 0) {
+ return true;
+ }
+ for (String s : intersection) {
+ if (leftArgVarCounts.containsKey(s) && leftArgVarCounts.get(s) == 1) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void incrementVarCounts(Set<String> vars) {
+ for (String s : vars) {
+ if (!s.startsWith("-const-") && leftArgVarCounts.containsKey(s)) {
+ leftArgVarCounts.put(s, leftArgVarCounts.get(s) + 1);
+ } else if (!s.startsWith("-const-")) {
+ leftArgVarCounts.put(s, 1);
+ }
+ }
+ }
+
+ private void decrementVarCounts(Set<String> vars) {
+ for (String s : vars) {
+ if (leftArgVarCounts.containsKey(s) && leftArgVarCounts.get(s) > 1) {
+ leftArgVarCounts.put(s, leftArgVarCounts.get(s) - 1);
+ } else {
+ leftArgVarCounts.remove(s);
+ bindingNames.remove(s);
+ assuredBindingNames.remove(s);
+ }
+ }
+ }
+
+ /**
+ *
+ * @param vars
+ * - set of {@link Var} names, possibly contained constants
+ */
+ private Set<String> setWithOutConstants(Set<String> vars) {
+ Set<String> copy = new HashSet<>();
+ for (String s : vars) {
+ if (!s.startsWith("-const-")) {
+ copy.add(s);
+ }
+ }
+
+ return copy;
+ }
+
+ @Override
+ public <X extends Exception> void visit(QueryModelVisitor<X> visitor)
+ throws X {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String toString() {
+ return "FlattenedOptional: " + rightArgs;
+ }
+
+ @Override
+ public FlattenedOptional clone() {
+ return new FlattenedOptional(this);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java
new file mode 100644
index 0000000..30f36cf
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java
@@ -0,0 +1,130 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.List;
+import java.util.Set;
+
+import mvm.rya.indexing.external.tupleSet.ExternalTupleSet;
+import mvm.rya.rdftriplestore.inference.DoNotExpandSP;
+import mvm.rya.rdftriplestore.utils.FixedStatementPattern;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.Join;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.TupleExpr;
+import org.openrdf.query.algebra.ValueExpr;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * This class represents a portion of a {@link TupleExpr} query that PCJ queries
+ * are compared to. A JoinSegment is comprised of a collection of
+ * {@link QueryModelNode}s that are connected by {@link Join}s. In the case, the
+ * QueryModelNodes can commute within the JoinSegment, which makes JoinSegments
+ * a natural way to partition a query for PCJ matching. A query is decomposed
+ * into JoinSegments and PCJ queries can easily be compared to the {@link QueryModelNode}s
+ * contained in the segment using set operations.
+ *
+ */
+public class JoinSegment extends AbstractQuerySegment {
+
+ public JoinSegment(Join join) {
+ Preconditions.checkNotNull(join);
+ createJoinSegment(join);
+ }
+
+ public JoinSegment(Filter filter) {
+ Preconditions.checkNotNull(filter);
+ createJoinSegment(filter);
+ }
+
+ private void createJoinSegment(TupleExpr te) {
+ orderedNodes = getJoinArgs(te, orderedNodes);
+ unorderedNodes = Sets.newHashSet(orderedNodes);
+ }
+
+ /**
+ * This method matches the ordered nodes returned by
+ * {@link JoinSegment#getOrderedNodes()} for nodeToReplace with a subset of
+ * the ordered nodes for this JoinSegment. The order of the nodes for
+ * nodeToReplace must match the order of the nodes as a subset of
+ * orderedNodes
+ *
+ * @param nodeToReplace
+ * - nodes to be replaced by pcj
+ * @param pcj
+ * - pcj node that will replace specified query nodes
+ */
+ @Override
+ public boolean replaceWithPcj(QuerySegment nodeToReplace,
+ ExternalTupleSet pcj) {
+ Preconditions.checkNotNull(nodeToReplace != null);
+ Preconditions.checkNotNull(pcj);
+ if (!containsQuerySegment(nodeToReplace)) {
+ return false;
+ }
+ Set<QueryModelNode> nodeSet = nodeToReplace.getUnOrderedNodes();
+ orderedNodes.removeAll(nodeSet);
+ orderedNodes.add(pcj);
+ unorderedNodes.removeAll(nodeSet);
+ unorderedNodes.add(pcj);
+ for (QueryModelNode q : nodeSet) {
+ if (q instanceof ValueExpr) {
+ conditionMap.remove(q);
+ }
+ }
+ return true;
+ }
+
+ /**
+ *
+ * @param tupleExpr
+ * - the query object that will be traversed by this method
+ * @param joinArgs
+ * - all nodes connected by Joins and Filters
+ * @return - List containing all nodes connected by Joins, LeftJoins, and
+ * Filters. This List contains the
+ * @param ValueExpr
+ * in place of the Filter
+ */
+ private List<QueryModelNode> getJoinArgs(TupleExpr tupleExpr,
+ List<QueryModelNode> joinArgs) {
+
+ if (tupleExpr instanceof Join) {
+ if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern)
+ && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) {
+ Join join = (Join) tupleExpr;
+ getJoinArgs(join.getRightArg(), joinArgs);
+ getJoinArgs(join.getLeftArg(), joinArgs);
+ }
+ } else if (tupleExpr instanceof Filter) {
+ Filter filter = (Filter) tupleExpr;
+ joinArgs.add(filter.getCondition());
+ conditionMap.put(filter.getCondition(), filter);
+ getJoinArgs(filter.getArg(), joinArgs);
+ } else {
+ joinArgs.add(tupleExpr);
+ }
+ return joinArgs;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java
new file mode 100644
index 0000000..29c4188
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java
@@ -0,0 +1,101 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.ArrayList;
+
+import mvm.rya.indexing.external.tupleSet.ExternalTupleSet;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.Join;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.TupleExpr;
+import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
+
+/**
+ * This class is responsible for matching PCJ nodes with subsets of the
+ * {@link QueryModelNode}s found in {@link JoinSegment}s. Each PCJ is reduced to
+ * a bag of QueryModelNodes and set operations can be used to determine if the
+ * PCJ is a subset of the JoinSegment. If it is a subset, the PCJ node replaces
+ * the QueryModelNodes in the JoinSegment.
+ *
+ */
+
+public class JoinSegmentPCJMatcher extends AbstractPCJMatcher {
+
+ public JoinSegmentPCJMatcher(Join join) {
+ segment = new JoinSegment(join);
+ segmentNodeList = new ArrayList<>(segment.getOrderedNodes());
+ pcjToSegment = new PCJToJoinSegment();
+ }
+
+ public JoinSegmentPCJMatcher(Filter filter) {
+ segment = new JoinSegment(filter);
+ segmentNodeList = new ArrayList<>(segment.getOrderedNodes());
+ pcjToSegment = new PCJToJoinSegment();
+ }
+
+ /**
+ * @param pcjNodes
+ * - {@link QueryModelNode}s to be replaced
+ * @param pcj
+ * - the PCJ node to be compared to pcjNodes
+ */
+ @Override
+ public boolean matchPCJ(QuerySegment pcjNodes, ExternalTupleSet pcj) {
+ boolean nodesReplaced = segment.replaceWithPcj(pcjNodes, pcj);
+ if (nodesReplaced) {
+ tupleAndNodesUpToDate = false;
+ segmentNodeList = segment.getOrderedNodes();
+ }
+
+ return nodesReplaced;
+ }
+
+ /**
+ * This class extracts the {@link JoinSegment} from the {@link TupleExpr} of
+ * specified PCJ.
+ *
+ */
+ static class PCJToJoinSegment extends
+ QueryModelVisitorBase<RuntimeException> implements PCJToSegment {
+
+ private JoinSegment segment;
+
+ @Override
+ public QuerySegment getSegment(ExternalTupleSet pcj) {
+ segment = null;
+ pcj.getTupleExpr().visit(this);
+ return segment;
+ }
+
+ @Override
+ public void meet(Join join) {
+ segment = new JoinSegment(join);
+ }
+
+ @Override
+ public void meet(Filter filter) {
+ segment = new JoinSegment(filter);
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java
new file mode 100644
index 0000000..ebe5243
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java
@@ -0,0 +1,146 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.List;
+
+import mvm.rya.indexing.external.tupleSet.ExternalTupleSet;
+import mvm.rya.rdftriplestore.inference.DoNotExpandSP;
+import mvm.rya.rdftriplestore.utils.FixedStatementPattern;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.Join;
+import org.openrdf.query.algebra.LeftJoin;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.TupleExpr;
+import org.openrdf.query.algebra.ValueExpr;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * An OptionalJoinSegment represents the portion of a {@link TupleExpr} that is
+ * connected by Filters, LeftJoins, and Joins. All nodes in the portion of the
+ * TupleExpr that are connected via these node types are gathered into an
+ * ordered and an unordered list that can easily be compared with
+ * {@link ExternalTupleSet} nodes for sub-query matching to use with Precomputed
+ * Joins.
+ *
+ */
+public class OptionalJoinSegment extends AbstractQuerySegment {
+
+ public OptionalJoinSegment(Join join) {
+ Preconditions.checkNotNull(join);
+ createJoinSegment(join);
+ }
+
+ public OptionalJoinSegment(LeftJoin join) {
+ Preconditions.checkNotNull(join);
+ createJoinSegment(join);
+ }
+
+ public OptionalJoinSegment(Filter filter) {
+ Preconditions.checkNotNull(filter);
+ createJoinSegment(filter);
+ }
+
+ private void createJoinSegment(TupleExpr te) {
+ orderedNodes = getJoinArgs(te, orderedNodes);
+ unorderedNodes = Sets.newHashSet(orderedNodes);
+ }
+
+ /**
+ * This method matches the ordered nodes returned by
+ * {@link JoinSegment #getOrderedNodes()} for nodeToReplace with a subset of
+ * the ordered nodes for this JoinSegment. The order of the nodes for
+ * nodeToReplace must match the order of the nodes as a subset of
+ * orderedNodes
+ *
+ * @param nodeToReplace
+ * - nodes to be replaced by pcj
+ * @param pcj
+ * - pcj node that will replace specified query nodes
+ */
+ @Override
+ public boolean replaceWithPcj(QuerySegment nodeToReplace,
+ ExternalTupleSet pcj) {
+ Preconditions.checkNotNull(nodeToReplace != null);
+ Preconditions.checkNotNull(pcj);
+ if (!containsQuerySegment(nodeToReplace)) {
+ return false;
+ }
+ List<QueryModelNode> nodeList = nodeToReplace.getOrderedNodes();
+ int begin = orderedNodes.indexOf(nodeList.get(0));
+ // TODO this assumes no duplicate nodes
+ if (begin < 0
+ || begin + nodeList.size() > orderedNodes.size()
+ || !nodeList.equals(orderedNodes.subList(begin, begin
+ + nodeList.size()))) {
+ return false;
+ }
+ orderedNodes.removeAll(nodeList);
+ orderedNodes.add(begin, pcj);
+ unorderedNodes.removeAll(nodeList);
+ unorderedNodes.add(pcj);
+ for (QueryModelNode q : nodeList) {
+ if (q instanceof ValueExpr) {
+ conditionMap.remove(q);
+ }
+ }
+ return true;
+ }
+
+ /**
+ *
+ * @param tupleExpr
+ * - the query object that will be traversed by this method
+ * @param joinArgs
+ * - all nodes connected by Joins, LeftJoins, and Filters
+ * @return - List containing all nodes connected by Joins, LeftJoins, and
+ * Filters. This List contains the {@link ValueExpr} in place of the
+ * Filter and a {@link FlattenedOptional} in place of the LeftJoin
+ * for ease of comparison with PCJ nodes.
+ */
+ private List<QueryModelNode> getJoinArgs(TupleExpr tupleExpr,
+ List<QueryModelNode> joinArgs) {
+
+ if (tupleExpr instanceof Join) {
+ if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern)
+ && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) {
+ Join join = (Join) tupleExpr;
+ getJoinArgs(join.getRightArg(), joinArgs);
+ getJoinArgs(join.getLeftArg(), joinArgs);
+ }
+ } else if (tupleExpr instanceof LeftJoin) {
+ LeftJoin lj = (LeftJoin) tupleExpr;
+ joinArgs.add(new FlattenedOptional(lj));
+ getJoinArgs(lj.getLeftArg(), joinArgs);
+ } else if (tupleExpr instanceof Filter) {
+ Filter filter = (Filter) tupleExpr;
+ joinArgs.add(filter.getCondition());
+ conditionMap.put(filter.getCondition(), filter);
+ getJoinArgs(filter.getArg(), joinArgs);
+ } else {
+ joinArgs.add(tupleExpr);
+ }
+ return joinArgs;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java
new file mode 100644
index 0000000..37f6867
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java
@@ -0,0 +1,142 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import mvm.rya.indexing.external.tupleSet.ExternalTupleSet;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.Join;
+import org.openrdf.query.algebra.LeftJoin;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
+
+/**
+ * This class matches PCJ queries to sub-queries of a given
+ * {@link OptionalJoinSegment}. A match will occur when the
+ * {@link QueryModelNode}s of the PCJ can be grouped together
+ * in the OptionalJoinSegment and ordered to match the PCJ query.
+ *
+ */
+
+public class OptionalJoinSegmentPCJMatcher extends AbstractPCJMatcher {
+
+ public OptionalJoinSegmentPCJMatcher(Join join) {
+ segment = new OptionalJoinSegment(join);
+ segmentNodeList = new ArrayList<>(segment.getOrderedNodes());
+ pcjToSegment = new PCJToOptionalJoinSegment();
+ }
+
+ public OptionalJoinSegmentPCJMatcher(LeftJoin join) {
+ segment = new OptionalJoinSegment(join);
+ segmentNodeList = new ArrayList<>(segment.getOrderedNodes());
+ pcjToSegment = new PCJToOptionalJoinSegment();
+ }
+
+ public OptionalJoinSegmentPCJMatcher(Filter filter) {
+ segment = new OptionalJoinSegment(filter);
+ segmentNodeList = new ArrayList<>(segment.getOrderedNodes());
+ pcjToSegment = new PCJToOptionalJoinSegment();
+ }
+
+ /**
+ * @param pcjNodes - {@link QuerySegment} to be replaced by PCJ
+ * @param pcj - PCJ to replace matchin QuerySegment
+ */
+ @Override
+ public boolean matchPCJ(QuerySegment pcjNodes, ExternalTupleSet pcj) {
+
+ if(!segment.containsQuerySegment(pcjNodes)) {
+ return false;
+ }
+ List<QueryModelNode> consolidatedNodes = groupNodesToMatchPCJ(getOrderedNodes(), pcjNodes.getOrderedNodes());
+ if(consolidatedNodes.size() == 0) {
+ return false;
+ }
+
+ //set segment nodes to the consolidated nodes to match pcj
+ segment.setNodes(consolidatedNodes);
+ boolean nodesReplaced = segment.replaceWithPcj(pcjNodes, pcj);
+
+ //if pcj nodes replaced queryNodes, update segmentNodeList
+ //otherwise restore segment nodes back to original pre-consolidated state
+ if(nodesReplaced) {
+ segmentNodeList = segment.getOrderedNodes();
+ tupleAndNodesUpToDate = false;
+ } else {
+ segment.setNodes(segmentNodeList);
+ }
+
+ return nodesReplaced;
+ }
+
+ /**
+ *
+ * @param queryNodes - query nodes to be compared to pcj for matching
+ * @param pcjNodes - pcj nodes to match to query
+ * @return - query nodes with pcj nodes grouped together (if possible), otherwise return
+ * an empty list.
+ */
+ private List<QueryModelNode> groupNodesToMatchPCJ(List<QueryModelNode> queryNodes, List<QueryModelNode> pcjNodes) {
+ PCJNodeConsolidator pnc = new PCJNodeConsolidator(queryNodes, pcjNodes);
+ boolean canConsolidate = pnc.consolidateNodes();
+ if(canConsolidate) {
+ return pnc.getQueryNodes();
+ }
+ return new ArrayList<QueryModelNode>();
+ }
+
+
+ /**
+ * This class extracts the {@link OptionalJoinSegment} of PCJ query.
+ *
+ */
+ static class PCJToOptionalJoinSegment extends QueryModelVisitorBase<RuntimeException> implements PCJToSegment {
+
+ private OptionalJoinSegment segment;
+
+ @Override
+ public QuerySegment getSegment(ExternalTupleSet pcj) {
+ segment = null;
+ pcj.getTupleExpr().visit(this);
+ return segment;
+ }
+
+ @Override
+ public void meet(Join join) {
+ segment = new OptionalJoinSegment(join);
+ }
+
+ @Override
+ public void meet(Filter filter) {
+ segment = new OptionalJoinSegment(filter);
+ }
+
+ @Override
+ public void meet(LeftJoin node) {
+ segment = new OptionalJoinSegment(node);
+ }
+
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java
new file mode 100644
index 0000000..d98d367
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java
@@ -0,0 +1,76 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.List;
+import java.util.Set;
+
+import mvm.rya.indexing.external.tupleSet.ExternalTupleSet;
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.QueryModelNode;
+import org.openrdf.query.algebra.TupleExpr;
+
+/**
+ * This interface provides a framework for matching PCJ {@link ExternalTupleSet}s
+ * to subsets of a given {@link QuerySegment}.
+ *
+ */
+public interface PCJMatcher {
+
+ /**
+ *
+ * @param pcjNodes - QuerySegment representation of PCJ to be used for matching
+ * @param pcj - {@link ExternalTupleSet} used to replace matching PCJ nodes when match occurs
+ * @return - true is match and replace occurs and false otherwise
+ */
+ public boolean matchPCJ(QuerySegment pcjNodes, ExternalTupleSet pcj);
+
+ /**
+ *
+ * @param pcj - {@link ExternalTupleSet} used to replace matching PCJ nodes when match occurs
+ * @return - true is match and replace occurs and false otherwise
+ */
+ public boolean matchPCJ(ExternalTupleSet pcj);
+
+ /**
+ * @return - TupleExpr constructed from {@link QuerySegment} with matched nodes
+ */
+ public TupleExpr getQuery();
+
+ /**
+ *
+ * @return - all {@link TupleExpr} that haven't been matched to a PCJ
+ */
+ public Set<TupleExpr> getUnmatchedArgs();
+
+ /**
+ *
+ * @return - provided ordered view of QuerySegment nodes
+ */
+ public List<QueryModelNode> getOrderedNodes();
+
+ /**
+ *
+ * @return - Set of {@link Filter}s of given QuerySegment
+ */
+ public Set<Filter> getFilters();
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java
----------------------------------------------------------------------
diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java
new file mode 100644
index 0000000..3a42a68
--- /dev/null
+++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java
@@ -0,0 +1,73 @@
+package mvm.rya.indexing.pcj.matching;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.openrdf.query.algebra.Filter;
+import org.openrdf.query.algebra.Join;
+import org.openrdf.query.algebra.LeftJoin;
+import org.openrdf.query.algebra.Projection;
+import org.openrdf.query.algebra.TupleExpr;
+
+/**
+ * This class takes in a given {@link Join}, {@Filter}, or {@link LeftJoin}
+ * and provides the appropriate {@link PCJMatcher} to match PCJs to the
+ * given query.
+ *
+ */
+
+public class PCJMatcherFactory {
+
+ public static PCJMatcher getPCJMatcher(Join join) {
+ if (segmentContainsLeftJoins(join)) {
+ return new OptionalJoinSegmentPCJMatcher(join);
+ } else {
+ return new JoinSegmentPCJMatcher(join);
+ }
+ }
+
+ public static PCJMatcher getPCJMatcher(LeftJoin join) {
+ return new OptionalJoinSegmentPCJMatcher(join);
+ }
+
+ public static PCJMatcher getPCJMatcher(Filter filter) {
+ if (segmentContainsLeftJoins(filter)) {
+ return new OptionalJoinSegmentPCJMatcher(filter);
+ } else {
+ return new JoinSegmentPCJMatcher(filter);
+ }
+ }
+
+ private static boolean segmentContainsLeftJoins(TupleExpr tupleExpr) {
+
+ if (tupleExpr instanceof Projection) {
+ return segmentContainsLeftJoins(((Projection) tupleExpr).getArg());
+ } else if (tupleExpr instanceof Join) {
+ Join join = (Join) tupleExpr;
+ return segmentContainsLeftJoins(join.getRightArg())
+ || segmentContainsLeftJoins(join.getLeftArg());
+ } else if (tupleExpr instanceof LeftJoin) {
+ return true;
+ } else if (tupleExpr instanceof Filter) {
+ return segmentContainsLeftJoins(((Filter) tupleExpr).getArg());
+ } else {
+ return false;
+ }
+ }
+}