You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/06/26 11:46:34 UTC
[4/5] jena git commit: Separate out query clean up based on syntax
rewrite.
Separate out query clean up based on syntax rewrite.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/982a2398
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/982a2398
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/982a2398
Branch: refs/heads/master
Commit: 982a2398a45baa24b513c4910961ab58feb51f02
Parents: 3cca150
Author: Andy Seaborne <an...@apache.org>
Authored: Fri Jun 26 10:40:27 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Fri Jun 26 10:40:27 2015 +0100
----------------------------------------------------------------------
.../apache/jena/sparql/algebra/OpAsQuery.java | 228 +++++++------------
1 file changed, 81 insertions(+), 147 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/982a2398/jena-arq/src/main/java/org/apache/jena/sparql/algebra/OpAsQuery.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/algebra/OpAsQuery.java b/jena-arq/src/main/java/org/apache/jena/sparql/algebra/OpAsQuery.java
index edb216c..85ca6d1 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/algebra/OpAsQuery.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/algebra/OpAsQuery.java
@@ -31,8 +31,6 @@ import org.apache.jena.query.SortCondition ;
import org.apache.jena.query.Syntax ;
import org.apache.jena.sparql.ARQInternalErrorException ;
import org.apache.jena.sparql.ARQNotImplemented ;
-import org.apache.jena.sparql.algebra.Op ;
-import org.apache.jena.sparql.algebra.OpVisitor ;
import org.apache.jena.sparql.algebra.op.* ;
import org.apache.jena.sparql.core.BasicPattern ;
import org.apache.jena.sparql.core.Quad ;
@@ -43,20 +41,40 @@ import org.apache.jena.sparql.expr.* ;
import org.apache.jena.sparql.expr.aggregate.Aggregator ;
import org.apache.jena.sparql.pfunction.PropFuncArg ;
import org.apache.jena.sparql.syntax.* ;
+import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransform ;
+import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransformCleanGroupsOfOne ;
+import org.apache.jena.sparql.syntax.syntaxtransform.ElementTransformer ;
+import org.apache.jena.sparql.syntax.syntaxtransform.ExprTransformApplyElementTransform ;
import org.apache.jena.sparql.util.graph.GraphList ;
import org.apache.jena.vocabulary.RDF ;
/**
* Convert an Op expression in SPARQL syntax, that is, the reverse of algebra
- * generation
+ * generation.
+ * <p>
+ * The contract is to return an "equivalent query" - generates the same answers -
+ * to the original query that generated the algebra.
+ * That may be the same query (the code aims for this, assuming the original query
+ * didn't have additonal, unnecessary {}),
+ * different queries with the same alegra forms,
+ * or different equivalent queries - same answers, different algebra -
+ * usually where extra {} are added in and not easiely cleaned out.
+ * <p>
+ * Some attempt is made to handle algrebra expressions with operators from the optimizer.
+ * <p>
+ * It is possible to build algrebra expressions directly for which there is no SPARQL query
+ * that generates that algebra. This code may produce an equivalent query but that is
+ * not gauranteed.
*/
public class OpAsQuery {
+ // Query cleaning is done in fixupGroupsOfOne by applying an ElementTransform.
- // slice-distinct/reduce-project-order-filter[having]-extend*[AS and aggregate naming]-group-pattern
- // SELECT { ?s ?p ?o FILTER ( ?o > 5 ) ; }
- // OpTopN
+ // TODO OpTopN (which is an optimizer additional algebra operator).
+
+ static class /* struct */ QueryLevelDetails {
+ // The stack of processing in a query is:
+ // slice-distinct/reduce-project-order-filter[having]-extend*[AS and aggregate naming]-group-pattern
- public static class /* struct */QueryLevelDetails {
OpSlice opSlice = null ;
OpDistinct opDistinct = null ;
OpReduced opReduced = null ;
@@ -65,12 +83,14 @@ public class OpAsQuery {
OpFilter opHaving = null ;
List<OpExtend> opExtends = new ArrayList<>() ;
OpGroup opGroup = null ;
- // The pattern of the group or query if not grouped.
+ // End of the modifiers.
+ // The pattern of the group or query itself if not grouped.
Op pattern = null ;
private QueryLevelDetails() {}
- public void info() {
+ // Debugging help.
+ void info() {
if ( opSlice != null )
System.out.printf("slice: (%d, %d)\n", opSlice.getStart(), opSlice.getLength()) ;
if ( opDistinct != null )
@@ -122,15 +142,6 @@ public class OpAsQuery {
}
// Lookahead to see if an opGroup can be found.
- // Because inner SELECTs must have a project, the case of running
- // into an inner-SELECT-group does not occur.
- // And even if it did, either the inner-SELECT is part of a
- // group-pattern
- // (the {...} as in { { SELECT ... } ?s ?p ?o }
- // (so we'd see the othe parts of the pattern as intermediates
- // or if it's a singleton, collape the inner select into the outer
- // to give an equivalent query in a different form.
-
details.opGroup = getGroup(op) ;
if ( details.opGroup == null ) {
op = processExtend(op, details.opExtends) ;
@@ -143,8 +154,8 @@ public class OpAsQuery {
details.opHaving = (OpFilter)op ;
op = details.opHaving.getSubOp() ;
}
- // Can't tell is just "aggregation" except by looking at the assignment
- // variables.
+ // Can't tell if it's an "aggregation" except by looking at the
+ // assignment variables.
// AS and aggregate renames.
op = processExtend(op, details.opExtends) ;
@@ -166,7 +177,7 @@ public class OpAsQuery {
return op ;
}
/**
- * Allows multiple filters and any number of extend - good or bad?
+ * Allows multiple filters and any number of extend
*/
private static OpGroup getGroup(Op op) {
// Unwind tail recursion to protected against extreme queries.
@@ -198,8 +209,6 @@ public class OpAsQuery {
private Element element = null ;
private ElementGroup currentGroup = null ;
private Deque<ElementGroup> stack = new ArrayDeque<>() ;
- private int groupDepth = 0 ;
- private boolean inProject = false ;
private boolean hasRun = false ;
public Converter(Op op) {
@@ -236,9 +245,9 @@ public class OpAsQuery {
QueryLevelDetails level = QueryLevelDetails.analyse(queryOp) ;
processQueryPattern(level) ;
- // Modifiers.
+ // Modifier stack.
// slice-distinct/reduce-project-order-filter[having]-extend[AS]-extend[agg]-group-pattern
- // Do in reverse order because e.g. exts have effects on project.
+ // Do as executed (the reverse order) because e.g. extends have effects on project.
// Substitution mapping
Map<ExprVar, Expr> aggVarExprMap = new HashMap<>() ;
@@ -272,7 +281,6 @@ public class OpAsQuery {
}) ;
}
-
if ( level.opHaving != null ) {
level.opHaving.getExprs().getList().forEach(expr -> {
expr = rewrite(expr, varToExpr) ;
@@ -289,9 +297,8 @@ public class OpAsQuery {
else
query.addOrderBy(new SortCondition(expr, sc.getDirection())) ;
}) ;
- // level.opOrder.getConditions().forEach(sc->query.addOrderBy(sc))
- // ;
}
+
if ( level.opProject != null ) {
level.opProject.getVars().forEach(v -> {
if ( assignments.containsKey(v) ) {
@@ -300,10 +307,8 @@ public class OpAsQuery {
query.getProjectVars().add(v) ;
}) ;
- } else {
- // Insert BIND for any (extends) and no project happsn in processQueryPattern.
+ } else
query.setQueryResultStar(true) ;
- }
if ( level.opDistinct != null )
query.setDistinct(true) ;
@@ -334,38 +339,44 @@ public class OpAsQuery {
return ExprTransformer.transform(transform, expr) ;
}
+
+ // processQueryPattern : query.setQueryPattern
+
private void processQueryPattern(QueryLevelDetails level) {
Op op = level.pattern ;
op.visit(this) ;
ElementGroup eg = this.currentGroup ;
- Element e = fixupSubQueryOfOne(eg) ;
+ Element e = fixupGroupsOfOne(eg) ;
query.setQueryPattern(e) ;
query.setQuerySelectType() ;
}
- // Without level: This is SLEECT * { ... BIND (..) }
- // which is (extend ...
+ // Can't distinguish
+ // SELECT * { ... BIND ( ?v AS ...) }
+ // from
+ // SELECT ( ?v AS ...) { ... }.
+ // They have the same algebra.
+ // This code chooses to use the second form.
private void processQueryPattern(Op op, List<OpExtend> assignments) {
op.visit(this) ;
-
ElementGroup eg = this.currentGroup ;
processExtends(assignments,(v,e)->eg.addElement(new ElementBind(v, e)) ) ;
-
- Element e = fixupSubQueryOfOne(eg) ;
+ Element e = fixupGroupsOfOne(eg) ;
query.setQueryPattern(e) ;
query.setQuerySelectType() ;
}
- private Element fixupSubQueryOfOne(ElementGroup eg) {
- // Simplify currentGroup if possible, primarily look for the case of
- // a single sub-query which will mean we have an ElementGroup with a single
- // item which is ElementSubQuery.
- if ( eg.size() != 1 )
- return eg ;
- Element e = eg.get(0) ;
- if ( e instanceof ElementSubQuery )
- return e ;
- return eg ;
+ private Element fixupGroupsOfOne(ElementGroup eg) {
+ ElementTransform transform = new ElementTransformCleanGroupsOfOne() ;
+ ExprTransform exprTransform = new ExprTransformApplyElementTransform(transform) ;
+ Element el2 = ElementTransformer.transform(eg, transform, exprTransform) ;
+ // Top level is always a group or a subquery.
+ if ( ! ( el2 instanceof ElementGroup ) && ! ( el2 instanceof ElementSubQuery ) ) {
+ ElementGroup eg2 = new ElementGroup() ;
+ eg2.addElement(el2);
+ el2 = eg2 ;
+ }
+ return el2 ;
}
Element asElement(Op op) {
@@ -386,12 +397,6 @@ public class OpAsQuery {
currentGroup().addElement(process(opBGP.getPattern())) ;
}
- // public void visit(OpPropFunc opPropFunc)
- // {
- // OpBGP opBGP = opPropFunc.getBGP() ;
- // currentGroup().addElement(process(opBGP.getPattern())) ;
- // }
-
@Override
public void visit(OpTriple opTriple) {
currentGroup().addElement(process(opTriple.getTriple())) ;
@@ -430,11 +435,9 @@ public class OpAsQuery {
}
// There is one special case to consider:
- // A path expression was expaned into a OpSequence during Algenra
- // generation.
- // The simple path expressions become an OpSequence that could be
- // recombined
- // into on ElementPathBlock
+ // A path expression was expanded into a OpSequence during Algenra
+ // generation. The simple path expressions become an OpSequence that could be
+ // recombined into an ElementPathBlock
@Override
public void visit(OpSequence opSequence) {
@@ -461,8 +464,8 @@ public class OpAsQuery {
}
private Element process(BasicPattern pattern) {
- // The different SPARQL versions use different internal structures
- // for BGPs.
+ // The different SPARQL versions (1.0, 1.1) use different internal
+ // structures for BGPs.
if ( query.getSyntax() == Syntax.syntaxSPARQL_10 ) {
ElementTriplesBlock e = new ElementTriplesBlock() ;
for ( Triple t : pattern )
@@ -482,7 +485,6 @@ public class OpAsQuery {
}
throw new ARQInternalErrorException("Unrecognized syntax: " + query.getSyntax()) ;
-
}
private ElementTriplesBlock process(Triple triple) {
@@ -535,6 +537,7 @@ public class OpAsQuery {
Element eRight = eRightGroup ;
// Very special case. If the RHS is not something that risks
// reparsing into a copmbined element of a group, strip the group-of-one.
+ // See also ElementTransformCleanGroupsOfOne
if ( eRightGroup.size() == 1 ) {
// This always was a {} around it but it's unnecessary in a group of one.
if ( eRightGroup.get(0) instanceof ElementSubQuery )
@@ -616,7 +619,7 @@ public class OpAsQuery {
elUnion.addElement(eRight) ;
return ;
}
-
+ // Multiple unions.
// if ( eRight instanceof ElementUnion )
// {
// ElementUnion elUnion = (ElementUnion)eRight ;
@@ -719,10 +722,6 @@ public class OpAsQuery {
opLabel.getSubOp().visit(this) ;
}
- private void newLevel(Op op) {
- convertAsSubQuery(op) ;
- }
-
@Override
public void visit(OpAssign opAssign) {
Element e = asElement(opAssign.getSubOp()) ;
@@ -748,33 +747,12 @@ public class OpAsQuery {
opList.getSubOp().visit(this) ;
}
- @Override
- public void visit(OpOrder opOrder) {
- newLevel(opOrder) ;
- // List<SortCondition> x = opOrder.getConditions() ;
- // for ( SortCondition sc : x )
- // query.addOrderBy(sc);
- // opOrder.getSubOp().visit(this) ;
- }
+ // When some modifers (e.g. OpDistinct) are met in a pattern, they signal
+ // a new query level (new innser SELECT), where we switch back to
+ // looking for the level start in
- @Override
- public void visit(OpProject opProject) {
- newLevel(opProject) ;
- // if (inProject) {
- // // If we've already inside a project then we are reconstructing a
- // sub-query
- // // Create a new converter and call on the sub-op to get the
- // sub-query
- // convertAsSubQuery(opProject);
- // } else {
- // // Defer adding result vars until the end.
- // // OpGroup generates dupes otherwise
- // this.projectVars = allocProjectVars() ;
- // this.projectVars.addAll(opProject.getVars());
- // inProject = true;
- // opProject.getSubOp().visit(this) ;
- // inProject = false;
- // }
+ private void newLevel(Op op) {
+ convertAsSubQuery(op) ;
}
private void convertAsSubQuery(Op op) {
@@ -785,73 +763,33 @@ public class OpAsQuery {
}
@Override
+ public void visit(OpOrder opOrder) {
+ newLevel(opOrder) ;
+ }
+
+ @Override
+ public void visit(OpProject opProject) {
+ newLevel(opProject) ;
+ }
+
+ @Override
public void visit(OpReduced opReduced) {
newLevel(opReduced) ;
- // if (inProject) {
- // convertAsSubQuery(opReduced);
- // } else {
- // query.setReduced(true) ;
- // opReduced.getSubOp().visit(this) ;
- // }
}
@Override
public void visit(OpDistinct opDistinct) {
newLevel(opDistinct) ;
- // if (inProject) {
- // convertAsSubQuery(opDistinct);
- // } else {
- // query.setDistinct(true) ;
- // opDistinct.getSubOp().visit(this) ;
- // }
}
@Override
public void visit(OpSlice opSlice) {
newLevel(opSlice) ;
- // if (inProject) {
- // convertAsSubQuery(opSlice);
- // } else {
- // if ( opSlice.getStart() != Query.NOLIMIT )
- // query.setOffset(opSlice.getStart()) ;
- // if ( opSlice.getLength() != Query.NOLIMIT )
- // query.setLimit(opSlice.getLength()) ;
- // opSlice.getSubOp().visit(this) ;
- // }
}
@Override
public void visit(OpGroup opGroup) {
newLevel(opGroup) ;
- // List<ExprAggregator> a = opGroup.getAggregators();
- //
- // // Aggregators are broken up in the algebra, split between a
- // // group and an assignment (extend or assign) using a generated
- // var.
- // // We record them here and insert later.
- // for (ExprAggregator ea : a) {
- // // Substitute generated var for actual
- // Var givenVar = ea.getAggVar().asVar();
- // // Copy aggregator across (?)
- // Expr myAggr = query.allocAggregate(ea.getAggregator());
- // varExpression.put(givenVar, myAggr);
- // }
- //
- // VarExprList b = opGroup.getGroupVars();
- // for (Var v : b.getVars()) {
- // Expr e = b.getExpr(v);
- //
- // if (e != null) {
- // query.addGroupBy(v, e);
- //
- // } else {
- // query.addGroupBy(v);
- //
- // }
- // }
- // groupDepth++;
- // opGroup.getSubOp().visit(this);
- // groupDepth--;
}
@Override
@@ -899,9 +837,5 @@ public class OpAsQuery {
private void push(ElementGroup el) {
stack.push(el) ;
}
-
- private boolean inTopLevel() {
- return stack.size() == 0 ;
- }
}
-}
+}
\ No newline at end of file