You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/04/20 16:27:14 UTC
svn commit: r1588767 - in /jena/trunk/jena-arq/src:
main/java/com/hp/hpl/jena/sparql/engine/main/
main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/
test/java/com/hp/hpl/jena/sparql/algebra/optimize/
Author: andy
Date: Sun Apr 20 14:27:13 2014
New Revision: 1588767
URL: http://svn.apache.org/r1588767
Log:
JENA-685: Update the fixed weight BGP reorder code
Modified:
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java
jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java
jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java Sun Apr 20 14:27:13 2014
@@ -18,180 +18,47 @@
package com.hp.hpl.jena.sparql.engine.main;
-import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.TERM ;
import org.apache.jena.atlas.logging.Log ;
import com.hp.hpl.jena.graph.Graph ;
-import com.hp.hpl.jena.graph.GraphStatisticsHandler ;
-import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.mem.GraphMem ;
import com.hp.hpl.jena.sparql.core.BasicPattern ;
import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
import com.hp.hpl.jena.sparql.engine.QueryIterator ;
import com.hp.hpl.jena.sparql.engine.iterator.QueryIterBlockTriples ;
-import com.hp.hpl.jena.sparql.engine.optimizer.reorder.* ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderFixed ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderLib ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderTransformation ;
import com.hp.hpl.jena.sparql.mgt.Explain ;
import com.hp.hpl.jena.sparql.util.Utils ;
/** Generic - always works - StageGenerator */
-public class StageGeneratorGeneric implements StageGenerator
-{
+public class StageGeneratorGeneric implements StageGenerator {
public StageGeneratorGeneric() {}
+ private static final ReorderTransformation reorderFixed = ReorderLib.fixed() ;
@Override
- public QueryIterator execute(BasicPattern pattern,
- QueryIterator input,
- ExecutionContext execCxt)
- {
+ public QueryIterator execute(BasicPattern pattern, QueryIterator input, ExecutionContext execCxt) {
if ( input == null )
- Log.fatal(this, "Null input to "+Utils.classShortName(this.getClass())) ;
+ Log.fatal(this, "Null input to " + Utils.classShortName(this.getClass())) ;
- Graph graph = execCxt.getActiveGraph() ;
+ Graph graph = execCxt.getActiveGraph() ;
// Choose reorder transformation and execution strategy.
-
- final ReorderTransformation reorder ;
- final StageGenerator executor ;
-
- if ( graph instanceof GraphMem ) // Jena in-memory graph
- {
- reorder = reorderBasicStats(graph) ;
- executor = StageBuilder.executeInline ; ;
- }
- else
- {
- // When in doubt ... use the general pass-through to graph query handler matcher.
- // Includes union graphs, InfGraphs and other composite or unusual kinds.
- reorder = null ;
- executor = StageBuilder.executeInline ;
- }
+
+ ReorderTransformation reorder = reorderFixed ;
+ StageGenerator executor = StageBuilder.executeInline ;
return execute(pattern, reorder, executor, input, execCxt) ;
}
- protected QueryIterator execute(BasicPattern pattern,
- ReorderTransformation reorder,
- StageGenerator execution,
- QueryIterator input,
- ExecutionContext execCxt)
+ protected QueryIterator execute(BasicPattern pattern, ReorderTransformation reorder, StageGenerator execution,
+ QueryIterator input, ExecutionContext execCxt)
{
-
Explain.explain(pattern, execCxt.getContext()) ;
-
- if ( reorder != null )
- {
+ if ( reorder != null ) {
pattern = reorder.reorder(pattern) ;
Explain.explain("Reorder", pattern, execCxt.getContext()) ;
}
-
- return execution.execute(pattern, input, execCxt) ;
- }
-
- // ---- Reorder policies
-
- // Fixed - Variable counting only.
- private static ReorderTransformation reorderFixed() { return ReorderLib.fixed() ; }
-
- // Uses Jena's statistics handler.
- private static ReorderTransformation reorderBasicStats(Graph graph)
- {
- GraphStatisticsHandler stats = graph.getStatisticsHandler() ;
- if ( stats == null )
- return reorderFixed() ;
- return new ReorderStatsHandler(graph, graph.getStatisticsHandler()) ;
- }
-
- /** Use the inline BGP matcher */
- public static QueryIterator executeInline(BasicPattern pattern, QueryIterator input, ExecutionContext execCxt)
- {
return QueryIterBlockTriples.create(input, pattern, execCxt) ;
}
-
- /** Reorder a basic graph pattern using a graph statistic handler */
- private static class ReorderStatsHandler extends ReorderTransformationSubstitution
- {
- static ReorderFixed fixed = (ReorderFixed)reorderFixed() ; // We need our own copy to call into.
-
- // Guesses at the selectivity of fixed, but unknown, values.
- // Choose these for large graphs because bad guesses don't harm small graphs.
-
- final long TERM_S ; // Used for S ? ? if no stats
- final long TERM_TYPE ;
- final long TERM_P ; // Used for ? P ? if no stats
- final long TERM_O ; // Used for ? ? O if no stats
- final long N ;
-
- private GraphStatisticsHandler stats ;
-
- ReorderStatsHandler(Graph graph, GraphStatisticsHandler stats)
- {
- this.stats = stats ;
- N = graph.size() ;
- // Note: when these are too badly wrong, the app can supply a statistics file.
- TERM_S = 10 ; // Wild guess: "An average subject has 10 properties".
- TERM_P = N/10 ; // Wild guess: "An average vocabulary has 10 properties"
- TERM_O = 20 ; // Wild guess: "An average object is in 20 triples".
- TERM_TYPE = N/10 ; // Wild guess: "An average class has 1/10 of the resources."
- }
-
- @Override
- protected double weight(PatternTriple pt)
- {
- double x = fixed.weight(pt) ;
- // If there are two fixed terms, use the fixed weighting, all of which are quite small.
- // This chooses a less optimal triple but the worse choice is still a very selective choice.
- // One case is IFPs: the multi term choice for PO is not 1.
- if ( x < ReorderFixed.MultiTermMax )
- {
- // Rescale it from the fixed numbers of ReorderFixed
- //x = ReorderFixed.MultiTermSampleSize * x / N ;
- }
- else
- x = weight1(pt) ;
-
- //System.out.printf("** %s: --> %s\n", pt, x) ;
- return x ;
- }
-
-
- private double weight1(PatternTriple pt)
- {
- // One or zero fixed terms.
-
- long S = -1 ;
- long P = -1 ;
- long O = -1 ;
-
- // Include guesses for SP, OP, typeClass
-
- if ( pt.subject.isNode() )
- S = stats.getStatistic(pt.subject.getNode(), Node.ANY, Node.ANY) ;
- else if ( TERM.equals(pt.subject) )
- S = TERM_S ;
-
- // rdf:type.
- if ( pt.predicate.isNode() )
- P = stats.getStatistic(Node.ANY, pt.predicate.getNode(), Node.ANY) ;
- else if ( TERM.equals(pt.predicate) )
- P = TERM_P ;
-
- if ( pt.object.isNode() )
- O = stats.getStatistic(Node.ANY, Node.ANY, pt.object.getNode()) ;
- else if ( TERM.equals(pt.object) )
- O = TERM_O ;
-
- if ( S == 0 || P == 0 || O == 0 )
- // Can't match.
- return 0 ;
-
- // Find min positive
- double x = -1 ;
- if ( S > 0 ) x = S ;
- if ( P > 0 && P < x ) x = P ;
- if ( O > 0 && O < x ) x = O ;
- //System.out.printf("** [%d, %d, %d]\n", S, P ,O) ;
-
- return x ;
- }
- }
}
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java Sun Apr 20 14:27:13 2014
@@ -18,50 +18,92 @@
package com.hp.hpl.jena.sparql.engine.optimizer.reorder;
-import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.TERM ;
-import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.VAR ;
-
import com.hp.hpl.jena.sparql.engine.optimizer.Pattern ;
import com.hp.hpl.jena.sparql.engine.optimizer.StatsMatcher ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternTriple ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderTransformationSubstitution ;
import com.hp.hpl.jena.sparql.graph.NodeConst ;
import com.hp.hpl.jena.sparql.sse.Item ;
+import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.* ;
-public class ReorderFixed extends ReorderTransformationSubstitution
-{
- ReorderFixed() {}
- // Fixed scheme for when we have no stats.
- // It chooses a triple pattern by order of preference.
-
- private static Item type = Item.createNode(NodeConst.nodeRDFType) ;
+/** Fixed scheme for choosing based on the triple patterns, without
+ * looking at the data. It gives a weight to a triple, with more grounded terms
+ * being considered better. It weights against rdf:type because that can be
+ * very unselective (e.g. ?x rdf:type rdf:Resource)
+ */
+public class ReorderFixed extends ReorderTransformationSubstitution {
+ /*
+ * How it works:
+ *
+ * Choose the 'best' pattern, propagate the fact that variables are now
+ * bound (ReorderTransformationSubstitution) then chooses the next triple
+ * pattern.
+ *
+ * Instead of just one set of rules, we make an exception is rdf:type. ?x
+ * rdf:type :T or ?x rdf:type ?type are often very much less selective and
+ * we want to give them special, poorer weighings. We do this by controlling
+ * the order of matching: first check to see if it's rdf;type in the
+ * predicate position, then apply the appropriate matcher.
+ *
+ * If we just used a single StatsMatcher with all the rules, the
+ * VAR/TERM/TERM or VAR/TERM/VAR rules match rdf:type with lower weightings.
+ *
+ * The relative order of equal weightings is not changed.
+ *
+ * There are two StatsMatchers: 'matcher' and 'matcherRdfType'
+ * applied for the normal case and the rdf:type case.
+ */
+ public ReorderFixed() {}
+
+ private static Item type = Item.createNode(NodeConst.nodeRDFType) ;
+
/** The number of triples used for the base scale */
- public static int MultiTermSampleSize = 100 ;
+ public static final int MultiTermSampleSize = 100 ;
- /** Maximum value for a match involving two terms. */
- public static int MultiTermMax = 9 ;
+ // Used for general patterns
+ private final static StatsMatcher matcher = new StatsMatcher() ;
+ // Used to override choices made by the matcher above.
+ private final static StatsMatcher matcherRdfType = new StatsMatcher() ;
- public final static StatsMatcher matcher ;
- static {
- matcher = new StatsMatcher() ;
-
- //matcher.addPattern(new Pattern(1, TERM, TERM, TERM)) ; // SPO - built-in - not needed a s a rule
+ static { init() ; }
+
+ private static void init() {
+ // rdf:type can be a bad choice e.g rdf:type rdf:Resource
+ // with inference enabled.
+ // Weight use of rdf:type worse then the general pattern
+ // that would also match by using two matchers.
- // Numbers choosen as an approximation ratios for a graph of 100 triples
- matcher.addPattern(new Pattern(2, TERM, TERM, VAR)) ; // SP?
- matcher.addPattern(new Pattern(5, TERM, type, TERM)) ; // ? type O -- worse than ?PO
- matcher.addPattern(new Pattern(3, VAR, TERM, TERM)) ; // ?PO
- matcher.addPattern(new Pattern(2, TERM, TERM, TERM)) ; // S?O
+ // Numbers chosen as an approximation ratios for a graph of 100 triples
+
+ // 1 : TERM type TERM is builtin (SPO).
+ // matcherRdfType.addPattern(new Pattern(1, TERM, TERM, TERM)) ;
+ matcherRdfType.addPattern(new Pattern(5, VAR, type, TERM)) ;
+ matcherRdfType.addPattern(new Pattern(50, VAR, type, VAR)) ;
- matcher.addPattern(new Pattern(10, TERM, VAR, VAR)) ; // S??
- matcher.addPattern(new Pattern(20, VAR, VAR, TERM)) ; // ??O
- matcher.addPattern(new Pattern(30, VAR, TERM, VAR)) ; // ?P?
+ // SPO - built-in - not needed as a rule
+ // matcher.addPattern(new Pattern(1, TERM, TERM, TERM)) ;
+
+ matcher.addPattern(new Pattern(2, TERM, TERM, VAR)) ; // SP?
+ matcher.addPattern(new Pattern(3, VAR, TERM, TERM)) ; // ?PO
+ matcher.addPattern(new Pattern(2, TERM, TERM, TERM)) ; // S?O
+
+ matcher.addPattern(new Pattern(10, TERM, VAR, VAR)) ; // S??
+ matcher.addPattern(new Pattern(20, VAR, VAR, TERM)) ; // ??O
+ matcher.addPattern(new Pattern(30, VAR, TERM, VAR)) ; // ?P?
- matcher.addPattern(new Pattern(MultiTermSampleSize, VAR, VAR, VAR)) ; // ???
+ matcher.addPattern(new Pattern(MultiTermSampleSize, VAR, VAR, VAR)) ; // ???
}
-
+
@Override
- public double weight(PatternTriple pt)
- {
+ public double weight(PatternTriple pt) {
+ // Special case rdf:type first to make it lower(worse) than
+ // VAR, TERM, TERM which would otherwise be used.
+ if ( type.equals(pt.predicate) ) {
+ double w = matcherRdfType.match(pt) ;
+ if ( w > 0 )
+ return w ;
+ }
return matcher.match(pt) ;
}
}
Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java Sun Apr 20 14:27:13 2014
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package com.hp.hpl.jena.sparql.engine.optimizer.reorder;
+package com.hp.hpl.jena.sparql.engine.optimizer.reorder ;
import com.hp.hpl.jena.sparql.core.BasicPattern ;
import com.hp.hpl.jena.sparql.engine.optimizer.StatsMatcher ;
@@ -26,13 +26,12 @@ public class ReorderLib
private static class ReorderProcIdentity implements ReorderProc
{
@Override
- public BasicPattern reorder(BasicPattern pattern)
- {
+ public BasicPattern reorder(BasicPattern pattern) {
return pattern ;
- }
+ }
+
@Override
- public String toString()
- {
+ public String toString() {
return "identity reorder" ;
}
}
@@ -41,35 +40,38 @@ public class ReorderLib
private static class ReorderTransformationIdentity implements ReorderTransformation
{
@Override
- public BasicPattern reorder(BasicPattern pattern)
- {
+ public BasicPattern reorder(BasicPattern pattern) {
return pattern ;
}
@Override
- public ReorderProc reorderIndexes(BasicPattern pattern)
- {
+ public ReorderProc reorderIndexes(BasicPattern pattern) {
return _identityProc ;
}
}
private static ReorderTransformation _identity = new ReorderTransformationIdentity() ;
- /** Return a ReorderProc that does no reordering (leaving the query writer in-control)*/
- public static ReorderProc identityProc()
- { return _identityProc ; }
-
- /** Return a ReorderTransformation that maps directly to the original (leaving the query writer in-control) */
- public static ReorderTransformation identity()
- { return _identity ; }
+ /**
+ * Return a ReorderProc that does no reordering (leaving the query writer
+ * in-control)
+ */
+ public static ReorderProc identityProc() {
+ return _identityProc ;
+ }
- public static ReorderTransformation fixed()
- {
+ /**
+ * Return a ReorderTransformation that maps directly to the original
+ * (leaving the query writer in-control)
+ */
+ public static ReorderTransformation identity() {
+ return _identity ;
+ }
+
+ public static ReorderTransformation fixed() {
return new ReorderFixed() ;
}
-
-
- public static ReorderTransformation weighted(String filename)
- {
+
+ public static ReorderTransformation weighted(String filename) {
StatsMatcher stats = new StatsMatcher(filename) ;
return new ReorderWeighted(stats) ;
}
Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java Sun Apr 20 14:27:13 2014
@@ -25,8 +25,9 @@ import org.junit.runners.Suite ;
@RunWith(Suite.class)
@Suite.SuiteClasses( {
- TestOptDistinctReduced.class
- , TestOptimizer.class
+ TestReorderBGP.class
+ , TestVarRename.class
+ , TestOptDistinctReduced.class
, TestSemanticEquivalence.class
, TestTransformConstantFolding.class
, TestTransformFilters.class
@@ -34,7 +35,7 @@ import org.junit.runners.Suite ;
, TestTransformMergeBGPs.class
, TestTransformPromoteTableEmpty.class
, TestTransformTopN.class
- , TestVarRename.class
+ , TestOptimizer.class
})
public class TS_Optimization extends TestSuite