You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/04/20 16:27:14 UTC

svn commit: r1588767 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/engine/main/ main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ test/java/com/hp/hpl/jena/sparql/algebra/optimize/

Author: andy
Date: Sun Apr 20 14:27:13 2014
New Revision: 1588767

URL: http://svn.apache.org/r1588767
Log:
JENA-685: Update the fixed weight BGP reorder code

Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/main/StageGeneratorGeneric.java Sun Apr 20 14:27:13 2014
@@ -18,180 +18,47 @@
 
 package com.hp.hpl.jena.sparql.engine.main;
 
-import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.TERM ;
 import org.apache.jena.atlas.logging.Log ;
 
 import com.hp.hpl.jena.graph.Graph ;
-import com.hp.hpl.jena.graph.GraphStatisticsHandler ;
-import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.mem.GraphMem ;
 import com.hp.hpl.jena.sparql.core.BasicPattern ;
 import com.hp.hpl.jena.sparql.engine.ExecutionContext ;
 import com.hp.hpl.jena.sparql.engine.QueryIterator ;
 import com.hp.hpl.jena.sparql.engine.iterator.QueryIterBlockTriples ;
-import com.hp.hpl.jena.sparql.engine.optimizer.reorder.* ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderFixed ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderLib ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderTransformation ;
 import com.hp.hpl.jena.sparql.mgt.Explain ;
 import com.hp.hpl.jena.sparql.util.Utils ;
 
 /** Generic - always works - StageGenerator */
-public class StageGeneratorGeneric implements StageGenerator
-{
+public class StageGeneratorGeneric implements StageGenerator {
     public StageGeneratorGeneric() {}
+    private static final ReorderTransformation reorderFixed = ReorderLib.fixed() ;
     
     @Override
-    public QueryIterator execute(BasicPattern pattern, 
-                                 QueryIterator input,
-                                 ExecutionContext execCxt)
-    {
+    public QueryIterator execute(BasicPattern pattern, QueryIterator input, ExecutionContext execCxt) {
         if ( input == null )
-            Log.fatal(this, "Null input to "+Utils.classShortName(this.getClass())) ;
+            Log.fatal(this, "Null input to " + Utils.classShortName(this.getClass())) ;
 
-        Graph graph = execCxt.getActiveGraph() ; 
+        Graph graph = execCxt.getActiveGraph() ;
 
         // Choose reorder transformation and execution strategy.
-        
-        final ReorderTransformation reorder ;
-        final StageGenerator executor ;
-        
-        if ( graph instanceof GraphMem  )            // Jena in-memory graph
-        {
-            reorder = reorderBasicStats(graph) ;
-            executor = StageBuilder.executeInline ; ; 
-        }
-        else
-        {
-            // When in doubt ... use the general pass-through to graph query handler matcher.
-            // Includes union graphs, InfGraphs and other composite or unusual kinds.
-            reorder = null ;
-            executor = StageBuilder.executeInline ;
-        }
+
+        ReorderTransformation reorder = reorderFixed ;
+        StageGenerator executor = StageBuilder.executeInline ;
 
         return execute(pattern, reorder, executor, input, execCxt) ;
     }
 
-    protected QueryIterator execute(BasicPattern pattern,
-                                    ReorderTransformation reorder,
-                                    StageGenerator execution, 
-                                    QueryIterator input,
-                                    ExecutionContext execCxt)
+    protected QueryIterator execute(BasicPattern pattern, ReorderTransformation reorder, StageGenerator execution,
+                                    QueryIterator input, ExecutionContext execCxt)
     {
-        
         Explain.explain(pattern, execCxt.getContext()) ;
-        
-        if ( reorder != null )
-        {
+        if ( reorder != null ) {
             pattern = reorder.reorder(pattern) ;
             Explain.explain("Reorder", pattern, execCxt.getContext()) ;
         }
-
-        return execution.execute(pattern, input, execCxt) ; 
-    }
-    
-    // ---- Reorder policies
-        
-    // Fixed - Variable counting only. 
-    private static ReorderTransformation reorderFixed() { return ReorderLib.fixed() ; } 
-
-    // Uses Jena's statistics handler.
-    private static ReorderTransformation reorderBasicStats(Graph graph)
-    {
-        GraphStatisticsHandler stats = graph.getStatisticsHandler() ;
-        if ( stats == null )
-            return reorderFixed() ;
-        return new ReorderStatsHandler(graph, graph.getStatisticsHandler()) ;
-    }
-
-    /** Use the inline BGP matcher */ 
-    public static QueryIterator executeInline(BasicPattern pattern, QueryIterator input, ExecutionContext execCxt)
-    {
         return QueryIterBlockTriples.create(input, pattern, execCxt) ;
     }
-
-    /** Reorder a basic graph pattern using a graph statistic handler */
-    private static class ReorderStatsHandler extends ReorderTransformationSubstitution
-    {
-        static ReorderFixed fixed = (ReorderFixed)reorderFixed() ;        // We need our own copy to call into.
-        
-        // Guesses at the selectivity of fixed, but unknown, values.
-        // Choose these for large graphs because bad guesses don't harm small graphs.  
-        
-        final long TERM_S ;         // Used for S ? ? if no stats
-        final long TERM_TYPE ;
-        final long TERM_P ;         // Used for ? P ? if no stats
-        final long TERM_O ;         // Used for ? ? O if no stats
-        final long N ;
-        
-        private GraphStatisticsHandler stats ;
-
-        ReorderStatsHandler(Graph graph, GraphStatisticsHandler stats)
-        {
-            this.stats = stats ;
-            N = graph.size() ;
-            // Note: when these are too badly wrong, the app can supply a statistics file. 
-            TERM_S = 10 ;       // Wild guess: "An average subject has 10 properties".
-            TERM_P = N/10 ;     // Wild guess: "An average vocabulary has 10 properties"
-            TERM_O = 20 ;       // Wild guess: "An average object is in 20 triples".
-            TERM_TYPE = N/10 ;  // Wild guess: "An average class has 1/10 of the resources."
-        }
-        
-        @Override
-        protected double weight(PatternTriple pt)
-        {
-            double x = fixed.weight(pt) ;
-            // If there are two fixed terms, use the fixed weighting, all of which are quite small.
-            // This chooses a less optimal triple but the worse choice is still a very selective choice.
-            // One case is IFPs: the multi term choice for PO is not 1. 
-            if ( x < ReorderFixed.MultiTermMax )
-            {
-                // Rescale it from the fixed numbers of  ReorderFixed
-                //x = ReorderFixed.MultiTermSampleSize * x / N ;
-            }
-            else
-                x = weight1(pt) ;
-            
-            //System.out.printf("** %s: --> %s\n", pt, x) ;
-            return x ;
-        }
-        
-        
-        private double weight1(PatternTriple pt)
-        {
-            // One or zero fixed terms.
-            
-            long S = -1 ;
-            long P = -1 ;
-            long O = -1 ;
-            
-            // Include guesses for SP, OP, typeClass
-            
-            if ( pt.subject.isNode() )
-                S = stats.getStatistic(pt.subject.getNode(), Node.ANY, Node.ANY) ;
-            else if ( TERM.equals(pt.subject) )
-                S = TERM_S ;
-            
-            // rdf:type.
-            if ( pt.predicate.isNode() )
-                P = stats.getStatistic(Node.ANY, pt.predicate.getNode(), Node.ANY) ;
-            else if ( TERM.equals(pt.predicate) )
-                P = TERM_P ;
-            
-            if ( pt.object.isNode() )
-                O = stats.getStatistic(Node.ANY, Node.ANY, pt.object.getNode()) ;
-            else if ( TERM.equals(pt.object) )
-                O = TERM_O ;
-
-            if ( S == 0 || P == 0 || O == 0 )
-                // Can't match.
-                return 0 ;
-            
-            // Find min positive
-            double x = -1 ;
-            if ( S > 0 ) x = S ;
-            if ( P > 0 && P < x ) x = P ;
-            if ( O > 0 && O < x ) x = O ;
-            //System.out.printf("** [%d, %d, %d]\n", S, P ,O) ;
-
-            return x ;
-        }
-    }
 }

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderFixed.java Sun Apr 20 14:27:13 2014
@@ -18,50 +18,92 @@
 
 package com.hp.hpl.jena.sparql.engine.optimizer.reorder;
 
-import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.TERM ;
-import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.VAR ;
-
 import com.hp.hpl.jena.sparql.engine.optimizer.Pattern ;
 import com.hp.hpl.jena.sparql.engine.optimizer.StatsMatcher ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternTriple ;
+import com.hp.hpl.jena.sparql.engine.optimizer.reorder.ReorderTransformationSubstitution ;
 import com.hp.hpl.jena.sparql.graph.NodeConst ;
 import com.hp.hpl.jena.sparql.sse.Item ;
+import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.* ;
 
-public class ReorderFixed extends ReorderTransformationSubstitution
-{
-    ReorderFixed() {}
-    // Fixed scheme for when we have no stats.
-    // It chooses a triple pattern by order of preference.
-    
-    private static Item type = Item.createNode(NodeConst.nodeRDFType) ;
+/** Fixed scheme for choosing based on the triple patterns, without
+ *  looking at the data.  It gives a weight to a triple, with more grounded terms
+ *  being considered better.  It weights against rdf:type because that can be 
+ *  very unselective (e.g. ?x rdf:type rdf:Resource)
+ */
+public class ReorderFixed extends ReorderTransformationSubstitution {
+    /*
+     * How it works:
+     * 
+     * Choose the 'best' pattern, propagate the fact that variables are now
+     * bound (ReorderTransformationSubstitution) then chooses the next triple
+     * pattern.
+     * 
+     * Instead of just one set of rules, we make an exception is rdf:type. ?x
+     * rdf:type :T or ?x rdf:type ?type are often very much less selective and
+     * we want to give them special, poorer weighings. We do this by controlling
+     * the order of matching: first check to see if it's rdf;type in the
+     * predicate position, then apply the appropriate matcher.
+     * 
+     * If we just used a single StatsMatcher with all the rules, the
+     * VAR/TERM/TERM or VAR/TERM/VAR rules match rdf:type with lower weightings.
+     * 
+     * The relative order of equal weightings is not changed.
+     * 
+     * There are two StatsMatchers: 'matcher' and 'matcherRdfType'
+     * applied for the normal case and the rdf:type case.
+     */
     
+    public ReorderFixed() {}
+
+    private static Item              type                = Item.createNode(NodeConst.nodeRDFType) ;
+
     /** The number of triples used for the base scale */
-    public static int MultiTermSampleSize = 100 ; 
+    public static final int                MultiTermSampleSize = 100 ;
 
-    /** Maximum value for a match involving two terms. */
-    public static int MultiTermMax = 9 ; 
+    // Used for general patterns
+    private final static StatsMatcher matcher             = new StatsMatcher() ;
+    // Used to override choices made by the matcher above.
+    private final static StatsMatcher matcherRdfType      = new StatsMatcher() ;
     
-    public final static StatsMatcher matcher ;
-    static {
-        matcher = new StatsMatcher() ;
-        
-        //matcher.addPattern(new Pattern(1,   TERM, TERM, TERM)) ;     // SPO - built-in - not needed a s a rule
+    static { init() ; }
+    
+    private static void init() {
+        // rdf:type can be a bad choice e.g rdf:type rdf:Resource
+        // with inference enabled.
+        // Weight use of rdf:type worse then the general pattern
+        // that would also match by using two matchers. 
         
-        // Numbers choosen as an approximation ratios for a graph of 100 triples
-        matcher.addPattern(new Pattern(2,   TERM, TERM, VAR)) ;     // SP?
-        matcher.addPattern(new Pattern(5,   TERM, type, TERM)) ;    // ? type O -- worse than ?PO
-        matcher.addPattern(new Pattern(3,   VAR,  TERM, TERM)) ;    // ?PO
-        matcher.addPattern(new Pattern(2,   TERM, TERM, TERM)) ;    // S?O
+        // Numbers chosen as an approximation ratios for a graph of 100 triples
+
+        // 1 : TERM type TERM is builtin (SPO).
+        // matcherRdfType.addPattern(new Pattern(1, TERM, TERM, TERM)) ; 
+        matcherRdfType.addPattern(new Pattern(5, VAR, type, TERM)) ;
+        matcherRdfType.addPattern(new Pattern(50, VAR, type, VAR)) ;
         
-        matcher.addPattern(new Pattern(10,  TERM, VAR,  VAR)) ;     // S??
-        matcher.addPattern(new Pattern(20,  VAR,  VAR,  TERM)) ;    // ??O
-        matcher.addPattern(new Pattern(30,  VAR,  TERM, VAR)) ;     // ?P?
+        // SPO - built-in - not needed as a rule
+        // matcher.addPattern(new Pattern(1, TERM, TERM, TERM)) ; 
+
+        matcher.addPattern(new Pattern(2, TERM, TERM, VAR)) ;                   // SP?
+        matcher.addPattern(new Pattern(3, VAR, TERM, TERM)) ;                   // ?PO
+        matcher.addPattern(new Pattern(2, TERM, TERM, TERM)) ;                  // S?O
+
+        matcher.addPattern(new Pattern(10, TERM, VAR, VAR)) ;                   // S??
+        matcher.addPattern(new Pattern(20, VAR, VAR, TERM)) ;                   // ??O
+        matcher.addPattern(new Pattern(30, VAR, TERM, VAR)) ;                   // ?P?
 
-        matcher.addPattern(new Pattern(MultiTermSampleSize, VAR,  VAR,  VAR)) ;     // ???
+        matcher.addPattern(new Pattern(MultiTermSampleSize, VAR, VAR, VAR)) ;   // ???
     }
-    
+
     @Override
-    public double weight(PatternTriple pt)
-    {
+    public double weight(PatternTriple pt) {
+        // Special case rdf:type first to make it lower(worse) than 
+        // VAR, TERM, TERM which would otherwise be used.
+        if ( type.equals(pt.predicate) ) {
+            double w = matcherRdfType.match(pt) ;
+            if ( w > 0 )
+                return w ;
+        }
         return matcher.match(pt) ;
     }
 }

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/engine/optimizer/reorder/ReorderLib.java Sun Apr 20 14:27:13 2014
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package com.hp.hpl.jena.sparql.engine.optimizer.reorder;
+package com.hp.hpl.jena.sparql.engine.optimizer.reorder ;
 
 import com.hp.hpl.jena.sparql.core.BasicPattern ;
 import com.hp.hpl.jena.sparql.engine.optimizer.StatsMatcher ;
@@ -26,13 +26,12 @@ public class ReorderLib
     private static class ReorderProcIdentity implements ReorderProc
     {
         @Override
-        public BasicPattern reorder(BasicPattern pattern)
-        {
+        public BasicPattern reorder(BasicPattern pattern) {
             return pattern ;
-        } 
+        }
+
         @Override
-        public String toString()
-        {
+        public String toString() {
             return "identity reorder" ;
         }
     }
@@ -41,35 +40,38 @@ public class ReorderLib
     private static class ReorderTransformationIdentity implements ReorderTransformation
     {
         @Override
-        public BasicPattern reorder(BasicPattern pattern)
-        {
+        public BasicPattern reorder(BasicPattern pattern) {
             return pattern ;
         }
 
         @Override
-        public ReorderProc reorderIndexes(BasicPattern pattern)
-        {
+        public ReorderProc reorderIndexes(BasicPattern pattern) {
             return _identityProc ;
         }
     }
     private static ReorderTransformation _identity = new ReorderTransformationIdentity() ;
 
-    /** Return a  ReorderProc that does no reordering  (leaving the query writer in-control)*/
-    public static ReorderProc identityProc()
-    { return _identityProc ; }
-
-    /** Return a ReorderTransformation that maps directly to the original (leaving the query writer in-control) */
-    public static ReorderTransformation identity()
-    { return _identity ; }
+    /**
+     * Return a ReorderProc that does no reordering (leaving the query writer
+     * in-control)
+     */
+    public static ReorderProc identityProc() {
+        return _identityProc ;
+    }
 
-    public static ReorderTransformation fixed()
-    {
+    /**
+     * Return a ReorderTransformation that maps directly to the original
+     * (leaving the query writer in-control)
+     */
+    public static ReorderTransformation identity() {
+        return _identity ;
+    }
+
+    public static ReorderTransformation fixed() {
         return new ReorderFixed() ;
     }
-    
-    
-    public static ReorderTransformation weighted(String filename)
-    {
+
+    public static ReorderTransformation weighted(String filename) {
         StatsMatcher stats = new StatsMatcher(filename) ;
         return new ReorderWeighted(stats) ;
     }

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java?rev=1588767&r1=1588766&r2=1588767&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/algebra/optimize/TS_Optimization.java Sun Apr 20 14:27:13 2014
@@ -25,8 +25,9 @@ import org.junit.runners.Suite ;
 
 @RunWith(Suite.class)
 @Suite.SuiteClasses( {
-    TestOptDistinctReduced.class
-    , TestOptimizer.class
+    TestReorderBGP.class
+    , TestVarRename.class
+    , TestOptDistinctReduced.class
     , TestSemanticEquivalence.class
     , TestTransformConstantFolding.class
     , TestTransformFilters.class
@@ -34,7 +35,7 @@ import org.junit.runners.Suite ;
     , TestTransformMergeBGPs.class
     , TestTransformPromoteTableEmpty.class
     , TestTransformTopN.class
-    , TestVarRename.class
+    , TestOptimizer.class
 })
 
 public class TS_Optimization extends TestSuite