You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2011/09/11 21:40:09 UTC

svn commit: r1169510 - in /incubator/jena/Jena2/ARQ/trunk: src-dev/dev/ src-test/com/hp/hpl/jena/sparql/algebra/optimize/ src/com/hp/hpl/jena/sparql/algebra/optimize/ src/com/hp/hpl/jena/sparql/engine/iterator/ testing/ARQ/Optimization/

Author: andy
Date: Sun Sep 11 19:40:08 2011
New Revision: 1169510

URL: http://svn.apache.org/viewvc?rev=1169510&view=rev
Log:
Add optimization of slice-project-order

Added:
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.rq
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.srj
Modified:
    incubator/jena/Jena2/ARQ/trunk/src-dev/dev/RunARQ.java
    incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterProject2.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl

Modified: incubator/jena/Jena2/ARQ/trunk/src-dev/dev/RunARQ.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src-dev/dev/RunARQ.java?rev=1169510&r1=1169509&r2=1169510&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src-dev/dev/RunARQ.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src-dev/dev/RunARQ.java Sun Sep 11 19:40:08 2011
@@ -123,6 +123,10 @@ public class RunARQ
     @SuppressWarnings("deprecation")
     public static void main(String[] argv) throws Exception
     {
+        arq.sparql.main("--data=D.ttl", "--query=Q1.rq") ;
+        exit(0) ;
+        
+        
         ARQ.init();
         
         Dataset ds = (Dataset)AssemblerUtils.build("D.ttl", DatasetAssemblerVocab.tDataset) ;

Modified: incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1169510&r1=1169509&r2=1169510&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Sun Sep 11 19:40:08 2011
@@ -137,7 +137,19 @@ public class TestOptimizer extends BaseT
             "  (bgp (triple ?s ?p ?o)))" ; 
         check(queryString, opExpectedString) ;
     }
-    
+
+//    @Test public void slice_order_to_topn_01a()
+//    {
+//        assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+//        String queryString = "SELECT ?s { ?s ?p ?o } ORDER BY ?p ?o LIMIT 42"  ;  
+//        String opExpectedString = 
+//            "(slice _ 42\n" +
+//            "  (project (?s)\n" + 
+//            "    (top (42 ?p ?o)\n" + 
+//            "      (bgp (triple ?s ?p ?o)))))" ; 
+//        check(queryString, opExpectedString) ;
+//    }
+
     @Test public void slice_order_to_topn_02()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
@@ -245,6 +257,18 @@ public class TestOptimizer extends BaseT
         check(queryString, opExpectedString) ;
     }
 
+    @Test public void slice_order_to_topn_11()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+        String queryString = "SELECT ?s { ?s ?p ?o } ORDER BY ?p ?o OFFSET 1 LIMIT 5"  ;  
+        String opExpectedString = 
+            "(slice 1 _\n" +
+            "  (project (?s)\n" + 
+            "    (top (6 ?p ?o)\n" +
+            "      (bgp (triple ?s ?p ?o)))))" ; 
+        check(queryString, opExpectedString) ;
+    }
+    
     @Test public void distinct_to_reduced_01()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optDistinctToReduced)) ;

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java?rev=1169510&r1=1169509&r2=1169510&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java Sun Sep 11 19:40:08 2011
@@ -22,10 +22,11 @@ import com.hp.hpl.jena.query.ARQ ;
 import com.hp.hpl.jena.query.Query ;
 import com.hp.hpl.jena.sparql.ARQConstants ;
 import com.hp.hpl.jena.sparql.algebra.Op ;
-import com.hp.hpl.jena.sparql.algebra.op.Op1 ;
 import com.hp.hpl.jena.sparql.algebra.TransformCopy ;
+import com.hp.hpl.jena.sparql.algebra.op.Op1 ;
 import com.hp.hpl.jena.sparql.algebra.op.OpDistinct ;
 import com.hp.hpl.jena.sparql.algebra.op.OpOrder ;
+import com.hp.hpl.jena.sparql.algebra.op.OpProject ;
 import com.hp.hpl.jena.sparql.algebra.op.OpReduced ;
 import com.hp.hpl.jena.sparql.algebra.op.OpSlice ;
 import com.hp.hpl.jena.sparql.algebra.op.OpTopN ;
@@ -38,62 +39,121 @@ public class TransformTopN extends Trans
 
     @Override
 	public Op transform(OpSlice opSlice, Op subOp) { 
-
-        /* This looks for two cases:
-         * (slice X N
+        /* 
+         * This looks for all the follwoing cases of slice with optionally 
+         * distinct and/or project follow by order
+         * 
+         *  + slice order
+         *  + slice distinct|reduced order
+         *  + slice project order
+         *  but not:
+         *  + slice distinct project order
+         *
+         * In detail:
+         * 
+         * Case 1:
+         *  (slice X N
          *   (order (cond) PATTERN) )
          * ==> 
          * (slice X _
          *   (top (X+N cond) PATTERN) )
          * 
-         * and
-         * 
+         * Case 2:
          * (slice X N
          *   (distinct or reduced
          *     (order (cond) PATTERN) ))
          * ==>  
          * (slice X _
          *   (top (X+N cond) (distinct PATTERN))
+         *   
+         * Case 3: 
+         * (slice X N
+         *   (project (vars)
+         *     (order (cond) 
+         *         PATTERN ))))
+         * ==>
+         * (slice X _
+         *   (project (vars) 
+         *     (top (X+N cond) (distinct PATTERN)))
+         * The project can also be over the slice.    
          *
-         * and evaluation of (top) looks for (top N (distinct PATTERN))
-         * See OpExecutor.execute(OpTopN)
+         * The case of (slice (distinct (project (vars) (order ...))))
+         * does not work because distinct-project menas we do not know how
+         * but to make topN buffer.
          * 
+         * When there is no project, we can push the distinct under the topN,
+         * but because the sort variables may include one projected away, it's not possible
+         * to do this with project.  The key is that distinct-project can change the number
+         * of rows in ways that mean we can not predict the topN slice.
+         *
+         * A partial optimization is to see if "cond" only uses projected variables could be considered.
+         * We could add project understanding to topN.
+         *     
          * Note that in TransformDistinctToReduced (distinct (order X)) is turned into (reduced (order X))
          * and that this optimization should be before that one but this is order independent
-         * as we process reducded or distinct in the same way. 
+         * as we process reduced or distinct in the same way. 
          */
         
+        if ( opSlice.getLength() == Query.NOLIMIT )
+            return super.transform(opSlice, subOp) ;
+        
         int threshold = (Integer)ARQ.getContext().get(externalSortBufferSize, defaultTopNSortingThreshold) ;
         long offset = ( opSlice.getStart() != Query.NOLIMIT ) ? opSlice.getStart() : 0L ;
-    	if ( offset + opSlice.getLength() < threshold )
-    	{
-        	if ( subOp instanceof OpOrder ) 
-        	{
-        	    // First case.
-        	    OpOrder opOrder = (OpOrder)subOp ;
+
+        if ( offset + opSlice.getLength() >= threshold )
+            return super.transform(opSlice, subOp) ;
+            
+        if ( subOp instanceof OpOrder ) 
+        {
+            // First case: slice-order
+            OpOrder opOrder = (OpOrder)subOp ;
+            OpTopN opTopN = new OpTopN( opOrder.getSubOp(), (int)(offset+opSlice.getLength()), opOrder.getConditions() ) ;
+            if ( offset == 0 ) {
+                return opTopN ;
+            } else {
+                return new OpSlice( opTopN, offset, Query.NOLIMIT ) ;        	        
+            }
+        }
+
+        if ( subOp instanceof OpDistinct || subOp instanceof OpReduced )
+        {
+            // Second case: slice-distinct-order or slice-reduced-order 
+            Op subSubOp = ((Op1)subOp).getSubOp() ;
+            if ( subSubOp instanceof OpOrder ) {
+                OpOrder opOrder = (OpOrder)subSubOp ;
+                Op opDistinct2 = OpDistinct.create(opOrder.getSubOp()) ;
+                OpTopN opTopN = new OpTopN( opDistinct2, (int)(offset+opSlice.getLength()), opOrder.getConditions() ) ; 
+                if ( offset == 0 ) {
+                    return opTopN ;
+                } else {
+                    return new OpSlice( opTopN, offset, Query.NOLIMIT ) ;         	            
+                }
+            }
+        }
+
+        if ( subOp instanceof OpProject )
+        {
+            // Third case: slice-project-order 
+            Op subSubOp = ((Op1)subOp).getSubOp() ;
+            if ( subSubOp instanceof OpOrder ) 
+            {
+                OpProject opProject = (OpProject)subOp ;
+                OpOrder opOrder = (OpOrder)subSubOp ;
+
+                Op underOp = new OpProject(opOrder.getSubOp(), opProject.getVars()) ;
+
+                // NB leave project over topN, unlike the distinct case where distinct goes under topN.
                 OpTopN opTopN = new OpTopN( opOrder.getSubOp(), (int)(offset+opSlice.getLength()), opOrder.getConditions() ) ;
-        	    if ( offset == 0 ) {
-        	        return opTopN ;
-        	    } else {
-                    return new OpSlice( opTopN, offset, Query.NOLIMIT ) ;        	        
-        	    }
-        	}
-            	
-        	if ( subOp instanceof OpDistinct || subOp instanceof OpReduced )
-        	{
-        	    Op subSubOp = ((Op1)subOp).getSubOp() ;
-        	    if ( subSubOp instanceof OpOrder ) {
-        	        OpOrder opOrder = (OpOrder)subSubOp ;
-        	        Op opDistinct2 = OpDistinct.create(opOrder.getSubOp()) ;
-        	        OpTopN opTopN = new OpTopN( opDistinct2, (int)(offset+opSlice.getLength()), opOrder.getConditions() ) ; 
-        	        if ( offset == 0 ) {
-        	            return opTopN ;
-        	        } else {
-                        return new OpSlice( opTopN, offset, Query.NOLIMIT ) ;         	            
-        	        }
-        	    }
-        	}
-    	}
+                Op proj = new OpProject(opTopN, opProject.getVars()) ;
+
+                if ( offset == 0 ) {
+                    // Causes open iterator warnings : why? See JENA-114
+                    //return proj ;
+                } else {
+                    return new OpSlice( proj, offset, Query.NOLIMIT ) ;                       
+                }
+            }
+        }
 
     	// Pass through.
     	return super.transform(opSlice, subOp) ; 

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterProject2.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterProject2.java?rev=1169510&r1=1169509&r2=1169510&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterProject2.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterProject2.java Sun Sep 11 19:40:08 2011
@@ -19,7 +19,6 @@ import com.hp.hpl.jena.sparql.engine.mai
 
 public class QueryIterProject2 extends QueryIterRepeatApply
 {
-    // Merge with QueryIterProject2
     List<Var> projectionVars ;
     private final OpProject opProject ;
     private final OpExecutor engine ;

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java?rev=1169510&r1=1169509&r2=1169510&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/engine/iterator/QueryIterTopN.java Sun Sep 11 19:40:08 2011
@@ -109,7 +109,7 @@ public class QueryIterTopN extends Query
                     	}
                     }
                 }
-                
+                qIter.close() ;
                 Binding[] y = heap.toArray(new Binding[]{}) ;
                 heap = null ;
                 Arrays.sort(y, comparator) ;

Modified: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl?rev=1169510&r1=1169509&r2=1169510&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl (original)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl Sun Sep 11 19:40:08 2011
@@ -247,6 +247,14 @@
         mf:result  <opt-top-12.srj>
       ]
 
+      [  mf:name    "opt-top-13" ;
+         rdf:type   mfx:TestQuery ; 
+         mf:action
+            [ qt:query  <opt-top-13.rq> ;
+              qt:data   <data-2.ttl> ] ;
+        mf:result  <opt-top-13.srj>
+      ]
+
       [  mf:name    "opt-distinct-to-reduced-01" ;
          rdf:type   mfx:TestQuery ; 
          mf:action

Added: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.rq
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.rq?rev=1169510&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.rq (added)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.rq Sun Sep 11 19:40:08 2011
@@ -0,0 +1,6 @@
+PREFIX : <http://example/>
+
+SELECT ?x 
+{ ?x :p ?v } 
+ORDER BY ASC(?v)
+LIMIT 5

Added: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.srj
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.srj?rev=1169510&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.srj (added)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-13.srj Sun Sep 11 19:40:08 2011
@@ -0,0 +1,24 @@
+{
+  "head": {
+    "vars": [ "x" , "v" ]
+  } ,
+  "results": {
+    "bindings": [
+      {
+        "x": { "type": "uri" , "value": "http://example/x1" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x2" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x3" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x3a" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x4" }
+      }
+    ]
+  }
+}