You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by ca...@apache.org on 2011/09/05 00:10:29 UTC

svn commit: r1165123 - in /incubator/jena/Jena2/ARQ/trunk: src-test/com/hp/hpl/jena/sparql/algebra/optimize/ src/com/hp/hpl/jena/sparql/algebra/optimize/ testing/ARQ/Optimization/

Author: castagna
Date: Sun Sep  4 22:10:29 2011
New Revision: 1165123

URL: http://svn.apache.org/viewvc?rev=1165123&view=rev
Log:
JENA-109

Now the TransformTopN we added with JENA-89 works with OFFSET as well.
We optimise to ( slice X _ ( top ( X+N ... ) ... ) ).
The threshold is 1000 by default but it can be changed via an ARQ symbol (i.e. topNSortingThreshold).

Added:
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.srj
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.rq
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.srj
Modified:
    incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
    incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl
    incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.rq

Modified: incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java?rev=1165123&r1=1165122&r2=1165123&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src-test/com/hp/hpl/jena/sparql/algebra/optimize/TestOptimizer.java Sun Sep  4 22:10:29 2011
@@ -151,6 +151,17 @@ public class TestOptimizer extends BaseT
 
     @Test public void slice_order_to_topn_03()
     {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+        String queryString = "SELECT * { ?s ?p ?o } ORDER BY ?p ?o OFFSET 4242 LIMIT 10"  ;  
+        String opExpectedString = 
+            "(slice 4242 10\n" + 
+            "  (order (?p ?o)\n" +
+            "    (bgp (triple ?s ?p ?o))))" ; 
+        check(queryString, opExpectedString) ;
+    }
+
+    @Test public void slice_order_to_topn_04()
+    {
         try {
             ARQ.setFalse(ARQ.optTopNSorting) ;
             assertTrue(ARQ.isFalse(ARQ.optTopNSorting)) ;
@@ -165,7 +176,7 @@ public class TestOptimizer extends BaseT
         }
     }
 
-    @Test public void slice_order_to_topn_04()
+    @Test public void slice_order_to_topn_05()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
         String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 42"  ;  
@@ -176,7 +187,19 @@ public class TestOptimizer extends BaseT
         check(queryString, opExpectedString) ;
     }
 
-    @Test public void slice_order_to_topn_05()
+    @Test public void slice_order_to_topn_06()
+    {
+        assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
+        String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o OFFSET 24 LIMIT 42"  ;  
+        String opExpectedString = 
+            "(slice 24 _\n" + 
+            "  (top (66 ?p ?o)\n" + 
+            "    (distinct\n" +
+            "       (bgp (triple ?s ?p ?o)))))" ; 
+        check(queryString, opExpectedString) ;
+    }
+
+    @Test public void slice_order_to_topn_07()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
         String queryString = "SELECT REDUCED * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 42"  ;  
@@ -187,7 +210,7 @@ public class TestOptimizer extends BaseT
         check(queryString, opExpectedString) ;
     }
 
-    @Test public void slice_order_to_topn_06()
+    @Test public void slice_order_to_topn_08()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
         String queryString = "SELECT DISTINCT * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 4242"  ;  
@@ -199,7 +222,7 @@ public class TestOptimizer extends BaseT
         check(queryString, opExpectedString) ;
     }
 
-    @Test public void slice_order_to_topn_07()
+    @Test public void slice_order_to_topn_09()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
         String queryString = "SELECT REDUCED * { ?s ?p ?o } ORDER BY ?p ?o LIMIT 4242"  ;  
@@ -211,13 +234,13 @@ public class TestOptimizer extends BaseT
         check(queryString, opExpectedString) ;
     }
     
-    @Test public void slice_order_to_topn_08()
+    @Test public void slice_order_to_topn_10()
     {
         assertTrue(ARQ.isTrueOrUndef(ARQ.optTopNSorting)) ;
         String queryString = "SELECT * { ?s ?p ?o } ORDER BY ?p ?o OFFSET 1 LIMIT 5"  ;  
         String opExpectedString = 
-            "(slice 1 5\n" + 
-            "  (order (?p ?o)\n" +
+            "(slice 1 _\n" +
+            "  (top (6 ?p ?o)\n" +
             "    (bgp (triple ?s ?p ?o))))" ; 
         check(queryString, opExpectedString) ;
     }

Modified: incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java?rev=1165123&r1=1165122&r2=1165123&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/com/hp/hpl/jena/sparql/algebra/optimize/TransformTopN.java Sun Sep  4 22:10:29 2011
@@ -18,7 +18,9 @@
 
 package com.hp.hpl.jena.sparql.algebra.optimize;
 
+import com.hp.hpl.jena.query.ARQ ;
 import com.hp.hpl.jena.query.Query ;
+import com.hp.hpl.jena.sparql.ARQConstants ;
 import com.hp.hpl.jena.sparql.algebra.Op ;
 import com.hp.hpl.jena.sparql.algebra.op.Op1 ;
 import com.hp.hpl.jena.sparql.algebra.TransformCopy ;
@@ -27,24 +29,31 @@ import com.hp.hpl.jena.sparql.algebra.op
 import com.hp.hpl.jena.sparql.algebra.op.OpReduced ;
 import com.hp.hpl.jena.sparql.algebra.op.OpSlice ;
 import com.hp.hpl.jena.sparql.algebra.op.OpTopN ;
+import com.hp.hpl.jena.sparql.util.Symbol ;
 
 public class TransformTopN extends TransformCopy {
 
-	public static final int TOPN_LIMIT_THRESHOLD = 100 ;
-	
+	private static final int defaultTopNSortingThreshold = 1000;
+	public static final Symbol externalSortBufferSize = ARQConstants.allocSymbol("topNSortingThreshold") ;
+
     @Override
 	public Op transform(OpSlice opSlice, Op subOp) { 
 
         /* This looks for two cases:
-         * (slice _  N
+         * (slice X N
          *   (order (cond) PATTERN) )
-         * ==> (top (N cond) PATTERN)
+         * ==> 
+         * (slice X _
+         *   (top (X+N cond) PATTERN) )
+         * 
          * and
          * 
-         * (slice _  N
+         * (slice X N
          *   (distinct or reduced
          *     (order (cond) PATTERN) ))
-         * ==>  (top (N cond) (distinct PATTERN))
+         * ==>  
+         * (slice X _
+         *   (top (X+N cond) (distinct PATTERN))
          *
          * and evaluation of (top) looks for (top N (distinct PATTERN))
          * See OpExecutor.execute(OpTopN)
@@ -54,16 +63,20 @@ public class TransformTopN extends Trans
          * as we process reducded or distinct in the same way. 
          */
         
-        boolean acceptableStart = ( ( opSlice.getStart() == 0 ) || ( opSlice.getStart() == Query.NOLIMIT ) ) ;
-        boolean acceptableFinish =  (opSlice.getLength() < TOPN_LIMIT_THRESHOLD ) ;  
-        
-    	if ( acceptableStart && acceptableFinish )
+        int threshold = (Integer)ARQ.getContext().get(externalSortBufferSize, defaultTopNSortingThreshold) ;
+        long offset = ( opSlice.getStart() != Query.NOLIMIT ) ? opSlice.getStart() : 0L ;
+    	if ( offset + opSlice.getLength() < threshold )
     	{
         	if ( subOp instanceof OpOrder ) 
         	{
         	    // First case.
         	    OpOrder opOrder = (OpOrder)subOp ;
-        	    return new OpTopN( opOrder.getSubOp(), (int)opSlice.getLength(), opOrder.getConditions() ) ;
+                OpTopN opTopN = new OpTopN( opOrder.getSubOp(), (int)(offset+opSlice.getLength()), opOrder.getConditions() ) ;
+        	    if ( offset == 0 ) {
+        	        return opTopN ;
+        	    } else {
+                    return new OpSlice( opTopN, offset, Query.NOLIMIT ) ;        	        
+        	    }
         	}
             	
         	if ( subOp instanceof OpDistinct || subOp instanceof OpReduced )
@@ -72,11 +85,16 @@ public class TransformTopN extends Trans
         	    if ( subSubOp instanceof OpOrder ) {
         	        OpOrder opOrder = (OpOrder)subSubOp ;
         	        Op opDistinct2 = OpDistinct.create(opOrder.getSubOp()) ;
-        	        return new OpTopN( opDistinct2, (int)opSlice.getLength(), opOrder.getConditions() ) ;
+        	        OpTopN opTopN = new OpTopN( opDistinct2, (int)(offset+opSlice.getLength()), opOrder.getConditions() ) ; 
+        	        if ( offset == 0 ) {
+        	            return opTopN ;
+        	        } else {
+                        return new OpSlice( opTopN, offset, Query.NOLIMIT ) ;         	            
+        	        }
         	    }
         	}
     	}
-    	
+
     	// Pass through.
     	return super.transform(opSlice, subOp) ; 
    	}

Modified: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl?rev=1165123&r1=1165122&r2=1165123&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl (original)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/manifest.ttl Sun Sep  4 22:10:29 2011
@@ -231,6 +231,22 @@
         mf:result  <opt-top-10.srj>
       ]
 
+      [  mf:name    "opt-top-11" ;
+         rdf:type   mfx:TestQuery ; 
+         mf:action
+            [ qt:query  <opt-top-11.rq> ;
+              qt:data   <data-2.ttl> ] ;
+        mf:result  <opt-top-11.srj>
+      ]
+
+      [  mf:name    "opt-top-12" ;
+         rdf:type   mfx:TestQuery ; 
+         mf:action
+            [ qt:query  <opt-top-12.rq> ;
+              qt:data   <data-2.ttl> ] ;
+        mf:result  <opt-top-12.srj>
+      ]
+
       [  mf:name    "opt-distinct-to-reduced-01" ;
          rdf:type   mfx:TestQuery ; 
          mf:action

Modified: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.rq
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.rq?rev=1165123&r1=1165122&r2=1165123&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.rq (original)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.rq Sun Sep  4 22:10:29 2011
@@ -1,7 +1,7 @@
 PREFIX : <http://example/>
 
-SELECT DISINCT ?x 
+SELECT * 
 { ?x :p ?v } 
 ORDER BY ASC(?v)
-# Greater than the limit of using a bounded prirotiy queue.
-LIMIT 50000
+OFFSET 2
+LIMIT 5

Added: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.srj
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.srj?rev=1165123&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.srj (added)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-11.srj Sun Sep  4 22:10:29 2011
@@ -0,0 +1,29 @@
+{
+  "head": {
+    "vars": [ "x" , "v" ]
+  } ,
+  "results": {
+    "bindings": [
+      {
+        "x": { "type": "uri" , "value": "http://example/x3" } ,
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "3" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x3a" } ,
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "3" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x4" } ,
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "4" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x5" } ,
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "5" }
+      } ,
+      {
+        "x": { "type": "uri" , "value": "http://example/x6" } ,
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "6" }
+      }
+    ]
+  }
+}

Added: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.rq
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.rq?rev=1165123&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.rq (added)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.rq Sun Sep  4 22:10:29 2011
@@ -0,0 +1,7 @@
+PREFIX : <http://example/>
+
+SELECT DISTINCT ?v
+{ ?x :p ?v } 
+ORDER BY ASC(?v)
+OFFSET 2
+LIMIT 5

Added: incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.srj
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.srj?rev=1165123&view=auto
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.srj (added)
+++ incubator/jena/Jena2/ARQ/trunk/testing/ARQ/Optimization/opt-top-12.srj Sun Sep  4 22:10:29 2011
@@ -0,0 +1,24 @@
+{
+  "head": {
+    "vars": [ "v" ]
+  } ,
+  "results": {
+    "bindings": [
+      {
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "3" }
+      } ,
+      {
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "4" }
+      } ,
+      {
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "5" }
+      } ,
+      {
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "6" }
+      } ,
+      {
+        "v": { "datatype": "http://www.w3.org/2001/XMLSchema#integer" , "type": "typed-literal" , "value": "7" }
+      }
+    ]
+  }
+}