You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/10/31 18:09:00 UTC

[2/2] jena git commit: JENA-1059: Optimize insert and delete of constant triples/quads.

JENA-1059: Optimize insert and delete of constant triples/quads.

"Constant" means uses URIs and literals only.

Remove out-of-date comment about bulk update.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/00ba4255
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/00ba4255
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/00ba4255

Branch: refs/heads/master
Commit: 00ba425564665c6ba5742c81d1f97c790c9352fb
Parents: bc35cef
Author: Andy Seaborne <an...@apache.org>
Authored: Sat Oct 31 17:08:34 2015 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Sat Oct 31 17:08:34 2015 +0000

----------------------------------------------------------------------
 .../jena/sparql/modify/UpdateEngineWorker.java  | 93 ++++++++++++--------
 .../sparql/modify/TestUpdateOperations.java     | 37 ++++++++
 2 files changed, 93 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/00ba4255/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java b/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
index 333b7f3..b70b9ea 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
@@ -20,13 +20,16 @@ package org.apache.jena.sparql.modify;
 
 import static org.apache.jena.sparql.modify.TemplateLib.template ;
 
+import java.util.ArrayList ;
 import java.util.Iterator ;
 import java.util.List ;
+
 import org.apache.jena.atlas.data.BagFactory ;
 import org.apache.jena.atlas.data.DataBag ;
 import org.apache.jena.atlas.data.ThresholdPolicy ;
 import org.apache.jena.atlas.data.ThresholdPolicyFactory ;
 import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.lib.Pair ;
 import org.apache.jena.atlas.lib.Sink ;
 import org.apache.jena.atlas.web.TypedInputStream ;
 import org.apache.jena.graph.Graph ;
@@ -503,45 +506,61 @@ public class UpdateEngineWorker implements UpdateVisitor
         return el ;
     }
 
-    protected void execDelete(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings)
-    {
-        Iterator<Quad> it = template(quads, dftGraph, bindings) ;
+    // execDelete ; execInsert
+    // Quads involving only IRIs and literals do not change from binding to
+    // binding, so they are inserted or deleted once, rather than repeatedly,
+    // if they are going to be done at all. Note bNodes (if legal at this
+    // point) change from template instantiation to instantiation.
+
+    private static Pair<List<Quad>, List<Quad>> split(List<Quad> quads) {
+        List<Quad> constQuads = new ArrayList<>(quads.size()) ;
+        List<Quad> templateQuads = new ArrayList<>(quads.size()) ;
+        quads.forEach((q)-> {
+            if ( constQuad(q))
+                constQuads.add(q) ;
+            else
+                templateQuads.add(q) ;
+        }) ;
+        return Pair.create(constQuads, templateQuads);
+    }
+
+    private static boolean constQuad(Quad quad) {
+        return  constTerm(quad.getGraph()) &&
+                constTerm(quad.getSubject()) &&
+                constTerm(quad.getPredicate()) &&
+                constTerm(quad.getObject()) ;
+    }
+    
+    private static boolean constTerm(Node n) {
+        return n.isURI() || n.isLiteral() ;
+    }
+
+    protected void execDelete(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings) {
+        Pair<List<Quad>, List<Quad>> p = split(quads) ;
+        execDelete(p.getLeft(), p.getRight(), dftGraph, bindings) ;
+    }
+    
+    protected void execDelete(List<Quad> onceQuads, List<Quad> templateQuads, Node dftGraph, Iterator<Binding> bindings) {
+        if ( onceQuads != null && bindings.hasNext() )
+            // If at least once.
+            onceQuads.forEach(datasetGraph::delete);
+        Iterator<Quad> it = template(templateQuads, dftGraph, bindings) ;
         if ( it == null ) return ;
-        
-        while (it.hasNext())
-        {
-            Quad q = it.next();
-            datasetGraph.delete(q);
-        }
-        
-        
-        // Alternate implementation that can use the graph BulkUpdateHandler, but forces all quads into
-        // memory (we don't want that!).  The issue is that all of the quads can be mixed up based on the
-        // user supplied template.  If graph stores can benefit from bulk insert/delete operations, then we
-        // need to expose a bulk update interface on datasetGraph, not just Graph.
-//        MultiMap<Node, Triple> acc = MultiMap.createMapList() ;
-//        while (it.hasNext())
-//        {
-//            Quad q = it.next();
-//            acc.put(q.getGraph(), q.asTriple()) ;
-//        }
-//        for ( Node gn : acc.keys() )
-//        {
-//            Collection<Triple> triples = acc.get(gn) ;
-//            graph(datasetGraph, gn).getBulkUpdateHandler().delete(triples.iterator()) ;
-//        }
-    }
-
-    protected void execInsert(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings)
-    {
-        Iterator<Quad> it = template(quads, dftGraph, bindings) ;
+        it.forEachRemaining(datasetGraph::delete) ;
+    }
+
+    protected void execInsert(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings) {
+        Pair<List<Quad>, List<Quad>> p = split(quads) ;
+        execInsert(p.getLeft(), p.getRight(), dftGraph, bindings) ;
+    }
+    
+    protected void execInsert(List<Quad> onceQuads, List<Quad> templateQuads, Node dftGraph, Iterator<Binding> bindings) {
+        if ( onceQuads != null && bindings.hasNext() )
+            // If at least once.
+            onceQuads.forEach((q)->addTodatasetGraph(datasetGraph, q)) ;
+        Iterator<Quad> it = template(templateQuads, dftGraph, bindings) ;
         if ( it == null ) return ;
-        
-        while (it.hasNext())
-        {
-            Quad q = it.next();
-            addTodatasetGraph(datasetGraph, q);
-        }
+        it.forEachRemaining((q)->addTodatasetGraph(datasetGraph, q)) ;
     }
     
     // Catch all individual adds of quads (and deletes - mainly for symmetry). 

http://git-wip-us.apache.org/repos/asf/jena/blob/00ba4255/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java b/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
index ead8940..3f95d97 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
@@ -18,6 +18,8 @@
 
 package org.apache.jena.sparql.modify;
 
+import java.util.concurrent.atomic.AtomicLong ;
+
 import org.apache.jena.atlas.iterator.Iter ;
 import org.apache.jena.atlas.junit.BaseTest ;
 import org.apache.jena.graph.Node ;
@@ -28,6 +30,8 @@ import org.apache.jena.rdf.model.RDFNode ;
 import org.apache.jena.rdf.model.Resource ;
 import org.apache.jena.sparql.core.DatasetGraph ;
 import org.apache.jena.sparql.core.DatasetGraphFactory ;
+import org.apache.jena.sparql.core.DatasetGraphWrapper ;
+import org.apache.jena.sparql.core.Quad ;
 import org.apache.jena.sparql.sse.SSE ;
 import org.apache.jena.update.* ;
 import org.apache.jena.vocabulary.OWL ;
@@ -95,5 +99,38 @@ public class TestUpdateOperations extends BaseTest
         assertEquals(1, m.listStatements(anon, null, (RDFNode)null).toList().size());
         assertEquals(1, m.listStatements(null, null, anon).toList().size());
     }
+    
+    // Check constant quads are applied once and template quads once per binding.
+    @Test public void delete_insert_where_01() {
+        DatasetGraph dsg0 = DatasetGraphFactory.createMem() ;
+        UpdateRequest req = UpdateFactory.create("INSERT DATA { <x> <p> 2 . <z> <q> 2 . <z> <q> 3 . }") ;
+        UpdateAction.execute(req, dsg0);
+        assertEquals(3, dsg0.getDefaultGraph().size()) ;
+        
+        AtomicLong counterIns = new AtomicLong(0) ;
+        AtomicLong counterDel = new AtomicLong(0) ;
+        DatasetGraph dsg = new DatasetGraphWrapper(dsg0) {
+            @Override
+            public void add(Quad quad) { 
+                counterIns.incrementAndGet() ;
+                get().add(quad) ;
+            }
+
+            @Override
+            public void delete(Quad quad) {
+                counterDel.incrementAndGet() ;
+                get().delete(quad) ; 
+            }
+        } ;
+        
+        // WHERE clause doubles the effect: the UNION of two empty groups yields two bindings.
+        String s = "DELETE { ?x <p> 2 . <z> <q> 2 } INSERT { ?x <p> 1 . <x> <q> 1  } WHERE { ?x <p> ?o {} UNION {} }" ;
+        req = UpdateFactory.create(s) ;
+        UpdateAction.execute(req, dsg);
+        assertEquals(3, counterIns.get()) ;   // 3 : 1 constant, 2 from template.
+        assertEquals(3, counterDel.get()) ;   // 3 : 1 constant, 2 from template.
+        assertEquals(3, dsg.getDefaultGraph().size()) ;
+    }
+
 }