You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/10/31 18:08:59 UTC

[1/2] jena git commit: Correct one byte path.

Repository: jena
Updated Branches:
  refs/heads/master 93b271855 -> 00ba42556


Correct one byte path.

And reformat.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/bc35cef7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/bc35cef7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/bc35cef7

Branch: refs/heads/master
Commit: bc35cef743004c506e87264f1eab6c0434684438
Parents: 93b2718
Author: Andy Seaborne <an...@apache.org>
Authored: Sat Oct 31 17:08:25 2015 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Sat Oct 31 17:08:25 2015 +0000

----------------------------------------------------------------------
 .../org/apache/jena/atlas/io/InStreamUTF8.java  | 143 +++++++++----------
 1 file changed, 65 insertions(+), 78 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/bc35cef7/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
----------------------------------------------------------------------
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java b/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
index acb9034..5253b64 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
@@ -113,31 +113,27 @@ public final class InStreamUTF8 extends Reader implements CharStream
     { IO.close(input) ; }
 
     @Override
-    public int read(char[] cbuf, int off, int len)
-    {
+    public int read(char[] cbuf, int off, int len) {
         // Doing this on a block of bytes may be faster.
-        for ( int i = off ; i < off+len ; i++ )
-        {
-            int x = read() ;
-            if ( x == -1 )
-            {
+        for ( int i = off ; i < off + len ; i++ ) {
+            int x = read();
+            if ( x == -1 ) {
                 if ( i == off )
-                    return -1 ;
-                return (i-off) ;
+                    return -1;
+                return (i - off);
             }
-            cbuf[i] = (char)x ;
+            cbuf[i] = (char)x;
         }
-        return len ; 
+        return len;
     }
 
     @Override
-    public final int read()
-    { 
-        int ch = advance(input) ;
-        //if ( ! Character.isDefined(ch) ) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
-        return ch ;
-    }
-    
+    public final int read() {
+        int ch = advance(input);
+        // if ( ! Character.isDefined(ch) ) throw new
+        // AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
+        return ch;
+    }    
     
     /** Next codepoint, given the first byte of any UTF-8 byte sequence is already known.
      *  Not necessarily a valid char (this function can be used a straight UTF8 decoder
@@ -147,72 +143,66 @@ public final class InStreamUTF8 extends Reader implements CharStream
     { return advance(input) ; }
     
     /** Next codepoint */
-    public static final int advance(InputStreamBuffered input)
-    {
+    public static final int advance(InputStreamBuffered input) {
         int x = input.advance() ;
         if ( x == -1 ) return -1 ;
         return advance(input, x) ;
     }
     
     /** Next codepoint, given the first byte of any UTF-8 byte sequence is already known.
-     * Not necessarily a valid char (this function can be used a straight UTF8 decoder
+     * Not necessarily a valid char (this function can be used as a straight UTF8 decoder).
      */
     
-    public static final int advance(InputStreamBuffered input, int x)
-    {
+    private static final int advance(InputStreamBuffered input, int x) {
         //count++ ;
-        // Fastpath
-        if ( x == -1 || x <= 127 ) 
-        {
-            //count++ ;
-            return x ;
+        // ASCII Fastpath
+        if ( x == -1 || (x >= 0 && x <= 127) ) {
+            // count++ ;
+            return x;
         }
 
         // 10 => extension byte
         // 110..... => 2 bytes
-        if ( (x & 0xE0) == 0xC0 )
-        {
-            int ch = readMultiBytes(input, x & 0x1F, 2) ;
+        if ( (x & 0xE0) == 0xC0 ) {
+            int ch = readMultiBytes(input, x & 0x1F, 2);
             // count += 2 ;
-            return ch ;
-            
+            return ch;
+
         }
-        //  1110.... => 3 bytes : 16 bits : not outside 16bit chars 
-        if ( (x & 0xF0) == 0xE0 ) 
-        {
-            int ch = readMultiBytes(input, x & 0x0F, 3) ;
+        // 1110.... => 3 bytes : 16 bits : not outside 16bit chars
+        if ( (x & 0xF0) == 0xE0 ) {
+            int ch = readMultiBytes(input, x & 0x0F, 3);
             // count += 3 ;
-            //if ( ! Character.isDefined(ch) ) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
-            return ch ;
+            // if ( ! Character.isDefined(ch) ) throw new
+            // AtlasException(String.format("Undefined codepoint: 0x%04X", ch))
+            // ;
+            return ch;
         }
 
-        // Looking like 4 byte charcater.
-        int ch = -2 ;
+        // Looking like 4 byte character.
+        int ch = -2;
         // 11110zzz => 4 bytes.
-        if ( (x & 0xF8) == 0xF0 )
-        {
-             ch = readMultiBytes(input, x & 0x08, 4) ;
-             // Opsp - need two returns. Character.toChars(ch, chars, 0) ;
-             // count += 4 ;
+        if ( (x & 0xF8) == 0xF0 ) {
+            ch = readMultiBytes(input, x & 0x08, 4);
+            // Oops - need two returns. Character.toChars(ch, chars, 0) ;
+            // count += 4 ;
         }
-             
-        else 
-            IO.exception(new IOException("Illegal UTF-8: "+x)) ;
 
-        // This test will go off.  We're processing a 4 byte sequence but Java only supports 16 bit chars. 
+        else
+            IO.exception(new IOException("Illegal UTF-8: " + x));
+
+        // This test will go off. We're processing a 4 byte sequence but Java
+        // only supports 16 bit chars.
         if ( ch > Character.MAX_VALUE )
-            throw new AtlasException("Out of range character (must use a surrogate pair)") ;
-        if ( ! Character.isDefined(ch) ) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
-        return ch ;
+            throw new AtlasException("Out of range character (must use a surrogate pair)");
+        if ( !Character.isDefined(ch) )
+            throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch));
+        return ch;
     }
     
-    private static int readMultiBytes(InputStreamBuffered input, int start, int len) //throws IOException
-    {
-        //System.out.print(" -("+len+")") ; p(start) ;
-        
+    private static int readMultiBytes(InputStreamBuffered input, int start, int len) {
         int x = start ;
-        for ( int i = 0 ; i < len-1 ; i++ )
-        {
+        for ( int i = 0 ; i < len-1 ; i++ ) {
             int x2 = input.advance() ;
             if ( x2 == -1 )
                 throw new AtlasException("Premature end to UTF-8 sequence at end of input") ;
@@ -226,28 +216,25 @@ public final class InStreamUTF8 extends Reader implements CharStream
         return x ;
     }
 
-    private static void p(int ch)
-    {
-        System.out.printf(" %02X", ch) ;
+    private static void p(int ch) {
+        System.out.printf(" %02X", ch);
         if ( ch == -1 )
             System.out.println();
     }
-    
-    public static String decode(byte[] bytes)
-    {
-        try
-        {
-            char[] chars = new char[bytes.length] ;
-            InputStream in = new ByteArrayInputStream(bytes) ;
-            Reader r = new InStreamUTF8(in) ;
-            int len ;
-            len = r.read(chars) ;
-            IO.close(r) ;
-            return new String(chars, 0, len) ;
-        } catch (IOException ex)
-        {
-            IO.exception(ex) ;
-            return null ;
+
+    public static String decode(byte[] bytes) {
+        try {
+            char[] chars = new char[bytes.length];
+            InputStream in = new ByteArrayInputStream(bytes);
+            Reader r = new InStreamUTF8(in);
+            int len;
+            len = r.read(chars);
+            IO.close(r);
+            return new String(chars, 0, len);
+        }
+        catch (IOException ex) {
+            IO.exception(ex);
+            return null;
         }
     }
 }


[2/2] jena git commit: JENA-1059: Optimize insert and delete of constant triples/quads.

Posted by an...@apache.org.
JENA-1059: Optimize insert and delete of constant triples/quads.

"Constant" means the triple/quad uses URIs and literals only.

Remove out-of-date comment about bulk update.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/00ba4255
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/00ba4255
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/00ba4255

Branch: refs/heads/master
Commit: 00ba425564665c6ba5742c81d1f97c790c9352fb
Parents: bc35cef
Author: Andy Seaborne <an...@apache.org>
Authored: Sat Oct 31 17:08:34 2015 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Sat Oct 31 17:08:34 2015 +0000

----------------------------------------------------------------------
 .../jena/sparql/modify/UpdateEngineWorker.java  | 93 ++++++++++++--------
 .../sparql/modify/TestUpdateOperations.java     | 37 ++++++++
 2 files changed, 93 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/00ba4255/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java b/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
index 333b7f3..b70b9ea 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
@@ -20,13 +20,16 @@ package org.apache.jena.sparql.modify;
 
 import static org.apache.jena.sparql.modify.TemplateLib.template ;
 
+import java.util.ArrayList ;
 import java.util.Iterator ;
 import java.util.List ;
+
 import org.apache.jena.atlas.data.BagFactory ;
 import org.apache.jena.atlas.data.DataBag ;
 import org.apache.jena.atlas.data.ThresholdPolicy ;
 import org.apache.jena.atlas.data.ThresholdPolicyFactory ;
 import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.lib.Pair ;
 import org.apache.jena.atlas.lib.Sink ;
 import org.apache.jena.atlas.web.TypedInputStream ;
 import org.apache.jena.graph.Graph ;
@@ -503,45 +506,61 @@ public class UpdateEngineWorker implements UpdateVisitor
         return el ;
     }
 
-    protected void execDelete(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings)
-    {
-        Iterator<Quad> it = template(quads, dftGraph, bindings) ;
+    // execDelete ; execInsert
+    // Quads involving only IRIs and literals do not change from binding to
+    // binding, so they are inserted/deleted once, rather than repeatedly, if
+    // they are going to be done at all. Note bNodes (if legal at this point)
+    // change from template instantiation to instantiation.
+
+    private static Pair<List<Quad>, List<Quad>> split(List<Quad> quads) {
+        List<Quad> constQuads = new ArrayList<>(quads.size()) ;
+        List<Quad> templateQuads = new ArrayList<>(quads.size()) ;
+        quads.forEach((q)-> {
+            if ( constQuad(q))
+                constQuads.add(q) ;
+            else
+                templateQuads.add(q) ;
+        }) ;
+        return Pair.create(constQuads, templateQuads);
+    }
+
+    private static boolean constQuad(Quad quad) {
+        return  constTerm(quad.getGraph()) &&
+                constTerm(quad.getSubject()) &&
+                constTerm(quad.getPredicate()) &&
+                constTerm(quad.getObject()) ;
+    }
+    
+    private static boolean constTerm(Node n) {
+        return n.isURI() || n.isLiteral() ;
+    }
+
+    protected void execDelete(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings) {
+        Pair<List<Quad>, List<Quad>> p = split(quads) ;
+        execDelete(p.getLeft(), p.getRight(), dftGraph, bindings) ;
+    }
+    
+    protected void execDelete(List<Quad> onceQuads, List<Quad> templateQuads, Node dftGraph, Iterator<Binding> bindings) {
+        if ( onceQuads != null && bindings.hasNext() )
+            // If at least once.
+            onceQuads.forEach(datasetGraph::delete);
+        Iterator<Quad> it = template(templateQuads, dftGraph, bindings) ;
         if ( it == null ) return ;
-        
-        while (it.hasNext())
-        {
-            Quad q = it.next();
-            datasetGraph.delete(q);
-        }
-        
-        
-        // Alternate implementation that can use the graph BulkUpdateHandler, but forces all quads into
-        // memory (we don't want that!).  The issue is that all of the quads can be mixed up based on the
-        // user supplied template.  If graph stores can benefit from bulk insert/delete operations, then we
-        // need to expose a bulk update interface on datasetGraph, not just Graph.
-//        MultiMap<Node, Triple> acc = MultiMap.createMapList() ;
-//        while (it.hasNext())
-//        {
-//            Quad q = it.next();
-//            acc.put(q.getGraph(), q.asTriple()) ;
-//        }
-//        for ( Node gn : acc.keys() )
-//        {
-//            Collection<Triple> triples = acc.get(gn) ;
-//            graph(datasetGraph, gn).getBulkUpdateHandler().delete(triples.iterator()) ;
-//        }
-    }
-
-    protected void execInsert(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings)
-    {
-        Iterator<Quad> it = template(quads, dftGraph, bindings) ;
+        it.forEachRemaining(datasetGraph::delete) ;
+    }
+
+    protected void execInsert(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings) {
+        Pair<List<Quad>, List<Quad>> p = split(quads) ;
+        execInsert(p.getLeft(), p.getRight(), dftGraph, bindings) ;
+    }
+    
+    protected void execInsert(List<Quad> onceQuads, List<Quad> templateQuads, Node dftGraph, Iterator<Binding> bindings) {
+        if ( onceQuads != null && bindings.hasNext() )
+            // If at least once.
+            onceQuads.forEach((q)->addTodatasetGraph(datasetGraph, q)) ;
+        Iterator<Quad> it = template(templateQuads, dftGraph, bindings) ;
         if ( it == null ) return ;
-        
-        while (it.hasNext())
-        {
-            Quad q = it.next();
-            addTodatasetGraph(datasetGraph, q);
-        }
+        it.forEachRemaining((q)->addTodatasetGraph(datasetGraph, q)) ;
     }
     
     // Catch all individual adds of quads (and deletes - mainly for symmetry). 

http://git-wip-us.apache.org/repos/asf/jena/blob/00ba4255/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java b/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
index ead8940..3f95d97 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
@@ -18,6 +18,8 @@
 
 package org.apache.jena.sparql.modify;
 
+import java.util.concurrent.atomic.AtomicLong ;
+
 import org.apache.jena.atlas.iterator.Iter ;
 import org.apache.jena.atlas.junit.BaseTest ;
 import org.apache.jena.graph.Node ;
@@ -28,6 +30,8 @@ import org.apache.jena.rdf.model.RDFNode ;
 import org.apache.jena.rdf.model.Resource ;
 import org.apache.jena.sparql.core.DatasetGraph ;
 import org.apache.jena.sparql.core.DatasetGraphFactory ;
+import org.apache.jena.sparql.core.DatasetGraphWrapper ;
+import org.apache.jena.sparql.core.Quad ;
 import org.apache.jena.sparql.sse.SSE ;
 import org.apache.jena.update.* ;
 import org.apache.jena.vocabulary.OWL ;
@@ -95,5 +99,38 @@ public class TestUpdateOperations extends BaseTest
         assertEquals(1, m.listStatements(anon, null, (RDFNode)null).toList().size());
         assertEquals(1, m.listStatements(null, null, anon).toList().size());
     }
+    
+    // Check constant and template quads 
+    @Test public void delete_insert_where_01() {
+        DatasetGraph dsg0 = DatasetGraphFactory.createMem() ;
+        UpdateRequest req = UpdateFactory.create("INSERT DATA { <x> <p> 2 . <z> <q> 2 . <z> <q> 3 . }") ;
+        UpdateAction.execute(req, dsg0);
+        assertEquals(3, dsg0.getDefaultGraph().size()) ;
+        
+        AtomicLong counterIns = new AtomicLong(0) ;
+        AtomicLong counterDel = new AtomicLong(0) ;
+        DatasetGraph dsg = new DatasetGraphWrapper(dsg0) {
+            @Override
+            public void add(Quad quad) { 
+                counterIns.incrementAndGet() ;
+                get().add(quad) ;
+            }
+
+            @Override
+            public void delete(Quad quad) {
+                counterDel.incrementAndGet() ;
+                get().delete(quad) ; 
+            }
+        } ;
+        
+        // WHERE clause doubles the effect.
+        String s = "DELETE { ?x <p> 2 . <z> <q> 2 } INSERT { ?x <p> 1 . <x> <q> 1  } WHERE { ?x <p> ?o {} UNION {} }" ;
+        req = UpdateFactory.create(s) ;
+        UpdateAction.execute(req, dsg);
+        assertEquals(3, counterIns.get()) ;   // 3 : 1 constant, 2 from template.
+        assertEquals(3, counterIns.get()) ;
+        assertEquals(3, dsg.getDefaultGraph().size()) ;
+    }
+
 }