You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/10/31 18:08:59 UTC
[1/2] jena git commit: Correct one byte path.
Repository: jena
Updated Branches:
refs/heads/master 93b271855 -> 00ba42556
Correct one byte path.
And reformat.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/bc35cef7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/bc35cef7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/bc35cef7
Branch: refs/heads/master
Commit: bc35cef743004c506e87264f1eab6c0434684438
Parents: 93b2718
Author: Andy Seaborne <an...@apache.org>
Authored: Sat Oct 31 17:08:25 2015 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Sat Oct 31 17:08:25 2015 +0000
----------------------------------------------------------------------
.../org/apache/jena/atlas/io/InStreamUTF8.java | 143 +++++++++----------
1 file changed, 65 insertions(+), 78 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/bc35cef7/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
----------------------------------------------------------------------
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java b/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
index acb9034..5253b64 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/InStreamUTF8.java
@@ -113,31 +113,27 @@ public final class InStreamUTF8 extends Reader implements CharStream
{ IO.close(input) ; }
@Override
- public int read(char[] cbuf, int off, int len)
- {
+ public int read(char[] cbuf, int off, int len) {
// Doing this on a block of bytes may be faster.
- for ( int i = off ; i < off+len ; i++ )
- {
- int x = read() ;
- if ( x == -1 )
- {
+ for ( int i = off ; i < off + len ; i++ ) {
+ int x = read();
+ if ( x == -1 ) {
if ( i == off )
- return -1 ;
- return (i-off) ;
+ return -1;
+ return (i - off);
}
- cbuf[i] = (char)x ;
+ cbuf[i] = (char)x;
}
- return len ;
+ return len;
}
@Override
- public final int read()
- {
- int ch = advance(input) ;
- //if ( ! Character.isDefined(ch) ) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
- return ch ;
- }
-
+ public final int read() {
+ int ch = advance(input);
+ // if ( ! Character.isDefined(ch) ) throw new
+ // AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
+ return ch;
+ }
/** Next codepoint, given the first byte of any UTF-8 byte sequence is already known.
* Not necessarily a valid char (this function can be used a straight UTF8 decoder
@@ -147,72 +143,66 @@ public final class InStreamUTF8 extends Reader implements CharStream
{ return advance(input) ; }
/** Next codepoint */
- public static final int advance(InputStreamBuffered input)
- {
+ public static final int advance(InputStreamBuffered input) {
int x = input.advance() ;
if ( x == -1 ) return -1 ;
return advance(input, x) ;
}
/** Next codepoint, given the first byte of any UTF-8 byte sequence is already known.
- * Not necessarily a valid char (this function can be used a straight UTF8 decoder
+ * Not necessarily a valid char (this function can be used as a straight UTF8 decoder).
*/
- public static final int advance(InputStreamBuffered input, int x)
- {
+ private static final int advance(InputStreamBuffered input, int x) {
//count++ ;
- // Fastpath
- if ( x == -1 || x <= 127 )
- {
- //count++ ;
- return x ;
+ // ASCII Fastpath
+ if ( x == -1 || (x >= 0 && x <= 127) ) {
+ // count++ ;
+ return x;
}
// 10 => extension byte
// 110..... => 2 bytes
- if ( (x & 0xE0) == 0xC0 )
- {
- int ch = readMultiBytes(input, x & 0x1F, 2) ;
+ if ( (x & 0xE0) == 0xC0 ) {
+ int ch = readMultiBytes(input, x & 0x1F, 2);
// count += 2 ;
- return ch ;
-
+ return ch;
+
}
- // 1110.... => 3 bytes : 16 bits : not outside 16bit chars
- if ( (x & 0xF0) == 0xE0 )
- {
- int ch = readMultiBytes(input, x & 0x0F, 3) ;
+ // 1110.... => 3 bytes : 16 bits : not outside 16bit chars
+ if ( (x & 0xF0) == 0xE0 ) {
+ int ch = readMultiBytes(input, x & 0x0F, 3);
// count += 3 ;
- //if ( ! Character.isDefined(ch) ) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
- return ch ;
+ // if ( ! Character.isDefined(ch) ) throw new
+ // AtlasException(String.format("Undefined codepoint: 0x%04X", ch))
+ // ;
+ return ch;
}
- // Looking like 4 byte charcater.
- int ch = -2 ;
+ // Looking like 4 byte character.
+ int ch = -2;
// 11110zzz => 4 bytes.
- if ( (x & 0xF8) == 0xF0 )
- {
- ch = readMultiBytes(input, x & 0x08, 4) ;
- // Opsp - need two returns. Character.toChars(ch, chars, 0) ;
- // count += 4 ;
+ if ( (x & 0xF8) == 0xF0 ) {
+ ch = readMultiBytes(input, x & 0x07, 4); // payload of 11110zzz is the low 3 bits (zzz)
+ // Oops - need two returns. Character.toChars(ch, chars, 0) ;
+ // count += 4 ;
}
-
- else
- IO.exception(new IOException("Illegal UTF-8: "+x)) ;
- // This test will go off. We're processing a 4 byte sequence but Java only supports 16 bit chars.
+ else
+ IO.exception(new IOException("Illegal UTF-8: " + x));
+
+ // This test will go off. We're processing a 4 byte sequence but Java
+ // only supports 16 bit chars.
if ( ch > Character.MAX_VALUE )
- throw new AtlasException("Out of range character (must use a surrogate pair)") ;
- if ( ! Character.isDefined(ch) ) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) ;
- return ch ;
+ throw new AtlasException("Out of range character (must use a surrogate pair)");
+ if ( !Character.isDefined(ch) )
+ throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch));
+ return ch;
}
- private static int readMultiBytes(InputStreamBuffered input, int start, int len) //throws IOException
- {
- //System.out.print(" -("+len+")") ; p(start) ;
-
+ private static int readMultiBytes(InputStreamBuffered input, int start, int len) {
int x = start ;
- for ( int i = 0 ; i < len-1 ; i++ )
- {
+ for ( int i = 0 ; i < len-1 ; i++ ) {
int x2 = input.advance() ;
if ( x2 == -1 )
throw new AtlasException("Premature end to UTF-8 sequence at end of input") ;
@@ -226,28 +216,25 @@ public final class InStreamUTF8 extends Reader implements CharStream
return x ;
}
- private static void p(int ch)
- {
- System.out.printf(" %02X", ch) ;
+ private static void p(int ch) {
+ System.out.printf(" %02X", ch);
if ( ch == -1 )
System.out.println();
}
-
- public static String decode(byte[] bytes)
- {
- try
- {
- char[] chars = new char[bytes.length] ;
- InputStream in = new ByteArrayInputStream(bytes) ;
- Reader r = new InStreamUTF8(in) ;
- int len ;
- len = r.read(chars) ;
- IO.close(r) ;
- return new String(chars, 0, len) ;
- } catch (IOException ex)
- {
- IO.exception(ex) ;
- return null ;
+
+ public static String decode(byte[] bytes) {
+ try {
+ char[] chars = new char[bytes.length];
+ InputStream in = new ByteArrayInputStream(bytes);
+ Reader r = new InStreamUTF8(in);
+ int len;
+ len = r.read(chars);
+ IO.close(r);
+ return new String(chars, 0, len);
+ }
+ catch (IOException ex) {
+ IO.exception(ex);
+ return null;
}
}
}
[2/2] jena git commit: JENA-1059: Optimize insert and delete of
constant triples/quads.
Posted by an...@apache.org.
JENA-1059: Optimize insert and delete of constant triples/quads.
"Constant" means uses URIs and literals only.
Remove out-of-date comment about bulk update.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/00ba4255
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/00ba4255
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/00ba4255
Branch: refs/heads/master
Commit: 00ba425564665c6ba5742c81d1f97c790c9352fb
Parents: bc35cef
Author: Andy Seaborne <an...@apache.org>
Authored: Sat Oct 31 17:08:34 2015 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Sat Oct 31 17:08:34 2015 +0000
----------------------------------------------------------------------
.../jena/sparql/modify/UpdateEngineWorker.java | 93 ++++++++++++--------
.../sparql/modify/TestUpdateOperations.java | 37 ++++++++
2 files changed, 93 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/00ba4255/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java b/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
index 333b7f3..b70b9ea 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/modify/UpdateEngineWorker.java
@@ -20,13 +20,16 @@ package org.apache.jena.sparql.modify;
import static org.apache.jena.sparql.modify.TemplateLib.template ;
+import java.util.ArrayList ;
import java.util.Iterator ;
import java.util.List ;
+
import org.apache.jena.atlas.data.BagFactory ;
import org.apache.jena.atlas.data.DataBag ;
import org.apache.jena.atlas.data.ThresholdPolicy ;
import org.apache.jena.atlas.data.ThresholdPolicyFactory ;
import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.lib.Pair ;
import org.apache.jena.atlas.lib.Sink ;
import org.apache.jena.atlas.web.TypedInputStream ;
import org.apache.jena.graph.Graph ;
@@ -503,45 +506,61 @@ public class UpdateEngineWorker implements UpdateVisitor
return el ;
}
- protected void execDelete(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings)
- {
- Iterator<Quad> it = template(quads, dftGraph, bindings) ;
+ // execDelete ; execInsert
+ // Quads involving only IRIs and literals do not change from binding to
+ // binding, so insert or delete them once, rather than repeatedly, if they
+ // are going to be done at all. Note bNodes (if legal at this point) change
+ // from template instantiation to instantiation.
+
+ private static Pair<List<Quad>, List<Quad>> split(List<Quad> quads) {
+ List<Quad> constQuads = new ArrayList<>(quads.size()) ;
+ List<Quad> templateQuads = new ArrayList<>(quads.size()) ;
+ quads.forEach((q)-> {
+ if ( constQuad(q))
+ constQuads.add(q) ;
+ else
+ templateQuads.add(q) ;
+ }) ;
+ return Pair.create(constQuads, templateQuads);
+ }
+
+ private static boolean constQuad(Quad quad) {
+ return constTerm(quad.getGraph()) &&
+ constTerm(quad.getSubject()) &&
+ constTerm(quad.getPredicate()) &&
+ constTerm(quad.getObject()) ;
+ }
+
+ private static boolean constTerm(Node n) {
+ return n.isURI() || n.isLiteral() ;
+ }
+
+ protected void execDelete(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings) {
+ Pair<List<Quad>, List<Quad>> p = split(quads) ;
+ execDelete(p.getLeft(), p.getRight(), dftGraph, bindings) ;
+ }
+
+ protected void execDelete(List<Quad> onceQuads, List<Quad> templateQuads, Node dftGraph, Iterator<Binding> bindings) {
+ if ( onceQuads != null && bindings.hasNext() )
+ // If at least once.
+ onceQuads.forEach(datasetGraph::delete);
+ Iterator<Quad> it = template(templateQuads, dftGraph, bindings) ;
if ( it == null ) return ;
-
- while (it.hasNext())
- {
- Quad q = it.next();
- datasetGraph.delete(q);
- }
-
-
- // Alternate implementation that can use the graph BulkUpdateHandler, but forces all quads into
- // memory (we don't want that!). The issue is that all of the quads can be mixed up based on the
- // user supplied template. If graph stores can benefit from bulk insert/delete operations, then we
- // need to expose a bulk update interface on datasetGraph, not just Graph.
-// MultiMap<Node, Triple> acc = MultiMap.createMapList() ;
-// while (it.hasNext())
-// {
-// Quad q = it.next();
-// acc.put(q.getGraph(), q.asTriple()) ;
-// }
-// for ( Node gn : acc.keys() )
-// {
-// Collection<Triple> triples = acc.get(gn) ;
-// graph(datasetGraph, gn).getBulkUpdateHandler().delete(triples.iterator()) ;
-// }
- }
-
- protected void execInsert(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings)
- {
- Iterator<Quad> it = template(quads, dftGraph, bindings) ;
+ it.forEachRemaining(datasetGraph::delete) ;
+ }
+
+ protected void execInsert(List<Quad> quads, Node dftGraph, Iterator<Binding> bindings) {
+ Pair<List<Quad>, List<Quad>> p = split(quads) ;
+ execInsert(p.getLeft(), p.getRight(), dftGraph, bindings) ;
+ }
+
+ protected void execInsert(List<Quad> onceQuads, List<Quad> templateQuads, Node dftGraph, Iterator<Binding> bindings) {
+ if ( onceQuads != null && bindings.hasNext() )
+ // If at least once.
+ onceQuads.forEach((q)->addTodatasetGraph(datasetGraph, q)) ;
+ Iterator<Quad> it = template(templateQuads, dftGraph, bindings) ;
if ( it == null ) return ;
-
- while (it.hasNext())
- {
- Quad q = it.next();
- addTodatasetGraph(datasetGraph, q);
- }
+ it.forEachRemaining((q)->addTodatasetGraph(datasetGraph, q)) ;
}
// Catch all individual adds of quads (and deletes - mainly for symmetry).
http://git-wip-us.apache.org/repos/asf/jena/blob/00ba4255/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java b/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
index ead8940..3f95d97 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/modify/TestUpdateOperations.java
@@ -18,6 +18,8 @@
package org.apache.jena.sparql.modify;
+import java.util.concurrent.atomic.AtomicLong ;
+
import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.atlas.junit.BaseTest ;
import org.apache.jena.graph.Node ;
@@ -28,6 +30,8 @@ import org.apache.jena.rdf.model.RDFNode ;
import org.apache.jena.rdf.model.Resource ;
import org.apache.jena.sparql.core.DatasetGraph ;
import org.apache.jena.sparql.core.DatasetGraphFactory ;
+import org.apache.jena.sparql.core.DatasetGraphWrapper ;
+import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.sparql.sse.SSE ;
import org.apache.jena.update.* ;
import org.apache.jena.vocabulary.OWL ;
@@ -95,5 +99,38 @@ public class TestUpdateOperations extends BaseTest
assertEquals(1, m.listStatements(anon, null, (RDFNode)null).toList().size());
assertEquals(1, m.listStatements(null, null, anon).toList().size());
}
+
+ // Check constant and template quads
+ @Test public void delete_insert_where_01() {
+ DatasetGraph dsg0 = DatasetGraphFactory.createMem() ;
+ UpdateRequest req = UpdateFactory.create("INSERT DATA { <x> <p> 2 . <z> <q> 2 . <z> <q> 3 . }") ;
+ UpdateAction.execute(req, dsg0);
+ assertEquals(3, dsg0.getDefaultGraph().size()) ;
+
+ AtomicLong counterIns = new AtomicLong(0) ;
+ AtomicLong counterDel = new AtomicLong(0) ;
+ DatasetGraph dsg = new DatasetGraphWrapper(dsg0) {
+ @Override
+ public void add(Quad quad) {
+ counterIns.incrementAndGet() ;
+ get().add(quad) ;
+ }
+
+ @Override
+ public void delete(Quad quad) {
+ counterDel.incrementAndGet() ;
+ get().delete(quad) ;
+ }
+ } ;
+
+ // WHERE clause doubles the effect.
+ String s = "DELETE { ?x <p> 2 . <z> <q> 2 } INSERT { ?x <p> 1 . <x> <q> 1 } WHERE { ?x <p> ?o {} UNION {} }" ;
+ req = UpdateFactory.create(s) ;
+ UpdateAction.execute(req, dsg);
+ assertEquals(3, counterIns.get()) ; // 3 : 1 constant, 2 from template.
+ assertEquals(3, counterDel.get()) ; // 3 : 1 constant, 2 from template.
+ assertEquals(3, dsg.getDefaultGraph().size()) ;
+ }
+
}