You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by sa...@apache.org on 2013/04/09 01:10:08 UTC
svn commit: r1465822 - in /jena/Scratch/sallen/sallen-dev: ./ pom.xml
results.xlsx src/ src/main/ src/main/java/ src/main/java/TestInClause.java
src/test/ src/test/java/
Author: sallen
Date: Mon Apr 8 23:10:08 2013
New Revision: 1465822
URL: http://svn.apache.org/r1465822
Log:
Add a benchmark for large IN vs. VALUE clauses (results are also included, and they aren't pretty)
Added:
jena/Scratch/sallen/sallen-dev/
jena/Scratch/sallen/sallen-dev/pom.xml
jena/Scratch/sallen/sallen-dev/results.xlsx (with props)
jena/Scratch/sallen/sallen-dev/src/
jena/Scratch/sallen/sallen-dev/src/main/
jena/Scratch/sallen/sallen-dev/src/main/java/
jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java
jena/Scratch/sallen/sallen-dev/src/test/
jena/Scratch/sallen/sallen-dev/src/test/java/
Added: jena/Scratch/sallen/sallen-dev/pom.xml
URL: http://svn.apache.org/viewvc/jena/Scratch/sallen/sallen-dev/pom.xml?rev=1465822&view=auto
==============================================================================
--- jena/Scratch/sallen/sallen-dev/pom.xml (added)
+++ jena/Scratch/sallen/sallen-dev/pom.xml Mon Apr 8 23:10:08 2013
@@ -0,0 +1,63 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <groupId>org.apache.jena</groupId>
+ <artifactId>sallen-dev</artifactId>
+ <version>0.0.1-SNAPSHOT</version>
+ <packaging>jar</packaging>
+
+ <name>sallen-dev</name>
+ <url>http://maven.apache.org</url>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <jdk.version>1.6</jdk.version>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.2</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>14.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.jena</groupId>
+ <artifactId>jena-arq</artifactId>
+ <version>2.10.1-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.jena</groupId>
+ <artifactId>jena-tdb</artifactId>
+ <version>0.10.1-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <!-- Plugin version list: http://maven.apache.org/plugins/index.html -->
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.4</version>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ <debug>true</debug>
+ <debuglevel>source,lines,vars</debuglevel>
+ <optimize>true</optimize>
+ <source>${jdk.version}</source>
+ <target>${jdk.version}</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+</project>
Added: jena/Scratch/sallen/sallen-dev/results.xlsx
URL: http://svn.apache.org/viewvc/jena/Scratch/sallen/sallen-dev/results.xlsx?rev=1465822&view=auto
==============================================================================
Binary file - no diff available.
Propchange: jena/Scratch/sallen/sallen-dev/results.xlsx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java
URL: http://svn.apache.org/viewvc/jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java?rev=1465822&view=auto
==============================================================================
--- jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java (added)
+++ jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java Mon Apr 8 23:10:08 2013
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator ;
+import java.util.Random ;
+import java.util.concurrent.TimeUnit ;
+
+import com.google.common.base.Function ;
+import com.google.common.base.Stopwatch ;
+import com.google.common.collect.ContiguousSet ;
+import com.google.common.collect.DiscreteDomain ;
+import com.google.common.collect.Iterables ;
+import com.google.common.collect.Range ;
+import com.hp.hpl.jena.graph.Node ;
+import com.hp.hpl.jena.graph.NodeFactory ;
+import com.hp.hpl.jena.graph.Triple ;
+import com.hp.hpl.jena.graph.TripleMatch ;
+import com.hp.hpl.jena.graph.impl.GraphBase ;
+import com.hp.hpl.jena.query.ARQ ;
+import com.hp.hpl.jena.query.Query ;
+import com.hp.hpl.jena.query.QueryExecution ;
+import com.hp.hpl.jena.query.QueryExecutionFactory ;
+import com.hp.hpl.jena.query.QueryFactory ;
+import com.hp.hpl.jena.query.ResultSet ;
+import com.hp.hpl.jena.rdf.model.Model ;
+import com.hp.hpl.jena.rdf.model.ModelFactory ;
+import com.hp.hpl.jena.rdf.model.ResourceFactory ;
+import com.hp.hpl.jena.tdb.TDBFactory ;
+import com.hp.hpl.jena.util.iterator.ExtendedIterator ;
+import com.hp.hpl.jena.util.iterator.NullIterator ;
+import com.hp.hpl.jena.util.iterator.SingletonIterator ;
+import com.hp.hpl.jena.util.iterator.WrappedIterator ;
+
+import static com.google.common.base.Preconditions.* ;
+
+/**
+ *
+ */
+public class TestInClause
+{
+ private static final int NUM_STATEMENTS = 100000;
+ private static final long SEED = 22;
+
+ private enum ModelType { VIRTUAL, IN_MEMORY, TDB };
+
+ public static void main( String[] args )
+ {
+ ARQ.init();
+
+ Model m = createModel(ModelType.TDB);
+
+ int values = 10000;
+ int inNonOpt = 10000;
+ int inOpt = 3000; // Can't go higher than this or we'll get a stack overflow
+ int interval = 500;
+
+ System.out.print("Method");
+ for (int i=interval; i<=Math.max(Math.max(inOpt, inNonOpt), values); i+=interval)
+ {
+ System.out.print("," + i);
+ }
+ System.out.println();
+
+ System.out.print("VALUES");
+ for (int i=0; i<100; i++) execute(createValuesQuery(2000), m); // Warm-up
+ for (int i=interval; i<=values; i+=interval)
+ {
+ System.out.print("," + execute(createValuesQuery(i), m));
+ }
+ System.out.println();
+
+ System.out.print("IN (non-opt)");
+ ARQ.set(ARQ.optFilterExpandOneOf, false);
+ for (int i=0; i<100; i++) execute(createInQuery(100), m); // Warm-up
+ for (int i=interval; i<=inNonOpt; i+=interval)
+ {
+ System.out.print("," + execute(createInQuery(i), m));
+ }
+ System.out.println();
+
+ System.out.print("IN (opt)");
+ ARQ.set(ARQ.optFilterExpandOneOf, true);
+ for (int i=0; i<100; i++) execute(createInQuery(100), m); // Warm-up
+ for (int i=interval; i<=inOpt; i+=interval)
+ {
+ System.out.print("," + execute(createInQuery(i), m));
+ }
+ System.out.println();
+ }
+
+ private static long execute(Query query, Model m)
+ {
+ Stopwatch sw = new Stopwatch().start();
+ QueryExecution qe = QueryExecutionFactory.create(query, m);
+ ResultSet results = qe.execSelect();
+ while (results.hasNext())
+ {
+ results.next();
+ }
+ return sw.elapsed(TimeUnit.MILLISECONDS);
+ }
+
+ private static Model createModel(ModelType modelType)
+ {
+ final Node s = NodeFactory.createURI("http://example.org/Foo");
+ final Node p = NodeFactory.createURI("http://example.org/Bar");
+ final String oFragment = "value";
+
+ switch (modelType)
+ {
+ case IN_MEMORY:
+ Model m = ModelFactory.createDefaultModel();
+ for (int i=0; i<NUM_STATEMENTS; i++)
+ {
+ m.add(ResourceFactory.createResource(s.getURI()), ResourceFactory.createProperty(p.getURI()), oFragment + i);
+ }
+ return m;
+
+ case TDB:
+ Model m2 = TDBFactory.createDataset().getDefaultModel();
+ for (int i=0; i<NUM_STATEMENTS; i++)
+ {
+ m2.add(ResourceFactory.createResource(s.getURI()), ResourceFactory.createProperty(p.getURI()), oFragment + i);
+ }
+ return m2;
+
+ case VIRTUAL:
+ // A model that contains all of our virtual statements
+ return ModelFactory.createModelForGraph(new GraphBase()
+ {
+ @Override
+ protected ExtendedIterator<Triple> graphBaseFind(TripleMatch m)
+ {
+ if ((m.getMatchSubject() == null || m.getMatchSubject().equals(Node.ANY) || m.getMatchSubject().equals(s)) &&
+ (m.getMatchPredicate() == null || m.getMatchPredicate().equals(Node.ANY) || m.getMatchPredicate().equals(p)))
+ {
+ if (m.getMatchObject() == null || m.getMatchObject().equals(Node.ANY))
+ {
+ Iterable<Triple> it = Iterables.transform(
+ ContiguousSet.create(Range.closedOpen(0, NUM_STATEMENTS), DiscreteDomain.integers()),
+ new Function<Integer, Triple>()
+ {
+ @Override
+ public Triple apply(Integer input)
+ {
+ return Triple.create(s, p, NodeFactory.createLiteral(oFragment + input));
+ }
+ });
+
+ return WrappedIterator.create(it.iterator());
+ }
+ else if (m.getMatchObject().isLiteral() && m.getMatchObject().getLiteralLexicalForm().startsWith(oFragment))
+ {
+ String value = m.getMatchObject().getLiteralLexicalForm();
+ try
+ {
+ int v = Integer.parseInt(value.substring(oFragment.length()));
+ if (v >= 0 && v < NUM_STATEMENTS)
+ {
+ return new SingletonIterator<Triple>(Triple.create(s, p, m.getMatchObject()));
+ }
+ }
+ catch (NumberFormatException e) { }
+ }
+ }
+
+ return NullIterator.instance();
+ }
+ });
+
+ default:
+ throw new UnsupportedOperationException("unknown model type");
+ }
+ }
+
+ private static Query createInQuery(int numResources)
+ {
+ checkArgument(numResources <= NUM_STATEMENTS, "numResources must be <= NUM_STATEMENTS");
+ StringBuilder sb = new StringBuilder("select (count(*) as ?c) where {\n ?s ?p ?o .\n FILTER( ?o IN (\n");
+ boolean firstTime = true;
+ for (int v : Iterables.limit(new MaximalLFSR(NUM_STATEMENTS, SEED), numResources))
+ {
+ if (!firstTime)
+ {
+ sb.append(",\n");
+ }
+ sb.append(" \"value" + v + "\"");
+ firstTime = false;
+ }
+ sb.append("\n ) )\n}");
+// System.out.println(sb.toString());
+ return QueryFactory.create(sb.toString());
+ }
+
+ private static Query createValuesQuery(int numResources)
+ {
+ checkArgument(numResources <= NUM_STATEMENTS, "numResources must be <= NUM_STATEMENTS");
+ StringBuilder sb = new StringBuilder("select (count(*) as ?c) where {\n ?s ?p ?o .\n}\nVALUES ?o {\n");
+
+ boolean firstTime = true;
+ for (int v : Iterables.limit(new MaximalLFSR(NUM_STATEMENTS, SEED), numResources))
+ {
+ if (!firstTime)
+ {
+ sb.append("\n");
+ }
+ sb.append(" \"value" + v + "\"");
+ firstTime = false;
+ }
+ sb.append("\n}");
+// System.out.println(sb.toString());
+ return QueryFactory.create(sb.toString());
+ }
+
+
+ /**
+ * Maximal Linear Feedback Shift Register, which will return an iterator over a psuedo-random sequence
+ * of integers in the range 0 (inclusive) to <code>count</code> (exclusive) without any repeated numbers.
+ * <p/>
+ * O(1) memory and roughly O(1) time.
+ * <p/>
+ * @see http://stackoverflow.com/questions/1816534/random-playlist-algorithm
+ */
+ private static class MaximalLFSR implements Iterable<Integer>
+ {
+ private final int count;
+ private final int feedback;
+ private final int start;
+ //private final Long seed;
+
+ private static int[] _feedback = new int[]
+ {
+ 0x9, 0x17, 0x30, 0x44, 0x8e,
+ 0x108, 0x20d, 0x402, 0x829, 0x1013, 0x203d, 0x4001, 0x801f,
+ 0x1002a, 0x2018b, 0x400e3, 0x801e1, 0x10011e, 0x2002cc, 0x400079, 0x80035e,
+ 0x1000160, 0x20001e4, 0x4000203, 0x8000100, 0x10000235, 0x2000027d, 0x4000016f, 0x80000478
+ };
+
+ public MaximalLFSR(int count)
+ {
+ this(count, null);
+ }
+
+ public MaximalLFSR(int count, long seed)
+ {
+ this(count, (Long)seed);
+ }
+
+ private MaximalLFSR(int count, Long seed)
+ {
+ if (count < 0) throw new IllegalArgumentException("count");
+
+ this.count = count;
+
+ Random r = (null == seed) ? new Random() : new Random(seed);
+ this.start = r.nextInt(count - 2) + 1;
+
+ int bitsForFeedback = getFeedbackSize(count);
+ this.feedback = getFeedbackTerm(bitsForFeedback);
+ }
+
+
+ private int getFeedbackSize(int v)
+ {
+ int r = 0;
+
+ while ((v >>= 1) != 0)
+ {
+ r++;
+ }
+ if (r < 4)
+ r = 4;
+ return r;
+ }
+
+ private int getFeedbackTerm(int bits)
+ {
+ if (bits < 4 || bits >= 28)
+ throw new IndexOutOfBoundsException("bits");
+ return _feedback[bits];
+ }
+
+ @Override
+ public Iterator<Integer> iterator()
+ {
+ return new MaximalLFSR_Iterator();
+ }
+
+
+ private class MaximalLFSR_Iterator implements Iterator<Integer>
+ {
+ private int i;
+ private int valuesReturned = 0;
+
+ MaximalLFSR_Iterator()
+ {
+ i = start;
+ }
+
+ @Override
+ public boolean hasNext()
+ {
+ return valuesReturned < count;
+ }
+
+ @Override
+ public Integer next()
+ {
+ while (true)
+ {
+ if ((i & 1) != 0)
+ {
+ i = (i >> 1) ^ feedback;
+ }
+ else {
+ i = (i >> 1);
+ }
+ if (i <= count)
+ {
+ valuesReturned++;
+ return i-1;
+ }
+ }
+ }
+
+ @Override
+ public void remove()
+ {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ }
+
+
+}