You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by sa...@apache.org on 2013/04/09 01:10:08 UTC

svn commit: r1465822 - in /jena/Scratch/sallen/sallen-dev: ./ pom.xml results.xlsx src/ src/main/ src/main/java/ src/main/java/TestInClause.java src/test/ src/test/java/

Author: sallen
Date: Mon Apr  8 23:10:08 2013
New Revision: 1465822

URL: http://svn.apache.org/r1465822
Log:
Add a benchmark for large IN vs. VALUE clauses (results are also included, and they aren't pretty)

Added:
    jena/Scratch/sallen/sallen-dev/
    jena/Scratch/sallen/sallen-dev/pom.xml
    jena/Scratch/sallen/sallen-dev/results.xlsx   (with props)
    jena/Scratch/sallen/sallen-dev/src/
    jena/Scratch/sallen/sallen-dev/src/main/
    jena/Scratch/sallen/sallen-dev/src/main/java/
    jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java
    jena/Scratch/sallen/sallen-dev/src/test/
    jena/Scratch/sallen/sallen-dev/src/test/java/

Added: jena/Scratch/sallen/sallen-dev/pom.xml
URL: http://svn.apache.org/viewvc/jena/Scratch/sallen/sallen-dev/pom.xml?rev=1465822&view=auto
==============================================================================
--- jena/Scratch/sallen/sallen-dev/pom.xml (added)
+++ jena/Scratch/sallen/sallen-dev/pom.xml Mon Apr  8 23:10:08 2013
@@ -0,0 +1,63 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>org.apache.jena</groupId>
+  <artifactId>sallen-dev</artifactId>
+  <version>0.0.1-SNAPSHOT</version>
+  <packaging>jar</packaging>
+
+  <name>sallen-dev</name>
+  <url>http://maven.apache.org</url>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <jdk.version>1.6</jdk.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.2</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>14.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.jena</groupId>
+      <artifactId>jena-arq</artifactId>
+      <version>2.10.1-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.jena</groupId>
+      <artifactId>jena-tdb</artifactId>
+      <version>0.10.1-SNAPSHOT</version>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <!-- Plugin version list: http://maven.apache.org/plugins/index.html -->
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>2.4</version>
+          <configuration>
+            <encoding>UTF-8</encoding>
+            <debug>true</debug>
+            <debuglevel>source,lines,vars</debuglevel>
+            <optimize>true</optimize>
+            <source>${jdk.version}</source>
+            <target>${jdk.version}</target>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+  
+</project>

Added: jena/Scratch/sallen/sallen-dev/results.xlsx
URL: http://svn.apache.org/viewvc/jena/Scratch/sallen/sallen-dev/results.xlsx?rev=1465822&view=auto
==============================================================================
Binary file - no diff available.

Propchange: jena/Scratch/sallen/sallen-dev/results.xlsx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java
URL: http://svn.apache.org/viewvc/jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java?rev=1465822&view=auto
==============================================================================
--- jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java (added)
+++ jena/Scratch/sallen/sallen-dev/src/main/java/TestInClause.java Mon Apr  8 23:10:08 2013
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator ;
+import java.util.Random ;
+import java.util.concurrent.TimeUnit ;
+
+import com.google.common.base.Function ;
+import com.google.common.base.Stopwatch ;
+import com.google.common.collect.ContiguousSet ;
+import com.google.common.collect.DiscreteDomain ;
+import com.google.common.collect.Iterables ;
+import com.google.common.collect.Range ;
+import com.hp.hpl.jena.graph.Node ;
+import com.hp.hpl.jena.graph.NodeFactory ;
+import com.hp.hpl.jena.graph.Triple ;
+import com.hp.hpl.jena.graph.TripleMatch ;
+import com.hp.hpl.jena.graph.impl.GraphBase ;
+import com.hp.hpl.jena.query.ARQ ;
+import com.hp.hpl.jena.query.Query ;
+import com.hp.hpl.jena.query.QueryExecution ;
+import com.hp.hpl.jena.query.QueryExecutionFactory ;
+import com.hp.hpl.jena.query.QueryFactory ;
+import com.hp.hpl.jena.query.ResultSet ;
+import com.hp.hpl.jena.rdf.model.Model ;
+import com.hp.hpl.jena.rdf.model.ModelFactory ;
+import com.hp.hpl.jena.rdf.model.ResourceFactory ;
+import com.hp.hpl.jena.tdb.TDBFactory ;
+import com.hp.hpl.jena.util.iterator.ExtendedIterator ;
+import com.hp.hpl.jena.util.iterator.NullIterator ;
+import com.hp.hpl.jena.util.iterator.SingletonIterator ;
+import com.hp.hpl.jena.util.iterator.WrappedIterator ;
+
+import static com.google.common.base.Preconditions.* ;
+
+/**
+ * 
+ */
+public class TestInClause
+{
+    private static final int NUM_STATEMENTS = 100000;
+    private static final long SEED = 22;
+    
+    private enum ModelType { VIRTUAL, IN_MEMORY, TDB };
+    
+    public static void main( String[] args )
+    {
+        ARQ.init();
+        
+        Model m = createModel(ModelType.TDB);
+        
+        int values = 10000;
+        int inNonOpt = 10000;
+        int inOpt = 3000;  // Can't go higher than this or we'll get a stack overflow
+        int interval = 500;
+        
+        System.out.print("Method");
+        for (int i=interval; i<=Math.max(Math.max(inOpt, inNonOpt), values); i+=interval)
+        {
+            System.out.print("," + i);
+        }
+        System.out.println();
+        
+        System.out.print("VALUES");
+        for (int i=0; i<100; i++) execute(createValuesQuery(2000), m); // Warm-up
+        for (int i=interval; i<=values; i+=interval)
+        {
+            System.out.print("," + execute(createValuesQuery(i), m));
+        }
+        System.out.println();
+        
+        System.out.print("IN (non-opt)");
+        ARQ.set(ARQ.optFilterExpandOneOf, false);
+        for (int i=0; i<100; i++) execute(createInQuery(100), m); // Warm-up
+        for (int i=interval; i<=inNonOpt; i+=interval)
+        {
+            System.out.print("," + execute(createInQuery(i), m));
+        }
+        System.out.println();
+        
+        System.out.print("IN (opt)");
+        ARQ.set(ARQ.optFilterExpandOneOf, true);
+        for (int i=0; i<100; i++) execute(createInQuery(100), m); // Warm-up
+        for (int i=interval; i<=inOpt; i+=interval)
+        {
+            System.out.print("," + execute(createInQuery(i), m));
+        }
+        System.out.println();
+    }
+    
+    private static long execute(Query query, Model m)
+    {
+        Stopwatch sw = new Stopwatch().start();
+        QueryExecution qe = QueryExecutionFactory.create(query, m);
+        ResultSet results = qe.execSelect();
+        while (results.hasNext())
+        {
+            results.next();
+        }
+        return sw.elapsed(TimeUnit.MILLISECONDS);
+    }
+    
+    private static Model createModel(ModelType modelType)
+    {
+        final Node s = NodeFactory.createURI("http://example.org/Foo");
+        final Node p = NodeFactory.createURI("http://example.org/Bar");
+        final String oFragment = "value";
+        
+        switch (modelType)
+        {
+        case IN_MEMORY:
+            Model m = ModelFactory.createDefaultModel();
+            for (int i=0; i<NUM_STATEMENTS; i++)
+            {
+                m.add(ResourceFactory.createResource(s.getURI()), ResourceFactory.createProperty(p.getURI()), oFragment + i);
+            }
+            return m;
+            
+        case TDB:
+            Model m2 = TDBFactory.createDataset().getDefaultModel();
+            for (int i=0; i<NUM_STATEMENTS; i++)
+            {
+                m2.add(ResourceFactory.createResource(s.getURI()), ResourceFactory.createProperty(p.getURI()), oFragment + i);
+            }
+            return m2;
+            
+        case VIRTUAL:
+            // A model that contains all of our virtual statements
+            return ModelFactory.createModelForGraph(new GraphBase()
+            {
+                @Override
+                protected ExtendedIterator<Triple> graphBaseFind(TripleMatch m)
+                {
+                    if ((m.getMatchSubject() == null || m.getMatchSubject().equals(Node.ANY) || m.getMatchSubject().equals(s)) &&
+                        (m.getMatchPredicate() == null || m.getMatchPredicate().equals(Node.ANY) || m.getMatchPredicate().equals(p)))
+                    {
+                        if (m.getMatchObject() == null || m.getMatchObject().equals(Node.ANY))
+                        {
+                            Iterable<Triple> it = Iterables.transform(
+                                ContiguousSet.create(Range.closedOpen(0, NUM_STATEMENTS), DiscreteDomain.integers()),
+                                new Function<Integer, Triple>()
+                                {
+                                    @Override
+                                    public Triple apply(Integer input)
+                                    {
+                                        return Triple.create(s, p, NodeFactory.createLiteral(oFragment + input));
+                                    }
+                                });
+                            
+                            return WrappedIterator.create(it.iterator());
+                        }
+                        else if (m.getMatchObject().isLiteral() && m.getMatchObject().getLiteralLexicalForm().startsWith(oFragment))
+                        {
+                            String value = m.getMatchObject().getLiteralLexicalForm();
+                            try
+                            {
+                                int v = Integer.parseInt(value.substring(oFragment.length()));
+                                if (v >= 0 && v < NUM_STATEMENTS)
+                                {
+                                    return new SingletonIterator<Triple>(Triple.create(s, p, m.getMatchObject()));
+                                }
+                            }
+                            catch (NumberFormatException e) { }
+                        }
+                    }
+                    
+                    return NullIterator.instance();
+                }
+            });
+            
+        default:
+            throw new UnsupportedOperationException("unknown model type");
+        }
+    }
+    
+    private static Query createInQuery(int numResources)
+    {
+        checkArgument(numResources <= NUM_STATEMENTS, "numResources must be <= NUM_STATEMENTS");
+        StringBuilder sb = new StringBuilder("select (count(*) as ?c) where {\n  ?s ?p ?o .\n  FILTER( ?o IN (\n");
+        boolean firstTime = true;
+        for (int v : Iterables.limit(new MaximalLFSR(NUM_STATEMENTS, SEED), numResources))
+        {
+            if (!firstTime)
+            {
+                sb.append(",\n");
+            }
+            sb.append("    \"value" + v + "\"");
+            firstTime = false;
+        }
+        sb.append("\n  ) )\n}");
+//        System.out.println(sb.toString());
+        return QueryFactory.create(sb.toString());
+    }
+    
+    private static Query createValuesQuery(int numResources)
+    {
+        checkArgument(numResources <= NUM_STATEMENTS, "numResources must be <= NUM_STATEMENTS");
+        StringBuilder sb = new StringBuilder("select (count(*) as ?c) where {\n  ?s ?p ?o .\n}\nVALUES ?o {\n");
+        
+        boolean firstTime = true;
+        for (int v : Iterables.limit(new MaximalLFSR(NUM_STATEMENTS, SEED), numResources))
+        {
+            if (!firstTime)
+            {
+                sb.append("\n");
+            }
+            sb.append("  \"value" + v + "\"");
+            firstTime = false;
+        }
+        sb.append("\n}");
+//        System.out.println(sb.toString());
+        return QueryFactory.create(sb.toString());
+    }
+    
+    
+    /**
+     * Maximal Linear Feedback Shift Register, which will return an iterator over a psuedo-random sequence
+     * of integers in the range 0 (inclusive) to <code>count</code> (exclusive) without any repeated numbers.
+     * <p/>
+     * O(1) memory and roughly O(1) time.
+     * <p/>
+     * @see http://stackoverflow.com/questions/1816534/random-playlist-algorithm
+     */
+    private static class MaximalLFSR implements Iterable<Integer>
+    {
+        private final int count;
+        private final int feedback;
+        private final int start;
+        //private final Long seed;
+        
+        private static int[] _feedback = new int[]
+        {
+            0x9, 0x17, 0x30, 0x44, 0x8e,
+            0x108, 0x20d, 0x402, 0x829, 0x1013, 0x203d, 0x4001, 0x801f,
+            0x1002a, 0x2018b, 0x400e3, 0x801e1, 0x10011e, 0x2002cc, 0x400079, 0x80035e,
+            0x1000160, 0x20001e4, 0x4000203, 0x8000100, 0x10000235, 0x2000027d, 0x4000016f, 0x80000478
+        };
+        
+        public MaximalLFSR(int count)
+        {
+            this(count, null);
+        }
+        
+        public MaximalLFSR(int count, long seed)
+        {
+            this(count, (Long)seed);
+        }
+        
+        private MaximalLFSR(int count, Long seed)
+        {
+            if (count < 0) throw new IllegalArgumentException("count");
+            
+            this.count = count;
+            
+            Random r = (null == seed) ? new Random() : new Random(seed);
+            this.start = r.nextInt(count - 2) + 1;
+            
+            int bitsForFeedback = getFeedbackSize(count);
+            this.feedback = getFeedbackTerm(bitsForFeedback);
+        }
+        
+        
+        private int getFeedbackSize(int v)
+        {
+            int r = 0;
+
+            while ((v >>= 1) != 0)
+            {
+              r++;
+            }
+            if (r < 4)
+                r = 4;
+            return r;
+        }
+        
+        private int getFeedbackTerm(int bits)
+        {
+            if (bits < 4 || bits >= 28)
+                throw new IndexOutOfBoundsException("bits");
+            return _feedback[bits];
+        }
+
+        @Override
+        public Iterator<Integer> iterator()
+        {
+            return new MaximalLFSR_Iterator();
+        }
+        
+        
+        private class MaximalLFSR_Iterator implements Iterator<Integer>
+        {
+            private int i;
+            private int valuesReturned = 0;
+            
+            MaximalLFSR_Iterator()
+            {
+                i = start;
+            }
+            
+            @Override
+            public boolean hasNext()
+            {
+                return valuesReturned < count;
+            }
+
+            @Override
+            public Integer next()
+            {
+                while (true)
+                {
+                    if ((i & 1) != 0)
+                    {
+                        i = (i >> 1) ^ feedback;
+                    }
+                    else {
+                        i = (i >> 1);
+                    }
+                    if (i <= count)
+                    {
+                        valuesReturned++;
+                        return i-1;
+                    }
+                }
+            }
+
+            @Override
+            public void remove()
+            {
+                throw new UnsupportedOperationException();
+            }
+        }
+        
+    }
+    
+    
+}