Posted to oak-commits@jackrabbit.apache.org by ju...@apache.org on 2013/02/26 15:07:24 UTC
svn commit: r1450173 -
/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java
Author: jukka
Date: Tue Feb 26 14:07:24 2013
New Revision: 1450173
URL: http://svn.apache.org/r1450173
Log:
OAK-641: Improved benchmark tooling
Verify that the wikipedia import was properly completed
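
In outline, the import pass now returns a checksum (the running sum of the hash codes of every page's title and text), a second pass re-reads /wikipedia and recomputes the same sum, and Guava's Preconditions.checkState fails the benchmark if the two values differ. A minimal sketch of that verification pattern, separated from the JCR plumbing (the ImportVerification class and the checksum() and verify() helpers are illustrative names, not part of the actual commit):

    import static com.google.common.base.Preconditions.checkState;

    import java.util.Map;

    public class ImportVerification {

        // Order-independent checksum over title/text pairs: a plain sum of
        // hash codes, mirroring what importWikipedia() and traverseWikipedia()
        // accumulate in the patch below.
        static int checksum(Map<String, String> pages) {
            int code = 0;
            for (Map.Entry<String, String> page : pages.entrySet()) {
                code += page.getKey().hashCode();   // title
                code += page.getValue().hashCode(); // text
            }
            return code;
        }

        // Throws IllegalStateException when the write and read passes disagree.
        static void verify(Map<String, String> written, Map<String, String> readBack) {
            checkState(checksum(written) == checksum(readBack),
                    "Import vs. traverse mismatch");
        }
    }

Summing hash codes keeps the check cheap and independent of traversal order, at the cost of being a weak, collision-prone integrity check rather than a cryptographic one.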
Modified:
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java
Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java?rev=1450173&r1=1450172&r2=1450173&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java Tue Feb 26 14:07:24 2013
@@ -16,9 +16,12 @@
  */
 package org.apache.jackrabbit.oak.benchmark.wikipedia;
 
+import static com.google.common.base.Preconditions.checkState;
+
 import java.io.File;
 
 import javax.jcr.Node;
+import javax.jcr.NodeIterator;
 import javax.jcr.Repository;
 import javax.jcr.Session;
 import javax.jcr.SimpleCredentials;
@@ -44,8 +47,9 @@ public class WikipediaImport extends Ben
         if (dump.isFile()) {
             for (RepositoryFixture fixture : fixtures) {
                 if (fixture.isAvailable(1)) {
+                    System.out.format(
+                            "%s: Wikipedia import benchmark%n", fixture);
                     try {
-                        System.out.format("%s: importing %s...%n", fixture, dump);
                         Repository[] cluster = fixture.setUpCluster(1);
                         try {
                             run(cluster[0]);
@@ -67,56 +71,95 @@ public class WikipediaImport extends Ben
     }
     private void run(Repository repository) throws Exception {
-        long start = System.currentTimeMillis();
-        int pages = 0;
-
         Session session = repository.login(
                 new SimpleCredentials("admin", "admin".toCharArray()));
         try {
-            Node wikipedia = session.getRootNode().addNode("wikipedia");
+            int before = importWikipedia(session);
+            int after = traverseWikipedia(session);
+            checkState(before == after, "Import vs. traverse mismatch");
+        } finally {
+            session.logout();
+        }
+    }
-            String title = null;
-            String text = null;
-            XMLInputFactory factory = XMLInputFactory.newInstance();
-            XMLStreamReader reader =
-                    factory.createXMLStreamReader(new StreamSource(dump));
-            while (reader.hasNext()) {
-                switch (reader.next()) {
-                case XMLStreamConstants.START_ELEMENT:
-                    if ("title".equals(reader.getLocalName())) {
-                        title = reader.getElementText();
-                    } else if ("text".equals(reader.getLocalName())) {
-                        text = reader.getElementText();
-                    }
-                    break;
-                case XMLStreamConstants.END_ELEMENT:
-                    if ("page".equals(reader.getLocalName())) {
-                        String name = Text.escapeIllegalJcrChars(title);
-                        Node page = wikipedia.addNode(name);
-                        page.setProperty("title", title);
-                        page.setProperty("text", text);
-                        pages++;
-                        if (pages % 1000 == 0) {
-                            long millis = System.currentTimeMillis() - start;
-                            System.out.format(
-                                    "Added %d pages in %d seconds (%.2fms/page)%n",
-                                    pages, millis / 1000, (double) millis / pages);
-                        }
+    private int importWikipedia(Session session) throws Exception {
+        long start = System.currentTimeMillis();
+        int count = 0;
+        int code = 0;
+        System.out.format("Importing %s...%n", dump);
+        Node wikipedia = session.getRootNode().addNode("wikipedia");
+
+        String title = null;
+        String text = null;
+        XMLInputFactory factory = XMLInputFactory.newInstance();
+        XMLStreamReader reader =
+                factory.createXMLStreamReader(new StreamSource(dump));
+        while (reader.hasNext()) {
+            switch (reader.next()) {
+            case XMLStreamConstants.START_ELEMENT:
+                if ("title".equals(reader.getLocalName())) {
+                    title = reader.getElementText();
+                } else if ("text".equals(reader.getLocalName())) {
+                    text = reader.getElementText();
+                }
+                break;
+            case XMLStreamConstants.END_ELEMENT:
+                if ("page".equals(reader.getLocalName())) {
+                    String name = Text.escapeIllegalJcrChars(title);
+                    Node page = wikipedia.addNode(name);
+                    page.setProperty("title", title);
+                    page.setProperty("text", text);
+                    code += title.hashCode();
+                    code += text.hashCode();
+                    count++;
+                    if (count % 1000 == 0) {
+                        long millis = System.currentTimeMillis() - start;
+                        System.out.format(
+                                "Added %d pages in %d seconds (%.2fms/page)%n",
+                                count, millis / 1000, (double) millis / count);
                     }
-                    break;
                 }
+                break;
             }
-
-            session.save();
-        } finally {
-            session.logout();
-        }
+        }
+        session.save();
+
         long millis = System.currentTimeMillis() - start;
         System.out.format(
                 "Imported %d pages in %d seconds (%.2fms/page)%n",
-                pages, millis / 1000, (double) millis / pages);
+                count, millis / 1000, (double) millis / count);
+        return code;
+    }
+
+    private int traverseWikipedia(Session session) throws Exception {
+        long start = System.currentTimeMillis();
+        int count = 0;
+        int code = 0;
+
+        System.out.format("Traversing imported pages...%n");
+        Node wikipedia = session.getNode("/wikipedia");
+
+        NodeIterator pages = wikipedia.getNodes();
+        while (pages.hasNext()) {
+            Node page = pages.nextNode();
+            code += page.getProperty("title").getString().hashCode();
+            code += page.getProperty("text").getString().hashCode();
+            count++;
+            if (count % 1000 == 0) {
+                long millis = System.currentTimeMillis() - start;
+                System.out.format(
+                        "Read %d pages in %d seconds (%.2fms/page)%n",
+                        count, millis / 1000, (double) millis / count);
+            }
+        }
+
+        long millis = System.currentTimeMillis() - start;
+        System.out.format(
+                "Traversed %d pages in %d seconds (%.2fms/page)%n",
+                count, millis / 1000, (double) millis / count);
+        return code;
     }
 }