You are viewing a plain text version of this content. The canonical link to the original was not preserved in this copy.
Posted to oak-commits@jackrabbit.apache.org by mr...@apache.org on 2014/05/02 10:41:29 UTC
svn commit: r1591826 - in /jackrabbit/oak/trunk/oak-run: pom.xml src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java
Author: mreutegg
Date: Fri May 2 08:41:29 2014
New Revision: 1591826
URL: http://svn.apache.org/r1591826
Log:
OAK-1790: Import of compressed wikipedia dump
Modified:
jackrabbit/oak/trunk/oak-run/pom.xml
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java
Modified: jackrabbit/oak/trunk/oak-run/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/pom.xml?rev=1591826&r1=1591825&r2=1591826&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-run/pom.xml Fri May 2 08:41:29 2014
@@ -183,6 +183,11 @@
<version>2.0</version>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>1.8</version>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<version>${jetty.version}</version>
Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java?rev=1591826&r1=1591825&r2=1591826&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/benchmark/wikipedia/WikipediaImport.java Fri May 2 08:41:29 2014
@@ -19,7 +19,9 @@ package org.apache.jackrabbit.oak.benchm
import static com.google.common.base.Preconditions.checkState;
import static java.lang.Math.min;
+import java.io.BufferedInputStream;
import java.io.File;
+import java.io.FileInputStream;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
@@ -32,6 +34,7 @@ import javax.xml.stream.XMLStreamConstan
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.stream.StreamSource;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.jackrabbit.commons.JcrUtils;
import org.apache.jackrabbit.oak.benchmark.Benchmark;
import org.apache.jackrabbit.oak.fixture.RepositoryFixture;
@@ -101,7 +104,7 @@ public class WikipediaImport extends Ben
}
String type = "nt:unstructured";
- if (flat) {
+ if (session.getWorkspace().getNodeTypeManager().hasNodeType("oak:Unstructured")) {
type = "oak:Unstructured";
}
Node wikipedia = session.getRootNode().addNode("wikipedia", type);
@@ -118,8 +121,15 @@ public class WikipediaImport extends Ben
String title = null;
String text = null;
XMLInputFactory factory = XMLInputFactory.newInstance();
- XMLStreamReader reader =
- factory.createXMLStreamReader(new StreamSource(dump));
+ StreamSource source;
+ if (dump.getName().endsWith(".xml")) {
+ source = new StreamSource(dump);
+ } else {
+ CompressorStreamFactory csf = new CompressorStreamFactory();
+ source = new StreamSource(csf.createCompressorInputStream(
+ new BufferedInputStream(new FileInputStream(dump))));
+ }
+ XMLStreamReader reader = factory.createXMLStreamReader(source);
while (reader.hasNext()) {
switch (reader.next()) {
case XMLStreamConstants.START_ELEMENT: