You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/07/10 13:45:37 UTC
svn commit: r1690247 - in /jackrabbit/oak/trunk/oak-core/src:
main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/
main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/
test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/
Author: chetanm
Date: Fri Jul 10 11:45:36 2015
New Revision: 1690247
URL: http://svn.apache.org/r1690247
Log:
OAK-2892 - Speed up lucene indexing post migration by pre extracting the text content from binaries
Introducing new PreExtractedTextProvider API and a DataStore based storage implementation
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java (with props)
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java (with props)
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.BufferedWriter;
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.lang.ref.SoftReference;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import javax.annotation.Nonnull;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Sets;
+import com.google.common.io.Files;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * TextWriter implementation which just stores the extracted text
+ * as files using the same layout as used by FileDataStore.
+ * <p>
+ * Besides the per blob text files, the ids of the blobs marked as empty or
+ * as failed are tracked in two sets. Those sets are read from, and on
+ * {@link #close()} written back to, the files {@code blobs_empty.txt} and
+ * {@code blobs_error.txt} in the store directory.
+ * <p>
+ * In read only mode the instance only serves as a
+ * {@link PreExtractedTextProvider}: all mutating methods fail with an
+ * {@link IllegalStateException} and nothing is written on close.
+ * <p>
+ * NOTE(review): {@link #getText(String, Blob)} and
+ * {@link #write(String, String)} are not synchronized while the other
+ * public methods are - confirm the intended threading model with callers.
+ */
+public class DataStoreTextWriter implements TextWriter, Closeable, PreExtractedTextProvider {
+    private static final String ERROR_BLOB_FILE = "blobs_error.txt";
+    private static final String EMPTY_BLOB_FILE = "blobs_empty.txt";
+
+    private static final Logger log = LoggerFactory.getLogger(DataStoreTextWriter.class);
+    private final File directory;
+
+    private final SetHolder emptyBlobsHolder;
+    private final SetHolder errorBlobsHolder;
+    private boolean closed;
+    /**
+     * Flag indicating that blobId passed is one from DataStoreBlobStore
+     * As those blobId's have the length encoded which would need to be
+     * stripped off
+     */
+    private boolean dataStoreBlobId = true;
+
+    private final boolean readOnlyMode;
+
+    /**
+     * Opens the extracted text store rooted at the given directory. The
+     * directory is created if it does not exist yet.
+     *
+     * @param directory    root directory of the extracted text store
+     * @param readOnlyMode if {@code true} the store can only be read and the
+     *                     empty/error sets are loaded lazily; otherwise both
+     *                     sets are loaded right away
+     * @throws IllegalArgumentException if the directory does not exist and
+     *                                  cannot be created
+     * @throws IOException declared for callers; not thrown by the code
+     *                     visible in this constructor
+     */
+    public DataStoreTextWriter(File directory, boolean readOnlyMode) throws IOException {
+        if (!directory.exists()) {
+            checkArgument(directory.mkdirs(), "Cannot create directory %s", directory.getAbsolutePath());
+        }
+        // The loaders resolve their files against this field, so it has to be
+        // assigned before createLoader() is invoked
+        this.directory = directory;
+        this.readOnlyMode = readOnlyMode;
+        this.emptyBlobsHolder = new SetHolder(createLoader(EMPTY_BLOB_FILE), readOnlyMode);
+        this.errorBlobsHolder = new SetHolder(createLoader(ERROR_BLOB_FILE), readOnlyMode);
+
+        if (!readOnlyMode) {
+            log.info("Using {} to store the extracted text content. Empty count {}, Error count {}",
+                    directory.getAbsolutePath(), getEmptyBlobs().size(), getErrorBlobs().size());
+        } else {
+            // Do not touch the sets here, that would trigger the lazy load
+            log.info("Using extracted store from {}", directory.getAbsolutePath());
+        }
+    }
+
+    /**
+     * Looks up the pre extracted text for the given blob.
+     *
+     * @param propertyPath path of the binary property, only used for logging
+     * @param blob         binary whose content identity is used as lookup key
+     * @return the recorded result (EMPTY, ERROR or SUCCESS with the stored
+     *         text) or {@code null} if the blob has no content identity or
+     *         nothing is recorded for it
+     * @throws IOException if the stored text file cannot be read
+     */
+    @Override
+    public ExtractedText getText(String propertyPath, Blob blob) throws IOException {
+        String blobId = blob.getContentIdentity();
+        if (blobId == null) {
+            log.debug("No id found for blob at path {}", propertyPath);
+            // Without an id there is nothing to look up; continuing would
+            // only fail with a NullPointerException further down
+            return null;
+        }
+
+        blobId = stripLength(blobId);
+        ExtractedText result = null;
+        if (getEmptyBlobs().contains(blobId)) {
+            result = new ExtractedText(ExtractionResult.EMPTY, null);
+        } else if (getErrorBlobs().contains(blobId)) {
+            result = new ExtractedText(ExtractionResult.ERROR, null);
+        } else {
+            File textFile = getFile(blobId);
+            if (textFile.exists()) {
+                String text = Files.toString(textFile, Charsets.UTF_8);
+                result = new ExtractedText(ExtractionResult.SUCCESS, text);
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Stores the given text as UTF-8 file for the given blob, creating the
+     * parent directories as needed.
+     *
+     * @param blobId id of the blob the text was extracted from
+     * @param text   extracted text
+     * @throws IllegalStateException if read only mode is enabled
+     * @throws NullPointerException  if blobId or text is {@code null}
+     * @throws IOException           if the file cannot be written
+     */
+    @Override
+    public void write(@Nonnull String blobId,@Nonnull String text) throws IOException {
+        checkIfReadOnlyModeEnabled();
+        checkNotNull(blobId, "BlobId cannot be null");
+        checkNotNull(text, "Text passed for [%s] was null", blobId);
+
+        File textFile = getFile(stripLength(blobId));
+        ensureParentExists(textFile);
+        //TODO should we compress
+        Files.write(text, textFile, Charsets.UTF_8);
+    }
+
+    /**
+     * Adds the blob to the in-memory set of empty blobs. The set is only
+     * persisted on {@link #close()}.
+     *
+     * @throws IllegalStateException if read only mode is enabled
+     */
+    @Override
+    public synchronized void markEmpty(String blobId) {
+        checkIfReadOnlyModeEnabled();
+        getEmptyBlobs().add(stripLength(blobId));
+    }
+
+    /**
+     * Adds the blob to the in-memory set of failed blobs. The set is only
+     * persisted on {@link #close()}.
+     *
+     * @throws IllegalStateException if read only mode is enabled
+     */
+    @Override
+    public synchronized void markError(String blobId) {
+        checkIfReadOnlyModeEnabled();
+        getErrorBlobs().add(stripLength(blobId));
+    }
+
+    /**
+     * @return {@code true} if the blob is marked as empty, marked as error
+     *         or a text file exists for it
+     */
+    @Override
+    public synchronized boolean isProcessed(String blobId) {
+        blobId = stripLength(blobId);
+        if (getEmptyBlobs().contains(blobId) || getErrorBlobs().contains(blobId)) {
+            return true;
+        }
+        File textFile = getFile(blobId);
+        return textFile.exists();
+    }
+
+    /**
+     * Persists the empty and error blob sets. Calling it again after a
+     * successful close has no effect. In read only mode nothing is written,
+     * as the sets cannot have changed and the store must not be modified.
+     *
+     * @throws IOException if one of the files cannot be written
+     */
+    @Override
+    public synchronized void close() throws IOException {
+        if (closed || readOnlyMode) {
+            return;
+        }
+        writeToFile(EMPTY_BLOB_FILE, getEmptyBlobs());
+        writeToFile(ERROR_BLOB_FILE, getErrorBlobs());
+        closed = true;
+    }
+
+    SetHolder getEmptyBlobsHolder(){
+        return emptyBlobsHolder;
+    }
+
+    SetHolder getErrorBlobsHolder() {
+        return errorBlobsHolder;
+    }
+
+    /**
+     * Returns the identified file. This method implements the pattern
+     * used to avoid problems with too many files in a single directory.
+     * <p>
+     * No sanity checks are performed on the given identifier. It has to be
+     * at least six characters long as the first three character pairs are
+     * used as directory names.
+     *
+     * @param identifier file name
+     * @return identified file
+     */
+    private File getFile(String identifier) {
+        File file = directory;
+        file = new File(file, identifier.substring(0, 2));
+        file = new File(file, identifier.substring(2, 4));
+        file = new File(file, identifier.substring(4, 6));
+        return new File(file, identifier);
+    }
+
+    /**
+     * Reduces a DataStoreBlobStore blobId to the plain id used as file name.
+     * The id is returned unchanged if {@link #dataStoreBlobId} is not set.
+     */
+    private String stripLength(String blobId) {
+        if (dataStoreBlobId) {
+            return DataStoreBlobStore.BlobId.of(blobId).blobId;
+        }
+        return blobId;
+    }
+
+    private Set<String> getEmptyBlobs() {
+        return emptyBlobsHolder.get();
+    }
+
+    private Set<String> getErrorBlobs() {
+        return errorBlobsHolder.get();
+    }
+
+    private void checkIfReadOnlyModeEnabled() {
+        checkState(!readOnlyMode, "Read only mode enabled");
+    }
+
+    /**
+     * Creates a loader reading the blob ids stored in the given file of the
+     * store directory. Its toString() is what gets logged if loading fails.
+     */
+    private Callable<Set<String>> createLoader(final String fileName) {
+        final File file = new File(directory, fileName);
+        return new Callable<Set<String>>() {
+            @Override
+            public Set<String> call() throws Exception {
+                return loadFromFile(file);
+            }
+
+            @Override
+            public String toString() {
+                return "Loading state from " + file.getAbsolutePath();
+            }
+        };
+    }
+
+    /**
+     * Reads one blob id per line; a missing file yields an empty set.
+     */
+    private Set<String> loadFromFile(File file) throws IOException {
+        Set<String> result = Sets.newHashSet();
+        if (file.exists()) {
+            result.addAll(Files.readLines(file, Charsets.UTF_8));
+        }
+        return result;
+    }
+
+    /**
+     * Writes the given blob ids, one per line, to the named file in the
+     * store directory. Nothing is written for an empty set.
+     */
+    private void writeToFile(String fileName, Set<String> blobIds) throws IOException {
+        if (blobIds.isEmpty()){
+            return;
+        }
+        File file = new File(directory, fileName);
+        BufferedWriter bw = Files.newWriter(file, Charsets.UTF_8);
+        try {
+            for (String id : blobIds) {
+                bw.write(id);
+                bw.newLine();
+            }
+        } finally {
+            // Release the file handle even if one of the writes failed
+            bw.close();
+        }
+    }
+
+    private static void ensureParentExists(File file) throws IOException {
+        if (!file.exists()) {
+            File parent = file.getParentFile();
+            FileUtils.forceMkdir(parent);
+        }
+    }
+
+    /**
+     * Holder for a set of blob ids which is either loaded right away and
+     * referenced strongly, or loaded on first access and referenced via a
+     * SoftReference.
+     * <p>
+     * While running in read only mode the PreExtractedTextProvider
+     * would only be used while reindexing. So as to avoid holding memory
+     * SoftReference would be used
+     */
+    static class SetHolder {
+        /** Strongly held set; null when the soft reference variant is used */
+        private final Set<String> state;
+        private SoftReference<Set<String>> stateRef;
+        private final Callable<Set<String>> loader;
+        private int loadCount;
+
+        /**
+         * @param loader  loads the set content
+         * @param softRef if {@code true} the set is loaded lazily and held
+         *                via a SoftReference, otherwise it is loaded now
+         */
+        public SetHolder(Callable<Set<String>> loader, boolean softRef) {
+            this.loader = loader;
+            if (softRef) {
+                this.state = null;
+            } else {
+                this.state = load();
+            }
+        }
+
+        /**
+         * @return the set, (re)loading it if it was never loaded or the
+         *         soft reference got cleared
+         */
+        public Set<String> get() {
+            Set<String> result = state;
+            if (result != null) {
+                return result;
+            }
+
+            if (stateRef != null) {
+                result = stateRef.get();
+            }
+
+            if (result == null) {
+                result = load();
+                stateRef = new SoftReference<Set<String>>(result);
+            }
+
+            return result;
+        }
+
+        /**
+         * @return number of times the loader was invoked
+         */
+        public int getLoadCount() {
+            return loadCount;
+        }
+
+        /**
+         * Invokes the loader. A failure is logged and answered with an
+         * empty set instead of being propagated.
+         */
+        private Set<String> load() {
+            try {
+                loadCount++;
+                return loader.call();
+            } catch (Exception e) {
+                log.warn("Error occurred while loading the state via {}", loader, e);
+                return Sets.newHashSet();
+            }
+        }
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.IOException;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Sink for the outcome of extracting text from binaries. For each blob
+ * either the extracted text is written, or the blob is marked as empty
+ * or as failed.
+ */
+public interface TextWriter {
+
+ /**
+ * Stores the text extracted from the given blob.
+ *
+ * @param blobId id of the blob the text was extracted from
+ * @param text the extracted text
+ * @throws IOException if the text cannot be stored
+ */
+ void write(@Nonnull String blobId, @Nonnull String text) throws IOException;
+
+ /**
+ * Records that no text was extracted for the given blob.
+ *
+ * @param blobId id of the blob
+ */
+ void markEmpty(String blobId);
+
+ /**
+ * Records that text extraction failed for the given blob.
+ *
+ * @param blobId id of the blob
+ */
+ void markError(String blobId);
+
+ /**
+ * Checks whether an outcome is already known for the given blob.
+ * DataStoreTextWriter answers true if the blob was marked as empty,
+ * marked as error or has text stored for it.
+ *
+ * @param blobId id of the blob
+ * @return true if the blob does not need to be processed again
+ */
+ boolean isProcessed(String blobId);
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.fulltext;
+
+import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+/**
+ * Immutable outcome of a text extraction: the kind of result and, where
+ * available, the extracted text itself.
+ */
+public class ExtractedText {
+    public enum ExtractionResult {
+        /**
+         * Indicates that text extraction was successful and some text
+         * was extracted
+         */
+        SUCCESS,
+        /**
+         * Indicates that no text was extracted. This can happen if the
+         * mimeType for the binary is part of exclusion list
+         */
+        EMPTY,
+        /**
+         * Indicates that text extraction resulted in an error.
+         * The {@link ExtractedText#getExtractedText()} might contain
+         * more details
+         */
+        ERROR
+    }
+
+    private final ExtractionResult extractionResult;
+    private final CharSequence extractedText;
+
+    /**
+     * @param extractionResult kind of outcome, must not be {@code null}
+     * @param extractedText    the extracted text; must not be {@code null}
+     *                         for {@link ExtractionResult#SUCCESS}, may be
+     *                         {@code null} otherwise
+     * @throws NullPointerException if extractionResult is {@code null}, or
+     *                              if extractedText is {@code null} for a
+     *                              SUCCESS result
+     */
+    public ExtractedText(@Nonnull ExtractionResult extractionResult,CharSequence extractedText) {
+        // Enforce the @Nonnull contract up front so that getExtractionResult()
+        // can never hand out null
+        this.extractionResult = checkNotNull(extractionResult, "extractionResult must not be null");
+        this.extractedText = extractedText;
+        checkState();
+    }
+
+    /**
+     * @return the kind of outcome
+     */
+    @Nonnull
+    public ExtractionResult getExtractionResult() {
+        return extractionResult;
+    }
+
+    /**
+     * @return the extracted text, possibly {@code null} unless the result
+     *         is {@link ExtractionResult#SUCCESS}
+     */
+    @CheckForNull
+    public CharSequence getExtractedText() {
+        return extractedText;
+    }
+
+    /**
+     * A successful extraction always has to carry the text.
+     */
+    private void checkState() {
+        if (extractionResult == ExtractionResult.SUCCESS){
+            checkNotNull(extractedText, "extractedText must not be null for SUCCESS");
+        }
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.fulltext;
+
+import java.io.IOException;
+
+import javax.annotation.CheckForNull;
+
+import aQute.bnd.annotation.ConsumerType;
+import org.apache.jackrabbit.oak.api.Blob;
+
+/**
+ * Source of text which was extracted from binaries ahead of time. Per the
+ * change introducing it, the purpose is to speed up indexing by reusing
+ * such text instead of extracting it from the binaries again.
+ */
+@ConsumerType
+public interface PreExtractedTextProvider {
+
+ /**
+ * Get pre extracted text for given blob at given path
+ *
+ * @param propertyPath path of the binary property
+ * @param blob binary property value
+ *
+ * @return pre extracted text or null if no
+ * pre extracted text found for given blob
+ * @throws IOException if the lookup fails, e.g. when an implementation
+ * cannot read the stored text
+ */
+ @CheckForNull
+ ExtractedText getText(String propertyPath, Blob blob) throws IOException;
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+@Version("1.0.0")
+@Export(optional = "provide:=true")
+package org.apache.jackrabbit.oak.plugins.index.fulltext;
+
+import aQute.bnd.annotation.Export;
+import aQute.bnd.annotation.Version;
\ No newline at end of file
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.core.data.DataRecord;
+import org.apache.jackrabbit.core.data.FileDataStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreTextWriter;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.TextWriter;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+public class DataStoreTextWriterTest {
+ @Rule
+ public final TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+ @Test
+ public void basicOperation() throws Exception {
+ File fdsDir = temporaryFolder.newFolder();
+ FileDataStore fds = createFDS(fdsDir);
+ ByteArrayInputStream is = new ByteArrayInputStream("hello".getBytes());
+ DataRecord dr = fds.addRecord(is);
+
+ File writerDir = temporaryFolder.newFolder();
+ TextWriter writer = new DataStoreTextWriter(writerDir, false);
+ writer.write(dr.getIdentifier().toString(), "hello");
+
+ FileDataStore fds2 = createFDS(writerDir);
+ DataRecord dr2 = fds2.getRecordIfStored(dr.getIdentifier());
+
+ is.reset();
+ assertTrue(IOUtils.contentEquals(is, dr2.getStream()));
+
+ }
+
+ @Test
+ public void noLoadingInReadOnlyMode() throws Exception{
+ DataStoreTextWriter w = new DataStoreTextWriter(temporaryFolder.getRoot(), true);
+ assertEquals(0, w.getEmptyBlobsHolder().getLoadCount());
+ assertEquals(0, w.getErrorBlobsHolder().getLoadCount());
+
+ DataStoreTextWriter w1 = new DataStoreTextWriter(temporaryFolder.getRoot(), false);
+ assertEquals(1, w1.getEmptyBlobsHolder().getLoadCount());
+ assertEquals(1, w1.getErrorBlobsHolder().getLoadCount());
+ }
+
+ @Test
+ public void checkEmptyAndErrorBlobs() throws Exception{
+ DataStoreTextWriter w = new DataStoreTextWriter(temporaryFolder.getRoot(), false);
+ w.markEmpty("a");
+ w.markError("b");
+ w.close();
+
+ DataStoreTextWriter w2 = new DataStoreTextWriter(temporaryFolder.getRoot(), true);
+ assertEquals(ExtractionResult.EMPTY, w2.getText("/a", new IdBlob("foo", "a")).getExtractionResult());
+ assertEquals(ExtractionResult.ERROR, w2.getText("/a", new IdBlob("foo", "b")).getExtractionResult());
+ }
+
+ @Test
+ public void nonExistingEntry() throws Exception{
+ File fdsDir = temporaryFolder.newFolder();
+ FileDataStore fds = createFDS(fdsDir);
+ ByteArrayInputStream is = new ByteArrayInputStream("hello".getBytes());
+ DataRecord dr = fds.addRecord(is);
+
+ File writerDir = temporaryFolder.newFolder();
+ DataStoreTextWriter w = new DataStoreTextWriter(writerDir, false);
+ String id = dr.getIdentifier().toString();
+ assertFalse(w.isProcessed(id));
+ assertNull(w.getText("/a", new IdBlob("foo", id)));
+
+ w.write(id, "foo");
+ assertTrue(w.isProcessed(id));
+ ExtractedText et = w.getText("/a", new IdBlob("foo", id));
+ assertEquals("foo", et.getExtractedText());
+ assertEquals(ExtractionResult.SUCCESS, et.getExtractionResult());
+
+ w.markEmpty("a");
+ assertTrue(w.isProcessed("a"));
+
+ }
+
+ private FileDataStore createFDS(File root) {
+ FileDataStore fds = new FileDataStore();
+ fds.setPath(root.getAbsolutePath());
+ fds.setMinRecordLength(0);
+ fds.init(null);
+ return fds;
+ }
+
+ private static class IdBlob extends ArrayBasedBlob {
+ final String id;
+
+ public IdBlob(String value, String id) {
+ super(value.getBytes());
+ this.id = id;
+ }
+
+ @Override
+ public String getContentIdentity() {
+ return id;
+ }
+ }
+}
\ No newline at end of file
Propchange: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java
------------------------------------------------------------------------------
svn:eol-style = native