You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/07/10 13:45:37 UTC

svn commit: r1690247 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/ main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/

Author: chetanm
Date: Fri Jul 10 11:45:36 2015
New Revision: 1690247

URL: http://svn.apache.org/r1690247
Log:
OAK-2892 - Speed up lucene indexing post migration by pre extracting the text content from binaries

Introducing new PreExtractedTextProvider API and a DataStore based storage implementation

Added:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java   (with props)

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.BufferedWriter;
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.lang.ref.SoftReference;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import javax.annotation.Nonnull;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Sets;
+import com.google.common.io.Files;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * TextWriter implementation which just stores the extracted text
+ * as files using the same layout as used by FileDataStore
+ */
+public class DataStoreTextWriter implements TextWriter, Closeable, PreExtractedTextProvider {
+    private static final String ERROR_BLOB_FILE = "blobs_error.txt";
+    private static final String EMPTY_BLOB_FILE = "blobs_empty.txt";
+
+    private static final Logger log = LoggerFactory.getLogger(DataStoreTextWriter.class);
+    private File directory;
+
+    private final SetHolder emptyBlobsHolder;
+    private final SetHolder errorBlobsHolder;
+    private boolean closed;
+    /**
+     * Flag indicating that the blobId passed is one from DataStoreBlobStore.
+     * Those blobIds have the length encoded in them, which needs to be
+     * stripped off.
+     */
+    private boolean dataStoreBlobId = true;
+
+    private final boolean readOnlyMode;
+
+    /**
+     * Creates a writer backed by the given directory. The directory
+     * (including missing parents) is created when it does not exist yet.
+     *
+     * @param directory    root directory for the extracted text files and the
+     *                     empty/error blob lists
+     * @param readOnlyMode when true the empty/error blob lists are loaded lazily
+     *                     and mutating calls fail with IllegalStateException
+     * @throws IllegalArgumentException if the directory is missing and cannot be created
+     * @throws IOException declared in the signature
+     */
+    public DataStoreTextWriter(File directory, boolean readOnlyMode) throws IOException {
+        final String path = directory.getAbsolutePath();
+        checkArgument(directory.exists() || directory.mkdirs(), "Cannot create directory %s", path);
+
+        // directory has to be assigned before the loaders are created as createLoader reads it
+        this.directory = directory;
+        this.readOnlyMode = readOnlyMode;
+        this.emptyBlobsHolder = new SetHolder(createLoader(EMPTY_BLOB_FILE), readOnlyMode);
+        this.errorBlobsHolder = new SetHolder(createLoader(ERROR_BLOB_FILE), readOnlyMode);
+
+        if (readOnlyMode) {
+            // counts are not logged here as that would force loading the lists
+            log.info("Using extracted store from {}", path);
+            return;
+        }
+        log.info("Using {} to store the extracted text content. Empty count {}, Error count {}",
+                path, getEmptyBlobs().size(), getErrorBlobs().size());
+    }
+
+    /**
+     * Looks up the pre extracted text for the given blob.
+     *
+     * @param propertyPath path of the binary property, only used for logging
+     * @param blob binary whose content identity is used as lookup key
+     * @return EMPTY or ERROR result (without text) if the blob was marked as
+     * such, a SUCCESS result with the stored text if a text file exists for
+     * the blob, or null if the blob has no content identity or nothing is
+     * stored for it
+     * @throws IOException if the stored text file cannot be read
+     */
+    @Override
+    public ExtractedText getText(String propertyPath, Blob blob) throws IOException {
+        String blobId = blob.getContentIdentity();
+        if (blobId == null) {
+            log.debug("No id found for blob at path {}", propertyPath);
+            // Without an id there is nothing to look up. Returning here avoids
+            // passing null on to stripLength/getFile which would fail with a
+            // NullPointerException
+            return null;
+        }
+
+        blobId = stripLength(blobId);
+        if (getEmptyBlobs().contains(blobId)) {
+            return new ExtractedText(ExtractionResult.EMPTY, null);
+        }
+        if (getErrorBlobs().contains(blobId)) {
+            return new ExtractedText(ExtractionResult.ERROR, null);
+        }
+
+        File textFile = getFile(blobId);
+        if (!textFile.exists()) {
+            return null;
+        }
+        String text = Files.toString(textFile, Charsets.UTF_8);
+        return new ExtractedText(ExtractionResult.SUCCESS, text);
+    }
+
+    /**
+     * Stores the given text as a UTF-8 encoded file whose location is derived
+     * from the blobId (after removing the encoded length, if any).
+     *
+     * @param blobId id of the blob the text was extracted from
+     * @param text extracted text
+     * @throws IllegalStateException if the writer is in read only mode
+     * @throws NullPointerException if blobId or text is null
+     * @throws IOException if the parent directory or the file cannot be written
+     */
+    @Override
+    public void write(@Nonnull String blobId, @Nonnull String text) throws IOException {
+        checkIfReadOnlyModeEnabled();
+        checkNotNull(blobId, "BlobId cannot be null");
+        checkNotNull(text, "Text passed for [%s] was null", blobId);
+
+        final String id = stripLength(blobId);
+        final File target = getFile(id);
+        ensureParentExists(target);
+        //TODO should we compress
+        Files.write(text, target, Charsets.UTF_8);
+    }
+
+    /**
+     * Adds the blob to the in-memory set of blobs for which no text was
+     * extracted. The set is written to disk by {@link #close()}.
+     *
+     * @throws IllegalStateException if the writer is in read only mode
+     */
+    @Override
+    public synchronized void markEmpty(String blobId) {
+        checkIfReadOnlyModeEnabled();
+        final String id = stripLength(blobId);
+        getEmptyBlobs().add(id);
+    }
+
+    /**
+     * Adds the blob to the in-memory set of blobs for which text extraction
+     * failed. The set is written to disk by {@link #close()}.
+     *
+     * @throws IllegalStateException if the writer is in read only mode
+     */
+    @Override
+    public synchronized void markError(String blobId) {
+        checkIfReadOnlyModeEnabled();
+        final String id = stripLength(blobId);
+        getErrorBlobs().add(id);
+    }
+
+    /**
+     * Checks whether a result is already known for the given blob.
+     *
+     * @return true if the blob is marked as empty or error, or if a text
+     * file exists for it
+     */
+    @Override
+    public synchronized boolean isProcessed(String blobId) {
+        final String id = stripLength(blobId);
+        // the marker sets are consulted first; the file system is only hit if both miss
+        boolean marked = getEmptyBlobs().contains(id) || getErrorBlobs().contains(id);
+        return marked || getFile(id).exists();
+    }
+
+    /**
+     * Writes the empty and error blob sets to their files in the store
+     * directory. Calls after the first successful one are no-ops.
+     *
+     * @throws IOException if one of the files cannot be written; the writer
+     * is not flagged as closed in that case
+     */
+    @Override
+    public synchronized void close() throws IOException {
+        if (!closed) {
+            writeToFile(EMPTY_BLOB_FILE, getEmptyBlobs());
+            writeToFile(ERROR_BLOB_FILE, getErrorBlobs());
+            closed = true;
+        }
+    }
+
+    /**
+     * Package-private accessor for the holder of the empty blob set; the
+     * unit test uses it to inspect the load count.
+     */
+    SetHolder getEmptyBlobsHolder(){
+        return emptyBlobsHolder;
+    }
+
+    /**
+     * Package-private accessor for the holder of the error blob set; the
+     * unit test uses it to inspect the load count.
+     */
+    SetHolder getErrorBlobsHolder() {
+        return errorBlobsHolder;
+    }
+
+    /**
+     * Resolves the file for the given identifier below the store directory.
+     * To avoid too many files in a single directory the file is placed in
+     * three nested directories named after the identifier characters
+     * 0-1, 2-3 and 4-5.
+     *
+     * No sanity checks are performed on the given identifier, so an
+     * identifier shorter than six characters fails in substring.
+     *
+     * @param identifier file name
+     * @return identified file
+     */
+    private File getFile(String identifier) {
+        File parent = directory;
+        for (int start = 0; start < 6; start += 2) {
+            parent = new File(parent, identifier.substring(start, start + 2));
+        }
+        return new File(parent, identifier);
+    }
+
+    /**
+     * Returns the id to use for lookups. When the dataStoreBlobId flag is set
+     * the id is parsed via DataStoreBlobStore.BlobId and only its blobId part
+     * is returned; otherwise the id is returned unchanged.
+     */
+    private String stripLength(String blobId) {
+        return dataStoreBlobId ? DataStoreBlobStore.BlobId.of(blobId).blobId : blobId;
+    }
+
+    /**
+     * Returns the set of blob ids marked as empty, as provided by its holder
+     * (which may load it from disk on demand).
+     */
+    private Set<String> getEmptyBlobs() {
+        return emptyBlobsHolder.get();
+    }
+
+    /**
+     * Returns the set of blob ids marked as error, as provided by its holder
+     * (which may load it from disk on demand).
+     */
+    private Set<String> getErrorBlobs() {
+        return errorBlobsHolder.get();
+    }
+
+    /**
+     * Guards mutating operations.
+     *
+     * @throws IllegalStateException if the writer was created in read only mode
+     */
+    private void checkIfReadOnlyModeEnabled() {
+        checkState(!readOnlyMode, "Read only mode enabled");
+    }
+
+    /**
+     * Creates a loader which reads the blob ids stored in the named file of
+     * the store directory. The file is only read when the loader is called.
+     *
+     * @param fileName name of the file relative to the store directory
+     * @return loader whose toString names the file, for use in log messages
+     */
+    private Callable<Set<String>> createLoader(final String fileName) {
+        final File source = new File(directory, fileName);
+        Callable<Set<String>> loader = new Callable<Set<String>>() {
+            @Override
+            public Set<String> call() throws Exception {
+                return loadFromFile(source);
+            }
+
+            @Override
+            public String toString() {
+                return "Loading state from " + source.getAbsolutePath();
+            }
+        };
+        return loader;
+    }
+
+    /**
+     * Reads the given file as UTF-8 and returns its lines as a mutable set.
+     *
+     * @param file file with one blob id per line
+     * @return set of the lines read; empty if the file does not exist
+     * @throws IOException if the file exists but cannot be read
+     */
+    private Set<String> loadFromFile(File file) throws IOException {
+        Set<String> ids = Sets.newHashSet();
+        if (!file.exists()) {
+            return ids;
+        }
+        ids.addAll(Files.readLines(file, Charsets.UTF_8));
+        return ids;
+    }
+
+    /**
+     * Writes the given blob ids, one per line and UTF-8 encoded, to the named
+     * file in the store directory. Nothing is written for an empty set.
+     *
+     * @param fileName name of the file relative to the store directory
+     * @param blobIds ids to write
+     * @throws IOException if the file cannot be opened or written
+     */
+    private void writeToFile(String fileName, Set<String> blobIds) throws IOException {
+        if (blobIds.isEmpty()) {
+            return;
+        }
+        File file = new File(directory, fileName);
+        BufferedWriter bw = Files.newWriter(file, Charsets.UTF_8);
+        try {
+            for (String id : blobIds) {
+                bw.write(id);
+                bw.newLine();
+            }
+        } finally {
+            // close in finally so the file handle is released even when a write fails
+            bw.close();
+        }
+    }
+
+    /**
+     * Creates the parent directory of the given file unless the file itself
+     * already exists.
+     *
+     * @throws IOException if the parent directory cannot be created
+     */
+    private static void ensureParentExists(File file) throws IOException {
+        if (file.exists()) {
+            // an existing file implies an existing parent
+            return;
+        }
+        FileUtils.forceMkdir(file.getParentFile());
+    }
+
+
+
+    /**
+     * Holds a set of blob ids obtained from a loader.
+     *
+     * When created with softRef = false the set is loaded once in the
+     * constructor and strongly referenced. When created with softRef = true
+     * (used for read only mode, where the provider is only needed while
+     * reindexing) nothing is loaded up front; the set is loaded on the first
+     * {@link #get()} and kept via a SoftReference so that its memory can be
+     * reclaimed. It is loaded again if the reference got cleared.
+     *
+     * This class performs no synchronization of its own.
+     */
+    static class SetHolder {
+        /** Strongly held set; null when soft references are used */
+        private final Set<String> state;
+        /** Softly held set; only used when state is null */
+        private SoftReference<Set<String>> stateRef;
+        private final Callable<Set<String>> loader;
+        /** Number of times the loader was invoked */
+        private int loadCount;
+
+        public SetHolder(Callable<Set<String>> loader, boolean softRef) {
+            this.loader = loader;
+            this.state = softRef ? null : load();
+        }
+
+        /**
+         * Returns the held set, loading it via the loader if it is softly
+         * referenced and not (or no longer) available.
+         */
+        public Set<String> get() {
+            if (state != null) {
+                return state;
+            }
+
+            Set<String> cached = (stateRef == null) ? null : stateRef.get();
+            if (cached != null) {
+                return cached;
+            }
+
+            Set<String> loaded = load();
+            stateRef = new SoftReference<Set<String>>(loaded);
+            return loaded;
+        }
+
+        public int getLoadCount() {
+            return loadCount;
+        }
+
+        /**
+         * Invokes the loader. A failure is logged and an empty set is
+         * returned instead; the load count is incremented either way.
+         */
+        private Set<String> load() {
+            loadCount++;
+            try {
+                return loader.call();
+            } catch (Exception e) {
+                log.warn("Error occurred while loading the state via {}", loader, e);
+                return Sets.newHashSet();
+            }
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.IOException;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Sink for the results of extracting text from binaries, keyed by blob id.
+ */
+public interface TextWriter {
+
+    /**
+     * Stores the text extracted from the blob with the given id.
+     *
+     * @param blobId id of the blob the text belongs to
+     * @param text extracted text
+     * @throws IOException if the text cannot be stored
+     */
+    void write(@Nonnull String blobId, @Nonnull String text) throws IOException;
+
+    /**
+     * Records that no text was extracted for the blob with the given id.
+     */
+    void markEmpty(String blobId);
+
+    /**
+     * Records that text extraction failed for the blob with the given id.
+     */
+    void markError(String blobId);
+
+    /**
+     * Checks whether a result is already known for the blob with the given id.
+     * In DataStoreTextWriter that is the case if the blob was marked as empty
+     * or error, or if a text file exists for it.
+     */
+    boolean isProcessed(String blobId);
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/TextWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.fulltext;
+
+import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+/**
+ * Result of a text extraction: the outcome plus the extracted text, if any.
+ */
+public class ExtractedText {
+    /**
+     * Possible outcomes of a text extraction
+     */
+    public enum ExtractionResult {
+        /**
+         * Text extraction succeeded and some text was extracted
+         */
+        SUCCESS,
+        /**
+         * No text was extracted. This can happen if the mimeType of the
+         * binary is part of the exclusion list
+         */
+        EMPTY,
+        /**
+         * Text extraction resulted in an error.
+         * {@link ExtractedText#getExtractedText()} might contain
+         * more details
+         */
+        ERROR
+    }
+
+    private final ExtractionResult extractionResult;
+    private final CharSequence extractedText;
+
+    /**
+     * @param extractionResult outcome of the extraction
+     * @param extractedText extracted text; must not be null for SUCCESS
+     * @throws NullPointerException if the result is SUCCESS and the text is null
+     */
+    public ExtractedText(@Nonnull ExtractionResult extractionResult, CharSequence extractedText) {
+        if (extractionResult == ExtractionResult.SUCCESS) {
+            checkNotNull(extractedText, "extractedText must not be null for SUCCESS");
+        }
+        this.extractionResult = extractionResult;
+        this.extractedText = extractedText;
+    }
+
+    @Nonnull
+    public ExtractionResult getExtractionResult() {
+        return extractionResult;
+    }
+
+    @CheckForNull
+    public CharSequence getExtractedText() {
+        return extractedText;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.fulltext;
+
+import java.io.IOException;
+
+import javax.annotation.CheckForNull;
+
+import aQute.bnd.annotation.ConsumerType;
+import org.apache.jackrabbit.oak.api.Blob;
+
+/**
+ * Provides text which was extracted from binaries ahead of time, so that
+ * it does not have to be extracted again from the binary itself.
+ */
+@ConsumerType
+public interface PreExtractedTextProvider {
+
+    /**
+     * Get pre extracted text for given blob at given path
+     *
+     * @param propertyPath path of the binary property
+     * @param blob binary property value
+     *
+     * @return pre extracted text or null if no
+     * pre extracted text found for given blob
+     * @throws IOException if the pre extracted text cannot be read
+     */
+    @CheckForNull
+    ExtractedText getText(String propertyPath, Blob blob) throws IOException;
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/PreExtractedTextProvider.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+@Version("1.0.0")
+@Export(optional = "provide:=true")
+package org.apache.jackrabbit.oak.plugins.index.fulltext;
+
+import aQute.bnd.annotation.Export;
+import aQute.bnd.annotation.Version;
\ No newline at end of file

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/package-info.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java?rev=1690247&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java Fri Jul 10 11:45:36 2015
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.core.data.DataRecord;
+import org.apache.jackrabbit.core.data.FileDataStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreTextWriter;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.TextWriter;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for DataStoreTextWriter covering the on-disk layout, lazy loading in
+ * read only mode, persistence of the empty/error markers and lookups.
+ */
+public class DataStoreTextWriterTest {
+    @Rule
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+    /**
+     * Text written by the writer must be readable through a FileDataStore
+     * rooted at the writer directory, i.e. both use the same file layout.
+     */
+    @Test
+    public void basicOperation() throws Exception {
+        File fdsDir = temporaryFolder.newFolder();
+        FileDataStore fds = createFDS(fdsDir);
+        ByteArrayInputStream is = new ByteArrayInputStream("hello".getBytes());
+        DataRecord dr = fds.addRecord(is);
+
+        File writerDir = temporaryFolder.newFolder();
+        TextWriter writer = new DataStoreTextWriter(writerDir, false);
+        // the text written equals the record content, so the file stored by the
+        // writer can be compared with the original stream below
+        writer.write(dr.getIdentifier().toString(), "hello");
+
+        FileDataStore fds2 = createFDS(writerDir);
+        DataRecord dr2 = fds2.getRecordIfStored(dr.getIdentifier());
+
+        is.reset();
+        assertTrue(IOUtils.contentEquals(is, dr2.getStream()));
+
+    }
+
+    /**
+     * In read only mode the empty/error sets must not be loaded on
+     * construction, while in write mode each is loaded exactly once.
+     */
+    @Test
+    public void noLoadingInReadOnlyMode() throws Exception{
+        DataStoreTextWriter w = new DataStoreTextWriter(temporaryFolder.getRoot(), true);
+        assertEquals(0, w.getEmptyBlobsHolder().getLoadCount());
+        assertEquals(0, w.getErrorBlobsHolder().getLoadCount());
+
+        DataStoreTextWriter w1 = new DataStoreTextWriter(temporaryFolder.getRoot(), false);
+        assertEquals(1, w1.getEmptyBlobsHolder().getLoadCount());
+        assertEquals(1, w1.getErrorBlobsHolder().getLoadCount());
+    }
+
+    /**
+     * Empty and error markers are persisted on close and are visible to a
+     * new read only instance using the same directory.
+     */
+    @Test
+    public void checkEmptyAndErrorBlobs() throws Exception{
+        DataStoreTextWriter w = new DataStoreTextWriter(temporaryFolder.getRoot(), false);
+        w.markEmpty("a");
+        w.markError("b");
+        w.close();
+
+        DataStoreTextWriter w2 = new DataStoreTextWriter(temporaryFolder.getRoot(), true);
+        assertEquals(ExtractionResult.EMPTY, w2.getText("/a", new IdBlob("foo", "a")).getExtractionResult());
+        assertEquals(ExtractionResult.ERROR, w2.getText("/a", new IdBlob("foo", "b")).getExtractionResult());
+    }
+
+    /**
+     * An unknown blob is reported as not processed and yields no text; after
+     * a write or a markEmpty call it is reported as processed.
+     */
+    @Test
+    public void nonExistingEntry() throws Exception{
+        File fdsDir = temporaryFolder.newFolder();
+        FileDataStore fds = createFDS(fdsDir);
+        ByteArrayInputStream is = new ByteArrayInputStream("hello".getBytes());
+        DataRecord dr = fds.addRecord(is);
+
+        File writerDir = temporaryFolder.newFolder();
+        DataStoreTextWriter w = new DataStoreTextWriter(writerDir, false);
+        String id = dr.getIdentifier().toString();
+        assertFalse(w.isProcessed(id));
+        assertNull(w.getText("/a", new IdBlob("foo", id)));
+
+        w.write(id, "foo");
+        assertTrue(w.isProcessed(id));
+        ExtractedText et = w.getText("/a", new IdBlob("foo", id));
+        assertEquals("foo", et.getExtractedText());
+        assertEquals(ExtractionResult.SUCCESS, et.getExtractionResult());
+
+        w.markEmpty("a");
+        assertTrue(w.isProcessed("a"));
+
+    }
+
+    /**
+     * Creates and initialises a FileDataStore rooted at the given directory
+     * with the minimum record length set to 0.
+     */
+    private FileDataStore createFDS(File root) {
+        FileDataStore fds = new FileDataStore();
+        fds.setPath(root.getAbsolutePath());
+        fds.setMinRecordLength(0);
+        fds.init(null);
+        return fds;
+    }
+
+    /**
+     * In-memory blob which reports the given id as its content identity.
+     */
+    private static class IdBlob extends ArrayBasedBlob {
+        final String id;
+
+        public IdBlob(String value, String id) {
+            super(value.getBytes());
+            this.id = id;
+        }
+
+        @Override
+        public String getContentIdentity() {
+            return id;
+        }
+    }
+}
\ No newline at end of file

Propchange: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native