You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/05/21 12:24:05 UTC
svn commit: r1680806 - in /jackrabbit/oak/trunk:
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/
oak-core/src/main/java/org/apache/jackrabbit/oak/util/
oak-lucene/src/main/...
Author: chetanm
Date: Thu May 21 10:24:05 2015
New Revision: 1680806
URL: http://svn.apache.org/r1680806
Log:
OAK-2895 - Avoid accessing binary content if the mimeType is excluded from indexing
-- Use TypeDetector instead of DefaultDetector to avoid Tika sniffing the mimeType by reading the input stream
-- Use a LazyInputStream to lazily load the stream if and when required
Added:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/LazyInputStream.java (with props)
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/package-info.java (with props)
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/LazyInputStreamTest.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java (with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
jackrabbit/oak/trunk/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/LazyInputStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/LazyInputStream.java?rev=1680806&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/LazyInputStream.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/LazyInputStream.java Thu May 21 10:24:05 2015
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.io;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+
+import com.google.common.io.ByteSource;
+import org.apache.commons.io.input.ClosedInputStream;
+
+/**
+ * An input stream that delays opening the underlying {@link ByteSource}
+ * until the stream is actually used.
+ *
+ * <p>The source is opened on the first call to any of the read methods,
+ * {@link #skip(long)}, {@link #available()}, {@link #mark(int)},
+ * {@link #reset()} or {@link #markSupported()}. Calling {@link #close()}
+ * on a stream that was never used does not open the source at all.
+ */
+public class LazyInputStream extends FilterInputStream {
+    /** Source the delegate stream is obtained from on first use. */
+    private final ByteSource byteSource;
+
+    /**
+     * {@code true} once the delegate has been opened successfully or the
+     * stream has been closed; no further attempt to open is made afterwards.
+     */
+    private boolean opened;
+
+    /**
+     * Creates a lazy stream over the given source. The source is not
+     * touched by the constructor.
+     *
+     * @param byteSource source to open on first use
+     */
+    public LazyInputStream(ByteSource byteSource) {
+        // the delegate stays null until ensureOpen() or close() sets it
+        super(null);
+        this.byteSource = byteSource;
+    }
+
+    @Override
+    public int read() throws IOException {
+        ensureOpen();
+        return super.read();
+    }
+
+    @Override
+    public int read(byte[] b) throws IOException {
+        ensureOpen();
+        return super.read(b);
+    }
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        ensureOpen();
+        return super.read(b, off, len);
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        ensureOpen();
+        return super.skip(n);
+    }
+
+    @Override
+    public int available() throws IOException {
+        ensureOpen();
+        return super.available();
+    }
+
+    /**
+     * Closes the delegate if it was opened. If the source was never opened
+     * it stays untouched and a shared, already closed stream is installed
+     * as the delegate so that later calls do not hit a null delegate.
+     */
+    @Override
+    public void close() throws IOException {
+        // make sure the source is not opened afterwards
+        opened = true;
+
+        // only close the delegate if it was in fact opened
+        if (in != null) {
+            super.close();
+        } else {
+            in = ClosedInputStream.CLOSED_INPUT_STREAM;
+        }
+    }
+
+    /**
+     * Opens the source if required and forwards to the delegate.
+     *
+     * @throws IllegalStateException if opening the source fails
+     */
+    @Override
+    public synchronized void mark(int readlimit) {
+        ensureOpenWithUnCheckedException();
+        super.mark(readlimit);
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        ensureOpen();
+        super.reset();
+    }
+
+    /**
+     * Opens the source if required and reports whether the delegate
+     * supports mark/reset.
+     *
+     * @throws IllegalStateException if opening the source fails
+     */
+    @Override
+    public boolean markSupported() {
+        ensureOpenWithUnCheckedException();
+        return super.markSupported();
+    }
+
+    /**
+     * Opens the delegate stream on first use.
+     *
+     * @throws IOException if the source cannot be opened
+     */
+    private void ensureOpen() throws IOException {
+        if (!opened) {
+            // Assign the delegate before flipping the flag. If openStream()
+            // throws, the flag stays false, so a later call reports the
+            // failure as an IOException again instead of running into a
+            // NullPointerException on the still-null delegate.
+            in = byteSource.openStream();
+            opened = true;
+        }
+    }
+
+    /**
+     * Variant of {@link #ensureOpen()} for methods that cannot declare
+     * {@link IOException}; the failure is wrapped, keeping the cause.
+     */
+    private void ensureOpenWithUnCheckedException(){
+        try {
+            ensureOpen();
+        } catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/LazyInputStream.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/package-info.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/package-info.java?rev=1680806&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/package-info.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/package-info.java Thu May 21 10:24:05 2015
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+@Version("1.0")
+@Export(optional = "provide:=true")
+package org.apache.jackrabbit.oak.commons.io;
+
+import aQute.bnd.annotation.Export;
+import aQute.bnd.annotation.Version;
\ No newline at end of file
Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/io/package-info.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/LazyInputStreamTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/LazyInputStreamTest.java?rev=1680806&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/LazyInputStreamTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/LazyInputStreamTest.java Thu May 21 10:24:05 2015
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.io;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static com.google.common.io.Files.asByteSource;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.fail;
+
+/**
+ * Tests the LazyInputStream class against a temporary one-byte file.
+ */
+public class LazyInputStreamTest {
+
+    /** One-byte file (a single zero byte) created by {@link #createFile()}. */
+    private File file;
+
+    @Rule
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+    @Test
+    public void test() throws IOException {
+        createFile();
+
+        // test open / close (without reading)
+        LazyInputStream in = new LazyInputStream(asByteSource(file));
+        in.close();
+
+        // test reading too much and closing too much
+        in = new LazyInputStream(asByteSource(file));
+        assertEquals(0, in.read());
+        assertEquals(-1, in.read());
+        assertEquals(-1, in.read());
+        assertEquals(-1, in.read());
+        in.close();
+        in.close();
+        in.close();
+
+        // test markSupported, mark, and reset
+        in = new LazyInputStream(asByteSource(file));
+        assertFalse(in.markSupported());
+        in.mark(1);
+        assertEquals(0, in.read());
+        try {
+            in.reset();
+            fail();
+        } catch (IOException e) {
+            // expected
+        }
+        assertEquals(-1, in.read());
+        in.close();
+
+        // test read(byte[])
+        in = new LazyInputStream(asByteSource(file));
+        byte[] test = new byte[2];
+        assertEquals(1, in.read(test));
+        in.close();
+
+        // test read(byte[],int,int)
+        in = new LazyInputStream(asByteSource(file));
+        assertEquals(1, in.read(test, 0, 2));
+        in.close();
+
+        // test skip: 2 is expected back although the file holds one byte
+        in = new LazyInputStream(asByteSource(file));
+        assertEquals(2, in.skip(2));
+        assertEquals(-1, in.read(test));
+        in.close();
+
+        createFile();
+
+        // read a freshly created file up to EOF, then close explicitly
+        in = new LazyInputStream(asByteSource(file));
+        assertEquals(0, in.read());
+        assertEquals(-1, in.read());
+
+        in.close();
+
+        file.delete();
+
+    }
+
+    /**
+     * Creates a new temporary file containing a single zero byte and
+     * stores it in {@link #file}.
+     */
+    private void createFile() throws IOException {
+        file = temporaryFolder.newFile();
+        FileOutputStream out = new FileOutputStream(file);
+        try {
+            out.write(new byte[1]);
+        } finally {
+            // release the file handle even if the write fails
+            out.close();
+        }
+    }
+
+}
Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/io/LazyInputStreamTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java?rev=1680806&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java Thu May 21 10:24:05 2015
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.google.common.io.ByteSource;
+import org.apache.jackrabbit.oak.api.Blob;
+
+/**
+ * {@link ByteSource} backed by a {@link Blob}. Every call is forwarded
+ * to the wrapped blob; nothing is cached here.
+ */
+public final class BlobByteSource extends ByteSource {
+    /** Blob all calls are delegated to. */
+    private final Blob blob;
+
+    /**
+     * @param blob blob to expose as a byte source
+     */
+    public BlobByteSource(Blob blob) {
+        this.blob = blob;
+    }
+
+    /** Returns the blob's length. */
+    @Override
+    public long size() throws IOException {
+        return blob.length();
+    }
+
+    /** Returns {@code true} if the blob's length is zero. */
+    @Override
+    public boolean isEmpty() throws IOException {
+        return size() == 0L;
+    }
+
+    /** Returns a new stream obtained from the blob on each call. */
+    @Override
+    public InputStream openStream() throws IOException {
+        return blob.getNewStream();
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/util/BlobByteSource.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1680806&r1=1680805&r2=1680806&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Thu May 21 10:24:05 2015
@@ -46,6 +46,7 @@ import org.apache.jackrabbit.oak.api.Pro
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
import org.apache.jackrabbit.oak.plugins.index.PathFilter;
@@ -55,6 +56,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.spi.commit.Editor;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.util.BlobByteSource;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoubleField;
@@ -803,7 +805,7 @@ public class LuceneIndexEditor implement
long start = System.currentTimeMillis();
long size = 0;
try {
- CountingInputStream stream = new CountingInputStream(v.getNewStream());
+ CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
try {
context.getParser().parse(stream, handler, metadata, new ParseContext());
} finally {
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml?rev=1680806&r1=1680805&r2=1680806&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/resources/org/apache/jackrabbit/oak/plugins/index/lucene/tika-config.xml Thu May 21 10:24:05 2015
@@ -21,7 +21,7 @@
<properties>
<detectors>
- <detector class="org.apache.tika.detect.DefaultDetector"/>
+ <detector class="org.apache.tika.detect.TypeDetector"/>
</detectors>
<parsers>
<parser class="org.apache.tika.parser.DefaultParser"/>
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1680806&r1=1680805&r2=1680806&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Thu May 21 10:24:05 2015
@@ -19,6 +19,7 @@
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import java.io.InputStream;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Collections;
@@ -26,16 +27,20 @@ import java.util.List;
import java.util.Random;
import java.util.Set;
+import javax.annotation.Nonnull;
import javax.jcr.PropertyType;
+import com.google.common.base.Charsets;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.io.CountingInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.api.PropertyValue;
@@ -45,6 +50,7 @@ import org.apache.jackrabbit.oak.api.Typ
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
@@ -79,6 +85,7 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.containsString;
@@ -1208,6 +1215,22 @@ public class LucenePropertyIndexTest ext
}
@Test
+ public void excludedBlobContentNotAccessed() throws Exception{
+     // full text index definition named "test" on the root tree
+     Tree indexDefinition = createFulltextIndex(root.getTree("/"), "test");
+     TestUtil.useV2(indexDefinition);
+
+     // blob that records whether its stream was requested and how much was read
+     final String xmlContent =
+             "<?xml version=\"1.0\" encoding=\"UTF-8\"?><msg>sky is blue</msg>";
+     AccessStateProvidingBlob trackedBlob = new AccessStateProvidingBlob(xmlContent);
+
+     // store the blob as a file node declared as application/zip and commit
+     Tree parent = root.getTree("/").addChild("test");
+     createFileNode(parent, "zip", trackedBlob, "application/zip");
+     root.commit();
+
+     // the commit must not have opened the stream nor read any byte of it
+     assertFalse(trackedBlob.isStreamAccessed());
+     assertEquals(0, trackedBlob.readByteCount());
+ }
+
+ @Test
public void maxFieldLengthCheck() throws Exception{
Tree idx = createFulltextIndex(root.getTree("/"), "test");
TestUtil.useV2(idx);
@@ -1312,8 +1335,12 @@ public class LucenePropertyIndexTest ext
}
private Tree createFileNode(Tree tree, String name, String content, String mimeType){
+ // Encode explicitly as UTF-8 rather than with the platform default charset,
+ // so the stored bytes do not depend on the machine running the test.
+ return createFileNode(tree, name, new ArrayBasedBlob(content.getBytes(Charsets.UTF_8)), mimeType);
+ }
+
+ private Tree createFileNode(Tree tree, String name, Blob content, String mimeType){
Tree jcrContent = tree.addChild(name).addChild(JCR_CONTENT);
- jcrContent.setProperty(JcrConstants.JCR_DATA, content.getBytes());
+ jcrContent.setProperty(JcrConstants.JCR_DATA, content);
jcrContent.setProperty(JcrConstants.JCR_MIMETYPE, mimeType);
return jcrContent;
}
@@ -1483,4 +1510,38 @@ public class LucenePropertyIndexTest ext
'}';
}
}
+
+ private static class AccessStateProvidingBlob extends ArrayBasedBlob {
+ private CountingInputStream stream;
+
+ public AccessStateProvidingBlob(byte[] value) {
+ super(value);
+ }
+
+ public AccessStateProvidingBlob(String content) {
+ this(content.getBytes(Charsets.UTF_8));
+ }
+
+ @Nonnull
+ @Override
+ public InputStream getNewStream() {
+ stream = new CountingInputStream(super.getNewStream());
+ return stream;
+ }
+
+ public boolean isStreamAccessed() {
+ return stream != null;
+ }
+
+ public void resetState(){
+ stream = null;
+ }
+
+ public long readByteCount(){
+ if (stream == null){
+ return 0;
+ }
+ return stream.getCount();
+ }
+ }
}