You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/03/19 21:48:51 UTC
tika git commit: TIKA-1904 - Create Proxy Parser and Detectors
Repository: tika
Updated Branches:
refs/heads/2.x c58af959b -> 74e998d0f
TIKA-1904 - Create Proxy Parser and Detectors
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/74e998d0
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/74e998d0
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/74e998d0
Branch: refs/heads/2.x
Commit: 74e998d0ff359813dc06c695a7e786694c818932
Parents: c58af95
Author: Bob Paulin <bo...@apache.org>
Authored: Sat Mar 19 15:48:42 2016 -0500
Committer: Bob Paulin <bo...@apache.org>
Committed: Sat Mar 19 15:48:42 2016 -0500
----------------------------------------------------------------------
.../org/apache/tika/detect/DetectorProxy.java | 67 ++++++++++++++++
.../org/apache/tika/parser/ParserProxy.java | 83 ++++++++++++++++++++
.../apache/tika/detect/DetectorProxyTest.java | 54 +++++++++++++
.../apache/tika/detect/DummyProxyDetector.java | 31 ++++++++
.../apache/tika/parser/DummyProxyParser.java | 44 +++++++++++
.../org/apache/tika/parser/ParserProxyTest.java | 63 +++++++++++++++
6 files changed, 342 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java b/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java
new file mode 100644
index 0000000..5714cd3
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * This detector is a proxy for another detector
+ * this allows modules to use detectors from other modules
+ * as optional dependencies since not including the classes
+ * simply does nothing rather than throwing a ClassNotFoundException.
+ *
+ * @since Apache Tika 2.0
+ */
+public class DetectorProxy implements Detector
+{
+ private static final long serialVersionUID = 4534101565629801667L;
+
+ private Detector detector;
+
+ public DetectorProxy(String detectorClassName)
+ {
+ this(detectorClassName, LoadErrorHandler.WARN);
+ }
+
+ public DetectorProxy(String detectorClassName, LoadErrorHandler handler)
+ {
+ try
+ {
+ this.detector = (Detector)Class.forName(detectorClassName).newInstance();
+ }
+ catch (InstantiationException | IllegalAccessException | ClassNotFoundException e)
+ {
+ handler.handleLoadError(detectorClassName, e);
+ }
+ }
+
+ @Override
+ public MediaType detect(InputStream input, Metadata metadata) throws IOException
+ {
+ if(detector != null)
+ {
+ return detector.detect(input, metadata);
+ }
+ return null;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java b/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java
new file mode 100644
index 0000000..b664c0a
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * This parser is a proxy for another detector
+ * this allows modules to use parsers from other modules
+ * as optional dependencies since not including the classes
+ * simply does nothing rather than throwing a ClassNotFoundException.
+ *
+ * @since Apache Tika 2.0
+ */
+public class ParserProxy extends AbstractParser
+{
+
+ private static final long serialVersionUID = -4838436708916910179L;
+ private Parser parser;
+
+ public ParserProxy(String parserClassName)
+ {
+ this(parserClassName, LoadErrorHandler.WARN);
+ }
+
+ public ParserProxy(String parserClassName, LoadErrorHandler handler)
+ {
+ try
+ {
+ this.parser = (Parser)Class.forName(parserClassName).newInstance();
+ }
+ catch (InstantiationException | IllegalAccessException | ClassNotFoundException e)
+ {
+ handler.handleLoadError(parserClassName, e);
+ }
+
+ }
+
+ @Override
+ public Set<MediaType> getSupportedTypes(ParseContext context)
+ {
+ if (parser == null)
+ {
+ return Collections.emptySet();
+ }
+ return parser.getSupportedTypes(context);
+ }
+
+ @Override
+ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+ throws IOException, SAXException, TikaException
+ {
+ if(parser != null)
+ {
+ parser.parse(stream, handler, metadata, context);
+ }
+ //Otherwise do nothing
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java b/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java
new file mode 100644
index 0000000..800413d
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.mime.MediaType;
+import org.junit.Test;
+
+/** Exercises {@link DetectorProxy} with a present and a missing target class. */
+public class DetectorProxyTest {
+    /** A proxy whose target class loads must return the target's result. */
+    @Test
+    public void testDetectorProxyExists() throws IOException {
+        Detector proxy = new DetectorProxy(
+                "org.apache.tika.detect.DummyProxyDetector", LoadErrorHandler.IGNORE);
+
+        MediaType detected = proxy.detect(null, null);
+
+        assertEquals("Detector being proxied exists so result should not be null",
+                MediaType.TEXT_PLAIN, detected);
+    }
+
+    /**
+     * A proxy whose target class is missing must return {@code null}.
+     * NOTE(review): presumably IGNORE swallows the load failure - confirm.
+     */
+    @Test
+    public void testDetectorProxyNotExists() throws IOException {
+        Detector proxy = new DetectorProxy(
+                "org.apache.tika.detect.DoesNotExist", LoadErrorHandler.IGNORE);
+
+        MediaType detected = proxy.detect(null, null);
+
+        assertNull("Detector being proxied does not exist so result should be null", detected);
+    }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java b/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java
new file mode 100644
index 0000000..a11b584
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/** Test stub that ignores its arguments and returns {@link MediaType#TEXT_PLAIN}. */
+public class DummyProxyDetector implements Detector {
+    @Override
+    public MediaType detect(InputStream input, Metadata metadata) throws IOException {
+        return MediaType.TEXT_PLAIN;
+    }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java
new file mode 100644
index 0000000..ca766c9
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/** Test stub parser that records one metadata entry each time it is asked to parse. */
+public class DummyProxyParser extends AbstractParser {
+    /** @return an empty set rather than {@code null}, so callers can iterate safely */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return java.util.Collections.<MediaType>emptySet();
+    }
+
+    /** Adds the entry {@code Test=value} to {@code metadata}; other arguments are unused. */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        // The marker entry lets a caller observe that parse() was reached.
+        metadata.add("Test", "value");
+    }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java
new file mode 100644
index 0000000..13a8665
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+public class ParserProxyTest
+{
+
+ @Test
+ public void testParserProxyExists() throws IOException, SAXException, TikaException
+ {
+ Parser dummyParser = new ParserProxy("org.apache.tika.parser.DummyProxyParser",
+ LoadErrorHandler.IGNORE);
+
+ Metadata metadata = new Metadata();
+
+ dummyParser.parse(null, null, metadata, null);
+
+ assertEquals("Parser being proxied exists so metadata should be added",
+ 1, metadata.size());
+
+ }
+
+ @Test
+ public void testParserProxyNotExists() throws IOException, SAXException, TikaException
+ {
+ Parser dummyParser = new ParserProxy("org.apache.tika.parser.NotExists",
+ LoadErrorHandler.IGNORE);
+
+ Metadata metadata = new Metadata();
+
+ dummyParser.parse(null, null, metadata, null);
+
+ assertEquals("Parser being proxied doesn't exist so metadata not change",
+ 0, metadata.size());
+
+ }
+
+
+}