You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/03/19 21:48:51 UTC

tika git commit: TIKA-1904 - Create Proxy Parser and Detectors

Repository: tika
Updated Branches:
  refs/heads/2.x c58af959b -> 74e998d0f


TIKA-1904 - Create Proxy Parser and Detectors

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/74e998d0
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/74e998d0
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/74e998d0

Branch: refs/heads/2.x
Commit: 74e998d0ff359813dc06c695a7e786694c818932
Parents: c58af95
Author: Bob Paulin <bo...@apache.org>
Authored: Sat Mar 19 15:48:42 2016 -0500
Committer: Bob Paulin <bo...@apache.org>
Committed: Sat Mar 19 15:48:42 2016 -0500

----------------------------------------------------------------------
 .../org/apache/tika/detect/DetectorProxy.java   | 67 ++++++++++++++++
 .../org/apache/tika/parser/ParserProxy.java     | 83 ++++++++++++++++++++
 .../apache/tika/detect/DetectorProxyTest.java   | 54 +++++++++++++
 .../apache/tika/detect/DummyProxyDetector.java  | 31 ++++++++
 .../apache/tika/parser/DummyProxyParser.java    | 44 +++++++++++
 .../org/apache/tika/parser/ParserProxyTest.java | 63 +++++++++++++++
 6 files changed, 342 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java b/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java
new file mode 100644
index 0000000..5714cd3
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * This detector is a proxy for another detector 
+ * this allows modules to use detectors from other modules
+ * as optional dependencies since not including the classes
+ * simply does nothing rather than throwing a ClassNotFoundException.
+ *
+ * @since Apache Tika 2.0
+ */
+public class DetectorProxy implements Detector
+{
+    private static final long serialVersionUID = 4534101565629801667L;
+    
+    private Detector detector;
+    
+    public DetectorProxy(String detectorClassName) 
+    {
+        this(detectorClassName, LoadErrorHandler.WARN);
+    }
+    
+    public DetectorProxy(String detectorClassName, LoadErrorHandler handler) 
+    {
+        try 
+        {
+            this.detector = (Detector)Class.forName(detectorClassName).newInstance();
+        } 
+        catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) 
+        {
+            handler.handleLoadError(detectorClassName, e);
+        }
+    }
+
+    @Override
+    public MediaType detect(InputStream input, Metadata metadata) throws IOException 
+    {
+        if(detector != null)
+        {
+            return detector.detect(input, metadata);
+        }
+        return null;
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java b/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java
new file mode 100644
index 0000000..b664c0a
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * This parser is a proxy for another detector 
+ * this allows modules to use parsers from other modules
+ * as optional dependencies since not including the classes
+ * simply does nothing rather than throwing a ClassNotFoundException.
+ *
+ * @since Apache Tika 2.0
+ */
+public class ParserProxy extends AbstractParser 
+{
+    
+    private static final long serialVersionUID = -4838436708916910179L;
+    private Parser parser;
+    
+    public ParserProxy(String parserClassName) 
+    {
+        this(parserClassName, LoadErrorHandler.WARN);
+    }
+    
+    public ParserProxy(String parserClassName, LoadErrorHandler handler) 
+    {
+            try 
+            {
+                this.parser = (Parser)Class.forName(parserClassName).newInstance();
+            } 
+            catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) 
+            {
+                handler.handleLoadError(parserClassName, e);
+            }
+        
+    }
+    
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) 
+    {
+        if (parser == null)
+        {
+            return Collections.emptySet();
+        }
+        return parser.getSupportedTypes(context);
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException 
+    {
+        if(parser != null)
+        {
+            parser.parse(stream, handler, metadata, context);
+        }
+        //Otherwise do nothing
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java b/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java
new file mode 100644
index 0000000..800413d
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.mime.MediaType;
+import org.junit.Test;
+
+/**
+ * Tests that {@link DetectorProxy} delegates to a detector class that can be
+ * loaded and quietly yields <code>null</code> for one that cannot.
+ */
+public class DetectorProxyTest 
+{
+    /** A loadable detector class must have its result passed through. */
+    @Test
+    public void testDetectorProxyExists() throws IOException 
+    {
+        Detector dummyDetector = new DetectorProxy("org.apache.tika.detect.DummyProxyDetector",
+                LoadErrorHandler.IGNORE);
+        
+        MediaType result = dummyDetector.detect(null, null);
+        
+        assertEquals("Detector being proxied exists so result should not be null", 
+                MediaType.TEXT_PLAIN, result );
+        
+    }
+    
+    /** An unknown detector class must not fail; detection yields null. */
+    @Test
+    public void testDetectorProxyNotExists() throws IOException 
+    {
+        Detector dummyDetector = new DetectorProxy("org.apache.tika.detect.DoesNotExist",
+                LoadErrorHandler.IGNORE);
+        
+        MediaType result = dummyDetector.detect(null, null);
+        
+        assertNull("Detector being proxied does not exist so result should be null", result );
+        
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java b/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java
new file mode 100644
index 0000000..a11b584
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+public class DummyProxyDetector implements Detector
+{
+    @Override
+    public MediaType detect(InputStream input, Metadata metadata) throws IOException {
+        return MediaType.TEXT_PLAIN;
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java
new file mode 100644
index 0000000..ca766c9
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Trivial {@link Parser} that serves as the load target for the parser proxy
+ * test. It declares no supported media types and its only observable effect
+ * is adding a single entry to the supplied metadata.
+ */
+public class DummyProxyParser extends AbstractParser 
+{
+    /**
+     * Declares no supported types.
+     *
+     * @param context ignored
+     * @return an empty set, never <code>null</code>
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) 
+    {
+        // Return an empty set rather than null so callers can iterate the
+        // result safely; fully qualified to avoid touching the import block.
+        return java.util.Collections.<MediaType>emptySet();
+    }
+    
+    /**
+     * Adds the entry <code>Test=value</code> to the metadata; the stream,
+     * handler and context are not used.
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException 
+    {
+        metadata.add("Test", "value");
+        
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java
new file mode 100644
index 0000000..13a8665
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.tika.config.LoadErrorHandler;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+/**
+ * Tests that {@link ParserProxy} delegates to a parser class that can be
+ * loaded and silently does nothing for one that cannot.
+ */
+public class ParserProxyTest 
+{
+
+    /**
+     * Builds a proxy for the given class name with load errors ignored, runs
+     * a parse against fresh metadata and returns that metadata.
+     */
+    private static Metadata parseThroughProxy(String parserClassName)
+            throws IOException, SAXException, TikaException 
+    {
+        Metadata collected = new Metadata();
+        Parser proxy = new ParserProxy(parserClassName, LoadErrorHandler.IGNORE);
+        proxy.parse(null, null, collected, null);
+        return collected;
+    }
+
+    /** A loadable parser class must be invoked, leaving one metadata entry. */
+    @Test
+    public void testParserProxyExists() throws IOException, SAXException, TikaException 
+    {
+        Metadata afterParse = parseThroughProxy("org.apache.tika.parser.DummyProxyParser");
+
+        assertEquals("Parser being proxied exists so metadata should be added", 
+                1, afterParse.size());
+    }
+
+    /** An unknown parser class must leave the metadata untouched. */
+    @Test
+    public void testParserProxyNotExists() throws IOException, SAXException, TikaException 
+    {
+        Metadata afterParse = parseThroughProxy("org.apache.tika.parser.NotExists");
+
+        assertEquals("Parser being proxied doesn't exist so metadata not change", 
+                0, afterParse.size());
+    }
+
+}