You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/01/17 01:48:55 UTC

svn commit: r1725053 - in /tika/branches/2.x: tika-parser-modules/ tika-parser-modules/tika-parser-advanced-module/ tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/parser/crypto/ tika-parser-modules/tika-parser-advanced-mo...

Author: bob
Date: Sun Jan 17 00:48:55 2016
New Revision: 1725053

URL: http://svn.apache.org/viewvc?rev=1725053&view=rev
Log:
TIKA-1832 Decouple Crypto module from advanced.

Added:
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/
    tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java
Removed:
    tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/parser/crypto/
    tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/src/main/resources/META-INF/
    tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/crypto/
Modified:
    tika/branches/2.x/tika-parser-modules/pom.xml
    tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml
    tika/branches/2.x/tika-parsers/pom.xml

Modified: tika/branches/2.x/tika-parser-modules/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/pom.xml?rev=1725053&r1=1725052&r2=1725053&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/pom.xml (original)
+++ tika/branches/2.x/tika-parser-modules/pom.xml Sun Jan 17 00:48:55 2016
@@ -45,6 +45,7 @@
     <module>tika-parser-advanced-module</module>
     <module>tika-parser-cad-module</module>
     <module>tika-parser-code-module</module>
+    <module>tika-parser-crypto-module</module>
     <module>tika-parser-database-module</module>
     <module>tika-parser-ebook-module</module>
     <module>tika-parser-journal-module</module>

Modified: tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml?rev=1725053&r1=1725052&r2=1725053&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml (original)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml Sun Jan 17 00:48:55 2016
@@ -38,11 +38,6 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.bouncycastle</groupId>
-      <artifactId>bcmail-jdk15on</artifactId>
-      <version>1.52</version>
-    </dependency>
-    <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
       <version>${commons.io.version}</version>

Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml Sun Jan 17 00:48:55 2016
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  you under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-parser-modules</artifactId>
+    <version>2.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>tika-parser-crypto-module</artifactId>
+  <name>Apache Tika parser crypto module</name>
+  <url>http://tika.apache.org/</url>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+        <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcmail-jdk15on</artifactId>
+      <version>1.52</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons.io.version}</version>
+    </dependency>
+  </dependencies>
+  
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+</project>
\ No newline at end of file

Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java Sun Jan 17 00:48:55 2016
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.crypto;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Set;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.EmptyParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.bouncycastle.cms.CMSException;
+import org.bouncycastle.cms.CMSSignedDataParser;
+import org.bouncycastle.cms.CMSTypedStream;
+import org.bouncycastle.operator.DigestCalculatorProvider;
+import org.bouncycastle.operator.OperatorCreationException;
+import org.bouncycastle.operator.jcajce.JcaDigestCalculatorProviderBuilder;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Basic parser for PKCS7 data: reads the signed content of a CMS structure and hands it to the {@link Parser} found in the {@link ParseContext}.
+ */
+public class Pkcs7Parser extends AbstractParser {
+
+    /** Serial version UID (fixed explicitly rather than left to the compiler-generated default). */
+    private static final long serialVersionUID = -7310531559075115044L;
+
+    private static final MediaType PKCS7_MIME = // media type with subtype "pkcs7-mime"
+            MediaType.application("pkcs7-mime");
+
+    private static final MediaType PKCS7_SIGNATURE = // media type with subtype "pkcs7-signature"
+            MediaType.application("pkcs7-signature");
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) { // the context argument is not consulted; the set is always the same
+        return MediaType.set(PKCS7_MIME, PKCS7_SIGNATURE);
+    }
+
+    public void parse( // extracts the signed content from the stream and delegates its parsing; throws TikaException when there is none
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        try {
+            DigestCalculatorProvider digestCalculatorProvider =
+                    new JcaDigestCalculatorProviderBuilder().setProvider("BC").build(); // NOTE(review): "BC" is presumably the Bouncy Castle JCA provider - confirm it is registered at runtime
+            CMSSignedDataParser parser =
+                    new CMSSignedDataParser(digestCalculatorProvider, new CloseShieldInputStream(stream)); // shield the caller's stream so closing the CMS parser below does not close it
+            try {
+                CMSTypedStream content = parser.getSignedContent();
+                if (content == null) { // no embedded content: treated as a detached signature and rejected
+                    throw new TikaException("cannot parse detached pkcs7 signature (no signed data to parse)");
+                }
+                try (InputStream input = content.getContentStream()) { // the content stream is closed once the delegate returns or throws
+                    Parser delegate =
+                            context.get(Parser.class, EmptyParser.INSTANCE); // EmptyParser.INSTANCE is the default passed for when the context supplies no Parser
+                    delegate.parse(input, handler, metadata, context);
+                }
+            } finally {
+                parser.close(); // always release the CMS parser, including on the detached-signature failure path
+            }
+        } catch (OperatorCreationException e) { // Bouncy Castle failures are rethrown as TikaException with the cause preserved
+            throw new TikaException("Unable to create DigestCalculatorProvider", e);
+        } catch (CMSException e) {
+            throw new TikaException("Unable to parse pkcs7 signed data", e);
+        }
+    }
+
+}

Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Sun Jan 17 00:48:55 2016
@@ -0,0 +1,17 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+
+org.apache.tika.parser.crypto.Pkcs7Parser

Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java Sun Jan 17 00:48:55 2016
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.crypto;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+public class Pkcs7ParserTest extends TikaTest { // exercises Pkcs7Parser against a detached-signature test document
+    public void testDetachedSignature() throws Exception { // NOTE(review): no @Test annotation and org.junit.Test is not imported - confirm a runner actually executes this method
+        try (InputStream input = Pkcs7ParserTest.class.getResourceAsStream(
+                "/test-documents/testDetached.p7s")) { // test document loaded from the classpath
+            ContentHandler handler = new BodyContentHandler();
+            Metadata metadata = new Metadata();
+            new Pkcs7Parser().parse(input, handler, metadata, new ParseContext()); // empty ParseContext: no delegate Parser is registered
+        } catch (NullPointerException npe) { // an NPE from the parser is reported as an explicit test failure
+            fail("should not get NPE");
+        } catch (TikaException te) { // a TikaException is accepted only if it carries the detached-signature message
+            assertTrue(te.toString().contains("cannot parse detached pkcs7 signature"));
+        } // NOTE(review): the test also passes when parse() throws nothing at all - confirm that is intended
+    }
+}

Modified: tika/branches/2.x/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parsers/pom.xml?rev=1725053&r1=1725052&r2=1725053&view=diff
==============================================================================
--- tika/branches/2.x/tika-parsers/pom.xml (original)
+++ tika/branches/2.x/tika-parsers/pom.xml Sun Jan 17 00:48:55 2016
@@ -167,6 +167,11 @@
     </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-crypto-module</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
       <artifactId>tika-parser-database-module</artifactId>
       <version>${project.version}</version>
     </dependency>
@@ -326,6 +331,7 @@
                   <include>org.apache.tika:tika-parser-advanced-module</include>
                   <include>org.apache.tika:tika-parser-cad-module</include>
                   <include>org.apache.tika:tika-parser-code-module</include>
+                  <include>org.apache.tika:tika-parser-crypto-module</include>
                   <include>org.apache.tika:tika-parser-database-module</include>
                   <include>org.apache.tika:tika-parser-ebook-module</include>
                   <include>org.apache.tika:tika-parser-journal-module</include>