You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/01/17 01:48:55 UTC
svn commit: r1725053 - in /tika/branches/2.x: tika-parser-modules/
tika-parser-modules/tika-parser-advanced-module/
tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/parser/crypto/
tika-parser-modules/tika-parser-advanced-mo...
Author: bob
Date: Sun Jan 17 00:48:55 2016
New Revision: 1725053
URL: http://svn.apache.org/viewvc?rev=1725053&view=rev
Log:
TIKA-1832 Decouple Crypto module from advanced.
Added:
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/
tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java
Removed:
tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/src/main/java/org/apache/tika/parser/crypto/
tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/src/main/resources/META-INF/
tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/src/test/java/org/apache/tika/parser/crypto/
Modified:
tika/branches/2.x/tika-parser-modules/pom.xml
tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml
tika/branches/2.x/tika-parsers/pom.xml
Modified: tika/branches/2.x/tika-parser-modules/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/pom.xml?rev=1725053&r1=1725052&r2=1725053&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/pom.xml (original)
+++ tika/branches/2.x/tika-parser-modules/pom.xml Sun Jan 17 00:48:55 2016
@@ -45,6 +45,7 @@
<module>tika-parser-advanced-module</module>
<module>tika-parser-cad-module</module>
<module>tika-parser-code-module</module>
+ <module>tika-parser-crypto-module</module>
<module>tika-parser-database-module</module>
<module>tika-parser-ebook-module</module>
<module>tika-parser-journal-module</module>
Modified: tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml?rev=1725053&r1=1725052&r2=1725053&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml (original)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-advanced-module/pom.xml Sun Jan 17 00:48:55 2016
@@ -38,11 +38,6 @@
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.bouncycastle</groupId>
- <artifactId>bcmail-jdk15on</artifactId>
- <version>1.52</version>
- </dependency>
- <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>${commons.io.version}</version>
Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/pom.xml Sun Jan 17 00:48:55 2016
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ you under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-modules</artifactId>
+ <version>2.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>tika-parser-crypto-module</artifactId>
+ <name>Apache Tika parser crypto module</name>
+ <url>http://tika.apache.org/</url>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcmail-jdk15on</artifactId>
+ <version>1.52</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons.io.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/Pkcs7Parser.java Sun Jan 17 00:48:55 2016
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.crypto;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Set;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.EmptyParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.bouncycastle.cms.CMSException;
+import org.bouncycastle.cms.CMSSignedDataParser;
+import org.bouncycastle.cms.CMSTypedStream;
+import org.bouncycastle.operator.DigestCalculatorProvider;
+import org.bouncycastle.operator.OperatorCreationException;
+import org.bouncycastle.operator.jcajce.JcaDigestCalculatorProviderBuilder;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Basic parser for PKCS7 data.
+ * <p>
+ * Reads the input as CMS signed data, extracts the signed content and passes
+ * it on to the {@link Parser} looked up in the {@link ParseContext}
+ * ({@link EmptyParser#INSTANCE} is supplied as the default for that lookup).
+ */
+public class Pkcs7Parser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -7310531559075115044L;
+
+    /** Media type {@code application/pkcs7-mime}. */
+    private static final MediaType PKCS7_MIME =
+            MediaType.application("pkcs7-mime");
+
+    /** Media type {@code application/pkcs7-signature}. */
+    private static final MediaType PKCS7_SIGNATURE =
+            MediaType.application("pkcs7-signature");
+
+    /**
+     * Returns the PKCS7 media types handled by this parser.
+     *
+     * @param context the parse context (not consulted here)
+     * @return a set holding the pkcs7-mime and pkcs7-signature media types
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return MediaType.set(PKCS7_MIME, PKCS7_SIGNATURE);
+    }
+
+    /**
+     * Extracts the signed content from the given PKCS7 stream and delegates
+     * its parsing to the {@link Parser} obtained from the context.
+     *
+     * @param stream   the PKCS7 document; it is wrapped in a
+     *                 {@link CloseShieldInputStream} before being handed to
+     *                 the CMS parser
+     * @param handler  receives the SAX events produced by the delegate parser
+     * @param metadata document metadata, passed through to the delegate parser
+     * @param context  parse context used to look up the delegate parser
+     * @throws IOException   if reading the stream fails
+     * @throws SAXException  if the delegate parser reports a SAX error
+     * @throws TikaException if there is no signed content (detached
+     *                       signature), if the digest calculator provider
+     *                       cannot be created, or if the CMS data cannot be
+     *                       parsed
+     */
+    @Override
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        try {
+            // NOTE(review): presumably needs a security provider registered
+            // under the name "BC" (BouncyCastle) -- confirm.
+            DigestCalculatorProvider digestCalculatorProvider =
+                    new JcaDigestCalculatorProviderBuilder().setProvider("BC").build();
+            CMSSignedDataParser parser =
+                    new CMSSignedDataParser(digestCalculatorProvider, new CloseShieldInputStream(stream));
+            try {
+                CMSTypedStream content = parser.getSignedContent();
+                // A null result means the signature carries no embedded content.
+                if (content == null) {
+                    throw new TikaException("cannot parse detached pkcs7 signature (no signed data to parse)");
+                }
+                try (InputStream input = content.getContentStream()) {
+                    Parser delegate =
+                            context.get(Parser.class, EmptyParser.INSTANCE);
+                    delegate.parse(input, handler, metadata, context);
+                }
+            } finally {
+                parser.close();
+            }
+        } catch (OperatorCreationException e) {
+            throw new TikaException("Unable to create DigestCalculatorProvider", e);
+        } catch (CMSException e) {
+            throw new TikaException("Unable to parse pkcs7 signed data", e);
+        }
+    }
+
+}
Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Sun Jan 17 00:48:55 2016
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+org.apache.tika.parser.crypto.Pkcs7Parser
Added: tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java?rev=1725053&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/Pkcs7ParserTest.java Sun Jan 17 00:48:55 2016
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.crypto;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+public class Pkcs7ParserTest extends TikaTest {
+    /**
+     * Parses a detached PKCS7 signature and checks that the parser does not
+     * fail with a {@link NullPointerException}; a {@link TikaException} is
+     * accepted as long as its text contains
+     * "cannot parse detached pkcs7 signature".
+     * <p>
+     * The annotation is written fully qualified because this file only
+     * imports the static {@code org.junit.Assert} helpers; without it a
+     * JUnit 4 runner would not execute this method.
+     */
+    @org.junit.Test
+    public void testDetachedSignature() throws Exception {
+        try (InputStream input = Pkcs7ParserTest.class.getResourceAsStream(
+                "/test-documents/testDetached.p7s")) {
+            ContentHandler handler = new BodyContentHandler();
+            Metadata metadata = new Metadata();
+            new Pkcs7Parser().parse(input, handler, metadata, new ParseContext());
+        } catch (NullPointerException npe) {
+            fail("should not get NPE");
+        } catch (TikaException te) {
+            assertTrue(te.toString().contains("cannot parse detached pkcs7 signature"));
+        }
+    }
+}
Modified: tika/branches/2.x/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parsers/pom.xml?rev=1725053&r1=1725052&r2=1725053&view=diff
==============================================================================
--- tika/branches/2.x/tika-parsers/pom.xml (original)
+++ tika/branches/2.x/tika-parsers/pom.xml Sun Jan 17 00:48:55 2016
@@ -167,6 +167,11 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
+ <artifactId>tika-parser-crypto-module</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
<artifactId>tika-parser-database-module</artifactId>
<version>${project.version}</version>
</dependency>
@@ -326,6 +331,7 @@
<include>org.apache.tika:tika-parser-advanced-module</include>
<include>org.apache.tika:tika-parser-cad-module</include>
<include>org.apache.tika:tika-parser-code-module</include>
+ <include>org.apache.tika:tika-parser-crypto-module</include>
<include>org.apache.tika:tika-parser-database-module</include>
<include>org.apache.tika:tika-parser-ebook-module</include>
<include>org.apache.tika:tika-parser-journal-module</include>