You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/04 16:55:13 UTC

[tika] 01/02: TIKA-3750 -- add unit test

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit cfa9a5274a317e3ea34bad5a3cb4dc3e8d709a0a
Author: tallison <ta...@apache.org>
AuthorDate: Wed May 4 12:53:55 2022 -0400

    TIKA-3750 -- add unit test
---
 .../tika-resource-loading-tests/pom.xml            | 36 +++++++++++++++
 .../org/apache/custom/parser/CustomParserTest.java | 43 ++++++++++++++++++
 .../org/apache/custom/parser/MyCustomParser.java   | 51 ++++++++++++++++++++++
 .../services/org.apache.tika.parser.Parser         | 15 +++++++
 4 files changed, 145 insertions(+)

diff --git a/tika-integration-tests/tika-resource-loading-tests/pom.xml b/tika-integration-tests/tika-resource-loading-tests/pom.xml
new file mode 100644
index 000000000..aaf8acb02
--- /dev/null
+++ b/tika-integration-tests/tika-resource-loading-tests/pom.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>tika-integration-tests</artifactId>
+    <groupId>org.apache.tika</groupId>
+    <version>2.4.1-SNAPSHOT</version>
+  </parent>
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>tika-resource-loading-tests</artifactId>
+
+  <properties>
+    <maven.compiler.source>8</maven.compiler.source>
+    <maven.compiler.target>8</maven.compiler.target>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/tika-integration-tests/tika-resource-loading-tests/src/test/java/org/apache/custom/parser/CustomParserTest.java b/tika-integration-tests/tika-resource-loading-tests/src/test/java/org/apache/custom/parser/CustomParserTest.java
new file mode 100644
index 000000000..f42096a16
--- /dev/null
+++ b/tika-integration-tests/tika-resource-loading-tests/src/test/java/org/apache/custom/parser/CustomParserTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.custom.parser;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.Map;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+
+public class CustomParserTest extends TikaTest {
+    // Expects 2 component parsers, with MyCustomParser mapped to application/mock+xml.
+    @Test
+    @Disabled("test fails because of sorting")
+    public void testBasic() throws Exception {
+        DefaultParser defaultParser = new DefaultParser();
+        assertEquals(2, defaultParser.getAllComponentParsers().size());
+        Map<MediaType, Parser> parsersByType = defaultParser.getParsers(new ParseContext());
+        MediaType mockXml = MediaType.application("mock+xml");
+        assertEquals(MyCustomParser.class, parsersByType.get(mockXml).getClass());
+    }
+}
diff --git a/tika-integration-tests/tika-resource-loading-tests/src/test/java/org/apache/custom/parser/MyCustomParser.java b/tika-integration-tests/tika-resource-loading-tests/src/test/java/org/apache/custom/parser/MyCustomParser.java
new file mode 100644
index 000000000..e9b1cf841
--- /dev/null
+++ b/tika-integration-tests/tika-resource-loading-tests/src/test/java/org/apache/custom/parser/MyCustomParser.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.custom.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+public class MyCustomParser extends AbstractParser {
+    // Advertises a single supported type: application/mock+xml.
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return Collections.singleton(MediaType.application("mock+xml"));
+    }
+    // Never reads the stream; writes one fixed <p> through an XHTMLContentHandler.
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+                      ParseContext context) throws IOException, SAXException, TikaException {
+        final XHTMLContentHandler out = new XHTMLContentHandler(handler, metadata);
+        out.startDocument();
+        out.startElement("p");
+        out.characters("my custom parser");
+        out.endElement("p");
+        out.endDocument();
+    }
+}
diff --git a/tika-integration-tests/tika-resource-loading-tests/src/test/resources/META-INF/services/org.apache.tika.parser.Parser b/tika-integration-tests/tika-resource-loading-tests/src/test/resources/META-INF/services/org.apache.tika.parser.Parser
new file mode 100644
index 000000000..505f1108a
--- /dev/null
+++ b/tika-integration-tests/tika-resource-loading-tests/src/test/resources/META-INF/services/org.apache.tika.parser.Parser
@@ -0,0 +1,15 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+org.apache.custom.parser.MyCustomParser
\ No newline at end of file