You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/03/16 11:48:43 UTC
[tika] branch main updated: Minor cleaning and added missing
javadoc on TikaServer (#415)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 01dca21 Minor cleaning and added missing javadoc on TikaServer (#415)
01dca21 is described below
commit 01dca2102ab35dfa9fb2d6504ee44d372229304b
Author: Subhajit Das <Su...@users.noreply.github.com>
AuthorDate: Tue Mar 16 17:18:34 2021 +0530
Minor cleaning and added missing javadoc on TikaServer (#415)
* Added case-insensitivity to tika server ocr header names
* Minor restructure and added missing javadoc
---
.../server/classic/config/PDFServerConfig.java | 24 +++++++++++++++++---
.../classic/config/TesseractServerConfig.java | 26 ++++++++++++++++++----
.../tika/server/core/ParseContextConfig.java | 7 ++++++
.../tika/server/core/resource/TikaResource.java | 18 ++++++---------
4 files changed, 57 insertions(+), 18 deletions(-)
diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
index 9058272..1c49652 100644
--- a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
@@ -26,12 +26,29 @@ import org.apache.tika.server.core.ParseContextConfig;
import javax.ws.rs.core.MultivaluedMap;
+import java.util.List;
+import java.util.Map;
+
import static org.apache.tika.server.core.resource.TikaResource.processHeaderConfig;
+/**
+ * PDF parser configuration, for the request
+ */
public class PDFServerConfig implements ParseContextConfig {
+ /**
+ * The HTTP header prefix required (case-insensitive) by this config.
+ */
public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
+ /**
+ * Configures the parseContext with present headers.
+ * Note: only the first value of each header is considered.
+ *
+ * @param httpHeaders the headers.
+ * @param metadata the metadata.
+ * @param parseContext the parse context to configure.
+ */
@Override
public void configure(MultivaluedMap<String, String> httpHeaders,
Metadata metadata, ParseContext parseContext) {
@@ -39,14 +56,15 @@ public class PDFServerConfig implements ParseContextConfig {
//if a header is submitted, any params set in --tika-config tika-config.xml
//upon server startup will be ignored.
PDFParserConfig pdfParserConfig = null;
- for (String key : httpHeaders.keySet()) {
- if (StringUtils.startsWithIgnoreCase(key, X_TIKA_PDF_HEADER_PREFIX)) {
+ for (Map.Entry<String, List<String>> kvp : httpHeaders.entrySet()) {
+ if (StringUtils.startsWithIgnoreCase(kvp.getKey(), X_TIKA_PDF_HEADER_PREFIX)) {
pdfParserConfig = (pdfParserConfig == null) ? new PDFParserConfig() : pdfParserConfig;
- processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX);
+ processHeaderConfig(pdfParserConfig, kvp.getKey(), kvp.getValue().get(0).trim(), X_TIKA_PDF_HEADER_PREFIX);
}
}
if (pdfParserConfig != null) {
parseContext.set(PDFParserConfig.class, pdfParserConfig);
}
}
+
}
diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
index 3041400..aaddfed 100644
--- a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
@@ -26,11 +26,29 @@ import org.apache.tika.server.core.ParseContextConfig;
import javax.ws.rs.core.MultivaluedMap;
+import java.util.List;
+import java.util.Map;
+
import static org.apache.tika.server.core.resource.TikaResource.processHeaderConfig;
+/**
+ * Tesseract configuration, for the request
+ */
public class TesseractServerConfig implements ParseContextConfig {
+ /**
+ * The HTTP header prefix required (case-insensitive) by this config.
+ */
public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
+
+ /**
+ * Configures the parseContext with present headers.
+ * Note: only the first value of each header is considered.
+ *
+ * @param httpHeaders the headers.
+ * @param metadata the metadata.
+ * @param parseContext the parse context to configure.
+ */
@Override
public void configure(MultivaluedMap<String, String> httpHeaders,
Metadata metadata, ParseContext parseContext) {
@@ -38,15 +56,15 @@ public class TesseractServerConfig implements ParseContextConfig {
//if a header is submitted, any params set in --tika-config tika-config.xml
//upon server startup will be ignored.
TesseractOCRConfig ocrConfig = null;
- DocumentSelector documentSelector = null;
- for (String key : httpHeaders.keySet()) {
- if (StringUtils.startsWithIgnoreCase(key, X_TIKA_OCR_HEADER_PREFIX)) {
+ for (Map.Entry<String, List<String>> kvp : httpHeaders.entrySet()) {
+ if (StringUtils.startsWithIgnoreCase(kvp.getKey(), X_TIKA_OCR_HEADER_PREFIX)) {
ocrConfig = (ocrConfig == null) ? new TesseractOCRConfig() : ocrConfig;
- processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX);
+ processHeaderConfig(ocrConfig, kvp.getKey(), kvp.getValue().get(0).trim(), X_TIKA_OCR_HEADER_PREFIX);
}
}
if (ocrConfig != null) {
parseContext.set(TesseractOCRConfig.class, ocrConfig);
}
}
+
}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
index 81b4155..0bc26df 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
@@ -45,6 +45,13 @@ import javax.ws.rs.core.MultivaluedMap;
*/
public interface ParseContextConfig {
+ /**
+ * Configures the parseContext with present headers.
+ *
+ * @param headers the headers.
+ * @param metadata the metadata.
+ * @param context the parse context to configure.
+ */
void configure(MultivaluedMap<String, String> headers,
Metadata metadata, ParseContext context);
}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index d1b25a6..731d7b1 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -154,16 +154,13 @@ public class TikaResource {
/**
* Utility method to set a property on a class via reflection.
*
- * @param httpHeaders the HTTP headers set.
* @param object the <code>Object</code> to set the property on.
* @param key the key of the HTTP Header.
+ * @param val the value of the HTTP header.
* @param prefix the name of the HTTP Header prefix used to find property.
* @throws WebApplicationException thrown when field cannot be found.
*/
- public static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
- String val = httpHeaders.getFirst(key);
- val = val.trim();
-
+ public static void processHeaderConfig(Object object, String key, String val, String prefix) {
try {
String property = StringUtils.removeStartIgnoreCase(key, prefix);
Field field = null;
@@ -259,18 +256,17 @@ public class TikaResource {
/**
* Tries to get method. Silently swallows NoMethodException and returns
* <code>null</code> if not found.
- * @param object
- * @param method
- * @param clazz
- * @return
+ * @param object the object to get method from.
+ * @param method the name of the method to get.
+ * @param clazz the parameter type of the method to get.
+ * @return the found method instance, or <code>null</code> if no such method exists
*/
private static Method tryToGetMethod(Object object, String method, Class clazz) {
try {
return object.getClass().getMethod(method, clazz);
} catch (NoSuchMethodException e) {
- //swallow
+ return null;
}
- return null;
}
@SuppressWarnings("serial")