You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/03/16 11:48:43 UTC

[tika] branch main updated: Minor cleaning and added missing javadoc on TikaServer (#415)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 01dca21  Minor cleaning and added missing javadoc on TikaServer (#415)
01dca21 is described below

commit 01dca2102ab35dfa9fb2d6504ee44d372229304b
Author: Subhajit Das <Su...@users.noreply.github.com>
AuthorDate: Tue Mar 16 17:18:34 2021 +0530

    Minor cleaning and added missing javadoc on TikaServer (#415)
    
    * Added case-insensitivity to tika server ocr header names
    
    * Minor restructure and added missing javadoc
---
 .../server/classic/config/PDFServerConfig.java     | 24 +++++++++++++++++---
 .../classic/config/TesseractServerConfig.java      | 26 ++++++++++++++++++----
 .../tika/server/core/ParseContextConfig.java       |  7 ++++++
 .../tika/server/core/resource/TikaResource.java    | 18 ++++++---------
 4 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
index 9058272..1c49652 100644
--- a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/PDFServerConfig.java
@@ -26,12 +26,29 @@ import org.apache.tika.server.core.ParseContextConfig;
 
 import javax.ws.rs.core.MultivaluedMap;
 
+import java.util.List;
+import java.util.Map;
+
 import static org.apache.tika.server.core.resource.TikaResource.processHeaderConfig;
 
+/**
+ * Per-request PDF parser configuration, built from the request's HTTP headers.
+ */
 public class PDFServerConfig implements ParseContextConfig {
 
+    /**
+     * The HTTP header prefix required (case-insensitive) by this config.
+     */
     public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF";
 
+    /**
+     * Configures the parseContext with present headers.
+     * Note: only the first value of each header is considered.
+     *
+     * @param httpHeaders the headers.
+     * @param metadata  the metadata.
+     * @param parseContext  the parse context to configure.
+     */
     @Override
     public void configure(MultivaluedMap<String, String> httpHeaders,
                           Metadata metadata, ParseContext parseContext) {
@@ -39,14 +56,15 @@ public class PDFServerConfig implements ParseContextConfig {
         //if a header is submitted, any params set in --tika-config tika-config.xml
         //upon server startup will be ignored.
         PDFParserConfig pdfParserConfig = null;
-        for (String key : httpHeaders.keySet()) {
-            if (StringUtils.startsWithIgnoreCase(key, X_TIKA_PDF_HEADER_PREFIX)) {
+        for (Map.Entry<String, List<String>> kvp : httpHeaders.entrySet()) {
+            if (StringUtils.startsWithIgnoreCase(kvp.getKey(), X_TIKA_PDF_HEADER_PREFIX)) {
                 pdfParserConfig = (pdfParserConfig == null) ? new PDFParserConfig() : pdfParserConfig;
-                processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX);
+                processHeaderConfig(pdfParserConfig, kvp.getKey(), kvp.getValue().get(0).trim(), X_TIKA_PDF_HEADER_PREFIX);
             }
         }
         if (pdfParserConfig != null) {
             parseContext.set(PDFParserConfig.class, pdfParserConfig);
         }
     }
+
 }
diff --git a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
index 3041400..aaddfed 100644
--- a/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
+++ b/tika-server/tika-server-classic/src/main/java/org/apache/tika/server/classic/config/TesseractServerConfig.java
@@ -26,11 +26,29 @@ import org.apache.tika.server.core.ParseContextConfig;
 
 import javax.ws.rs.core.MultivaluedMap;
 
+import java.util.List;
+import java.util.Map;
+
 import static org.apache.tika.server.core.resource.TikaResource.processHeaderConfig;
 
+/**
+ * Per-request Tesseract OCR configuration, built from the request's HTTP headers.
+ */
 public class TesseractServerConfig implements ParseContextConfig {
 
+    /**
+     * The HTTP header prefix required (case-insensitive) by this config.
+     */
     public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
+
+    /**
+     * Configures the parseContext with present headers.
+     * Note: only the first value of each header is considered.
+     *
+     * @param httpHeaders the headers.
+     * @param metadata  the metadata.
+     * @param parseContext  the parse context to configure.
+     */
     @Override
     public void configure(MultivaluedMap<String, String> httpHeaders,
                           Metadata metadata, ParseContext parseContext) {
@@ -38,15 +56,15 @@ public class TesseractServerConfig implements ParseContextConfig {
         //if a header is submitted, any params set in --tika-config tika-config.xml
         //upon server startup will be ignored.
         TesseractOCRConfig ocrConfig = null;
-        DocumentSelector documentSelector = null;
-        for (String key : httpHeaders.keySet()) {
-            if (StringUtils.startsWithIgnoreCase(key, X_TIKA_OCR_HEADER_PREFIX)) {
+        for (Map.Entry<String, List<String>> kvp : httpHeaders.entrySet()) {
+            if (StringUtils.startsWithIgnoreCase(kvp.getKey(), X_TIKA_OCR_HEADER_PREFIX)) {
                 ocrConfig = (ocrConfig == null) ? new TesseractOCRConfig() : ocrConfig;
-                processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX);
+                processHeaderConfig(ocrConfig, kvp.getKey(), kvp.getValue().get(0).trim(), X_TIKA_OCR_HEADER_PREFIX);
             }
         }
         if (ocrConfig != null) {
             parseContext.set(TesseractOCRConfig.class, ocrConfig);
         }
     }
+
 }
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
index 81b4155..0bc26df 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
@@ -45,6 +45,13 @@ import javax.ws.rs.core.MultivaluedMap;
  */
 public interface ParseContextConfig {
 
+    /**
+     * Configures the parseContext with present headers.
+     *
+     * @param headers the headers.
+     * @param metadata  the metadata.
+     * @param context  the parse context to configure.
+     */
     void configure(MultivaluedMap<String, String> headers,
                    Metadata metadata, ParseContext context);
 }
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index d1b25a6..731d7b1 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -154,16 +154,13 @@ public class TikaResource {
     /**
      * Utility method to set a property on a class via reflection.
      *
-     * @param httpHeaders the HTTP headers set.
      * @param object      the <code>Object</code> to set the property on.
      * @param key         the key of the HTTP Header.
+     * @param val         the value of HTTP header.
      * @param prefix      the name of the HTTP Header prefix used to find property.
      * @throws WebApplicationException thrown when field cannot be found.
      */
-    public static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
-        String val = httpHeaders.getFirst(key);
-        val = val.trim();
-
+    public static void processHeaderConfig(Object object, String key, String val, String prefix) {
         try {
             String property = StringUtils.removeStartIgnoreCase(key, prefix);
             Field field = null;
@@ -259,18 +256,17 @@ public class TikaResource {
     /**
     * Tries to get method. Silently swallows NoSuchMethodException and returns
      * <code>null</code> if not found.
-     * @param object
-     * @param method
-     * @param clazz
-     * @return
+     * @param object the object to get method from.
+     * @param method the name of the method to get.
+     * @param clazz the parameter type of the method to get.
+     * @return the found method instance, or <code>null</code> if no such method exists
      */
     private static Method tryToGetMethod(Object object, String method, Class clazz) {
         try {
             return object.getClass().getMethod(method, clazz);
         } catch (NoSuchMethodException e) {
-            //swallow
+            return null;
         }
-        return null;
     }
 
     @SuppressWarnings("serial")