You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by jm...@apache.org on 2021/11/24 14:24:06 UTC

svn commit: r1895299 - /manifoldcf/trunk/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java

Author: jmssiera
Date: Wed Nov 24 14:24:06 2021
New Revision: 1895299

URL: http://svn.apache.org/viewvc?rev=1895299&view=rev
Log:
Fix CONNECTORS-1681

Modified:
    manifoldcf/trunk/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java

Modified: manifoldcf/trunk/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java?rev=1895299&r1=1895298&r2=1895299&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java (original)
+++ manifoldcf/trunk/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java Wed Nov 24 14:24:06 2021
@@ -704,7 +704,7 @@ public class TikaExtractor extends org.a
       // We only log the extraction
       final long startTime = System.currentTimeMillis();
       String resultCode = "OK";
-      String description = null;
+      String description = "";
       Long length = 0L;
       boolean truncated = false;
       boolean resources_limit = false;
@@ -837,7 +837,9 @@ public class TikaExtractor extends org.a
 
         } catch (final IOException e) {
           resultCode = "TIKASERVERRESPONSEISSUE";
-          description = e.getMessage();
+          if (e.getMessage() != null) {
+            description = e.getMessage();
+          }
           tikaServerResultCode = handleTikaServerException(e);
         } finally {
           if (response != null) {
@@ -853,6 +855,8 @@ public class TikaExtractor extends org.a
         }
 
       } finally {
+        // Before recording the activity, strip every character outside printable ASCII (0x20-0x7e) from the description, as such characters can cause errors during SQL insertion
+        description = description.replaceAll("[^\\x20-\\x7e]", "");
         // Log the extraction processing
         activities.recordActivity(startTime, ACTIVITY_EXTRACT, length, documentURI, resultCode, description);
       }