You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by jm...@apache.org on 2021/06/25 13:40:39 UTC
svn commit: r1891042 - in
/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main:
java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/
native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rme...
Author: jmssiera
Date: Fri Jun 25 13:40:38 2021
New Revision: 1891042
URL: http://svn.apache.org/viewvc?rev=1891042&view=rev
Log:
CONNECTORS-1667: Add specific interval param when tika is down
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java Fri Jun 25 13:40:38 2021
@@ -30,12 +30,14 @@ public class TikaConfig {
public static final String PARAM_CONNECTIONTIMEOUT = "connectionTimeout";
public static final String PARAM_SOCKETTIMEOUT = "socketTimeout";
public static final String PARAM_RETRYINTERVAL = "retryInterval";
+ public static final String PARAM_RETRYINTERVALTIKADOWN = "retryIntervalTikaDown";
public static final String PARAM_RETRYNUMBER = "retryNumber";
public static final String TIKAHOSTNAME_DEFAULT = "localhost";
public static final String TIKAPORT_DEFAULT = "9998";
public static final String CONNECTIONTIMEOUT_DEFAULT = "60000";
public static final String SOCKETTIMEOUT_DEFAULT = "60000";
public static final String RETRYINTERVAL_DEFAULT = "20000";
+ public static final String RETRYINTERVALTIKADOWN_DEFAULT = "120000";
public static final String RETRYNUMBER_DEFAULT = "1";
// Specification nodes and values
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java Fri Jun 25 13:40:38 2021
@@ -126,6 +126,9 @@ public class TikaExtractor extends org.a
/** Retry interval */
private String retryIntervalString = null;
+ /** Retry interval when Tika seems down */
+ private String retryIntervalTikaDownString = null;
+
/** Retry number */
private String retryNumberString = null;
@@ -146,6 +149,9 @@ public class TikaExtractor extends org.a
/** Retry interval */
private long retryInterval = -1L;
+ /** Retry interval when Tika seems down */
+ private long retryIntervalTikaDown = -1L;
+
/** Retry number */
private int retryNumber = -1;
@@ -221,6 +227,7 @@ public class TikaExtractor extends org.a
connectionTimeoutString = configParameters.getParameter(TikaConfig.PARAM_CONNECTIONTIMEOUT);
socketTimeoutString = configParameters.getParameter(TikaConfig.PARAM_SOCKETTIMEOUT);
retryIntervalString = configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVAL);
+ retryIntervalTikaDownString = configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
retryNumberString = configParameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
}
@@ -235,6 +242,7 @@ public class TikaExtractor extends org.a
connectionTimeoutString = null;
socketTimeoutString = null;
retryIntervalString = null;
+ retryIntervalTikaDownString = null;
retryNumberString = null;
super.disconnect();
@@ -293,6 +301,11 @@ public class TikaExtractor extends org.a
throw new ManifoldCFException("Bad retry interval number: " + retryIntervalString);
}
try {
+ this.retryIntervalTikaDown = Long.parseLong(retryIntervalTikaDownString);
+ } catch (final NumberFormatException e) {
+ throw new ManifoldCFException("Bad retry interval when tika is down number: " + retryIntervalTikaDownString);
+ }
+ try {
this.retryNumber = Integer.parseInt(retryNumberString);
} catch (final NumberFormatException e) {
throw new ManifoldCFException("Bad retry number: " + retryNumberString);
@@ -448,6 +461,11 @@ public class TikaExtractor extends org.a
parameters.setParameter(TikaConfig.PARAM_RETRYINTERVAL, retryInterval);
}
+ final String retryIntervalTikaDown = variableContext.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+ if (retryIntervalTikaDown != null) {
+ parameters.setParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN, retryIntervalTikaDown);
+ }
+
final String retryNumber = variableContext.getParameter(TikaConfig.PARAM_RETRYNUMBER);
if (retryNumber != null) {
parameters.setParameter(TikaConfig.PARAM_RETRYNUMBER, retryNumber);
@@ -497,6 +515,11 @@ public class TikaExtractor extends org.a
retryInterval = TikaConfig.RETRYINTERVAL_DEFAULT;
}
+ String retryIntervalTikaDown = parameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+ if (retryIntervalTikaDown == null) {
+ retryIntervalTikaDown = TikaConfig.RETRYINTERVALTIKADOWN_DEFAULT;
+ }
+
String retryNumber = parameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
if (retryNumber == null) {
retryNumber = TikaConfig.RETRYNUMBER_DEFAULT;
@@ -508,6 +531,7 @@ public class TikaExtractor extends org.a
velocityContext.put("CONNECTIONTIMEOUT", connectionTimeout);
velocityContext.put("SOCKETTIMEOUT", socketTimeout);
velocityContext.put("RETRYINTERVAL", retryInterval);
+ velocityContext.put("RETRYINTERVALTIKADOWN", retryIntervalTikaDown);
velocityContext.put("RETRYNUMBER", retryNumber);
}
@@ -584,7 +608,7 @@ public class TikaExtractor extends org.a
// work
Logging.ingest.warn("Tika Server unreachable while trying to process " + documentURI + ", retrying...", e);
final long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Tika Server connection down: " + e.getMessage(), e, currentTime + retryInterval, -1L, -1, false);
+ throw new ServiceInterruption("Tika Server connection down: " + e.getMessage(), e, currentTime + retryIntervalTikaDown, -1L, retryNumber, false);
}
private void retryWithoutAbort(final Exception e) throws ServiceInterruption {
@@ -723,6 +747,9 @@ public class TikaExtractor extends org.a
} else { // The tika server seems to be down : retry {retryNumber} times and abort the
// job if it fails on
// each retry
+ resultCode = "TIKASERVEREXCEPTION";
+ description = "Tika seemed to be down when requested to process document " + documentURI + " : " + e.getMessage();
+ tikaServerResultCode = handleTikaServerError(description);
triggerServiceInterruption(documentURI, e);
}
} catch (final NoHttpResponseException e) {
@@ -733,6 +760,9 @@ public class TikaExtractor extends org.a
} catch (final IOException e) { // Unknown problem with the Tika Server. Retry {retryNumber} times and abort
// the job if it fails on
// each retry
+ resultCode = "TIKASERVEREXCEPTION";
+ description = "Unknown Tika problem when processing document " + documentURI + " : " + e.getMessage();
+ tikaServerResultCode = handleTikaServerError(description);
triggerServiceInterruption(documentURI, e);
}
if (response != null) {
@@ -1174,7 +1204,7 @@ public class TikaExtractor extends org.a
final List<Map<String, String>> fieldMappings = new ArrayList<>();
String keepAllMetadataValue = "true";
String lowernamesValue = "true";
- String writeLimitValue = "1000000";
+ String writeLimitValue = "1000000"; // 1Mo by default
String extractArchives = "false";
String maxEmbeddedResources = "";
for (int i = 0; i < os.getChildCount(); i++) {
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Intervalle entre les tentatives (en ms):
+TikaExtractor.RetryIntervalTikaDown=Intervalle entre les tentatives quand Tika est injoignable (en ms):
TikaExtractor.RetryNumber=Nombre de tentatives:
TikaExtractor.ExtractArchives=Extraire le contenu des archives:
TikaExtractor.ConnectionTimeout=Connexion timeout:
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html Fri Jun 25 13:40:38 2021
@@ -50,6 +50,12 @@
</td>
</tr>
<tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+ <td class="value"><input name="retryIntervalTikaDown" type="text"
+ value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)" size="20" />
+ </td>
+ </tr>
+ <tr>
<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
<td class="value"><input name="retryNumber" type="text"
value="$Encoder.attributeEscape($RETRYNUMBER)" size="5" />
@@ -63,6 +69,7 @@
<input type="hidden" name="connectionTimeout" value="$Encoder.attributeEscape($CONNECTIONTIMEOUT)"/>
<input type="hidden" name="socketTimeout" value="$Encoder.attributeEscape($SOCKETTIMEOUT)"/>
<input type="hidden" name="retryInterval" value="$Encoder.attributeEscape($RETRYINTERVAL)"/>
+<input type="hidden" name="retryIntervalTikaDown" value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)"/>
<input type="hidden" name="retryNumber" value="$Encoder.attributeEscape($RETRYNUMBER)"/>
#end
\ No newline at end of file
Modified: manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html (original)
+++ manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html Fri Jun 25 13:40:38 2021
@@ -37,6 +37,10 @@
<td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVAL)</nobr></td>
</tr>
<tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVALTIKADOWN)</nobr></td>
+ </tr>
+ <tr>
<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
<td class="value"><nobr>$Encoder.bodyEscape($RETRYNUMBER)</nobr></td>
</tr>