You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/06/25 15:56:09 UTC
svn commit: r1605406 -
/manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
Author: kwright
Date: Wed Jun 25 13:56:08 2014
New Revision: 1605406
URL: http://svn.apache.org/r1605406
Log:
If extracting update handler is off, only accept character content types
Modified:
manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
Modified: manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java?rev=1605406&r1=1605405&r2=1605406&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java Wed Jun 25 13:56:08 2014
@@ -25,6 +25,8 @@ import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
@@ -78,6 +80,8 @@ public class SolrConnector extends org.a
protected String excludedMimeTypesString = null;
/** Excluded mime types */
protected Map<String,String> excludedMimeTypes = null;
+ /** Use extracting update handler? */
+ protected boolean useExtractUpdateHandler = true;
/** Whether or not to commit */
protected boolean doCommits = false;
@@ -162,6 +166,7 @@ public class SolrConnector extends org.a
includedMimeTypes = null;
excludedMimeTypesString = null;
excludedMimeTypes = null;
+ useExtractUpdateHandler = true;
super.disconnect();
}
@@ -208,7 +213,7 @@ public class SolrConnector extends org.a
mimeTypeAttributeName = null;
String contentAttributeName = "content"; // ??? -- should be settable
- boolean useExtractUpdateHandler = true; // ???
+ useExtractUpdateHandler = true; // ???
String commits = params.getParameter(SolrConfig.PARAM_COMMITS);
if (commits == null || commits.length() == 0)
@@ -467,6 +472,15 @@ public class SolrConnector extends org.a
return sp.toPackedString();
}
+ private final static Set<String> acceptableMimeTypes = new HashSet<String>();
+ static
+ {
+ acceptableMimeTypes.add("text/plain;charset=utf-8");
+ acceptableMimeTypes.add("text/plain;charset=ascii");
+ acceptableMimeTypes.add("text/plain;charset=us-ascii");
+ acceptableMimeTypes.add("text/plain");
+ }
+
/** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
* unusable documents that will be passed to this output connector.
*@param outputDescription is the document's output version.
@@ -477,11 +491,15 @@ public class SolrConnector extends org.a
throws ManifoldCFException, ServiceInterruption
{
getSession();
- if (includedMimeTypes != null && includedMimeTypes.get(mimeType) == null)
- return false;
- if (excludedMimeTypes != null && excludedMimeTypes.get(mimeType) != null)
- return false;
- return super.checkMimeTypeIndexable(outputDescription,mimeType);
+ if (useExtractUpdateHandler)
+ {
+ if (includedMimeTypes != null && includedMimeTypes.get(mimeType) == null)
+ return false;
+ if (excludedMimeTypes != null && excludedMimeTypes.get(mimeType) != null)
+ return false;
+ return super.checkMimeTypeIndexable(outputDescription,mimeType);
+ }
+ return acceptableMimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
}
/** Pre-determine whether a document's length is indexable by this connector. This method is used by participating repository connectors