You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/05/29 12:03:38 UTC
svn commit: r1682410 - in /manifoldcf/trunk: ./
connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/
connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/
connectors/solr/connector/sr...
Author: kwright
Date: Fri May 29 10:03:38 2015
New Revision: 1682410
URL: http://svn.apache.org/r1682410
Log:
Fix for CONNECTORS-1204.
Modified:
manifoldcf/trunk/ (props changed)
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties
manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties
manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri May 29 10:03:38 2015
@@ -53,6 +53,7 @@
/manifoldcf/branches/CONNECTORS-1177:1670213-1670613
/manifoldcf/branches/CONNECTORS-120:1406712-1407974,1407982-1411043,1411049-1416451
/manifoldcf/branches/CONNECTORS-120-1:1416450-1417056
+/manifoldcf/branches/CONNECTORS-1204:1682204-1682409
/manifoldcf/branches/CONNECTORS-13:1525862-1527182,1539324-1541634
/manifoldcf/branches/CONNECTORS-470:1349741-1360750,1360808
/manifoldcf/branches/CONNECTORS-474:1349741-1353803
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri May 29 10:03:38 2015
@@ -3,6 +3,10 @@ $Id$
======================= 2.2-dev =====================
+CONNECTORS-1204: Add ability to post original binary length of document
+to Solr from JCIFS connector. Also improve efficiency of JCIFS connector.
+(Karl Wright)
+
CONNECTORS-1193: Add ability to discard web content based on a
set of regular expressions.
(Arcadius Ahouansou)
Modified: manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java (original)
+++ manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java Fri May 29 10:03:38 2015
@@ -52,6 +52,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.net.MalformedURLException;
+import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
@@ -472,15 +473,11 @@ public class SharedDriveConnector extend
}
}
}
- catch (java.net.SocketTimeoutException e)
+ catch (MalformedURLException e)
{
- throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
+ throw new ManifoldCFException("Could not get a canonical path: "+e.getMessage(),e);
}
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
+ catch (UnknownHostException e)
{
throw new ManifoldCFException("Could not get a canonical path: "+e.getMessage(),e);
}
@@ -571,116 +568,142 @@ public class SharedDriveConnector extend
String[] documentDeny = null;
boolean documentSecurityOn = false;
+ // Common info we really need to fetch only once
+ long fileLength = 0L;
+ long lastModified = 0L;
+ boolean fileExists = false;
+ boolean fileIsDirectory = false;
+
try
{
file = new SmbFile(documentIdentifier,pa);
+ fileExists = fileExists(file);
// File has to exist AND have a non-null canonical path to be readable. If the canonical path is
// null, it means that the windows permissions are not right and directory/file is not readable!!!
String newPath = getFileCanonicalPath(file);
// We MUST check the specification here, otherwise a recrawl may not delete what it's supposed to!
- if (fileExists(file) && newPath != null && checkInclude(file,newPath,spec,activities))
+ if (fileExists && newPath != null)
{
- if (fileIsDirectory(file))
+ fileIsDirectory = fileIsDirectory(file);
+ if (checkInclude(fileIsDirectory,newPath,spec))
{
- // Hmm, this is not correct; version string should be empty for windows directories, since
- // they are not hierarchical in modified date propagation.
- // It's a directory. The version ID will be the
- // last modified date.
- //long lastModified = fileLastModified(file);
- //versionString = new Long(lastModified).toString();
- versionString = "";
-
- }
- else
- {
- // It's a file of acceptable length.
- // The ability to get ACLs, list files, and an inputstream under DFS all work now.
- // The SmbFile for parentFolder acls.
- SmbFile parentFolder = new SmbFile(file.getParent(),pa);
-
- // Compute the security information
- String[] modelArray = new String[0];
-
- List<String> allowList = new ArrayList<String>();
- List<String> denyList = new ArrayList<String>();
- shareSecurityOn = getFileShareSecuritySet(allowList, denyList, file, shareAcls);
- shareAllow = allowList.toArray(modelArray);
- shareDeny = denyList.toArray(modelArray);
-
- allowList.clear();
- denyList.clear();
- parentSecurityOn = getFileSecuritySet(allowList, denyList, parentFolder, parentFolderAcls);
- parentAllow = allowList.toArray(modelArray);
- parentDeny = denyList.toArray(modelArray);
-
- allowList.clear();
- denyList.clear();
- documentSecurityOn = getFileSecuritySet(allowList, denyList, file, acls);
- documentAllow = allowList.toArray(modelArray);
- documentDeny = denyList.toArray(modelArray);
-
- // The format of this string changed on 11/8/2006 to be comformant with the standard way
- // acls and metadata descriptions are being stuffed into the version string across connectors.
-
- // The format of this string changed again on 7/3/2009 to permit the ingestion uri/iri to be included.
- // This was to support filename/uri mapping functionality.
-
- StringBuilder sb = new StringBuilder();
-
- addSecuritySet(sb,shareSecurityOn,shareAllow,shareDeny);
- addSecuritySet(sb,parentSecurityOn,parentAllow,parentDeny);
- addSecuritySet(sb,documentSecurityOn,documentAllow,documentDeny);
+ if (fileIsDirectory)
+ {
+ // Hmm, this is not correct; version string should be empty for windows directories, since
+ // they are not hierarchical in modified date propagation.
+ // It's a directory. The version ID will be the
+ // last modified date.
+ //long lastModified = fileLastModified(file);
+ //versionString = new Long(lastModified).toString();
+ versionString = "";
- // Include the path attribute name and value in the parseable area.
- if (pathAttributeName != null)
+ }
+ else
{
- sb.append('+');
- pack(sb,pathAttributeName,'+');
- // Calculate path string; we'll include that wholesale in the version
- pathAttributeValue = documentIdentifier;
- // 3/13/2008
- // In looking at what comes into the path metadata attribute by default, and cogitating a bit, I've concluded that
- // the smb:// and the server/domain name at the start of the path are just plain old noise, and should be stripped.
- // This changes a behavior that has been around for a while, so there is a risk, but a quick back-and-forth with the
- // SE's leads me to believe that this is safe.
+ fileLength = fileLength(file);
+ if (checkIncludeFile(fileLength,newPath,spec,activities))
+ {
+ // It's a file of acceptable length.
+ // The ability to get ACLs, list files, and an inputstream under DFS all work now.
+ // The SmbFile for parentFolder acls.
+ SmbFile parentFolder = new SmbFile(file.getParent(),pa);
+
+ // Compute the security information
+ String[] modelArray = new String[0];
+
+ List<String> allowList = new ArrayList<String>();
+ List<String> denyList = new ArrayList<String>();
+ shareSecurityOn = getFileShareSecuritySet(allowList, denyList, file, shareAcls);
+ shareAllow = allowList.toArray(modelArray);
+ shareDeny = denyList.toArray(modelArray);
+
+ allowList.clear();
+ denyList.clear();
+ parentSecurityOn = getFileSecuritySet(allowList, denyList, parentFolder, parentFolderAcls);
+ parentAllow = allowList.toArray(modelArray);
+ parentDeny = denyList.toArray(modelArray);
+
+ allowList.clear();
+ denyList.clear();
+ documentSecurityOn = getFileSecuritySet(allowList, denyList, file, acls);
+ documentAllow = allowList.toArray(modelArray);
+ documentDeny = denyList.toArray(modelArray);
+
+ // This is stuff we need for computing the version string AND for indexing
+ lastModified = fileLastModified(file);
+
+ // The format of this string changed on 11/8/2006 to be conformant with the standard way
+ // acls and metadata descriptions are being stuffed into the version string across connectors.
+
+ // The format of this string changed again on 7/3/2009 to permit the ingestion uri/iri to be included.
+ // This was to support filename/uri mapping functionality.
+
+ StringBuilder sb = new StringBuilder();
+
+ addSecuritySet(sb,shareSecurityOn,shareAllow,shareDeny);
+ addSecuritySet(sb,parentSecurityOn,parentAllow,parentDeny);
+ addSecuritySet(sb,documentSecurityOn,documentAllow,documentDeny);
+
+ // Include the path attribute name and value in the parseable area.
+ if (pathAttributeName != null)
+ {
+ sb.append('+');
+ pack(sb,pathAttributeName,'+');
+ // Calculate path string; we'll include that wholesale in the version
+ pathAttributeValue = documentIdentifier;
+ // 3/13/2008
+ // In looking at what comes into the path metadata attribute by default, and cogitating a bit, I've concluded that
+ // the smb:// and the server/domain name at the start of the path are just plain old noise, and should be stripped.
+ // This changes a behavior that has been around for a while, so there is a risk, but a quick back-and-forth with the
+ // SE's leads me to believe that this is safe.
- if (pathAttributeValue.startsWith("smb://"))
+ if (pathAttributeValue.startsWith("smb://"))
+ {
+ int index = pathAttributeValue.indexOf("/","smb://".length());
+ if (index == -1)
+ index = pathAttributeValue.length();
+ pathAttributeValue = pathAttributeValue.substring(index);
+ }
+ // Now, translate
+ pathAttributeValue = matchMap.translate(pathAttributeValue);
+ pack(sb,pathAttributeValue,'+');
+ }
+ else
+ sb.append('-');
+
+ // Calculate the ingestion IRI/URI, and include that in the parseable area.
+ ingestionURI = convertToURI(documentIdentifier,fileMap,uriMap);
+ pack(sb,ingestionURI,'+');
+
+ // The stuff from here on down is non-parseable.
+ sb.append(new Long(lastModified).toString()).append(":")
+ .append(new Long(fileLength).toString());
+ // Also include the specification-based answer for the question of whether fingerprinting is
+ // going to be done. Although we may not consider this to truly be "version" information, the
+ // specification does affect whether anything is ingested or not, so it really is. The alternative
+ // is to fingerprint right here, in the version part of the world, but that's got a performance
+ // downside, because it means that we'd have to suck over pretty much everything just to determine
+ // what we wanted to ingest.
+ boolean ifIndexable = wouldFileBeIncluded(newPath,spec,true);
+ boolean ifNotIndexable = wouldFileBeIncluded(newPath,spec,false);
+ if (ifIndexable == ifNotIndexable)
+ sb.append("I");
+ else
+ sb.append(ifIndexable?"Y":"N");
+ versionString = sb.toString();
+ }
+ else
{
- int index = pathAttributeValue.indexOf("/","smb://".length());
- if (index == -1)
- index = pathAttributeValue.length();
- pathAttributeValue = pathAttributeValue.substring(index);
+ activities.deleteDocument(documentIdentifier);
+ continue;
}
- // Now, translate
- pathAttributeValue = matchMap.translate(pathAttributeValue);
- pack(sb,pathAttributeValue,'+');
}
- else
- sb.append('-');
-
- // Calculate the ingestion IRI/URI, and include that in the parseable area.
- ingestionURI = convertToURI(documentIdentifier,fileMap,uriMap);
- pack(sb,ingestionURI,'+');
-
- // The stuff from here on down is non-parseable.
- // Get the file's modified date.
- long lastModified = fileLastModified(file);
- sb.append(new Long(lastModified).toString()).append(":")
- .append(new Long(fileLength(file)).toString());
- // Also include the specification-based answer for the question of whether fingerprinting is
- // going to be done. Although we may not consider this to truly be "version" information, the
- // specification does affect whether anything is ingested or not, so it really is. The alternative
- // is to fingerprint right here, in the version part of the world, but that's got a performance
- // downside, because it means that we'd have to suck over pretty much everything just to determine
- // what we wanted to ingest.
- boolean ifIndexable = wouldFileBeIncluded(newPath,spec,true);
- boolean ifNotIndexable = wouldFileBeIncluded(newPath,spec,false);
- if (ifIndexable == ifNotIndexable)
- sb.append("I");
- else
- sb.append(ifIndexable?"Y":"N");
- versionString = sb.toString();
+ }
+ else
+ {
+ activities.deleteDocument(documentIdentifier);
+ continue;
}
}
else
@@ -742,9 +765,9 @@ public class SharedDriveConnector extend
try
{
- if (fileExists(file))
+ if (fileExists)
{
- if (fileIsDirectory(file))
+ if (fileIsDirectory)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("JCIFS: '"+documentIdentifier+"' is a directory");
@@ -781,8 +804,9 @@ public class SharedDriveConnector extend
{
String uri = ingestionURI;
String fileNameString = file.getName();
- Date lastModifiedDate = new Date(file.lastModified());
+ Date lastModifiedDate = new Date(lastModified);
Date creationDate = new Date(file.createTime());
+ Long originalLength = new Long(fileLength);
String contentType = mapExtensionToMimeType(fileNameString);
if (!activities.checkURLIndexable(uri))
@@ -821,6 +845,8 @@ public class SharedDriveConnector extend
String shareName = file.getShare();
rd.setFileName(fileNameString);
+ rd.setOriginalSize(originalLength);
+
if (contentType != null)
rd.setMimeType(contentType);
rd.addField("lastModified", lastModifiedDate.toString());
@@ -882,7 +908,7 @@ public class SharedDriveConnector extend
if (checkIngest(tempFile, newPath, spec, activities))
{
- long fileLength = tempFile.length();
+ // Not needed; fetched earlier: long fileLength = tempFile.length();
if (!activities.checkLengthIndexable(fileLength))
{
Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept length ("+fileLength+")");
@@ -902,7 +928,7 @@ public class SharedDriveConnector extend
activities.ingestDocumentWithException(documentIdentifier, versionString, uri, rd);
errorCode = "OK";
- fileLengthLong = new Long(tempFile.length());
+ fileLengthLong = new Long(fileLength);
}
finally
{
@@ -931,7 +957,7 @@ public class SharedDriveConnector extend
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("JCIFS: Local file data not needed for '"+documentIdentifier+"'");
- long fileLength = fileLength(file);
+ // Not needed; fetched earlier: long fileLength = fileLength(file);
if (!activities.checkLengthIndexable(fileLength))
{
Logging.connectors.debug("JCIFS: Skipping file because output connector cannot accept length ("+fileLength+")");
@@ -952,7 +978,7 @@ public class SharedDriveConnector extend
activities.ingestDocumentWithException(documentIdentifier, versionString, uri, rd);
errorCode = "OK";
- fileLengthLong = new Long(fileLength(file));
+ fileLengthLong = new Long(fileLength);
}
finally
{
@@ -1380,14 +1406,47 @@ public class SharedDriveConnector extend
// Protected methods
+ /** Check if a file's stats are OK for inclusion: the output connector must accept its length and the
+ * mime type mapped from fileName, and the length may not exceed the specification's NODE_MAXLENGTH value.
+ *@return true if the file should be included; a later NODE_MAXLENGTH node overrides an earlier one.
+ */
+ protected static boolean checkIncludeFile(long fileLength, String fileName, Specification documentSpecification, IFingerprintActivity activities)
+   throws ManifoldCFException, ServiceInterruption
+ {
+   if (!activities.checkLengthIndexable(fileLength) ||
+     !activities.checkMimeTypeIndexable(mapExtensionToMimeType(fileName)))
+     return false;
+   long maxFileLength = Long.MAX_VALUE;
+   for (int i = 0; i < documentSpecification.getChildCount(); i++)
+   {
+     // Index with i alone: the loop header already advances it, and a second increment here skipped every other node.
+     SpecificationNode sn = documentSpecification.getChild(i);
+     if (sn.getType().equals(NODE_MAXLENGTH))
+     {
+       try
+       {
+         String value = sn.getAttributeValue(ATTRIBUTE_VALUE);
+         if (value != null && value.length() > 0)
+           maxFileLength = new Long(value).longValue();
+       }
+       catch (NumberFormatException e)
+       {
+         throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
+       }
+     }
+   }
+   return fileLength <= maxFileLength;
+ }
+
+
/** Check if a file or directory should be included, given a document specification.
- *@param file is the file object.
+ *@param isDirectory is true if the file is a directory.
*@param fileName is the canonical file name.
*@param documentSpecification is the specification.
*@return true if it should be included.
*/
- protected boolean checkInclude(SmbFile file, String fileName, Specification documentSpecification, IFingerprintActivity activities)
- throws ManifoldCFException, ServiceInterruption
+ protected boolean checkInclude(boolean isDirectory, String fileName, Specification documentSpecification)
+ throws ManifoldCFException
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("JCIFS: In checkInclude for '"+fileName+"'");
@@ -1400,7 +1459,6 @@ public class SharedDriveConnector extend
{
String pathPart;
String filePart;
- boolean isDirectory = fileIsDirectory(file);
if (isDirectory)
{
@@ -1423,36 +1481,7 @@ public class SharedDriveConnector extend
}
}
- // If it's a file, make sure the maximum length is not exceeded
int i;
- if (!isDirectory)
- {
- long fileLength = fileLength(file);
- if (!activities.checkLengthIndexable(fileLength) ||
- !activities.checkMimeTypeIndexable(mapExtensionToMimeType(fileName)))
- return false;
- long maxFileLength = Long.MAX_VALUE;
- i = 0;
- while (i < documentSpecification.getChildCount())
- {
- SpecificationNode sn = documentSpecification.getChild(i++);
- if (sn.getType().equals(NODE_MAXLENGTH))
- {
- try
- {
- String value = sn.getAttributeValue(ATTRIBUTE_VALUE);
- if (value != null && value.length() > 0)
- maxFileLength = new Long(value).longValue();
- }
- catch (NumberFormatException e)
- {
- throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
- }
- }
- }
- if (fileLength > maxFileLength)
- return false;
- }
// Scan until we match a startpoint
i = 0;
@@ -1564,25 +1593,11 @@ public class SharedDriveConnector extend
}
return false;
}
- catch (jcifs.smb.SmbAuthException e)
- {
- Logging.connectors.warn("JCIFS: Authorization exception checking inclusion for "+fileName+" - skipping");
- return false;
- }
- catch (SmbException se)
- {
- processSMBException(se, fileName, "checking inclusion", "canonical path mapping");
- return false;
- }
- catch (java.net.SocketTimeoutException e)
+ catch (MalformedURLException e)
{
throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
}
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
+ catch (UnknownHostException e)
{
throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
}
@@ -1712,15 +1727,11 @@ public class SharedDriveConnector extend
}
return false;
}
- catch (java.net.SocketTimeoutException e)
+ catch (MalformedURLException e)
{
throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
}
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
+ catch (UnknownHostException e)
{
throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
}
@@ -1860,25 +1871,11 @@ public class SharedDriveConnector extend
}
return false;
}
- catch (jcifs.smb.SmbAuthException e)
- {
- Logging.connectors.warn("JCIFS: Authorization exception checking ingestion for "+fileName+" - skipping");
- return false;
- }
- catch (SmbException se)
- {
- processSMBException(se, fileName, "checking ingestion", "reading document");
- return false;
- }
- catch (java.net.SocketTimeoutException e)
+ catch (MalformedURLException e)
{
throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
}
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (IOException e)
+ catch (UnknownHostException e)
{
throw new ManifoldCFException("Couldn't map to canonical path: "+e.getMessage(),e);
}
@@ -2107,7 +2104,7 @@ public class SharedDriveConnector extend
/** Map a "path" specification to a full identifier.
*/
protected String mapToIdentifier(String path)
- throws IOException
+ throws MalformedURLException, UnknownHostException
{
String smburi = smbconnectionPath;
String uri = smburi + path + "/";
@@ -4823,11 +4820,30 @@ public class SharedDriveConnector extend
// documents that we will immediately turn around and remove. However, if this
// check was not here, everything should still function, provided the getDocumentVersions()
// method does the right thing.
- if (checkInclude(f, newPath, spec, activities))
+ boolean fileIsDirectory = fileIsDirectory(f);
+ if (checkInclude(fileIsDirectory, newPath, spec))
{
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("JCIFS: Recorded path is '" + newPath + "' and is included.");
- activities.addDocumentReference(newPath);
+ if (fileIsDirectory)
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("JCIFS: Recorded path is '" + newPath + "' and is included.");
+ activities.addDocumentReference(newPath);
+ }
+ else
+ {
+ long fileLength = fileLength(f);
+ if (checkIncludeFile(fileLength, newPath, spec, activities))
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("JCIFS: Recorded path is '" + newPath + "' and is included.");
+ activities.addDocumentReference(newPath);
+ }
+ else
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("JCIFS: Recorded path '"+newPath+"' is excluded!");
+ }
+ }
}
else
{
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java Fri May 29 10:03:38 2015
@@ -100,6 +100,7 @@ public class HttpPoster
private final String allowAttributeName;
private final String denyAttributeName;
private final String idAttributeName;
+ private final String originalSizeAttributeName;
private final String modifiedDateAttributeName;
private final String createdDateAttributeName;
private final String indexedDateAttributeName;
@@ -131,7 +132,7 @@ public class HttpPoster
int zkClientTimeout, int zkConnectTimeout,
String updatePath, String removePath, String statusPath,
String allowAttributeName, String denyAttributeName, String idAttributeName,
- String modifiedDateAttributeName, String createdDateAttributeName, String indexedDateAttributeName,
+ String originalSizeAttributeName, String modifiedDateAttributeName, String createdDateAttributeName, String indexedDateAttributeName,
String fileNameAttributeName, String mimeTypeAttributeName, String contentAttributeName,
Long maxDocumentLength,
String commitWithin, boolean useExtractUpdateHandler)
@@ -147,6 +148,7 @@ public class HttpPoster
this.allowAttributeName = allowAttributeName;
this.denyAttributeName = denyAttributeName;
this.idAttributeName = idAttributeName;
+ this.originalSizeAttributeName = originalSizeAttributeName;
this.modifiedDateAttributeName = modifiedDateAttributeName;
this.createdDateAttributeName = createdDateAttributeName;
this.indexedDateAttributeName = indexedDateAttributeName;
@@ -179,7 +181,7 @@ public class HttpPoster
String updatePath, String removePath, String statusPath,
String realm, String userID, String password,
String allowAttributeName, String denyAttributeName, String idAttributeName,
- String modifiedDateAttributeName, String createdDateAttributeName, String indexedDateAttributeName,
+ String originalSizeAttributeName, String modifiedDateAttributeName, String createdDateAttributeName, String indexedDateAttributeName,
String fileNameAttributeName, String mimeTypeAttributeName, String contentAttributeName,
IKeystoreManager keystoreManager, Long maxDocumentLength,
String commitWithin, boolean useExtractUpdateHandler)
@@ -195,6 +197,7 @@ public class HttpPoster
this.allowAttributeName = allowAttributeName;
this.denyAttributeName = denyAttributeName;
this.idAttributeName = idAttributeName;
+ this.originalSizeAttributeName = originalSizeAttributeName;
this.modifiedDateAttributeName = modifiedDateAttributeName;
this.createdDateAttributeName = createdDateAttributeName;
this.indexedDateAttributeName = indexedDateAttributeName;
@@ -995,6 +998,14 @@ public class HttpPoster
}
// Write the rest of the attributes
+ if ( originalSizeAttributeName != null )
+ {
+ Long size = document.getOriginalSize();
+ if ( size != null )
+ {
+ outputDoc.addField( originalSizeAttributeName, size.toString() );
+ }
+ }
if ( modifiedDateAttributeName != null )
{
Date date = document.getModifiedDate();
@@ -1067,6 +1078,13 @@ public class HttpPoster
// Write the id field
writeField(out,LITERAL+idAttributeName,documentURI);
// Write the rest of the attributes
+ if (originalSizeAttributeName != null)
+ {
+   Long size = document.getOriginalSize();
+   if (size != null)
+     // Write the size under the original-size field name; the modified-date field is written separately just below
+     writeField(out,LITERAL+originalSizeAttributeName,size.toString());
+ }
if (modifiedDateAttributeName != null)
{
Date date = document.getModifiedDate();
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java Fri May 29 10:03:38 2015
@@ -83,6 +83,8 @@ public class SolrConfig
public static final String PARAM_STATUSPATH = "Server status handler";
/** Id field */
public static final String PARAM_IDFIELD = "Solr id field name";
+ /** Optional original size field */
+ public static final String PARAM_ORIGINALSIZEFIELD = "Solr original size field name";
/** Optional modified date field */
public static final String PARAM_MODIFIEDDATEFIELD = "Solr modified date field name";
/** Optional created date field */
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java Fri May 29 10:03:38 2015
@@ -85,6 +85,7 @@ public class SolrConnector extends org.a
// Attributes going into Solr
protected String idAttributeName = null;
+ protected String originalSizeAttributeName = null;
protected String modifiedDateAttributeName = null;
protected String createdDateAttributeName = null;
protected String indexedDateAttributeName = null;
@@ -181,6 +182,7 @@ public class SolrConnector extends org.a
excludedMimeTypesString = null;
excludedMimeTypes = null;
idAttributeName = null;
+ originalSizeAttributeName = null;
modifiedDateAttributeName = null;
createdDateAttributeName = null;
indexedDateAttributeName = null;
@@ -214,6 +216,10 @@ public class SolrConnector extends org.a
if (idAttributeName == null || idAttributeName.length() == 0)
idAttributeName = "id";
+ originalSizeAttributeName = params.getParameter(SolrConfig.PARAM_ORIGINALSIZEFIELD);
+ if (originalSizeAttributeName == null || originalSizeAttributeName.length() == 0)
+ originalSizeAttributeName = null;
+
modifiedDateAttributeName = params.getParameter(SolrConfig.PARAM_MODIFIEDDATEFIELD);
if (modifiedDateAttributeName == null || modifiedDateAttributeName.length() == 0)
modifiedDateAttributeName = null;
@@ -355,7 +361,7 @@ public class SolrConnector extends org.a
connectTimeout,socketTimeout,
updatePath,removePath,statusPath,realm,userID,password,
allowAttributeName,denyAttributeName,idAttributeName,
- modifiedDateAttributeName,createdDateAttributeName,indexedDateAttributeName,
+ originalSizeAttributeName,modifiedDateAttributeName,createdDateAttributeName,indexedDateAttributeName,
fileNameAttributeName,mimeTypeAttributeName,contentAttributeName,
keystoreManager,maxDocumentLength,commitWithin,useExtractUpdateHandler);
@@ -411,7 +417,7 @@ public class SolrConnector extends org.a
zkClientTimeout,zkConnectTimeout,
updatePath,removePath,statusPath,
allowAttributeName,denyAttributeName,idAttributeName,
- modifiedDateAttributeName,createdDateAttributeName,indexedDateAttributeName,
+ originalSizeAttributeName,modifiedDateAttributeName,createdDateAttributeName,indexedDateAttributeName,
fileNameAttributeName,mimeTypeAttributeName,contentAttributeName,
maxDocumentLength,commitWithin,useExtractUpdateHandler);
@@ -1009,6 +1015,10 @@ public class SolrConnector extends org.a
String idField = parameters.getParameter(SolrConfig.PARAM_IDFIELD);
if (idField == null)
idField = "id";
+
+ String originalSizeField = parameters.getParameter(SolrConfig.PARAM_ORIGINALSIZEFIELD);
+ if (originalSizeField == null)
+ originalSizeField = "";
String modifiedDateField = parameters.getParameter(SolrConfig.PARAM_MODIFIEDDATEFIELD);
if (modifiedDateField == null)
@@ -1463,6 +1473,12 @@ public class SolrConnector extends org.a
" </td>\n"+
" </tr>\n"+
" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"SolrConnector.OriginalSizeFieldName") + "</nobr></td>\n"+
+" <td class=\"value\">\n"+
+" <input name=\"originalsizefield\" type=\"text\" size=\"32\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(originalSizeField)+"\"/>\n"+
+" </td>\n"+
+" </tr>\n"+
+" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"SolrConnector.ModifiedDateFieldName") + "</nobr></td>\n"+
" <td class=\"value\">\n"+
" <input name=\"modifieddatefield\" type=\"text\" size=\"32\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(modifiedDateField)+"\"/>\n"+
@@ -1526,6 +1542,7 @@ public class SolrConnector extends org.a
{
out.print(
"<input type=\"hidden\" name=\"idfield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(idField)+"\"/>\n"+
+"<input type=\"hidden\" name=\"originalsizefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(originalSizeField)+"\"/>\n"+
"<input type=\"hidden\" name=\"modifieddatefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(modifiedDateField)+"\"/>\n"+
"<input type=\"hidden\" name=\"createddatefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(createdDateField)+"\"/>\n"+
"<input type=\"hidden\" name=\"indexeddatefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(indexedDateField)+"\"/>\n"+
@@ -1814,6 +1831,10 @@ public class SolrConnector extends org.a
if (idField != null)
parameters.setParameter(SolrConfig.PARAM_IDFIELD,idField);
+ String originalSizeField = variableContext.getParameter("originalsizefield");
+ if (originalSizeField != null)
+ parameters.setParameter(SolrConfig.PARAM_ORIGINALSIZEFIELD,originalSizeField);
+
String modifiedDateField = variableContext.getParameter("modifieddatefield");
if (modifiedDateField != null)
parameters.setParameter(SolrConfig.PARAM_MODIFIEDDATEFIELD,modifiedDateField);
@@ -2254,6 +2275,14 @@ public class SolrConnector extends org.a
}
else
sb.append('-');
+
+ if (originalSizeAttributeName != null)
+ {
+ sb.append('+');
+ pack(sb,originalSizeAttributeName,'+');
+ }
+ else
+ sb.append('-');
if (modifiedDateAttributeName != null)
{
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties Fri May 29 10:03:38 2015
@@ -53,6 +53,7 @@ SolrConnector.Add=Add
SolrConnector.AddZookeeperHost=Add ZooKeeper host
SolrConnector.Certificate=Certificate:
SolrConnector.IDFieldName=ID field name:
+SolrConnector.OriginalSizeFieldName=Original size field name:
SolrConnector.ModifiedDateFieldName=Modified date field name:
SolrConnector.CreatedDateFieldName=Created date field name:
SolrConnector.IndexedDateFieldName=Indexed date field name:
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties Fri May 29 10:03:38 2015
@@ -53,6 +53,7 @@ SolrConnector.Add=追加
SolrConnector.AddZookeeperHost=ZooKeeperホストを追加
SolrConnector.Certificate=証明証：
SolrConnector.IDFieldName=IDフィールド名：
+SolrConnector.OriginalSizeFieldName=Original size field name:
SolrConnector.ModifiedDateFieldName=更新日付フィールド名：
SolrConnector.CreatedDateFieldName=作成日付フィールド名：
SolrConnector.IndexedDateFieldName=Indexed date field name:
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties Fri May 29 10:03:38 2015
@@ -53,6 +53,7 @@ SolrConnector.Add=添加
SolrConnector.AddZookeeperHost=添加ZooKeeper主机
SolrConnector.Certificate=证书:
SolrConnector.IDFieldName=ID字段名:
+SolrConnector.OriginalSizeFieldName=Original size field name:
SolrConnector.ModifiedDateFieldName=更新日期字段名:
SolrConnector.CreatedDateFieldName=生成日期字段名:
SolrConnector.IndexedDateFieldName=ç´¢å¼åçæ¥æå段å:
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java?rev=1682410&r1=1682409&r2=1682410&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java Fri May 29 10:03:38 2015
@@ -56,6 +56,7 @@ public class RepositoryDocument
protected Date createdDate = null;
protected Date modifiedDate = null;
protected Date indexingDate = null;
+ protected Long originalSize = null;
/** Constructor.
*/
@@ -79,6 +80,7 @@ public class RepositoryDocument
rval.createdDate = createdDate;
rval.modifiedDate = modifiedDate;
rval.indexingDate = indexingDate;
+ rval.originalSize = originalSize;
for (String key : fields.keySet())
{
rval.fields.put(key,fields.get(key));
@@ -112,6 +114,23 @@ public class RepositoryDocument
readerFields.clear();
}
+ /** Set the document's original (repository) size. Use null to indicate that the size is
+ * unknown.
+ *@param size is the size.
+ */
+ public void setOriginalSize(Long size)
+ {
+ originalSize = size;
+ }
+
+ /** Get the document's original size.
+ *@return the original repository document size, or null if unknown.
+ */
+ public Long getOriginalSize()
+ {
+ return originalSize;
+ }
+
/** Set the document's created date. Use null to indicate that the date is unknown.
*@param date is the date.
*/