You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by sh...@apache.org on 2013/09/03 02:29:35 UTC
svn commit: r1519533 - in /manifoldcf/trunk: CHANGES.txt
connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Author: shinichiro
Date: Tue Sep 3 00:29:35 2013
New Revision: 1519533
URL: http://svn.apache.org/r1519533
Log:
Fix for CONNECTORS-767
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1519533&r1=1519532&r2=1519533&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Sep 3 00:29:35 2013
@@ -3,6 +3,9 @@ $Id$
======================= 1.4-dev =====================
+CONNECTORS-767: Add filename support to Web connector.
+(Shinichiro Abe)
+
CONNECTORS-769: Include general_parentid in Livelink metadata.
(David Morana, Karl Wright)
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1519533&r1=1519532&r2=1519533&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Tue Sep 3 00:29:35 2013
@@ -1343,6 +1343,17 @@ public class WebcrawlerConnector extends
RepositoryDocument rd = new RepositoryDocument();
+ // Set the file name
+ String fileName = "";
+ try {
+ fileName = documentIdentifiertoFileName(documentIdentifier);
+ } catch (URISyntaxException e1) {
+ fileName = "";
+ }
+ if (fileName.length() > 0){
+ rd.setFileName(fileName);
+ }
+
// Set the content type
rd.setMimeType(cache.getContentType(documentIdentifier));
@@ -5672,6 +5683,44 @@ public class WebcrawlerConnector extends
return rval;
}
+ /** Convert a document identifier to a file name derived from its URI path and query.
+ * @param documentIdentifier the document identifier, parsed here as a URI.
+ * @return the last raw path segment ("index.html" when the path ends in "/"), plus "?" and the raw query if present; "" when the path is null or empty.
+ * @throws URISyntaxException if documentIdentifier cannot be parsed as a URI.
+ */
+ protected String documentIdentifiertoFileName(String documentIdentifier)
+ throws URISyntaxException
+ {
+ StringBuffer path = new StringBuffer();
+ URI uri = null;
+
+ uri = new URI(documentIdentifier); // the only statement here that can raise URISyntaxException
+
+ if (uri.getRawPath() != null) { // a null path leaves the file name empty
+ if (uri.getRawPath().equals("")) {
+ path.append(""); // no-op: an empty path yields an empty file name
+ } else if (uri.getRawPath().equals("/")) {
+ path.append("index.html"); // root path: fall back to the default name
+ } else if (uri.getRawPath().length() != 0) { // always true here: the empty path was handled above
+ if (uri.getRawPath().endsWith("/")) {
+ path.append("index.html"); // path ends in a slash: fall back to the default name
+ } else {
+ String[] names = uri.getRawPath().split("/"); // raw path, so percent-escapes are kept as-is
+ path.append(names[names.length - 1]); // last "/"-separated segment is the file name
+ }
+ }
+ }
+
+ if (path.length() > 0) { // the query is appended only when a file name was derived
+ if (uri.getRawQuery() != null) {
+ path.append("?");
+ path.append(uri.getRawQuery()); // raw query, not decoded
+ }
+ }
+
+ return path.toString();
+ }
+
/** Find a redirection URI, if it exists */
protected String findRedirectionURI(String currentURI)
throws ManifoldCFException