You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by mi...@apache.org on 2013/06/13 13:10:24 UTC
svn commit: r1492619 - in /manifoldcf/trunk: ./
connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/
connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/
connectors/...
Author: minoru
Date: Thu Jun 13 11:10:24 2013
New Revision: 1492619
URL: http://svn.apache.org/r1492619
Log:
CONNECTORS-710.
FileConnector should have option of outputting a full http url based on Wget conventions, not just a file:/ url
Modified:
manifoldcf/trunk/ (props changed)
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties
manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties
Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-710:r1491769-1492603
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1492619&r1=1492618&r2=1492619&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Jun 13 11:10:24 2013
@@ -4,6 +4,9 @@ $Id$
======================= 1.3-dev =====================
+CONNECTORS-710: FileConnector should have option of outputting a full http url based on Wget conventions, not just a file:/ url
+(Minoru Osuka, Karl Wright)
+
CONNECTORS-667: Refix NPE problem with Livelink authority. This time
we were seeing it when SSL was not on.
(David Morana, Karl Wright)
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java?rev=1492619&r1=1492618&r2=1492619&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java Thu Jun 13 11:10:24 2013
@@ -181,6 +181,7 @@ public class FileOutputConnector extends
if (specs.getRootPath() != null) {
path.append(specs.getRootPath());
}
+ path.append("/");
path.append(documentURItoFilePath(documentURI));
File file = new File(path.toString());
@@ -298,6 +299,7 @@ public class FileOutputConnector extends
if (specs.getRootPath() != null) {
path.append(specs.getRootPath());
}
+ path.append("/");
path.append(documentURItoFilePath(documentURI));
File file = new File(path.toString());
@@ -494,53 +496,40 @@ public class FileOutputConnector extends
uri = new URI(documentURI);
if (uri.getScheme() != null) {
- if (!path.toString().endsWith("/")) {
- path.append("/");
- }
path.append(uri.getScheme());
+ path.append("/");
}
if (uri.getHost() != null) {
- if (!path.toString().endsWith("/")) {
- path.append("/");
- }
path.append(uri.getHost());
if (uri.getPort() != -1) {
- if (System.getProperty("os.name").toLowerCase().contains("windows")) {
- path.append("+");
- } else {
- path.append(":");
- }
+ path.append(":");
path.append(uri.getPort());
}
- if (uri.getPath() != null) {
- if (uri.getPath().length() == 0) {
+ if (uri.getRawPath() != null) {
+ if (uri.getRawPath().length() == 0) {
path.append("/");
- } else if (uri.getPath().equals("/")) {
- path.append(uri.getPath());
+ } else if (uri.getRawPath().equals("/")) {
+ path.append(uri.getRawPath());
} else {
- for (String name : uri.getPath().split("/")) {
+ for (String name : uri.getRawPath().split("/")) {
if (name.length() > 0) {
path.append("/");
- try {
- path.append(URLEncoder.encode(name, "UTF-8"));
- } catch(UnsupportedEncodingException e) {
- path.append(name);
- }
+ path.append(name);
}
}
}
}
+ if (uri.getRawQuery() != null) {
+ path.append("?");
+ path.append(uri.getRawQuery());
+ }
} else {
- if (uri.getSchemeSpecificPart() != null) {
- for (String name : uri.getSchemeSpecificPart().split("/")) {
+ if (uri.getRawSchemeSpecificPart() != null) {
+ for (String name : uri.getRawSchemeSpecificPart().split("/")) {
if (name.length() > 0) {
path.append("/");
- try {
- path.append(URLEncoder.encode(name, "UTF-8"));
- } catch(UnsupportedEncodingException e) {
- path.append(name);
- }
+ path.append(name);
}
}
}
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1492619&r1=1492618&r2=1492619&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Thu Jun 13 11:10:24 2013
@@ -25,6 +25,10 @@ import org.apache.manifoldcf.crawler.sys
import org.apache.manifoldcf.core.extmimemap.ExtensionMimeMap;
import java.util.*;
import java.io.*;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
/** This is the "repository connector" for a file system. It's a relative of the share crawler, and should have
* comparable basic functionality, with the exception of the ability to use ActiveDirectory and look at other shares.
@@ -103,6 +107,61 @@ public class FileConnector extends org.a
/** Convert a document identifier to a URI. The URI is the URI that will be the unique key from
* the search index, and will be presented to the user as part of the search results.
+ *@param filePath is the document filePath.
+ *@param repositoryPath is the document repositoryPath.
+ *@return the document uri.
+ */
+ protected String convertToURI(String documentIdentifier, String[] repositoryPaths)
+ throws ManifoldCFException
+ {
+ //
+ // Note well: This MUST be a legal URI!!!
+ try
+ {
+ String path = new File(documentIdentifier).getAbsolutePath();
+ for (String repositoryPath : repositoryPaths) {
+ if (path.startsWith(repositoryPath)) {
+ StringBuffer sb = new StringBuffer();
+ path = path.replaceFirst(repositoryPath, "");
+ if (path.startsWith("/")) {
+ path = path.replaceFirst("/", "");
+ }
+ String[] tmp = path.split("/", 3);
+ String scheme = "";
+ String host = "";
+ String other = "";
+ try {
+ scheme = tmp[0];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ scheme = "http";
+ }
+ try {
+ host = tmp[1];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ host = "localhost";
+ }
+ try {
+ other = "/" + tmp[2];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ other = "/";
+ }
+ return new URI(scheme + "://" + host + other).toURL().toString();
+ }
+ }
+ return convertToURI(documentIdentifier);
+ }
+ catch (URISyntaxException e)
+ {
+ throw new ManifoldCFException("Bad url",e);
+ }
+ catch (IOException e)
+ {
+ throw new ManifoldCFException("Bad url",e);
+ }
+ }
+
+ /** Convert a document identifier to a URI. The URI is the URI that will be the unique key from
+ * the search index, and will be presented to the user as part of the search results.
*@param documentIdentifier is the document identifier.
*@return the document uri.
*/
@@ -164,8 +223,22 @@ public class FileConnector extends org.a
DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
throws ManifoldCFException, ServiceInterruption
{
- String[] rval = new String[documentIdentifiers.length];
int i = 0;
+
+ /*
+ * get filepathtouri value
+ */
+ boolean filePathToUri = false;
+ i = 0;
+ while (i < spec.getChildCount()) {
+ SpecificationNode sn = spec.getChild(i++);
+ if (sn.getType().equals("filepathtouri")) {
+ filePathToUri = Boolean.valueOf(sn.getValue());
+ }
+ }
+
+ String[] rval = new String[documentIdentifiers.length];
+ i = 0;
while (i < rval.length)
{
File file = new File(documentIdentifiers[i]);
@@ -190,6 +263,10 @@ public class FileConnector extends org.a
// Get the file's modified date.
long lastModified = file.lastModified();
StringBuilder sb = new StringBuilder();
+ if (filePathToUri)
+ sb.append("+");
+ else
+ sb.append("-");
sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
rval[i] = sb.toString();
}
@@ -223,7 +300,9 @@ public class FileConnector extends org.a
int i = 0;
while (i < documentIdentifiers.length)
{
- File file = new File(documentIdentifiers[i]);
+ String version = versions[i];
+ String documentIdentifier = documentIdentifiers[i];
+ File file = new File(documentIdentifier);
if (file.exists())
{
if (file.isDirectory())
@@ -232,7 +311,6 @@ public class FileConnector extends org.a
long startTime = System.currentTimeMillis();
String errorCode = "OK";
String errorDesc = null;
- String documentIdentifier = documentIdentifiers[i];
String entityReference = documentIdentifier;
try
{
@@ -271,12 +349,35 @@ public class FileConnector extends org.a
// We still need to check based on file data.
if (checkIngest(file,spec))
{
+ int j = 0;
+
+ /*
+ * get repository paths
+ */
+ j = 0;
+ List<String> repositoryPaths = new ArrayList<String>();
+ while ( j < spec.getChildCount())
+ {
+ SpecificationNode sn = spec.getChild(j++);
+ if (sn.getType().equals("startpoint"))
+ {
+ if (sn.getAttributeValue("path").length() > 0) {
+ repositoryPaths.add(sn.getAttributeValue("path"));
+ }
+ }
+ }
+
+ /*
+ * get filepathtouri value
+ */
+ boolean filePathToUri = false;
+ if (version.length() > 0 && version.startsWith("+"))
+ filePathToUri = true;
+
long startTime = System.currentTimeMillis();
String errorCode = "OK";
String errorDesc = null;
Long fileLength = null;
- String documentIdentifier = documentIdentifiers[i];
- String version = versions[i];
String entityDescription = documentIdentifier;
try
{
@@ -293,9 +394,15 @@ public class FileConnector extends org.a
data.setFileName(fileName);
data.setMimeType(mapExtensionToMimeType(fileName));
data.setModifiedDate(new Date(file.lastModified()));
- data.addField("uri",file.toString());
- // MHL for other metadata
- activities.ingestDocument(documentIdentifier,version,convertToURI(documentIdentifier),data);
+ if (filePathToUri) {
+ data.addField("uri",convertToURI(documentIdentifier,repositoryPaths.toArray(new String[0])));
+ // MHL for other metadata
+ activities.ingestDocument(documentIdentifier,version,convertToURI(documentIdentifier,repositoryPaths.toArray(new String[0])),data);
+ } else {
+ data.addField("uri",file.toString());
+ // MHL for other metadata
+ activities.ingestDocument(documentIdentifier,version,convertToURI(documentIdentifier),data);
+ }
fileLength = new Long(fileBytes);
}
finally
@@ -422,6 +529,7 @@ public class FileConnector extends org.a
throws ManifoldCFException, IOException
{
tabsArray.add(Messages.getString(locale,"FileConnector.Paths"));
+ tabsArray.add(Messages.getString(locale,"FileConnector.FilePathToURITab"));
out.print(
"<script type=\"text/javascript\">\n"+
@@ -689,6 +797,44 @@ public class FileConnector extends org.a
"<input type=\"hidden\" name=\"pathcount\" value=\""+Integer.toString(k)+"\"/>\n"
);
}
+
+
+ /*
+ * get filepathtouri value
+ */
+ boolean filePathToUri = false;
+ i = 0;
+ while (i < ds.getChildCount()) {
+ SpecificationNode sn = ds.getChild(i++);
+ if (sn.getType().equals("filepathtouri")) {
+ filePathToUri = Boolean.valueOf(sn.getValue());
+ }
+ }
+
+ /*
+ * File path to URI tab
+ */
+ if (tabName.equals(Messages.getString(locale,"FileConnector.FilePathToURITab"))) {
+ out.print(
+"<table class=\"displaytable\">\n"+
+" <tr><td colspan=\"2\" class=\"separator\"><hr/></td></tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"FileConnector.FilePathToURI") + "</nobr></td>\n"+
+" <td class=\"value\">\n"+
+" <input name=\"filepathtouri\" type=\"checkbox\" value=\"true\"" + (filePathToUri ? "checked" : "") +"/>\n" +
+" </td>\n"+
+" </tr>\n"+
+"</table>\n"
+ );
+ } else {
+ /*
+ * File path to URI tab hiddens
+ */
+ out.print(
+"<input type=\"hidden\" name=\"filepathtouri\" value=\"" + org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Boolean.toString(filePathToUri)) + "\"/>\n"
+ );
+ }
+
}
/** Process a specification post.
@@ -804,6 +950,26 @@ public class FileConnector extends org.a
ds.addChild(k,node);
}
}
+
+ /*
+ * "filepathtouri"
+ */
+ String filepathtouri = variableContext.getParameter("filepathtouri");
+ if (filepathtouri != null) {
+ SpecificationNode sn;
+ int i = 0;
+ while (i < ds.getChildCount()) {
+ if (ds.getChild(i).getType().equals("filepathtouri")) {
+ ds.removeChild(i);
+ } else {
+ i++;
+ }
+ }
+ sn = new SpecificationNode("filepathtouri");
+ sn.setValue(filepathtouri);
+ ds.addChild(ds.getChildCount(),sn);
+ }
+
return null;
}
@@ -817,6 +983,8 @@ public class FileConnector extends org.a
public void viewSpecification(IHTTPOutput out, Locale locale, DocumentSpecification ds)
throws ManifoldCFException, IOException
{
+ int i = 0;
+
out.print(
"<table class=\"displaytable\">\n"+
" <tr>\n"+
@@ -824,7 +992,7 @@ public class FileConnector extends org.a
" </tr>\n"
);
- int i = 0;
+ i = 0;
boolean seenAny = false;
while (i < ds.getChildCount())
{
@@ -867,6 +1035,29 @@ public class FileConnector extends org.a
out.print(
"</table>\n"
);
+
+ /*
+ * get filepathtouri value
+ */
+ boolean filePathToUri = false;
+ i = 0;
+ while (i < ds.getChildCount()) {
+ SpecificationNode sn = ds.getChild(i++);
+ if (sn.getType().equals("filepathtouri")) {
+ filePathToUri = Boolean.valueOf(sn.getValue());
+ }
+ }
+
+ out.print(
+"<table class=\"displaytable\">\n"+
+" <tr><td colspan=\"2\" class=\"separator\"><hr/></td></tr>\n"+
+" <tr>\n"+
+" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"FileConnector.FilePathToURI") + "</nobr></td>\n"+
+" <td class=\"value\">" + org.apache.manifoldcf.ui.util.Encoder.bodyEscape(Boolean.toString(filePathToUri)) + "</td>\n"+
+" </tr>\n"+
+"</table>\n"
+ );
+
}
// Protected static methods
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties?rev=1492619&r1=1492618&r2=1492619&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties Thu Jun 13 11:10:24 2013
@@ -36,3 +36,5 @@ FileConnector.DeletePath=Delete path #
FileConnector.AddNewMatchForPath=Add new match for path #
FileConnector.AddNewPath=Add new path
+FileConnector.FilePathToURITab=Convert file path to URI
+FileConnector.FilePathToURI=Convert file path to URI:
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties?rev=1492619&r1=1492618&r2=1492619&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties Thu Jun 13 11:10:24 2013
@@ -35,3 +35,6 @@ FileConnector.InsertNewMatchForPath=ã
FileConnector.DeletePath=パスを削除： #
FileConnector.AddNewMatchForPath=パス用に新しいパターンを追加： #
FileConnector.AddNewPath=新しいパスを追加
+
+FileConnector.FilePathToURITab=ファイルパスをURIへ変換する
+FileConnector.FilePathToURI=ファイルパスをURIへ変換する：