You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by mi...@apache.org on 2013/06/11 16:09:29 UTC

svn commit: r1491800 - in /manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main: java/org/apache/manifoldcf/agents/output/filesystem/ java/org/apache/manifoldcf/crawler/connectors/filesystem/ native2ascii/org/apache/manifoldcf/cr...

Author: minoru
Date: Tue Jun 11 14:09:28 2013
New Revision: 1491800

URL: http://svn.apache.org/r1491800
Log: (empty)

Modified:
    manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
    manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
    manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties
    manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties

Modified: manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java?rev=1491800&r1=1491799&r2=1491800&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java Tue Jun 11 14:09:28 2013
@@ -181,6 +181,7 @@ public class FileOutputConnector extends
       if (specs.getRootPath() != null) {
         path.append(specs.getRootPath());
       }
+      path.append("/");
       path.append(documentURItoFilePath(documentURI));
 
       File file = new File(path.toString());
@@ -298,6 +299,7 @@ public class FileOutputConnector extends
       if (specs.getRootPath() != null) {
         path.append(specs.getRootPath());
       }
+      path.append("/");
       path.append(documentURItoFilePath(documentURI));
 
       File file = new File(path.toString());
@@ -494,53 +496,40 @@ public class FileOutputConnector extends
     uri = new URI(documentURI);
 
     if (uri.getScheme() != null) {
-      if (!path.toString().endsWith("/")) {
-        path.append("/");
-      }
       path.append(uri.getScheme());
+      path.append("/");
     }
 
     if (uri.getHost() != null) {
-      if (!path.toString().endsWith("/")) {
-        path.append("/");
-      }
       path.append(uri.getHost());
       if (uri.getPort() != -1) {
-        if (System.getProperty("os.name").toLowerCase().contains("windows")) {
-          path.append("+");
-        } else {
-          path.append(":");
-        }
+        path.append(":");
         path.append(uri.getPort());
       }
-      if (uri.getPath() != null) {
-        if (uri.getPath().length() == 0) {
+      if (uri.getRawPath() != null) {
+        if (uri.getRawPath().length() == 0) {
           path.append("/");
-        } else if (uri.getPath().equals("/")) {
-          path.append(uri.getPath());
+        } else if (uri.getRawPath().equals("/")) {
+          path.append(uri.getRawPath());
         } else {
-          for (String name : uri.getPath().split("/")) {
+          for (String name : uri.getRawPath().split("/")) {
             if (name.length() > 0) {
               path.append("/");
-              try {
-                path.append(URLEncoder.encode(name, "UTF-8"));
-              } catch(UnsupportedEncodingException e) {
-                path.append(name);
-              }
+              path.append(name);
             }
           }
         }
       }
+      if (uri.getRawQuery() != null) {
+        path.append("?");
+        path.append(uri.getRawQuery());
+      }
     } else {
-      if (uri.getSchemeSpecificPart() != null) {
-        for (String name : uri.getSchemeSpecificPart().split("/")) {
+      if (uri.getRawSchemeSpecificPart() != null) {
+        for (String name : uri.getRawSchemeSpecificPart().split("/")) {
           if (name.length() > 0) {
             path.append("/");
-            try {
-              path.append(URLEncoder.encode(name, "UTF-8"));
-            } catch(UnsupportedEncodingException e) {
-              path.append(name);
-            }
+            path.append(name);
           }
         }
       }

Modified: manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1491800&r1=1491799&r2=1491800&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Tue Jun 11 14:09:28 2013
@@ -25,6 +25,10 @@ import org.apache.manifoldcf.crawler.sys
 import org.apache.manifoldcf.core.extmimemap.ExtensionMimeMap;
 import java.util.*;
 import java.io.*;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
 
 /** This is the "repository connector" for a file system.  It's a relative of the share crawler, and should have
 * comparable basic functionality, with the exception of the ability to use ActiveDirectory and look at other shares.
@@ -103,6 +107,61 @@ public class FileConnector extends org.a
 
   /** Convert a document identifier to a URI.  The URI is the URI that will be the unique key from
   * the search index, and will be presented to the user as part of the search results.
+  * <p>
+  * This variant rebuilds the original URL of a document that is stored under one of the
+  * repository paths.  The part of the absolute file path that follows the repository path
+  * is expected to be laid out as {@code <scheme>/<host>/<rest>} and is turned into
+  * {@code scheme://host/rest}.  Only the first repository path that is a prefix of the
+  * file path is used.  If none is, the result of {@link #convertToURI(String)} is
+  * returned instead.
+  *@param documentIdentifier is the document identifier, which is treated as a file path.
+  *@param repositoryPaths are the repository paths to try, in order, as prefixes of the
+  *       absolute file path.
+  *@return the document uri.
+  *@throws ManifoldCFException if the rebuilt string is not a legal URI, or cannot be converted to a URL.
+  */
+  protected String convertToURI(String documentIdentifier, String[] repositoryPaths)
+    throws ManifoldCFException
+  {
+    //
+    // Note well:  This MUST be a legal URI!!!
+    try
+    {
+      String path = new File(documentIdentifier).getAbsolutePath();
+      for (String repositoryPath : repositoryPaths) {
+        if (path.startsWith(repositoryPath)) {
+          // Strip the prefix by length.  String.replaceFirst() would interpret the repository
+          // path as a regular expression, which fails or mis-matches for paths containing
+          // characters such as '\', '+', '(' or '['.
+          String relativePath = path.substring(repositoryPath.length());
+          if (relativePath.startsWith("/")) {
+            relativePath = relativePath.substring(1);
+          }
+          // At most three pieces: scheme, host, and everything else (which keeps its slashes).
+          String[] tmp = relativePath.split("/", 3);
+          // Missing pieces fall back to defaults; test the length instead of catching
+          // ArrayIndexOutOfBoundsException.
+          String scheme = tmp.length > 0 ? tmp[0] : "http";
+          String host = tmp.length > 1 ? tmp[1] : "localhost";
+          String other = tmp.length > 2 ? "/" + tmp[2] : "/";
+          // toURL() additionally rejects schemes that have no registered protocol handler.
+          return new URI(scheme + "://" + host + other).toURL().toString();
+        }
+      }
+      return convertToURI(documentIdentifier);
+    }
+    catch (URISyntaxException e)
+    {
+      throw new ManifoldCFException("Bad url",e);
+    }
+    catch (IOException e)
+    {
+      throw new ManifoldCFException("Bad url",e);
+    }
+  }
+  
+  /** Convert a document identifier to a URI.  The URI is the URI that will be the unique key from
+  * the search index, and will be presented to the user as part of the search results.
   *@param documentIdentifier is the document identifier.
   *@return the document uri.
   */
@@ -271,6 +330,36 @@ public class FileConnector extends org.a
             // We still need to check based on file data.
             if (checkIngest(file,spec))
             {
+              int j = 0;
+              
+              /*
+               * get repository paths
+               */
+              j = 0;
+              List<String> repositoryPaths = new ArrayList<String>();
+              while ( j < spec.getChildCount())
+              {
+                SpecificationNode sn = spec.getChild(j++);
+                if (sn.getType().equals("startpoint"))
+                {
+                  if (sn.getAttributeValue("path").length() > 0) {
+                    repositoryPaths.add(sn.getAttributeValue("path"));
+                  }
+                }
+              }
+              
+              /*
+               * get filepathtouri value
+               */
+              boolean filePathToUri = false;
+              j = 0;
+              while (j < spec.getChildCount()) {
+                SpecificationNode sn = spec.getChild(j++);
+                if (sn.getType().equals("filepathtouri")) {
+                  filePathToUri = Boolean.valueOf(sn.getValue());
+                }
+              }
+              
               long startTime = System.currentTimeMillis();
               String errorCode = "OK";
               String errorDesc = null;
@@ -293,9 +382,15 @@ public class FileConnector extends org.a
                     data.setFileName(fileName);
                     data.setMimeType(mapExtensionToMimeType(fileName));
                     data.setModifiedDate(new Date(file.lastModified()));
-                    data.addField("uri",file.toString());
-                    // MHL for other metadata
-                    activities.ingestDocument(documentIdentifier,version,convertToURI(documentIdentifier),data);
+                    if (filePathToUri) {
+                      data.addField("uri",convertToURI(documentIdentifier,repositoryPaths.toArray(new String[0])));
+                      // MHL for other metadata
+                      activities.ingestDocument(documentIdentifier,version,convertToURI(documentIdentifier,repositoryPaths.toArray(new String[0])),data);
+                    } else {
+                      data.addField("uri",file.toString());
+                      // MHL for other metadata
+                      activities.ingestDocument(documentIdentifier,version,convertToURI(documentIdentifier),data);
+                    }
                     fileLength = new Long(fileBytes);
                   }
                   finally
@@ -422,6 +517,7 @@ public class FileConnector extends org.a
     throws ManifoldCFException, IOException
   {
     tabsArray.add(Messages.getString(locale,"FileConnector.Paths"));
+    tabsArray.add(Messages.getString(locale,"FileConnector.FilePathToURITab"));
 
     out.print(
 "<script type=\"text/javascript\">\n"+
@@ -689,6 +785,44 @@ public class FileConnector extends org.a
 "<input type=\"hidden\" name=\"pathcount\" value=\""+Integer.toString(k)+"\"/>\n"
       );
     }
+    
+    
+    /*
+     * get filepathtouri value
+     */
+    boolean filePathToUri = false;
+    i = 0;
+    while (i < ds.getChildCount()) {
+      SpecificationNode sn = ds.getChild(i++);
+      if (sn.getType().equals("filepathtouri")) {
+        filePathToUri = Boolean.valueOf(sn.getValue());
+      }
+    }	    
+
+    /*
+     * File path to URI tab
+     */
+    if (tabName.equals(Messages.getString(locale,"FileConnector.FilePathToURITab"))) {
+      out.print(
+"<table class=\"displaytable\">\n"+
+"  <tr><td colspan=\"2\" class=\"separator\"><hr/></td></tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"FileConnector.FilePathToURI") + "</nobr></td>\n"+
+"    <td class=\"value\">\n"+
+"      <input name=\"filepathtouri\" type=\"checkbox\" value=\"true\"" + (filePathToUri ? "checked" : "") +"/>\n" +
+"    </td>\n"+
+"  </tr>\n"+
+"</table>\n"
+      );
+    } else {
+      /*
+       * File path to URI tab hiddens
+       */
+      out.print(
+"<input type=\"hidden\" name=\"filepathtouri\" value=\"" + org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Boolean.toString(filePathToUri)) + "\"/>\n"
+      );
+    }
+
   }
   
   /** Process a specification post.
@@ -804,6 +938,26 @@ public class FileConnector extends org.a
         ds.addChild(k,node);
       }
     }
+    
+    /*
+     * "filepathtouri"
+     */
+    String filepathtouri = variableContext.getParameter("filepathtouri");
+    if (filepathtouri != null) {
+      SpecificationNode sn;
+      int i = 0;
+      while (i < ds.getChildCount()) {
+        if (ds.getChild(i).getType().equals("filepathtouri")) {
+          ds.removeChild(i);
+        } else {
+          i++;
+        }
+      }
+      sn = new SpecificationNode("filepathtouri");
+      sn.setValue(filepathtouri);
+      ds.addChild(ds.getChildCount(),sn);
+    }
+    
     return null;
   }
   
@@ -817,6 +971,8 @@ public class FileConnector extends org.a
   public void viewSpecification(IHTTPOutput out, Locale locale, DocumentSpecification ds)
     throws ManifoldCFException, IOException
   {
+    int i = 0;
+    
     out.print(
 "<table class=\"displaytable\">\n"+
 "  <tr>\n"+
@@ -824,7 +980,7 @@ public class FileConnector extends org.a
 "  </tr>\n"
     );
 
-    int i = 0;
+    i = 0;
     boolean seenAny = false;
     while (i < ds.getChildCount())
     {
@@ -867,6 +1023,29 @@ public class FileConnector extends org.a
     out.print(
 "</table>\n"
     );
+    
+    /*
+     * get filepathtouri value
+     */
+    boolean filePathToUri = false;
+    i = 0;
+    while (i < ds.getChildCount()) {
+      SpecificationNode sn = ds.getChild(i++);
+      if (sn.getType().equals("filepathtouri")) {
+        filePathToUri = Boolean.valueOf(sn.getValue());
+      }
+    }
+
+    out.print(
+"<table class=\"displaytable\">\n"+
+"  <tr><td colspan=\"2\" class=\"separator\"><hr/></td></tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"FileConnector.FilePathToURI") + "</nobr></td>\n"+
+"    <td class=\"value\">" + org.apache.manifoldcf.ui.util.Encoder.bodyEscape(Boolean.toString(filePathToUri)) + "</td>\n"+
+"  </tr>\n"+
+"</table>\n"
+    );
+  
   }
 
   // Protected static methods

Modified: manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties?rev=1491800&r1=1491799&r2=1491800&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties (original)
+++ manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_en_US.properties Tue Jun 11 14:09:28 2013
@@ -36,3 +36,5 @@ FileConnector.DeletePath=Delete path #
 FileConnector.AddNewMatchForPath=Add new match for path #
 FileConnector.AddNewPath=Add new path
 
+FileConnector.FilePathToURITab=Convert file path to URI
+FileConnector.FilePathToURI=Convert file path to URI:

Modified: manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties?rev=1491800&r1=1491799&r2=1491800&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties (original)
+++ manifoldcf/branches/CONNECTORS-710/connectors/filesystem/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/filesystem/common_ja_JP.properties Tue Jun 11 14:09:28 2013
@@ -35,3 +35,6 @@ FileConnector.InsertNewMatchForPath=パ
 FileConnector.DeletePath=パスを削除: #
 FileConnector.AddNewMatchForPath=パス用に新しいパターンを追加: #
 FileConnector.AddNewPath=新しいパスを追加
+
+FileConnector.FilePathToURITab=ファイルパスをURIへ変換する
+FileConnector.FilePathToURI=ファイルパスをURIへ変換する: