You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by ml...@apache.org on 2013/06/25 09:10:38 UTC

svn commit: r1496363 - in /manifoldcf/branches/CONNECTORS-727: connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/ connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/...

Author: mlizewski
Date: Tue Jun 25 07:10:37 2013
New Revision: 1496363

URL: http://svn.apache.org/r1496363
Log:
fixes
moved entity classes to a separate package so they are easier to reuse when implementing the API
added the ability to return related documents for graph- and tree-type repositories

Added:
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/DateAdapter.java
      - copied, changed from r1495366, manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/DateAdapter.java
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Item.java
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Items.java
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Meta.java
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seed.java
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seeds.java
Removed:
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/DateAdapter.java
Modified:
    manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java
    manifoldcf/branches/CONNECTORS-727/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml

Modified: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java?rev=1496363&r1=1496362&r2=1496363&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java Tue Jun 25 07:10:37 2013
@@ -17,6 +17,9 @@
  */
 package org.apache.manifoldcf.crawler.connectors.generic;
 
+import org.apache.manifoldcf.crawler.connectors.generic.api.Meta;
+import org.apache.manifoldcf.crawler.connectors.generic.api.Item;
+import org.apache.manifoldcf.crawler.connectors.generic.api.Items;
 import java.io.*;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -24,8 +27,6 @@ import java.net.URLEncoder;
 import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 import javax.xml.bind.JAXBContext;
 import javax.xml.bind.JAXBException;
 import javax.xml.bind.Unmarshaller;
@@ -48,7 +49,6 @@ import org.apache.http.HttpStatus;
 import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.Credentials;
 import org.apache.http.auth.UsernamePasswordCredentials;
-import org.apache.http.client.ClientProtocolException;
 import org.apache.http.client.HttpClient;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.impl.auth.BasicScheme;
@@ -56,6 +56,8 @@ import org.apache.http.impl.client.Defau
 import org.apache.http.protocol.HttpContext;
 import org.apache.http.util.EntityUtils;
 import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.core.common.XThreadInputStream;
+import org.apache.manifoldcf.core.common.XThreadStringBuffer;
 import org.apache.manifoldcf.core.interfaces.*;
 import org.apache.manifoldcf.core.system.ManifoldCF;
 import org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector;
@@ -90,6 +92,8 @@ public class GenericConnector extends Ba
 
   private String genericEntryPoint = null;
 
+  protected static final String RELATIONSHIP_RELATED = "related";
+
   private ConcurrentHashMap<String, Item> documentCache = new ConcurrentHashMap<String, Item>(10);
 
   /**
@@ -104,6 +108,11 @@ public class GenericConnector extends Ba
   }
 
   @Override
+  public String[] getRelationshipTypes() { // Reports the relationship types this connector declares.
+    return new String[]{RELATIONSHIP_RELATED}; // single entry: the "related" constant
+  }
+
+  @Override
   public int getConnectorModel() {
     return GenericConnector.MODEL_ADD_CHANGE;
   }
@@ -166,7 +175,7 @@ public class GenericConnector extends Ba
       }
       return checkThread.getResult();
     } catch (InterruptedException ex) {
-      return "Check exception: " + ex.getMessage();
+      throw new ManifoldCFException(ex.getMessage(), ex, ManifoldCFException.INTERRUPTED);
     }
   }
 
@@ -178,46 +187,58 @@ public class GenericConnector extends Ba
     HttpClient client = getClient();
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
 
-    try {
-      StringBuilder url = new StringBuilder(genericEntryPoint);
-      url.append("?").append(ACTION_PARAM_NAME).append("=").append(ACTION_SEED);
-      if (startTime > 0) {
-        url.append("&startTime=").append(sdf.format(new Date(startTime)));
-      }
-      url.append("&endTime=").append(sdf.format(new Date(endTime)));
-      for (int i = 0; i < spec.getChildCount(); i++) {
-        SpecificationNode sn = spec.getChild(i);
-        if (sn.getType().equals("param")) {
-          try {
-            String paramName = sn.getAttributeValue("name");
-            String paramValue = sn.getValue();
-            url.append("&").append(URLEncoder.encode(paramName, "UTF-8")).append("=").append(URLEncoder.encode(paramValue, "UTF-8"));
-          } catch (UnsupportedEncodingException ex) {
-            Logger.getLogger(GenericConnector.class.getName()).log(Level.SEVERE, null, ex);
-          }
+    StringBuilder url = new StringBuilder(genericEntryPoint);
+    url.append("?").append(ACTION_PARAM_NAME).append("=").append(ACTION_SEED);
+    if (startTime > 0) {
+      url.append("&startTime=").append(sdf.format(new Date(startTime)));
+    }
+    url.append("&endTime=").append(sdf.format(new Date(endTime)));
+    for (int i = 0; i < spec.getChildCount(); i++) {
+      SpecificationNode sn = spec.getChild(i);
+      if (sn.getType().equals("param")) {
+        try {
+          String paramName = sn.getAttributeValue("name");
+          String paramValue = sn.getValue();
+          url.append("&").append(URLEncoder.encode(paramName, "UTF-8")).append("=").append(URLEncoder.encode(paramValue, "UTF-8"));
+        } catch (UnsupportedEncodingException ex) {
+          throw new ManifoldCFException("addSeedDocuments error: " + ex.getMessage(), ex);
         }
       }
-      ExecuteSeedingThread seedingThread = new ExecuteSeedingThread(client, activities, url.toString());
-      seedingThread.start();
-      seedingThread.join();
-      if (seedingThread.getException() != null) {
-        Throwable thr = seedingThread.getException();
-        if (thr instanceof ManifoldCFException) {
-          if (((ManifoldCFException) thr).getErrorCode() == ManifoldCFException.INTERRUPTED) {
-            throw new InterruptedException(thr.getMessage());
+    }
+    ExecuteSeedingThread t = new ExecuteSeedingThread(client, url.toString());
+    try {
+      t.start();
+      boolean wasInterrupted = false;
+      try {
+        XThreadStringBuffer seedBuffer = t.getBuffer();
+
+        // Pick up the paths, and add them to the activities, before we join with the child thread.
+        while (true) {
+          // The only kind of exceptions this can throw are going to shut the process down.
+          String docPath = seedBuffer.fetch();
+          if (docPath == null) {
+            break;
           }
-          throw (ManifoldCFException) thr;
-        } else if (thr instanceof ServiceInterruption) {
-          throw (ServiceInterruption) thr;
-        } else if (thr instanceof IOException) {
-          handleIOException((IOException) thr);
-        } else if (thr instanceof RuntimeException) {
-          throw (RuntimeException) thr;
+          // Add the pageID to the queue
+          activities.addSeedDocument(docPath);
+        }
+      } catch (InterruptedException e) {
+        wasInterrupted = true;
+        throw e;
+      } catch (ManifoldCFException e) {
+        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) {
+          wasInterrupted = true;
+        }
+        throw e;
+      } finally {
+        if (!wasInterrupted) {
+          t.finishUp();
         }
-        throw new ManifoldCFException("addSeedDocuments error: " + thr.getMessage(), thr);
       }
-    } catch (InterruptedException ex) {
-      throw new ManifoldCFException("addSeedDocuments error: " + ex.getMessage(), ex);
+    } catch (InterruptedException e) {
+      t.interrupt();
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+        ManifoldCFException.INTERRUPTED);
     }
   }
 
@@ -257,7 +278,7 @@ public class GenericConnector extends Ba
         }
       }
     } catch (UnsupportedEncodingException ex) {
-      throw new ManifoldCFException("getDocumentVersions error - invalid chars in id: " + ex.getMessage(), ex);
+      throw new ManifoldCFException("getDocumentVersions error: " + ex.getMessage(), ex);
     }
     try {
       DocumentVersionThread versioningThread = new DocumentVersionThread(client, url.toString(), documentIdentifiers, genericAuthMode, rights, documentCache);
@@ -281,7 +302,7 @@ public class GenericConnector extends Ba
       }
       return versioningThread.getVersions();
     } catch (InterruptedException ex) {
-      throw new ManifoldCFException("getDocumentVersions error: " + ex.getMessage(), ex);
+      throw new ManifoldCFException(ex.getMessage(), ex, ManifoldCFException.INTERRUPTED);
     }
   }
 
@@ -304,16 +325,22 @@ public class GenericConnector extends Ba
 
     HttpClient client = getClient();
     for (int i = 0; i < documentIdentifiers.length; i++) {
-      if (scanOnly[i]) {
-        continue;
-      }
       activities.checkJobStillActive();
-      
+
       Item item = documentCache.get(documentIdentifiers[i]);
       if (item == null) {
         throw new ManifoldCFException("processDocuments error - no cache entry for: " + documentIdentifiers[i]);
       }
 
+      if (item.related != null) {
+        for (String rel : item.related) {
+          activities.addDocumentReference(rel, documentIdentifiers[i], RELATIONSHIP_RELATED);
+        }
+      }
+      if (scanOnly[i]) {
+        continue;
+      }
+
       RepositoryDocument doc = new RepositoryDocument();
       if (item.mimeType != null) {
         doc.setMimeType(item.mimeType);
@@ -381,8 +408,8 @@ public class GenericConnector extends Ba
           handleIOException(ex);
         }
       } else {
+        StringBuilder url = new StringBuilder(genericEntryPoint);
         try {
-          StringBuilder url = new StringBuilder(genericEntryPoint);
           url.append("?").append(ACTION_PARAM_NAME).append("=").append(ACTION_ITEM);
           url.append("&id=").append(URLEncoder.encode(documentIdentifiers[i], "UTF-8"));
           for (int j = 0; j < spec.getChildCount(); j++) {
@@ -393,31 +420,48 @@ public class GenericConnector extends Ba
               url.append("&").append(URLEncoder.encode(paramName, "UTF-8")).append("=").append(URLEncoder.encode(paramValue, "UTF-8"));
             }
           }
-          
-          ExecuteProcessThread ingestThread = new ExecuteProcessThread(client, activities, doc, item.url, url.toString(), item.id, versions[i]);
-          ingestThread.start();
-          ingestThread.join();
-          if (ingestThread.getException() != null) {
-            Throwable thr = ingestThread.getException();
-            if (thr instanceof ManifoldCFException) {
-              if (((ManifoldCFException) thr).getErrorCode() == ManifoldCFException.INTERRUPTED) {
-                throw new InterruptedException(thr.getMessage());
-              }
-              throw (ManifoldCFException) thr;
-            } else if (thr instanceof ServiceInterruption) {
-              throw (ServiceInterruption) thr;
-            } else if (thr instanceof IOException) {
-              handleIOException((IOException) thr);
-            } else if (thr instanceof RuntimeException) {
-              throw (RuntimeException) thr;
-            }
-            throw new ManifoldCFException("processDocuments error: " + thr.getMessage(), thr);
-          }
-        } catch (InterruptedException ex) {
-          throw new ManifoldCFException("processDocuments error: " + ex.getMessage(), ex);
         } catch (UnsupportedEncodingException ex) {
           throw new ManifoldCFException("processDocuments error: " + ex.getMessage(), ex);
         }
+
+        ExecuteProcessThread t = new ExecuteProcessThread(client, url.toString());
+        try {
+          t.start();
+          boolean wasInterrupted = false;
+          try {
+            InputStream is = t.getSafeInputStream();
+            long fileLength = t.getStreamLength();
+            try {
+              // Can only index while background thread is running!
+              doc.setBinary(is, fileLength);
+              activities.ingestDocument(documentIdentifiers[i], versions[i], item.url, doc);
+            } finally {
+              is.close();
+            }
+          } catch (ManifoldCFException e) {
+            if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) {
+              wasInterrupted = true;
+            }
+            throw e;
+          } catch (java.net.SocketTimeoutException e) {
+            throw e;
+          } catch (InterruptedIOException e) {
+            wasInterrupted = true;
+            throw e;
+          } finally {
+            if (!wasInterrupted) {
+              t.finishUp();
+            }
+          }
+        } catch (InterruptedException e) {
+          t.interrupt();
+          throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
+        } catch (InterruptedIOException e) {
+          t.interrupt();
+          throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
+        } catch (IOException e) {
+          handleIOException(e);
+        }
       }
     }
   }
@@ -964,8 +1008,7 @@ public class GenericConnector extends Ba
   protected static void handleIOException(IOException e)
     throws ManifoldCFException, ServiceInterruption {
     if (!(e instanceof java.net.SocketTimeoutException) && (e instanceof InterruptedIOException)) {
-      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-        ManifoldCFException.INTERRUPTED);
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED);
     }
     long currentTime = System.currentTimeMillis();
     throw new ServiceInterruption("IO exception: " + e.getMessage(), e, currentTime + 300000L,
@@ -1039,16 +1082,36 @@ public class GenericConnector extends Ba
 
     protected String url;
 
-    protected ISeedingActivity activities;
+    protected final XThreadStringBuffer seedBuffer;
 
     protected Throwable exception = null;
 
-    public ExecuteSeedingThread(HttpClient client, ISeedingActivity activities, String url) {
+    public ExecuteSeedingThread(HttpClient client, String url) {
       super();
       setDaemon(true);
       this.client = client;
       this.url = url;
-      this.activities = activities;
+      seedBuffer = new XThreadStringBuffer();
+    }
+
+    public XThreadStringBuffer getBuffer() { // Exposes the buffer that SAXSeedingHandler fills with seed ids.
+      return seedBuffer;
+    }
+
+    public void finishUp() // Abandons the buffer, waits for this thread to end, then rethrows any failure it recorded.
+      throws InterruptedException {
+      seedBuffer.abandon(); // presumably releases a producer still blocked on the buffer - confirm against XThreadStringBuffer
+      join();
+      Throwable thr = exception;
+      if (thr != null) {
+        if (thr instanceof RuntimeException) {
+          throw (RuntimeException) thr;
+        } else if (thr instanceof Error) {
+          throw (Error) thr;
+        } else {
+          throw new RuntimeException("Unhandled exception of type: " + thr.getClass().getName(), thr); // NOTE(review): anything that is neither RuntimeException nor Error is wrapped here; that likely includes the ManifoldCFException set in run() - confirm
+        }
+      }
+    }
 
     @Override
@@ -1067,7 +1130,7 @@ public class GenericConnector extends Ba
             SAXParserFactory factory = SAXParserFactory.newInstance();
             factory.setNamespaceAware(true);
             SAXParser parser = factory.newSAXParser();
-            DefaultHandler handler = new SAXSeedingHandler(activities);
+            DefaultHandler handler = new SAXSeedingHandler(seedBuffer);
             parser.parse(response.getEntity().getContent(), handler);
           } catch (FactoryConfigurationError ex) {
             exception = new ManifoldCFException("addSeedDocuments error: " + ex.getMessage(), ex);
@@ -1076,6 +1139,7 @@ public class GenericConnector extends Ba
           } catch (SAXException ex) {
             exception = new ManifoldCFException("addSeedDocuments error: " + ex.getMessage(), ex);
           }
+          seedBuffer.signalDone();
         } finally {
           EntityUtils.consume(response.getEntity());
           method.releaseConnection();
@@ -1179,55 +1243,116 @@ public class GenericConnector extends Ba
 
     protected String url;
 
-    protected String sourceUrl;
-
-    protected IProcessActivity activities;
-
     protected Throwable exception = null;
 
-    protected String id;
+    protected XThreadInputStream threadStream;
 
-    protected String version;
+    protected boolean abortThread = false;
 
-    RepositoryDocument doc;
+    protected long streamLength = 0;
 
-    public ExecuteProcessThread(HttpClient client, IProcessActivity activities, RepositoryDocument doc, String url, String sourceUrl, String id, String version) {
+    public ExecuteProcessThread(HttpClient client, String url) {
       super();
       setDaemon(true);
       this.client = client;
       this.url = url;
-      this.sourceUrl = sourceUrl;
-      this.activities = activities;
-      this.id = id;
-      this.version = version;
-      this.doc = doc;
     }
 
     @Override
     public void run() {
       try {
-        HttpGet method = new HttpGet(sourceUrl);
+        HttpGet method = new HttpGet(url);
         HttpResponse response = client.execute(method);
         try {
           if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
-            exception = new ManifoldCFException("processDocuments error - interface returned incorrect return code for: " + id);
+            exception = new ManifoldCFException("processDocuments error - interface returned incorrect return code for: " + url);
+            return;
+          }
+          synchronized (this) {
+            if (!abortThread) {
+              streamLength = response.getEntity().getContentLength();
+              threadStream = new XThreadInputStream(response.getEntity().getContent());
+              this.notifyAll();
+            }
           }
 
-          doc.setBinary(response.getEntity().getContent(), response.getEntity().getContentLength());
-          activities.ingestDocument(id, version, url, doc);
-        } catch (ManifoldCFException ex) {
-          exception = ex;
-        } catch (ServiceInterruption ex) {
+          if (threadStream != null) {
+            // Stuff the content until we are done
+            threadStream.stuffQueue();
+          }
+        } catch (Throwable ex) {
           exception = ex;
         } finally {
           EntityUtils.consume(response.getEntity());
           method.releaseConnection();
         }
-      } catch (IOException ex) {
-        exception = ex;
+      } catch (Throwable e) {
+        exception = e;
+      }
+    }
+
+    public InputStream getSafeInputStream() throws InterruptedException, IOException {
+      // Blocks until run() publishes threadStream, or rethrows the failure run() recorded.
+      while (true) {
+        synchronized (this) {
+          // Surface the worker's real exception type (no IllegalStateException masking).
+          checkException(exception);
+          if (threadStream != null) {
+            return threadStream;
+          }
+          // run() never notifies on failure, so poll instead of waiting indefinitely.
+          wait(1000L);
+        }
+      }
+    }
 
+    public long getStreamLength() throws IOException, InterruptedException {
+      // Blocks until run() has published the stream, then returns the length it recorded.
+      while (true) {
+        synchronized (this) {
+          // Surface the worker's real exception type (no IllegalStateException masking).
+          checkException(exception);
+          if (threadStream != null) {
+            return streamLength;
+          }
+          // run() never notifies on failure, so poll instead of waiting indefinitely.
+          wait(1000L);
+        }
+      }
+    }
+
+    protected synchronized void checkException(Throwable exception) // Rethrows the given failure, if any; a null argument is a no-op.
+      throws IOException {
+      if (exception != null) {
+        Throwable e = exception; // the parameter shadows the field of the same name
+        if (e instanceof IOException) {
+          throw (IOException) e;
+        } else if (e instanceof RuntimeException) {
+          throw (RuntimeException) e;
+        } else if (e instanceof Error) {
+          throw (Error) e;
+        } else {
+          throw new RuntimeException("Unhandled exception of type: " + e.getClass().getName(), e); // any other type is wrapped, keeping the cause
+        }
+      }
+    }
+
+    public void finishUp() // Aborts the stream if one exists, waits for this thread to end, then rethrows its failure.
+      throws InterruptedException, IOException {
+      // This will be called during the finally
+      // block in the case where all is well (and
+      // the stream completed) and in the case where
+      // there were exceptions.
+      synchronized (this) {
+        if (threadStream != null) {
+          threadStream.abort(); // presumably makes stuffQueue() in run() return - confirm against XThreadInputStream
+        }
+        abortThread = true; // run() checks this flag before publishing the stream
+      }
+      join();
+      checkException(exception); // rethrows whatever run() recorded, if anything
+    }
+
     public Throwable getException() {
       return exception;
     }
@@ -1235,92 +1360,21 @@ public class GenericConnector extends Ba
 
   static public class SAXSeedingHandler extends DefaultHandler {
 
-    protected ISeedingActivity activities;
+    protected XThreadStringBuffer seedBuffer;
 
-    public SAXSeedingHandler(ISeedingActivity activities) {
-      this.activities = activities;
+    public SAXSeedingHandler(XThreadStringBuffer seedBuffer) {
+      this.seedBuffer = seedBuffer;
     }
 
     @Override
     public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
       if ("seed".equals(localName) && attributes.getValue("id") != null) {
         try {
-          activities.addSeedDocument(attributes.getValue("id"));
-        } catch (ManifoldCFException ex) {
+          seedBuffer.add(attributes.getValue("id"));
+        } catch (InterruptedException ex) {
           throw new SAXException("Adding seed failed: " + ex.getMessage(), ex);
         }
       }
     }
   }
-
-  @XmlRootElement(name = "meta")
-  public static class Meta {
-
-    @XmlAttribute(name = "name")
-    String name;
-
-    @XmlValue
-    String value;
-  }
-
-  @XmlRootElement(name = "item")
-  public static class Item {
-
-    @XmlAttribute(name = "id", required = true)
-    String id;
-
-    @XmlElement(name = "url", required = true)
-    String url;
-
-    @XmlElement(name = "version", required = true)
-    String version;
-
-    @XmlElement(name = "content")
-    String content;
-
-    @XmlElement(name = "mimetype")
-    String mimeType;
-
-    @XmlElement(name = "created")
-    @XmlJavaTypeAdapter(DateAdapter.class)
-    Date created;
-
-    @XmlElement(name = "updated")
-    @XmlJavaTypeAdapter(DateAdapter.class)
-    Date updated;
-
-    @XmlElement(name = "filename")
-    String fileName;
-
-    @XmlElementWrapper(name = "metadata")
-    @XmlElements({
-      @XmlElement(name = "meta", type = Meta.class)})
-    List<Meta> metadata;
-
-    @XmlElementWrapper(name = "auth")
-    @XmlElements({
-      @XmlElement(name = "token", type = String.class)})
-    List<String> auth;
-
-    public String getVersionString() {
-      if (version == null) {
-        return "";
-      }
-      StringBuilder sb = new StringBuilder(version);
-      if (auth != null) {
-        for (String t : auth) {
-          sb.append("|").append(t);
-        }
-      }
-      return sb.toString();
-    }
-  }
-
-  @XmlRootElement(name = "items")
-  public static class Items {
-
-    @XmlElements({
-      @XmlElement(name = "item", type = Item.class)})
-    List<Item> items;
-  }
 }

Copied: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/DateAdapter.java (from r1495366, manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/DateAdapter.java)
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/DateAdapter.java?p2=manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/DateAdapter.java&p1=manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/DateAdapter.java&r1=1495366&r2=1496363&rev=1496363&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/DateAdapter.java (original)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/DateAdapter.java Tue Jun 25 07:10:37 2013
@@ -13,11 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.manifoldcf.crawler.connectors.generic;
+package org.apache.manifoldcf.crawler.connectors.generic.api;
 
-import java.text.SimpleDateFormat;
 import java.util.Date;
 import javax.xml.bind.annotation.adapters.XmlAdapter;
+import org.apache.manifoldcf.core.common.DateParser;
 
 /**
  *
@@ -25,15 +25,13 @@ import javax.xml.bind.annotation.adapter
  */
 public class DateAdapter extends XmlAdapter<String, Date> {
 
-  private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
-
   @Override
   public Date unmarshal(String v) throws Exception {
-    return dateFormat.parse(v);
+    return DateParser.parseISO8601Date(v);
   }
 
   @Override
   public String marshal(Date v) throws Exception {
-    return dateFormat.format(v);
+    return DateParser.formatISO8601Date(v);
   }
 }

Added: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Item.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Item.java?rev=1496363&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Item.java (added)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Item.java Tue Jun 25 07:10:37 2013
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.crawler.connectors.generic.api;
+
+import java.util.Date;
+import java.util.List;
+import javax.xml.bind.annotation.XmlAttribute;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlElementWrapper;
+import javax.xml.bind.annotation.XmlElements;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
+
+/**
+ * JAXB-mapped "item" element: id attribute plus url, version, content, mimetype, created, updated, filename, metadata, auth and related children.
+ * @author krycek
+ */
+@XmlRootElement(name = "item")
+public class Item {
+
+  @XmlAttribute(name = "id", required = true)
+  public String id;
+
+  @XmlElement(name = "url", required = true)
+  public String url;
+
+  @XmlElement(name = "version", required = true)
+  public String version;
+
+  @XmlElement(name = "content")
+  public String content;
+
+  @XmlElement(name = "mimetype")
+  public String mimeType;
+
+  @XmlElement(name = "created")
+  @XmlJavaTypeAdapter(value = DateAdapter.class)
+  public Date created;
+
+  @XmlElement(name = "updated")
+  @XmlJavaTypeAdapter(value = DateAdapter.class)
+  public Date updated;
+
+  @XmlElement(name = "filename")
+  public String fileName;
+
+  @XmlElementWrapper(name = "metadata")
+  @XmlElements(value = {
+    @XmlElement(name = "meta", type = Meta.class)})
+  public List<Meta> metadata;
+
+  @XmlElementWrapper(name = "auth")
+  @XmlElements(value = {
+    @XmlElement(name = "token", type = String.class)})
+  public List<String> auth;
+
+  @XmlElementWrapper(name = "related")
+  @XmlElements(value = {
+    @XmlElement(name = "id", type = String.class)})
+  public List<String> related;
+
+  public String getVersionString() { // Returns version followed by "|token" for each auth token, or "" when version is null.
+    if (version == null) {
+      return "";
+    }
+    StringBuilder sb = new StringBuilder(version);
+    if (auth != null) {
+      for (String t : auth) {
+        sb.append("|").append(t);
+      }
+    }
+    return sb.toString();
+  }
+}

Added: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Items.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Items.java?rev=1496363&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Items.java (added)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Items.java Tue Jun 25 07:10:37 2013
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.manifoldcf.crawler.connectors.generic.api;
+
+import java.util.List;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlElements;
+import javax.xml.bind.annotation.XmlRootElement;
+
+/**
+ * JAXB binding for the {@code <items>} root element: a list of {@code <item>} children mapped to {@link Item}.
+ * @author krycek
+ */
+@XmlRootElement(name = "items")
+public class Items {
+
+  @XmlElements(value = {
+    @XmlElement(name = "item", type = Item.class)})
+  public List<Item> items;
+}

Added: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Meta.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Meta.java?rev=1496363&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Meta.java (added)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Meta.java Tue Jun 25 07:10:37 2013
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.manifoldcf.crawler.connectors.generic.api;
+
+import javax.xml.bind.annotation.XmlAttribute;
+import javax.xml.bind.annotation.XmlRootElement;
+import javax.xml.bind.annotation.XmlValue;
+
+/**
+ * JAXB binding for a {@code <meta>} element: its {@code name} attribute plus the element's text value.
+ * @author krycek
+ */
+@XmlRootElement(name = "meta")
+public class Meta {
+
+  @XmlAttribute(name = "name")
+  public String name;
+
+  @XmlValue
+  public String value;
+}

Added: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seed.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seed.java?rev=1496363&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seed.java (added)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seed.java Tue Jun 25 07:10:37 2013
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.crawler.connectors.generic.api;
+
+import javax.xml.bind.annotation.XmlAttribute;
+import javax.xml.bind.annotation.XmlRootElement;
+
+@XmlRootElement(name = "seed")
+public class Seed { // JAXB binding for a single <seed> element
+  /** Bound to the required {@code id} attribute of the element. */
+  @XmlAttribute(name = "id", required = true)
+  public String id;
+}

Added: manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seeds.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seeds.java?rev=1496363&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seeds.java (added)
+++ manifoldcf/branches/CONNECTORS-727/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/api/Seeds.java Tue Jun 25 07:10:37 2013
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.crawler.connectors.generic.api;
+
+import java.util.List;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlElements;
+import javax.xml.bind.annotation.XmlRootElement;
+
+@XmlRootElement(name = "seeds")
+public class Seeds { // JAXB binding for the <seeds> root element
+  /** Child {@code <seed>} elements, each mapped to {@link Seed}. */
+  @XmlElements({
+    @XmlElement(name = "seed", type = Seed.class)})
+  public List<Seed> seeds;
+}

Modified: manifoldcf/branches/CONNECTORS-727/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-727/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml?rev=1496363&r1=1496362&r2=1496363&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-727/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml (original)
+++ manifoldcf/branches/CONNECTORS-727/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml Tue Jun 25 07:10:37 2013
@@ -2167,8 +2167,10 @@ curl -XGET http://localhost:9200/index/_
             
             <section id="genericconnector">
               <title>Generic Connector</title>
-              <p>Generic connector allows you to index any source that follows provided API specification.</p>
-              <p>API should be implemented as web page (entry point) returning results based on provided GET params. API can be secured with HTTP basic authentication.</p>
+              <p>The Generic connector allows you to index any source that follows the provided API specification. The idea is that you only need to implement the API, which is designed
+			  to be fine-grained and as simple as possible for handling document indexing.</p>
+              <p>The API should be implemented as an XML web page (entry point) returning results based on the provided GET params. It may be a simple server script or part of a bigger application.
+			  The API can be secured with HTTP basic authentication.</p>
               <br/>
               <p>There are 4 actions:</p>
 			  <ul>
@@ -2180,7 +2182,7 @@ curl -XGET http://localhost:9200/index/_
 			  <p>Action is passed as "action" GET param to the entrypoint.</p>
               <br/><br/>
 			  <p><b>[entrypoint]?action=check</b></p>
-			  <p>Should return HTTP status code 200 providing information that entrypoint is working properly.</p>
+			  <p>Should return HTTP status code 200, indicating that the entrypoint is working properly. Any content returned will be ignored; only the status code matters.</p>
               <br/><br/>
 			  
 			  <p><b>[entrypoint]?action=seed&amp;startDate=YYYY-MM-DDTHH:mm:ssZ&amp;endDate=YYYY-MM-DDTHH:mm:ssZ</b></p>
@@ -2225,6 +2227,11 @@ curl -XGET http://localhost:9200/index/_
 		 &lt;token&gt;auth_token_2&lt;/token&gt;
 		 ...
 	  &lt;/auth&gt;
+	  &lt;related&gt;
+		 &lt;id&gt;other_document_id_1&lt;/id&gt;
+		 &lt;id&gt;other_document_id_2&lt;/id&gt;
+		 ...
+	  &lt;/related&gt;
 	  &lt;content&gt;Document content&lt;/content&gt;
    &lt;/item&gt;
    ...
@@ -2232,7 +2239,9 @@ curl -XGET http://localhost:9200/index/_
 			  </source>
 			  <p><code>id, url, version</code> are required, the rest is optional.</p>
  			  <p>If <code>auth</code> tag is provided - document will be treated as non-public with defined access tokens, if it is ommited - document will be public.</p>
-			  <p>if <code>content</code> tag is ommited - connector will ask for document content as <code>action=item</code> separate API call.</p>
+			  <p>If the <code>content</code> tag is omitted, the connector will request the document content in a separate <code>action=item</code> API call.</p>
+			  <p>You may provide related document ids when the document repository is a graph or tree. The related documents will also be indexed. If you use relations,
+			  seeding does not have to return all documents, only the starting points; the rest of the documents will be fetched by following the relations.</p>
               <br/><br/>
 
 			  <p><b>[entrypoint]?action=item&id=document_id</b></p>