You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/10 02:11:31 UTC

svn commit: r1630623 - in /manifoldcf/trunk: ./ connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/ connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transfo...

Author: kwright
Date: Fri Oct 10 00:11:30 2014
New Revision: 1630623

URL: http://svn.apache.org/r1630623
Log:
Fix for CONNECTORS-1068.

Modified:
    manifoldcf/trunk/   (props changed)
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CN.properties
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html
    manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html
    manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java

Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-1068:r1630245-1630621

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Oct 10 00:11:30 2014
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 2.0-dev =====================
 
+CONNECTORS-1068: Enhancements for Document Filter transformation
+connector.
+(Karl Wright)
+
 CONNECTORS-1070: Exit immediately upon finding a misconfigured
 ManifoldCF.
 (Kamil Żyta, Karl Wright)

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java Fri Oct 10 00:11:30 2014
@@ -63,6 +63,29 @@ public class DocumentFilter extends org.
     return new VersionContext(sp.toPackedString(),params,os);
   }
 
+  /** Detect if a document date is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param outputDescription is the document's output version.
+  *@param date is the date of the document.
+  *@param activities is an object including the activities that can be performed by this method.
+  *@return true if the document with that date can be accepted by this connector.
+  */
+  @Override
+  public boolean checkDateIndexable(VersionContext outputDescription, Date date, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+    return checkDateIndexable(sp, outputDescription, date, activities);
+  }
+  
+  protected boolean checkDateIndexable(SpecPacker sp, VersionContext outputDescription, Date date, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption {
+    if (sp.checkDate(date))
+      return super.checkDateIndexable(outputDescription, date, activities);
+    else
+      return false;
+  }
+
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
   * unusable documents that will be passed to this output connector.
   *@param outputDescription is the document's output version.
@@ -74,6 +97,11 @@ public class DocumentFilter extends org.
     throws ManifoldCFException, ServiceInterruption
   {
     SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+    return checkMimeTypeIndexable(sp, outputDescription, mimeType, activities);
+  }
+  
+  protected boolean checkMimeTypeIndexable(SpecPacker sp, VersionContext outputDescription, String mimeType, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption {
     if (sp.checkMimeType(mimeType))
       return super.checkMimeTypeIndexable(outputDescription, mimeType, activities);
     else
@@ -84,6 +112,11 @@ public class DocumentFilter extends org.
   public boolean checkLengthIndexable(VersionContext outputDescription, long length, IOutputCheckActivity activities)
     throws ManifoldCFException, ServiceInterruption {
     SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+    return checkLengthIndexable(sp, outputDescription, length, activities);
+  }
+  
+  protected boolean checkLengthIndexable(SpecPacker sp, VersionContext outputDescription, long length, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption {
     if (sp.checkLengthIndexable(length))
       return super.checkLengthIndexable(outputDescription, length, activities);
     else
@@ -94,6 +127,11 @@ public class DocumentFilter extends org.
   public boolean checkURLIndexable(VersionContext outputDescription, String url, IOutputCheckActivity activities)
     throws ManifoldCFException, ServiceInterruption {
     SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+    return checkURLIndexable(sp, outputDescription, url, activities);
+  }
+  
+  protected boolean checkURLIndexable(SpecPacker sp, VersionContext outputDescription, String url, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption {
     if (sp.checkURLIndexable(url))
       return super.checkURLIndexable(outputDescription, url, activities);
     else
@@ -103,9 +141,6 @@ public class DocumentFilter extends org.
   /** Add (or replace) a document in the output data store using the connector.
   * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
   * necessary.
-  * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
-  * output description, since that was what was partly used to determine if output should be taking place.  So it may be necessary for this method to decode
-  * an output description string in order to determine what should be done.
   *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
   * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
   *@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method.
@@ -118,6 +153,15 @@ public class DocumentFilter extends org.
   public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
+    // Hard filtering (in case connectors don't call check methods above)
+    SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+    if (!checkURLIndexable(sp, outputDescription, documentURI, activities) ||
+      !checkLengthIndexable(sp, outputDescription, document.getBinaryLength(), activities) ||
+      !checkMimeTypeIndexable(sp, outputDescription, document.getMimeType(), activities) ||
+      !checkDateIndexable(sp, outputDescription, document.getModifiedDate(), activities)) {
+      activities.noDocument();
+      return DOCUMENTSTATUS_REJECTED;
+    }
     return activities.sendDocument(documentURI, document);
   }
   
@@ -127,6 +171,7 @@ public class DocumentFilter extends org.
     String maxFileSize = DocumentFilterConfig.MAXLENGTH_DEFAULT;
     String allowedMimeTypes = DocumentFilterConfig.MIMETYPES_DEFAULT;
     String allowedFileExtensions = DocumentFilterConfig.EXTENSIONS_DEFAULT;
+    Long minDate = null;
     for (int i = 0; i < os.getChildCount(); i++)
     {
       SpecificationNode sn = os.getChild(i);
@@ -138,11 +183,21 @@ public class DocumentFilter extends org.
         allowedMimeTypes = sn.getValue();
       else if (sn.getType().equals(DocumentFilterConfig.NODE_EXTENSIONS))
         allowedFileExtensions = sn.getValue();
+      else if (sn.getType().equals(DocumentFilterConfig.NODE_MINDATE))
+        minDate = new Long(sn.getAttributeValue(DocumentFilterConfig.ATTRIBUTE_VALUE));
     }
     paramMap.put("MINFILESIZE",minFileSize);
     paramMap.put("MAXFILESIZE",maxFileSize);
     paramMap.put("MIMETYPES",allowedMimeTypes);
     paramMap.put("EXTENSIONS",allowedFileExtensions);
+    
+    Calendar c = new GregorianCalendar();
+    c.setTimeInMillis((minDate==null)?0L:minDate.longValue());
+    paramMap.put("MINDATEYEAR",Integer.toString(c.get(Calendar.YEAR)));
+    paramMap.put("MINDATEMONTH",Integer.toString(c.get(Calendar.MONTH)));
+    paramMap.put("MINDATEDAY",Integer.toString(c.get(Calendar.DAY_OF_MONTH)));
+    paramMap.put("MINDATEHOUR",Integer.toString(c.get(Calendar.HOUR_OF_DAY)));
+    paramMap.put("MINDATEMINUTE",String.format("%02d",c.get(Calendar.MINUTE)));
   }
   
   /** Obtain the name of the form check javascript method to call.
@@ -234,6 +289,36 @@ public class DocumentFilter extends org.
     throws ManifoldCFException {
     String seqPrefix = "s"+connectionSequenceNumber+"_";
 
+    String minDateYear = variableContext.getParameter(seqPrefix+"mindateyear");
+    String minDateMonth = variableContext.getParameter(seqPrefix+"mindatemonth");
+    String minDateDay = variableContext.getParameter(seqPrefix + "mindateday");
+    String minDateHour = variableContext.getParameter(seqPrefix + "mindatehour");
+    String minDateMinute = variableContext.getParameter(seqPrefix + "mindateminute");
+    if (minDateYear != null && minDateMonth != null && minDateDay != null && minDateHour != null && minDateMinute != null)
+    {
+      Calendar c = new GregorianCalendar();
+      try
+      {
+        c.set(Integer.parseInt(minDateYear),Integer.parseInt(minDateMonth),Integer.parseInt(minDateDay),Integer.parseInt(minDateHour),Integer.parseInt(minDateMinute));
+      }
+      catch (Exception e)
+      {
+      }
+      long theTime = c.getTimeInMillis();
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(DocumentFilterConfig.NODE_MINDATE))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(DocumentFilterConfig.NODE_MINDATE);
+      sn.setAttribute(DocumentFilterConfig.ATTRIBUTE_VALUE,new Long(theTime).toString());
+      os.addChild(os.getChildCount(),sn);
+    }
+    
     String x;
 
     x = variableContext.getParameter(seqPrefix+"minfilesize");
@@ -331,7 +416,8 @@ public class DocumentFilter extends org.
     
   }
   
-  protected static void fillSet(Set<String> set, String input) {
+  protected static Set<String> fillSet(String input) {
+    Set<String> rval = new HashSet<String>();
     try
     {
       StringReader sr = new StringReader(input);
@@ -340,8 +426,10 @@ public class DocumentFilter extends org.
       while ((line = br.readLine()) != null)
       {
         line = line.trim();
-        if (line.length() > 0)
-          set.add(line.toLowerCase(Locale.ROOT));
+        if (line.equals("*"))
+          rval = null;
+        else if (rval != null && line.length() > 0)
+          rval.add(line.toLowerCase(Locale.ROOT));
       }
     }
     catch (IOException e)
@@ -349,16 +437,21 @@ public class DocumentFilter extends org.
       // Should never happen
       throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
     }
+    return rval;
   }
   
   protected static class SpecPacker {
     
-    private final Set<String> extensions = new HashSet<String>();
-    private final Set<String> mimeTypes = new HashSet<String>();
+    // null means "match everything"
+    private final Set<String> extensions;
+    // null means "match everything"
+    private final Set<String> mimeTypes;
     private final Long minLength;
     private final Long lengthCutoff;
+    private final Long minDate;
     
     public SpecPacker(Specification os) {
+      Long minDate = null;
       Long minLength = null;
       Long lengthCutoff = null;
       String extensions = null;
@@ -376,12 +469,16 @@ public class DocumentFilter extends org.
         } else if (sn.getType().equals(DocumentFilterConfig.NODE_MINLENGTH)) {
           String value = sn.getAttributeValue(DocumentFilterConfig.ATTRIBUTE_VALUE);
           minLength = new Long(value);
+        } else if (sn.getType().equals(DocumentFilterConfig.NODE_MINDATE)) {
+          String value = sn.getAttributeValue(DocumentFilterConfig.ATTRIBUTE_VALUE);
+          minDate = new Long(value);
         }
       }
+      this.minDate = minDate;
       this.minLength = minLength;
       this.lengthCutoff = lengthCutoff;
-      fillSet(this.extensions, extensions);
-      fillSet(this.mimeTypes, mimeTypes);
+      this.extensions = fillSet(extensions);
+      this.mimeTypes = fillSet(mimeTypes);
     }
     
     public String toPackedString() {
@@ -397,22 +494,34 @@ public class DocumentFilter extends org.
       }
       
       // Mime types
-      String[] mimeTypes = new String[this.mimeTypes.size()];
-      i = 0;
-      for (String mimeType : this.mimeTypes) {
-        mimeTypes[i++] = mimeType;
+      if (this.mimeTypes == null)
+        sb.append('-');
+      else
+      {
+        sb.append('+');
+        String[] mimeTypes = new String[this.mimeTypes.size()];
+        i = 0;
+        for (String mimeType : this.mimeTypes) {
+          mimeTypes[i++] = mimeType;
+        }
+        java.util.Arrays.sort(mimeTypes);
+        packList(sb,mimeTypes,'+');
       }
-      java.util.Arrays.sort(mimeTypes);
-      packList(sb,mimeTypes,'+');
       
       // Extensions
-      String[] extensions = new String[this.extensions.size()];
-      i = 0;
-      for (String extension : this.extensions) {
-        extensions[i++] = extension;
+      if (this.extensions == null)
+        sb.append('-');
+      else
+      {
+        sb.append('+');
+        String[] extensions = new String[this.extensions.size()];
+        i = 0;
+        for (String extension : this.extensions) {
+          extensions[i++] = extension;
+        }
+        java.util.Arrays.sort(extensions);
+        packList(sb,extensions,'+');
       }
-      java.util.Arrays.sort(extensions);
-      packList(sb,extensions,'+');
 
       // Min length
       if (minLength == null)
@@ -422,6 +531,14 @@ public class DocumentFilter extends org.
         pack(sb,minLength.toString(),'+');
       }
       
+      // Min date
+      if (minDate == null)
+        sb.append('-');
+      else {
+        sb.append('+');
+        pack(sb,minDate.toString(),'+');
+      }
+
       return sb.toString();
     }
     
@@ -433,9 +550,17 @@ public class DocumentFilter extends org.
       return true;
     }
     
+    public boolean checkDate(Date date) {
+      if (minDate != null && date != null && date.getTime() < minDate)
+        return false;
+      return true;
+    }
+    
     public boolean checkMimeType(String mimeType) {
       if (mimeType == null)
         mimeType = "application/unknown";
+      if (mimeTypes == null)
+        return true;
       return mimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
     }
     
@@ -453,6 +578,8 @@ public class DocumentFilter extends org.
       }
       if (extension == null || extension.length() == 0)
         extension = ".";
+      if (extensions == null)
+        return true;
       return extensions.contains(extension.toLowerCase(Locale.ROOT));
     }
     

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java Fri Oct 10 00:11:30 2014
@@ -29,10 +29,11 @@ public class DocumentFilterConfig {
   public static final String NODE_MINLENGTH = "minlength";
   public static final String MINLENGTH_DEFAULT = "0";
   public static final String NODE_MAXLENGTH = "maxlength";
-  public static final String MAXLENGTH_DEFAULT = "16777216";
+  public static final String MAXLENGTH_DEFAULT = "1000000000";
   public static final String NODE_MIMETYPES = "mimetypes";
   public static final String MIMETYPES_DEFAULT =
-                        "application/msword\n"
+                        "*\n"
+                        + "application/msword\n"
 		        + "application/vnd.ms-excel\n"
 		        + "application/vnd.openxmlformats-officedocument.wordprocessingml.document\n"
 		        + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\n"
@@ -48,8 +49,9 @@ public class DocumentFilterConfig {
 		        + "application/x-bittorrent";
   public static final String NODE_EXTENSIONS = "extensions";
   public static final String EXTENSIONS_DEFAULT =
-                    "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
+                    "*\n" + "log\n" + "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
 		    + "html\n" + "pdf\n" + "odt\n" + "ods\n" + "rtf\n" + "txt\n" + "mp3\n"
 		    + "mp4\n" + "wav\n" + "ogg\n" + "flac\n" + "torrent";
+  public static final String NODE_MINDATE = "mindate";
   public static final String ATTRIBUTE_VALUE = "value";
 }

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties Fri Oct 10 00:11:30 2014
@@ -18,3 +18,17 @@ DocumentFilter.MinFileSizeBytesColon=Min
 DocumentFilter.MaxFileSizeBytesColon=Max file size (bytes):
 DocumentFilter.AllowedMIMETypesColon=Allowed MIME types:
 DocumentFilter.AllowedFileExtensionsColon=Allowed file extensions:
+
+DocumentFilter.MinDateColon=Minimum document date:
+DocumentFilter.Month_0=January
+DocumentFilter.Month_1=February
+DocumentFilter.Month_2=March
+DocumentFilter.Month_3=April
+DocumentFilter.Month_4=May
+DocumentFilter.Month_5=June
+DocumentFilter.Month_6=July
+DocumentFilter.Month_7=August
+DocumentFilter.Month_8=September
+DocumentFilter.Month_9=October
+DocumentFilter.Month_10=November
+DocumentFilter.Month_11=December

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties Fri Oct 10 00:11:30 2014
@@ -18,3 +18,17 @@ DocumentFilter.MinFileSizeBytesColon=最
 DocumentFilter.MaxFileSizeBytesColon=最大ファイルサイズ (バイト):
 DocumentFilter.AllowedMIMETypesColon=利用可能なMIMEタイプ:
 DocumentFilter.AllowedFileExtensionsColon=利用可能なファイル拡張子:
+
+DocumentFilter.MinDateColon=Minimum document date:
+DocumentFilter.Month_0=January
+DocumentFilter.Month_1=February
+DocumentFilter.Month_2=March
+DocumentFilter.Month_3=April
+DocumentFilter.Month_4=May
+DocumentFilter.Month_5=June
+DocumentFilter.Month_6=July
+DocumentFilter.Month_7=August
+DocumentFilter.Month_8=September
+DocumentFilter.Month_9=October
+DocumentFilter.Month_10=November
+DocumentFilter.Month_11=December

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CN.properties?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CN.properties (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CN.properties Fri Oct 10 00:11:30 2014
@@ -18,3 +18,17 @@ DocumentFilter.MinFileSizeBytesColon=最
 DocumentFilter.MaxFileSizeBytesColon=最大文件大小(字节):
 DocumentFilter.AllowedMIMETypesColon=可利用的MIME类型:
 DocumentFilter.AllowedFileExtensionsColon=可利用的文件扩展名:
+
+DocumentFilter.MinDateColon=Minimum document date:
+DocumentFilter.Month_0=January
+DocumentFilter.Month_1=February
+DocumentFilter.Month_2=March
+DocumentFilter.Month_3=April
+DocumentFilter.Month_4=May
+DocumentFilter.Month_5=June
+DocumentFilter.Month_6=July
+DocumentFilter.Month_7=August
+DocumentFilter.Month_8=September
+DocumentFilter.Month_9=October
+DocumentFilter.Month_10=November
+DocumentFilter.Month_11=December

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html Fri Oct 10 00:11:30 2014
@@ -19,6 +19,61 @@
 
 <table class="displaytable">
   <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.MinDateColon'))</nobr></td>
+    <td class="value">
+      <nobr>
+        <select name="s${SEQNUM}_mindateyear" size="5">
+  #foreach ($year in [1969..2020])
+    #if ($MINDATEYEAR == $year)
+          <option value="$year" selected="true">$year</option>
+    #else
+          <option value="$year">$year</option>
+    #end
+  #end
+        </select>
+        <select name="s${SEQNUM}_mindatemonth" size="5">
+  #foreach ($month in [0..11])
+    #set ($monthkey = "DocumentFilter.Month_${month}")
+    #if ($MINDATEMONTH == $month)
+          <option value="$month" selected="true">$Encoder.bodyEscape($ResourceBundle.getString($monthkey))</option>
+    #else
+          <option value="$month">$Encoder.bodyEscape($ResourceBundle.getString($monthkey))</option>
+    #end
+  #end
+        </select>
+        <select name="s${SEQNUM}_mindateday" size="5">
+  #foreach ($day in [1..31])
+    #set ($dayvalue = $day)
+    #if ($MINDATEDAY == $day)
+          <option value="$day" selected="true">$dayvalue</option>
+    #else
+          <option value="$day">$dayvalue</option>
+    #end
+  #end
+        </select>
+        <select name="s${SEQNUM}_mindatehour" size="5">
+  #foreach ($hour in [0..23])
+    #if ($MINDATEHOUR == $hour)
+          <option value="$hour" selected="true">$hour</option>
+    #else
+          <option value="$hour">$hour</option>
+    #end
+  #end
+        </select>
+        :
+        <select name="s${SEQNUM}_mindateminute" size="5">
+  #foreach ($minute in ["00","01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50","51","52","53","54","55","56","57","58","59"])
+    #if ($MINDATEMINUTE == $minute)
+          <option value="$minute" selected="true">$minute</option>
+    #else
+          <option value="$minute">$minute</option>
+    #end
+  #end
+        </select>
+      </nobr>
+    </td>
+  </tr>
+  <tr>
     <td class="description">
       <nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.MinFileSizeBytesColon'))</nobr>
     </td>
@@ -50,6 +105,12 @@
 
 #else
 
+<input type="hidden" name="s${SEQNUM}_mindateyear" value="$MINDATEYEAR" />
+<input type="hidden" name="s${SEQNUM}_mindatemonth" value="$MINDATEMONTH" />
+<input type="hidden" name="s${SEQNUM}_mindateday" value="$MINDATEDAY" />
+<input type="hidden" name="s${SEQNUM}_mindatehour" value="$MINDATEHOUR" />
+<input type="hidden" name="s${SEQNUM}_mindateminute" value="$MINDATEMINUTE" />
+
 <input type="hidden" name="s${SEQNUM}_minfilesize" value="$Encoder.attributeEscape($MINFILESIZE)" />
 <input type="hidden" name="s${SEQNUM}_maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
 <input type="hidden" name="s${SEQNUM}_mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />

Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html Fri Oct 10 00:11:30 2014
@@ -15,8 +15,17 @@
  limitations under the License.
 -->
 
+#set ($month = "DocumentFilter.Month_${MINDATEMONTH}")
 <table class="displaytable">
   <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.MinDateColon'))</nobr></td>
+    <td class="value">
+      <nobr>
+        $MINDATEYEAR $Encoder.bodyEscape($ResourceBundle.getString($month)) $MINDATEDAY $MINDATEHOUR:$MINDATEMINUTE
+      </nobr>
+    </td>
+  </tr>
+  <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.MinFileSizeBytesColon'))</nobr></td>
     <td class="value">$Encoder.bodyEscape($MINFILESIZE)</td>
   </tr>

Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java?rev=1630623&r1=1630622&r2=1630623&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java Fri Oct 10 00:11:30 2014
@@ -298,31 +298,23 @@ public class FileConnector extends org.a
           String versionString;
           String convertPath;
           long fileLength = file.length();
-          if (activities.checkLengthIndexable(fileLength))
-          {
-            // Get the file's modified date.
-            long lastModified = file.lastModified();
+          // Get the file's modified date.
+          long lastModified = file.lastModified();
             
-            // Check if the path is to be converted.  We record that info in the version string so that we'll reindex documents whose
-            // URI's change.
-            convertPath = findConvertPath(spec, file);
-            StringBuilder sb = new StringBuilder();
-            if (convertPath != null)
-            {
-              // Record the path.
-              sb.append("+");
-              pack(sb,convertPath,'+');
-            }
-            else
-              sb.append("-");
-            sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
-            versionString = sb.toString();
-          }
-          else
+          // Check if the path is to be converted.  We record that info in the version string so that we'll reindex documents whose
+          // URI's change.
+          convertPath = findConvertPath(spec, file);
+          StringBuilder sb = new StringBuilder();
+          if (convertPath != null)
           {
-            activities.deleteDocument(documentIdentifier);
-            continue;
+            // Record the path.
+            sb.append("+");
+            pack(sb,convertPath,'+');
           }
+          else
+            sb.append("-");
+          sb.append(new Long(lastModified).toString()).append(":").append(new Long(fileLength).toString());
+          versionString = sb.toString();
     
           if (activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
           {
@@ -345,6 +337,7 @@ public class FileConnector extends org.a
               {
                 Logging.connectors.debug("Skipping file '"+documentIdentifier+"' because length was excluded by output connector.");
                 activities.noDocument(documentIdentifier,versionString);
+                activities.recordActivity(null,ACTIVITY_READ,null,documentIdentifier,"FILETOOLONG","Document rejected because of length",null);
                 continue;
               }
               
@@ -352,6 +345,7 @@ public class FileConnector extends org.a
               {
                 Logging.connectors.debug("Skipping file '"+documentIdentifier+"' because URL was excluded by output connector.");
                 activities.noDocument(documentIdentifier,versionString);
+                activities.recordActivity(null,ACTIVITY_READ,null,documentIdentifier,"URLREJECTED","Document rejected because of URL",null);
                 continue;
               }
               
@@ -359,6 +353,7 @@ public class FileConnector extends org.a
               {
                 Logging.connectors.debug("Skipping file '"+documentIdentifier+"' because date ("+modifiedDate+") was excluded by output connector.");
                 activities.noDocument(documentIdentifier,versionString);
+                activities.recordActivity(null,ACTIVITY_READ,null,documentIdentifier,"DATEREJECTED","Document rejected because of date",null);
                 continue;
               }
               
@@ -366,6 +361,7 @@ public class FileConnector extends org.a
               {
                 Logging.connectors.debug("Skipping file '"+documentIdentifier+"' because mime type ('"+mimeType+"') was excluded by output connector.");
                 activities.noDocument(documentIdentifier,versionString);
+                activities.recordActivity(null,ACTIVITY_READ,null,documentIdentifier,"MIMETYPEREJECTED","Document rejected because of mime type",null);
                 continue;
               }