You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/02/06 22:48:52 UTC

svn commit: r1443233 - in /manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml: PretagParseState.java SingleByteReceiver.java SingleCharacterReceiver.java TagParseState.java

Author: kwright
Date: Wed Feb  6 21:48:52 2013
New Revision: 1443233

URL: http://svn.apache.org/viewvc?rev=1443233&view=rev
Log:
Code for hooking up passthrough into second level CharacterReceiver

Modified:
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java Wed Feb  6 21:48:52 2013
@@ -21,6 +21,7 @@ package org.apache.manifoldcf.core.fuzzy
 import org.apache.manifoldcf.core.interfaces.*;
 import org.apache.manifoldcf.core.system.Logging;
 import java.util.*;
+import java.io.*;
 
 /** This class represents the ability to parse <?...?> preamble tags.
 */
@@ -39,10 +40,10 @@ public class PretagParseState extends Si
   protected static final int PRETAGPARSESTATE_IN_ATTR_LOOKING_FOR_VALUE = 12;
   protected static final int PRETAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE = 13;
   protected static final int PRETAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE = 14;
-  protected static final int PRETAGPARSESTATE_POST = 15;
 
   protected int currentState = PRETAGPARSESTATE_NORMAL;
-
+  protected boolean passThrough = false;
+  
   protected StringBuilder currentTagNameBuffer = null;
   protected StringBuilder currentAttrNameBuffer = null;
   protected StringBuilder currentValueBuffer = null;
@@ -50,6 +51,7 @@ public class PretagParseState extends Si
   protected String currentTagName = null;
   protected String currentAttrName = null;
   protected Map<String,String> currentAttrMap = null;
+  protected final CharacterBuffer charBuffer = new CharacterBuffer();
 
   protected static final Map<String,String> mapLookup = new HashMap<String,String>();
   static
@@ -71,13 +73,42 @@ public class PretagParseState extends Si
     this.postPreambleReceiver = postPreambleReceiver;
   }
 
-  /** Receive a byte.
+  /** Receive a set of characters; process one chunk worth.
+  *@return true if done.
+  */
+  @Override
+  public boolean dealWithCharacters()
+    throws IOException, ManifoldCFException
+  {
+    if (passThrough)
+    {
+      if (postPreambleReceiver == null)
+        return true;
+      return postPreambleReceiver.dealWithCharacters();
+    }
+    return super.dealWithCharacters();
+  }
+
+  /** Receive a character.
   * @return true if done.
   */
   @Override
   public boolean dealWithCharacter(char c)
-    throws ManifoldCFException
+    throws IOException, ManifoldCFException
   {
+    c = Character.toLowerCase(c);
+    if (currentState == PRETAGPARSESTATE_NORMAL && isWhitespace(c))
+      return false;
+    if (currentState == PRETAGPARSESTATE_NORMAL && c != '<' ||
+      currentState == PRETAGPARSESTATE_SAWLEFTBRACKET && c != '?' && c != '!')
+    {
+      // Initialize the post preamble receiver with a wrapped reader
+      if (postPreambleReceiver == null)
+        return true;
+      postPreambleReceiver.setReader(new PrefixedReader(charBuffer,reader));
+      passThrough = true;
+      return false;
+    }
     // MHL
     return true;
   }
@@ -88,5 +119,10 @@ public class PretagParseState extends Si
     Logging.misc.debug(" Saw pretag '"+tagName+"'");
   }
 
+  /** Is a character markup language whitespace? */
+  protected static boolean isWhitespace(char x)
+  {
+    return x <= ' ';
+  }
 
 }

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java Wed Feb  6 21:48:52 2013
@@ -55,7 +55,7 @@ public abstract class SingleByteReceiver
   *@return true to stop further processing.
   */
   public abstract boolean dealWithByte(byte b)
-    throws ManifoldCFException;
+    throws IOException, ManifoldCFException;
 
 
 }

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java Wed Feb  6 21:48:52 2013
@@ -56,6 +56,6 @@ public abstract class SingleCharacterRec
   * @return true if done.
   */
   public abstract boolean dealWithCharacter(char c)
-    throws ManifoldCFException;
+    throws IOException, ManifoldCFException;
   
 }

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java?rev=1443233&r1=1443232&r2=1443233&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java Wed Feb  6 21:48:52 2013
@@ -96,7 +96,7 @@ public class TagParseState extends Singl
       {
         currentState = TAGPARSESTATE_IN_TAG_NAME;
         currentTagNameBuffer = new StringBuilder();
-        if (!isHTMLWhitespace(thisChar))
+        if (!isWhitespace(thisChar))
           currentTagNameBuffer.append(thisCharLower);
       }
       break;
@@ -130,7 +130,7 @@ public class TagParseState extends Singl
         currentState = TAGPARSESTATE_IN_COMMENT;
       break;
     case TAGPARSESTATE_IN_TAG_NAME:
-      if (isHTMLWhitespace(thisChar))
+      if (isWhitespace(thisChar))
       {
         if (currentTagNameBuffer.length() > 0)
         {
@@ -178,7 +178,7 @@ public class TagParseState extends Singl
         currentTagNameBuffer.append(thisCharLower);
       break;
     case TAGPARSESTATE_IN_ATTR_NAME:
-      if (isHTMLWhitespace(thisChar))
+      if (isWhitespace(thisChar))
       {
         if (currentAttrNameBuffer.length() > 0)
         {
@@ -253,7 +253,7 @@ public class TagParseState extends Singl
         currentAttrName = null;
         noteTag(currentTagName,currentAttrMap);
       }
-      else if (!isHTMLWhitespace(thisChar))
+      else if (!isWhitespace(thisChar))
       {
         currentAttrMap.put(currentAttrName,"");
         currentState = TAGPARSESTATE_IN_ATTR_NAME;
@@ -267,7 +267,7 @@ public class TagParseState extends Singl
         currentState = TAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE;
       else if (thisChar == '"')
         currentState = TAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE;
-      else if (!isHTMLWhitespace(thisChar))
+      else if (!isWhitespace(thisChar))
       {
         currentState = TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE;
         currentValueBuffer.append(thisChar);
@@ -283,7 +283,7 @@ public class TagParseState extends Singl
       }
       break;
     case TAGPARSESTATE_IN_END_TAG_NAME:
-      if (isHTMLWhitespace(thisChar))
+      if (isWhitespace(thisChar))
       {
         if (currentTagNameBuffer != null && currentTagNameBuffer.length() > 0)
         {
@@ -334,7 +334,7 @@ public class TagParseState extends Singl
         currentValueBuffer.append(thisChar);
       break;
     case TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE:
-      if (isHTMLWhitespace(thisChar))
+      if (isWhitespace(thisChar))
       {
         currentAttrMap.put(currentAttrName,attributeDecode(currentValueBuffer.toString()));
         currentAttrName = null;
@@ -440,8 +440,8 @@ public class TagParseState extends Singl
       return mapLookup.get(input);
   }
 
-  /** Is a character HTML whitespace? */
-  protected static boolean isHTMLWhitespace(char x)
+  /** Is a character markup language whitespace? */
+  protected static boolean isWhitespace(char x)
   {
     return x <= ' ';
   }