You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/02/08 12:24:27 UTC
svn commit: r1443940 - in
/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml:
BOMEncodingDetector.java SingleByteReceiver.java SingleCharacterReceiver.java
Author: kwright
Date: Fri Feb 8 11:24:26 2013
New Revision: 1443940
URL: http://svn.apache.org/r1443940
Log:
Finish BOMEncodingDetector.
Modified:
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java?rev=1443940&r1=1443939&r2=1443940&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java Fri Feb 8 11:24:26 2013
@@ -20,6 +20,7 @@ package org.apache.manifoldcf.core.fuzzy
import org.apache.manifoldcf.core.interfaces.*;
import java.util.*;
+import java.io.*;
/** This class represents the parse state of the BOM (byte order mark) parser.
* The byte order mark parser looks for a byte order mark at the start of a byte sequence,
@@ -120,6 +121,7 @@ public class BOMEncodingDetector extends
if (theByte == 0xff)
{
// UTF-16BE detected
+ mark();
return establishEncoding("UTF-16BE");
}
else
@@ -136,6 +138,7 @@ public class BOMEncodingDetector extends
if (theByte == 0xbf)
{
// Encoding detected as utf-8
+ mark();
return establishEncoding("UTF-8");
}
else
@@ -149,6 +152,7 @@ public class BOMEncodingDetector extends
else
{
// Encoding detected as UTF-16LE
+ mark();
return establishEncoding("UTF-16LE");
}
break;
@@ -163,17 +167,21 @@ public class BOMEncodingDetector extends
case BOM_SEEN_FFFE00:
if (theByte == 0x00)
{
+ mark();
return establishEncoding("UTF-32LE");
}
else
{
- resetToMark();
+ // Leave mark alone.
return establishEncoding("UTF-16LE");
}
case BOM_SEEN_0000FE:
if (theByte == 0xff)
+ {
+ mark();
return establishEncoding("UTF-32BE");
+ }
else
return replay();
@@ -190,21 +198,14 @@ public class BOMEncodingDetector extends
throws ManifoldCFException
{
this.encoding = encoding;
- return playFromCurrentPoint();
+ return true;
}
/** Set a "mark".
*/
protected void mark()
{
- // MHL
- }
-
- /** Reset the "stream" to the last saved mark.
- */
- protected void resetToMark()
- {
- // MHL
+ replayBuffer.clear();
}
/** Establish NO encoding, and replay from the current saved point to the child, if any.
@@ -212,8 +213,7 @@ public class BOMEncodingDetector extends
protected boolean replay()
throws ManifoldCFException
{
- resetToMark();
- return playFromCurrentPoint();
+ return true;
}
/** Send stream from current point onward with the current encoding.
@@ -221,8 +221,30 @@ public class BOMEncodingDetector extends
protected boolean playFromCurrentPoint()
throws ManifoldCFException
{
- // MHL
+ mark();
return true;
}
+ /** Deal with the remainder of the input.
+ * This is called only when dealWithByte() returns true.
+ *@param buffer is the buffer of bytes that should come first.
+ *@param offset is the offset within the buffer of the first byte.
+ *@param len is the number of bytes in the buffer.
+ *@param inputStream is the stream that should come after the bytes in the buffer.
+ *@return true to abort, false if the end of the stream has been reached.
+ */
+ @Override
+ protected boolean dealWithRemainder(byte[] buffer, int offset, int len, InputStream inputStream)
+ throws IOException, ManifoldCFException
+ {
+ if (overflowByteReceiver == null)
+ return super.dealWithRemainder(buffer,offset,len,inputStream);
+ // Create a wrapped input stream with all the missing bytes
+ while (len-- > 0) // count down so exactly len bytes are copied and the loop terminates
+ {
+ replayBuffer.appendByte(buffer[offset++]);
+ }
+ return overflowByteReceiver.dealWithBytes(new PrefixedInputStream(replayBuffer,inputStream));
+ }
+
}
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java?rev=1443940&r1=1443939&r2=1443940&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java Fri Feb 8 11:24:26 2013
@@ -48,7 +48,9 @@ public abstract class SingleByteReceiver
for (int i = 0; i < amt; i++)
{
if (dealWithByte(byteBuffer[i]))
- return true;
+ {
+ return dealWithRemainder(byteBuffer,i+1,amt-(i+1),inputStream);
+ }
}
}
}
@@ -59,5 +61,18 @@ public abstract class SingleByteReceiver
public abstract boolean dealWithByte(byte b)
throws IOException, ManifoldCFException;
-
+ /** Deal with the remainder of the input.
+ * This is called only when dealWithByte() returns true. This base implementation simply returns true.
+ *@param buffer is the buffer of bytes that should come first.
+ *@param offset is the offset within the buffer of the first byte.
+ *@param len is the number of bytes in the buffer.
+ *@param inputStream is the stream that should come after the bytes in the buffer.
+ *@return true to abort, false if the end of the stream has been reached.
+ */
+ protected boolean dealWithRemainder(byte[] buffer, int offset, int len, InputStream inputStream)
+ throws IOException, ManifoldCFException
+ {
+ return true;
+ }
+
}
Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java?rev=1443940&r1=1443939&r2=1443940&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java (original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java Fri Feb 8 11:24:26 2013
@@ -49,7 +49,9 @@ public abstract class SingleCharacterRec
for (int i = 0; i < amt; i++)
{
if (dealWithCharacter(charBuffer[i]))
- return true;
+ {
+ return dealWithRemainder(charBuffer, i+1, amt-(i+1), reader);
+ }
}
}
}
@@ -60,4 +62,18 @@ public abstract class SingleCharacterRec
public abstract boolean dealWithCharacter(char c)
throws IOException, ManifoldCFException;
+ /** Deal with the remainder of the input.
+ * This is called only when dealWithCharacter() returns true. This base implementation simply returns true.
+ *@param buffer is the buffer of characters that should come first.
+ *@param offset is the offset within the buffer of the first character.
+ *@param len is the number of characters in the buffer.
+ *@param reader is the reader that should come after the characters in the buffer.
+ *@return true to abort, false if the end of the stream has been reached.
+ */
+ protected boolean dealWithRemainder(char[] buffer, int offset, int len, Reader reader)
+ throws IOException, ManifoldCFException
+ {
+ return true;
+ }
+
}