You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2011/11/08 20:07:07 UTC

svn commit: r1199387 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java

Author: mikemccand
Date: Tue Nov  8 19:07:07 2011
New Revision: 1199387

URL: http://svn.apache.org/viewvc?rev=1199387&view=rev
Log:
TIKA-529: don't allocate byte[] for each byte when detecting IBM420 charset

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java?rev=1199387&r1=1199386&r2=1199387&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java Tue Nov  8 19:07:07 2011
@@ -1297,11 +1297,12 @@ abstract class CharsetRecog_sbcs extends
         }
         
         private boolean isLamAlef(byte b) {
-            byte shapedLamAlef[] = {(byte)0xb2,(byte)0xb3,(byte)0xb4,(byte)0xb5,(byte)0xb7,(byte)0xb8 };
-            for (int i = 0; i<shapedLamAlef.length; i++)
-                if (b == shapedLamAlef[i])
-                    return true;
-            return false;
+            // Return true if byte is any of these:
+            //
+            //   {(byte)0xb2,(byte)0xb3,(byte)0xb4,(byte)0xb5,(byte)0xb7,(byte)0xb8}
+            // 
+            // NOTE: 0xb2 is -78; 0xb8 is -72:
+            return (b <= (byte)0xb8) && (b >= (byte)0xb2) && (b != (byte)0xb6);
         }
         
         protected void matchFinish(CharsetDetector det) {