You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2011/09/05 18:06:09 UTC
[lucy-commits] svn commit: r1165345 - /incubator/lucy/trunk/core/Lucy/Util/UTF8Decoder.java
Author: marvin
Date: Mon Sep 5 16:06:09 2011
New Revision: 1165345
URL: http://svn.apache.org/viewvc?rev=1165345&view=rev
Log:
Strip out extraneous materials.
Modified:
incubator/lucy/trunk/core/Lucy/Util/UTF8Decoder.java
Modified: incubator/lucy/trunk/core/Lucy/Util/UTF8Decoder.java
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Util/UTF8Decoder.java?rev=1165345&r1=1165344&r2=1165345&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Util/UTF8Decoder.java (original)
+++ incubator/lucy/trunk/core/Lucy/Util/UTF8Decoder.java Mon Sep 5 16:06:09 2011
@@ -17,24 +17,6 @@
*/
-package org.apache.batik.util.io;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * This class represents an object which decodes UTF-8 characters from
- * a stream of bytes.
- *
- * @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
- * @version $Id$
- */
-public class UTF8Decoder extends AbstractCharDecoder {
-
- /**
- * The number of bytes of a UTF-8 sequence indexed by the first
- * byte of the sequence.
- */
protected static final byte[] UTF8_BYTES = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -46,104 +28,3 @@ public class UTF8Decoder extends Abstrac
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
};
- /**
- * The next char, in case of a 4 bytes sequence.
- */
- protected int nextChar = -1;
-
- /**
- * Creates a new UTF8Decoder.
- */
- public UTF8Decoder(InputStream is) {
- super(is);
- }
-
- /**
- * Reads the next character.
- * @return a character or END_OF_STREAM.
- */
- public int readChar() throws IOException {
- if (nextChar != -1) {
- int result = nextChar;
- nextChar = -1;
- return result;
- }
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- return END_OF_STREAM;
- }
- int b1 = buffer[position++] & 0xff;
- switch (UTF8_BYTES[b1]) {
- default:
- charError("UTF-8");
-
- case 1:
- return b1;
-
- case 2:
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- endOfStreamError("UTF-8");
- }
- return ((b1 & 0x1f) << 6) | (buffer[position++] & 0x3f);
-
- case 3:
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- endOfStreamError("UTF-8");
- }
- int b2 = buffer[position++];
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- endOfStreamError("UTF-8");
- }
- int b3 = buffer[position++];
- if ((b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80) {
- charError("UTF-8");
- }
- return ((b1 & 0x1f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x1f);
-
- case 4:
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- endOfStreamError("UTF-8");
- }
- b2 = buffer[position++];
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- endOfStreamError("UTF-8");
- }
- b3 = buffer[position++];
- if (position == count) {
- fillBuffer();
- }
- if (count == -1) {
- endOfStreamError("UTF-8");
- }
- int b4 = buffer[position++];
- if ((b2 & 0xc0) != 0x80 ||
- (b3 & 0xc0) != 0x80 ||
- (b4 & 0xc0) != 0x80) {
- charError("UTF-8");
- }
- int c = ((b1 & 0x1f) << 18)
- | ((b2 & 0x3f) << 12)
- | ((b3 & 0x1f) << 6)
- | (b4 & 0x1f);
- nextChar = (c - 0x10000) % 0x400 + 0xdc00;
- return (c - 0x10000) / 0x400 + 0xd800;
- }
- }
-}