You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hc.apache.org by ol...@apache.org on 2014/08/05 10:15:47 UTC

svn commit: r1615885 - in /httpcomponents/httpcore/trunk/httpcore/src: main/java/org/apache/http/message/TokenParser.java test/java/org/apache/http/message/TestTokenParser.java

Author: olegk
Date: Tue Aug  5 08:15:47 2014
New Revision: 1615885

URL: http://svn.apache.org/r1615885
Log:
New token parser implementation partially based on Mime4J code (written by me)

Added:
    httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java   (with props)
    httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java   (with props)

Added: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
URL: http://svn.apache.org/viewvc/httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java?rev=1615885&view=auto
==============================================================================
--- httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java (added)
+++ httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java Tue Aug  5 08:15:47 2014
@@ -0,0 +1,257 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.message;
+
+import java.util.BitSet;
+
+import org.apache.http.util.CharArrayBuffer;
+
+/**
+ * Low level parser for header field elements. The parsing routines of this class are designed
+ * to produce near zero intermediate garbage and make no intermediate copies of input data.
+ * <p>
+ * This class is immutable and thread safe.
+ */
+public class TokenParser {
+
+    public static BitSet INIT_BITSET(final int ... b) {
+        final BitSet bitset = new BitSet(b.length);
+        for (final int aB : b) {
+            bitset.set(aB);
+        }
+        return bitset;
+    }
+
+    /** US-ASCII CR, carriage return (13) */
+    public static final int CR = '\r';
+
+    /** US-ASCII LF, line feed (10) */
+    public static final int LF = '\n';
+
+    /** US-ASCII SP, space (32) */
+    public static final int SP = ' ';
+
+    /** US-ASCII HT, horizontal-tab (9) */
+    public static final int HT = '\t';
+
+    public static boolean isWhitespace(final char ch) {
+        return ch == SP || ch == HT || ch == CR || ch == LF;
+    }
+
+    public static final TokenParser INSTANCE = new TokenParser();
+
+    /**
+     * Extracts from the sequence of bytes a token terminated with any of the given delimiters
+     * discarding semantically insignificant whitespace characters.
+     *
+     * @param buf buffer with the sequence of bytes to be parsed
+     * @param cursor defines the bounds and current position of the buffer
+     * @param delimiters set of delimiting characters. Can be <code>null</code> if the token
+     *  is not delimited by any character.
+     */
+    public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
+        final StringBuilder dst = new StringBuilder();
+        boolean whitespace = false;
+        while (!cursor.atEnd()) {
+            final char current = buf.charAt(cursor.getPos());
+            if (delimiters != null && delimiters.get(current)) {
+                break;
+            } else if (isWhitespace(current)) {
+                skipWhiteSpace(buf, cursor);
+                whitespace = true;
+            } else {
+                if (dst.length() > 0 && whitespace) {
+                    dst.append(' ');
+                }
+                copyContent(buf, cursor, delimiters, dst);
+                whitespace = false;
+            }
+        }
+        return dst.toString();
+    }
+
+    /**
+     * Extracts from the sequence of bytes a value which can be enclosed in quote marks and
+     * terminated with any of the given delimiters discarding semantically insignificant
+     * whitespace characters.
+     *
+     * @param buf buffer with the sequence of bytes to be parsed
+     * @param cursor defines the bounds and current position of the buffer
+     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
+     *  is not delimited by any character.
+     */
+    public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
+        final StringBuilder dst = new StringBuilder();
+        boolean whitespace = false;
+        while (!cursor.atEnd()) {
+            final char current = buf.charAt(cursor.getPos());
+            if (delimiters != null && delimiters.get(current)) {
+                break;
+            } else if (isWhitespace(current)) {
+                skipWhiteSpace(buf, cursor);
+                whitespace = true;
+            } else if (current == '\"') {
+                if (dst.length() > 0 && whitespace) {
+                    dst.append(' ');
+                }
+                copyQuotedContent(buf, cursor, dst);
+                whitespace = false;
+            } else {
+                if (dst.length() > 0 && whitespace) {
+                    dst.append(' ');
+                }
+                copyUnquotedContent(buf, cursor, delimiters, dst);
+                whitespace = false;
+            }
+        }
+        return dst.toString();
+    }
+
+    /**
+     * Skips semantically insignificant whitespace characters and moves the cursor to the closest
+     * non-whitespace character.
+     *
+     * @param buf buffer with the sequence of bytes to be parsed
+     * @param cursor defines the bounds and current position of the buffer
+     */
+    public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
+        int pos = cursor.getPos();
+        final int indexFrom = cursor.getPos();
+        final int indexTo = cursor.getUpperBound();
+        for (int i = indexFrom; i < indexTo; i++) {
+            final char current = buf.charAt(i);
+            if (!isWhitespace(current)) {
+                break;
+            } else {
+                pos++;
+            }
+        }
+        cursor.updatePos(pos);
+    }
+
+    /**
+     * Transfers content into the destination buffer until a whitespace character or any of
+     * the given delimiters is encountered.
+     *
+     * @param buf buffer with the sequence of bytes to be parsed
+     * @param cursor defines the bounds and current position of the buffer
+     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
+     *  is delimited by a whitespace only.
+     * @param dst destination buffer
+     */
+    public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
+            final StringBuilder dst) {
+        int pos = cursor.getPos();
+        final int indexFrom = cursor.getPos();
+        final int indexTo = cursor.getUpperBound();
+        for (int i = indexFrom; i < indexTo; i++) {
+            final char current = buf.charAt(i);
+            if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
+                break;
+            } else {
+                pos++;
+                dst.append(current);
+            }
+        }
+        cursor.updatePos(pos);
+    }
+
+    /**
+     * Transfers content into the destination buffer until a whitespace character,  a quote,
+     * or any of the given delimiters is encountered.
+     *
+     * @param buf buffer with the sequence of bytes to be parsed
+     * @param cursor defines the bounds and current position of the buffer
+     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
+     *  is delimited by a whitespace or a quote only.
+     * @param dst destination buffer
+     */
+    public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
+            final BitSet delimiters, final StringBuilder dst) {
+        int pos = cursor.getPos();
+        final int indexFrom = cursor.getPos();
+        final int indexTo = cursor.getUpperBound();
+        for (int i = indexFrom; i < indexTo; i++) {
+            final char current = buf.charAt(i);
+            if ((delimiters != null && delimiters.get(current))
+                    || isWhitespace(current) || current == '\"') {
+                break;
+            } else {
+                pos++;
+                dst.append(current);
+            }
+        }
+        cursor.updatePos(pos);
+    }
+
+    /**
+     * Transfers content enclosed with quote marks into the destination buffer.
+     *
+     * @param buf buffer with the sequence of bytes to be parsed
+     * @param cursor defines the bounds and current position of the buffer
+     * @param dst destination buffer
+     */
+    public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
+            final StringBuilder dst) {
+        if (cursor.atEnd()) {
+            return;
+        }
+        int pos = cursor.getPos();
+        int indexFrom = cursor.getPos();
+        final int indexTo = cursor.getUpperBound();
+        char current = buf.charAt(pos);
+        if (current != '\"') {
+            return;
+        }
+        pos++;
+        indexFrom++;
+        boolean escaped = false;
+        for (int i = indexFrom; i < indexTo; i++, pos++) {
+            current = buf.charAt(i);
+            if (escaped) {
+                if (current != '\"' && current != '\\') {
+                    dst.append('\\');
+                }
+                dst.append(current);
+                escaped = false;
+            } else {
+                if (current == '\"') {
+                    pos++;
+                    break;
+                }
+                if (current == '\\') {
+                    escaped = true;
+                } else if (current != '\r' && current != '\n') {
+                    dst.append(current);
+                }
+            }
+        }
+        cursor.updatePos(pos);
+    }
+
+}

Propchange: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Revision

Propchange: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
URL: http://svn.apache.org/viewvc/httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java?rev=1615885&view=auto
==============================================================================
--- httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java (added)
+++ httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java Tue Aug  5 08:15:47 2014
@@ -0,0 +1,193 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.message;
+
+import org.apache.http.util.CharArrayBuffer;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestTokenParser {
+
+    private TokenParser parser;
+
+    @Before
+    public void setUp() throws Exception {
+        parser = new TokenParser();
+    }
+
+    private static CharArrayBuffer createBuffer(final String value) {
+        if (value == null) {
+            return null;
+        }
+        final CharArrayBuffer buffer = new CharArrayBuffer(value.length());
+        buffer.append(value);
+        return buffer;
+    }
+
+    @Test
+    public void testBasicTokenParsing() throws Exception {
+        final String s = "   raw: \" some stuff \"";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+
+        parser.skipWhiteSpace(raw, cursor);
+
+        Assert.assertFalse(cursor.atEnd());
+        Assert.assertEquals(3, cursor.getPos());
+
+        final StringBuilder strbuf1 = new StringBuilder();
+        parser.copyContent(raw, cursor, TokenParser.INIT_BITSET(':'), strbuf1);
+
+        Assert.assertFalse(cursor.atEnd());
+        Assert.assertEquals(6, cursor.getPos());
+        Assert.assertEquals("raw", strbuf1.toString());
+        Assert.assertEquals(':', raw.charAt(cursor.getPos()));
+        cursor.updatePos(cursor.getPos() + 1);
+
+        parser.skipWhiteSpace(raw, cursor);
+
+        Assert.assertFalse(cursor.atEnd());
+        Assert.assertEquals(8, cursor.getPos());
+
+        final StringBuilder strbuf2 = new StringBuilder();
+        parser.copyQuotedContent(raw, cursor, strbuf2);
+
+        Assert.assertTrue(cursor.atEnd());
+        Assert.assertEquals(" some stuff ", strbuf2.toString());
+
+        parser.copyQuotedContent(raw, cursor, strbuf2);
+        Assert.assertTrue(cursor.atEnd());
+
+        parser.skipWhiteSpace(raw, cursor);
+        Assert.assertTrue(cursor.atEnd());
+    }
+
+    @Test
+    public void testTokenParsingWithQuotedPairs() throws Exception {
+        final String s = "raw: \"\\\"some\\stuff\\\\\"";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+
+        parser.skipWhiteSpace(raw, cursor);
+
+        Assert.assertFalse(cursor.atEnd());
+        Assert.assertEquals(0, cursor.getPos());
+
+        final StringBuilder strbuf1 = new StringBuilder();
+        parser.copyContent(raw, cursor, TokenParser.INIT_BITSET(':'), strbuf1);
+
+        Assert.assertFalse(cursor.atEnd());
+        Assert.assertEquals("raw", strbuf1.toString());
+        Assert.assertEquals(':', raw.charAt(cursor.getPos()));
+        cursor.updatePos(cursor.getPos() + 1);
+
+        parser.skipWhiteSpace(raw, cursor);
+
+        Assert.assertFalse(cursor.atEnd());
+
+        final StringBuilder strbuf2 = new StringBuilder();
+        parser.copyQuotedContent(raw, cursor, strbuf2);
+
+        Assert.assertTrue(cursor.atEnd());
+        Assert.assertEquals("\"some\\stuff\\", strbuf2.toString());
+    }
+
+    @Test
+    public void testTokenParsingIncompleteQuote() throws Exception {
+        final String s = "\"stuff and more stuff  ";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final StringBuilder strbuf1 = new StringBuilder();
+        parser.copyQuotedContent(raw, cursor, strbuf1);
+        Assert.assertEquals("stuff and more stuff  ", strbuf1.toString());
+    }
+
+    @Test
+    public void testTokenParsingTokensWithUnquotedBlanks() throws Exception {
+        final String s = "  stuff and   \tsome\tmore  stuff  ;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseToken(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuff and some more stuff", result);
+    }
+
+    @Test
+    public void testTokenParsingMixedValuesAndQuotedValues() throws Exception {
+        final String s = "  stuff and    \" some more \"   \"stuff  ;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuff and  some more  stuff  ;", result);
+    }
+
+    @Test
+    public void testTokenParsingMixedValuesAndQuotedValues2() throws Exception {
+        final String s = "stuff\"more\"stuff;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuffmorestuff", result);
+    }
+
+    @Test
+    public void testTokenParsingEscapedQuotes() throws Exception {
+        final String s = "stuff\"\\\"more\\\"\"stuff;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuff\"more\"stuff", result);
+    }
+
+    @Test
+    public void testTokenParsingEscapedDelimiter() throws Exception {
+        final String s = "stuff\"\\\"more\\\";\"stuff;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuff\"more\";stuff", result);
+    }
+
+    @Test
+    public void testTokenParsingEscapedSlash() throws Exception {
+        final String s = "stuff\"\\\"more\\\";\\\\\"stuff;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuff\"more\";\\stuff", result);
+    }
+
+    @Test
+    public void testTokenParsingSlashOutsideQuotes() throws Exception {
+        final String s = "stuff\\; more stuff;";
+        final CharArrayBuffer raw = createBuffer(s);
+        final ParserCursor cursor = new ParserCursor(0, s.length());
+        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
+        Assert.assertEquals("stuff\\", result);
+    }
+}

Propchange: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Revision

Propchange: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Re: Fwd: svn commit: r1615885 - in /httpcomponents/httpcore/trunk/httpcore/src: main/java/org/apache/http/message/TokenParser.java test/java/org/apache/http/message/TestTokenParser.java

Posted by Oleg Kalnichevski <ol...@apache.org>.
On 07/08/14 17:26 , sebb wrote:
> On 5 August 2014 09:15,  <ol...@apache.org> wrote:
>> Author: olegk
>> Date: Tue Aug  5 08:15:47 2014
>> New Revision: 1615885
>>
>> URL: http://svn.apache.org/r1615885
>> Log:
>> New token parser implementation partially based on Mime4J code (written by me)
>>
>> Added:
>>      httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java   (with props)
>
> Looks generally OK, though I question whether the input buffer needs
> to be resizeable - do we need to use CharArrayBuffer?
>
> Why not use String / CharSequence?
> Or even char[], but it might be best to use immutable input.
>

Usually the content that we need to parse comes from the wire and 
therefore the underlying content buffer must be able to grow as more 
data comes in. For consistency sake we should continue using 
CharArrayBuffer in 4.x but we can well consider using CharSequence in 5.0.


> There are a few code/doc tweaks it might be useful to make (see below).

Done.

>> +
>> +/**
>> + * Low level parser for header field elements. The parsing routines of this class are designed
>> + * to produce near zero intermediate garbage and make no intermediate copies of input data.
>> + * <p>
>> + * This class is immutable and thread safe.
>
> Are we sure about that?
> e.g it uses CharArrayBuffer and ParserCursor which are not.
>

I am reasonably sure. The class maintains no internal state and 
therefore is perfectly thread safe. CharArrayBuffer and ParserCursor 
though mutable should be provided as input by individual threads.

Oleg

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@hc.apache.org
For additional commands, e-mail: dev-help@hc.apache.org


Fwd: svn commit: r1615885 - in /httpcomponents/httpcore/trunk/httpcore/src: main/java/org/apache/http/message/TokenParser.java test/java/org/apache/http/message/TestTokenParser.java

Posted by sebb <se...@gmail.com>.
On 5 August 2014 09:15,  <ol...@apache.org> wrote:
> Author: olegk
> Date: Tue Aug  5 08:15:47 2014
> New Revision: 1615885
>
> URL: http://svn.apache.org/r1615885
> Log:
> New token parser implementation partially based on Mime4J code (written by me)
>
> Added:
>     httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java   (with props)

Looks generally OK, though I question whether the input buffer needs
to be resizeable - do we need to use CharArrayBuffer?

Why not use String / CharSequence?
Or even char[], but it might be best to use immutable input.

There are a few code/doc tweaks it might be useful to make (see below).

>     httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java   (with props)
>
> Added: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
> URL: http://svn.apache.org/viewvc/httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java?rev=1615885&view=auto
> ==============================================================================
> --- httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java (added)
> +++ httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java Tue Aug  5 08:15:47 2014
> @@ -0,0 +1,257 @@
> +/*
> + * ====================================================================
> + * Licensed to the Apache Software Foundation (ASF) under one
> + * or more contributor license agreements.  See the NOTICE file
> + * distributed with this work for additional information
> + * regarding copyright ownership.  The ASF licenses this file
> + * to you under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance
> + * with the License.  You may obtain a copy of the License at
> + *
> + *   http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> + * software distributed under the License is distributed on an
> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> + * KIND, either express or implied.  See the License for the
> + * specific language governing permissions and limitations
> + * under the License.
> + * ====================================================================
> + *
> + * This software consists of voluntary contributions made by many
> + * individuals on behalf of the Apache Software Foundation.  For more
> + * information on the Apache Software Foundation, please see
> + * <http://www.apache.org/>.
> + *
> + */
> +
> +package org.apache.http.message;
> +
> +import java.util.BitSet;
> +
> +import org.apache.http.util.CharArrayBuffer;
> +
> +/**
> + * Low level parser for header field elements. The parsing routines of this class are designed
> + * to produce near zero intermediate garbage and make no intermediate copies of input data.
> + * <p>
> + * This class is immutable and thread safe.

Are we sure about that?
e.g it uses CharArrayBuffer and ParserCursor which are not.

> + */
> +public class TokenParser {

> +
> +    public static BitSet INIT_BITSET(final int ... b) {
> +        final BitSet bitset = new BitSet(b.length);

b.length is the number of entries, which bears no relation to the maximum index.
Not sure it helps to provide the initial bitsize here.
But if it does, then it should be set using Math.max(b)

Also, these are
> +        for (final int aB : b) {
> +            bitset.set(aB);
> +        }
> +        return bitset;
> +    }
> +
> +    /** US-ASCII CR, carriage return (13) */
> +    public static final int CR = '\r';
> +
> +    /** US-ASCII LF, line feed (10) */
> +    public static final int LF = '\n';
> +
> +    /** US-ASCII SP, space (32) */
> +    public static final int SP = ' ';
> +
> +    /** US-ASCII HT, horizontal-tab (9) */
> +    public static final int HT = '\t';
> +
> +    public static boolean isWhitespace(final char ch) {
> +        return ch == SP || ch == HT || ch == CR || ch == LF;
> +    }
> +
> +    public static final TokenParser INSTANCE = new TokenParser();
> +
> +    /**
> +     * Extracts from the sequence of bytes a token terminated with any of the given delimiters

They are characters, not bytes.

> +     * discarding semantically insignificant whitespace characters.
> +     *
> +     * @param buf buffer with the sequence of bytes to be parsed

chars

> +     * @param cursor defines the bounds and current position of the buffer
> +     * @param delimiters set of delimiting characters. Can be <code>null</code> if the token
> +     *  is not delimited by any character.
> +     */
> +    public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {

The input buffer is not mutated so I wonder whether it needs to use
CharArrayBuffer?

> +        final StringBuilder dst = new StringBuilder();
> +        boolean whitespace = false;
> +        while (!cursor.atEnd()) {
> +            final char current = buf.charAt(cursor.getPos());
> +            if (delimiters != null && delimiters.get(current)) {
> +                break;
> +            } else if (isWhitespace(current)) {
> +                skipWhiteSpace(buf, cursor);
> +                whitespace = true;
> +            } else {
> +                if (dst.length() > 0 && whitespace) {

Probably cheaper to check whitespace first

> +                    dst.append(' ');
> +                }
> +                copyContent(buf, cursor, delimiters, dst);
> +                whitespace = false;
> +            }
> +        }
> +        return dst.toString();
> +    }
> +
> +    /**
> +     * Extracts from the sequence of bytes a value which can be enclosed in quote marks and

characters, not bytes

> +     * terminated with any of the given delimiters discarding semantically insignificant
> +     * whitespace characters.
> +     *
> +     * @param buf buffer with the sequence of bytes to be parsed

chars

> +     * @param cursor defines the bounds and current position of the buffer
> +     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
> +     *  is not delimited by any character.
> +     */
> +    public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {

Again, no need for a resizable buffer here.

> +        final StringBuilder dst = new StringBuilder();
> +        boolean whitespace = false;
> +        while (!cursor.atEnd()) {
> +            final char current = buf.charAt(cursor.getPos());
> +            if (delimiters != null && delimiters.get(current)) {
> +                break;
> +            } else if (isWhitespace(current)) {
> +                skipWhiteSpace(buf, cursor);
> +                whitespace = true;
> +            } else if (current == '\"') {
> +                if (dst.length() > 0 && whitespace) {

Swap the checks.

> +                    dst.append(' ');
> +                }
> +                copyQuotedContent(buf, cursor, dst);
> +                whitespace = false;
> +            } else {
> +                if (dst.length() > 0 && whitespace) {
> +                    dst.append(' ');
> +                }
> +                copyUnquotedContent(buf, cursor, delimiters, dst);
> +                whitespace = false;
> +            }
> +        }
> +        return dst.toString();
> +    }
> +
> +    /**
> +     * Skips semantically insignificant whitespace characters and moves the cursor to the closest
> +     * non-whitespace character.
> +     *
> +     * @param buf buffer with the sequence of bytes to be parsed
> +     * @param cursor defines the bounds and current position of the buffer
> +     */
> +    public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
> +        int pos = cursor.getPos();
> +        final int indexFrom = cursor.getPos();
> +        final int indexTo = cursor.getUpperBound();
> +        for (int i = indexFrom; i < indexTo; i++) {
> +            final char current = buf.charAt(i);
> +            if (!isWhitespace(current)) {
> +                break;
> +            } else {
> +                pos++;
> +            }
> +        }
> +        cursor.updatePos(pos);
> +    }
> +
> +    /**
> +     * Transfers content into the destination buffer until a whitespace character or any of
> +     * the given delimiters is encountered.
> +     *
> +     * @param buf buffer with the sequence of bytes to be parsed

chars

> +     * @param cursor defines the bounds and current position of the buffer
> +     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
> +     *  is delimited by a whitespace only.
> +     * @param dst destination buffer
> +     */
> +    public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
> +            final StringBuilder dst) {
> +        int pos = cursor.getPos();
> +        final int indexFrom = cursor.getPos();
> +        final int indexTo = cursor.getUpperBound();
> +        for (int i = indexFrom; i < indexTo; i++) {
> +            final char current = buf.charAt(i);
> +            if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
> +                break;
> +            } else {
> +                pos++;
> +                dst.append(current);
> +            }
> +        }
> +        cursor.updatePos(pos);
> +    }
> +
> +    /**
> +     * Transfers content into the destination buffer until a whitespace character,  a quote,
> +     * or any of the given delimiters is encountered.
> +     *
> +     * @param buf buffer with the sequence of bytes to be parsed
> +     * @param cursor defines the bounds and current position of the buffer
> +     * @param delimiters set of delimiting characters. Can be <code>null</code> if the value
> +     *  is delimited by a whitespace or a quote only.
> +     * @param dst destination buffer
> +     */
> +    public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
> +            final BitSet delimiters, final StringBuilder dst) {
> +        int pos = cursor.getPos();
> +        final int indexFrom = cursor.getPos();
> +        final int indexTo = cursor.getUpperBound();
> +        for (int i = indexFrom; i < indexTo; i++) {
> +            final char current = buf.charAt(i);
> +            if ((delimiters != null && delimiters.get(current))
> +                    || isWhitespace(current) || current == '\"') {

Should use constant for double-quote.

> +                break;
> +            } else {
> +                pos++;
> +                dst.append(current);
> +            }
> +        }
> +        cursor.updatePos(pos);
> +    }
> +
> +    /**
> +     * Transfers content enclosed with quote marks into the destination buffer.
> +     *
> +     * @param buf buffer with the sequence of bytes to be parsed
> +     * @param cursor defines the bounds and current position of the buffer
> +     * @param dst destination buffer
> +     */
> +    public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
> +            final StringBuilder dst) {
> +        if (cursor.atEnd()) {
> +            return;
> +        }
> +        int pos = cursor.getPos();
> +        int indexFrom = cursor.getPos();
> +        final int indexTo = cursor.getUpperBound();
> +        char current = buf.charAt(pos);
> +        if (current != '\"') {
> +            return;
> +        }
> +        pos++;
> +        indexFrom++;
> +        boolean escaped = false;
> +        for (int i = indexFrom; i < indexTo; i++, pos++) {
> +            current = buf.charAt(i);
> +            if (escaped) {
> +                if (current != '\"' && current != '\\') {

Should use constants here

> +                    dst.append('\\');
> +                }
> +                dst.append(current);
> +                escaped = false;
> +            } else {
> +                if (current == '\"') {

Use a constant.

> +                    pos++;
> +                    break;
> +                }
> +                if (current == '\\') {

Use constant

> +                    escaped = true;
> +                } else if (current != '\r' && current != '\n') {

Should use constants

> +                    dst.append(current);
> +                }
> +            }
> +        }
> +        cursor.updatePos(pos);
> +    }
> +
> +}
>
> Propchange: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
> ------------------------------------------------------------------------------
>     svn:eol-style = native
>
> Propchange: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
> ------------------------------------------------------------------------------
>     svn:keywords = Date Revision
>
> Propchange: httpcomponents/httpcore/trunk/httpcore/src/main/java/org/apache/http/message/TokenParser.java
> ------------------------------------------------------------------------------
>     svn:mime-type = text/plain
>
> Added: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
> URL: http://svn.apache.org/viewvc/httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java?rev=1615885&view=auto
> ==============================================================================
> --- httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java (added)
> +++ httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java Tue Aug  5 08:15:47 2014
> @@ -0,0 +1,193 @@
> +/*
> + * ====================================================================
> + * Licensed to the Apache Software Foundation (ASF) under one
> + * or more contributor license agreements.  See the NOTICE file
> + * distributed with this work for additional information
> + * regarding copyright ownership.  The ASF licenses this file
> + * to you under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance
> + * with the License.  You may obtain a copy of the License at
> + *
> + *   http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> + * software distributed under the License is distributed on an
> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> + * KIND, either express or implied.  See the License for the
> + * specific language governing permissions and limitations
> + * under the License.
> + * ====================================================================
> + *
> + * This software consists of voluntary contributions made by many
> + * individuals on behalf of the Apache Software Foundation.  For more
> + * information on the Apache Software Foundation, please see
> + * <http://www.apache.org/>.
> + *
> + */
> +
> +package org.apache.http.message;
> +
> +import org.apache.http.util.CharArrayBuffer;
> +import org.junit.Assert;
> +import org.junit.Before;
> +import org.junit.Test;
> +
> +public class TestTokenParser {
> +
> +    private TokenParser parser;
> +
> +    @Before
> +    public void setUp() throws Exception {
> +        parser = new TokenParser();
> +    }
> +
> +    private static CharArrayBuffer createBuffer(final String value) {
> +        if (value == null) {
> +            return null;
> +        }
> +        final CharArrayBuffer buffer = new CharArrayBuffer(value.length());
> +        buffer.append(value);
> +        return buffer;
> +    }
> +
> +    @Test
> +    public void testBasicTokenParsing() throws Exception {
> +        final String s = "   raw: \" some stuff \"";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +
> +        parser.skipWhiteSpace(raw, cursor);
> +
> +        Assert.assertFalse(cursor.atEnd());
> +        Assert.assertEquals(3, cursor.getPos());
> +
> +        final StringBuilder strbuf1 = new StringBuilder();
> +        parser.copyContent(raw, cursor, TokenParser.INIT_BITSET(':'), strbuf1);
> +
> +        Assert.assertFalse(cursor.atEnd());
> +        Assert.assertEquals(6, cursor.getPos());
> +        Assert.assertEquals("raw", strbuf1.toString());
> +        Assert.assertEquals(':', raw.charAt(cursor.getPos()));
> +        cursor.updatePos(cursor.getPos() + 1);
> +
> +        parser.skipWhiteSpace(raw, cursor);
> +
> +        Assert.assertFalse(cursor.atEnd());
> +        Assert.assertEquals(8, cursor.getPos());
> +
> +        final StringBuilder strbuf2 = new StringBuilder();
> +        parser.copyQuotedContent(raw, cursor, strbuf2);
> +
> +        Assert.assertTrue(cursor.atEnd());
> +        Assert.assertEquals(" some stuff ", strbuf2.toString());
> +
> +        parser.copyQuotedContent(raw, cursor, strbuf2);
> +        Assert.assertTrue(cursor.atEnd());
> +
> +        parser.skipWhiteSpace(raw, cursor);
> +        Assert.assertTrue(cursor.atEnd());
> +    }
> +
> +    @Test
> +    public void testTokenParsingWithQuotedPairs() throws Exception {
> +        final String s = "raw: \"\\\"some\\stuff\\\\\"";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +
> +        parser.skipWhiteSpace(raw, cursor);
> +
> +        Assert.assertFalse(cursor.atEnd());
> +        Assert.assertEquals(0, cursor.getPos());
> +
> +        final StringBuilder strbuf1 = new StringBuilder();
> +        parser.copyContent(raw, cursor, TokenParser.INIT_BITSET(':'), strbuf1);
> +
> +        Assert.assertFalse(cursor.atEnd());
> +        Assert.assertEquals("raw", strbuf1.toString());
> +        Assert.assertEquals(':', raw.charAt(cursor.getPos()));
> +        cursor.updatePos(cursor.getPos() + 1);
> +
> +        parser.skipWhiteSpace(raw, cursor);
> +
> +        Assert.assertFalse(cursor.atEnd());
> +
> +        final StringBuilder strbuf2 = new StringBuilder();
> +        parser.copyQuotedContent(raw, cursor, strbuf2);
> +
> +        Assert.assertTrue(cursor.atEnd());
> +        Assert.assertEquals("\"some\\stuff\\", strbuf2.toString());
> +    }
> +
> +    @Test
> +    public void testTokenParsingIncompleteQuote() throws Exception {
> +        final String s = "\"stuff and more stuff  ";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final StringBuilder strbuf1 = new StringBuilder();
> +        parser.copyQuotedContent(raw, cursor, strbuf1);
> +        Assert.assertEquals("stuff and more stuff  ", strbuf1.toString());
> +    }
> +
> +    @Test
> +    public void testTokenParsingTokensWithUnquotedBlanks() throws Exception {
> +        final String s = "  stuff and   \tsome\tmore  stuff  ;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseToken(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuff and some more stuff", result);
> +    }
> +
> +    @Test
> +    public void testTokenParsingMixedValuesAndQuotedValues() throws Exception {
> +        final String s = "  stuff and    \" some more \"   \"stuff  ;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuff and  some more  stuff  ;", result);
> +    }
> +
> +    @Test
> +    public void testTokenParsingMixedValuesAndQuotedValues2() throws Exception {
> +        final String s = "stuff\"more\"stuff;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuffmorestuff", result);
> +    }
> +
> +    @Test
> +    public void testTokenParsingEscapedQuotes() throws Exception {
> +        final String s = "stuff\"\\\"more\\\"\"stuff;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuff\"more\"stuff", result);
> +    }
> +
> +    @Test
> +    public void testTokenParsingEscapedDelimiter() throws Exception {
> +        final String s = "stuff\"\\\"more\\\";\"stuff;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuff\"more\";stuff", result);
> +    }
> +
> +    @Test
> +    public void testTokenParsingEscapedSlash() throws Exception {
> +        final String s = "stuff\"\\\"more\\\";\\\\\"stuff;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuff\"more\";\\stuff", result);
> +    }
> +
> +    @Test
> +    public void testTokenParsingSlashOutsideQuotes() throws Exception {
> +        final String s = "stuff\\; more stuff;";
> +        final CharArrayBuffer raw = createBuffer(s);
> +        final ParserCursor cursor = new ParserCursor(0, s.length());
> +        final String result = parser.parseValue(raw, cursor, TokenParser.INIT_BITSET(';'));
> +        Assert.assertEquals("stuff\\", result);
> +    }
> +}
>
> Propchange: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
> ------------------------------------------------------------------------------
>     svn:eol-style = native
>
> Propchange: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
> ------------------------------------------------------------------------------
>     svn:keywords = Date Revision
>
> Propchange: httpcomponents/httpcore/trunk/httpcore/src/test/java/org/apache/http/message/TestTokenParser.java
> ------------------------------------------------------------------------------
>     svn:mime-type = text/plain
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@hc.apache.org
For additional commands, e-mail: dev-help@hc.apache.org