You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by ma...@apache.org on 2022/12/08 19:32:06 UTC
[tomcat] 01/02: Add an RFC 8941 structured field parser.
This is an automated email from the ASF dual-hosted git repository.
markt pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit beed6c936ece5d540365991d00fe35459e5c3505
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Thu Dec 8 19:23:00 2022 +0000
Add an RFC 8941 structured field parser.
---
.../util/http/parser/LocalStrings.properties | 14 +
.../tomcat/util/http/parser/StructuredField.java | 599 +++++++++++++++++++++
.../http/parser/TesterHttpWgStructuredField.java | 130 +++++
webapps/docs/changelog.xml | 3 +
4 files changed, 746 insertions(+)
diff --git a/java/org/apache/tomcat/util/http/parser/LocalStrings.properties b/java/org/apache/tomcat/util/http/parser/LocalStrings.properties
index ae024240d9..325e7da57f 100644
--- a/java/org/apache/tomcat/util/http/parser/LocalStrings.properties
+++ b/java/org/apache/tomcat/util/http/parser/LocalStrings.properties
@@ -44,3 +44,17 @@ http.tooFewHextets=An IPv6 address must consist of 8 hextets but this address co
http.tooManyColons=An IPv6 address may not contain more than 2 sequential colon characters.
http.tooManyDoubleColons=An IPv6 address may only contain a single '::' sequence.
http.tooManyHextets=The IPv6 address contains [{0}] hextets but a valid IPv6 address may not have more than 8.
+
+sf.bareitem.invalidCharacter=The invalid character [{0}] was found when parsing the start of a bare item
+sf.base64.invalidCharacter=The [{0}] character is not valid inside a base64 sequence
+sf.boolean.invalidCharacter=The [{0}] character is not a valid boolean value
+sf.invalidCharacter=The [{0}] character is not valid here
+sf.key.invalidFirstCharacter=The invalid character [{0}] was found when parsing the start of a key
+sf.numeric.decimalInvalidFinal=The final character of a decimal value must be a digit
+sf.numeric.decimalPartTooLong=More than 3 digits after the decimal point
+sf.numeric.decimalTooLong=More than 16 characters found in a decimal
+sf.numeric.integerTooLong=More than 15 digits found in an integer
+sf.numeric.integralPartTooLong=More than 12 digits found in the integral part of a decimal
+sf.numeric.invalidCharacter=The invalid character [{0}] was found parsing a numeric value where a digit was expected
+sf.string.invalidCharacter=The [{0}] character is not valid inside a string
+sf.string.invalidEscape=The [{0}] character must not be escaped
diff --git a/java/org/apache/tomcat/util/http/parser/StructuredField.java b/java/org/apache/tomcat/util/http/parser/StructuredField.java
new file mode 100644
index 0000000000..5b8a695440
--- /dev/null
+++ b/java/org/apache/tomcat/util/http/parser/StructuredField.java
@@ -0,0 +1,599 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.http.parser;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.tomcat.util.codec.binary.Base64;
+import org.apache.tomcat.util.res.StringManager;
+
+/**
+ * Parsing of structured fields as per RFC 8941.
+ * <p>
+ * The parsing implementation is complete but not all elements are currently
+ * exposed via getters. Additional getters will be added as required as the
+ * use of structured fields expands.
+ * <p>
+ * The serialization of structured fields has not been implemented.
+ */
+public class StructuredField {
+
+ private static final StringManager sm = StringManager.getManager(StructuredField.class);
+
+ private static final int ARRAY_SIZE = 128;
+
+ private static final boolean[] IS_KEY_FIRST = new boolean[ARRAY_SIZE];
+ private static final boolean[] IS_KEY = new boolean[ARRAY_SIZE];
+ private static final boolean[] IS_OWS = new boolean[ARRAY_SIZE];
+ private static final boolean[] IS_BASE64 = new boolean[ARRAY_SIZE];
+ private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
+
+    static {
+        // Fill all of the ASCII lookup tables in one pass. Entries that are
+        // not explicitly set keep the default value of false.
+        for (int ch = 0; ch < ARRAY_SIZE; ch++) {
+            boolean lowerAlpha = ch >= 'a' && ch <= 'z';
+            boolean upperAlpha = ch >= 'A' && ch <= 'Z';
+            boolean digit = ch >= '0' && ch <= '9';
+
+            // Keys start with '*' or a lower case letter and may continue
+            // with digits, '_', '-' or '.' as well
+            if (ch == '*' || lowerAlpha) {
+                IS_KEY_FIRST[ch] = true;
+                IS_KEY[ch] = true;
+            } else if (digit || ch == '_' || ch == '-' || ch == '.') {
+                IS_KEY[ch] = true;
+            }
+
+            // Optional white space is horizontal tab (9) or space
+            if (ch == 9 || ch == ' ') {
+                IS_OWS[ch] = true;
+            }
+
+            // Base64 alphabet including the '=' padding character
+            if (ch == '+' || ch == '/' || digit || ch == '=' || upperAlpha || lowerAlpha) {
+                IS_BASE64[ch] = true;
+            }
+
+            // Structured field tokens allow ':' and '/' in addition to HTTP token characters
+            if (HttpParser.isToken(ch) || ch == ':' || ch == '/') {
+                IS_TOKEN[ch] = true;
+            }
+        }
+    }
+
+
+    /**
+     * Parses a complete structured field list. Leading and trailing spaces
+     * are skipped and the input must be fully consumed.
+     *
+     * @param input the reader providing the field value; must support mark/reset
+     *
+     * @return the parsed list, empty if the input holds nothing but spaces
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the input is not a valid list
+     */
+    static SfList parseSfList(Reader input) throws IOException {
+        skipSP(input);
+
+        SfList list = new SfList();
+
+        boolean more = peek(input) != -1;
+        while (more) {
+            list.addListMember(parseSfListMember(input));
+            skipOWS(input);
+            more = peek(input) != -1;
+            if (more) {
+                // Another member must follow the separator: a trailing comma is an error
+                requireChar(input, ',');
+                skipOWS(input);
+                requireNotChar(input, -1);
+            }
+        }
+
+        skipSP(input);
+        requireChar(input, -1);
+        return list;
+    }
+
+
+    /**
+     * Parses one list member, which is either an inner list (if the next
+     * character is '(') or a bare item, followed by any parameters.
+     *
+     * @param input the reader providing the field value; must support mark/reset
+     *
+     * @return the parsed member with its parameters attached
+     *
+     * @throws IOException if reading from the input fails
+     */
+    static SfListMember parseSfListMember(Reader input) throws IOException {
+        SfListMember member = peek(input) == '(' ? parseSfInnerList(input) : parseSfBareItem(input);
+        parseSfParameters(input, member);
+        return member;
+    }
+
+
+    /**
+     * Parses an inner list: '(' followed by zero or more space separated
+     * items (each with optional parameters) and a closing ')'.
+     *
+     * @param input the reader positioned at the opening '('
+     *
+     * @return the parsed inner list
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the inner list is malformed
+     */
+    static SfInnerList parseSfInnerList(Reader input) throws IOException {
+        requireChar(input, '(');
+
+        SfInnerList result = new SfInnerList();
+
+        skipSP(input);
+        while (peek(input) != ')') {
+            SfItem<?> element = parseSfBareItem(input);
+            parseSfParameters(input, element);
+            result.addListItem(element);
+
+            // An item must be followed by a space or ')'. Check without
+            // consuming the character.
+            input.mark(1);
+            requireChar(input, ' ', ')');
+            input.reset();
+
+            skipSP(input);
+        }
+        requireChar(input, ')');
+
+        return result;
+    }
+
+
+    /**
+     * Parses a complete structured field dictionary. A key that is not
+     * followed by '=' is given the boolean value true. Leading and trailing
+     * spaces are skipped and the input must be fully consumed.
+     *
+     * @param input the reader providing the field value; must support mark/reset
+     *
+     * @return the parsed dictionary, empty if the input holds nothing but spaces
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the input is not a valid dictionary
+     */
+    static SfDictionary parseSfDictionary(Reader input) throws IOException {
+        skipSP(input);
+
+        SfDictionary dictionary = new SfDictionary();
+
+        boolean more = peek(input) != -1;
+        while (more) {
+            String key = parseSfKey(input);
+
+            // Read the character after the key; it is pushed back unless it is '='
+            input.mark(1);
+            SfListMember member;
+            if (input.read() == '=') {
+                member = parseSfListMember(input);
+            } else {
+                input.reset();
+                member = new SfBoolean(true);
+            }
+            parseSfParameters(input, member);
+            dictionary.addDictionaryMember(key, member);
+
+            skipOWS(input);
+            more = peek(input) != -1;
+            if (more) {
+                // Another member must follow the separator: a trailing comma is an error
+                requireChar(input, ',');
+                skipOWS(input);
+                requireNotChar(input, -1);
+            }
+        }
+
+        skipSP(input);
+        requireChar(input, -1);
+        return dictionary;
+    }
+
+
+    /**
+     * Parses a complete structured field item: a bare item plus optional
+     * parameters. Leading and trailing spaces are skipped and the input must
+     * be fully consumed.
+     *
+     * @param input the reader providing the field value; must support mark/reset
+     *
+     * @return the parsed item with its parameters attached
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the input is not a valid item
+     */
+    static SfItem<?> parseSfItem(Reader input) throws IOException {
+        skipSP(input);
+
+        SfItem<?> result = parseSfBareItem(input);
+        parseSfParameters(input, result);
+
+        // Only trailing spaces may remain before the end of the input
+        skipSP(input);
+        requireChar(input, -1);
+        return result;
+    }
+
+
+    /**
+     * Parses a bare item. The first character read selects the item type:
+     * '-' or a digit for a number, '"' for a string, '*' or a letter for a
+     * token, ':' for a byte sequence and '?' for a boolean.
+     *
+     * @param input the reader positioned at the first character of the item
+     *
+     * @return the parsed item
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the first character cannot start a
+     *             bare item or the item itself is malformed
+     */
+    static SfItem<?> parseSfBareItem(Reader input) throws IOException {
+        int c = input.read();
+
+        SfItem<?> item;
+        if (c == '-' || HttpParser.isNumeric(c)) {
+            item = parseSfNumeric(input, c);
+        } else if (c == '\"') {
+            item = parseSfString(input);
+        } else if (c == '*' || HttpParser.isAlpha(c)) {
+            item = parseSfToken(input, c);
+        } else if (c == ':') {
+            item = parseSfByteSequence(input);
+        } else if (c == '?') {
+            item = parseSfBoolean(input);
+        } else {
+            // %04X zero-pads the code point to at least four hex digits
+            throw new IllegalArgumentException(
+                    sm.getString("sf.bareitem.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+        }
+
+        return item;
+    }
+
+
+    /**
+     * Parses zero or more ';' prefixed parameters and adds them to the given
+     * list member. A parameter key that is not followed by '=' is given the
+     * boolean value true.
+     *
+     * @param input      the reader positioned after the item or inner list
+     * @param listMember the member that receives the parsed parameters
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if a parameter is malformed
+     */
+    static void parseSfParameters(Reader input, SfListMember listMember) throws IOException {
+        while (peek(input) == ';') {
+            requireChar(input, ';');
+            skipSP(input);
+            String name = parseSfKey(input);
+
+            // Read the character after the key; it is pushed back unless it is '='
+            input.mark(1);
+            SfItem<?> value;
+            if (input.read() == '=') {
+                value = parseSfBareItem(input);
+            } else {
+                input.reset();
+                value = new SfBoolean(true);
+            }
+            listMember.addParameter(name, value);
+        }
+    }
+
+
+    /**
+     * Parses a dictionary or parameter key. The first character must be a
+     * valid key start character; reading continues while key characters are
+     * found. The first non-key character is left unread.
+     *
+     * @param input the reader positioned at the first character of the key
+     *
+     * @return the key
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the first character cannot start a key
+     */
+    static String parseSfKey(Reader input) throws IOException {
+        StringBuilder result = new StringBuilder();
+
+        input.mark(1);
+        int c = input.read();
+        if (!isKeyFirst(c)) {
+            // %04X zero-pads the code point to at least four hex digits
+            throw new IllegalArgumentException(
+                    sm.getString("sf.key.invalidFirstCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+        }
+
+        while (c != -1 && isKey(c)) {
+            result.append((char) c);
+            // Re-mark before every read so the terminating character can be pushed back
+            input.mark(1);
+            c = input.read();
+        }
+        input.reset();
+        return result.toString();
+    }
+
+
+    /**
+     * Parses an integer or decimal. The value is an integer unless a '.' is
+     * found. The length limits enforced are: at most 15 digits for an
+     * integer, at most 12 digits before the decimal point, at most 16
+     * characters in total for a decimal and at most 3 digits after the
+     * decimal point. The first character that is not part of the number is
+     * left unread.
+     *
+     * @param input the reader positioned after the first character
+     * @param first the first character, already read by the caller: '-' or a digit
+     *
+     * @return an {@link SfInteger} or {@link SfDecimal}
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the number is malformed or too long
+     */
+    static SfItem<?> parseSfNumeric(Reader input, int first) throws IOException {
+        int sign = 1;
+        boolean integer = true;
+        int decimalPos = 0;
+
+        // Holds the digits (and decimal point) only. The sign is applied at the end.
+        StringBuilder result = new StringBuilder();
+
+        int c;
+        if (first == '-') {
+            sign = -1;
+            c = input.read();
+        } else {
+            c = first;
+        }
+
+        if (!HttpParser.isNumeric(c)) {
+            // %04X zero-pads the code point to at least four hex digits
+            throw new IllegalArgumentException(
+                    sm.getString("sf.numeric.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+        }
+        result.append((char) c);
+        input.mark(1);
+        c = input.read();
+
+        while (c != -1) {
+            if (HttpParser.isNumeric(c)) {
+                result.append((char) c);
+            } else if (integer && c == '.') {
+                if (result.length() > 12) {
+                    throw new IllegalArgumentException(sm.getString("sf.numeric.integralPartTooLong"));
+                }
+                integer = false;
+                result.append((char) c);
+                // Index of the first fractional digit
+                decimalPos = result.length();
+            } else {
+                // Not part of the number: push it back for the caller
+                input.reset();
+                break;
+            }
+            if (integer && result.length() > 15) {
+                throw new IllegalArgumentException(sm.getString("sf.numeric.integerTooLong"));
+            }
+            if (!integer && result.length() > 16) {
+                throw new IllegalArgumentException(sm.getString("sf.numeric.decimalTooLong"));
+            }
+            input.mark(1);
+            c = input.read();
+        }
+
+        if (integer) {
+            return new SfInteger(Long.parseLong(result.toString()) * sign);
+        }
+
+        if (result.charAt(result.length() - 1) == '.') {
+            throw new IllegalArgumentException(sm.getString("sf.numeric.decimalInvalidFinal"));
+        }
+
+        if (result.length() - decimalPos > 3) {
+            throw new IllegalArgumentException(sm.getString("sf.numeric.decimalPartTooLong"));
+        }
+
+        return new SfDecimal(Double.parseDouble(result.toString()) * sign);
+    }
+
+
+    /**
+     * Parses a string. The opening '"' has already been consumed by the
+     * caller. Only '\' and '"' may be escaped and only characters in the
+     * range 32 to 126 are accepted. The closing '"' is consumed.
+     *
+     * @param input the reader positioned after the opening '"'
+     *
+     * @return the parsed string with escapes removed
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if an invalid character, an invalid
+     *             escape or the end of the input is found before the closing '"'
+     */
+    static SfString parseSfString(Reader input) throws IOException {
+        // It is known first character was '"'
+        StringBuilder result = new StringBuilder();
+
+        while (true) {
+            int c = input.read();
+            if (c == '\\') {
+                // An escape must be followed by another character
+                requireNotChar(input, -1);
+                c = input.read();
+                if (c != '\\' && c != '\"') {
+                    // %04X zero-pads the code point to at least four hex digits
+                    throw new IllegalArgumentException(
+                            sm.getString("sf.string.invalidEscape", String.format("\\u%04X", Integer.valueOf(c))));
+                }
+            } else {
+                if (c == '\"') {
+                    break;
+                }
+                // This test also covers unexpected EOF
+                if (c < 32 || c > 126) {
+                    throw new IllegalArgumentException(
+                            sm.getString("sf.string.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+                }
+            }
+            result.append((char) c);
+        }
+
+        return new SfString(result.toString());
+    }
+
+
+    /**
+     * Parses a token. The first character has already been read and
+     * validated by the caller. The first character that is not a token
+     * character is left unread.
+     *
+     * @param input the reader positioned after the first character
+     * @param first the first character of the token
+     *
+     * @return the parsed token
+     *
+     * @throws IOException if reading from the input fails
+     */
+    static SfToken parseSfToken(Reader input, int first) throws IOException {
+        StringBuilder token = new StringBuilder();
+        token.append((char) first);
+
+        input.mark(1);
+        int next = input.read();
+        while (isToken(next)) {
+            token.append((char) next);
+            input.mark(1);
+            next = input.read();
+        }
+        // Push back the character that ended the token
+        input.reset();
+
+        return new SfToken(token.toString());
+    }
+
+
+    /**
+     * Parses a byte sequence. The opening ':' has already been consumed by
+     * the caller. Base64 characters are collected up to the closing ':' and
+     * then decoded.
+     *
+     * @param input the reader positioned after the opening ':'
+     *
+     * @return the decoded byte sequence
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if a non-base64 character or the end
+     *             of the input is found before the closing ':'
+     */
+    static SfByteSequence parseSfByteSequence(Reader input) throws IOException {
+        // It is known first character was ':'
+        StringBuilder base64 = new StringBuilder();
+
+        while (true) {
+            int c = input.read();
+
+            if (c == ':') {
+                break;
+            } else if (isBase64(c)) {
+                base64.append((char) c);
+            } else {
+                // %04X zero-pads the code point to at least four hex digits
+                throw new IllegalArgumentException(
+                        sm.getString("sf.base64.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+            }
+        }
+
+        return new SfByteSequence(Base64.decodeBase64(base64.toString()));
+    }
+
+
+    /**
+     * Parses a boolean. The leading '?' has already been consumed by the
+     * caller, so exactly one further character is read: '1' for true or '0'
+     * for false.
+     *
+     * @param input the reader positioned after the '?'
+     *
+     * @return the parsed boolean
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the character is neither '0' nor '1'
+     */
+    static SfBoolean parseSfBoolean(Reader input) throws IOException {
+        // It is known first character was '?'
+        int c = input.read();
+
+        if (c == '1') {
+            return new SfBoolean(true);
+        } else if (c == '0') {
+            return new SfBoolean(false);
+        } else {
+            // %04X zero-pads the code point to at least four hex digits
+            throw new IllegalArgumentException(
+                    sm.getString("sf.boolean.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+        }
+    }
+
+
+    /**
+     * Consumes any run of space characters (code point 32). The first
+     * character that is not a space is left unread.
+     *
+     * @param input the reader to advance; must support mark/reset
+     *
+     * @throws IOException if reading from the input fails
+     */
+    static void skipSP(Reader input) throws IOException {
+        int next;
+        do {
+            input.mark(1);
+            next = input.read();
+        } while (next == 32);
+        // Undo the read of the character that ended the run
+        input.reset();
+    }
+
+
+    /**
+     * Consumes any run of optional white space as defined by
+     * {@link #isOws(int)}. The first character that is not white space is
+     * left unread.
+     *
+     * @param input the reader to advance; must support mark/reset
+     *
+     * @throws IOException if reading from the input fails
+     */
+    static void skipOWS(Reader input) throws IOException {
+        int next;
+        do {
+            input.mark(1);
+            next = input.read();
+        } while (isOws(next));
+        // Undo the read of the character that ended the run
+        input.reset();
+    }
+
+
+    /**
+     * Reads (and consumes) the next character and checks that it is one of
+     * the permitted values. Pass -1 to require the end of the input.
+     *
+     * @param input    the reader to read from
+     * @param required the permitted values for the next character
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the character read is not permitted
+     */
+    static void requireChar(Reader input, int... required) throws IOException {
+        int c = input.read();
+        for (int r : required) {
+            if (c == r) {
+                return;
+            }
+        }
+        // %04X zero-pads the code point to at least four hex digits
+        throw new IllegalArgumentException(
+                sm.getString("sf.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+    }
+
+
+    /**
+     * Checks that the next character is not the given value. The character
+     * is not consumed when the check passes.
+     *
+     * @param input    the reader to inspect; must support mark/reset
+     * @param required the value the next character must not have; despite the
+     *                     name this is the forbidden value. Pass -1 to reject
+     *                     the end of the input.
+     *
+     * @throws IOException if reading from the input fails
+     * @throws IllegalArgumentException if the next character is the forbidden value
+     */
+    static void requireNotChar(Reader input, int required) throws IOException {
+        input.mark(1);
+        int c = input.read();
+        if (c == required) {
+            // %04X zero-pads the code point to at least four hex digits
+            throw new IllegalArgumentException(
+                    sm.getString("sf.invalidCharacter", String.format("\\u%04X", Integer.valueOf(c))));
+        }
+        input.reset();
+    }
+
+
+    /**
+     * Returns the next character without consuming it.
+     *
+     * @param input the reader to inspect; must support mark/reset
+     *
+     * @return the next character, or -1 at the end of the input
+     *
+     * @throws IOException if reading from the input fails
+     */
+    static int peek(Reader input) throws IOException {
+        input.mark(1);
+        final int next = input.read();
+        input.reset();
+        return next;
+    }
+
+
+    /**
+     * Tests whether a character may start a key.
+     *
+     * @param c the character to test; may be -1 (EOF) or outside the ASCII range
+     *
+     * @return {@code true} if the character is flagged in the lookup table,
+     *             {@code false} otherwise, including for out of range values
+     */
+    static boolean isKeyFirst(int c) {
+        // Explicit range check rather than catching ArrayIndexOutOfBoundsException
+        return c >= 0 && c < ARRAY_SIZE && IS_KEY_FIRST[c];
+    }
+
+
+    /**
+     * Tests whether a character may appear in a key.
+     *
+     * @param c the character to test; may be -1 (EOF) or outside the ASCII range
+     *
+     * @return {@code true} if the character is flagged in the lookup table,
+     *             {@code false} otherwise, including for out of range values
+     */
+    static boolean isKey(int c) {
+        // Explicit range check rather than catching ArrayIndexOutOfBoundsException
+        return c >= 0 && c < ARRAY_SIZE && IS_KEY[c];
+    }
+
+
+    /**
+     * Tests whether a character is optional white space.
+     *
+     * @param c the character to test; may be -1 (EOF) or outside the ASCII range
+     *
+     * @return {@code true} if the character is flagged in the lookup table,
+     *             {@code false} otherwise, including for out of range values
+     */
+    static boolean isOws(int c) {
+        // Explicit range check rather than catching ArrayIndexOutOfBoundsException
+        return c >= 0 && c < ARRAY_SIZE && IS_OWS[c];
+    }
+
+
+    /**
+     * Tests whether a character may appear in a base64 sequence.
+     *
+     * @param c the character to test; may be -1 (EOF) or outside the ASCII range
+     *
+     * @return {@code true} if the character is flagged in the lookup table,
+     *             {@code false} otherwise, including for out of range values
+     */
+    static boolean isBase64(int c) {
+        // Explicit range check rather than catching ArrayIndexOutOfBoundsException
+        return c >= 0 && c < ARRAY_SIZE && IS_BASE64[c];
+    }
+
+
+    /**
+     * Tests whether a character may appear in a token after the first
+     * character.
+     *
+     * @param c the character to test; may be -1 (EOF) or outside the ASCII range
+     *
+     * @return {@code true} if the character is flagged in the lookup table,
+     *             {@code false} otherwise, including for out of range values
+     */
+    static boolean isToken(int c) {
+        // Explicit range check rather than catching ArrayIndexOutOfBoundsException
+        return c >= 0 && c < ARRAY_SIZE && IS_TOKEN[c];
+    }
+
+
+ /**
+ * Private so that the class cannot be instantiated from outside.
+ */
+ private StructuredField() {
+ // Utility class. Hide default constructor.
+ }
+
+
+    /**
+     * A parsed dictionary. Members are kept in insertion order; adding a key
+     * that is already present replaces the earlier value.
+     */
+    static class SfDictionary {
+        private final Map<String,SfListMember> members = new LinkedHashMap<>();
+
+        /**
+         * Stores a member under the given key.
+         *
+         * @param key   the member key
+         * @param value the member value
+         */
+        void addDictionaryMember(String key, SfListMember value) {
+            members.put(key, value);
+        }
+
+        /**
+         * Looks up a member.
+         *
+         * @param key the member key
+         *
+         * @return the member stored under the key or {@code null} if there is none
+         */
+        SfListMember getDictionaryMember(String key) {
+            return members.get(key);
+        }
+    }
+
+    /**
+     * A parsed list. Members are kept in the order they were added. No getter
+     * is currently exposed.
+     */
+    static class SfList {
+        private final List<SfListMember> members = new ArrayList<>();
+
+        /**
+         * Appends a member to the list.
+         *
+         * @param listMember the member to append
+         */
+        void addListMember(SfListMember listMember) {
+            members.add(listMember);
+        }
+    }
+
+    /**
+     * Base type for anything that can be a list or dictionary member. Holds
+     * the parameters attached to the member.
+     */
+    static class SfListMember {
+        // Created lazily on the first call to addParameter()
+        private Map<String,SfItem<?>> parameters = null;
+
+        /**
+         * Stores a parameter. Insertion order is kept and a repeated key
+         * replaces the earlier value.
+         *
+         * @param key   the parameter key
+         * @param value the parameter value
+         */
+        void addParameter(String key, SfItem<?> value) {
+            Map<String,SfItem<?>> params = parameters;
+            if (params == null) {
+                params = new LinkedHashMap<>();
+                parameters = params;
+            }
+            params.put(key, value);
+        }
+    }
+
+    /**
+     * A parsed inner list: an ordered sequence of items that, as a list
+     * member, may also carry parameters of its own.
+     */
+    static class SfInnerList extends SfListMember {
+        List<SfItem<?>> listItems = new ArrayList<>();
+
+        SfInnerList() {
+            // Default constructor is NO-OP
+        }
+
+        /**
+         * Appends an item to the inner list.
+         *
+         * @param item the item to append
+         */
+        void addListItem(SfItem<?> item) {
+            this.listItems.add(item);
+        }
+
+        /**
+         * Provides the items in the order they were added.
+         *
+         * @return the backing list itself, not a copy
+         */
+        List<SfItem<?>> getListItem() {
+            return this.listItems;
+        }
+    }
+
+    /**
+     * Base type for the bare item types. Holds the immutable item value and,
+     * via {@link SfListMember}, any parameters.
+     *
+     * @param <T> the Java type used to represent the item value
+     */
+    abstract static class SfItem<T> extends SfListMember {
+        private final T value;
+
+        SfItem(T value) {
+            this.value = value;
+        }
+
+        /**
+         * Obtains the item value.
+         *
+         * @return the value passed to the constructor
+         */
+        T getValue() {
+            return value;
+        }
+
+        /**
+         * Obtains the item value.
+         *
+         * @return the value passed to the constructor
+         *
+         * @deprecated Misspelled name retained for compatibility. Use
+         *                 {@link #getValue()}.
+         */
+        @Deprecated
+        T getVaue() {
+            return getValue();
+        }
+    }
+
+ /**
+ * Integer item. The primitive value is boxed to a {@code Long}.
+ */
+ static class SfInteger extends SfItem<Long> {
+ SfInteger(long value) {
+ super(Long.valueOf(value));
+ }
+ }
+
+ /**
+ * Decimal item. The primitive value is boxed to a {@code Double}.
+ */
+ static class SfDecimal extends SfItem<Double> {
+ SfDecimal(double value) {
+ super(Double.valueOf(value));
+ }
+ }
+
+ /**
+ * String item holding the value as a {@code String}.
+ */
+ static class SfString extends SfItem<String> {
+ SfString(String value) {
+ super(value);
+ }
+ }
+
+ /**
+ * Token item holding the token text as a {@code String}.
+ */
+ static class SfToken extends SfItem<String> {
+ SfToken(String value) {
+ super(value);
+ }
+ }
+
+ /**
+ * Byte sequence item. The array passed in is stored as-is, without copying.
+ */
+ static class SfByteSequence extends SfItem<byte[]> {
+ SfByteSequence(byte[] value) {
+ super(value);
+ }
+ }
+
+ /**
+ * Boolean item. The primitive value is boxed to a {@code Boolean}.
+ */
+ static class SfBoolean extends SfItem<Boolean> {
+ SfBoolean(boolean value) {
+ super(Boolean.valueOf(value));
+ }
+ }
+}
diff --git a/test/org/apache/tomcat/util/http/parser/TesterHttpWgStructuredField.java b/test/org/apache/tomcat/util/http/parser/TesterHttpWgStructuredField.java
new file mode 100644
index 0000000000..794d180934
--- /dev/null
+++ b/test/org/apache/tomcat/util/http/parser/TesterHttpWgStructuredField.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.http.parser;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.tomcat.util.buf.StringUtils;
+import org.apache.tomcat.util.json.JSONParser;
+
+/*
+ * Not run automatically (due to name) as it requires a local git clone of
+ * https://github.com/httpwg/structured-field-tests
+ */
+public class TesterHttpWgStructuredField {
+
+ private static final String testsPath = System.getProperty("user.home") + "/repos/httpwg-sf-tests";
+
+
+ /*
+ * Entry point: processes the directory named by testsPath.
+ */
+ @Test
+ public void test() throws Exception {
+ File testDir = new File(testsPath);
+ doTestDirectory(testDir);
+ }
+
+
+    /*
+     * Recursively processes a directory. Every file with a name ending in
+     * ".json" is run as a test file. The "serialisation-tests" directory is
+     * skipped.
+     */
+    private void doTestDirectory(File directory) throws Exception {
+        File[] children = directory.listFiles();
+        // listFiles() returns null if the path is not a directory or cannot
+        // be read, e.g. when the local clone of the tests is missing. Fail
+        // with a clear message rather than a NullPointerException.
+        Assert.assertNotNull("Unable to list test directory [" + directory.getAbsolutePath() + "]", children);
+
+        for (File file : children) {
+            if (file.isDirectory()) {
+                if (!file.getName().equals("serialisation-tests")) {
+                    doTestDirectory(file);
+                }
+            } else if (file.isFile()) {
+                if (file.getName().endsWith(".json")) {
+                    doTestFile(file);
+                }
+            }
+        }
+    }
+
+
+    /*
+     * Parses a single JSON test file as an array and runs each element as a
+     * test case. Any element that is not a JSON object fails the test.
+     */
+    private void doTestFile(File file) throws Exception {
+        System.out.println(file.getAbsolutePath());
+
+        try (FileInputStream in = new FileInputStream(file)) {
+            JSONParser jsonParser = new JSONParser(in);
+            List<Object> testCases = jsonParser.parseArray();
+            for (Object testCase : testCases) {
+                if (!(testCase instanceof Map)) {
+                    Assert.fail();
+                }
+                doTestMap((Map<?,?>) testCase);
+            }
+        }
+    }
+
+
+ /*
+ * Runs a single test case. The map supplies "name", "raw" (the header
+ * line(s)), "header_type" (item, list or dictionary) and the optional
+ * "must_fail" / "can_fail" flags, both treated as false when absent.
+ *
+ * If parsing throws, the case passes only if must_fail or can_fail is set.
+ * If parsing succeeds, the case passes only if must_fail is not set.
+ */
+ private void doTestMap(Map<?,?> map) throws Exception {
+ String name = (String) map.get("name");
+ @SuppressWarnings("unchecked")
+ List<String> rawLines = (List<String>) map.get("raw");
+ String headerType = (String) map.get("header_type");
+ Boolean mustFail = ((Boolean) map.get("must_fail"));
+ if (mustFail == null) {
+ mustFail = Boolean.FALSE;
+ }
+ Boolean canFail = ((Boolean) map.get("can_fail"));
+ if (canFail == null) {
+ canFail = Boolean.FALSE;
+ }
+ // NOTE(review): the separator used by StringUtils.join() is not visible
+ // here - presumably a comma, which would combine multiple header lines
+ // into one field value. Confirm.
+ String raw = StringUtils.join(rawLines);
+ /*
+ * The simple JSON parser may not be handling escape sequences
+ * correctly.
+ */
+ // The escaped backslash is replaced last so that the backslashes it
+ // produces are not picked up by the earlier replacements
+ String unescaped = raw.replace("\\\"", "\"");
+ unescaped = unescaped.replace("\\b", "\u0008");
+ unescaped = unescaped.replace("\\t", "\t");
+ unescaped = unescaped.replace("\\n", "\n");
+ unescaped = unescaped.replace("\\f", "\u000c");
+ unescaped = unescaped.replace("\\r", "\r");
+ unescaped = unescaped.replace("\\\\", "\\");
+ Reader input = new StringReader(unescaped);
+
+ try {
+ switch (headerType) {
+ case "item": {
+ StructuredField.parseSfItem(input);
+ break;
+ }
+ case "list": {
+ StructuredField.parseSfList(input);
+ break;
+ }
+ case "dictionary": {
+ StructuredField.parseSfDictionary(input);
+ break;
+ }
+ default:
+ // Nothing is parsed for an unknown type, so control reaches the
+ // final assertion, which fails if must_fail is set
+ System.out.println("Type unsupported " + headerType);
+ }
+ } catch (Exception e) {
+ // A parse failure is acceptable only if the test case allows it
+ Assert.assertTrue(name + ": raw [" + unescaped + "]", mustFail.booleanValue() || canFail.booleanValue());
+ return;
+ }
+ // Parsing succeeded, which is wrong if failure was mandatory
+ Assert.assertFalse(name + ": raw [" + unescaped + "]", mustFail.booleanValue());
+ }
+}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 2a8128d48d..7bdd1cc6dd 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -112,6 +112,9 @@
cookie value as part of the value as required by RFC 6265 and explicitly
clarified in RFC 6265bis. (markt)
</fix>
+ <add>
+ Add an RFC 8941 structured field parser. (markt)
+ </add>
</changelog>
</subsection>
<subsection name="Other">
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org