You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@sling.apache.org by GitBox <gi...@apache.org> on 2022/09/03 09:39:12 UTC

[GitHub] [sling-org-apache-sling-xss] kwin commented on a diff in pull request #28: SLING-7231 Move to owasp sanitizer library

kwin commented on code in PR #28:
URL: https://github.com/apache/sling-org-apache-sling-xss/pull/28#discussion_r962129419


##########
src/main/java/org/apache/sling/xss/impl/style/CssValidator.java:
##########
@@ -0,0 +1,57 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.style;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy.CssPolicy;
+import org.owasp.html.AttributePolicy;
+import org.owasp.html.HtmlStreamEventProcessor;
+
+public class CssValidator {
+
+    public static final String STYLE_TAG_NAME = "style";
+    public static final String STYLE_ATTRIBUTE_NAME = STYLE_TAG_NAME;
+
+    private final BatikCssCleaner cssParser;
+    private final List<String> disallowedTagNames = new ArrayList<>();
+
+    public CssValidator(CssPolicy cssPolicy) {
+        cssParser = new BatikCssCleaner(cssPolicy);
+    }
+
+    public HtmlStreamEventProcessor newStyleTagProcessor() {
+        return new StyleTagProcessor(cssParser);
+    }
+
+    public AttributePolicy newCssAttributePolicy() {
+        return new AttributePolicy() {
+            @Override
+            public String apply(String elementName, String attributeName, String value) {

Review Comment:
   Convert this anonymous `AttributePolicy` class to a lambda.



##########
src/main/java/org/apache/sling/xss/impl/style/ValidatingDocumentHandler.java:
##########
@@ -0,0 +1,342 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.style;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.StringJoiner;
+import java.util.stream.Collectors;
+
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy.CssPolicy;
+import org.apache.sling.xss.impl.xml.Property;
+import org.w3c.css.sac.CSSException;
+import org.w3c.css.sac.CombinatorCondition;
+import org.w3c.css.sac.Condition;
+import org.w3c.css.sac.ConditionalSelector;
+import org.w3c.css.sac.DescendantSelector;
+import org.w3c.css.sac.DocumentHandler;
+import org.w3c.css.sac.InputSource;
+import org.w3c.css.sac.LexicalUnit;
+import org.w3c.css.sac.NegativeCondition;
+import org.w3c.css.sac.SACMediaList;
+import org.w3c.css.sac.Selector;
+import org.w3c.css.sac.SelectorList;
+import org.w3c.css.sac.SiblingSelector;
+
+public class ValidatingDocumentHandler implements DocumentHandler {
+
+    private final CssPolicy cssPolicy;
+    private final StringBuilder cleanCss = new StringBuilder();
+    private final boolean inline;

Review Comment:
   Rename to `isInline`.



##########
src/main/java/org/apache/sling/xss/impl/style/BatikCssCleaner.java:
##########
@@ -0,0 +1,82 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.style;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.batik.css.parser.Parser;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy.CssPolicy;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.css.sac.CSSException;
+import org.w3c.css.sac.InputSource;
+
+public class BatikCssCleaner {
+
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+    private final CssPolicy cssPolicy;
+
+    private static final String CDATA_PRE = "<![CDATA[";
+    private static final String CDATA_POST = "]]>";
+
+    public BatikCssCleaner(CssPolicy cssPolicy) {
+        this.cssPolicy = cssPolicy;
+    }
+
+    /**
+     * Parses a CSS stylesheet and returns it in a safe form
+     *
+     * @param untrustedCss a complete CSS stylesheet
+     * @return the cleaned CSS stylesheet text
+     */
+    public String cleanStylesheet(String untrustedCss) {
+        try {
+            if ( untrustedCss.startsWith(CDATA_PRE) && untrustedCss.endsWith(CDATA_POST) )
+                untrustedCss = untrustedCss.substring(CDATA_PRE.length(), untrustedCss.length() - CDATA_POST.length());
+            Parser parser = new Parser();
+            ValidatingDocumentHandler handler = new ValidatingDocumentHandler(cssPolicy, false);
+            parser.setDocumentHandler(handler);
+            parser.parseStyleSheet(new InputSource(new StringReader(untrustedCss)));
+            return handler.getValidCss();
+        } catch (CSSException | IOException e) {
+            logger.debug("Unexpected error while cleaning stylesheet", e);

Review Comment:
   Should we really silently return the empty string in case of exceptions? I would rather not catch the exception here.



##########
src/main/java/org/apache/sling/xss/impl/AntiSamyPolicyAdapter.java:
##########
@@ -0,0 +1,289 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;
+
+import org.apache.sling.xss.impl.style.CssValidator;
+import org.apache.sling.xss.impl.xml.Attribute;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy;
+import org.apache.sling.xss.impl.xml.Tag;
+import org.owasp.html.AttributePolicy;
+import org.owasp.html.HtmlPolicyBuilder;
+import org.owasp.html.PolicyFactory;
+
+import com.google.common.base.Predicate;
+
+public class AntiSamyPolicyAdapter {
+    private static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes";
+    private static final String REMOVE_TAG_ON_INVALID_ACTION = "removeTag";
+
+    private final List<String> onInvalidRemoveTagList = new ArrayList<>();
+    private final Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>();
+
+    private PolicyFactory policyFactory;
+    private CssValidator cssValidator;
+
+    public AntiSamyPolicyAdapter(AntiSamyPolicy policy) {
+        removeAttributeGuards();
+        HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder();
+
+        cssValidator = new CssValidator(policy.getCssPolicy());
+
+        // ------------ this is for the global attributes -------------
+        Map<String, Attribute> globalAttributes = policy.getGlobalAttributes();
+
+        for (Attribute attribute : globalAttributes.values()) {
+            if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                onInvalidRemoveTagList.add(attribute.getName());
+            }
+
+            if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) {
+                // we match style tags separately
+                policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy())
+                        .globally();
+            } else {
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                for (String allowedValue : allowedValuesFromAttribute) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally();
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                if (!regexsFromAttribute.isEmpty()) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute))
+                            .globally();
+                } else {
+                    policyBuilder.allowAttributes(attribute.getName()).globally();
+                }
+            }
+        }
+
+        // ------------ this is for the allowed empty tags -------------
+        List<String> allowedEmptyTags = policy.getAllowedEmptyTags();
+        for (String allowedEmptyTag : allowedEmptyTags) {
+            policyBuilder.allowWithoutAttributes(allowedEmptyTag);
+        }
+
+        // ------------ this is for the tag rules -------------
+        Map<String, Tag> tagMap = policy.getTagRules();
+        for (Map.Entry<String, Tag> tag : tagMap.entrySet()) {
+
+            String tagAction = tag.getValue().getAction();
+            switch (tagAction) {
+                // Tag.action
+                case AntiSamyActions.TRUNCATE:
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    break;
+
+                // filter: remove tags, but keep content,
+                case AntiSamyActions.FILTER:
+                    break;
+
+                // remove: remove tag and contents
+                case AntiSamyActions.REMOVE:
+                    policyBuilder.disallowElements(tag.getValue().getName());
+                    break;
+
+                case AntiSamyActions.VALIDATE:
+                case "":
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    boolean styleSeen = false;
+                    // get the allowed Attributes for the tag
+                    Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap();
+                    // if there are allowed Attributes, map over them
+                    for (Attribute attribute : allowedAttributes.values()) {
+
+                        if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                            onInvalidRemoveTagList.add(attribute.getName());
+                        }
+
+                        styleSeen = CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName());
+
+                        List<String> literalList = attribute.getLiterals();
+                        List<Pattern> patternList = attribute.getPatternList();
+
+                        if (!literalList.isEmpty() && !patternList.isEmpty()) {
+                            // if both, the patterns and the literals are not empty, the value should be checked with them with an OR and not with an AND.
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(matchesToPatternsAndLiterals(patternList, true, literalList))
+                                .onElements(tag.getValue().getName());
+                        }
+                        else if (!literalList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(true, literalList.toArray(new String[0]))
+                                .onElements(tag.getValue().getName());
+                            policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName());
+                        }
+                        else if (!patternList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                    .matching(matchesToPatterns(patternList))
+                                    .onElements(tag.getValue().getName());
+                        }
+                    }
+
+                    if (!styleSeen) {
+                        policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                                .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName());
+                    }
+                    break;
+
+                default:
+                    throw new RuntimeException("No tag action found.");
+            }
+        }
+
+        // disallow style tag on specific elements
+        policyBuilder.disallowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                .onElements(cssValidator.getDisallowedTagNames().toArray(new String[0]));
+
+        // ---------- dynamic attributes ------------
+        Map<String, Attribute> dynamicAttributes = new HashMap<>();
+
+        // checks if the dynamic attributes are allowed
+        if (policy.getDirectives().get(ALLOW_DYNAMIC_ATTRIBUTES).equals("true")) {
+            dynamicAttributes.putAll(policy.getDynamicAttributes());
+            for (Attribute attribute : dynamicAttributes.values()) {
+                if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                    onInvalidRemoveTagList.add(attribute.getName());
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                for (Pattern regex : regexsFromAttribute) {
+                    dynamicAttributesPolicyMap.put(attribute.getName(), newDynamicAttributePolicy(regex));
+                }
+
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                if (!allowedValuesFromAttribute.isEmpty()) {
+                    dynamicAttributesPolicyMap.put(attribute.getName(),
+                            newDynamicAttributePolicy(true, allowedValuesFromAttribute.toArray(new String[0])));
+                }
+
+            }
+        }
+
+        policyFactory = policyBuilder.allowTextIn(CssValidator.STYLE_TAG_NAME).toFactory();
+
+    }
+
+    public PolicyFactory getHtmlCleanerPolicyFactory() {
+        return policyFactory;
+    }
+
+    public Map<String, AttributePolicy> getDynamicAttributesPolicyMap() {
+        return dynamicAttributesPolicyMap;
+    }
+
+    public List<String> getOnInvalidRemoveTagList() {
+        return onInvalidRemoveTagList;
+    }
+
+    public CssValidator getCssValidator() {
+        return cssValidator;
+    }
+
+    private static Predicate<String> matchesToPatterns(List<Pattern> patternList) {
+        return new Predicate<String>() {
+            @Override
+            public boolean apply(String s) {
+                for (Pattern pattern : patternList) {
+                    if (pattern.matcher(s).matches()) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        };
+    }
+
+    private static Predicate<String> matchesToPatternsAndLiterals(List<Pattern> patternList, boolean ignoreCase, List<String> literalList) {

Review Comment:
   Rename to `matchesPatternsOrLiterals`. Reuse the pattern matcher from above and combine it via `or` with a literal matcher to reduce duplication.



##########
src/main/java/org/apache/sling/xss/impl/HtmlSanitizer.java:
##########
@@ -0,0 +1,88 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy;
+import org.owasp.html.DynamicAttributesSanitizerPolicy;
+import org.owasp.html.Handler;
+import org.owasp.html.HtmlStreamEventReceiver;
+import org.owasp.html.HtmlStreamRenderer;
+import org.owasp.html.PolicyFactory;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+public class HtmlSanitizer {
+
+    private AntiSamyPolicyAdapter customPolicy;
+    private ImmutableMap policies;
+    private ImmutableSet<String> textContainers;
+
+    public HtmlSanitizer(AntiSamyPolicy policy) {
+        this.customPolicy = new AntiSamyPolicyAdapter(policy);
+        policies = reflectionGetPolicies(customPolicy.getHtmlCleanerPolicyFactory());
+        textContainers = reflectionGetTextContainers(customPolicy.getHtmlCleanerPolicyFactory());
+    }
+
+    public SanitizedResult scan(String taintedHTML) {
+        StringBuilder sb = new StringBuilder(taintedHTML.length());
+        HtmlStreamEventReceiver out = HtmlStreamRenderer.create(sb, Handler.DO_NOTHING);
+        DynamicAttributesSanitizerPolicy dynamicPolicy = new DynamicAttributesSanitizerPolicy(out, policies,
+                textContainers, customPolicy.getDynamicAttributesPolicyMap(), customPolicy.getOnInvalidRemoveTagList());
+
+        org.owasp.html.HtmlSanitizer.sanitize(taintedHTML, dynamicPolicy,
+                customPolicy.getCssValidator().newStyleTagProcessor());
+        return new SanitizedResult(sb.toString(), dynamicPolicy.getNumberOfErrors());
+    }
+
+    private ImmutableSet<String> reflectionGetTextContainers(PolicyFactory policyFactory) {
+        Class<?> c = policyFactory.getClass();
+        try {
+            Field field = c.getDeclaredField("textContainers");
+            field.setAccessible(true);
+            return (ImmutableSet<String>) field.get(policyFactory);
+        } catch (NoSuchFieldException | SecurityException | IllegalAccessException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private ImmutableMap reflectionGetPolicies(PolicyFactory policyFactory) {
+        Class<?> c = policyFactory.getClass();
+        try {
+            Field field = c.getDeclaredField("policies");
+            field.setAccessible(true);
+            return (ImmutableMap) field.get(policyFactory);
+        } catch (NoSuchFieldException | SecurityException | IllegalAccessException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public String scan(String taintedHTML, AntiSamyPolicy policy) throws Exception {
+        if (taintedHTML == null) {

Review Comment:
   Rather use `Objects.requireNonNull(T, String)`: https://docs.oracle.com/javase/8/docs/api/java/util/Objects.html#requireNonNull-T-java.lang.String-



##########
src/main/java/org/apache/sling/xss/impl/AntiSamyPolicyAdapter.java:
##########
@@ -0,0 +1,289 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;

Review Comment:
   Sling uses the JetBrains null annotations rather than `javax.annotation.Nullable`.



##########
src/main/java/org/apache/sling/xss/impl/xml/AntiSamyXmlParser.java:
##########
@@ -0,0 +1,55 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.xml;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.ctc.wstx.stax.WstxInputFactory;
+import com.ctc.wstx.stax.WstxOutputFactory;
+import com.fasterxml.jackson.dataformat.xml.XmlMapper;
+
+public class AntiSamyXmlParser {
+
+    private static final String DIRECTIVE_EMBED_STYLE_SHEETS = "embedStyleSheets";
+
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    public AntiSamyRules createRules(InputStream input) throws XMLStreamException, IOException {
+
+        XMLInputFactory xmlInputFactory = new WstxInputFactory();
+        XMLStreamReader xmlStreamReader;
+        AntiSamyRules rules = null;
+        xmlStreamReader = xmlInputFactory.createXMLStreamReader(input);

Review Comment:
   The `XMLStreamReader` is not closed.



##########
src/main/java/org/apache/sling/xss/impl/AntiSamyPolicyAdapter.java:
##########
@@ -0,0 +1,289 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;
+
+import org.apache.sling.xss.impl.style.CssValidator;
+import org.apache.sling.xss.impl.xml.Attribute;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy;
+import org.apache.sling.xss.impl.xml.Tag;
+import org.owasp.html.AttributePolicy;
+import org.owasp.html.HtmlPolicyBuilder;
+import org.owasp.html.PolicyFactory;
+
+import com.google.common.base.Predicate;
+
+public class AntiSamyPolicyAdapter {
+    private static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes";
+    private static final String REMOVE_TAG_ON_INVALID_ACTION = "removeTag";
+
+    private final List<String> onInvalidRemoveTagList = new ArrayList<>();
+    private final Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>();
+
+    private PolicyFactory policyFactory;
+    private CssValidator cssValidator;
+
+    public AntiSamyPolicyAdapter(AntiSamyPolicy policy) {
+        removeAttributeGuards();
+        HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder();
+
+        cssValidator = new CssValidator(policy.getCssPolicy());
+
+        // ------------ this is for the global attributes -------------
+        Map<String, Attribute> globalAttributes = policy.getGlobalAttributes();
+
+        for (Attribute attribute : globalAttributes.values()) {
+            if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                onInvalidRemoveTagList.add(attribute.getName());
+            }
+
+            if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) {
+                // we match style tags separately
+                policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy())
+                        .globally();
+            } else {
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                for (String allowedValue : allowedValuesFromAttribute) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally();
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                if (!regexsFromAttribute.isEmpty()) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute))
+                            .globally();
+                } else {
+                    policyBuilder.allowAttributes(attribute.getName()).globally();
+                }
+            }
+        }
+
+        // ------------ this is for the allowed empty tags -------------
+        List<String> allowedEmptyTags = policy.getAllowedEmptyTags();
+        for (String allowedEmptyTag : allowedEmptyTags) {
+            policyBuilder.allowWithoutAttributes(allowedEmptyTag);
+        }
+
+        // ------------ this is for the tag rules -------------
+        Map<String, Tag> tagMap = policy.getTagRules();
+        for (Map.Entry<String, Tag> tag : tagMap.entrySet()) {
+
+            String tagAction = tag.getValue().getAction();
+            switch (tagAction) {
+                // Tag.action
+                case AntiSamyActions.TRUNCATE:
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    break;
+
+                // filter: remove tags, but keep content,
+                case AntiSamyActions.FILTER:
+                    break;
+
+                // remove: remove tag and contents
+                case AntiSamyActions.REMOVE:
+                    policyBuilder.disallowElements(tag.getValue().getName());
+                    break;
+
+                case AntiSamyActions.VALIDATE:
+                case "":
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    boolean styleSeen = false;
+                    // get the allowed Attributes for the tag
+                    Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap();
+                    // if there are allowed Attributes, map over them
+                    for (Attribute attribute : allowedAttributes.values()) {
+
+                        if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                            onInvalidRemoveTagList.add(attribute.getName());
+                        }
+
+                        styleSeen = CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName());
+
+                        List<String> literalList = attribute.getLiterals();
+                        List<Pattern> patternList = attribute.getPatternList();
+
+                        if (!literalList.isEmpty() && !patternList.isEmpty()) {
+                            // if both, the patterns and the literals are not empty, the value should be checked with them with an OR and not with an AND.
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(matchesToPatternsAndLiterals(patternList, true, literalList))
+                                .onElements(tag.getValue().getName());
+                        }
+                        else if (!literalList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(true, literalList.toArray(new String[0]))
+                                .onElements(tag.getValue().getName());
+                            policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName());
+                        }
+                        else if (!patternList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                    .matching(matchesToPatterns(patternList))
+                                    .onElements(tag.getValue().getName());
+                        }
+                    }
+
+                    if (!styleSeen) {
+                        policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                                .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName());
+                    }
+                    break;
+
+                default:
+                    throw new RuntimeException("No tag action found.");

Review Comment:
   A more specific `IllegalArgumentException` would make sense here instead of a plain `RuntimeException`.



##########
src/test/java/org/apache/sling/xss/impl/xml/PolicyTest.java:
##########
@@ -0,0 +1,69 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or
+ * more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the
+ * Apache License, Version 2.0 (the "License"); you may not use
+ * this file except in compliance with the License. You may obtain
+ * a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+ * applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ ******************************************************************************/
+package org.apache.sling.xss.impl.xml;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.InputStream;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.sling.xss.impl.PolicyException;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy.CssPolicy;
+import org.junit.jupiter.api.Test;
+
+class PolicyTest {
+
+    @Test
+    void loadDefaultPolicy() throws Exception, PolicyException {
+        InputStream input = AntiSamyPolicy.class.getClassLoader().getResourceAsStream("SLING-INF/content/config.xml");

Review Comment:
   Use try-with-resources to close the `InputStream`.



##########
src/main/java/org/apache/sling/xss/impl/AntiSamyPolicyAdapter.java:
##########
@@ -0,0 +1,289 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;
+
+import org.apache.sling.xss.impl.style.CssValidator;
+import org.apache.sling.xss.impl.xml.Attribute;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy;
+import org.apache.sling.xss.impl.xml.Tag;
+import org.owasp.html.AttributePolicy;
+import org.owasp.html.HtmlPolicyBuilder;
+import org.owasp.html.PolicyFactory;
+
+import com.google.common.base.Predicate;
+
+public class AntiSamyPolicyAdapter {
+    private static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes";
+    private static final String REMOVE_TAG_ON_INVALID_ACTION = "removeTag";
+
+    private final List<String> onInvalidRemoveTagList = new ArrayList<>();
+    private final Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>();
+
+    private PolicyFactory policyFactory;
+    private CssValidator cssValidator;
+
+    public AntiSamyPolicyAdapter(AntiSamyPolicy policy) {
+        removeAttributeGuards();
+        HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder();
+
+        cssValidator = new CssValidator(policy.getCssPolicy());
+
+        // ------------ this is for the global attributes -------------
+        Map<String, Attribute> globalAttributes = policy.getGlobalAttributes();
+
+        for (Attribute attribute : globalAttributes.values()) {
+            if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                onInvalidRemoveTagList.add(attribute.getName());
+            }
+
+            if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) {
+                // we match style tags separately
+                policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy())
+                        .globally();
+            } else {
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                for (String allowedValue : allowedValuesFromAttribute) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally();
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                if (!regexsFromAttribute.isEmpty()) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute))
+                            .globally();
+                } else {
+                    policyBuilder.allowAttributes(attribute.getName()).globally();
+                }
+            }
+        }
+
+        // ------------ this is for the allowed empty tags -------------
+        List<String> allowedEmptyTags = policy.getAllowedEmptyTags();
+        for (String allowedEmptyTag : allowedEmptyTags) {
+            policyBuilder.allowWithoutAttributes(allowedEmptyTag);
+        }
+
+        // ------------ this is for the tag rules -------------
+        Map<String, Tag> tagMap = policy.getTagRules();
+        for (Map.Entry<String, Tag> tag : tagMap.entrySet()) {
+
+            String tagAction = tag.getValue().getAction();
+            switch (tagAction) {
+                // Tag.action
+                case AntiSamyActions.TRUNCATE:
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    break;
+
+                // filter: remove tags, but keep content,
+                case AntiSamyActions.FILTER:
+                    break;
+
+                // remove: remove tag and contents
+                case AntiSamyActions.REMOVE:
+                    policyBuilder.disallowElements(tag.getValue().getName());
+                    break;
+
+                case AntiSamyActions.VALIDATE:
+                case "":
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    boolean styleSeen = false;
+                    // get the allowed Attributes for the tag
+                    Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap();
+                    // if there are allowed Attributes, map over them
+                    for (Attribute attribute : allowedAttributes.values()) {
+
+                        if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                            onInvalidRemoveTagList.add(attribute.getName());
+                        }
+
+                        styleSeen = CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName());
+
+                        List<String> literalList = attribute.getLiterals();
+                        List<Pattern> patternList = attribute.getPatternList();
+
+                        if (!literalList.isEmpty() && !patternList.isEmpty()) {
+                            // if both, the patterns and the literals are not empty, the value should be checked with them with an OR and not with an AND.
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(matchesToPatternsAndLiterals(patternList, true, literalList))
+                                .onElements(tag.getValue().getName());
+                        }
+                        else if (!literalList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(true, literalList.toArray(new String[0]))
+                                .onElements(tag.getValue().getName());
+                            policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName());
+                        }
+                        else if (!patternList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                    .matching(matchesToPatterns(patternList))
+                                    .onElements(tag.getValue().getName());
+                        }
+                    }
+
+                    if (!styleSeen) {
+                        policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                                .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName());
+                    }
+                    break;
+
+                default:
+                    throw new RuntimeException("No tag action found.");
+            }
+        }
+
+        // disallow style tag on specific elements
+        policyBuilder.disallowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                .onElements(cssValidator.getDisallowedTagNames().toArray(new String[0]));
+
+        // ---------- dynamic attributes ------------
+        Map<String, Attribute> dynamicAttributes = new HashMap<>();
+
+        // checks if the dynamic attributes are allowed
+        if (policy.getDirectives().get(ALLOW_DYNAMIC_ATTRIBUTES).equals("true")) {
+            dynamicAttributes.putAll(policy.getDynamicAttributes());
+            for (Attribute attribute : dynamicAttributes.values()) {
+                if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                    onInvalidRemoveTagList.add(attribute.getName());
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                for (Pattern regex : regexsFromAttribute) {
+                    dynamicAttributesPolicyMap.put(attribute.getName(), newDynamicAttributePolicy(regex));
+                }
+
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                if (!allowedValuesFromAttribute.isEmpty()) {
+                    dynamicAttributesPolicyMap.put(attribute.getName(),
+                            newDynamicAttributePolicy(true, allowedValuesFromAttribute.toArray(new String[0])));
+                }
+
+            }
+        }
+
+        policyFactory = policyBuilder.allowTextIn(CssValidator.STYLE_TAG_NAME).toFactory();
+
+    }
+
+    public PolicyFactory getHtmlCleanerPolicyFactory() {
+        return policyFactory;
+    }
+
+    public Map<String, AttributePolicy> getDynamicAttributesPolicyMap() {
+        return dynamicAttributesPolicyMap;
+    }
+
+    public List<String> getOnInvalidRemoveTagList() {
+        return onInvalidRemoveTagList;
+    }
+
+    public CssValidator getCssValidator() {
+        return cssValidator;
+    }
+
+    private static Predicate<String> matchesToPatterns(List<Pattern> patternList) {
+        return new Predicate<String>() {
+            @Override
+            public boolean apply(String s) {
+                for (Pattern pattern : patternList) {
+                    if (pattern.matcher(s).matches()) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        };
+    }
+
+    private static Predicate<String> matchesToPatternsAndLiterals(List<Pattern> patternList, boolean ignoreCase, List<String> literalList) {
+        return new Predicate<String>() {
+            @Override
+            public boolean apply(String s) {
+                // check if the string matches to the pattern
+                for (Pattern pattern : patternList) {
+                    if (pattern.matcher(s).matches()) {
+                        return true;
+                    }
+                }
+                // if the pattern does not match it goes through the literals
+                for (String string : literalList) {
+                    s = ignoreCase
+                        ? s.toLowerCase()
+                        : s;
+                    if (string.equals(s)) {
+                        return true;
+                    }
+                }
+                // if it neither matches the patterns nor the literals it returns false
+                return false;
+            }
+        };
+    }
+
+    public AttributePolicy newDynamicAttributePolicy(final Pattern pattern) {
+        return new AttributePolicy() {
+            @Override
+            public @Nullable String apply(String elementName, String attributeName, String value) {
+                return pattern.matcher(value).matches() ? value : null;
+            }
+        };
+    }
+
+    public AttributePolicy newDynamicAttributePolicy(boolean ignoreCase, String... allowedValues) {
+        final List<String> allowed = Arrays.asList(allowedValues);
+        return new AttributePolicy() {
+            @Override
+            public @Nullable String apply(String elementName, String attributeName, String uncanonValue) {
+                String value = ignoreCase ? uncanonValue.toLowerCase() : uncanonValue;
+                return allowed.contains(value) ? value : null;
+            }
+        };
+    }
+
+    // java html sanitizer has some default Attribute Guards, which we don't want.
+    // So we are removing them here
+    private void removeAttributeGuards() {
+        try {
+            Field guards = HtmlPolicyBuilder.class.getDeclaredField("ATTRIBUTE_GUARDS");
+            letMeIn(guards);
+            guards.set(null, new HashMap<>());
+        } catch (ReflectiveOperationException e) {
+            throw new RuntimeException(e);

Review Comment:
   Prefer `IllegalStateException` over the generic `RuntimeException` here.



##########
src/main/java/org/apache/sling/xss/impl/AntiSamyPolicyAdapter.java:
##########
@@ -0,0 +1,289 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import javax.annotation.Nullable;
+
+import org.apache.sling.xss.impl.style.CssValidator;
+import org.apache.sling.xss.impl.xml.Attribute;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy;
+import org.apache.sling.xss.impl.xml.Tag;
+import org.owasp.html.AttributePolicy;
+import org.owasp.html.HtmlPolicyBuilder;
+import org.owasp.html.PolicyFactory;
+
+import com.google.common.base.Predicate;
+
+public class AntiSamyPolicyAdapter {
+    private static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes";
+    private static final String REMOVE_TAG_ON_INVALID_ACTION = "removeTag";
+
+    private final List<String> onInvalidRemoveTagList = new ArrayList<>();
+    private final Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>();
+
+    private PolicyFactory policyFactory;
+    private CssValidator cssValidator;
+
+    public AntiSamyPolicyAdapter(AntiSamyPolicy policy) {
+        removeAttributeGuards();
+        HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder();
+
+        cssValidator = new CssValidator(policy.getCssPolicy());
+
+        // ------------ this is for the global attributes -------------
+        Map<String, Attribute> globalAttributes = policy.getGlobalAttributes();
+
+        for (Attribute attribute : globalAttributes.values()) {
+            if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                onInvalidRemoveTagList.add(attribute.getName());
+            }
+
+            if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) {
+                // we match style tags separately
+                policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy())
+                        .globally();
+            } else {
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                for (String allowedValue : allowedValuesFromAttribute) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally();
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                if (!regexsFromAttribute.isEmpty()) {
+                    policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute))
+                            .globally();
+                } else {
+                    policyBuilder.allowAttributes(attribute.getName()).globally();
+                }
+            }
+        }
+
+        // ------------ this is for the allowed empty tags -------------
+        List<String> allowedEmptyTags = policy.getAllowedEmptyTags();
+        for (String allowedEmptyTag : allowedEmptyTags) {
+            policyBuilder.allowWithoutAttributes(allowedEmptyTag);
+        }
+
+        // ------------ this is for the tag rules -------------
+        Map<String, Tag> tagMap = policy.getTagRules();
+        for (Map.Entry<String, Tag> tag : tagMap.entrySet()) {
+
+            String tagAction = tag.getValue().getAction();
+            switch (tagAction) {
+                // Tag.action
+                case AntiSamyActions.TRUNCATE:
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    break;
+
+                // filter: remove tags, but keep content,
+                case AntiSamyActions.FILTER:
+                    break;
+
+                // remove: remove tag and contents
+                case AntiSamyActions.REMOVE:
+                    policyBuilder.disallowElements(tag.getValue().getName());
+                    break;
+
+                case AntiSamyActions.VALIDATE:
+                case "":
+                    policyBuilder.allowElements(tag.getValue().getName());
+                    boolean styleSeen = false;
+                    // get the allowed Attributes for the tag
+                    Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap();
+                    // if there are allowed Attributes, map over them
+                    for (Attribute attribute : allowedAttributes.values()) {
+
+                        if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                            onInvalidRemoveTagList.add(attribute.getName());
+                        }
+
+                        styleSeen = CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName());
+
+                        List<String> literalList = attribute.getLiterals();
+                        List<Pattern> patternList = attribute.getPatternList();
+
+                        if (!literalList.isEmpty() && !patternList.isEmpty()) {
+                            // if both, the patterns and the literals are not empty, the value should be checked with them with an OR and not with an AND.
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(matchesToPatternsAndLiterals(patternList, true, literalList))
+                                .onElements(tag.getValue().getName());
+                        }
+                        else if (!literalList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                .matching(true, literalList.toArray(new String[0]))
+                                .onElements(tag.getValue().getName());
+                            policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName());
+                        }
+                        else if (!patternList.isEmpty()) {
+                            policyBuilder.allowAttributes(attribute.getName())
+                                    .matching(matchesToPatterns(patternList))
+                                    .onElements(tag.getValue().getName());
+                        }
+                    }
+
+                    if (!styleSeen) {
+                        policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                                .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName());
+                    }
+                    break;
+
+                default:
+                    throw new RuntimeException("No tag action found.");
+            }
+        }
+
+        // disallow style tag on specific elements
+        policyBuilder.disallowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME)
+                .onElements(cssValidator.getDisallowedTagNames().toArray(new String[0]));
+
+        // ---------- dynamic attributes ------------
+        Map<String, Attribute> dynamicAttributes = new HashMap<>();
+
+        // checks if the dynamic attributes are allowed
+        if (policy.getDirectives().get(ALLOW_DYNAMIC_ATTRIBUTES).equals("true")) {
+            dynamicAttributes.putAll(policy.getDynamicAttributes());
+            for (Attribute attribute : dynamicAttributes.values()) {
+                if (attribute.getOnInvalid().equals(REMOVE_TAG_ON_INVALID_ACTION)) {
+                    onInvalidRemoveTagList.add(attribute.getName());
+                }
+
+                List<Pattern> regexsFromAttribute = attribute.getPatternList();
+                for (Pattern regex : regexsFromAttribute) {
+                    dynamicAttributesPolicyMap.put(attribute.getName(), newDynamicAttributePolicy(regex));
+                }
+
+                List<String> allowedValuesFromAttribute = attribute.getLiterals();
+                if (!allowedValuesFromAttribute.isEmpty()) {
+                    dynamicAttributesPolicyMap.put(attribute.getName(),
+                            newDynamicAttributePolicy(true, allowedValuesFromAttribute.toArray(new String[0])));
+                }
+
+            }
+        }
+
+        policyFactory = policyBuilder.allowTextIn(CssValidator.STYLE_TAG_NAME).toFactory();
+
+    }
+
+    public PolicyFactory getHtmlCleanerPolicyFactory() {
+        return policyFactory;
+    }
+
+    public Map<String, AttributePolicy> getDynamicAttributesPolicyMap() {
+        return dynamicAttributesPolicyMap;
+    }
+
+    public List<String> getOnInvalidRemoveTagList() {
+        return onInvalidRemoveTagList;
+    }
+
+    public CssValidator getCssValidator() {
+        return cssValidator;
+    }
+
+    private static Predicate<String> matchesToPatterns(List<Pattern> patternList) {
+        return new Predicate<String>() {
+            @Override
+            public boolean apply(String s) {

Review Comment:
   Rather use `Pattern.asPredicate()` (https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#asPredicate--) together with `Predicate.or(...)` (https://docs.oracle.com/javase/8/docs/api/java/util/function/Predicate.html#or-java.util.function.Predicate-).



##########
src/main/java/org/apache/sling/xss/impl/PolicyException.java:
##########
@@ -0,0 +1,31 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+public class PolicyException extends Exception {

Review Comment:
   Should this really be a checked exception?



##########
src/main/java/org/apache/sling/xss/impl/xml/AntiSamyXmlParser.java:
##########
@@ -0,0 +1,55 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.xml;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.ctc.wstx.stax.WstxInputFactory;
+import com.ctc.wstx.stax.WstxOutputFactory;
+import com.fasterxml.jackson.dataformat.xml.XmlMapper;
+
+public class AntiSamyXmlParser {
+
+    private static final String DIRECTIVE_EMBED_STYLE_SHEETS = "embedStyleSheets";
+
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    public AntiSamyRules createRules(InputStream input) throws XMLStreamException, IOException {
+
+        XMLInputFactory xmlInputFactory = new WstxInputFactory();

Review Comment:
   Switch from StAX to SAX to leverage the JDK implementation and get rid of the Woodstox dependency.



##########
src/main/java/org/apache/sling/xss/impl/style/ValidatingDocumentHandler.java:
##########
@@ -0,0 +1,342 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.style;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.StringJoiner;
+import java.util.stream.Collectors;
+
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy.CssPolicy;
+import org.apache.sling.xss.impl.xml.Property;
+import org.w3c.css.sac.CSSException;
+import org.w3c.css.sac.CombinatorCondition;
+import org.w3c.css.sac.Condition;
+import org.w3c.css.sac.ConditionalSelector;
+import org.w3c.css.sac.DescendantSelector;
+import org.w3c.css.sac.DocumentHandler;
+import org.w3c.css.sac.InputSource;
+import org.w3c.css.sac.LexicalUnit;
+import org.w3c.css.sac.NegativeCondition;
+import org.w3c.css.sac.SACMediaList;
+import org.w3c.css.sac.Selector;
+import org.w3c.css.sac.SelectorList;
+import org.w3c.css.sac.SiblingSelector;
+
+public class ValidatingDocumentHandler implements DocumentHandler {
+
+    private final CssPolicy cssPolicy;
+    private final StringBuilder cleanCss = new StringBuilder();
+    private final boolean inline;
+
+    private boolean inSelector;

Review Comment:
   Suggested name for this boolean field: `isInSelector`.



##########
src/main/java/org/apache/sling/xss/impl/HtmlSanitizer.java:
##########
@@ -0,0 +1,88 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl;
+
+import java.lang.reflect.Field;
+
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy;
+import org.owasp.html.DynamicAttributesSanitizerPolicy;
+import org.owasp.html.Handler;
+import org.owasp.html.HtmlStreamEventReceiver;
+import org.owasp.html.HtmlStreamRenderer;
+import org.owasp.html.PolicyFactory;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+public class HtmlSanitizer {
+
+    private AntiSamyPolicyAdapter customPolicy;
+    private ImmutableMap policies;
+    private ImmutableSet<String> textContainers;
+
+    /**
+     * Creates a sanitizer for the given AntiSamy policy.
+     * <p>
+     * The policy is wrapped in an {@link AntiSamyPolicyAdapter}. The element policies and the
+     * text container names are then obtained from the adapter's {@link PolicyFactory} through
+     * the reflective accessors of this class.
+     *
+     * @param policy the AntiSamy policy to adapt
+     */
+    public HtmlSanitizer(AntiSamyPolicy policy) {
+        this.customPolicy = new AntiSamyPolicyAdapter(policy);
+        this.policies = reflectionGetPolicies(this.customPolicy.getHtmlCleanerPolicyFactory());
+        this.textContainers = reflectionGetTextContainers(this.customPolicy.getHtmlCleanerPolicyFactory());
+    }
+
+    /**
+     * Sanitizes the given HTML with the policy this instance was created for.
+     *
+     * @param taintedHTML the untrusted markup; must not be {@code null} because its length is read
+     * @return the rendered output together with the number of errors reported by the
+     *         {@link DynamicAttributesSanitizerPolicy} used for this run
+     */
+    public SanitizedResult scan(String taintedHTML) {
+        // The renderer appends the sanitized markup to this buffer, pre-sized to the input length.
+        StringBuilder cleanHtml = new StringBuilder(taintedHTML.length());
+        HtmlStreamEventReceiver renderer = HtmlStreamRenderer.create(cleanHtml, Handler.DO_NOTHING);
+        DynamicAttributesSanitizerPolicy sanitizerPolicy = new DynamicAttributesSanitizerPolicy(
+                renderer,
+                policies,
+                textContainers,
+                customPolicy.getDynamicAttributesPolicyMap(),
+                customPolicy.getOnInvalidRemoveTagList());
+
+        org.owasp.html.HtmlSanitizer.sanitize(
+                taintedHTML,
+                sanitizerPolicy,
+                customPolicy.getCssValidator().newStyleTagProcessor());
+
+        return new SanitizedResult(cleanHtml.toString(), sanitizerPolicy.getNumberOfErrors());
+    }
+
+    /**
+     * Reads the non-public {@code textContainers} field of the given policy factory.
+     *
+     * @param policyFactory the factory whose field is read
+     * @return the value of the {@code textContainers} field
+     * @throws IllegalStateException if the field does not exist or cannot be accessed
+     */
+    // NOTE(review): the cast relies on the field being declared as a set of strings in the
+    // owasp library version in use; reflection cannot verify the type argument — confirm.
+    @SuppressWarnings("unchecked")
+    private ImmutableSet<String> reflectionGetTextContainers(PolicyFactory policyFactory) {
+        return (ImmutableSet<String>) reflectionGetField(policyFactory, "textContainers");
+    }
+
+    /**
+     * Reads the non-public {@code policies} field of the given policy factory.
+     *
+     * @param policyFactory the factory whose field is read
+     * @return the value of the {@code policies} field
+     * @throws IllegalStateException if the field does not exist or cannot be accessed
+     */
+    @SuppressWarnings("rawtypes") // the raw return type is kept so that existing callers are unaffected
+    private ImmutableMap reflectionGetPolicies(PolicyFactory policyFactory) {
+        return (ImmutableMap) reflectionGetField(policyFactory, "policies");
+    }
+
+    /**
+     * Returns the value of a declared (possibly private) field of the given policy factory.
+     * Shared by the two accessors above so the reflective lookup exists only once.
+     *
+     * @param policyFactory the object to read the field from
+     * @param fieldName the name of the field declared on the factory's runtime class
+     * @return the current value of the field
+     * @throws IllegalStateException if the field does not exist or cannot be accessed
+     */
+    private static Object reflectionGetField(PolicyFactory policyFactory, String fieldName) {
+        Class<?> factoryClass = policyFactory.getClass();
+        try {
+            Field field = factoryClass.getDeclaredField(fieldName);
+            field.setAccessible(true);
+            return field.get(policyFactory);
+        } catch (NoSuchFieldException | SecurityException | IllegalAccessException e) {
+            // IllegalStateException is still a RuntimeException, but now names what failed.
+            throw new IllegalStateException(
+                    "Cannot read field '" + fieldName + "' of " + factoryClass.getName(), e);
+        }
+    }
+
+    public String scan(String taintedHTML, AntiSamyPolicy policy) throws Exception {
+        if (taintedHTML == null) {
+            throw new Exception("Null html input");
+        }
+
+        if (policy == null) {
+            throw new Exception("No policy loaded");
+        }
+        return "safeHTML";

Review Comment:
   What is the use case for this method, which always returns the literal "safeHTML"?



##########
src/main/java/org/apache/sling/xss/impl/style/BatikCssCleaner.java:
##########
@@ -0,0 +1,82 @@
+/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements.  See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership.  The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License.  You may obtain a copy of the License at
+ ~
+ ~   http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied.  See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
+package org.apache.sling.xss.impl.style;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.batik.css.parser.Parser;
+import org.apache.sling.xss.impl.xml.AntiSamyPolicy.CssPolicy;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.css.sac.CSSException;
+import org.w3c.css.sac.InputSource;
+
+public class BatikCssCleaner {
+
+    /** Opening marker of a CDATA section that may wrap a stylesheet. */
+    private static final String CDATA_PRE = "<![CDATA[";
+    /** Closing marker of a CDATA section that may wrap a stylesheet. */
+    private static final String CDATA_POST = "]]>";
+
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+    private final CssPolicy cssPolicy;
+
+    /**
+     * Creates a cleaner that validates CSS against the given policy.
+     *
+     * @param cssPolicy the policy handed to every {@link ValidatingDocumentHandler} this cleaner creates
+     */
+    public BatikCssCleaner(CssPolicy cssPolicy) {
+        this.cssPolicy = cssPolicy;
+    }
+
+    /**
+     * Cleans a complete CSS stylesheet.
+     * <p>
+     * If the text is entirely wrapped in a CDATA section, the wrapper is stripped first. The
+     * remaining text is parsed with a Batik {@link Parser} that reports to a
+     * {@link ValidatingDocumentHandler}, and the CSS collected by that handler is returned.
+     *
+     * @param untrustedCss a complete CSS stylesheet
+     * @return the cleaned CSS stylesheet text, or an empty string if parsing fails with a
+     *         {@link CSSException} or {@link IOException}
+     */
+    public String cleanStylesheet(String untrustedCss) {
+        final boolean wrappedInCdata = untrustedCss.startsWith(CDATA_PRE) && untrustedCss.endsWith(CDATA_POST);
+        final String css;
+        if (wrappedInCdata) {
+            css = untrustedCss.substring(CDATA_PRE.length(), untrustedCss.length() - CDATA_POST.length());
+        } else {
+            css = untrustedCss;
+        }
+
+        try {
+            Parser cssParser = new Parser();
+            ValidatingDocumentHandler validatingHandler = new ValidatingDocumentHandler(cssPolicy, false);
+            cssParser.setDocumentHandler(validatingHandler);
+            cssParser.parseStyleSheet(new InputSource(new StringReader(css)));
+            return validatingHandler.getValidCss();
+        } catch (CSSException | IOException e) {
+            // Parsing problems are logged at debug level only and result in empty output.
+            logger.debug("Unexpected error while cleaning stylesheet", e);
+            return "";
+        }
+    }
+
+    /**
+     * Parses a CSS style declaration (i.e. the text of a <tt>style</tt> attribute) and returns it in a safe form
+     *
+     * @param untrustedCss a css style declaration
+     * @return the cleaned CSS style declaration
+     */
+    public String cleanStyleDeclaration(String untrustedCss) {
+        try {
+            Parser parser = new Parser();
+            ValidatingDocumentHandler handler = new ValidatingDocumentHandler(cssPolicy, true);
+            parser.setDocumentHandler(handler);
+            parser.parseStyleDeclaration(new InputSource(new StringReader(untrustedCss)));
+            return handler.getValidCss();
+        } catch (CSSException | IOException e) {
+            logger.debug("Unexpected error while cleaning style declaration", e);

Review Comment:
   Exception handling



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@sling.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org