You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by je...@apache.org on 2019/08/23 17:42:23 UTC

[sling-org-apache-sling-commons-html] 01/02: Adding an ElementFactory to utils to assist in creating elements

This is an automated email from the ASF dual-hosted git repository.

jeb pushed a commit to branch SLING-8654
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git

commit 21840d64bfe8ea0785a0c4c7a8328e9af1966f9c
Author: JE Bailey <je...@apache.org>
AuthorDate: Thu Aug 22 11:54:14 2019 -0400

    Adding an ElementFactory to utils to assist in creating elements
---
 .../sling/commons/html/impl/tag/DocType.java       |   7 ++
 .../sling/commons/html/impl/tag/StartTag.java      |  10 ++
 .../sling/commons/html/util/ElementFactory.java    |  51 ++++++++
 .../sling/commons/html/util/HtmlElements.java      | 134 +++++++++++++++++++++
 .../org/apache/sling/commons/html/HtmlStreams.java |  73 -----------
 .../sling/commons/html/TagstreamHtmlParseTest.java |  27 +++--
 6 files changed, 221 insertions(+), 81 deletions(-)

diff --git a/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java b/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
index c90a9b4..29a1cf2 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
@@ -13,6 +13,7 @@
  */
 package org.apache.sling.commons.html.impl.tag; 
 
+import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.sling.commons.html.AttrValue;
@@ -29,6 +30,12 @@ public class DocType implements HtmlElement {
         this.attributes = attrList;
     }
     
+    public DocType(String text) { // builds a doctype from its text alone, without a parsed attribute map
+        value = "!DOCTYPE";
+        attributes = new HashMap<>();
+        attributes.put(text, new AttrValue()); // text is stored as an attribute name with a no-arg AttrValue
+    }
+
     @Override
     public HtmlElementType getType() {
         return HtmlElementType.DOCTYPE;
diff --git a/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java b/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java
index 2dc8468..f87642b 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java
@@ -15,6 +15,7 @@ package org.apache.sling.commons.html.impl.tag;
 
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -30,6 +31,10 @@ public class StartTag implements HtmlElement {
     private static List<String> voidTags = Arrays.asList("area", "base", "br", "col", "embed", "hr", "img", "input",
             "link", "meta", "param", "source", "track", "wbr");
 
+    public StartTag(String value) { // tag without attributes; the two-argument constructor looks up voidTags
+        this(value, (Map<String, AttrValue>) null);
+    }
+    
     public StartTag(String value, Map<String, AttrValue> attrList) {
         this(value, attrList, voidTags.contains(value));
     }
@@ -37,7 +42,11 @@ public class StartTag implements HtmlElement {
     public StartTag(String value, Map<String, AttrValue> attrList, boolean selfClosing) {
         this.value = value;
         this.selfClosing = selfClosing;
+        if (attrList == null) {
+            attrList = new HashMap<>();
+        }
         attributes = attrList;
+
     }
 
     @Override
@@ -55,6 +64,7 @@ public class StartTag implements HtmlElement {
         return true;
     }
 
+    @Override
     public boolean getVoidTag() {
         return selfClosing;
     }
diff --git a/src/main/java/org/apache/sling/commons/html/util/ElementFactory.java b/src/main/java/org/apache/sling/commons/html/util/ElementFactory.java
new file mode 100644
index 0000000..1f8dc12
--- /dev/null
+++ b/src/main/java/org/apache/sling/commons/html/util/ElementFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.commons.html.util;
+
+import org.apache.sling.commons.html.HtmlElement;
+import org.apache.sling.commons.html.HtmlElementType;
+import org.apache.sling.commons.html.impl.tag.Comment;
+import org.apache.sling.commons.html.impl.tag.DocType;
+import org.apache.sling.commons.html.impl.tag.EndTag;
+import org.apache.sling.commons.html.impl.tag.StartTag;
+import org.apache.sling.commons.html.impl.tag.TextData;
+
+/** Static factory that creates {@link HtmlElement} instances from an element type and its text. */
+public final class ElementFactory {
+
+    private ElementFactory() {
+    }
+
+    /** Creates the element for {@code type} from {@code text}; yields {@code null} for EOF or any unmapped type. */
+    public static HtmlElement create(HtmlElementType type, String text) {
+        switch (type) {
+        case COMMENT:
+            return new Comment(text);
+        case DOCTYPE:
+            return new DocType(text);
+        case END_TAG:
+            return new EndTag(text);
+        case START_TAG:
+            return new StartTag(text);
+        case TEXT:
+            return new TextData(text);
+        case EOF:
+        default:
+            return null;
+        }
+    }
+}
diff --git a/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java b/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
new file mode 100644
index 0000000..d13c2c0
--- /dev/null
+++ b/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.commons.html.util;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.BiConsumer;
+import java.util.function.BinaryOperator;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
+
+import org.apache.sling.commons.html.AttrValue;
+import org.apache.sling.commons.html.HtmlElement;
+import org.apache.sling.commons.html.impl.tag.StartTag;
+
+/**
+ * Collector that turns a stream of {@link HtmlElement}s back into HTML text. An instance is its
+ * own accumulation container: elements are rendered with {@link #TO_HTML} into an internal buffer.
+ */
+public class HtmlElements implements Collector<HtmlElement, HtmlElements, String> {
+    private final StringBuilder sb = new StringBuilder();
+
+    /** Creates an empty collector; public and no-arg so it can act as the supplier. */
+    public HtmlElements() {
+    }
+
+    /** Appends the HTML rendering of {@code element} and returns this instance. */
+    public HtmlElements append(HtmlElement element) {
+        sb.append(TO_HTML.apply(element));
+        return this;
+    }
+
+    /** Appends the text gathered by {@code collector} and returns this instance. */
+    public HtmlElements append(HtmlElements collector) {
+        sb.append(collector.toString());
+        return this;
+    }
+
+    @Override
+    public Supplier<HtmlElements> supplier() {
+        return HtmlElements::new;
+    }
+
+    @Override
+    public BiConsumer<HtmlElements, HtmlElement> accumulator() {
+        return HtmlElements::append;
+    }
+
+    @Override
+    public BinaryOperator<HtmlElements> combiner() {
+        return (left, right) -> left.append(right);
+    }
+
+    @Override
+    public Function<HtmlElements, String> finisher() {
+        return HtmlElements::toString;
+    }
+
+    /** Reports no characteristics; the returned set is immutable, as the Collector contract asks. */
+    @Override
+    public Set<Characteristics> characteristics() {
+        return Collections.emptySet();
+    }
+
+    @Override
+    public String toString() {
+        return sb.toString();
+    }
+
+    /**
+     * Renders a single element as HTML text. START_TAG elements are cast to {@link StartTag} to
+     * reach their attributes, TEXT uses the element's {@code toString()}, EOF renders nothing.
+     */
+    public static final Function<HtmlElement, String> TO_HTML = element -> {
+        StringBuilder buffer = new StringBuilder();
+        switch (element.getType()) {
+        case COMMENT:
+            buffer.append("<!--").append(element.getValue()).append("-->");
+            break;
+        case DOCTYPE:
+            buffer.append("<!").append(element.getValue()).append(">");
+            break;
+        case END_TAG:
+            buffer.append("</").append(element.getValue()).append('>');
+            break;
+        case START_TAG:
+            buffer.append('<').append(element.getValue());
+            StartTag tag = (StartTag) element;
+            if (tag.hasAttributes()) {
+                buffer.append(' ');
+                buffer.append(tag.getAttributes().entrySet().stream().map(entry -> {
+                    StringBuilder attr = new StringBuilder();
+                    attr.append(entry.getKey());
+                    AttrValue value = entry.getValue();
+                    if (!value.isEmpty()) { // an empty value is written as a bare attribute name
+                        attr.append("=");
+                        attr.append(value.quoteIfNeeded());
+                    }
+                    return attr.toString();
+                }).collect(Collectors.joining(" ")));
+            }
+            buffer.append('>');
+            break;
+        case TEXT:
+            buffer.append(element.toString());
+            break;
+        default: // EOF and anything unmapped render as an empty string
+            break;
+        }
+        return buffer.toString();
+    };
+
+    /** Returns a new collector, for use as the argument of {@code Stream.collect}. */
+    public static HtmlElements elementsToHtml() {
+        return new HtmlElements();
+    }
+}
diff --git a/src/test/java/org/apache/sling/commons/html/HtmlStreams.java b/src/test/java/org/apache/sling/commons/html/HtmlStreams.java
deleted file mode 100644
index caff982..0000000
--- a/src/test/java/org/apache/sling/commons/html/HtmlStreams.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.sling.commons.html;
-
-import java.util.function.Function;
-import java.util.stream.Collectors;
-
-import org.apache.sling.commons.html.AttrValue;
-import org.apache.sling.commons.html.HtmlElement;
-import org.apache.sling.commons.html.impl.tag.StartTag;
-
-
-public class HtmlStreams {
-    
-    private HtmlStreams() {
-    }
-
-    public static final  Function<HtmlElement, String> TO_HTML = element ->{
-        StringBuilder sb = new StringBuilder();
-        switch (element.getType()) {
-        case COMMENT:
-            sb.append("<!--");
-            sb.append(element.getValue());
-            sb.append("-->");
-            break;
-        case DOCTYPE:
-            sb.append("<!");
-            sb.append(element.getValue());
-            sb.append(">");
-            break;
-        case END_TAG:
-            sb.append("</");
-            sb.append(element.getValue());
-            sb.append('>');
-            break;
-        case EOF:
-            break;
-        case START_TAG:
-            sb.append('<');
-            sb.append(element.getValue());
-            StartTag tag = (StartTag) element;
-            if (tag.hasAttributes()) {
-                sb.append(' ');
-                sb.append(tag.getAttributes().entrySet().stream().map(entry -> {
-                    StringBuilder sb2 = new StringBuilder();
-                    sb2.append(entry.getKey());
-                    AttrValue value = entry.getValue();
-                    if (!value.isEmpty()) {
-                        sb2.append("=");
-                        sb2.append(value.quoteIfNeeded());
-                    } 
-                    return sb2.toString();
-                }).collect(Collectors.joining(" ")));
-            }
-            sb.append('>');
-            break;
-        case TEXT:
-            sb.append(element.toString());
-        }
-        return sb.toString();
-    };
-}
diff --git a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
index a3579df..72eb1c1 100644
--- a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
+++ b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
@@ -24,10 +24,12 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.io.InputStream;
 import java.text.ParseException;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Function;
 import java.util.stream.Stream;
 
 import org.apache.sling.commons.html.internal.TagstreamHtmlParser;
+import org.apache.sling.commons.html.util.HtmlElements;
 import org.apache.sling.commons.html.util.HtmlSAXSupport;
 import org.junit.Before;
 import org.junit.Test;
@@ -60,40 +62,49 @@ public class TagstreamHtmlParseTest {
     }
 
     @Test
-    public void docParseTagTest() throws Exception {
+    public void parseDocAndCountStartTags() throws Exception {
         long count = stream.filter(elem -> elem.getType() == HtmlElementType.START_TAG).count();
         assertEquals(902, count);
     }
+    
+    @Test
+    public void parseDocumentConvertBackToHtml() throws Exception {
+        final String html = stream.collect(HtmlElements.elementsToHtml()); // serialise every parsed element
+        assertEquals(62062, html.length());
+    }
+
 
     @Test
-    public void docParseAllTest() throws Exception {
+    public void parseDocumentAndCountElements() throws Exception {
         long count = stream.count();
         assertEquals(2928, count);
     }
 
     @Test
-    public void docParseAllTestToString() throws Exception {
-        stream.map(HtmlStreams.TO_HTML).count();
+    public void parseDocConvertToStringsAndCount() throws Exception {
+        assertEquals(2928, stream.map(HtmlElements.TO_HTML).count());
     }
 
     @Test
-    public void docParseSAXTest() {
+    public void parseDocAndUseSaxToCountStartTags() {
+        final AtomicInteger count = new AtomicInteger();
         HtmlSAXSupport support = new HtmlSAXSupport(new DefaultHandler2() {
+            
             @Override
             public void startElement(String uri, String localName, String qName, Attributes attributes)
                     throws SAXException {
-                // System.out.println(localName);
+                count.incrementAndGet();
             }
 
         }, new DefaultHandler2());
         stream.forEach(support);
+        assertEquals(902, count.get());
     }
 
     @Test
-    public void docParseTagTest3() throws Exception {
+    public void parseDocFilterOnHrefUsingMapper() throws Exception {
         long count = stream.flatMap(TagMapper.map((element, process) -> {
             if (element.containsAttribute("href")) {
-                // System.out.println(element.getAttributeValue("href"));
                 process.next(element);
             }
         })).count();