You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by je...@apache.org on 2019/08/23 17:42:23 UTC
[sling-org-apache-sling-commons-html] 01/02: Adding an ElementFactory to utils to assist in creating elements
This is an automated email from the ASF dual-hosted git repository.
jeb pushed a commit to branch SLING-8654
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git
commit 21840d64bfe8ea0785a0c4c7a8328e9af1966f9c
Author: JE Bailey <je...@apache.org>
AuthorDate: Thu Aug 22 11:54:14 2019 -0400
Adding an ElementFactory to utils to assist in creating elements
---
.../sling/commons/html/impl/tag/DocType.java | 7 ++
.../sling/commons/html/impl/tag/StartTag.java | 10 ++
.../sling/commons/html/util/ElementFactory.java | 51 ++++++++
.../sling/commons/html/util/HtmlElements.java | 134 +++++++++++++++++++++
.../org/apache/sling/commons/html/HtmlStreams.java | 73 -----------
.../sling/commons/html/TagstreamHtmlParseTest.java | 27 +++--
6 files changed, 221 insertions(+), 81 deletions(-)
diff --git a/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java b/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
index c90a9b4..29a1cf2 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/tag/DocType.java
@@ -13,6 +13,7 @@
*/
package org.apache.sling.commons.html.impl.tag;
+import java.util.HashMap;
import java.util.Map;
import org.apache.sling.commons.html.AttrValue;
@@ -29,6 +30,12 @@ public class DocType implements HtmlElement {
this.attributes = attrList;
}
+ /**
+  * Creates a DOCTYPE element from a single piece of text.
+  * <p>
+  * The element value is fixed to {@code "!DOCTYPE"} and {@code text} is stored
+  * as the only attribute key, mapped to a default-constructed {@link AttrValue}.
+  * <p>
+  * NOTE(review): the value already starts with {@code '!'}, while the DOCTYPE
+  * branch of {@code HtmlElements.TO_HTML} writes {@code "<!"} before the value
+  * and does not write attributes - confirm the rendered output is as intended.
+  *
+  * @param text the text recorded as the sole attribute name of this element
+  */
+ public DocType(String text) {
+ this.value = "!DOCTYPE";
+ this.attributes = new HashMap<>();
+ this.attributes.put(text, new AttrValue());
+ }
+
@Override
public HtmlElementType getType() {
return HtmlElementType.DOCTYPE;
diff --git a/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java b/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java
index 2dc8468..f87642b 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/tag/StartTag.java
@@ -15,6 +15,7 @@ package org.apache.sling.commons.html.impl.tag;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -30,6 +31,10 @@ public class StartTag implements HtmlElement {
private static List<String> voidTags = Arrays.asList("area", "base", "br", "col", "embed", "hr", "img", "input",
"link", "meta", "param", "source", "track", "wbr");
+ /**
+  * Creates a start tag that has no attributes.
+  * <p>
+  * Delegates to the three-argument constructor with a {@code null} attribute
+  * map, which that constructor replaces with an empty map. The tag is flagged
+  * as self-closing when {@code value} is contained in {@code voidTags}; the
+  * lookup uses {@code List.contains} and is therefore case-sensitive.
+  *
+  * @param value the tag name
+  */
+ public StartTag(String value) {
+ this(value, null, voidTags.contains(value));
+ }
+
public StartTag(String value, Map<String, AttrValue> attrList) {
this(value, attrList, voidTags.contains(value));
}
@@ -37,7 +42,11 @@ public class StartTag implements HtmlElement {
+ /**
+  * Creates a start tag with an explicit self-closing flag.
+  *
+  * @param value       the tag name
+  * @param attrList    the attributes; {@code null} is replaced by a new empty
+  *                    {@link HashMap}, a non-null map is stored as-is (not copied)
+  * @param selfClosing the flag later returned by {@code getVoidTag()}
+  */
public StartTag(String value, Map<String, AttrValue> attrList, boolean selfClosing) {
this.value = value;
this.selfClosing = selfClosing;
+ if (attrList == null) {
+ attrList = new HashMap<>();
+ }
attributes = attrList;
+
}
@Override
@@ -55,6 +64,7 @@ public class StartTag implements HtmlElement {
return true;
}
+ /**
+  * Returns the self-closing flag held by this tag.
+  *
+  * @return the value of the {@code selfClosing} field
+  */
+ @Override
public boolean getVoidTag() {
return selfClosing;
}
diff --git a/src/main/java/org/apache/sling/commons/html/util/ElementFactory.java b/src/main/java/org/apache/sling/commons/html/util/ElementFactory.java
new file mode 100644
index 0000000..1f8dc12
--- /dev/null
+++ b/src/main/java/org/apache/sling/commons/html/util/ElementFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.commons.html.util;
+
+import org.apache.sling.commons.html.HtmlElement;
+import org.apache.sling.commons.html.HtmlElementType;
+import org.apache.sling.commons.html.impl.tag.Comment;
+import org.apache.sling.commons.html.impl.tag.DocType;
+import org.apache.sling.commons.html.impl.tag.EndTag;
+import org.apache.sling.commons.html.impl.tag.StartTag;
+import org.apache.sling.commons.html.impl.tag.TextData;
+
+/**
+ * Static factory that creates {@link HtmlElement} instances from an element
+ * type and a single text value.
+ */
+public final class ElementFactory {
+
+    private ElementFactory() {
+        // static utility class, not meant to be instantiated
+    }
+
+    /**
+     * Creates the element implementation that matches {@code type}, passing
+     * {@code text} to its single-argument constructor.
+     *
+     * @param type the kind of element to create
+     * @param text the text handed to the element constructor
+     * @return the new element, or {@code null} when {@code type} is
+     *         {@code EOF} or any other type that has no mapping here
+     * @throws NullPointerException if {@code type} is {@code null}
+     */
+    public static HtmlElement create(HtmlElementType type, String text) {
+        switch (type) {
+        case COMMENT:
+            return new Comment(text);
+        case DOCTYPE:
+            return new DocType(text);
+        case END_TAG:
+            return new EndTag(text);
+        case START_TAG:
+            return new StartTag(text);
+        case TEXT:
+            return new TextData(text);
+        case EOF:
+        default:
+            // nothing can be built from text alone for these types
+            return null;
+        }
+    }
+
+}
diff --git a/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java b/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
new file mode 100644
index 0000000..d13c2c0
--- /dev/null
+++ b/src/main/java/org/apache/sling/commons/html/util/HtmlElements.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.commons.html.util;
+
+/**
+ * Collector that gathers the HTML elements of a stream and converts them
+ * back to HTML text.
+ */
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.BiConsumer;
+import java.util.function.BinaryOperator;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
+
+import org.apache.sling.commons.html.AttrValue;
+import org.apache.sling.commons.html.HtmlElement;
+import org.apache.sling.commons.html.impl.tag.StartTag;
+
+/**
+ * {@link Collector} that turns a stream of {@link HtmlElement}s back into HTML
+ * text.
+ * <p>
+ * An instance serves both as the collector and as its own mutable result
+ * container: the rendered text of every accumulated element is appended to an
+ * internal buffer, and the finisher returns the buffer content. Typical use:
+ * {@code stream.collect(HtmlElements.elementsToHtml())}.
+ */
+public class HtmlElements implements Collector<HtmlElement, HtmlElements, String> {
+
+    /**
+     * Renders a single element as HTML text.
+     * <ul>
+     * <li>COMMENT: {@code <!--value-->}</li>
+     * <li>DOCTYPE: {@code <!value>}</li>
+     * <li>END_TAG: {@code </value>}</li>
+     * <li>START_TAG: {@code <value>} followed, when the tag has attributes, by
+     * the attributes separated by single spaces; an attribute whose value
+     * reports {@code isEmpty()} is written as the bare name</li>
+     * <li>TEXT: the result of {@code element.toString()}</li>
+     * <li>EOF and anything else: the empty string</li>
+     * </ul>
+     * A START_TAG element is cast to {@link StartTag}, so any other
+     * implementation reporting that type fails with a ClassCastException.
+     */
+    public static final Function<HtmlElement, String> TO_HTML = element -> {
+        StringBuilder buffer = new StringBuilder();
+        switch (element.getType()) {
+        case COMMENT:
+            buffer.append("<!--");
+            buffer.append(element.getValue());
+            buffer.append("-->");
+            break;
+        case DOCTYPE:
+            buffer.append("<!");
+            buffer.append(element.getValue());
+            buffer.append(">");
+            break;
+        case END_TAG:
+            buffer.append("</");
+            buffer.append(element.getValue());
+            buffer.append('>');
+            break;
+        case START_TAG:
+            buffer.append('<');
+            buffer.append(element.getValue());
+            StartTag tag = (StartTag) element;
+            if (tag.hasAttributes()) {
+                buffer.append(' ');
+                buffer.append(tag.getAttributes().entrySet().stream().map(entry -> {
+                    StringBuilder attribute = new StringBuilder();
+                    attribute.append(entry.getKey());
+                    AttrValue value = entry.getValue();
+                    if (!value.isEmpty()) {
+                        attribute.append("=");
+                        attribute.append(value.quoteIfNeeded());
+                    }
+                    return attribute.toString();
+                }).collect(Collectors.joining(" ")));
+            }
+            buffer.append('>');
+            break;
+        case TEXT:
+            buffer.append(element.toString());
+            break;
+        case EOF:
+        default:
+            // nothing to render
+            break;
+        }
+        return buffer.toString();
+    };
+
+    /** Buffer holding the HTML text accumulated so far. */
+    private final StringBuilder sb = new StringBuilder();
+
+    /**
+     * Creates an empty collector; public and argument-free so that
+     * {@code HtmlElements::new} can serve as the supplier.
+     */
+    public HtmlElements() {
+        // to allow for supplier method
+    }
+
+    /**
+     * Creates a new collector instance for use with {@code Stream.collect}.
+     *
+     * @return a fresh, empty {@code HtmlElements}
+     */
+    public static HtmlElements elementsToHtml() {
+        return new HtmlElements();
+    }
+
+    /**
+     * Appends the HTML text of {@code element}, as produced by {@link #TO_HTML}.
+     *
+     * @param element the element to render and append
+     * @return this instance
+     */
+    public HtmlElements append(HtmlElement element) {
+        sb.append(TO_HTML.apply(element));
+        return this;
+    }
+
+    /**
+     * Appends the text already accumulated by another instance.
+     *
+     * @param collector the instance whose text is appended
+     * @return this instance
+     */
+    public HtmlElements append(HtmlElements collector) {
+        sb.append(collector.toString());
+        return this;
+    }
+
+    @Override
+    public Supplier<HtmlElements> supplier() {
+        return HtmlElements::new;
+    }
+
+    @Override
+    public BiConsumer<HtmlElements, HtmlElement> accumulator() {
+        return HtmlElements::append;
+    }
+
+    @Override
+    public BinaryOperator<HtmlElements> combiner() {
+        return (left, right) -> left.append(right);
+    }
+
+    @Override
+    public Function<HtmlElements, String> finisher() {
+        return HtmlElements::toString;
+    }
+
+    /**
+     * Reports no characteristics.
+     *
+     * @return an immutable empty set, as the {@link Collector} contract asks
+     *         for an immutable set
+     */
+    @Override
+    public Set<Characteristics> characteristics() {
+        // fully qualified so that no additional import is required
+        return java.util.Collections.emptySet();
+    }
+
+    /**
+     * Returns the HTML text accumulated so far.
+     */
+    @Override
+    public String toString() {
+        return sb.toString();
+    }
+
+}
diff --git a/src/test/java/org/apache/sling/commons/html/HtmlStreams.java b/src/test/java/org/apache/sling/commons/html/HtmlStreams.java
deleted file mode 100644
index caff982..0000000
--- a/src/test/java/org/apache/sling/commons/html/HtmlStreams.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.sling.commons.html;
-
-import java.util.function.Function;
-import java.util.stream.Collectors;
-
-import org.apache.sling.commons.html.AttrValue;
-import org.apache.sling.commons.html.HtmlElement;
-import org.apache.sling.commons.html.impl.tag.StartTag;
-
-
-public class HtmlStreams {
-
- private HtmlStreams() {
- }
-
- public static final Function<HtmlElement, String> TO_HTML = element ->{
- StringBuilder sb = new StringBuilder();
- switch (element.getType()) {
- case COMMENT:
- sb.append("<!--");
- sb.append(element.getValue());
- sb.append("-->");
- break;
- case DOCTYPE:
- sb.append("<!");
- sb.append(element.getValue());
- sb.append(">");
- break;
- case END_TAG:
- sb.append("</");
- sb.append(element.getValue());
- sb.append('>');
- break;
- case EOF:
- break;
- case START_TAG:
- sb.append('<');
- sb.append(element.getValue());
- StartTag tag = (StartTag) element;
- if (tag.hasAttributes()) {
- sb.append(' ');
- sb.append(tag.getAttributes().entrySet().stream().map(entry -> {
- StringBuilder sb2 = new StringBuilder();
- sb2.append(entry.getKey());
- AttrValue value = entry.getValue();
- if (!value.isEmpty()) {
- sb2.append("=");
- sb2.append(value.quoteIfNeeded());
- }
- return sb2.toString();
- }).collect(Collectors.joining(" ")));
- }
- sb.append('>');
- break;
- case TEXT:
- sb.append(element.toString());
- }
- return sb.toString();
- };
-}
diff --git a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
index a3579df..72eb1c1 100644
--- a/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
+++ b/src/test/java/org/apache/sling/commons/html/TagstreamHtmlParseTest.java
@@ -24,10 +24,12 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.sling.commons.html.internal.TagstreamHtmlParser;
+import org.apache.sling.commons.html.util.HtmlElements;
import org.apache.sling.commons.html.util.HtmlSAXSupport;
import org.junit.Before;
import org.junit.Test;
@@ -60,40 +62,49 @@ public class TagstreamHtmlParseTest {
}
@Test
- public void docParseTagTest() throws Exception {
+ public void parseDocAndCountStartTags() throws Exception {
long count = stream.filter(elem -> elem.getType() == HtmlElementType.START_TAG).count();
assertEquals(902, count);
}
+
+ /** Collects the parsed stream back into HTML text and checks the length of the result. */
+ @Test
+ public void parseDocumentConvertBackToHtml() throws Exception {
+     final int htmlLength = stream.collect(HtmlElements.elementsToHtml()).length();
+     assertEquals(62062, htmlLength);
+ }
+
@Test
- public void docParseAllTest() throws Exception {
+ public void parseDocumentAndCountElements() throws Exception {
long count = stream.count();
assertEquals(2928, count);
}
@Test
- public void docParseAllTestToString() throws Exception {
- stream.map(HtmlStreams.TO_HTML).count();
+ public void parseDocConvertToStringsAndCount() throws Exception {
+ assertEquals(2928, stream.map(HtmlElements.TO_HTML).count());
}
@Test
- public void docParseSAXTest() {
+ public void parseDocAndUseSaxToCountStartTags() {
+ final AtomicInteger count = new AtomicInteger();
HtmlSAXSupport support = new HtmlSAXSupport(new DefaultHandler2() {
+
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
- // System.out.println(localName);
+ count.incrementAndGet();
}
}, new DefaultHandler2());
stream.forEach(support);
+ assertEquals(902, count.get());
}
@Test
- public void docParseTagTest3() throws Exception {
+ public void parseDocFilterOnHrefUsingMapper() throws Exception {
long count = stream.flatMap(TagMapper.map((element, process) -> {
if (element.containsAttribute("href")) {
- // System.out.println(element.getAttributeValue("href"));
process.next(element);
}
})).count();