You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by kw...@apache.org on 2023/01/22 10:11:54 UTC
[maven-doxia] 01/01: [DOXIA-690] Improved support of metadata (both YAML front matter and MultiMarkdown)
This is an automated email from the ASF dual-hosted git repository.
kwin pushed a commit to branch bugfix/improve-metadata-parsing
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git
commit effb97e33f9213d4d8f7c0c668ed9fabf0811346
Author: Konrad Windszus <kw...@apache.org>
AuthorDate: Sun Jan 22 11:11:46 2023 +0100
[DOXIA-690] Improved support of metadata (both YAML front matter and
MultiMarkdown)
Properly support multiline values. Always emit with normalized
separators.
---
doxia-modules/doxia-module-markdown/pom.xml | 5 +
.../doxia/module/markdown/MarkdownParser.java | 104 ++++++++++++++-------
.../doxia-module-markdown/src/site/apt/index.apt | 11 ++-
3 files changed, 86 insertions(+), 34 deletions(-)
diff --git a/doxia-modules/doxia-module-markdown/pom.xml b/doxia-modules/doxia-module-markdown/pom.xml
index f6153534..5d094d7e 100644
--- a/doxia-modules/doxia-module-markdown/pom.xml
+++ b/doxia-modules/doxia-module-markdown/pom.xml
@@ -116,6 +116,11 @@ under the License.
<artifactId>flexmark-ext-wikilink</artifactId>
<version>${flexmarkVersion}</version>
</dependency>
+ <dependency>
+ <groupId>com.vladsch.flexmark</groupId>
+ <artifactId>flexmark-ext-yaml-front-matter</artifactId>
+ <version>${flexmarkVersion}</version>
+ </dependency>
<dependency>
<groupId>org.jetbrains</groupId>
diff --git a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
index c1cb3521..f6ab75e1 100644
--- a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
+++ b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
@@ -44,8 +44,14 @@ import javax.inject.Singleton;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
import com.vladsch.flexmark.ast.Heading;
import com.vladsch.flexmark.ast.HtmlCommentBlock;
@@ -58,6 +64,7 @@ import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
import com.vladsch.flexmark.ext.tables.TablesExtension;
import com.vladsch.flexmark.ext.typographic.TypographicExtension;
import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
+import com.vladsch.flexmark.ext.yaml.front.matter.AbstractYamlFrontMatterVisitor;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.util.ast.Node;
import com.vladsch.flexmark.util.data.MutableDataSet;
@@ -175,6 +182,67 @@ public class MarkdownParser extends AbstractTextParser implements TextMarkup {
}
}
+ /**
+  * Reads the metadata found at the start of the Markdown source and appends the
+  * matching HTML head elements to {@code html}.
+  * <p>
+  * Two metadata flavours are recognised: YAML front matter (source starts with
+  * {@code ---}) and MultiMarkdown style metadata. In the YAML case {@code source} is
+  * left untouched here; in the MultiMarkdown case the matched metadata section is
+  * deleted from {@code source}.
+  *
+  * @param html the buffer receiving the generated head elements
+  * @param source the Markdown source; may be shortened by this method
+  * @return {@code true} if a title element was written, {@code false} otherwise
+  */
+ boolean processMetadataForHtml(StringBuilder html, StringBuilder source) {
+     final Map<String, List<String>> metaData;
+     // snapshot once: used for the prefix check, for parsing and for matching
+     final String sourceText = source.toString();
+     // support two types of metadata:
+     if (sourceText.startsWith("---")) {
+         // 1. YAML front matter (https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter)
+         Node documentRoot = FLEXMARK_PARSER.parse(sourceText);
+         AbstractYamlFrontMatterVisitor visitor = new AbstractYamlFrontMatterVisitor();
+         visitor.visit(documentRoot);
+         metaData = visitor.getData();
+     } else {
+         // 2. MultiMarkdown metadata: an insertion-ordered map keeps the generated
+         // head elements in the order the entries appear in the document
+         metaData = new java.util.LinkedHashMap<>();
+         // First, we interpret the "metadata" section of the document and add the corresponding HTML headers
+         Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(sourceText);
+         if (metadataMatcher.find()) {
+             Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(metadataMatcher.group(0));
+             while (entryMatcher.find()) {
+                 String key = entryMatcher.group(1);
+                 String value = entryMatcher.group(2);
+                 metaData.put(key, Collections.singletonList(value));
+             }
+
+             // Trim the metadata from the source
+             source.delete(0, metadataMatcher.end(0));
+         }
+     }
+     return writeHtmlMetadata(html, metaData);
+ }
+
+ /**
+  * Writes every metadata entry of {@code data} to {@code html} by delegating to the
+  * per-entry overload.
+  *
+  * @param html the buffer receiving the generated head elements
+  * @param data metadata keys mapped to their values
+  * @return {@code true} if at least one entry was reported as the title
+  */
+ boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) {
+     boolean titleWritten = false;
+     for (Entry<String, List<String>> metaEntry : data.entrySet()) {
+         // compound assignment always evaluates the call, so every entry is written
+         titleWritten |= writeHtmlMetadata(html, metaEntry.getKey(), metaEntry.getValue());
+     }
+     return titleWritten;
+ }
+
+ /**
+  * Writes a single metadata entry to {@code html}.
+  * <p>
+  * A key equal to "title" (ignoring case) produces a {@code <title>} element; any
+  * other key produces a {@code <meta>} element. Multiple values are joined with
+  * {@code ", "}, except for the key "author" (ignoring case) with more than one
+  * value, which produces one {@code <meta>} element per author and no joined element.
+  *
+  * @param html the buffer receiving the generated element(s)
+  * @param key the metadata key
+  * @param values the values given for the key
+  * @return {@code true} if a title element was written, {@code false} otherwise
+  */
+ boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) {
+     if ("title".equalsIgnoreCase(key)) {
+         html.append("<title>");
+         html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(", ")), false));
+         html.append("</title>");
+         return true;
+     }
+     if ("author".equalsIgnoreCase(key) && values.size() > 1) {
+         // for multiple authors emit multiple meta tags
+         for (String value : values) {
+             writeHtmlMetadata(html, key, Collections.singletonList(value));
+         }
+         // stop here: otherwise an extra tag joining all authors would follow
+         return false;
+     }
+     html.append("<meta name='");
+     html.append(HtmlTools.escapeHTML(key));
+     html.append("' content='");
+     html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(", "))));
+     html.append("' />");
+     return false;
+ }
+
/**
* uses flexmark-java library to parse content and generate HTML output.
*
@@ -184,48 +252,18 @@ public class MarkdownParser extends AbstractTextParser implements TextMarkup {
*/
String toHtml(Reader source) throws IOException {
// Read the source
- String text = IOUtil.toString(source);
+ StringBuilder markdownText = new StringBuilder(IOUtil.toString(source));
// Now, build the HTML document
StringBuilder html = new StringBuilder(1000);
html.append("<html>");
html.append("<head>");
- // detect yaml style metadata
- if (text.startsWith("---")) {
- // remove the enclosing --- to get back to classical metadata
- text = text.replaceFirst("---", "").replaceFirst("---", "");
- }
-
- // First, we interpret the "metadata" section of the document and add the corresponding HTML headers
- Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(text);
- boolean haveTitle = false;
- if (metadataMatcher.find()) {
- Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(metadataMatcher.group(0));
- while (entryMatcher.find()) {
- String key = entryMatcher.group(1);
- String value = entryMatcher.group(2);
- if ("title".equalsIgnoreCase(key)) {
- haveTitle = true;
- html.append("<title>");
- html.append(HtmlTools.escapeHTML(value, false));
- html.append("</title>");
- } else {
- html.append("<meta name='");
- html.append(HtmlTools.escapeHTML(key));
- html.append("' content='");
- html.append(HtmlTools.escapeHTML(value));
- html.append("' />");
- }
- }
-
- // Trim the metadata from the source
- text = text.substring(metadataMatcher.end(0));
- }
+ boolean haveTitle = processMetadataForHtml(html, markdownText);
// Now is the time to parse the Markdown document
// (after we've trimmed out the metadatas, and before we check for its headings)
- Node documentRoot = FLEXMARK_PARSER.parse(text);
+ Node documentRoot = FLEXMARK_PARSER.parse(markdownText.toString());
// Special trick: if there is no title specified as a metadata in the header, we will use the first
// heading as the document title
diff --git a/doxia-modules/doxia-module-markdown/src/site/apt/index.apt b/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
index b896ffc9..b83bf1ca 100644
--- a/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
+++ b/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
@@ -30,7 +30,16 @@ doxia-module-markdown
Markdown is a popular lightweight markup language, easy to read and easy to write.
It is supported by a large panel of websites, text editors/IDEs and converter tools.
- Markdown format is only supported as Doxia source format.
+ Markdown format is supported both as source (parser) and destination (sink), the latter only since version 1.12.0.
+
+* Metadata
+
+ Although metadata was not part of the original Markdown format, it is now widely supported through multiple extensions.
+ This module supports the following two metadata formats:
+
+ * {{{http://fletcher.github.io/MultiMarkdown-5/metadata.html}MultiMarkdown Metadata}}
+
+ * {{{https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter}YAML front matter}}
* References