You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by kw...@apache.org on 2023/01/22 10:11:54 UTC
[maven-doxia] 01/01: [DOXIA-690] Improved support of metadata (both YAML front matter and MultiMarkdown)

This is an automated email from the ASF dual-hosted git repository.

kwin pushed a commit to branch bugfix/improve-metadata-parsing
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git

commit effb97e33f9213d4d8f7c0c668ed9fabf0811346
Author: Konrad Windszus <kw...@apache.org>
AuthorDate: Sun Jan 22 11:11:46 2023 +0100

    [DOXIA-690] Improved support of metadata (both YAML front matter and
    MultiMarkdown)
    
    Properly support multiline values. Always emit with normalized
    separators.
---
 doxia-modules/doxia-module-markdown/pom.xml        |   5 +
 .../doxia/module/markdown/MarkdownParser.java      | 104 ++++++++++++++-------
 .../doxia-module-markdown/src/site/apt/index.apt   |  11 ++-
 3 files changed, 86 insertions(+), 34 deletions(-)

diff --git a/doxia-modules/doxia-module-markdown/pom.xml b/doxia-modules/doxia-module-markdown/pom.xml
index f6153534..5d094d7e 100644
--- a/doxia-modules/doxia-module-markdown/pom.xml
+++ b/doxia-modules/doxia-module-markdown/pom.xml
@@ -116,6 +116,11 @@ under the License.
       <artifactId>flexmark-ext-wikilink</artifactId>
       <version>${flexmarkVersion}</version>
     </dependency>
+    <dependency>
+      <groupId>com.vladsch.flexmark</groupId>
+      <artifactId>flexmark-ext-yaml-front-matter</artifactId>
+      <version>${flexmarkVersion}</version>
+    </dependency>
 
     <dependency>
       <groupId>org.jetbrains</groupId>
diff --git a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
index c1cb3521..f6ab75e1 100644
--- a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
+++ b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
@@ -44,8 +44,14 @@ import javax.inject.Singleton;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 import com.vladsch.flexmark.ast.Heading;
 import com.vladsch.flexmark.ast.HtmlCommentBlock;
@@ -58,6 +64,7 @@ import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
 import com.vladsch.flexmark.ext.tables.TablesExtension;
 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
+import com.vladsch.flexmark.ext.yaml.front.matter.AbstractYamlFrontMatterVisitor;
 import com.vladsch.flexmark.html.HtmlRenderer;
 import com.vladsch.flexmark.util.ast.Node;
 import com.vladsch.flexmark.util.data.MutableDataSet;
@@ -175,6 +182,67 @@ public class MarkdownParser extends AbstractTextParser implements TextMarkup {
         }
     }
 
+    /**
+     * Extracts the metadata section of the given Markdown source and emits it as HTML head elements.
+     * <p>
+     * Two formats are recognized: YAML front matter (the source starts with {@code ---}) and
+     * MultiMarkdown-style metadata matched by {@code METADATA_SECTION_PATTERN}. In the MultiMarkdown case
+     * the matched metadata section is removed from {@code source}; in the YAML case {@code source} is not
+     * modified by this method.
+     *
+     * @param html the buffer the generated head elements are appended to
+     * @param source the Markdown source, potentially modified in place (see above)
+     * @return {@code true} if a title was written from the metadata, {@code false} otherwise
+     */
+    boolean processMetadataForHtml(StringBuilder html, StringBuilder source) {
+        final Map<String, List<String>> metaData;
+        // materialize the source only once, it is needed for both the detection and the YAML parsing
+        final String text = source.toString();
+        // support two types of metadata:
+        if (text.startsWith("---")) {
+            // YAML front matter (https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter)
+            Node documentRoot = FLEXMARK_PARSER.parse(text);
+            AbstractYamlFrontMatterVisitor visitor = new AbstractYamlFrontMatterVisitor();
+            visitor.visit(documentRoot);
+            metaData = visitor.getData();
+        } else {
+            // Keep the entries in document order so that the head elements are emitted in the order in which
+            // they were written (a plain HashMap would emit them in hash order).
+            // Fully qualified on purpose: avoids the need for an additional import.
+            metaData = new java.util.LinkedHashMap<>();
+            // First, we interpret the "metadata" section of the document and add the corresponding HTML headers
+            Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(source);
+            if (metadataMatcher.find()) {
+                Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(metadataMatcher.group(0));
+                while (entryMatcher.find()) {
+                    String key = entryMatcher.group(1);
+                    String value = entryMatcher.group(2);
+                    // NOTE(review): a repeated key replaces the earlier value here - confirm this is intended
+                    metaData.put(key, Collections.singletonList(value));
+                }
+
+                // Trim the metadata from the source
+                source.delete(0, metadataMatcher.end(0));
+            }
+        }
+        return writeHtmlMetadata(html, metaData);
+    }
+
+    /**
+     * Emits the HTML head elements for all given metadata entries.
+     *
+     * @param html the buffer to append to
+     * @param data the metadata entries, keyed by name
+     * @return {@code true} if at least one entry was written as title, {@code false} otherwise
+     */
+    boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) {
+        boolean titleSeen = false;
+        for (Entry<String, List<String>> metaEntry : data.entrySet()) {
+            // non-short-circuit on purpose: every entry must be written, even after a title was seen
+            titleSeen |= writeHtmlMetadata(html, metaEntry.getKey(), metaEntry.getValue());
+        }
+        return titleSeen;
+    }
+
+    /**
+     * Appends the HTML head element(s) for a single metadata entry.
+     * <p>
+     * A {@code title} key (case-insensitive) is emitted as {@code <title>}; every other key is emitted as
+     * {@code <meta name='...' content='...' />}. Multiple values are joined with {@code ", "}, except for
+     * multiple authors, which are emitted as one meta element per author.
+     *
+     * @param html the buffer to append to
+     * @param key the metadata key
+     * @param values the values of the metadata entry
+     * @return {@code true} if the entry was written as title, {@code false} otherwise
+     */
+    boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) {
+        if ("title".equalsIgnoreCase(key)) {
+            html.append("<title>");
+            html.append(HtmlTools.escapeHTML(String.join(", ", values), false));
+            html.append("</title>");
+            return true;
+        }
+        if ("author".equalsIgnoreCase(key) && values.size() > 1) {
+            // for multiple authors emit multiple meta tags
+            for (String value : values) {
+                writeHtmlMetadata(html, key, Collections.singletonList(value));
+            }
+            // stop here: falling through would emit an additional meta tag with all authors joined
+            return false;
+        }
+        html.append("<meta name='");
+        html.append(HtmlTools.escapeHTML(key));
+        html.append("' content='");
+        html.append(HtmlTools.escapeHTML(String.join(", ", values)));
+        html.append("' />");
+        return false;
+    }
+
     /**
      * uses flexmark-java library to parse content and generate HTML output.
      *
@@ -184,48 +252,18 @@ public class MarkdownParser extends AbstractTextParser implements TextMarkup {
      */
     String toHtml(Reader source) throws IOException {
         // Read the source
-        String text = IOUtil.toString(source);
+        StringBuilder markdownText = new StringBuilder(IOUtil.toString(source));
 
         // Now, build the HTML document
         StringBuilder html = new StringBuilder(1000);
         html.append("<html>");
         html.append("<head>");
 
-        // detect yaml style metadata
-        if (text.startsWith("---")) {
-            // remove the enclosing --- to get back to classical metadata
-            text = text.replaceFirst("---", "").replaceFirst("---", "");
-        }
-
-        // First, we interpret the "metadata" section of the document and add the corresponding HTML headers
-        Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(text);
-        boolean haveTitle = false;
-        if (metadataMatcher.find()) {
-            Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(metadataMatcher.group(0));
-            while (entryMatcher.find()) {
-                String key = entryMatcher.group(1);
-                String value = entryMatcher.group(2);
-                if ("title".equalsIgnoreCase(key)) {
-                    haveTitle = true;
-                    html.append("<title>");
-                    html.append(HtmlTools.escapeHTML(value, false));
-                    html.append("</title>");
-                } else {
-                    html.append("<meta name='");
-                    html.append(HtmlTools.escapeHTML(key));
-                    html.append("' content='");
-                    html.append(HtmlTools.escapeHTML(value));
-                    html.append("' />");
-                }
-            }
-
-            // Trim the metadata from the source
-            text = text.substring(metadataMatcher.end(0));
-        }
+        boolean haveTitle = processMetadataForHtml(html, markdownText);
 
         // Now is the time to parse the Markdown document
         // (after we've trimmed out the metadatas, and before we check for its headings)
-        Node documentRoot = FLEXMARK_PARSER.parse(text);
+        Node documentRoot = FLEXMARK_PARSER.parse(markdownText.toString());
 
         // Special trick: if there is no title specified as a metadata in the header, we will use the first
         // heading as the document title
diff --git a/doxia-modules/doxia-module-markdown/src/site/apt/index.apt b/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
index b896ffc9..b83bf1ca 100644
--- a/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
+++ b/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
@@ -30,7 +30,16 @@ doxia-module-markdown
 
   Markdown is a popular lightweight markup language, easy to read and easy to write.
   It is supported by a large panel of websites, text editors/IDEs and converter tools.
-  Markdown format is only supported as Doxia source format.
+  Markdown format is supported both as source (parser) and destination (sink), the latter only since version 1.12.0.
+
+* Metadata
+
+  Although metadata was not part of the original Markdown format, it is now widely supported through multiple extensions.
+  This module supports the following two metadata formats:
+
+  * {{{http://fletcher.github.io/MultiMarkdown-5/metadata.html}MultiMarkdown Metadata}}
+
+  * {{{https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter}YAML front matter}}
 
 * References