You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@maven.apache.org by kw...@apache.org on 2023/01/22 10:11:53 UTC

[maven-doxia] branch bugfix/improve-metadata-parsing created (now effb97e3)

This is an automated email from the ASF dual-hosted git repository.

kwin pushed a change to branch bugfix/improve-metadata-parsing
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git


      at effb97e3 [DOXIA-690] Improved support of metadata (both YAML front matter and MultiMarkdown)

This branch includes the following new commits:

     new effb97e3 [DOXIA-690] Improved support of metadata (both YAML front matter and MultiMarkdown)

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[maven-doxia] 01/01: [DOXIA-690] Improved support of metadata (both YAML front matter and MultiMarkdown)

Posted by kw...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

kwin pushed a commit to branch bugfix/improve-metadata-parsing
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git

commit effb97e33f9213d4d8f7c0c668ed9fabf0811346
Author: Konrad Windszus <kw...@apache.org>
AuthorDate: Sun Jan 22 11:11:46 2023 +0100

    [DOXIA-690] Improved support of metadata (both YAML front matter and
    MultiMarkdown)
    
    Properly support multiline values. Always emit with normalized
    separators.
---
 doxia-modules/doxia-module-markdown/pom.xml        |   5 +
 .../doxia/module/markdown/MarkdownParser.java      | 104 ++++++++++++++-------
 .../doxia-module-markdown/src/site/apt/index.apt   |  11 ++-
 3 files changed, 86 insertions(+), 34 deletions(-)

diff --git a/doxia-modules/doxia-module-markdown/pom.xml b/doxia-modules/doxia-module-markdown/pom.xml
index f6153534..5d094d7e 100644
--- a/doxia-modules/doxia-module-markdown/pom.xml
+++ b/doxia-modules/doxia-module-markdown/pom.xml
@@ -116,6 +116,11 @@ under the License.
       <artifactId>flexmark-ext-wikilink</artifactId>
       <version>${flexmarkVersion}</version>
     </dependency>
+    <dependency>
+      <groupId>com.vladsch.flexmark</groupId>
+      <artifactId>flexmark-ext-yaml-front-matter</artifactId>
+      <version>${flexmarkVersion}</version>
+    </dependency>
 
     <dependency>
       <groupId>org.jetbrains</groupId>
diff --git a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
index c1cb3521..f6ab75e1 100644
--- a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
+++ b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java
@@ -44,8 +44,14 @@ import javax.inject.Singleton;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 import com.vladsch.flexmark.ast.Heading;
 import com.vladsch.flexmark.ast.HtmlCommentBlock;
@@ -58,6 +64,7 @@ import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
 import com.vladsch.flexmark.ext.tables.TablesExtension;
 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
+import com.vladsch.flexmark.ext.yaml.front.matter.AbstractYamlFrontMatterVisitor;
 import com.vladsch.flexmark.html.HtmlRenderer;
 import com.vladsch.flexmark.util.ast.Node;
 import com.vladsch.flexmark.util.data.MutableDataSet;
@@ -175,6 +182,67 @@ public class MarkdownParser extends AbstractTextParser implements TextMarkup {
         }
     }
 
+    /**
+     * Extracts the metadata of the given Markdown source and appends the matching HTML head elements.
+     * <p>
+     * A source starting with {@code ---} is treated as YAML front matter and read through an
+     * {@link AbstractYamlFrontMatterVisitor}. Any other source is matched against
+     * {@code METADATA_SECTION_PATTERN}; when that pattern matches, the matched section is removed from
+     * {@code source}.
+     *
+     * @param html the buffer receiving the generated {@code <title>} and {@code <meta>} elements
+     * @param source the Markdown source, only modified when a non-YAML metadata section is found
+     * @return {@code true} if a title element was written, {@code false} otherwise
+     */
+    boolean processMetadataForHtml(StringBuilder html, StringBuilder source) {
+        final Map<String, List<String>> metaData;
+        final String markdown = source.toString();
+        // support two types of metadata:
+        if (markdown.startsWith("---")) {
+            // YAML front matter (https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter)
+            Node documentRoot = FLEXMARK_PARSER.parse(markdown);
+            AbstractYamlFrontMatterVisitor visitor = new AbstractYamlFrontMatterVisitor();
+            visitor.visit(documentRoot);
+            metaData = visitor.getData();
+
+        } else {
+            // insertion-ordered map: the head elements are emitted in the order the entries appear in the document
+            metaData = new java.util.LinkedHashMap<>();
+            // First, we interpret the "metadata" section of the document and add the corresponding HTML headers
+            Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(source);
+            if (metadataMatcher.find()) {
+                Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(metadataMatcher.group(0));
+                while (entryMatcher.find()) {
+                    String key = entryMatcher.group(1);
+                    String value = entryMatcher.group(2);
+                    metaData.put(key, Collections.singletonList(value));
+                }
+
+                // Trim the metadata from the source
+                source.delete(0, metadataMatcher.end(0));
+            }
+        }
+        return writeHtmlMetadata(html, metaData);
+    }
+
+    /**
+     * Writes the HTML head elements for all given metadata entries.
+     *
+     * @param html the buffer receiving the generated elements
+     * @param data the metadata, mapping each key to its list of values
+     * @return {@code true} if at least one entry was written as title, {@code false} otherwise
+     */
+    boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) {
+        boolean titleWritten = false;
+        for (Entry<String, List<String>> metaEntry : data.entrySet()) {
+            // non-short-circuit "|=": every entry is written, even after a title has been seen
+            titleWritten |= writeHtmlMetadata(html, metaEntry.getKey(), metaEntry.getValue());
+        }
+        return titleWritten;
+    }
+
+    /**
+     * Writes the HTML head element for a single metadata entry.
+     * <p>
+     * A key equal to {@code title} (ignoring case) produces a {@code <title>} element; every other key
+     * produces {@code <meta>} elements. Multiple values are joined with {@code ", "} into one element,
+     * except for the key {@code author}, where each value gets its own {@code <meta>} element.
+     *
+     * @param html the buffer receiving the generated element(s)
+     * @param key the metadata key
+     * @param values the values belonging to the key
+     * @return {@code true} if a title element was written, {@code false} otherwise
+     */
+    boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) {
+        if ("title".equalsIgnoreCase(key)) {
+            html.append("<title>");
+            html.append(HtmlTools.escapeHTML(String.join(", ", values), false));
+            html.append("</title>");
+            return true;
+        }
+        if ("author".equalsIgnoreCase(key) && values.size() > 1) {
+            // for multiple authors emit multiple meta tags
+            for (String value : values) {
+                writeHtmlMetadata(html, key, Collections.singletonList(value));
+            }
+            // return here, otherwise an additional meta tag with all authors joined would be emitted as well
+            return false;
+        }
+        // every other multi-valued entry is concatenated and emitted in a single meta tag
+        html.append("<meta name='");
+        html.append(HtmlTools.escapeHTML(key));
+        html.append("' content='");
+        html.append(HtmlTools.escapeHTML(String.join(", ", values)));
+        html.append("' />");
+        return false;
+    }
+
     /**
      * uses flexmark-java library to parse content and generate HTML output.
      *
@@ -184,48 +252,18 @@ public class MarkdownParser extends AbstractTextParser implements TextMarkup {
      */
     String toHtml(Reader source) throws IOException {
         // Read the source
-        String text = IOUtil.toString(source);
+        StringBuilder markdownText = new StringBuilder(IOUtil.toString(source));
 
         // Now, build the HTML document
         StringBuilder html = new StringBuilder(1000);
         html.append("<html>");
         html.append("<head>");
 
-        // detect yaml style metadata
-        if (text.startsWith("---")) {
-            // remove the enclosing --- to get back to classical metadata
-            text = text.replaceFirst("---", "").replaceFirst("---", "");
-        }
-
-        // First, we interpret the "metadata" section of the document and add the corresponding HTML headers
-        Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(text);
-        boolean haveTitle = false;
-        if (metadataMatcher.find()) {
-            Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(metadataMatcher.group(0));
-            while (entryMatcher.find()) {
-                String key = entryMatcher.group(1);
-                String value = entryMatcher.group(2);
-                if ("title".equalsIgnoreCase(key)) {
-                    haveTitle = true;
-                    html.append("<title>");
-                    html.append(HtmlTools.escapeHTML(value, false));
-                    html.append("</title>");
-                } else {
-                    html.append("<meta name='");
-                    html.append(HtmlTools.escapeHTML(key));
-                    html.append("' content='");
-                    html.append(HtmlTools.escapeHTML(value));
-                    html.append("' />");
-                }
-            }
-
-            // Trim the metadata from the source
-            text = text.substring(metadataMatcher.end(0));
-        }
+        boolean haveTitle = processMetadataForHtml(html, markdownText);
 
         // Now is the time to parse the Markdown document
         // (after we've trimmed out the metadatas, and before we check for its headings)
-        Node documentRoot = FLEXMARK_PARSER.parse(text);
+        Node documentRoot = FLEXMARK_PARSER.parse(markdownText.toString());
 
         // Special trick: if there is no title specified as a metadata in the header, we will use the first
         // heading as the document title
diff --git a/doxia-modules/doxia-module-markdown/src/site/apt/index.apt b/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
index b896ffc9..b83bf1ca 100644
--- a/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
+++ b/doxia-modules/doxia-module-markdown/src/site/apt/index.apt
@@ -30,7 +30,16 @@ doxia-module-markdown
 
   Markdown is a popular lightweight markup language, easy to read and easy to write.
   It is supported by a large panel of websites, text editors/IDEs and converter tools.
-  Markdown format is only supported as Doxia source format.
+  Markdown format is supported both as source (parser) and destination (sink), the latter only since version 1.12.0.
+
+* Metadata
+
+  Although metadata was not part of the original Markdown format, it is now widely supported through multiple extensions.
+  This module supports the following two metadata formats:
+
+  * {{{http://fletcher.github.io/MultiMarkdown-5/metadata.html}MultiMarkdown Metadata}}
+
+  * {{{https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter}YAML front matter}}
 
 * References