You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2014/11/23 18:43:15 UTC

[03/16] lucy-clownfish git commit: Switch man page creator to Markdown

Switch man page creator to Markdown


Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/95e36dbf
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/95e36dbf
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/95e36dbf

Branch: refs/heads/markdown
Commit: 95e36dbf5439b425f39558c935ce9d723b6ce546
Parents: 4916920
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sun Nov 9 16:38:23 2014 +0100
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sun Nov 23 17:33:56 2014 +0100

----------------------------------------------------------------------
 compiler/src/CFCCMan.c | 354 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 264 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/95e36dbf/compiler/src/CFCCMan.c
----------------------------------------------------------------------
diff --git a/compiler/src/CFCCMan.c b/compiler/src/CFCCMan.c
index 17ad340..ef0f0b5 100644
--- a/compiler/src/CFCCMan.c
+++ b/compiler/src/CFCCMan.c
@@ -16,6 +16,8 @@
 
 #include <string.h>
 
+#include <cmark.h>
+
 #include "charmony.h"
 #include "CFCCMan.h"
 #include "CFCClass.h"
@@ -33,12 +35,6 @@
     #define false 0
 #endif
 
-typedef struct CFCPodLink {
-    size_t      total_size;
-    const char *text;
-    size_t      text_size;
-} CFCPodLink;
-
 static char*
 S_man_create_name(CFCClass *klass);
 
@@ -68,10 +64,13 @@ static char*
 S_man_create_inheritance(CFCClass *klass);
 
 static char*
-S_man_escape_content(const char *content);
+S_md_to_man(const char *md, int needs_indent);
+
+static char*
+S_nodes_to_man(cmark_node *node, int needs_indent);
 
-static void
-S_parse_pod_link(const char *content, CFCPodLink *pod_link);
+static char*
+S_man_escape(const char *content);
 
 char*
 CFCCMan_create_man_page(CFCClass *klass) {
@@ -125,17 +124,19 @@ S_man_create_name(CFCClass *klass) {
     char *result = CFCUtil_strdup(".SH NAME\n");
     result = CFCUtil_cat(result, CFCClass_get_class_name(klass), NULL);
 
+    const char *raw_brief = NULL;
     CFCDocuComment *docucom = CFCClass_get_docucomment(klass);
     if (docucom) {
-        const char *raw_brief = CFCDocuComment_get_brief(docucom);
-        if (raw_brief && raw_brief[0] != '\0') {
-            char *brief = S_man_escape_content(raw_brief);
-            result = CFCUtil_cat(result, " \\- ", brief, NULL);
-            FREEMEM(brief);
-        }
+        raw_brief = CFCDocuComment_get_brief(docucom);
+    }
+    if (raw_brief && raw_brief[0] != '\0') {
+        char *brief = S_md_to_man(raw_brief, false);
+        result = CFCUtil_cat(result, " \\- ", brief, NULL);
+        FREEMEM(brief);
+    }
+    else {
+        result = CFCUtil_cat(result, "\n", NULL);
     }
-
-    result = CFCUtil_cat(result, "\n", NULL);
 
     return result;
 }
@@ -156,8 +157,8 @@ S_man_create_description(CFCClass *klass) {
     const char *raw_description = CFCDocuComment_get_long(docucom);
     if (!raw_description || raw_description[0] == '\0') { return result; }
 
-    char *description = S_man_escape_content(raw_description);
-    result = CFCUtil_cat(result, ".SH DESCRIPTION\n", description, "\n", NULL);
+    char *description = S_md_to_man(raw_description, false);
+    result = CFCUtil_cat(result, ".SH DESCRIPTION\n", description, NULL);
     FREEMEM(description);
 
     return result;
@@ -298,11 +299,12 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
     const char *pattern =
         ".TP\n"
         ".B %s\n"
-        ".na\n"
+        ".nf\n"
+        ".fam C\n"
         "%s%s\n"
-        ".br\n"
         "%s"
-        ".ad\n";
+        ".fam\n"
+        ".fi\n";
     char *result = CFCUtil_sprintf(pattern, short_sym, return_type_c,
                                    incremented, param_list);
 
@@ -325,8 +327,8 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
     if (docucomment) {
         // Description
         const char *raw_desc = CFCDocuComment_get_description(docucomment);
-        char *desc = S_man_escape_content(raw_desc);
-        result = CFCUtil_cat(result, ".IP\n", desc, "\n", NULL);
+        char *desc = S_md_to_man(raw_desc, true);
+        result = CFCUtil_cat(result, ".IP\n", desc, NULL);
         FREEMEM(desc);
 
         // Params
@@ -337,9 +339,9 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
         if (param_names[0]) {
             result = CFCUtil_cat(result, ".RS\n", NULL);
             for (size_t i = 0; param_names[i] != NULL; i++) {
-                char *doc = S_man_escape_content(param_docs[i]);
+                char *doc = S_md_to_man(param_docs[i], true);
                 result = CFCUtil_cat(result, ".TP\n.I ", param_names[i],
-                                     "\n", doc, "\n", NULL);
+                                     "\n", doc, NULL);
                 FREEMEM(doc);
             }
             result = CFCUtil_cat(result, ".RE\n", NULL);
@@ -348,9 +350,8 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
         // Return value
         const char *retval_doc = CFCDocuComment_get_retval(docucomment);
         if (retval_doc && strlen(retval_doc)) {
-            char *doc = S_man_escape_content(retval_doc);
-            result = CFCUtil_cat(result, ".IP\n.B Returns:\n", doc, "\n",
-                                 NULL);
+            char *doc = S_md_to_man(retval_doc, true);
+            result = CFCUtil_cat(result, ".IP\n.B Returns:\n", doc, NULL);
             FREEMEM(doc);
         }
     }
@@ -375,7 +376,7 @@ S_man_create_param_list(CFCFunction *func, const char *full_sym) {
         const char  *type_c   = CFCType_to_c(type);
         const char  *name     = CFCVariable_micro_sym(variable);
 
-        result = CFCUtil_cat(result, "\n.br\n.RB \"    ", type_c, " \" ", name,
+        result = CFCUtil_cat(result, "\n.RB \"    ", type_c, " \" ", name,
                              NULL);
 
         if (variables[i+1] || CFCType_decremented(type)) {
@@ -390,7 +391,7 @@ S_man_create_param_list(CFCFunction *func, const char *full_sym) {
         }
     }
 
-    result = CFCUtil_cat(result, "\n.br\n);\n.br\n", NULL);
+    result = CFCUtil_cat(result, "\n);\n", NULL);
 
     return result;
 }
@@ -415,12 +416,232 @@ S_man_create_inheritance(CFCClass *klass) {
 }
 
 static char*
-S_man_escape_content(const char *content) {
+S_md_to_man(const char *md, int needs_indent) {
+    if (!md) { return CFCUtil_strdup(""); }  // Missing docs render as "".
+    cmark_node *doc = cmark_parse_document(md, strlen(md));
+    char *result = S_nodes_to_man(doc, needs_indent);  // Caller frees.
+    cmark_node_destroy(doc);  // The parsed tree is not needed once rendered.
+    return result;
+}
+
+static char*
+S_nodes_to_man(cmark_node *node, int needs_indent) {
+    char *result = CFCUtil_strdup("");  // Man markup accumulated so far.
+    int has_indent = needs_indent;      // Last paragraph macro indented text.
+    int has_vspace = true;              // No blank line needed before a para.
+
+    while (node) {  // Render this node and each of its following siblings.
+        cmark_node_type type = cmark_node_get_type(node);
+
+        switch (type) {
+            case NODE_DOCUMENT: {  // Root: output is just the children.
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, children_man, NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_PARAGRAPH: {
+                if (needs_indent && !has_indent) {  // Re-enter indentation.
+                    result = CFCUtil_cat(result, ".IP\n", NULL);
+                    has_indent = true;
+                }
+                else if (!needs_indent && has_indent) {  // Leave indentation.
+                    result = CFCUtil_cat(result, ".P\n", NULL);
+                    has_indent = false;
+                }
+                else if (!has_vspace) {  // Blank line between paragraphs.
+                    result = CFCUtil_cat(result, "\n", NULL);
+                }
+
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, children_man, "\n", NULL);
+                FREEMEM(children_man);
+
+                has_vspace = false;
+
+                break;
+            }
+
+            case NODE_BLOCK_QUOTE: {  // Rendered as an indented block.
+                if (needs_indent) {  // Nest inside the surrounding indent.
+                    result = CFCUtil_cat(result, ".RS\n", NULL);
+                }
+
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, true);
+                result = CFCUtil_cat(result, ".IP\n", children_man, NULL);
+                FREEMEM(children_man);
+
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RE\n", NULL);
+                    has_indent = false;  // Next paragraph must emit .IP again.
+                }
+                else {
+                    has_indent = true;  // Next paragraph must reset with .P.
+                }
+
+                break;
+            }
+
+            case NODE_LIST_ITEM: {  // Every item gets a bullet tag.
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, true);
+                result = CFCUtil_cat(result, ".IP \\(bu\n", children_man,
+                                     NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_LIST: {
+                if (needs_indent) {  // Nest inside the surrounding indent.
+                    result = CFCUtil_cat(result, ".RS\n", NULL);
+                }
+
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, children_man, NULL);
+                FREEMEM(children_man);
+
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RE\n", NULL);
+                    has_indent = false;  // Next paragraph must emit .IP again.
+                }
+                else {
+                    has_indent = true;  // Next paragraph must reset with .P.
+                }
+
+                break;
+            }
+
+            case NODE_ATX_HEADER:
+            case NODE_SETEXT_HEADER: {  // Both header kinds become .SS.
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, ".SS\n", children_man, "\n", NULL);
+                FREEMEM(children_man);
+                has_indent = false;
+                has_vspace = true;
+                break;
+            }
+
+            case NODE_INDENTED_CODE:
+            case NODE_FENCED_CODE: {  // Verbatim: no-fill mode, font family C.
+                if (needs_indent) {  // Nest inside the surrounding indent.
+                    result = CFCUtil_cat(result, ".RS\n", NULL);
+                }
+
+                const char *content = cmark_node_get_string_content(node);
+                char *escaped = S_man_escape(content);
+                result = CFCUtil_cat(result, ".IP\n.nf\n.fam C\n", escaped,
+                                     ".fam\n.fi\n", NULL);
+                FREEMEM(escaped);
+
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RE\n", NULL);
+                    has_indent = false;  // Next paragraph must emit .IP again.
+                }
+                else {
+                    has_indent = true;  // Next paragraph must reset with .P.
+                }
+
+                break;
+            }
+
+            case NODE_HTML:  // Block-level HTML is dropped with a warning.
+                CFCUtil_warn("HTML not supported in man pages");
+                break;
+
+            case NODE_HRULE:  // Intentionally produces no output.
+                break;
+
+            case NODE_REFERENCE_DEF:  // Intentionally produces no output.
+                break;
+
+            case NODE_STRING: {  // Plain text only needs man escaping.
+                const char *content = cmark_node_get_string_content(node);
+                char *escaped = S_man_escape(content);
+                result = CFCUtil_cat(result, escaped, NULL);
+                FREEMEM(escaped);
+                break;
+            }
+
+            case NODE_LINEBREAK:  // Hard break: force a new output line.
+                result = CFCUtil_cat(result, "\n.br\n", NULL);
+                break;
+
+            case NODE_SOFTBREAK:  // Soft break: plain newline in the source.
+                result = CFCUtil_cat(result, "\n", NULL);
+                break;
+
+            case NODE_INLINE_CODE: {  // Font family C, then restore previous.
+                const char *content = cmark_node_get_string_content(node);
+                char *escaped = S_man_escape(content);
+                result = CFCUtil_cat(result, "\\FC", escaped, "\\F[]", NULL);
+                FREEMEM(escaped);
+                break;
+            }
+
+            case NODE_INLINE_HTML: {  // Inline HTML is dropped with a warning.
+                const char *html = cmark_node_get_string_content(node);
+                CFCUtil_warn("HTML not supported in man pages: %s", html);
+                break;
+            }
+
+            case NODE_LINK: {  // Link text is wrapped in .UR/.UE.
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                const char *url = cmark_node_get_url(node);
+                // A NULL url would end CFCUtil_cat's argument list early.
+                result = CFCUtil_cat(result, "\n.UR ", url ? url : "", "\n",
+                                     children_man, "\n.UE\n", NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_IMAGE:  // Images are dropped with a warning.
+                CFCUtil_warn("Images not supported in man pages");
+                break;
+
+            case NODE_STRONG: {  // Bold, then restore the previous font.
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, "\\fB", children_man, "\\f[]",
+                                     NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_EMPH: {  // Italic, then restore the previous font.
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, "\\fI", children_man, "\\f[]",
+                                     NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            default:
+                CFCUtil_die("Invalid cmark node type: %d", (int)type);
+                break;
+        }
+
+        node = cmark_node_next(node);
+    }
+
+    return result;  // Heap string; callers release it with FREEMEM.
+}
+
+static char*
+S_man_escape(const char *content) {
+    size_t  len        = strlen(content);
     size_t  result_len = 0;
-    size_t  result_cap = strlen(content) + 256;
+    size_t  result_cap = len + 256;
     char   *result     = (char*)MALLOCATE(result_cap + 1);
 
-    for (size_t i = 0; content[i]; i++) {
+    for (size_t i = 0; i < len; i++) {
         const char *subst      = content + i;
         size_t      subst_size = 1;
 
@@ -435,36 +656,18 @@ S_man_escape_content(const char *content) {
                 subst      = "\\-";
                 subst_size = 2;
                 break;
-            case '\n':
-                // Escape dot after newline.
-                if (content[i+1] == '.') {
-                    subst      = "\n\\";
-                    subst_size = 2;
-                }
-                break;
-            case '<':
-                // <code> markup.
-                if (strncmp(content + i + 1, "code>", 5) == 0) {
-                    subst      = "\\fI";
+            case '.':
+                // Escape dot at start of line.
+                if (i == 0 || content[i-1] == '\n') {
+                    subst      = "\\&.";
                     subst_size = 3;
-                    i += 5;
-                }
-                else if (strncmp(content + i + 1, "/code>", 6) == 0) {
-                    subst      = "\\fP";
-                    subst_size = 3;
-                    i += 6;
                 }
                 break;
-            case 'L':
-                if (content[i+1] == '<') {
-                    // POD-style link.
-                    struct CFCPodLink pod_link;
-                    S_parse_pod_link(content + i + 2, &pod_link);
-                    if (pod_link.total_size) {
-                        subst      = pod_link.text;
-                        subst_size = pod_link.text_size;
-                        i += pod_link.total_size + 1;
-                    }
+            case '\'':
+                // Escape single quote at start of line.
+                if (i == 0 || content[i-1] == '\n') {
+                    subst      = "\\&'";
+                    subst_size = 3;
                 }
                 break;
             default:
@@ -485,32 +688,3 @@ S_man_escape_content(const char *content) {
     return result;
 }
 
-// Quick and dirty parsing of POD links. The syntax isn't fully supported
-// and the result isn't man-escaped. But it should be good enough for now
-// since at some point we'll switch to another format anyway.
-static void
-S_parse_pod_link(const char *content, CFCPodLink *pod_link) {
-    int in_text = true;
-
-    for (size_t i = 0; i < 256 && content[i]; ++i) {
-        if (content[i] == '|') {
-            if (in_text) {
-                pod_link->text_size = i;
-                in_text = false;
-            }
-        }
-        else if (content[i] == '>') {
-            pod_link->total_size = i + 1;
-            pod_link->text       = content;
-            if (in_text) {
-                pod_link->text_size = i;
-            }
-            return;
-        }
-    }
-
-    pod_link->total_size = 0;
-    pod_link->text       = NULL;
-    pod_link->text_size  = 0;
-}
-