You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2014/11/23 18:43:20 UTC
[08/16] lucy-clownfish git commit: Add CommonMark source files
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/inlines.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c
new file mode 100644
index 0000000..9bc4e35
--- /dev/null
+++ b/compiler/modules/CommonMark/src/inlines.c
@@ -0,0 +1,993 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "node.h"
+#include "parser.h"
+#include "references.h"
+#include "cmark.h"
+#include "html/houdini.h"
+#include "utf8.h"
+#include "scanners.h"
+#include "inlines.h"
+
+
+// Convenience macros for creating the various kinds of simple inline nodes.
+// Each one forwards to make_literal, make_simple or make_inlines with the
+// matching node type.
+#define make_str(s) make_literal(CMARK_NODE_STRING, s)
+#define make_code(s) make_literal(CMARK_NODE_INLINE_CODE, s)
+#define make_raw_html(s) make_literal(CMARK_NODE_INLINE_HTML, s)
+#define make_linebreak() make_simple(CMARK_NODE_LINEBREAK)
+#define make_softbreak() make_simple(CMARK_NODE_SOFTBREAK)
+#define make_emph(contents) make_inlines(CMARK_NODE_EMPH, contents)
+#define make_strong(contents) make_inlines(CMARK_NODE_STRONG, contents)
+
+// One entry of the doubly linked stack of pending delimiters
+// (runs of '*' or '_', and the bracket openers '[' and '![').
+typedef struct DelimiterStack {
+ // Entry pushed before this one (towards the bottom of the stack).
+ struct DelimiterStack *previous;
+ // Entry pushed after this one; NULL for the top of the stack.
+ struct DelimiterStack *next;
+ // Text node that holds the delimiter characters themselves.
+ cmark_node *first_inline;
+ // Number of delimiter characters still unused in this run.
+ int delim_count;
+ // The delimiter character: '*', '_', '[' or '!' (for "![").
+ unsigned char delim_char;
+ // Subject position recorded when the entry was pushed.
+ int position;
+ bool can_open;
+ bool can_close;
+} delimiter_stack;
+
+// Parsing state for one run of inline content.
+typedef struct Subject {
+ // The text being parsed.
+ chunk input;
+ // Current read offset into 'input'.
+ int pos;
+ // Reference map consulted for reference-style links; may be NULL.
+ reference_map *refmap;
+ // Top of the delimiter stack, or NULL when the stack is empty.
+ delimiter_stack *delimiters;
+} subject;
+
+static int parse_inline(subject* subj, cmark_node * parent);
+
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
+static int subject_find_special_char(subject *subj);
+
+// Build the destination of an autolink from its raw text.
+// The chunk is trimmed in place. Email autolinks get a "mailto:" prefix,
+// and the text is passed through houdini_unescape_html_f.
+// Returns NULL when the trimmed text is empty, otherwise the buffer
+// detached from the working strbuf.
+static unsigned char *cmark_clean_autolink(chunk *url, int is_email)
+{
+    strbuf cleaned = GH_BUF_INIT;
+
+    chunk_trim(url);
+
+    if (url->len != 0) {
+        if (is_email) {
+            strbuf_puts(&cleaned, "mailto:");
+        }
+        houdini_unescape_html_f(&cleaned, url->data, url->len);
+        return strbuf_detach(&cleaned);
+    }
+
+    return NULL;
+}
+
+// Create a LINK node with 'label' as its only child and the given 'url'
+// and 'title' pointers stored in the node.
+// 'label' may be NULL (for example when allocating the label node failed);
+// the link is then created without children instead of dereferencing NULL.
+// Returns NULL if the link node itself cannot be allocated.
+static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+{
+    cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
+    if(e != NULL) {
+        e->type = CMARK_NODE_LINK;
+        e->first_child = label;
+        e->last_child = label;
+        e->as.link.url = url;
+        e->as.link.title = title;
+        e->next = NULL;
+        // Only re-parent the label when there is one.
+        if (label != NULL) {
+            label->parent = e;
+        }
+    }
+    return e;
+}
+
+// Wrap 'label' in a link whose destination is derived from the autolink
+// text 'url'; 'is_email' selects the "mailto:" form.
+static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
+{
+    unsigned char *destination = cmark_clean_autolink(&url, is_email);
+
+    return make_link(label, destination, NULL);
+}
+
+// Create a container inline of type 't' whose first child is 'contents'.
+// The caller must still set 'last_child' and point the parent of
+// 'contents' at the new node. Returns NULL if allocation fails.
+static inline cmark_node* make_inlines(cmark_node_type t, cmark_node* contents)
+{
+    cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
+
+    if (node == NULL) {
+        return NULL;
+    }
+
+    node->type = t;
+    node->first_child = contents;
+    // Not linked into any tree yet.
+    node->parent = NULL;
+    node->prev = NULL;
+    node->next = NULL;
+    // Source position fields aren't used for inlines.
+    node->start_line = 0;
+    node->start_column = 0;
+    node->end_line = 0;
+
+    return node;
+}
+
+// Create a childless inline of type 't' carrying the literal text 's'.
+// Returns NULL if allocation fails.
+static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s)
+{
+    cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
+
+    if (node == NULL) {
+        return NULL;
+    }
+
+    node->type = t;
+    node->as.literal = s;
+    // Not linked into any tree yet, and no children.
+    node->parent = NULL;
+    node->prev = NULL;
+    node->next = NULL;
+    node->first_child = NULL;
+    node->last_child = NULL;
+    // Source position fields aren't used for inlines.
+    node->start_line = 0;
+    node->start_column = 0;
+    node->end_line = 0;
+
+    return node;
+}
+
+// Create a childless inline of type 't' that carries no value.
+// Returns NULL if allocation fails.
+static inline cmark_node* make_simple(cmark_node_type t)
+{
+    cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
+
+    if (node == NULL) {
+        return NULL;
+    }
+
+    node->type = t;
+    // Not linked into any tree yet, and no children.
+    node->parent = NULL;
+    node->prev = NULL;
+    node->next = NULL;
+    node->first_child = NULL;
+    node->last_child = NULL;
+    // Source position fields aren't used for inlines.
+    node->start_line = 0;
+    node->start_column = 0;
+    node->end_line = 0;
+
+    return node;
+}
+
+// Return a freshly allocated copy of the NUL-terminated string 'buf'.
+// Returns NULL when 'buf' is NULL or when the allocation fails.
+static unsigned char *bufdup(const unsigned char *buf)
+{
+    unsigned char *new_buf = NULL;
+
+    if (buf) {
+        // Keep the size in size_t: strlen returns size_t, and storing it
+        // in an int would truncate (or overflow on + 1) for huge strings.
+        size_t size = strlen((const char *)buf) + 1;
+        new_buf = (unsigned char *)calloc(size, sizeof(*new_buf));
+        if(new_buf != NULL) {
+            memcpy(new_buf, buf, size);
+        }
+    }
+
+    return new_buf;
+}
+
+// Initialise the subject 'e' to read from 'buffer', starting at offset 0
+// with an empty delimiter stack. The input chunk borrows the buffer's
+// memory (alloc is 0) and is right-trimmed.
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
+{
+    e->pos = 0;
+    e->delimiters = NULL;
+    e->refmap = refmap;
+
+    e->input.data = buffer->ptr;
+    e->input.len = buffer->size;
+    e->input.alloc = 0;
+    chunk_rtrim(&e->input);
+}
+
+// Predicate for take_while: non-zero iff 'c' is a backtick.
+static inline int isbacktick(int c)
+{
+    return ('`' == c) ? 1 : 0;
+}
+
+// Return the character at the current position, or 0 when the position
+// is at or past the end of the input. Does not advance.
+static inline unsigned char peek_char(subject *subj)
+{
+    if (subj->pos >= subj->input.len) {
+        return 0;
+    }
+    return subj->input.data[subj->pos];
+}
+
+// Return the character at absolute offset 'pos' of the input, or 0 when
+// 'pos' lies outside the input. The bounds check mirrors peek_char and
+// keeps callers that probe one past the end (such as the ')' lookup in
+// handle_close_bracket) from reading outside the chunk.
+static inline unsigned char peek_at(subject *subj, int pos)
+{
+    if (pos < 0 || pos >= subj->input.len) {
+        return 0;
+    }
+    return subj->input.data[pos];
+}
+
+// Return true if the subject has been fully consumed, i.e. there are
+// no more characters at or after the current position.
+static inline int is_eof(subject* subj)
+{
+    return (subj->pos >= subj->input.len);
+}
+
+// Advance the subject by one character. Doesn't check for eof.
+// The expansion is fully parenthesized so that it behaves as a single
+// expression wherever the macro is used.
+#define advance(subj) ((subj)->pos += 1)
+
+// Consume characters for as long as the predicate 'f' accepts them and
+// return the consumed span as a chunk taken from the subject's input.
+// Stops at the end of input (where peek_char yields 0).
+static inline chunk take_while(subject* subj, int (*f)(int))
+{
+    int startpos = subj->pos;
+
+    for (;;) {
+        unsigned char c = peek_char(subj);
+
+        if (c == 0 || !f(c)) {
+            break;
+        }
+        advance(subj);
+    }
+
+    return chunk_dup(&subj->input, startpos, subj->pos - startpos);
+}
+
+// Try to process a backtick code span that began with a run of
+// 'openticklength' backticks (already parsed). Return 0 if no matching
+// closing run of backticks is found, otherwise return the position in
+// the subject just after the closing backticks.
+//
+// Implemented as a loop rather than by recursion, so input containing
+// many backtick runs of the wrong length cannot exhaust the call stack.
+static int scan_to_closing_backticks(subject* subj, int openticklength)
+{
+    for (;;) {
+        unsigned char c;
+        int numticks = 0;
+
+        // skip everything up to the next backtick
+        while ((c = peek_char(subj)) && c != '`') {
+            advance(subj);
+        }
+        // peek_char yields 0 at end of input (or on a NUL byte); either
+        // way no progress is possible, so report "no closing ticks".
+        if (c == 0) {
+            return 0;
+        }
+
+        // measure this run of backticks
+        while (peek_char(subj) == '`') {
+            advance(subj);
+            numticks++;
+        }
+        if (numticks == openticklength) {
+            return (subj->pos);
+        }
+        // wrong length: keep scanning after this run
+    }
+}
+
+// Parse a backtick code span, or fall back to literal backticks when no
+// closing run exists. Assumes the subject is positioned on a backtick.
+// Returns the resulting inline node.
+static cmark_node* handle_backticks(subject *subj)
+{
+    chunk openticks = take_while(subj, isbacktick);
+    int startpos = subj->pos;
+    int endpos = scan_to_closing_backticks(subj, openticks.len);
+    strbuf code = GH_BUF_INIT;
+
+    if (endpos == 0) {
+        // No closer: rewind and emit the opening ticks as plain text.
+        subj->pos = startpos;
+        return make_str(openticks);
+    }
+
+    // Copy the text between the opening and closing runs, then tidy it.
+    strbuf_set(&code, subj->input.data + startpos, endpos - startpos - openticks.len);
+    strbuf_trim(&code);
+    strbuf_normalize_whitespace(&code);
+
+    return make_code(chunk_buf_detach(&code));
+}
+
+// Scan a run of the delimiter character 'c' at the current position and
+// return its length (0 if there is none). Advances past the run.
+// '*can_open' is set when the run is not followed by whitespace and
+// '*can_close' when it is not preceded by whitespace; for '_' the run
+// must additionally not touch an alphanumeric on the respective side.
+// The start of the subject counts as being preceded by a newline.
+static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
+{
+    unsigned char before = '\n';
+    unsigned char after;
+    int run = 0;
+    bool opens, closes;
+
+    if (subj->pos != 0) {
+        before = peek_at(subj, subj->pos - 1);
+    }
+
+    for (; peek_char(subj) == c; run++) {
+        advance(subj);
+    }
+    after = peek_char(subj);
+
+    opens = run > 0 && !isspace(after);
+    closes = run > 0 && !isspace(before);
+    if (c == '_') {
+        opens = opens && !isalnum(before);
+        closes = closes && !isalnum(after);
+    }
+
+    *can_open = opens;
+    *can_close = closes;
+    return run;
+}
+
+/*
+static void print_delimiters(subject *subj)
+{
+ delimiter_stack *tempstack;
+ tempstack = subj->delimiters;
+ while (tempstack != NULL) {
+ printf("Item at %p: %d %d %d %d next(%p) prev(%p)\n",
+ tempstack, tempstack->delim_count, tempstack->delim_char,
+ tempstack->can_open, tempstack->can_close,
+ tempstack->next, tempstack->previous);
+ tempstack = tempstack->previous;
+ }
+}
+*/
+
+// Unlink 'stack' from the subject's delimiter stack and free it.
+// If it was the top entry, the entry below becomes the new top.
+static void remove_delimiter(subject *subj, delimiter_stack *stack)
+{
+    delimiter_stack *below = stack->previous;
+    delimiter_stack *above = stack->next;
+
+    if (below != NULL) {
+        below->next = above;
+    }
+
+    if (above != NULL) {
+        above->previous = below;
+    } else {
+        // 'stack' was the top of the stack
+        subj->delimiters = below;
+    }
+
+    free(stack);
+}
+
+// Allocate a new delimiter entry on top of the subject's delimiter stack
+// and return the new top. The entry records the run length, delimiter
+// character, open/close flags, the text node holding the delimiter and
+// the current subject position.
+//
+// The caller is expected to store the return value in subj->delimiters.
+// If the allocation fails, the existing top is returned unchanged, so
+// that assignment keeps the stack intact instead of replacing it with
+// NULL and leaking every entry already on it.
+static delimiter_stack * push_delimiter(subject *subj,
+                                        int numdelims,
+                                        unsigned char c,
+                                        bool can_open,
+                                        bool can_close,
+                                        cmark_node *inl_text)
+{
+    delimiter_stack *istack =
+        (delimiter_stack*)malloc(sizeof(delimiter_stack));
+    if (istack == NULL) {
+        return subj->delimiters;
+    }
+    istack->delim_count = numdelims;
+    istack->delim_char = c;
+    istack->can_open = can_open;
+    istack->can_close = can_close;
+    istack->first_inline = inl_text;
+    istack->previous = subj->delimiters;
+    istack->next = NULL;
+    if (istack->previous != NULL) {
+        istack->previous->next = istack;
+    }
+    istack->position = subj->pos;
+    return istack;
+}
+
+// Handle a run of '*' or '_' at the current position.
+// The run is always emitted as a text node; if it could open or close
+// emphasis it is also pushed on the delimiter stack so that
+// process_emphasis can resolve it later. Returns the text node.
+static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
+{
+    bool can_open, can_close;
+    int run_length = scan_delims(subj, c, &can_open, &can_close);
+    cmark_node *text_node =
+        make_str(chunk_dup(&subj->input, subj->pos - run_length, run_length));
+
+    if (!can_open && !can_close) {
+        return text_node;
+    }
+
+    subj->delimiters = push_delimiter(subj, run_length, c, can_open, can_close,
+                                      text_node);
+    return text_node;
+}
+
+// Resolve emphasis and strong emphasis for the delimiters that sit above
+// 'stack_bottom' on the subject's delimiter stack (the whole stack when
+// 'stack_bottom' is NULL). Each '*' or '_' closer is paired with the
+// nearest matching opener below it; the inlines between them are moved
+// under a new EMPH or STRONG node. On return every delimiter above
+// 'stack_bottom' has been removed from the stack.
+static void process_emphasis(subject *subj, delimiter_stack *stack_bottom)
+{
+ delimiter_stack *closer = subj->delimiters;
+ delimiter_stack *opener, *tempstack, *nextstack;
+ int use_delims;
+ cmark_node *inl, *tmp, *emph;
+
+ // move back to first relevant delim.
+ while (closer != NULL && closer->previous != stack_bottom) {
+ closer = closer->previous;
+ }
+
+ // now move forward, looking for closers, and handling each
+ while (closer != NULL) {
+ if (closer->can_close &&
+ (closer->delim_char == '*' || closer->delim_char == '_')) {
+ // Now look backwards for first matching opener:
+ opener = closer->previous;
+ while (opener != NULL && opener != stack_bottom) {
+ if (opener->delim_char == closer->delim_char &&
+ opener->can_open) {
+ break;
+ }
+ opener = opener->previous;
+ }
+ if (opener != NULL && opener != stack_bottom) {
+ // calculate the actual number of delimiters used from this closer
+ if (closer->delim_count < 3 || opener->delim_count < 3) {
+ use_delims = closer->delim_count <= opener->delim_count ?
+ closer->delim_count : opener->delim_count;
+ } else { // closer and opener both have >= 3 delims
+ use_delims = closer->delim_count % 2 == 0 ? 2 : 1;
+ }
+
+ inl = opener->first_inline;
+
+ // remove used delimiters from stack elements and associated inlines.
+ opener->delim_count -= use_delims;
+ closer->delim_count -= use_delims;
+ inl->as.literal.len = opener->delim_count;
+ closer->first_inline->as.literal.len = closer->delim_count;
+
+ // free delimiters between opener and closer
+ tempstack = closer->previous;
+ while (tempstack != NULL && tempstack != opener) {
+ nextstack = tempstack->previous;
+ remove_delimiter(subj, tempstack);
+ tempstack = nextstack;
+ }
+
+ // create new emph or strong, and splice it in to our inlines
+ // between the opener and closer
+ emph = use_delims == 1 ? make_emph(inl->next) : make_strong(inl->next);
+ emph->next = closer->first_inline;
+ emph->prev = inl;
+ emph->parent = inl->parent;
+ inl->next = emph;
+
+ // if opener has 0 delims, remove it and its associated inline
+ if (opener->delim_count == 0) {
+ // replace empty opener inline with emph
+ // (the opener's node is reused in place and the temporary
+ // emph node is freed)
+ chunk_free(&(inl->as.literal));
+ inl->type = emph->type;
+ inl->next = emph->next;
+ inl->first_child = emph->first_child;
+ free(emph);
+ emph = inl;
+ // remove opener from stack
+ remove_delimiter(subj, opener);
+ }
+
+ // fix tree structure
+ // (re-parent every node up to the closer's inline and cut the
+ // sibling chain just before it)
+ tmp = emph->first_child;
+ while (tmp->next != NULL && tmp->next != closer->first_inline) {
+ tmp->parent = emph;
+ tmp = tmp->next;
+ }
+ tmp->parent = emph;
+ if (tmp->next) {
+ tmp->next->prev = emph;
+ }
+ tmp->next = NULL;
+ emph->last_child = tmp;
+
+ // if closer has 0 delims, remove it and its associated inline
+ if (closer->delim_count == 0) {
+ // remove empty closer inline
+ tmp = closer->first_inline;
+ emph->next = tmp->next;
+ if (tmp->next) {
+ tmp->next->prev = emph;
+ }
+ cmark_node_unlink(tmp);
+ cmark_free_nodes(tmp);
+ // remove closer from stack
+ tempstack = closer->next;
+ remove_delimiter(subj, closer);
+ closer = tempstack;
+ }
+ // otherwise the same closer is examined again on the next
+ // iteration, with its remaining delimiters
+ } else {
+ closer = closer->next;
+ }
+ } else {
+ closer = closer->next;
+ }
+ }
+ // free all delimiters in stack down to stack_bottom:
+ while (subj->delimiters != stack_bottom) {
+ remove_delimiter(subj, subj->delimiters);
+ }
+}
+
+// Parse a backslash at the current position and return an inline:
+// a hard line break for backslash-newline, the escaped character for
+// backslash-punctuation, and a literal backslash otherwise.
+static cmark_node* handle_backslash(subject *subj)
+{
+    unsigned char escaped;
+    int escaped_pos;
+
+    advance(subj); // skip the backslash itself
+    escaped = peek_char(subj);
+    escaped_pos = subj->pos;
+
+    if (escaped == '\n') {
+        advance(subj);
+        return make_linebreak();
+    }
+
+    // only ascii symbols and newline can be escaped
+    if (ispunct(escaped)) {
+        advance(subj);
+        return make_str(chunk_dup(&subj->input, escaped_pos, 1));
+    }
+
+    return make_str(chunk_literal("\\"));
+}
+
+// Parse an entity, or fall back to a literal "&".
+// Assumes the subject has an '&' character at the current position.
+// When houdini_unescape_ent consumes input, the subject is advanced by
+// that amount and the decoded text is returned as a string node.
+static cmark_node* handle_entity(subject* subj)
+{
+    strbuf decoded = GH_BUF_INIT;
+    size_t consumed;
+
+    advance(subj); // skip '&'
+
+    consumed = houdini_unescape_ent(&decoded,
+                                    subj->input.data + subj->pos,
+                                    subj->input.len - subj->pos);
+
+    if (consumed != 0) {
+        subj->pos += consumed;
+        return make_str(chunk_buf_detach(&decoded));
+    }
+
+    return make_str(chunk_literal("&"));
+}
+
+// Like make_str, but runs the content through houdini_unescape_html first.
+// Returns a single string node: built from the unescaped buffer when the
+// unescape call reports a result, otherwise from the original chunk.
+static cmark_node *make_str_with_entities(chunk *content)
+{
+    strbuf unescaped = GH_BUF_INIT;
+    int changed = houdini_unescape_html(&unescaped, content->data, (size_t)content->len);
+
+    if (!changed) {
+        return make_str(*content);
+    }
+    return make_str(chunk_buf_detach(&unescaped));
+}
+
+// Clean a URL: trim surrounding whitespace (in place), drop a surrounding
+// <...> pair, unescape HTML entities and remove backslash escapes.
+// Returns NULL when the trimmed URL is empty, otherwise the detached
+// buffer holding the cleaned URL.
+unsigned char *clean_url(chunk *url)
+{
+    strbuf buf = GH_BUF_INIT;
+    int skip = 0;
+    int len;
+
+    chunk_trim(url);
+    len = url->len;
+
+    if (len == 0)
+        return NULL;
+
+    if (url->data[0] == '<' && url->data[len - 1] == '>') {
+        // strip the pointy brackets
+        skip = 1;
+        len -= 2;
+    }
+
+    houdini_unescape_html_f(&buf, url->data + skip, len);
+    strbuf_unescape(&buf);
+    return strbuf_detach(&buf);
+}
+
+// Clean a link title: drop a surrounding pair of quotes ('...', "..." or
+// (...)), unescape HTML entities and remove backslash escapes.
+// Returns NULL when the title is empty, otherwise the detached buffer
+// holding the cleaned title.
+unsigned char *clean_title(chunk *title)
+{
+    strbuf buf = GH_BUF_INIT;
+    unsigned char first, last;
+
+    if (title->len == 0)
+        return NULL;
+
+    first = title->data[0];
+    last = title->data[title->len - 1];
+
+    // remove surrounding quotes if any. A pair needs at least two
+    // characters: without the length check a title consisting of a single
+    // quote character would be its own "pair" and yield a length of -1.
+    if (title->len >= 2 &&
+        ((first == '\'' && last == '\'') ||
+         (first == '(' && last == ')') ||
+         (first == '"' && last == '"'))) {
+        houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
+    } else {
+        houdini_unescape_html_f(&buf, title->data, title->len);
+    }
+
+    strbuf_unescape(&buf);
+    return strbuf_detach(&buf);
+}
+
+// Parse what follows a '<': a URL autolink, an email autolink or a raw
+// HTML tag, tried in that order. Falls back to a literal "<".
+// Assumes the subject has a '<' character at the current position.
+static cmark_node* handle_pointy_brace(subject* subj)
+{
+    chunk contents;
+    int is_email = 0;
+    int matchlen;
+
+    advance(subj); // advance past first <
+
+    // URL autolink first, then email autolink
+    matchlen = scan_autolink_uri(&subj->input, subj->pos);
+    if (matchlen <= 0) {
+        matchlen = scan_autolink_email(&subj->input, subj->pos);
+        is_email = 1;
+    }
+
+    if (matchlen > 0) {
+        // the match includes the closing '>', which is not part of the text
+        contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
+        subj->pos += matchlen;
+
+        return make_autolink(
+            make_str_with_entities(&contents),
+            contents, is_email
+        );
+    }
+
+    // finally, try to match an html tag
+    matchlen = scan_html_tag(&subj->input, subj->pos);
+    if (matchlen > 0) {
+        // include the '<' that was already consumed
+        contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
+        subj->pos += matchlen;
+        return make_raw_html(contents);
+    }
+
+    // if nothing matches, just return the opening <:
+    return make_str(chunk_literal("<"));
+}
+
+// Parse a link label starting at the '[' under the current position.
+// Unescaped brackets are not allowed inside a label: the label ends at
+// the first ']' and fails on a '['. A backslash followed by punctuation
+// is consumed as a pair. Backticks in labels do not start code spans.
+// On success stores the text between the brackets in '*raw_label',
+// leaves the subject just after the ']' and returns 1. On failure
+// (no closing bracket, or more than MAX_LINK_LABEL_LENGTH characters)
+// rewinds the subject to the '[' and returns 0.
+static int link_label(subject* subj, chunk *raw_label)
+{
+    int startpos = subj->pos;
+    int length = 0;
+    unsigned char c;
+
+    advance(subj); // advance past [
+
+    for (c = peek_char(subj); c != 0 && c != '[' && c != ']'; c = peek_char(subj)) {
+        // consume the character itself
+        advance(subj);
+        length++;
+        // a backslash also swallows a following punctuation character
+        if (c == '\\' && ispunct(peek_char(subj))) {
+            advance(subj);
+            length++;
+        }
+        if (length > MAX_LINK_LABEL_LENGTH) {
+            subj->pos = startpos; // rewind
+            return 0;
+        }
+    }
+
+    if (c != ']') {
+        subj->pos = startpos; // rewind
+        return 0;
+    }
+
+    // match found
+    *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+    advance(subj); // advance past ]
+    return 1;
+}
+
+// Handle a ']' at the current position.
+// Looks for the nearest '[' or '![' opener on the delimiter stack and
+// tries to complete an inline link "(url title)" or a reference link.
+// Returns a literal "]" string node when there is no opener or no link
+// can be formed. On a match the opener's existing text node is converted
+// in place into the link/image node, the inlines after it become its
+// children, 'parent->last_child' is updated, and NULL is returned
+// because there is no new node to append.
+static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
+{
+ int initial_pos;
+ int starturl, endurl, starttitle, endtitle, endall;
+ int n;
+ int sps;
+ reference *ref;
+ bool is_image = false;
+ chunk urlchunk, titlechunk;
+ unsigned char *url, *title;
+ delimiter_stack *opener;
+ delimiter_stack *tempstack;
+ cmark_node *link_text;
+ cmark_node *inl;
+ chunk raw_label;
+ int found_label;
+
+ advance(subj); // advance past ]
+ initial_pos = subj->pos;
+
+ // look through stack of delimiters for a [ or !
+ opener = subj->delimiters;
+ while (opener) {
+ if (opener->delim_char == '[' || opener->delim_char == '!') {
+ break;
+ }
+ opener = opener->previous;
+ }
+
+ if (opener == NULL) {
+ return make_str(chunk_literal("]"));
+ }
+
+ // If we got here, we matched a potential link/image text.
+ is_image = opener->delim_char == '!';
+ link_text = opener->first_inline->next;
+
+ // Now we check to see if it's a link/image.
+
+ // First, look for an inline link.
+ if (peek_char(subj) == '(' &&
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+
+ // try to parse an explicit link:
+ starturl = subj->pos + 1 + sps; // after (
+ endurl = starturl + n;
+ starttitle = endurl + scan_spacechars(&subj->input, endurl);
+
+ // ensure there are spaces btw url and title
+ endtitle = (starttitle == endurl) ? starttitle :
+ starttitle + scan_link_title(&subj->input, starttitle);
+
+ endall = endtitle + scan_spacechars(&subj->input, endtitle);
+
+ if (peek_at(subj, endall) == ')') {
+ subj->pos = endall + 1;
+
+ urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl);
+ titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
+ url = clean_url(&urlchunk);
+ title = clean_title(&titlechunk);
+ chunk_free(&urlchunk);
+ chunk_free(&titlechunk);
+ goto match;
+
+ } else {
+ goto noMatch;
+ }
+ }
+
+ // Next, look for a following [link label] that matches in refmap.
+ // skip spaces
+ subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
+ raw_label = chunk_literal("");
+ found_label = link_label(subj, &raw_label);
+ if (!found_label || raw_label.len == 0) {
+ // No explicit label (or an empty "[]"): use the link text itself,
+ // i.e. the input between the opener and this ']', as the label.
+ chunk_free(&raw_label);
+ raw_label = chunk_dup(&subj->input, opener->position,
+ initial_pos - opener->position - 1);
+ }
+
+ if (!found_label) {
+ // If we have a shortcut reference link, back up
+ // to before the spaces we skipped.
+ subj->pos = initial_pos;
+ }
+
+ ref = reference_lookup(subj->refmap, &raw_label);
+ chunk_free(&raw_label);
+
+ if (ref != NULL) { // found
+ // the node gets its own copies of the reference's url and title
+ url = bufdup(ref->url);
+ title = bufdup(ref->title);
+ goto match;
+ } else {
+ goto noMatch;
+ }
+
+noMatch:
+ // If we fall through to here, it means we didn't match a link:
+ remove_delimiter(subj, opener); // remove this opener from delimiter stack
+ subj->pos = initial_pos;
+ return make_str(chunk_literal("]"));
+
+match:
+ // Turn the opener's text node into the link/image node itself.
+ inl = opener->first_inline;
+ inl->type = is_image ? NODE_IMAGE : NODE_LINK;
+ chunk_free(&inl->as.literal);
+ inl->first_child = link_text;
+ // Resolve emphasis inside the link text; this also removes the opener
+ // and every later delimiter from the stack.
+ process_emphasis(subj, opener->previous);
+ inl->as.link.url = url;
+ inl->as.link.title = title;
+ inl->next = NULL;
+ if (link_text) {
+ cmark_node *tmp;
+ link_text->prev = NULL;
+ for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
+ tmp->parent = inl;
+ }
+ tmp->parent = inl;
+ inl->last_child = tmp;
+ }
+ parent->last_child = inl;
+
+ // process_emphasis will remove this delimiter and all later ones.
+ // Now, if we have a link, we also want to remove earlier link
+ // delimiters. (This code can be removed if we decide to allow links
+ // inside links.)
+ if (!is_image) {
+ opener = subj->delimiters;
+ while (opener != NULL) {
+ tempstack = opener->previous;
+ if (opener->delim_char == '[') {
+ remove_delimiter(subj, opener);
+ }
+ opener = tempstack;
+ }
+ }
+
+ return NULL;
+}
+
+// Parse a line ending and return a hard or soft break node.
+// Assumes the subject has a newline at the current position. The newline
+// and any spaces starting the next line are consumed. The break is hard
+// when the newline is directly preceded by two spaces.
+static cmark_node* handle_newline(subject *subj)
+{
+    int nlpos = subj->pos;
+    bool hard_break;
+
+    // skip over newline
+    advance(subj);
+    // skip spaces at beginning of line
+    while (peek_char(subj) == ' ') {
+        advance(subj);
+    }
+
+    hard_break = nlpos > 1 &&
+                 peek_at(subj, nlpos - 1) == ' ' &&
+                 peek_at(subj, nlpos - 2) == ' ';
+
+    return hard_break ? make_linebreak() : make_softbreak();
+}
+
+// Return the offset of the next special character strictly after the
+// current position, or the input length if there is none.
+// Special characters are the ones parse_inline dispatches on:
+// "\n\\`&_*[]<!"
+static int subject_find_special_char(subject *subj)
+{
+    // Lookup table indexed by byte value; unlisted entries are 0.
+    static const int8_t SPECIAL_CHARS[256] = {
+        ['\n'] = 1,
+        ['!']  = 1,
+        ['&']  = 1,
+        ['*']  = 1,
+        ['<']  = 1,
+        ['[']  = 1,
+        ['\\'] = 1,
+        [']']  = 1,
+        ['_']  = 1,
+        ['`']  = 1,
+    };
+    int n;
+
+    for (n = subj->pos + 1; n < subj->input.len; n++) {
+        if (SPECIAL_CHARS[subj->input.data[n]]) {
+            return n;
+        }
+    }
+
+    return subj->input.len;
+}
+
+// Parse one inline at the current position, advancing the subject, and
+// append the resulting node (if any) to 'parent'.
+// Returns 0 when peek_char yields 0 (nothing left to parse), 1 otherwise.
+static int parse_inline(subject* subj, cmark_node * parent)
+{
+    cmark_node *node = NULL;
+    unsigned char c = peek_char(subj);
+
+    if (c == 0) {
+        return 0;
+    }
+
+    switch (c) {
+    case '\n':
+        node = handle_newline(subj);
+        break;
+    case '`':
+        node = handle_backticks(subj);
+        break;
+    case '\\':
+        node = handle_backslash(subj);
+        break;
+    case '&':
+        node = handle_entity(subj);
+        break;
+    case '<':
+        node = handle_pointy_brace(subj);
+        break;
+    case '*':
+    case '_':
+        node = handle_strong_emph(subj, c);
+        break;
+    case '[':
+        // potential link opener: emit "[" and remember it on the stack
+        advance(subj);
+        node = make_str(chunk_literal("["));
+        subj->delimiters = push_delimiter(subj, 1, '[', true, false, node);
+        break;
+    case ']':
+        // may return NULL when an existing node was turned into a link
+        node = handle_close_bracket(subj, parent);
+        break;
+    case '!':
+        advance(subj);
+        if (peek_char(subj) != '[') {
+            node = make_str(chunk_literal("!"));
+        } else {
+            // potential image opener
+            advance(subj);
+            node = make_str(chunk_literal("!["));
+            subj->delimiters = push_delimiter(subj, 1, '!', false, true, node);
+        }
+        break;
+    default: {
+        // plain text up to the next special character
+        int endpos = subject_find_special_char(subj);
+        chunk text = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+
+        subj->pos = endpos;
+
+        // if we're at a newline, strip trailing spaces.
+        if (peek_char(subj) == '\n') {
+            chunk_rtrim(&text);
+        }
+
+        node = make_str(text);
+        break;
+    }
+    }
+
+    if (node != NULL) {
+        cmark_node_append_child(parent, node);
+    }
+
+    return 1;
+}
+
+// Parse the inline content held in parent's string_content, appending
+// the resulting nodes as children of 'parent', then resolve any
+// emphasis delimiters still on the stack.
+extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
+{
+    subject subj;
+
+    subject_from_buf(&subj, &parent->string_content, refmap);
+
+    while (!is_eof(&subj)) {
+        if (!parse_inline(&subj, parent)) {
+            break;
+        }
+    }
+
+    process_emphasis(&subj, NULL);
+}
+
+// Skip zero or more space characters, including at most one newline.
+static void spnl(subject* subj)
+{
+    bool newline_taken = false;
+
+    for (;;) {
+        unsigned char c = peek_char(subj);
+
+        if (c == '\n' && !newline_taken) {
+            // the single newline we are allowed to cross
+            newline_taken = true;
+        } else if (c != ' ') {
+            return;
+        }
+        advance(subj);
+    }
+}
+
+// Parse a link reference definition of the form
+// "[label]: url optional-title" at the start of 'input'.
+// Assumes the string begins with a '[' character.
+// On success the reference is added to 'refmap' and the position just
+// after the definition is returned; returns 0 if no reference is found.
+int parse_reference_inline(strbuf *input, reference_map *refmap)
+{
+    subject subj;
+    chunk lab;
+    chunk url;
+    chunk title;
+    int matchlen;
+    int beforetitle;
+
+    subject_from_buf(&subj, input, NULL);
+
+    // parse label:
+    if (!link_label(&subj, &lab)) {
+        return 0;
+    }
+
+    // colon:
+    if (peek_char(&subj) != ':') {
+        return 0;
+    }
+    advance(&subj);
+
+    // parse link url:
+    spnl(&subj);
+    matchlen = scan_link_url(&subj.input, subj.pos);
+    if (!matchlen) {
+        return 0;
+    }
+    url = chunk_dup(&subj.input, subj.pos, matchlen);
+    subj.pos += matchlen;
+
+    // parse optional link_title
+    beforetitle = subj.pos;
+    spnl(&subj);
+    matchlen = scan_link_title(&subj.input, subj.pos);
+    if (matchlen) {
+        title = chunk_dup(&subj.input, subj.pos, matchlen);
+        subj.pos += matchlen;
+    } else {
+        // no title: undo the whitespace skipped while looking for one
+        subj.pos = beforetitle;
+        title = chunk_literal("");
+    }
+
+    // parse final spaces and newline:
+    while (peek_char(&subj) == ' ') {
+        advance(&subj);
+    }
+    if (peek_char(&subj) == '\n') {
+        advance(&subj);
+    } else if (peek_char(&subj) != 0) {
+        // trailing garbage on the line: not a reference definition
+        return 0;
+    }
+
+    // insert reference into refmap
+    reference_create(refmap, &lab, &url, &title);
+    return subj.pos;
+}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/inlines.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.h b/compiler/modules/CommonMark/src/inlines.h
new file mode 100644
index 0000000..92b3b7a
--- /dev/null
+++ b/compiler/modules/CommonMark/src/inlines.h
@@ -0,0 +1,26 @@
+#ifndef CMARK_INLINES_H
+#define CMARK_INLINES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Clean a link URL: trims the chunk in place, strips a surrounding <...>
+// pair and unescapes it. Returns NULL for an empty URL.
+unsigned char *cmark_clean_url(cmark_chunk *url);
+// Clean a link title: strips surrounding quotes or parentheses and
+// unescapes it. Returns NULL for an empty title.
+unsigned char *cmark_clean_title(cmark_chunk *title);
+
+// Parse the inline content of 'parent' and append the resulting nodes as
+// its children; 'refmap' is used to resolve reference links.
+void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
+
+// Parse a link reference definition at the start of 'input' and add it to
+// 'refmap'. Returns the position after the definition, or 0 if none found.
+int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+
+// Unprefixed aliases, available unless CMARK_NO_SHORT_NAMES is defined.
+#ifndef CMARK_NO_SHORT_NAMES
+ #define parse_inlines cmark_parse_inlines
+ #define parse_reference_inline cmark_parse_reference_inline
+ #define clean_url cmark_clean_url
+ #define clean_title cmark_clean_title
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/node.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c
new file mode 100644
index 0000000..35e19d2
--- /dev/null
+++ b/compiler/modules/CommonMark/src/node.c
@@ -0,0 +1,657 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "node.h"
+
+static void
+S_node_unlink(cmark_node *node);
+
+// Allocate a zero-initialized node of the given type and apply the
+// type-specific defaults:
+//   - headers (ATX and setext) start at level 1;
+//   - lists start as loose bullet lists whose numbering begins at 1.
+//
+// Returns the new node, or NULL if the allocation fails.
+cmark_node*
+cmark_node_new(cmark_node_type type) {
+    cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
+
+    // calloc() may fail; report that to the caller instead of writing
+    // through a NULL pointer.
+    if (node == NULL) {
+        return NULL;
+    }
+
+    node->type = type;
+
+    switch (node->type) {
+    case CMARK_NODE_ATX_HEADER:
+    case CMARK_NODE_SETEXT_HEADER:
+        node->as.header.level = 1;
+        break;
+
+    case CMARK_NODE_LIST: {
+        cmark_list *list = &node->as.list;
+        list->list_type = CMARK_BULLET_LIST;
+        list->start = 1;
+        list->tight = false;
+        break;
+    }
+
+    default:
+        // Every other type is fully described by the zeroed memory.
+        break;
+    }
+
+    return node;
+}
+
+// Detach 'node' from its parent and siblings, then free it together with
+// all of its descendants.
+void cmark_node_destroy(cmark_node *node)
+{
+    S_node_unlink(node);
+
+    // cmark_free_nodes() follows the 'next' chain, so cut it here to keep
+    // the former following siblings out of the list that gets freed.
+    node->next = NULL;
+
+    cmark_free_nodes(node);
+}
+
+// Return the type tag stored in 'node'.
+cmark_node_type cmark_node_get_type(cmark_node *node)
+{
+    cmark_node_type type = node->type;
+
+    return type;
+}
+
+// Map the type of 'node' to a constant, human-readable name.
+// Types without an entry below yield "<unknown>".
+static const char *S_type_string(cmark_node *node)
+{
+    const char *name = "<unknown>";
+
+    switch (node->type) {
+    case CMARK_NODE_DOCUMENT:      name = "DOCUMENT";      break;
+    case CMARK_NODE_BLOCK_QUOTE:   name = "BLOCK_QUOTE";   break;
+    case CMARK_NODE_LIST:          name = "LIST";          break;
+    case CMARK_NODE_LIST_ITEM:     name = "LIST_ITEM";     break;
+    case CMARK_NODE_FENCED_CODE:   name = "FENCED_CODE";   break;
+    case CMARK_NODE_INDENTED_CODE: name = "INDENTED_CODE"; break;
+    case CMARK_NODE_HTML:          name = "HTML";          break;
+    case CMARK_NODE_PARAGRAPH:     name = "PARAGRAPH";     break;
+    case CMARK_NODE_ATX_HEADER:    name = "ATX_HEADER";    break;
+    case CMARK_NODE_SETEXT_HEADER: name = "SETEXT_HEADER"; break;
+    case CMARK_NODE_HRULE:         name = "HRULE";         break;
+    case CMARK_NODE_REFERENCE_DEF: name = "REFERENCE_DEF"; break;
+    case CMARK_NODE_STRING:        name = "STRING";        break;
+    case CMARK_NODE_SOFTBREAK:     name = "SOFTBREAK";     break;
+    case CMARK_NODE_LINEBREAK:     name = "LINEBREAK";     break;
+    case CMARK_NODE_INLINE_CODE:   name = "INLINE_CODE";   break;
+    case CMARK_NODE_INLINE_HTML:   name = "INLINE_HTML";   break;
+    case CMARK_NODE_EMPH:          name = "EMPH";          break;
+    case CMARK_NODE_STRONG:        name = "STRONG";        break;
+    case CMARK_NODE_LINK:          name = "LINK";          break;
+    case CMARK_NODE_IMAGE:         name = "IMAGE";         break;
+    }
+
+    return name;
+}
+
+// Return the sibling that follows 'node' (its 'next' link).
+cmark_node *cmark_node_next(cmark_node *node)
+{
+    cmark_node *following = node->next;
+
+    return following;
+}
+
+// Return the sibling that precedes 'node' (its 'prev' link).
+cmark_node *cmark_node_previous(cmark_node *node)
+{
+    cmark_node *preceding = node->prev;
+
+    return preceding;
+}
+
+// Return the 'parent' link of 'node'.
+cmark_node *cmark_node_parent(cmark_node *node)
+{
+    cmark_node *container = node->parent;
+
+    return container;
+}
+
+// Return the 'first_child' link of 'node'.
+cmark_node *cmark_node_first_child(cmark_node *node)
+{
+    cmark_node *head = node->first_child;
+
+    return head;
+}
+
+// Return the 'last_child' link of 'node'.
+cmark_node *cmark_node_last_child(cmark_node *node)
+{
+    cmark_node *tail = node->last_child;
+
+    return tail;
+}
+
+// Return a heap-allocated copy of the NUL-terminated string 'str'.
+// The caller owns the copy and releases it with free().
+//
+// Returns NULL when 'str' is NULL or when memory cannot be allocated.
+static char*
+S_strdup(const char *str) {
+    // strlen(NULL) is undefined behaviour; treat "no string" as "no copy".
+    if (str == NULL) {
+        return NULL;
+    }
+
+    size_t size = strlen(str) + 1;   // include the terminating NUL
+    char *dup = (char *)malloc(size);
+
+    // Only copy when the allocation succeeded.
+    if (dup != NULL) {
+        memcpy(dup, str, size);
+    }
+
+    return dup;
+}
+
+// Return the textual content of 'node' as a C string.
+//
+// Code blocks and HTML blocks read from the node's string buffer; string,
+// inline HTML and inline code nodes read from their literal chunk.
+// Every other node type yields NULL.
+const char *cmark_node_get_string_content(cmark_node *node)
+{
+    cmark_node_type type = node->type;
+
+    if (type == NODE_INDENTED_CODE ||
+        type == NODE_FENCED_CODE ||
+        type == NODE_HTML) {
+        return cmark_strbuf_cstr(&node->string_content);
+    }
+
+    if (type == NODE_STRING ||
+        type == NODE_INLINE_HTML ||
+        type == NODE_INLINE_CODE) {
+        return cmark_chunk_to_cstr(&node->as.literal);
+    }
+
+    return NULL;
+}
+
+// Replace the textual content of 'node' with 'content'.
+//
+// Code blocks and HTML blocks store it in the node's string buffer; string,
+// inline HTML and inline code nodes store it in their literal chunk.
+// Returns 1 if the node type carries text, 0 otherwise (nothing is changed).
+int cmark_node_set_string_content(cmark_node *node, const char *content)
+{
+    cmark_node_type type = node->type;
+
+    if (type == NODE_INDENTED_CODE ||
+        type == NODE_FENCED_CODE ||
+        type == NODE_HTML) {
+        cmark_strbuf_sets(&node->string_content, content);
+        return 1;
+    }
+
+    if (type == NODE_STRING ||
+        type == NODE_INLINE_HTML ||
+        type == NODE_INLINE_CODE) {
+        cmark_chunk_set_cstr(&node->as.literal, content);
+        return 1;
+    }
+
+    return 0;
+}
+
+// Return the level of an ATX or setext header node, or 0 when 'node' is
+// not a header.
+int cmark_node_get_header_level(cmark_node *node)
+{
+    int is_header = node->type == CMARK_NODE_ATX_HEADER ||
+                    node->type == CMARK_NODE_SETEXT_HEADER;
+
+    return is_header ? node->as.header.level : 0;
+}
+
+// Set the level of an ATX or setext header node.
+//
+// Returns 1 on success.  Returns 0, leaving the node untouched, when
+// 'level' is outside 1..6 or 'node' is not a header.
+int cmark_node_set_header_level(cmark_node *node, int level)
+{
+    // The range is validated before the node is looked at.
+    if (level < 1 || level > 6) {
+        return 0;
+    }
+
+    if (node->type != CMARK_NODE_ATX_HEADER &&
+        node->type != CMARK_NODE_SETEXT_HEADER) {
+        return 0;
+    }
+
+    node->as.header.level = level;
+    return 1;
+}
+
+// Return the list type of a list node, or CMARK_NO_LIST for any other node.
+cmark_list_type cmark_node_get_list_type(cmark_node *node)
+{
+    if (node->type != CMARK_NODE_LIST) {
+        return CMARK_NO_LIST;
+    }
+
+    return node->as.list.list_type;
+}
+
+// Set the list type of a list node.
+//
+// Only CMARK_BULLET_LIST and CMARK_ORDERED_LIST are accepted.
+// Returns 1 on success, 0 when 'type' is rejected or 'node' is not a list.
+int cmark_node_set_list_type(cmark_node *node, cmark_list_type type)
+{
+    // The requested type is validated before the node is looked at.
+    if (type != CMARK_BULLET_LIST && type != CMARK_ORDERED_LIST) {
+        return 0;
+    }
+
+    if (node->type != CMARK_NODE_LIST) {
+        return 0;
+    }
+
+    node->as.list.list_type = type;
+    return 1;
+}
+
+// Return the start number of a list node, or 0 for any other node.
+int cmark_node_get_list_start(cmark_node *node)
+{
+    return node->type == CMARK_NODE_LIST ? node->as.list.start : 0;
+}
+
+// Set the start number of a list node.
+//
+// Returns 1 on success, 0 when 'start' is negative or 'node' is not a list.
+int cmark_node_set_list_start(cmark_node *node, int start)
+{
+    // The value is validated before the node is looked at.
+    if (start < 0) {
+        return 0;
+    }
+
+    if (node->type != CMARK_NODE_LIST) {
+        return 0;
+    }
+
+    node->as.list.start = start;
+    return 1;
+}
+
+// Return the 'tight' flag of a list node, or 0 for any other node.
+int cmark_node_get_list_tight(cmark_node *node)
+{
+    if (node->type != CMARK_NODE_LIST) {
+        return 0;
+    }
+
+    return node->as.list.tight;
+}
+
+// Store 'tight' in the 'tight' flag of a list node.
+// Returns 1 on success, 0 when 'node' is not a list.
+int cmark_node_set_list_tight(cmark_node *node, int tight)
+{
+    if (node->type != CMARK_NODE_LIST) {
+        return 0;
+    }
+
+    node->as.list.tight = tight;
+    return 1;
+}
+
+// Return the info string of a fenced code block as a C string, or NULL
+// when 'node' is not a fenced code block.
+const char *cmark_node_get_fence_info(cmark_node *node)
+{
+    if (node->type != NODE_FENCED_CODE) {
+        return NULL;
+    }
+
+    return cmark_strbuf_cstr(&node->as.code.info);
+}
+
+// Replace the info string of a fenced code block with 'info'.
+// Returns 1 on success, 0 when 'node' is not a fenced code block.
+int cmark_node_set_fence_info(cmark_node *node, const char *info)
+{
+    if (node->type != NODE_FENCED_CODE) {
+        return 0;
+    }
+
+    cmark_strbuf_sets(&node->as.code.info, info);
+    return 1;
+}
+
+// Return the URL stored in a link or image node, or NULL for any other
+// node type.  The string remains owned by the node.
+const char *cmark_node_get_url(cmark_node *node)
+{
+    if (node->type == NODE_LINK || node->type == NODE_IMAGE) {
+        return (char *)node->as.link.url;
+    }
+
+    return NULL;
+}
+
+// Replace the URL of a link or image node with a private copy of 'url'.
+// The previous URL is freed.
+// Returns 1 on success, 0 when 'node' is neither a link nor an image.
+int cmark_node_set_url(cmark_node *node, const char *url)
+{
+    if (node->type != NODE_LINK && node->type != NODE_IMAGE) {
+        return 0;
+    }
+
+    free(node->as.link.url);
+    node->as.link.url = (unsigned char *)S_strdup(url);
+    return 1;
+}
+
+// Return the title stored in a link or image node, or NULL for any other
+// node type.  The string remains owned by the node.
+const char *cmark_node_get_title(cmark_node *node)
+{
+    if (node->type == NODE_LINK || node->type == NODE_IMAGE) {
+        return (char *)node->as.link.title;
+    }
+
+    return NULL;
+}
+
+// Replace the title of a link or image node with a private copy of
+// 'title'.  The previous title is freed.
+// Returns 1 on success, 0 when 'node' is neither a link nor an image.
+int cmark_node_set_title(cmark_node *node, const char *title)
+{
+    if (node->type != NODE_LINK && node->type != NODE_IMAGE) {
+        return 0;
+    }
+
+    free(node->as.link.title);
+    node->as.link.title = (unsigned char *)S_strdup(title);
+    return 1;
+}
+
+// Return the 'start_line' field of 'node'.
+int cmark_node_get_start_line(cmark_node *node)
+{
+    int line = node->start_line;
+
+    return line;
+}
+
+// Return the 'start_column' field of 'node'.
+int cmark_node_get_start_column(cmark_node *node)
+{
+    int column = node->start_column;
+
+    return column;
+}
+
+// Return the 'end_line' field of 'node'.
+int cmark_node_get_end_line(cmark_node *node)
+{
+    int line = node->end_line;
+
+    return line;
+}
+
+// True when the type of 'node' lies in the block range
+// [CMARK_NODE_FIRST_BLOCK, CMARK_NODE_LAST_BLOCK].
+static inline bool S_is_block(cmark_node *node)
+{
+    if (node->type < CMARK_NODE_FIRST_BLOCK) {
+        return false;
+    }
+    return node->type <= CMARK_NODE_LAST_BLOCK;
+}
+
+// True when the type of 'node' lies in the inline range
+// [CMARK_NODE_FIRST_INLINE, CMARK_NODE_LAST_INLINE].
+static inline bool S_is_inline(cmark_node *node)
+{
+    if (node->type < CMARK_NODE_FIRST_INLINE) {
+        return false;
+    }
+    return node->type <= CMARK_NODE_LAST_INLINE;
+}
+
+// Decide whether 'child' may be attached as a child of 'node'.
+//
+// Rejected outright:
+//   - a NULL 'node' or 'child';
+//   - a 'child' that is 'node' itself or one of its ancestors (attaching
+//     it would create a cycle);
+//   - a 'child' of type CMARK_NODE_DOCUMENT.
+//
+// Otherwise the answer depends on the container type:
+//   - document, block quote, list item: any block except a list item;
+//   - list: list items only;
+//   - paragraph, headers, emph, strong, link, image: inline nodes only;
+//   - every other type: nothing.
+static bool
+S_can_contain(cmark_node *node, cmark_node *child)
+{
+    cmark_node *cur;
+
+    // The sibling-insertion functions pass node->parent as the container,
+    // which is NULL for a node that has no parent.  Without this guard the
+    // ancestor walk below would dereference a NULL pointer.
+    if (node == NULL || child == NULL) {
+        return false;
+    }
+
+    // Verify that child is not an ancestor of node or equal to node.
+    cur = node;
+    do {
+        if (cur == child) {
+            return false;
+        }
+        cur = cur->parent;
+    } while (cur != NULL);
+
+    if (child->type == CMARK_NODE_DOCUMENT) {
+        return false;
+    }
+
+    switch (node->type) {
+    case CMARK_NODE_DOCUMENT:
+    case CMARK_NODE_BLOCK_QUOTE:
+    case CMARK_NODE_LIST_ITEM:
+        return S_is_block(child)
+            && child->type != CMARK_NODE_LIST_ITEM;
+
+    case CMARK_NODE_LIST:
+        return child->type == CMARK_NODE_LIST_ITEM;
+
+    case CMARK_NODE_PARAGRAPH:
+    case CMARK_NODE_ATX_HEADER:
+    case CMARK_NODE_SETEXT_HEADER:
+    case CMARK_NODE_EMPH:
+    case CMARK_NODE_STRONG:
+    case CMARK_NODE_LINK:
+    case CMARK_NODE_IMAGE:
+        return S_is_inline(child);
+
+    default:
+        break;
+    }
+
+    return false;
+}
+
+// Remove 'node' from the sibling chain and from its parent's
+// first_child/last_child bookkeeping.  The node's own next, prev and
+// parent pointers are deliberately left as they were; callers reset or
+// overwrite them.
+static void S_node_unlink(cmark_node *node)
+{
+    cmark_node *before = node->prev;
+    cmark_node *after = node->next;
+    cmark_node *parent = node->parent;
+
+    // Bridge the gap between the neighbours.
+    if (before) {
+        before->next = after;
+    }
+    if (after) {
+        after->prev = before;
+    }
+
+    if (!parent) {
+        return;
+    }
+
+    // Adjust first_child and last_child of parent.
+    if (parent->first_child == node) {
+        parent->first_child = after;
+    }
+    if (parent->last_child == node) {
+        parent->last_child = before;
+    }
+}
+
+// Detach 'node' from its tree: the neighbours and the parent are patched
+// up, then the node's own next, prev and parent links are cleared.
+// The node's children are not touched.
+void cmark_node_unlink(cmark_node *node)
+{
+    S_node_unlink(node);
+
+    node->parent = NULL;
+    node->prev = NULL;
+    node->next = NULL;
+}
+
+// Insert 'sibling' immediately before 'node', under the same parent.
+// 'sibling' is first removed from wherever it is currently linked.
+//
+// Returns 1 on success, 0 when the parent of 'node' may not contain
+// 'sibling' (see S_can_contain); nothing is modified in that case.
+int cmark_node_insert_before(cmark_node *node, cmark_node *sibling)
+{
+    cmark_node *parent = node->parent;
+
+    if (!S_can_contain(parent, sibling)) {
+        return 0;
+    }
+
+    S_node_unlink(sibling);
+
+    // Read after the unlink: if 'sibling' was the previous node, the
+    // unlink has just changed node->prev.
+    cmark_node *predecessor = node->prev;
+
+    // Insert 'sibling' between 'predecessor' and 'node'.
+    sibling->prev = predecessor;
+    sibling->next = node;
+    node->prev = sibling;
+    if (predecessor) {
+        predecessor->next = sibling;
+    }
+
+    // Set new parent.
+    sibling->parent = parent;
+
+    // Adjust first_child of parent if inserted as first child.
+    if (parent && !predecessor) {
+        parent->first_child = sibling;
+    }
+
+    return 1;
+}
+
+// Insert 'sibling' immediately after 'node', under the same parent.
+// 'sibling' is first removed from wherever it is currently linked.
+//
+// Returns 1 on success, 0 when the parent of 'node' may not contain
+// 'sibling' (see S_can_contain); nothing is modified in that case.
+int cmark_node_insert_after(cmark_node *node, cmark_node *sibling)
+{
+    cmark_node *parent = node->parent;
+
+    if (!S_can_contain(parent, sibling)) {
+        return 0;
+    }
+
+    S_node_unlink(sibling);
+
+    // Read after the unlink: if 'sibling' was the next node, the unlink
+    // has just changed node->next.
+    cmark_node *successor = node->next;
+
+    // Insert 'sibling' between 'node' and 'successor'.
+    sibling->next = successor;
+    sibling->prev = node;
+    node->next = sibling;
+    if (successor) {
+        successor->prev = sibling;
+    }
+
+    // Set new parent.
+    sibling->parent = parent;
+
+    // Adjust last_child of parent if inserted as last child.
+    if (parent && !successor) {
+        parent->last_child = sibling;
+    }
+
+    return 1;
+}
+
+// Make 'child' the first child of 'node'.
+// 'child' is first removed from wherever it is currently linked.
+//
+// Returns 1 on success, 0 when 'node' may not contain 'child'
+// (see S_can_contain); nothing is modified in that case.
+int cmark_node_prepend_child(cmark_node *node, cmark_node *child)
+{
+    if (!S_can_contain(node, child)) {
+        return 0;
+    }
+
+    S_node_unlink(child);
+
+    // Read after the unlink, which may have changed node->first_child.
+    cmark_node *former_head = node->first_child;
+
+    child->parent = node;
+    child->prev = NULL;
+    child->next = former_head;
+    node->first_child = child;
+
+    if (!former_head) {
+        // Also set last_child if node previously had no children.
+        node->last_child = child;
+    }
+    else {
+        former_head->prev = child;
+    }
+
+    return 1;
+}
+
+// Make 'child' the last child of 'node'.
+// 'child' is first removed from wherever it is currently linked.
+//
+// Returns 1 on success, 0 when 'node' may not contain 'child'
+// (see S_can_contain); nothing is modified in that case.
+int cmark_node_append_child(cmark_node *node, cmark_node *child)
+{
+    if (!S_can_contain(node, child)) {
+        return 0;
+    }
+
+    S_node_unlink(child);
+
+    // Read after the unlink, which may have changed node->last_child.
+    cmark_node *former_tail = node->last_child;
+
+    child->parent = node;
+    child->next = NULL;
+    child->prev = former_tail;
+    node->last_child = child;
+
+    if (!former_tail) {
+        // Also set first_child if node previously had no children.
+        node->first_child = child;
+    }
+    else {
+        former_tail->next = child;
+    }
+
+    return 1;
+}
+
+// Report an inconsistent link named 'elem' found on 'node', together with
+// the node's type name and start position.  Nothing is written when 'out'
+// is NULL.
+static void S_print_error(FILE *out, cmark_node *node, const char *elem)
+{
+    if (out != NULL) {
+        const char *type_name = S_type_string(node);
+
+        fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem,
+                type_name, node->start_line, node->start_column);
+    }
+}
+
+// Verify the structural links of the subtree rooted at 'node' and repair
+// any that are inconsistent.
+//
+// Checked while walking the subtree: a first child's 'parent', a following
+// sibling's 'prev' and 'parent', and a parent's 'last_child' once the last
+// sibling has been reached.  Each mismatch is reported through
+// S_print_error() (which is silent when 'out' is NULL), fixed in place,
+// and counted.
+//
+// Returns the number of mismatches found; 0 when 'node' is NULL.
+int
+cmark_node_check(cmark_node *node, FILE *out)
+{
+ cmark_node *cur;
+ int errors = 0;
+
+ if (!node) {
+ return 0;
+ }
+
+ cur = node;
+ while (true) {
+ // Descend first: visit the first child before any sibling.
+ if (cur->first_child) {
+ if (cur->first_child->parent != cur) {
+ S_print_error(out, cur->first_child, "parent");
+ cur->first_child->parent = cur;
+ ++errors;
+ }
+ cur = cur->first_child;
+ continue;
+ }
+
+ // Reached for a node without children, and again (via the goto below)
+ // each time the walk climbs back up to a parent.
+ next_sibling:
+ // Back at the root of the checked subtree: the walk is complete.
+ if (cur == node) {
+ break;
+ }
+ if (cur->next) {
+ if (cur->next->prev != cur) {
+ S_print_error(out, cur->next, "prev");
+ cur->next->prev = cur;
+ ++errors;
+ }
+ if (cur->next->parent != cur->parent) {
+ S_print_error(out, cur->next, "parent");
+ cur->next->parent = cur->parent;
+ ++errors;
+ }
+ cur = cur->next;
+ continue;
+ }
+
+ // 'cur' has no following sibling, so it must be its parent's last child.
+ if (cur->parent->last_child != cur) {
+ S_print_error(out, cur->parent, "last_child");
+ cur->parent->last_child = cur;
+ ++errors;
+ }
+ // Climb one level and continue with the parent's siblings.
+ cur = cur->parent;
+ goto next_sibling;
+ }
+
+ return errors;
+}
+
+// Free a cmark_node list and any children.
+//
+// Starting at 'e', every node reachable through 'next' is freed together
+// with its descendants.  The walk is iterative: before a node is freed its
+// children are spliced into the list right after it, so they are picked up
+// by the same loop.  Passing NULL is a no-op.
+void cmark_free_nodes(cmark_node *e)
+{
+ cmark_node *next;
+ while (e != NULL) {
+ // The string buffer is released for every node, whatever its type.
+ strbuf_free(&e->string_content);
+ // Release the type-specific payload held in the 'as' union.
+ switch (e->type){
+ case NODE_FENCED_CODE:
+ strbuf_free(&e->as.code.info);
+ break;
+ case NODE_STRING:
+ case NODE_INLINE_HTML:
+ case NODE_INLINE_CODE:
+ cmark_chunk_free(&e->as.literal);
+ break;
+ case NODE_LINK:
+ case NODE_IMAGE:
+ free(e->as.link.url);
+ free(e->as.link.title);
+ break;
+ default:
+ break;
+ }
+ if (e->last_child) {
+ // Splice children into list
+ // (the child chain goes between 'e' and the node that followed 'e')
+ e->last_child->next = e->next;
+ e->next = e->first_child;
+ }
+ // Fetch the successor before 'e' itself is released.
+ next = e->next;
+ free(e);
+ e = next;
+ }
+}
+
+
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/node.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h
new file mode 100644
index 0000000..d1245a5
--- /dev/null
+++ b/compiler/modules/CommonMark/src/node.h
@@ -0,0 +1,74 @@
+#ifndef CMARK_NODE_H
+#define CMARK_NODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+
+#include "cmark.h"
+#include "buffer.h"
+#include "chunk.h"
+
+// Payload of a CMARK_NODE_LIST node (stored in cmark_node.as.list).
+typedef struct {
+ cmark_list_type list_type;   // bullet or ordered; cmark_node_new() defaults to bullet
+ int marker_offset;           // NOTE(review): not used in node.c/print.c -- meaning to be confirmed
+ int padding;                 // NOTE(review): not used in node.c/print.c -- meaning to be confirmed
+ int start;                   // start number; cmark_node_new() defaults to 1
+ cmark_delim_type delimiter;  // print.c reports CMARK_PAREN_DELIM as "parens", anything else as "period"
+ unsigned char bullet_char;   // printed by print.c for bullet lists
+ bool tight;                  // cmark_node_new() defaults to false
+} cmark_list;
+
+// Payload of a fenced code block node (stored in cmark_node.as.code).
+typedef struct {
+ int fence_length;          // printed by print.c as "length="
+ int fence_offset;          // NOTE(review): not used in node.c/print.c -- meaning to be confirmed
+ unsigned char fence_char;  // NOTE(review): not used in node.c/print.c -- meaning to be confirmed
+ cmark_strbuf info;         // info string; released by cmark_free_nodes()
+} cmark_fenced_code;
+
+// Payload of an ATX or setext header node (stored in cmark_node.as.header).
+typedef struct {
+ int level;  // cmark_node_new() defaults to 1; the setter accepts 1..6
+} cmark_header;
+
+// Payload of a link or image node (stored in cmark_node.as.link).
+// Both strings are released with free() by cmark_free_nodes() and by the
+// setters in node.c; print.c treats a NULL pointer as "not set".
+typedef struct {
+ unsigned char *url;
+ unsigned char *title;
+} cmark_link;
+
+// A node of the document tree.  Siblings form a doubly linked list
+// (next/prev); a parent refers to the two ends of its child list.
+struct cmark_node {
+ cmark_node_type type;  // selects which member of 'as' is meaningful
+
+ struct cmark_node *next;
+ struct cmark_node *prev;
+ struct cmark_node *parent;
+ struct cmark_node *first_child;
+ struct cmark_node *last_child;
+
+ // Position fields exposed through the cmark_node_get_start_line(),
+ // _start_column() and _end_line() accessors in node.c.
+ int start_line;
+ int start_column;
+ int end_line;
+ bool open;             // NOTE(review): not used in node.c -- presumably block-parser state
+ bool last_line_blank;  // NOTE(review): not used in node.c -- presumably block-parser state
+
+ // Text of code blocks and HTML blocks; released for every node by
+ // cmark_free_nodes().
+ cmark_strbuf string_content;
+
+ // Type-specific payload.
+ union {
+ cmark_chunk literal;     // string, inline code, inline HTML
+ cmark_list list;         // list
+ cmark_fenced_code code;  // fenced code block
+ cmark_header header;     // ATX / setext header
+ cmark_link link;         // link, image
+ } as;
+};
+
+// Check the parent/sibling links of the subtree rooted at 'node', repair
+// inconsistent ones, and return how many were found.  Messages are written
+// to 'out' unless it is NULL.  Defined in node.c.
+CMARK_EXPORT int
+cmark_node_check(cmark_node *node, FILE *out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/parser.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h
new file mode 100644
index 0000000..4bbea09
--- /dev/null
+++ b/compiler/modules/CommonMark/src/parser.h
@@ -0,0 +1,27 @@
+// Parser state shared by the block and inline parsers.
+// NOTE(review): the include guard is named CMARK_AST_H although the file
+// is parser.h.
+#ifndef CMARK_AST_H
+#define CMARK_AST_H
+
+#include <stdio.h>
+#include "node.h"
+#include "references.h"
+#include "buffer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Labels longer than this are never looked up (see reference_lookup() in
+// references.c).
+#define MAX_LINK_LABEL_LENGTH 1000
+
+// State of one document parse.
+// NOTE(review): the field roles below are inferred from their names and
+// types only -- confirm against the block parser.
+struct cmark_doc_parser {
+ struct cmark_reference_map *refmap;  // presumably the link reference definitions collected so far
+ struct cmark_node* root;             // presumably the document node
+ struct cmark_node* current;          // presumably the block currently being built
+ int line_number;                     // presumably the number of the line being processed
+ cmark_strbuf *curline;               // presumably the text of the line being processed
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/print.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/print.c b/compiler/modules/CommonMark/src/print.c
new file mode 100644
index 0000000..b1bab4b
--- /dev/null
+++ b/compiler/modules/CommonMark/src/print.c
@@ -0,0 +1,182 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "cmark.h"
+#include "node.h"
+#include "debug.h"
+
+// Write 's' to stdout as a double-quoted string, escaping newline,
+// double quote and backslash with a leading backslash.
+// 'len' is the number of bytes to print; a negative value means "up to the
+// terminating NUL".
+static void print_str(const unsigned char *s, int len)
+{
+    int count = len;
+    int pos;
+
+    if (count < 0)
+        count = strlen((char *)s);
+
+    putchar('"');
+    for (pos = 0; pos < count; ++pos) {
+        unsigned char ch = s[pos];
+
+        if (ch == '\n') {
+            printf("\\n");
+        } else if (ch == '"') {
+            printf("\\\"");
+        } else if (ch == '\\') {
+            printf("\\\\");
+        } else {
+            putchar((int)ch);
+        }
+    }
+    putchar('"');
+}
+
+// Debugging aid: print the inline list starting at 'ils' to stdout, one
+// node per line, each line indented by 'indent' spaces.  Children of
+// links, images, strong and emph nodes are printed two columns deeper.
+// Node types not listed below produce only the indentation.
+static void print_inlines(cmark_node* ils, int indent)
+{
+    cmark_node *cur;
+
+    for (cur = ils; cur != NULL; cur = cur->next) {
+        int pad = indent;
+        int recurse = 0;   // set when the node's children must be printed
+
+        while (pad-- > 0) {
+            putchar(' ');
+        }
+
+        switch (cur->type) {
+        case NODE_STRING:
+        case NODE_INLINE_CODE:
+        case NODE_INLINE_HTML:
+            // The three literal kinds differ only in their label.
+            if (cur->type == NODE_STRING) {
+                printf("str ");
+            } else if (cur->type == NODE_INLINE_CODE) {
+                printf("code ");
+            } else {
+                printf("html ");
+            }
+            print_str(cur->as.literal.data, cur->as.literal.len);
+            putchar('\n');
+            break;
+        case NODE_LINEBREAK:
+            printf("linebreak\n");
+            break;
+        case NODE_SOFTBREAK:
+            printf("softbreak\n");
+            break;
+        case NODE_LINK:
+        case NODE_IMAGE:
+            printf("%s url=", cur->type == NODE_LINK ? "link" : "image");
+
+            if (cur->as.link.url)
+                print_str(cur->as.link.url, -1);
+
+            if (cur->as.link.title) {
+                printf(" title=");
+                print_str(cur->as.link.title, -1);
+            }
+            putchar('\n');
+            recurse = 1;
+            break;
+        case NODE_STRONG:
+            printf("strong\n");
+            recurse = 1;
+            break;
+        case NODE_EMPH:
+            printf("emph\n");
+            recurse = 1;
+            break;
+        default:
+            break;
+        }
+
+        if (recurse) {
+            print_inlines(cur->first_child, indent + 2);
+        }
+    }
+}
+
+// Debugging aid: print the block list starting at 'b' to stdout, one node
+// per line, each line indented by 'indent' spaces.  Container blocks print
+// their child blocks two columns deeper; headers and paragraphs print
+// their inline content through print_inlines().
+static void print_blocks(cmark_node* b, int indent)
+{
+    cmark_node *cur;
+
+    for (cur = b; cur != NULL; cur = cur->next) {
+        int pad = indent;
+        int deeper = indent + 2;
+
+        while (pad-- > 0) {
+            putchar(' ');
+        }
+
+        switch (cur->type) {
+        case NODE_DOCUMENT:
+            printf("document\n");
+            print_blocks(cur->first_child, deeper);
+            break;
+        case NODE_BLOCK_QUOTE:
+            printf("block_quote\n");
+            print_blocks(cur->first_child, deeper);
+            break;
+        case NODE_LIST_ITEM:
+            printf("list_item\n");
+            print_blocks(cur->first_child, deeper);
+            break;
+        case NODE_LIST: {
+            cmark_list *list = &(cur->as.list);
+            const char *tight = list->tight ? "true" : "false";
+
+            if (list->list_type == CMARK_ORDERED_LIST) {
+                const char *delim =
+                    list->delimiter == CMARK_PAREN_DELIM ? "parens" : "period";
+
+                printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+                       tight, list->start, delim);
+            } else {
+                printf("list (type=bullet tight=%s bullet_char=%c)\n",
+                       tight, list->bullet_char);
+            }
+            print_blocks(cur->first_child, deeper);
+            break;
+        }
+        case NODE_ATX_HEADER:
+            printf("atx_header (level=%d)\n", cur->as.header.level);
+            print_inlines(cur->first_child, deeper);
+            break;
+        case NODE_SETEXT_HEADER:
+            printf("setext_header (level=%d)\n", cur->as.header.level);
+            print_inlines(cur->first_child, deeper);
+            break;
+        case NODE_PARAGRAPH:
+            printf("paragraph\n");
+            print_inlines(cur->first_child, deeper);
+            break;
+        case NODE_HRULE:
+            printf("hrule\n");
+            break;
+        case NODE_INDENTED_CODE:
+            printf("indented_code ");
+            print_str(cur->string_content.ptr, -1);
+            putchar('\n');
+            break;
+        case NODE_FENCED_CODE:
+            printf("fenced_code length=%d info=",
+                   cur->as.code.fence_length);
+            print_str(cur->as.code.info.ptr, -1);
+            putchar(' ');
+            print_str(cur->string_content.ptr, -1);
+            putchar('\n');
+            break;
+        case NODE_HTML:
+            printf("html_block ");
+            print_str(cur->string_content.ptr, -1);
+            putchar('\n');
+            break;
+        case NODE_REFERENCE_DEF:
+            printf("reference_def\n");
+            break;
+        default:
+            printf("# NOT IMPLEMENTED (%d)\n", cur->type);
+            break;
+        }
+    }
+}
+
+// Debugging entry point: dump the block tree starting at 'root' to stdout
+// with no initial indentation.
+void cmark_debug_print(cmark_node *root)
+{
+    const int top_level_indent = 0;
+
+    print_blocks(root, top_level_indent);
+}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/references.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c
new file mode 100644
index 0000000..1738de1
--- /dev/null
+++ b/compiler/modules/CommonMark/src/references.c
@@ -0,0 +1,153 @@
+#include "cmark.h"
+#include "utf8.h"
+#include "parser.h"
+#include "references.h"
+#include "inlines.h"
+#include "chunk.h"
+
+// Hash the NUL-terminated byte string 'link_ref'.
+// Each byte updates the running value as
+//   hash = byte + (hash << 6) + (hash << 16) - hash
+// in unsigned arithmetic.  The empty string hashes to 0.
+static unsigned int refhash(const unsigned char *link_ref)
+{
+    unsigned int hash = 0;
+    const unsigned char *p;
+
+    for (p = link_ref; *p != '\0'; ++p) {
+        hash = *p + (hash << 6) + (hash << 16) - hash;
+    }
+
+    return hash;
+}
+
+// Release a reference together with the label, url and title strings it
+// owns.  Passing NULL is a no-op.
+static void reference_free(reference *ref)
+{
+    if (ref == NULL)
+        return;
+
+    free(ref->label);
+    free(ref->url);
+    free(ref->title);
+    free(ref);
+}
+
+// Normalize a reference label: case-fold it, strip leading and trailing
+// whitespace and collapse internal whitespace.
+// Returns a heap-allocated string owned by the caller, or NULL when 'ref'
+// is NULL, empty, or normalizes to the empty string (i.e. consists solely
+// of whitespace).
+static unsigned char *normalize_reference(chunk *ref)
+{
+    strbuf folded = GH_BUF_INIT;
+    unsigned char *label;
+
+    if (ref == NULL || ref->len == 0)
+        return NULL;
+
+    utf8proc_case_fold(&folded, ref->data, ref->len);
+    strbuf_trim(&folded);
+    strbuf_normalize_whitespace(&folded);
+
+    label = strbuf_detach(&folded);
+    assert(label);
+
+    if (label[0] != '\0')
+        return label;
+
+    // Nothing but whitespace: treat as "no label".
+    free(label);
+    return NULL;
+}
+
+// Insert 'ref' at the head of its hash bucket in 'map'.
+// If the bucket already holds a reference with the same hash and label,
+// the existing entry is kept and 'ref' is freed instead.
+static void add_reference(reference_map *map, reference* ref)
+{
+    reference **bucket = &map->table[ref->hash % REFMAP_SIZE];
+    reference *existing;
+
+    // Link in front of the current chain; only published at the end.
+    ref->next = *bucket;
+
+    for (existing = ref->next; existing != NULL; existing = existing->next) {
+        if (existing->hash == ref->hash &&
+            strcmp((char *)existing->label, (char *)ref->label) == 0) {
+            reference_free(ref);
+            return;
+        }
+    }
+
+    *bucket = ref;
+}
+
+// Create a reference from 'label', 'url' and 'title' and add it to 'map'.
+//
+// The label is normalized first; nothing is added when it is empty or
+// consists only of whitespace.  The url and title are stored as cleaned
+// copies produced by clean_url() / clean_title().  If 'map' already holds
+// a reference with the same normalized label, the existing one is kept
+// (see add_reference).
+//
+// If memory for the reference cannot be allocated, nothing is added.
+extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+    reference *ref;
+    unsigned char *reflabel = normalize_reference(label);
+
+    /* empty reference name, or composed from only whitespace */
+    if (reflabel == NULL)
+        return;
+
+    ref = (reference *)calloc(1, sizeof(*ref));
+    if (ref == NULL) {
+        /* the normalized label is heap-allocated and has no other owner */
+        free(reflabel);
+        return;
+    }
+
+    ref->label = reflabel;
+    ref->hash = refhash(ref->label);
+    ref->url = clean_url(url);
+    ref->title = clean_title(title);
+    ref->next = NULL;
+
+    add_reference(map, ref);
+}
+
+// Look up the reference whose normalized label matches 'label'.
+// Returns the matching entry (still owned by the map), or NULL when the
+// label is longer than MAX_LINK_LABEL_LENGTH, 'map' is NULL, the label
+// normalizes to nothing, or no entry matches.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+    reference *found;
+    unsigned char *key;
+    unsigned int hash;
+
+    if (label->len > MAX_LINK_LABEL_LENGTH)
+        return NULL;
+
+    if (map == NULL)
+        return NULL;
+
+    key = normalize_reference(label);
+    if (key == NULL)
+        return NULL;
+
+    hash = refhash(key);
+
+    // Walk the bucket chain until an entry with the same hash and label
+    // turns up; 'found' ends up NULL when the chain is exhausted.
+    for (found = map->table[hash % REFMAP_SIZE]; found; found = found->next) {
+        if (found->hash == hash &&
+            strcmp((char *)found->label, (char *)key) == 0)
+            break;
+    }
+
+    free(key);
+    return found;
+}
+
+// Free 'map' together with every reference stored in it.
+// Passing NULL is a no-op.
+void reference_map_free(reference_map *map)
+{
+    unsigned int bucket;
+
+    if (map == NULL)
+        return;
+
+    for (bucket = 0; bucket < REFMAP_SIZE; ++bucket) {
+        reference *ref = map->table[bucket];
+
+        while (ref != NULL) {
+            // Save the successor before the entry is released.
+            reference *following = ref->next;
+
+            reference_free(ref);
+            ref = following;
+        }
+    }
+
+    free(map);
+}
+
+// Allocate an empty, zero-initialized reference map.
+// Returns NULL when the allocation fails.
+reference_map *reference_map_new(void)
+{
+    reference_map *map = (reference_map *)calloc(1, sizeof(reference_map));
+
+    return map;
+}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/references.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.h b/compiler/modules/CommonMark/src/references.h
new file mode 100644
index 0000000..572178d
--- /dev/null
+++ b/compiler/modules/CommonMark/src/references.h
@@ -0,0 +1,46 @@
+// Storage for link reference definitions: a fixed-size hash table keyed
+// by the normalized reference label (implemented in references.c).
+#ifndef CMARK_REFERENCES_H
+#define CMARK_REFERENCES_H
+
+#include "chunk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Number of buckets in a reference map; entries are placed in bucket
+// hash % REFMAP_SIZE.
+#define REFMAP_SIZE 16
+
+// One reference definition.  The three strings are heap-allocated and
+// released by references.c when the entry is freed.
+struct cmark_reference {
+ struct cmark_reference *next;  // next entry in the same bucket
+ unsigned char *label;          // normalized label
+ unsigned char *url;            // result of clean_url()
+ unsigned char *title;          // result of clean_title()
+ unsigned int hash;             // hash of 'label', cached for comparisons
+};
+
+typedef struct cmark_reference cmark_reference;
+
+// The hash table itself: one singly linked chain per bucket.
+struct cmark_reference_map {
+ cmark_reference *table[REFMAP_SIZE];
+};
+
+typedef struct cmark_reference_map cmark_reference_map;
+
+// Allocate an empty map (NULL on allocation failure).
+cmark_reference_map *cmark_reference_map_new(void);
+// Free a map and every reference in it; NULL is accepted.
+void cmark_reference_map_free(cmark_reference_map *map);
+// Return the entry matching 'label' after normalization, or NULL.
+cmark_reference* cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label);
+// Add a reference; an existing entry with the same normalized label wins.
+extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title);
+
+// Unprefixed aliases for the names above.  Define CMARK_NO_SHORT_NAMES
+// before including this header to suppress them.
+#ifndef CMARK_NO_SHORT_NAMES
+ #define reference cmark_reference
+ #define reference_map cmark_reference_map
+ #define reference_map_new cmark_reference_map_new
+ #define reference_map_free cmark_reference_map_free
+ #define reference_lookup cmark_reference_lookup
+ #define reference_create cmark_reference_create
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif