You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2014/11/23 18:43:20 UTC
[08/16] lucy-clownfish git commit: Add CommonMark source files

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/inlines.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c
new file mode 100644
index 0000000..9bc4e35
--- /dev/null
+++ b/compiler/modules/CommonMark/src/inlines.c
@@ -0,0 +1,993 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "node.h"
+#include "parser.h"
+#include "references.h"
+#include "cmark.h"
+#include "html/houdini.h"
+#include "utf8.h"
+#include "scanners.h"
+#include "inlines.h"
+
+
+// Macros for creating various kinds of simple inline nodes.
+#define make_str(s) make_literal(CMARK_NODE_STRING, s)
+#define make_code(s) make_literal(CMARK_NODE_INLINE_CODE, s)
+#define make_raw_html(s) make_literal(CMARK_NODE_INLINE_HTML, s)
+#define make_linebreak() make_simple(CMARK_NODE_LINEBREAK)
+#define make_softbreak() make_simple(CMARK_NODE_SOFTBREAK)
+#define make_emph(contents) make_inlines(CMARK_NODE_EMPH, contents)
+#define make_strong(contents) make_inlines(CMARK_NODE_STRONG, contents)
+
+typedef struct DelimiterStack {
+	struct DelimiterStack *previous;  // entry pushed before this one (toward the bottom)
+	struct DelimiterStack *next;  // entry pushed after this one; NULL at the top
+	cmark_node *first_inline;  // text node holding this delimiter run
+	int delim_count;  // delimiter characters in the run that are still unused
+	unsigned char delim_char;  // '*', '_', '[' or '!'
+	int position;  // subject position recorded when the entry was pushed (just past the run)
+	bool can_open;  // may this run open emphasis?
+	bool can_close;  // may this run close emphasis?
+} delimiter_stack;
+
+typedef struct Subject {
+	chunk input;  // text being parsed
+	int pos;  // offset of the next unread byte in input
+	reference_map *refmap;  // reference definitions used for link lookup; may be NULL
+	delimiter_stack *delimiters;  // top of the delimiter stack, NULL when empty
+} subject;
+
+static int parse_inline(subject* subj, cmark_node * parent);
+
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
+static int subject_find_special_char(subject *subj);
+// Build the destination string for an autolink from the text found between < and >.
+static unsigned char *cmark_clean_autolink(chunk *url, int is_email)
+{
+	strbuf buf = GH_BUF_INIT;
+
+	chunk_trim(url);
+
+	if (url->len == 0)
+		return NULL;  // nothing left once trimmed
+
+	if (is_email)
+		strbuf_puts(&buf, "mailto:");  // email autolinks get an explicit scheme
+
+	houdini_unescape_html_f(&buf, url->data, url->len);  // NOTE(review): presumably decodes HTML entities -- confirm
+	return strbuf_detach(&buf);  // NOTE(review): presumably a heap string owned by the caller -- confirm
+}
+// Create a link node whose only child is 'label'; url and title are stored as given, not copied.
+static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+{
+	cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));  // zeroed, so next/prev/parent start out NULL
+	if(e != NULL) {
+		e->type = CMARK_NODE_LINK;
+		e->first_child   = label;
+		e->last_child    = label;
+		e->as.link.url   = url;
+		e->as.link.title = title;
+		if (label != NULL)  // label is NULL when its own allocation failed
+			label->parent = e;
+	}
+	return e;  // NULL if the allocation failed
+}
+
+static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
+{
+	return make_link(label, cmark_clean_autolink(&url, is_email), NULL);
+}
+
+// Create a container inline of type t; setting 'last_child' and the parent of 'contents' is up to the caller.
+static inline cmark_node* make_inlines(cmark_node_type t, cmark_node* contents)
+{
+	cmark_node * e = (cmark_node *)calloc(1, sizeof(*e));
+	if(e != NULL) {
+		e->type = t;
+		e->first_child = contents;
+		e->next = NULL;
+                e->prev = NULL;
+                e->parent = NULL;
+                // These fields aren't used for inlines:
+                e->start_line = 0;
+                e->start_column = 0;
+                e->end_line = 0;
+	}
+	return e;  // NULL if the allocation failed
+}
+
+// Create a childless inline of type t whose literal is the chunk s (stored by value).
+static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s)
+{
+	cmark_node * e = (cmark_node *)calloc(1, sizeof(*e));
+	if(e != NULL) {
+		e->type = t;
+		e->as.literal = s;
+		e->next = NULL;
+                e->prev = NULL;
+                e->parent = NULL;
+                e->first_child = NULL;
+                e->last_child = NULL;
+                // These fields aren't used for inlines:
+                e->start_line = 0;
+                e->start_column = 0;
+                e->end_line = 0;
+	}
+	return e;  // NULL if the allocation failed
+}
+
+// Create a childless inline of type t that carries no value (used for line breaks).
+static inline cmark_node* make_simple(cmark_node_type t)
+{
+	cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
+	if(e != NULL) {
+		e->type = t;
+		e->next = NULL;
+                e->prev = NULL;
+                e->parent = NULL;
+                e->first_child = NULL;
+                e->last_child = NULL;
+                // These fields aren't used for inlines:
+                e->start_line = 0;
+                e->start_column = 0;
+                e->end_line = 0;
+	}
+	return e;  // NULL if the allocation failed
+}
+// Heap-duplicate the NUL-terminated byte string buf; the caller frees the copy.
+static unsigned char *bufdup(const unsigned char *buf)
+{
+	unsigned char *new_buf = NULL;  // stays NULL for NULL input or a failed allocation
+
+	if (buf) {
+		size_t len = strlen((const char *)buf);  // size_t: no truncation for very long strings
+		new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
+		if(new_buf != NULL) {
+			memcpy(new_buf, buf, len + 1);  // copies the terminating NUL as well
+		}
+	}
+
+	return new_buf;
+}
+// Initialise subject e to parse the contents of buffer from the start, using refmap for lookups.
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
+{
+	e->input.data = buffer->ptr;  // the subject points at the buffer's bytes; nothing is copied here
+	e->input.len = buffer->size;
+	e->input.alloc = 0;  // NOTE(review): presumably marks the chunk as not owning its data -- confirm in chunk.h
+	e->pos = 0;
+	e->refmap = refmap;
+	e->delimiters = NULL;  // empty delimiter stack
+
+	chunk_rtrim(&e->input);  // NOTE(review): presumably drops trailing whitespace from the view -- confirm
+}
+// Predicate handed to take_while: nonzero when c is a backtick.
+static inline int isbacktick(int c)
+{
+	return (c == '`');
+}
+// Return the byte at the current position, or 0 at end of input (an embedded 0 byte looks the same).
+static inline unsigned char peek_char(subject *subj)
+{
+	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
+}
+// Return the byte at absolute offset pos.  No bounds check: the caller must pass a valid offset.
+static inline unsigned char peek_at(subject *subj, int pos)
+{
+	return subj->input.data[pos];
+}
+
+// Return true if there are no more characters in the subject.
+static inline int is_eof(subject* subj)
+{
+	return (subj->pos >= subj->input.len);
+}
+
+// Advance the subject by one byte.  Doesn't check for eof.  Parenthesized so it is safe inside larger expressions.
+#define advance(subj) ((subj)->pos += 1)
+
+// Consume bytes while predicate f holds, and return the consumed span as a chunk.
+static inline chunk take_while(subject* subj, int (*f)(int))
+{
+	unsigned char c;
+	int startpos = subj->pos;
+	int len = 0;
+
+	while ((c = peek_char(subj)) && (*f)(c)) {  // also stops on 0, which peek_char returns at end of input
+		advance(subj);
+		len++;
+	}
+
+	return chunk_dup(&subj->input, startpos, len);
+}
+
+// Try to process a backtick code span that began with a
+// span of ticks of length openticklength (already parsed).
+// Return 0 if no closing run of exactly that length is found,
+// otherwise return the position in the subject after the
+// closing backticks.  Iterative, so stack use stays constant.
+static int scan_to_closing_backticks(subject* subj, int openticklength)
+{
+	unsigned char c;
+	int numticks;
+	do {
+		// skip everything up to the next backtick run
+		while ((c = peek_char(subj)) && c != '`') {
+			advance(subj);
+		}
+		if (c == 0) {  // end of input, or an embedded 0 byte that peek_char cannot tell apart
+			return 0;  // did not find closing ticks, return 0
+		}
+		numticks = 0;
+		while (peek_char(subj) == '`') {
+			advance(subj);
+			numticks++;
+		}
+	} while (numticks != openticklength);  // run of the wrong length: keep scanning
+	return (subj->pos);
+}
+
+// Parse a backtick code span, or fall back to literal backticks; return the resulting inline.
+// Assumes that the subject has a backtick at the current position.
+static cmark_node* handle_backticks(subject *subj)
+{
+	chunk openticks = take_while(subj, isbacktick);
+	int startpos = subj->pos;
+	int endpos = scan_to_closing_backticks(subj, openticks.len);
+
+	if (endpos == 0) { // not found
+		subj->pos = startpos; // rewind: only the opening ticks are consumed, as literal text
+		return make_str(openticks);
+	} else {
+		strbuf buf = GH_BUF_INIT;
+
+		strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);  // bytes between the two tick runs
+		strbuf_trim(&buf);
+		strbuf_normalize_whitespace(&buf);
+
+		return make_code(chunk_buf_detach(&buf));
+	}
+}
+
+// Scan a run of the delimiter character c and return its length, or 0.  Advances position.
+// Sets *can_open / *can_close according to the characters on either side of the run.
+static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
+{
+	int numdelims = 0;
+	unsigned char char_before, char_after;
+
+	char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);  // start of input counts as a newline
+	while (peek_char(subj) == c) {
+		numdelims++;
+		advance(subj);
+	}
+	char_after = peek_char(subj);  // 0 at end of input
+	*can_open = numdelims > 0 && !isspace(char_after);  // an opener must not be followed by whitespace
+	*can_close = numdelims > 0 && !isspace(char_before);  // a closer must not be preceded by whitespace
+	if (c == '_') {  // '_' additionally must not touch an alphanumeric on its outer side
+		*can_open = *can_open && !isalnum(char_before);
+		*can_close = *can_close && !isalnum(char_after);
+	}
+	return numdelims;
+}
+
+/*
+static void print_delimiters(subject *subj)
+{
+	delimiter_stack *tempstack;
+	tempstack = subj->delimiters;
+	while (tempstack != NULL) {
+		printf("Item at %p: %d %d %d %d next(%p) prev(%p)\n",
+		       tempstack, tempstack->delim_count, tempstack->delim_char,
+		       tempstack->can_open, tempstack->can_close,
+		       tempstack->next, tempstack->previous);
+		tempstack = tempstack->previous;
+	}
+}
+*/
+// Unlink one entry from the doubly linked delimiter stack and free it.
+static void remove_delimiter(subject *subj, delimiter_stack *stack)
+{
+	if (stack->previous != NULL) {  // detach from the entry below, if there is one
+		stack->previous->next = stack->next;
+	}
+	if (stack->next == NULL) {
+		// top of stack
+		subj->delimiters = stack->previous;
+	} else {
+		stack->next->previous = stack->previous;
+	}
+	free(stack);  // only the stack entry is released; its first_inline node is not touched
+}
+// Push a delimiter entry; returns the new stack top, which the caller stores in subj->delimiters.
+static delimiter_stack * push_delimiter(subject *subj,
+					int numdelims,
+					unsigned char c,
+					bool can_open,
+					bool can_close,
+					cmark_node *inl_text)
+{
+	delimiter_stack *istack =
+		(delimiter_stack*)malloc(sizeof(delimiter_stack));
+	if (istack == NULL) {
+		return subj->delimiters;  // out of memory: hand back the unchanged top so the stack is not lost
+	}
+	istack->delim_count = numdelims;
+	istack->delim_char = c;
+	istack->can_open = can_open;
+	istack->can_close = can_close;
+	istack->first_inline = inl_text;  // text node that currently renders this run
+	istack->previous = subj->delimiters;
+	istack->next = NULL;  // the new entry becomes the top
+	if (istack->previous != NULL) {
+		istack->previous->next = istack;
+	}
+	istack->position = subj->pos;  // callers have already advanced past the run
+	return istack;
+}
+
+// Consume a run of '*' or '_' as a text node and record it on the delimiter stack.
+// Assumes the subject has '_' or '*' at the current position.
+static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
+{
+	int numdelims;
+	cmark_node * inl_text;
+	bool can_open, can_close;
+
+	numdelims = scan_delims(subj, c, &can_open, &can_close);
+
+	inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));  // the run itself, as literal text for now
+
+	if (can_open || can_close) {  // runs that can neither open nor close stay plain text
+		subj->delimiters = push_delimiter(subj, numdelims, c, can_open, can_close,
+						  inl_text);
+	}
+
+	return inl_text;  // process_emphasis later turns matched runs into emph/strong nodes
+}
+// Match emphasis closers with openers above stack_bottom, build emph/strong nodes, then free those stack entries.
+static void process_emphasis(subject *subj, delimiter_stack *stack_bottom)
+{
+	delimiter_stack *closer = subj->delimiters;
+	delimiter_stack *opener, *tempstack, *nextstack;
+	int use_delims;
+	cmark_node *inl, *tmp, *emph;
+
+	// move back to first relevant delim.
+	while (closer != NULL && closer->previous != stack_bottom) {
+		closer = closer->previous;
+	}
+
+	// now move forward, looking for closers, and handling each
+	while (closer != NULL) {
+		if (closer->can_close &&
+		    (closer->delim_char == '*' || closer->delim_char == '_')) {
+			// Now look backwards for first matching opener:
+			opener = closer->previous;
+			while (opener != NULL && opener != stack_bottom) {
+				if (opener->delim_char == closer->delim_char &&
+				    opener->can_open) {
+					break;
+				}
+				opener = opener->previous;
+			}
+			if (opener != NULL && opener != stack_bottom) {
+				// calculate the actual number of delimiters used from this closer
+				if (closer->delim_count < 3 || opener->delim_count < 3) {
+					use_delims = closer->delim_count <= opener->delim_count ?
+						closer->delim_count : opener->delim_count;
+				} else { // closer and opener both have >= 3 delims
+					use_delims = closer->delim_count % 2 == 0 ? 2 : 1;
+				}
+
+				inl = opener->first_inline;
+
+				// remove used delimiters from stack elements and associated inlines.
+				opener->delim_count -= use_delims;
+				closer->delim_count -= use_delims;
+				inl->as.literal.len = opener->delim_count;
+				closer->first_inline->as.literal.len = closer->delim_count;
+
+				// free delimiters between opener and closer
+				tempstack = closer->previous;
+				while (tempstack != NULL && tempstack != opener) {
+					nextstack = tempstack->previous;
+					remove_delimiter(subj, tempstack);
+					tempstack = nextstack;
+				}
+
+				// create new emph or strong, and splice it in to our inlines
+				// between the opener and closer
+				emph = use_delims == 1 ? make_emph(inl->next) : make_strong(inl->next);
+				emph->next = closer->first_inline;
+				emph->prev = inl;
+				emph->parent = inl->parent;
+				inl->next = emph;
+
+				// if opener has 0 delims, remove it and its associated inline
+				if (opener->delim_count == 0) {
+					// replace empty opener inline with emph
+					chunk_free(&(inl->as.literal));
+					inl->type = emph->type;
+					inl->next = emph->next;
+					inl->first_child = emph->first_child;
+					free(emph);  // the opener's node is reused in place of the fresh one
+					emph = inl;
+					// remove opener from stack
+					remove_delimiter(subj, opener);
+				}
+
+				// fix tree structure
+				tmp = emph->first_child;
+				while (tmp->next != NULL && tmp->next != closer->first_inline) {
+					tmp->parent = emph;
+					tmp = tmp->next;
+				}
+				tmp->parent = emph;
+				if (tmp->next) {
+					tmp->next->prev = emph;
+				}
+				tmp->next = NULL;  // tmp is the last child: cut it off from the closer's node
+				emph->last_child = tmp;
+
+				// if closer has 0 delims, remove it and its associated inline
+				if (closer->delim_count == 0) {
+					// remove empty closer inline
+					tmp = closer->first_inline;
+					emph->next = tmp->next;
+					if (tmp->next) {
+						tmp->next->prev = emph;
+					}
+					cmark_node_unlink(tmp);
+					cmark_free_nodes(tmp);
+					// remove closer from stack
+					tempstack = closer->next;
+					remove_delimiter(subj, closer);
+					closer = tempstack;
+				}  // otherwise the same closer is tried again with its remaining delimiters
+			} else {
+				closer = closer->next;
+			}
+		} else {
+			closer = closer->next;
+		}
+	}
+	// free all delimiters in stack down to stack_bottom:
+	while (subj->delimiters != stack_bottom) {
+		remove_delimiter(subj, subj->delimiters);
+	}
+}
+
+// Parse a backslash escape, a backslash line break, or a literal backslash; return the inline.
+static cmark_node* handle_backslash(subject *subj)
+{
+	advance(subj);  // past the backslash itself
+	unsigned char nextchar = peek_char(subj);
+	if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
+		advance(subj);
+		return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));  // the escaped character alone
+	} else if (nextchar == '\n') {
+		advance(subj);
+		return make_linebreak();  // backslash at end of line makes a hard break
+	} else {
+		return make_str(chunk_literal("\\"));  // not an escape: the following character is left unread
+	}
+}
+
+// Parse an entity or a regular "&" string.
+// Assumes the subject has an '&' character at the current position.
+static cmark_node* handle_entity(subject* subj)
+{
+	strbuf ent = GH_BUF_INIT;
+	size_t len;
+
+	advance(subj);  // past the '&'
+
+	len = houdini_unescape_ent(&ent,
+				   subj->input.data + subj->pos,
+				   subj->input.len - subj->pos
+				   );  // NOTE(review): presumably the number of input bytes consumed, 0 if no entity -- confirm
+
+	if (len == 0)
+		return make_str(chunk_literal("&"));  // not an entity: emit the ampersand literally
+
+	subj->pos += len;
+	return make_str(chunk_buf_detach(&ent));
+}
+
+// Like make_str, but runs the content through houdini_unescape_html first.
+// Returns a single string inline: the unescaped text if the helper reports success, else the content as given.
+static cmark_node *make_str_with_entities(chunk *content)
+{
+	strbuf unescaped = GH_BUF_INIT;
+
+	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+		return make_str(chunk_buf_detach(&unescaped));
+	} else {
+		return make_str(*content);
+	}
+}
+
+// Clean a URL: remove surrounding whitespace and surrounding <>,
+// and remove \ that escape punctuation.  Returns NULL if nothing is left after trimming.
+unsigned char *clean_url(chunk *url)
+{
+	strbuf buf = GH_BUF_INIT;
+
+	chunk_trim(url);  // applied to the caller's chunk
+
+	if (url->len == 0)
+		return NULL;
+
+	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
+		houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);  // skip the enclosing < and >
+	} else {
+		houdini_unescape_html_f(&buf, url->data, url->len);
+	}
+
+	strbuf_unescape(&buf);
+	return strbuf_detach(&buf);
+}
+// Clean a link title: drop one pair of surrounding quotes or parentheses, then unescape.  NULL if empty.
+unsigned char *clean_title(chunk *title)
+{
+       strbuf buf = GH_BUF_INIT;
+       unsigned char first, last;
+
+       if (title->len == 0)
+               return NULL;
+
+       first = title->data[0];
+       last = title->data[title->len - 1];
+
+       // remove surrounding quotes if any (one byte cannot be a pair, and len - 2 would go negative):
+       if (title->len >= 2 && ((first == '\'' && last == '\'') ||
+           (first == '(' && last == ')') ||
+           (first == '"' && last == '"'))) {
+               houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
+       } else {
+               houdini_unescape_html_f(&buf, title->data, title->len);
+       }
+
+       strbuf_unescape(&buf);
+       return strbuf_detach(&buf);
+}
+
+// Parse an autolink or HTML tag; falls back to a literal "<" when neither matches.
+// Assumes the subject has a '<' character at the current position.
+static cmark_node* handle_pointy_brace(subject* subj)
+{
+	int matchlen = 0;
+	chunk contents;
+
+	advance(subj);  // advance past first <
+
+	// first try to match a URL autolink
+	matchlen = scan_autolink_uri(&subj->input, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);  // matchlen - 1 leaves out the last matched byte
+		subj->pos += matchlen;
+
+		return make_autolink(
+				     make_str_with_entities(&contents),
+				     contents, 0
+				     );
+	}
+
+	// next try to match an email autolink
+	matchlen = scan_autolink_email(&subj->input, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
+		subj->pos += matchlen;
+
+		return make_autolink(
+				     make_str_with_entities(&contents),
+				     contents, 1
+				     );
+	}
+
+	// finally, try to match an html tag
+	matchlen = scan_html_tag(&subj->input, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);  // start one byte back to include the '<'
+		subj->pos += matchlen;
+		return make_raw_html(contents);
+	}
+
+	// if nothing matches, just return the opening <:
+	return make_str(chunk_literal("<"));
+}
+
+// Parse a link label.  Returns 1 and sets *raw_label (brackets excluded) if successful;
+// otherwise rewinds the subject and returns 0.  Unescaped brackets are not allowed in labels.
+// The label begins with `[` and ends with the first `]` character
+// encountered.  Backticks in labels do not start code spans.
+static int link_label(subject* subj, chunk *raw_label)
+{
+	int startpos = subj->pos;
+	int length = 0;
+
+	advance(subj);  // advance past [
+	unsigned char c;
+	while ((c = peek_char(subj)) && c != '[' && c != ']') {
+		if (c == '\\') {
+			advance(subj);
+			length++;
+			if (ispunct(peek_char(subj))) {  // escaped punctuation (e.g. a bracket) is part of the label
+				advance(subj);
+				length++;
+			}
+		} else {
+			advance(subj);
+			length++;
+		}
+		if (length > MAX_LINK_LABEL_LENGTH) {
+			goto noMatch;
+		}
+	}
+
+	if (c == ']') { // match found
+		*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+		advance(subj);  // advance past ]
+		return 1;
+	}
+
+ noMatch:
+	subj->pos = startpos; // rewind
+	return 0;
+
+}
+
+// Handle ']': on a link/image match rewrite the opener's node in place and return NULL; otherwise return a literal "]".
+static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
+{
+	int initial_pos;
+	int starturl, endurl, starttitle, endtitle, endall;
+	int n;
+	int sps;
+	reference *ref;
+	bool is_image = false;
+	chunk urlchunk, titlechunk;
+	unsigned char *url, *title;
+	delimiter_stack *opener;
+	delimiter_stack *tempstack;
+	cmark_node *link_text;
+	cmark_node *inl;
+	chunk raw_label;
+	int found_label;
+
+	advance(subj);  // advance past ]
+	initial_pos = subj->pos;
+
+	// look through stack of delimiters for a [ or !
+	opener = subj->delimiters;
+	while (opener) {
+		if (opener->delim_char == '[' || opener->delim_char == '!') {
+			break;
+		}
+		opener = opener->previous;
+	}
+
+	if (opener == NULL) {
+		return make_str(chunk_literal("]"));
+	}
+
+	// If we got here, we matched a potential link/image text.
+	is_image = opener->delim_char == '!';
+	link_text = opener->first_inline->next;
+
+	// Now we check to see if it's a link/image.
+
+	// First, look for an inline link.
+	if (peek_char(subj) == '(' &&
+	    ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+	    ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+
+		// try to parse an explicit link:
+		starturl = subj->pos + 1 + sps; // after (
+		endurl = starturl + n;
+		starttitle = endurl + scan_spacechars(&subj->input, endurl);
+
+		// ensure there are spaces between url and title
+		endtitle = (starttitle == endurl) ? starttitle :
+			starttitle + scan_link_title(&subj->input, starttitle);
+
+		endall = endtitle + scan_spacechars(&subj->input, endtitle);
+
+		if (peek_at(subj, endall) == ')') {
+			subj->pos = endall + 1;
+
+			urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl);
+			titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
+			url = clean_url(&urlchunk);
+			title = clean_title(&titlechunk);
+			chunk_free(&urlchunk);
+			chunk_free(&titlechunk);
+			goto match;
+
+		} else {
+			goto noMatch;
+		}
+	}
+
+	// Next, look for a following [link label] that matches in refmap.
+	// skip spaces
+	subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
+	raw_label = chunk_literal("");
+	found_label = link_label(subj, &raw_label);
+	if (!found_label || raw_label.len == 0) {
+		chunk_free(&raw_label);
+		raw_label = chunk_dup(&subj->input, opener->position,
+				      initial_pos - opener->position - 1);  // fall back to the bracketed text itself as the label
+	}
+
+	if (!found_label) {
+		// If we have a shortcut reference link, back up
+		// to before the spaces we skipped.
+		subj->pos = initial_pos;
+	}
+
+	ref = reference_lookup(subj->refmap, &raw_label);
+	chunk_free(&raw_label);
+
+	if (ref != NULL) { // found
+		url = bufdup(ref->url);
+		title = bufdup(ref->title);
+		goto match;
+	} else {
+		goto noMatch;
+	}
+
+noMatch:
+	// If we fall through to here, it means we didn't match a link:
+	remove_delimiter(subj, opener);  // remove this opener from delimiter stack
+	subj->pos = initial_pos;
+	return make_str(chunk_literal("]"));
+
+match:
+	inl = opener->first_inline;  // the "[" / "![" text node becomes the link/image node
+	inl->type = is_image ? NODE_IMAGE : NODE_LINK;
+	chunk_free(&inl->as.literal);
+	inl->first_child = link_text;
+	process_emphasis(subj, opener->previous);  // resolves emphasis inside the link text and frees opener
+	inl->as.link.url   = url;
+	inl->as.link.title = title;
+	inl->next = NULL;
+	if (link_text) {
+		cmark_node *tmp;
+		link_text->prev = NULL;
+		for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
+			tmp->parent = inl;
+		}
+		tmp->parent = inl;
+		inl->last_child = tmp;
+	}
+	parent->last_child = inl;
+
+	// process_emphasis will remove this delimiter and all later ones.
+	// Now, if we have a link, we also want to remove earlier link
+        // delimiters. (This code can be removed if we decide to allow links
+	// inside links.)
+	if (!is_image) {
+		opener = subj->delimiters;
+		while (opener != NULL) {
+			tempstack = opener->previous;
+			if (opener->delim_char == '[') {
+				remove_delimiter(subj, opener);
+			}
+			opener = tempstack;
+		}
+	}
+
+	return NULL;  // nothing new to append: the existing opener node was rewritten in place
+}
+
+// Parse a hard or soft linebreak, returning an inline; leading spaces of the next line are skipped.
+// Assumes the subject has a newline at the current position.
+static cmark_node* handle_newline(subject *subj)
+{
+	int nlpos = subj->pos;
+	// skip over newline
+	advance(subj);
+	// skip spaces at beginning of line
+	while (peek_char(subj) == ' ') {
+		advance(subj);
+	}
+	if (nlpos > 1 &&
+	    peek_at(subj, nlpos - 1) == ' ' &&
+	    peek_at(subj, nlpos - 2) == ' ') {  // two spaces right before the newline: hard break
+		return make_linebreak();
+	} else {
+		return make_softbreak();
+	}
+}
+
+static int subject_find_special_char(subject *subj)
+{
+	// Lookup table indexed by byte value; nonzero for the special characters "\n\\`&_*[]<!"
+	static const int8_t SPECIAL_CHARS[256] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+	int n = subj->pos + 1;  // search starts after the current byte, which the caller treats as plain text
+
+	while (n < subj->input.len) {
+		if (SPECIAL_CHARS[subj->input.data[n]])
+			return n;
+		n++;
+	}
+
+	return subj->input.len;  // no special character ahead: the rest of the input is plain text
+}
+
+// Parse one inline at the current position, advancing the subject, and append it to parent.
+// Return 0 if no inline can be parsed (end of input or a 0 byte), 1 otherwise.
+static int parse_inline(subject* subj, cmark_node * parent)
+{
+	cmark_node* new_inl = NULL;
+	chunk contents;
+	unsigned char c;
+	int endpos;
+	c = peek_char(subj);
+	if (c == 0) {
+		return 0;
+	}
+	switch(c){
+	case '\n':
+		new_inl = handle_newline(subj);
+		break;
+	case '`':
+		new_inl = handle_backticks(subj);
+		break;
+	case '\\':
+		new_inl = handle_backslash(subj);
+		break;
+	case '&':
+		new_inl = handle_entity(subj);
+		break;
+	case '<':
+		new_inl = handle_pointy_brace(subj);
+		break;
+	case '*':
+	case '_':
+		new_inl = handle_strong_emph(subj, c);
+		break;
+	case '[':
+		advance(subj);
+		new_inl = make_str(chunk_literal("["));
+		subj->delimiters = push_delimiter(subj, 1, '[', true, false, new_inl);  // possible link opener
+		break;
+	case ']':
+		new_inl = handle_close_bracket(subj, parent);  // NULL when a link/image was built in place
+		break;
+	case '!':
+		advance(subj);
+		if (peek_char(subj) == '[') {
+			advance(subj);
+			new_inl = make_str(chunk_literal("!["));
+			subj->delimiters = push_delimiter(subj, 1, '!', false, true, new_inl);  // possible image opener
+		} else {
+			new_inl = make_str(chunk_literal("!"));
+		}
+		break;
+	default:
+		endpos = subject_find_special_char(subj);  // plain text runs up to the next special character
+		contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+		subj->pos = endpos;
+
+		// if we're at a newline, strip trailing spaces.
+		if (peek_char(subj) == '\n') {
+			chunk_rtrim(&contents);
+		}
+
+		new_inl = make_str(contents);
+	}
+	if (new_inl != NULL) {
+		cmark_node_append_child(parent, new_inl);
+	}
+
+	return 1;
+}
+
+// Parse inlines from parent's string_content, adding as children of parent; refmap resolves reference links.
+extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
+{
+	subject subj;
+	subject_from_buf(&subj, &parent->string_content, refmap);
+
+	while (!is_eof(&subj) && parse_inline(&subj, parent)) ;  // one inline per iteration until input runs out
+
+	process_emphasis(&subj, NULL);  // resolve remaining emphasis and free the whole delimiter stack
+}
+
+// Skip zero or more space characters, including at most one newline.
+static void spnl(subject* subj)
+{
+	bool seen_newline = false;
+	while (peek_char(subj) == ' ' ||
+	       (!seen_newline &&
+		(seen_newline = peek_char(subj) == '\n'))) {  // the assignment records the first newline so a second one stops the loop
+		advance(subj);
+	}
+}
+
+// Parse a reference definition ([label]: url "title").  Assumes string begins with '[' character.
+// Modify refmap if a reference is encountered.
+// Return 0 if no reference found, otherwise position of subject
+// after reference is parsed.
+int parse_reference_inline(strbuf *input, reference_map *refmap)
+{
+	subject subj;
+
+	chunk lab;
+	chunk url;
+	chunk title;
+
+	int matchlen = 0;
+	int beforetitle;
+
+	subject_from_buf(&subj, input, NULL);  // no lookups happen here, so the subject gets no refmap
+
+	// parse label:
+	if (!link_label(&subj, &lab))
+		return 0;
+
+	// colon:
+	if (peek_char(&subj) == ':') {
+		advance(&subj);
+	} else {
+		return 0;
+	}
+
+	// parse link url:
+	spnl(&subj);
+	matchlen = scan_link_url(&subj.input, subj.pos);
+	if (matchlen) {
+		url = chunk_dup(&subj.input, subj.pos, matchlen);
+		subj.pos += matchlen;
+	} else {
+		return 0;  // a reference definition needs a non-empty url
+	}
+
+	// parse optional link_title
+	beforetitle = subj.pos;
+	spnl(&subj);
+	matchlen = scan_link_title(&subj.input, subj.pos);
+	if (matchlen) {
+		title = chunk_dup(&subj.input, subj.pos, matchlen);
+		subj.pos += matchlen;
+	} else {
+		subj.pos = beforetitle;  // no title: undo the whitespace skipped while looking for one
+		title = chunk_literal("");
+	}
+	// parse final spaces and newline:
+	while (peek_char(&subj) == ' ') {
+		advance(&subj);
+	}
+	if (peek_char(&subj) == '\n') {
+		advance(&subj);
+	} else if (peek_char(&subj) != 0) {
+		return 0;  // trailing content on the line: not a reference definition
+	}
+	// insert reference into refmap
+	reference_create(refmap, &lab, &url, &title);
+	return subj.pos;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/inlines.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.h b/compiler/modules/CommonMark/src/inlines.h
new file mode 100644
index 0000000..92b3b7a
--- /dev/null
+++ b/compiler/modules/CommonMark/src/inlines.h
@@ -0,0 +1,26 @@
+#ifndef CMARK_INLINES_H
+#define CMARK_INLINES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned char *cmark_clean_url(cmark_chunk *url);
+unsigned char *cmark_clean_title(cmark_chunk *title);
+
+void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
+
+int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+
+#ifndef CMARK_NO_SHORT_NAMES
+  #define parse_inlines             cmark_parse_inlines
+  #define parse_reference_inline    cmark_parse_reference_inline
+  #define clean_url                 cmark_clean_url
+  #define clean_title               cmark_clean_title
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/node.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c
new file mode 100644
index 0000000..35e19d2
--- /dev/null
+++ b/compiler/modules/CommonMark/src/node.c
@@ -0,0 +1,657 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "node.h"
+
+static void
+S_node_unlink(cmark_node *node);
+
+// Allocate a zero-initialized node of the given type and apply the
+// type-specific defaults: header level 1 for both header kinds; for lists a
+// bullet list starting at 1 that is not tight.
+// Returns the new node, or NULL if the allocation fails.
+cmark_node*
+cmark_node_new(cmark_node_type type) {
+	cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
+	if (node == NULL) {
+		// Out of memory: report it to the caller instead of writing
+		// through a NULL pointer below.
+		return NULL;
+	}
+	node->type = type;
+
+	switch (node->type) {
+	case CMARK_NODE_ATX_HEADER:
+	case CMARK_NODE_SETEXT_HEADER:
+		node->as.header.level = 1;
+		break;
+
+	case CMARK_NODE_LIST: {
+		cmark_list *list = &node->as.list;
+		list->list_type = CMARK_BULLET_LIST;
+		list->start     = 1;
+		list->tight     = false;
+		break;
+	}
+
+	default:
+		// Every other type keeps the all-zero state from calloc().
+		break;
+	}
+
+	return node;
+}
+
+// Detach 'node' from its siblings and parent, then free it together with
+// all of its descendants. Passing NULL is a no-op, mirroring free(NULL).
+void
+cmark_node_destroy(cmark_node *node) {
+	if (node == NULL) {
+		return;
+	}
+
+	S_node_unlink(node);
+	// cmark_free_nodes() follows the 'next' chain, so cut it here to keep
+	// the former siblings alive.
+	node->next = NULL;
+	cmark_free_nodes(node);
+}
+
+// Report the type tag stored in 'node'.
+cmark_node_type
+cmark_node_get_type(cmark_node *node)
+{
+	const cmark_node_type kind = node->type;
+
+	return kind;
+}
+
+// Map the type of 'node' to its upper-case name for diagnostics.
+// Types without an entry below yield "<unknown>".
+static const char*
+S_type_string(cmark_node *node)
+{
+	const char *name = "<unknown>";
+
+	switch (node->type) {
+	case CMARK_NODE_DOCUMENT:      name = "DOCUMENT";      break;
+	case CMARK_NODE_BLOCK_QUOTE:   name = "BLOCK_QUOTE";   break;
+	case CMARK_NODE_LIST:          name = "LIST";          break;
+	case CMARK_NODE_LIST_ITEM:     name = "LIST_ITEM";     break;
+	case CMARK_NODE_FENCED_CODE:   name = "FENCED_CODE";   break;
+	case CMARK_NODE_INDENTED_CODE: name = "INDENTED_CODE"; break;
+	case CMARK_NODE_HTML:          name = "HTML";          break;
+	case CMARK_NODE_PARAGRAPH:     name = "PARAGRAPH";     break;
+	case CMARK_NODE_ATX_HEADER:    name = "ATX_HEADER";    break;
+	case CMARK_NODE_SETEXT_HEADER: name = "SETEXT_HEADER"; break;
+	case CMARK_NODE_HRULE:         name = "HRULE";         break;
+	case CMARK_NODE_REFERENCE_DEF: name = "REFERENCE_DEF"; break;
+	case CMARK_NODE_STRING:        name = "STRING";        break;
+	case CMARK_NODE_SOFTBREAK:     name = "SOFTBREAK";     break;
+	case CMARK_NODE_LINEBREAK:     name = "LINEBREAK";     break;
+	case CMARK_NODE_INLINE_CODE:   name = "INLINE_CODE";   break;
+	case CMARK_NODE_INLINE_HTML:   name = "INLINE_HTML";   break;
+	case CMARK_NODE_EMPH:          name = "EMPH";          break;
+	case CMARK_NODE_STRONG:        name = "STRONG";        break;
+	case CMARK_NODE_LINK:          name = "LINK";          break;
+	case CMARK_NODE_IMAGE:         name = "IMAGE";         break;
+	}
+
+	return name;
+}
+
+// Return the sibling that follows 'node'.
+// Returns NULL when 'node' is NULL or has no next sibling.
+cmark_node*
+cmark_node_next(cmark_node *node)
+{
+	if (node == NULL) {
+		return NULL;
+	}
+	return node->next;
+}
+
+// Return the sibling that precedes 'node'.
+// Returns NULL when 'node' is NULL or has no previous sibling.
+cmark_node*
+cmark_node_previous(cmark_node *node)
+{
+	if (node == NULL) {
+		return NULL;
+	}
+	return node->prev;
+}
+
+// Return the parent of 'node'.
+// Returns NULL when 'node' is NULL or has no parent.
+cmark_node*
+cmark_node_parent(cmark_node *node)
+{
+	if (node == NULL) {
+		return NULL;
+	}
+	return node->parent;
+}
+
+// Return the first child of 'node'.
+// Returns NULL when 'node' is NULL or has no children.
+cmark_node*
+cmark_node_first_child(cmark_node *node)
+{
+	if (node == NULL) {
+		return NULL;
+	}
+	return node->first_child;
+}
+
+// Return the last child of 'node'.
+// Returns NULL when 'node' is NULL or has no children.
+cmark_node*
+cmark_node_last_child(cmark_node *node)
+{
+	if (node == NULL) {
+		return NULL;
+	}
+	return node->last_child;
+}
+
+// Duplicate a NUL-terminated string into freshly malloc'ed storage that the
+// caller must release with free().
+// Returns NULL when 'str' is NULL or when the allocation fails.
+static char*
+S_strdup(const char *str) {
+	if (str == NULL) {
+		return NULL;
+	}
+
+	size_t size = strlen(str) + 1;  // include the terminating NUL
+	char *dup = (char *)malloc(size);
+	if (dup != NULL) {
+		memcpy(dup, str, size);
+	}
+	return dup;
+}
+
+// Return the textual content of 'node' as a C string: the string_content
+// buffer for indented code, fenced code and HTML blocks, the literal chunk
+// for string, inline HTML and inline code nodes. Any other node type
+// yields NULL.
+const char*
+cmark_node_get_string_content(cmark_node *node) {
+	const char *content = NULL;
+
+	switch (node->type) {
+	case NODE_INDENTED_CODE:
+	case NODE_FENCED_CODE:
+	case NODE_HTML:
+		content = cmark_strbuf_cstr(&node->string_content);
+		break;
+
+	case NODE_STRING:
+	case NODE_INLINE_HTML:
+	case NODE_INLINE_CODE:
+		content = cmark_chunk_to_cstr(&node->as.literal);
+		break;
+
+	default:
+		break;
+	}
+
+	return content;
+}
+
+// Replace the textual content of 'node' with 'content'. Block-level code and
+// HTML nodes store it in string_content; string, inline HTML and inline code
+// nodes store it in their literal chunk.
+// Returns 1 if the node type carries text, 0 otherwise.
+int
+cmark_node_set_string_content(cmark_node *node, const char *content) {
+	int stored = 0;
+
+	switch (node->type) {
+	case NODE_INDENTED_CODE:
+	case NODE_FENCED_CODE:
+	case NODE_HTML:
+		cmark_strbuf_sets(&node->string_content, content);
+		stored = 1;
+		break;
+
+	case NODE_STRING:
+	case NODE_INLINE_HTML:
+	case NODE_INLINE_CODE:
+		cmark_chunk_set_cstr(&node->as.literal, content);
+		stored = 1;
+		break;
+
+	default:
+		break;
+	}
+
+	return stored;
+}
+
+// Return the level of an ATX or setext header node, or 0 for any other
+// node type.
+int
+cmark_node_get_header_level(cmark_node *node) {
+	if (node->type == CMARK_NODE_ATX_HEADER
+	    || node->type == CMARK_NODE_SETEXT_HEADER) {
+		return node->as.header.level;
+	}
+
+	return 0;
+}
+
+// Set the level of an ATX or setext header node.
+// Returns 1 on success; returns 0 when 'level' is outside 1..6 or the node
+// is not a header.
+int
+cmark_node_set_header_level(cmark_node *node, int level) {
+	// The range is validated before the node is inspected.
+	if (level < 1 || level > 6) {
+		return 0;
+	}
+
+	if (node->type != CMARK_NODE_ATX_HEADER
+	    && node->type != CMARK_NODE_SETEXT_HEADER) {
+		return 0;
+	}
+
+	node->as.header.level = level;
+	return 1;
+}
+
+// Return the list type of a list node, or CMARK_NO_LIST when 'node' is not
+// a list.
+cmark_list_type
+cmark_node_get_list_type(cmark_node *node) {
+	if (node->type != CMARK_NODE_LIST) {
+		return CMARK_NO_LIST;
+	}
+
+	return node->as.list.list_type;
+}
+
+// Set the list type of a list node. Only CMARK_BULLET_LIST and
+// CMARK_ORDERED_LIST are accepted.
+// Returns 1 on success, 0 for any other type value or a non-list node.
+int
+cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
+	// The requested type is validated before the node is inspected.
+	if (type != CMARK_BULLET_LIST && type != CMARK_ORDERED_LIST) {
+		return 0;
+	}
+
+	if (node->type != CMARK_NODE_LIST) {
+		return 0;
+	}
+
+	node->as.list.list_type = type;
+	return 1;
+}
+
+// Return the start number stored in a list node, or 0 when 'node' is not a
+// list.
+int
+cmark_node_get_list_start(cmark_node *node) {
+	return node->type == CMARK_NODE_LIST ? node->as.list.start : 0;
+}
+
+// Set the start number of a list node.
+// Returns 1 on success, 0 when 'start' is negative or the node is not a
+// list.
+int
+cmark_node_set_list_start(cmark_node *node, int start) {
+	// The value is validated before the node is inspected.
+	if (start < 0) {
+		return 0;
+	}
+
+	if (node->type != CMARK_NODE_LIST) {
+		return 0;
+	}
+
+	node->as.list.start = start;
+	return 1;
+}
+
+// Return the 'tight' flag of a list node, or 0 when 'node' is not a list.
+int
+cmark_node_get_list_tight(cmark_node *node) {
+	if (node->type != CMARK_NODE_LIST) {
+		return 0;
+	}
+
+	return node->as.list.tight;
+}
+
+// Store 'tight' in the 'tight' flag of a list node.
+// Returns 1 on success, 0 when 'node' is not a list.
+int
+cmark_node_set_list_tight(cmark_node *node, int tight) {
+	if (node->type != CMARK_NODE_LIST) {
+		return 0;
+	}
+
+	node->as.list.tight = tight;
+	return 1;
+}
+
+// Return the info string of a fenced code block as a C string, or NULL when
+// 'node' is not a fenced code block.
+const char*
+cmark_node_get_fence_info(cmark_node *node) {
+	if (node->type != NODE_FENCED_CODE) {
+		return NULL;
+	}
+
+	return cmark_strbuf_cstr(&node->as.code.info);
+}
+
+// Replace the info string of a fenced code block with 'info'.
+// Returns 1 on success, 0 when 'node' is not a fenced code block.
+int
+cmark_node_set_fence_info(cmark_node *node, const char *info) {
+	if (node->type != NODE_FENCED_CODE) {
+		return 0;
+	}
+
+	cmark_strbuf_sets(&node->as.code.info, info);
+	return 1;
+}
+
+// Return the URL stored in a link or image node. The pointer refers to the
+// node's own storage. Other node types yield NULL.
+const char*
+cmark_node_get_url(cmark_node *node) {
+	if (node->type == NODE_LINK || node->type == NODE_IMAGE) {
+		return (char *)node->as.link.url;
+	}
+
+	return NULL;
+}
+
+// Replace the URL of a link or image node with a private copy of 'url'.
+// A NULL 'url' clears the stored URL. The previous value is freed.
+// Returns 1 if the node is a link or image, 0 otherwise.
+int
+cmark_node_set_url(cmark_node *node, const char *url) {
+	switch (node->type) {
+	case NODE_LINK:
+	case NODE_IMAGE: {
+		// Copy first and free afterwards, so that passing the node's
+		// current URL (or NULL) never reads released or missing memory.
+		unsigned char *copy =
+			url ? (unsigned char *)S_strdup(url) : NULL;
+		free(node->as.link.url);
+		node->as.link.url = copy;
+		return 1;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+// Return the title stored in a link or image node. The pointer refers to
+// the node's own storage. Other node types yield NULL.
+const char*
+cmark_node_get_title(cmark_node *node) {
+	if (node->type == NODE_LINK || node->type == NODE_IMAGE) {
+		return (char *)node->as.link.title;
+	}
+
+	return NULL;
+}
+
+// Replace the title of a link or image node with a private copy of 'title'.
+// A NULL 'title' clears the stored title. The previous value is freed.
+// Returns 1 if the node is a link or image, 0 otherwise.
+int
+cmark_node_set_title(cmark_node *node, const char *title) {
+	switch (node->type) {
+	case NODE_LINK:
+	case NODE_IMAGE: {
+		// Copy first and free afterwards, so that passing the node's
+		// current title (or NULL) never reads released or missing
+		// memory.
+		unsigned char *copy =
+			title ? (unsigned char *)S_strdup(title) : NULL;
+		free(node->as.link.title);
+		node->as.link.title = copy;
+		return 1;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+// Return the start_line value recorded in 'node'.
+int
+cmark_node_get_start_line(cmark_node *node) {
+	const int line = node->start_line;
+
+	return line;
+}
+
+// Return the start_column value recorded in 'node'.
+int
+cmark_node_get_start_column(cmark_node *node) {
+	const int column = node->start_column;
+
+	return column;
+}
+
+// Return the end_line value recorded in 'node'.
+int
+cmark_node_get_end_line(cmark_node *node) {
+	const int line = node->end_line;
+
+	return line;
+}
+
+// True when the node's type lies in the range
+// CMARK_NODE_FIRST_BLOCK..CMARK_NODE_LAST_BLOCK (inclusive).
+static inline bool
+S_is_block(cmark_node *node) {
+	const cmark_node_type kind = node->type;
+
+	if (kind < CMARK_NODE_FIRST_BLOCK) {
+		return false;
+	}
+	return kind <= CMARK_NODE_LAST_BLOCK;
+}
+
+// True when the node's type lies in the range
+// CMARK_NODE_FIRST_INLINE..CMARK_NODE_LAST_INLINE (inclusive).
+static inline bool
+S_is_inline(cmark_node *node) {
+	const cmark_node_type kind = node->type;
+
+	if (kind < CMARK_NODE_FIRST_INLINE) {
+		return false;
+	}
+	return kind <= CMARK_NODE_LAST_INLINE;
+}
+
+// Decide whether 'child' may become a child of 'node'.
+//
+// Returns false when either pointer is NULL, when 'child' is 'node' itself
+// or one of its ancestors (that would create a cycle), when 'child' is a
+// document, or when the container type does not accept the child's type:
+//   - document, block quote, list item: any block except a list item
+//   - list: list items only
+//   - paragraph, headers, emph, strong, link, image: inlines only
+//   - everything else: no children
+static bool
+S_can_contain(cmark_node *node, cmark_node *child)
+{
+	cmark_node *cur;
+
+	// The insert functions pass node->parent here, which is NULL for a
+	// root or detached node; refuse instead of dereferencing it.
+	if (node == NULL || child == NULL) {
+		return false;
+	}
+
+	// Verify that child is not an ancestor of node or equal to node.
+	cur = node;
+	do {
+		if (cur == child) {
+			return false;
+		}
+		cur = cur->parent;
+	} while (cur != NULL);
+
+	if (child->type == CMARK_NODE_DOCUMENT) {
+		return false;
+	}
+
+	switch (node->type) {
+	case CMARK_NODE_DOCUMENT:
+	case CMARK_NODE_BLOCK_QUOTE:
+	case CMARK_NODE_LIST_ITEM:
+		return S_is_block(child)
+		       && child->type != CMARK_NODE_LIST_ITEM;
+
+	case CMARK_NODE_LIST:
+		return child->type == CMARK_NODE_LIST_ITEM;
+
+	case CMARK_NODE_PARAGRAPH:
+	case CMARK_NODE_ATX_HEADER:
+	case CMARK_NODE_SETEXT_HEADER:
+	case CMARK_NODE_EMPH:
+	case CMARK_NODE_STRONG:
+	case CMARK_NODE_LINK:
+	case CMARK_NODE_IMAGE:
+		return S_is_inline(child);
+
+	default:
+		break;
+	}
+
+	return false;
+}
+
+// Remove 'node' from the structure around it: its neighbours are joined to
+// each other and the parent's first_child/last_child are moved off it.
+// The node's own next, prev and parent fields are left untouched; callers
+// reset or overwrite them as needed.
+static void
+S_node_unlink(cmark_node *node)
+{
+	cmark_node *before = node->prev;
+	cmark_node *after  = node->next;
+	cmark_node *owner  = node->parent;
+
+	if (before != NULL) {
+		before->next = after;
+	}
+	if (after != NULL) {
+		after->prev = before;
+	}
+
+	if (owner == NULL) {
+		return;
+	}
+
+	// Keep the parent's child pointers off the removed node.
+	if (owner->first_child == node) {
+		owner->first_child = after;
+	}
+	if (owner->last_child == node) {
+		owner->last_child = before;
+	}
+}
+
+// Detach 'node' from its siblings and parent and clear its own next, prev
+// and parent links. Its children are not touched.
+void
+cmark_node_unlink(cmark_node *node) {
+	S_node_unlink(node);
+
+	node->next = node->prev = node->parent = NULL;
+}
+
+// Insert 'sibling' immediately before 'node', first detaching 'sibling'
+// from wherever it currently is.
+// Returns 1 on success. Returns 0, changing nothing, when either argument
+// is NULL, when both arguments are the same node, when 'node' has no
+// parent, or when the parent may not contain 'sibling'.
+int
+cmark_node_insert_before(cmark_node *node, cmark_node *sibling)
+{
+	// Inserting a node next to itself would leave node->next == node.
+	if (node == NULL || sibling == NULL || node == sibling) {
+		return 0;
+	}
+
+	// A node without a parent has no child list to insert into.
+	if (node->parent == NULL || !S_can_contain(node->parent, sibling)) {
+		return 0;
+	}
+
+	S_node_unlink(sibling);
+
+	// Read after the unlink: if 'sibling' was node->prev, the unlink has
+	// already moved node->prev one step back.
+	cmark_node *old_prev = node->prev;
+
+	// Insert 'sibling' between 'old_prev' and 'node'.
+	if (old_prev) {
+		old_prev->next = sibling;
+	}
+	sibling->prev = old_prev;
+	sibling->next = node;
+	node->prev    = sibling;
+
+	// Set new parent.
+	cmark_node *parent = node->parent;
+	sibling->parent = parent;
+
+	// Adjust first_child of parent if inserted as first child.
+	if (parent && !old_prev) {
+		parent->first_child = sibling;
+	}
+
+	return 1;
+}
+
+// Insert 'sibling' immediately after 'node', first detaching 'sibling'
+// from wherever it currently is.
+// Returns 1 on success. Returns 0, changing nothing, when either argument
+// is NULL, when both arguments are the same node, when 'node' has no
+// parent, or when the parent may not contain 'sibling'.
+int
+cmark_node_insert_after(cmark_node *node, cmark_node *sibling)
+{
+	// Inserting a node next to itself would leave node->next == node.
+	if (node == NULL || sibling == NULL || node == sibling) {
+		return 0;
+	}
+
+	// A node without a parent has no child list to insert into.
+	if (node->parent == NULL || !S_can_contain(node->parent, sibling)) {
+		return 0;
+	}
+
+	S_node_unlink(sibling);
+
+	// Read after the unlink: if 'sibling' was node->next, the unlink has
+	// already moved node->next one step forward.
+	cmark_node *old_next = node->next;
+
+	// Insert 'sibling' between 'node' and 'old_next'.
+	if (old_next) {
+		old_next->prev = sibling;
+	}
+	sibling->next = old_next;
+	sibling->prev = node;
+	node->next    = sibling;
+
+	// Set new parent.
+	cmark_node *parent = node->parent;
+	sibling->parent = parent;
+
+	// Adjust last_child of parent if inserted as last child.
+	if (parent && !old_next) {
+		parent->last_child = sibling;
+	}
+
+	return 1;
+}
+
+// Make 'child' the first child of 'node', detaching it from its current
+// position first.
+// Returns 1 on success, 0 when S_can_contain() rejects the combination.
+int
+cmark_node_prepend_child(cmark_node *node, cmark_node *child)
+{
+	cmark_node *former_head;
+
+	if (!S_can_contain(node, child)) {
+		return 0;
+	}
+
+	S_node_unlink(child);
+
+	// Read after the unlink, which may have changed node->first_child.
+	former_head = node->first_child;
+
+	child->parent = node;
+	child->prev   = NULL;
+	child->next   = former_head;
+
+	if (former_head != NULL) {
+		former_head->prev = child;
+	}
+	else {
+		// 'node' had no children, so 'child' is also the last one.
+		node->last_child = child;
+	}
+	node->first_child = child;
+
+	return 1;
+}
+
+// Make 'child' the last child of 'node', detaching it from its current
+// position first.
+// Returns 1 on success, 0 when S_can_contain() rejects the combination.
+int
+cmark_node_append_child(cmark_node *node, cmark_node *child)
+{
+	cmark_node *former_tail;
+
+	if (!S_can_contain(node, child)) {
+		return 0;
+	}
+
+	S_node_unlink(child);
+
+	// Read after the unlink, which may have changed node->last_child.
+	former_tail = node->last_child;
+
+	child->parent = node;
+	child->next   = NULL;
+	child->prev   = former_tail;
+
+	if (former_tail != NULL) {
+		former_tail->next = child;
+	}
+	else {
+		// 'node' had no children, so 'child' is also the first one.
+		node->first_child = child;
+	}
+	node->last_child = child;
+
+	return 1;
+}
+
+// Report an inconsistent link field ('elem') found on 'node', together with
+// the node's type name and start position. Nothing is written when 'out'
+// is NULL.
+static void
+S_print_error(FILE *out, cmark_node *node, const char *elem)
+{
+	if (out != NULL) {
+		fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem,
+			S_type_string(node), node->start_line,
+			node->start_column);
+	}
+}
+
+// Check the link structure of the tree rooted at 'node' and repair it.
+//
+// The tree is walked depth-first using the first_child and next pointers.
+// Every parent, prev and last_child pointer that disagrees with that walk
+// is reported through S_print_error() (silent when 'out' is NULL),
+// overwritten with the expected value and counted.
+//
+// Returns the number of inconsistencies found; 0 for a NULL 'node'.
+int
+cmark_node_check(cmark_node *node, FILE *out)
+{
+	cmark_node *cur;
+	int errors = 0;
+
+	if (!node) {
+		return 0;
+	}
+
+	cur = node;
+	while (true) {
+		// Descend: the first child must point back to its parent.
+		if (cur->first_child) {
+			if (cur->first_child->parent != cur) {
+				S_print_error(out, cur->first_child, "parent");
+				cur->first_child->parent = cur;
+				++errors;
+			}
+			cur = cur->first_child;
+			continue;
+		}
+
+	next_sibling:
+		// Back at the root of the checked subtree: the walk is complete.
+		if (cur == node) {
+			break;
+		}
+		// Advance: the next sibling must point back to 'cur' and share
+		// its parent.
+		if (cur->next) {
+			if (cur->next->prev != cur) {
+				S_print_error(out, cur->next, "prev");
+				cur->next->prev = cur;
+				++errors;
+			}
+			if (cur->next->parent != cur->parent) {
+				S_print_error(out, cur->next, "parent");
+				cur->next->parent = cur->parent;
+				++errors;
+			}
+			cur = cur->next;
+			continue;
+		}
+
+		// No further sibling: 'cur' must be its parent's last child.
+		// Then climb one level and continue with the parent's siblings.
+		if (cur->parent->last_child != cur) {
+			S_print_error(out, cur->parent, "last_child");
+			cur->parent->last_child = cur;
+			++errors;
+		}
+		cur = cur->parent;
+		goto next_sibling;
+	}
+
+	return errors;
+}
+
+// Free a cmark_node list and any children.
+//
+// Starting at 'e', every node reachable through 'next' is freed together
+// with its type-specific payload. Children are freed without recursion:
+// before a node is released, its child list is spliced into the 'next'
+// chain directly after it. A NULL 'e' is a no-op.
+void cmark_free_nodes(cmark_node *e)
+{
+	cmark_node *next;
+	while (e != NULL) {
+		strbuf_free(&e->string_content);
+		// Release whatever the type-specific union member owns.
+		switch (e->type){
+		case NODE_FENCED_CODE:
+			strbuf_free(&e->as.code.info);
+			break;
+		case NODE_STRING:
+		case NODE_INLINE_HTML:
+		case NODE_INLINE_CODE:
+			cmark_chunk_free(&e->as.literal);
+			break;
+		case NODE_LINK:
+		case NODE_IMAGE:
+			free(e->as.link.url);
+			free(e->as.link.title);
+			break;
+		default:
+			break;
+		}
+		if (e->last_child) {
+			// Splice children into list
+			// (first_child..last_child now sit between e and the old
+			// e->next, so the loop reaches them next).
+			e->last_child->next = e->next;
+			e->next = e->first_child;
+		}
+		// Remember the successor before the node itself is released.
+		next = e->next;
+		free(e);
+		e = next;
+	}
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/node.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h
new file mode 100644
index 0000000..d1245a5
--- /dev/null
+++ b/compiler/modules/CommonMark/src/node.h
@@ -0,0 +1,74 @@
+#ifndef CMARK_NODE_H
+#define CMARK_NODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+
+#include "cmark.h"
+#include "buffer.h"
+#include "chunk.h"
+
+// Payload of a list node (the 'list' member of cmark_node's 'as' union).
+typedef struct {
+	cmark_list_type   list_type;     // bullet or ordered; new nodes default to bullet
+	int               marker_offset; // NOTE(review): not used in node.c/print.c -- presumably parser state
+	int               padding;       // NOTE(review): not used in node.c/print.c -- presumably parser state
+	int               start;         // start number; new nodes default to 1
+	cmark_delim_type  delimiter;     // printed as "parens" or "period" for ordered lists
+	unsigned char     bullet_char;   // printed for bullet lists
+	bool              tight;         // new nodes default to false
+} cmark_list;
+
+// Payload of a fenced code block (the 'code' member of cmark_node's 'as'
+// union).
+typedef struct {
+	int               fence_length; // shown as "length=" by the debug printer
+	int               fence_offset; // NOTE(review): not used in node.c/print.c -- presumably parser state
+	unsigned char     fence_char;   // NOTE(review): not used in node.c/print.c -- presumably the fence character
+	cmark_strbuf      info;         // info string; see cmark_node_get/set_fence_info
+} cmark_fenced_code;
+
+// Payload of an ATX or setext header node.
+typedef struct {
+	int level; // new nodes default to 1; the setter accepts 1..6
+} cmark_header;
+
+// Payload of a link or image node. Both strings are owned by the node and
+// released with free(); either may be NULL.
+typedef struct {
+	unsigned char *url;
+	unsigned char *title;
+} cmark_link;
+
+// One node of the document tree. Siblings form a doubly linked list through
+// next/prev; each node knows its parent and the two ends of its child list.
+struct cmark_node {
+	cmark_node_type type;
+
+	struct cmark_node *next;
+	struct cmark_node *prev;
+	struct cmark_node *parent;
+	struct cmark_node *first_child;
+	struct cmark_node *last_child;
+
+	// Source position as reported by the cmark_node_get_start_line,
+	// _start_column and _end_line accessors.
+	int start_line;
+	int start_column;
+	int end_line;
+	// NOTE(review): 'open' and 'last_line_blank' are not used in node.c or
+	// print.c -- presumably block-parser state; confirm in blocks.c.
+	bool open;
+	bool last_line_blank;
+
+	// Text of indented code, fenced code and HTML blocks.
+	cmark_strbuf string_content;
+
+	// Type-specific payload; which member is valid depends on 'type'.
+	union {
+		cmark_chunk       literal; // string, inline code, inline HTML
+		cmark_list        list;    // list
+		cmark_fenced_code code;    // fenced code block
+		cmark_header      header;  // ATX / setext header
+		cmark_link        link;    // link, image
+	} as;
+};
+
+// Check and repair the parent/prev/last_child links of the tree rooted at
+// 'node'. Problems are reported on 'out' unless it is NULL. Returns the
+// number of inconsistencies found (0 for a NULL node).
+CMARK_EXPORT int
+cmark_node_check(cmark_node *node, FILE *out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/parser.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h
new file mode 100644
index 0000000..4bbea09
--- /dev/null
+++ b/compiler/modules/CommonMark/src/parser.h
@@ -0,0 +1,27 @@
+#ifndef CMARK_AST_H
+#define CMARK_AST_H
+
+#include <stdio.h>
+#include "node.h"
+#include "references.h"
+#include "buffer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Longest link label (chunk length) that reference_lookup() will try to
+// resolve; longer labels are treated as undefined.
+#define MAX_LINK_LABEL_LENGTH 1000
+
+// State of a document parse.
+// NOTE(review): the field meanings below are inferred from their names and
+// types only -- confirm against the block parser before relying on them.
+struct cmark_doc_parser {
+	struct cmark_reference_map *refmap;  // presumably the link reference definitions collected so far
+	struct cmark_node* root;             // presumably the document node
+	struct cmark_node* current;          // presumably the block currently being built
+	int line_number;                     // presumably the number of the line being processed
+	cmark_strbuf *curline;               // presumably the buffer holding the current line
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/print.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/print.c b/compiler/modules/CommonMark/src/print.c
new file mode 100644
index 0000000..b1bab4b
--- /dev/null
+++ b/compiler/modules/CommonMark/src/print.c
@@ -0,0 +1,182 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "cmark.h"
+#include "node.h"
+#include "debug.h"
+
+// Write 's' to stdout as a double-quoted string, escaping newline, double
+// quote and backslash. 'len' is the number of bytes to print; a negative
+// 'len' means 's' is NUL-terminated and its length is measured here.
+// A NULL 's' is printed as an empty string ("").
+static void print_str(const unsigned char *s, int len)
+{
+	int i;
+
+	if (s == NULL) {
+		// Callers pass raw buffer pointers (url, strbuf ptr) that can be
+		// NULL, e.g. on a node that was only zero-initialized.
+		len = 0;
+	} else if (len < 0) {
+		len = (int)strlen((const char *)s);
+	}
+
+	putchar('"');
+	for (i = 0; i < len; ++i) {
+		unsigned char c = s[i];
+
+		switch (c) {
+		case '\n':
+			printf("\\n");
+			break;
+		case '"':
+			printf("\\\"");
+			break;
+		case '\\':
+			printf("\\\\");
+			break;
+		default:
+			putchar((int)c);
+		}
+	}
+	putchar('"');
+}
+
+// Debugging aid: dump a chain of sibling inline nodes to stdout, one node
+// per line, each line indented by 'indent' spaces. Links, images, strong
+// and emph nodes are followed by their children at indent + 2. Node types
+// not listed below produce only the indentation.
+static void print_inlines(cmark_node* ils, int indent)
+{
+	cmark_node *cur;
+	int col;
+
+	for (cur = ils; cur != NULL; cur = cur->next) {
+		for (col = 0; col < indent; col++) {
+			putchar(' ');
+		}
+
+		switch (cur->type) {
+		case NODE_STRING:
+		case NODE_INLINE_CODE:
+		case NODE_INLINE_HTML:
+			// Literal-carrying inlines: tag, then the quoted text.
+			if (cur->type == NODE_STRING) {
+				printf("str ");
+			} else if (cur->type == NODE_INLINE_CODE) {
+				printf("code ");
+			} else {
+				printf("html ");
+			}
+			print_str(cur->as.literal.data, cur->as.literal.len);
+			putchar('\n');
+			break;
+
+		case NODE_LINEBREAK:
+			printf("linebreak\n");
+			break;
+
+		case NODE_SOFTBREAK:
+			printf("softbreak\n");
+			break;
+
+		case NODE_LINK:
+		case NODE_IMAGE:
+			printf("%s url=", cur->type == NODE_LINK ? "link" : "image");
+
+			if (cur->as.link.url)
+				print_str(cur->as.link.url, -1);
+
+			if (cur->as.link.title) {
+				printf(" title=");
+				print_str(cur->as.link.title, -1);
+			}
+			putchar('\n');
+			print_inlines(cur->first_child, indent + 2);
+			break;
+
+		case NODE_STRONG:
+		case NODE_EMPH:
+			if (cur->type == NODE_STRONG) {
+				printf("strong\n");
+			} else {
+				printf("emph\n");
+			}
+			print_inlines(cur->first_child, indent + 2);
+			break;
+
+		default:
+			break;
+		}
+	}
+}
+
+// Debugging aid: dump a chain of sibling block nodes to stdout, one node
+// per line, each line indented by 'indent' spaces. Container blocks are
+// followed by their child blocks, and paragraphs and headers by their
+// inlines, at indent + 2. Unhandled node types are reported as
+// "# NOT IMPLEMENTED".
+static void print_blocks(cmark_node* b, int indent)
+{
+	const int child_indent = indent + 2;
+	cmark_list *list;
+	int col;
+
+	for (; b != NULL; b = b->next) {
+		for (col = 0; col < indent; col++) {
+			putchar(' ');
+		}
+
+		switch (b->type) {
+		// Containers of other blocks.
+		case NODE_DOCUMENT:
+			printf("document\n");
+			print_blocks(b->first_child, child_indent);
+			break;
+		case NODE_BLOCK_QUOTE:
+			printf("block_quote\n");
+			print_blocks(b->first_child, child_indent);
+			break;
+		case NODE_LIST_ITEM:
+			printf("list_item\n");
+			print_blocks(b->first_child, child_indent);
+			break;
+		case NODE_LIST:
+			list = &(b->as.list);
+			if (list->list_type != CMARK_ORDERED_LIST) {
+				printf("list (type=bullet tight=%s bullet_char=%c)\n",
+				       (list->tight ? "true" : "false"),
+				       list->bullet_char);
+			} else {
+				printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+				       (list->tight ? "true" : "false"),
+				       list->start,
+				       (list->delimiter == CMARK_PAREN_DELIM ? "parens" : "period"));
+			}
+			print_blocks(b->first_child, child_indent);
+			break;
+
+		// Blocks whose children are inlines.
+		case NODE_ATX_HEADER:
+			printf("atx_header (level=%d)\n", b->as.header.level);
+			print_inlines(b->first_child, child_indent);
+			break;
+		case NODE_SETEXT_HEADER:
+			printf("setext_header (level=%d)\n", b->as.header.level);
+			print_inlines(b->first_child, child_indent);
+			break;
+		case NODE_PARAGRAPH:
+			printf("paragraph\n");
+			print_inlines(b->first_child, child_indent);
+			break;
+
+		// Leaf blocks.
+		case NODE_HRULE:
+			printf("hrule\n");
+			break;
+		case NODE_INDENTED_CODE:
+			printf("indented_code ");
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case NODE_FENCED_CODE:
+			printf("fenced_code length=%d info=",
+			       b->as.code.fence_length);
+			print_str(b->as.code.info.ptr, -1);
+			putchar(' ');
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case NODE_HTML:
+			printf("html_block ");
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case NODE_REFERENCE_DEF:
+			printf("reference_def\n");
+			break;
+
+		default:
+			printf("# NOT IMPLEMENTED (%d)\n", b->type);
+			break;
+		}
+	}
+}
+
+// Dump the tree rooted at 'root' to stdout for debugging, starting with no
+// indentation.
+void cmark_debug_print(cmark_node *root)
+{
+	const int top_level_indent = 0;
+
+	print_blocks(root, top_level_indent);
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/references.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c
new file mode 100644
index 0000000..1738de1
--- /dev/null
+++ b/compiler/modules/CommonMark/src/references.c
@@ -0,0 +1,153 @@
+#include "cmark.h"
+#include "utf8.h"
+#include "parser.h"
+#include "references.h"
+#include "inlines.h"
+#include "chunk.h"
+
+// Hash a NUL-terminated label. Each byte is folded in as
+//   hash = byte + (hash << 6) + (hash << 16) - hash
+// using unsigned arithmetic, starting from 0.
+static unsigned int
+refhash(const unsigned char *link_ref)
+{
+	unsigned int acc = 0;
+	const unsigned char *p;
+
+	for (p = link_ref; *p != '\0'; ++p) {
+		acc = *p + (acc << 6) + (acc << 16) - acc;
+	}
+
+	return acc;
+}
+
+// Release a reference together with the label, URL and title it owns.
+// A NULL 'ref' is ignored.
+static void reference_free(reference *ref)
+{
+	if (ref == NULL)
+		return;
+
+	free(ref->label);
+	free(ref->url);
+	free(ref->title);
+	free(ref);
+}
+
+// Build the normalized form of a reference label: the text is case folded,
+// trimmed at both ends, and its whitespace is normalized.
+// Returns a newly allocated string owned by the caller, or NULL when 'ref'
+// is NULL, empty, or normalizes to an empty name (i.e. it consisted of
+// whitespace only).
+static unsigned char *normalize_reference(chunk *ref)
+{
+	strbuf normalized = GH_BUF_INIT;
+	unsigned char *result;
+
+	if (ref == NULL || ref->len == 0)
+		return NULL;
+
+	utf8proc_case_fold(&normalized, ref->data, ref->len);
+	strbuf_trim(&normalized);
+	strbuf_normalize_whitespace(&normalized);
+
+	result = strbuf_detach(&normalized);
+	assert(result);
+
+	if (result[0] != '\0')
+		return result;
+
+	// Nothing left after normalization.
+	free(result);
+	return NULL;
+}
+
+// Insert 'ref' at the head of its hash bucket in 'map'. If the bucket
+// already holds an entry with the same hash and label, the existing entry
+// is kept and 'ref' is freed instead.
+static void add_reference(reference_map *map, reference* ref)
+{
+	reference **bucket = &map->table[ref->hash % REFMAP_SIZE];
+	reference *cur;
+
+	// Link the newcomer in front of the existing chain up front; this
+	// only takes effect if no duplicate is found below.
+	ref->next = *bucket;
+
+	for (cur = *bucket; cur != NULL; cur = cur->next) {
+		if (cur->hash != ref->hash)
+			continue;
+		if (strcmp((char *)cur->label, (char *)ref->label) == 0) {
+			reference_free(ref);
+			return;
+		}
+	}
+
+	*bucket = ref;
+}
+
+// Add a link reference definition to 'map'. The label is normalized with
+// normalize_reference(); the URL and title are stored as returned by
+// clean_url() and clean_title().
+// Nothing is added when the label normalizes to an empty name or when the
+// entry cannot be allocated. add_reference() drops the new entry if the
+// label is already defined.
+extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+	reference *ref;
+	unsigned char *reflabel = normalize_reference(label);
+
+	/* empty reference name, or composed from only whitespace */
+	if (reflabel == NULL)
+		return;
+
+	ref = (reference *)calloc(1, sizeof(*ref));
+	if (ref == NULL) {
+		// Nothing else owns the normalized label yet; release it so an
+		// allocation failure does not leak it.
+		free(reflabel);
+		return;
+	}
+
+	ref->label = reflabel;
+	ref->hash = refhash(ref->label);
+	ref->url = clean_url(url);
+	ref->title = clean_title(title);
+	ref->next = NULL;
+
+	add_reference(map, ref);
+}
+
+// Find the reference whose normalized label matches 'label'.
+// Returns the entry stored in 'map' (still owned by the map), or NULL when
+// the label is longer than MAX_LINK_LABEL_LENGTH, 'map' is NULL, the label
+// normalizes to nothing, or no entry matches.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+	reference *hit;
+	unsigned char *key;
+	unsigned int key_hash;
+
+	if (label->len > MAX_LINK_LABEL_LENGTH)
+		return NULL;
+
+	if (map == NULL)
+		return NULL;
+
+	key = normalize_reference(label);
+	if (key == NULL)
+		return NULL;
+
+	key_hash = refhash(key);
+
+	// Walk the bucket until hash and label both match; 'hit' ends up NULL
+	// when the chain is exhausted.
+	for (hit = map->table[key_hash % REFMAP_SIZE];
+	     hit != NULL;
+	     hit = hit->next) {
+		if (hit->hash == key_hash &&
+		    strcmp((char *)hit->label, (char *)key) == 0)
+			break;
+	}
+
+	free(key);
+	return hit;
+}
+
+// Free 'map' together with every reference stored in it.
+// A NULL 'map' is ignored.
+void reference_map_free(reference_map *map)
+{
+	unsigned int bucket;
+	reference *cur;
+	reference *following;
+
+	if (map == NULL)
+		return;
+
+	for (bucket = 0; bucket < REFMAP_SIZE; ++bucket) {
+		for (cur = map->table[bucket]; cur != NULL; cur = following) {
+			// Save the link before the entry is released.
+			following = cur->next;
+			reference_free(cur);
+		}
+	}
+
+	free(map);
+}
+
+// Allocate an empty, zero-initialized reference map.
+// Returns NULL if the allocation fails. Release it with
+// reference_map_free().
+reference_map *reference_map_new(void)
+{
+	reference_map *map = (reference_map *)calloc(1, sizeof(reference_map));
+
+	return map;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/references.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.h b/compiler/modules/CommonMark/src/references.h
new file mode 100644
index 0000000..572178d
--- /dev/null
+++ b/compiler/modules/CommonMark/src/references.h
@@ -0,0 +1,46 @@
+#ifndef CMARK_REFERENCES_H
+#define CMARK_REFERENCES_H
+
+#include "chunk.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Number of hash buckets in a cmark_reference_map; a reference lives in
+// bucket (hash % REFMAP_SIZE).
+#define REFMAP_SIZE 16
+
+// One link reference definition, stored as an entry in a hash bucket chain.
+// The label, url and title strings are owned by the entry and released with
+// free() when the entry is destroyed.
+struct cmark_reference {
+	struct cmark_reference *next;  // next entry in the same bucket
+	unsigned char *label;          // normalized label used as the lookup key
+	unsigned char *url;            // result of clean_url()
+	unsigned char *title;          // result of clean_title()
+	unsigned int hash;             // hash of 'label'
+};
+
+typedef struct cmark_reference cmark_reference;
+
+// Hash table of link reference definitions: REFMAP_SIZE buckets, each the
+// head of a singly linked chain of cmark_reference entries.
+struct cmark_reference_map {
+	cmark_reference *table[REFMAP_SIZE];
+};
+
+typedef struct cmark_reference_map cmark_reference_map;
+
+// Allocate an empty map; returns NULL if the allocation fails.
+cmark_reference_map *cmark_reference_map_new(void);
+// Free a map and all references stored in it; NULL is ignored.
+void cmark_reference_map_free(cmark_reference_map *map);
+// Return the entry matching 'label' after normalization, or NULL if there is
+// none. The entry stays owned by the map.
+cmark_reference* cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label);
+// Add a definition for 'label'. If the label is already defined the existing
+// entry is kept; labels that are empty after normalization are ignored.
+extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title);
+
+#ifndef CMARK_NO_SHORT_NAMES
+  #define reference             cmark_reference
+  #define reference_map         cmark_reference_map
+  #define reference_map_new     cmark_reference_map_new
+  #define reference_map_free    cmark_reference_map_free
+  #define reference_lookup      cmark_reference_lookup
+  #define reference_create      cmark_reference_create
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif