You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2015/08/06 18:19:43 UTC

[06/20] lucy-clownfish git commit: Upgrade libcmark to 0.21.0

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/inlines.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c
index 2487f63..7ea308d 100644
--- a/compiler/modules/CommonMark/src/inlines.c
+++ b/compiler/modules/CommonMark/src/inlines.c
@@ -14,6 +14,15 @@
 #include "inlines.h"
 
 
+static const char *EMDASH = "\xE2\x80\x94";
+static const char *ENDASH = "\xE2\x80\x93";
+static const char *ELLIPSES = "\xE2\x80\xA6";
+static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";
+static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D";
+static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
+static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
+
+
 // Macros for creating various kinds of simple.
 #define make_str(s) make_literal(CMARK_NODE_TEXT, s)
 #define make_code(s) make_literal(CMARK_NODE_CODE, s)
@@ -27,8 +36,8 @@ typedef struct delimiter {
 	struct delimiter *previous;
 	struct delimiter *next;
 	cmark_node *inl_text;
+	bufsize_t position;
 	unsigned char delim_char;
-	int position;
 	bool can_open;
 	bool can_close;
 	bool active;
@@ -36,45 +45,53 @@ typedef struct delimiter {
 
 typedef struct {
 	cmark_chunk input;
-	int pos;
+	bufsize_t pos;
 	cmark_reference_map *refmap;
 	delimiter *last_delim;
 } subject;
 
+static inline bool
+S_is_line_end_char(char c)
+{
+	return (c == '\n' || c == '\r');
+}
+
 static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
 
-static int parse_inline(subject* subj, cmark_node * parent);
+static int parse_inline(subject* subj, cmark_node * parent, int options);
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj);
+static bufsize_t subject_find_special_char(subject *subj, int options);
 
-static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
+static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
 	cmark_strbuf buf = GH_BUF_INIT;
 
 	cmark_chunk_trim(url);
 
-	if (url->len == 0)
-		return NULL;
+	if (url->len == 0) {
+		cmark_chunk result = CMARK_CHUNK_EMPTY;
+		return result;
+	}
 
 	if (is_email)
 		cmark_strbuf_puts(&buf, "mailto:");
 
 	houdini_unescape_html_f(&buf, url->data, url->len);
-	return cmark_strbuf_detach(&buf);
+	return cmark_chunk_buf_detach(&buf);
 }
 
-static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title)
 {
 	cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
 	if(e != NULL) {
 		e->type = CMARK_NODE_LINK;
 		e->first_child   = label;
 		e->last_child    = label;
-		e->as.link.url   = url;
-		e->as.link.title = title;
+		e->as.link.url   = *url;
+		e->as.link.title = *title;
 		e->next = NULL;
 		label->parent = e;
 	}
@@ -83,7 +100,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig
 
 static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
 {
-	return make_link(label, cmark_clean_autolink(&url, is_email), NULL);
+	cmark_chunk clean_url = cmark_clean_autolink(&url, is_email);
+	cmark_chunk title = CMARK_CHUNK_EMPTY;
+	return make_link(label, &clean_url, &title);
 }
 
 // Create an inline with a literal string value.
@@ -125,19 +144,20 @@ static inline cmark_node* make_simple(cmark_node_type t)
 	return e;
 }
 
-static unsigned char *bufdup(const unsigned char *buf)
+// Duplicate a chunk by creating a copy of the buffer not by reusing the
+// buffer like cmark_chunk_dup does.
+static cmark_chunk chunk_clone(cmark_chunk *src)
 {
-	unsigned char *new_buf = NULL;
+	cmark_chunk c;
+	bufsize_t len = src->len;
 
-	if (buf) {
-		int len = strlen((char *)buf);
-		new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
-		if(new_buf != NULL) {
-			memcpy(new_buf, buf, len + 1);
-		}
-	}
+	c.len   = len;
+	c.data  = (unsigned char *)malloc(len + 1);
+	c.alloc = 1;
+	memcpy(c.data, src->data, len);
+	c.data[len] = '\0';
 
-	return new_buf;
+	return c;
 }
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
@@ -149,8 +169,6 @@ static void subject_from_buf(subject *e, cmark_strbuf *buffer,
 	e->pos = 0;
 	e->refmap = refmap;
 	e->last_delim = NULL;
-
-	cmark_chunk_rtrim(&e->input);
 }
 
 static inline int isbacktick(int c)
@@ -160,10 +178,13 @@ static inline int isbacktick(int c)
 
 static inline unsigned char peek_char(subject *subj)
 {
+	// NULL bytes should have been stripped out by now.  If they're
+	// present, it's a programming error:
+	assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
 	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 }
 
-static inline unsigned char peek_at(subject *subj, int pos)
+static inline unsigned char peek_at(subject *subj, bufsize_t pos)
 {
 	return subj->input.data[pos];
 }
@@ -177,12 +198,38 @@ static inline int is_eof(subject* subj)
 // Advance the subject.  Doesn't check for eof.
 #define advance(subj) (subj)->pos += 1
 
+static inline bool
+skip_spaces(subject *subj)
+{
+	bool skipped = false;
+	while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
+		advance(subj);
+		skipped = true;
+	}
+	return skipped;
+}
+
+static inline bool
+skip_line_end(subject *subj)
+{
+	bool seen_line_end_char = false;
+	if (peek_char(subj) == '\r') {
+		advance(subj);
+		seen_line_end_char = true;
+	}
+	if (peek_char(subj) == '\n') {
+		advance(subj);
+		seen_line_end_char = true;
+	}
+	return seen_line_end_char || is_eof(subj);
+}
+
 // Take characters while a predicate holds, and return a string.
 static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 {
 	unsigned char c;
-	int startpos = subj->pos;
-	int len = 0;
+	bufsize_t startpos = subj->pos;
+	bufsize_t len = 0;
 
 	while ((c = peek_char(subj)) && (*f)(c)) {
 		advance(subj);
@@ -197,7 +244,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 // parsed).  Return 0 if you don't find matching closing
 // backticks, otherwise return the position in the subject
 // after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
 {
 	// read non backticks
 	unsigned char c;
@@ -207,7 +254,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 	if (is_eof(subj)) {
 		return 0;  // did not find closing ticks, return 0
 	}
-	int numticks = 0;
+	bufsize_t numticks = 0;
 	while (peek_char(subj) == '`') {
 		advance(subj);
 		numticks++;
@@ -223,8 +270,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 static cmark_node* handle_backticks(subject *subj)
 {
 	cmark_chunk openticks = take_while(subj, isbacktick);
-	int startpos = subj->pos;
-	int endpos = scan_to_closing_backticks(subj, openticks.len);
+	bufsize_t startpos = subj->pos;
+	bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
 
 	if (endpos == 0) { // not found
 		subj->pos = startpos; // rewind
@@ -246,10 +293,11 @@ static int
 scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 {
 	int numdelims = 0;
-	int before_char_pos;
+	bufsize_t before_char_pos;
 	int32_t after_char = 0;
 	int32_t before_char = 0;
 	int len;
+	bool left_flanking, right_flanking;
 
 	if (subj->pos == 0) {
 		before_char = 10;
@@ -267,9 +315,14 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 		}
 	}
 
-	while (peek_char(subj) == c) {
+	if (c == '\'' || c == '"') {
 		numdelims++;
-		advance(subj);
+		advance(subj);  // limit to 1 delim for quotes
+	} else {
+		while (peek_char(subj) == c) {
+			numdelims++;
+			advance(subj);
+		}
 	}
 
 	len = utf8proc_iterate(subj->input.data + subj->pos,
@@ -277,19 +330,25 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 	if (len == -1) {
 		after_char = 10;
 	}
-	*can_open = numdelims > 0 && !utf8proc_is_space(after_char) &&
-	            !(utf8proc_is_punctuation(after_char) &&
-	              !utf8proc_is_space(before_char) &&
-	              !utf8proc_is_punctuation(before_char));
-	*can_close = numdelims > 0 && !utf8proc_is_space(before_char) &&
-	             !(utf8proc_is_punctuation(before_char) &&
-	               !utf8proc_is_space(after_char) &&
-	               !utf8proc_is_punctuation(after_char));
+	left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) &&
+	                !(utf8proc_is_punctuation(after_char) &&
+	                  !utf8proc_is_space(before_char) &&
+	                  !utf8proc_is_punctuation(before_char));
+	right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) &&
+	                 !(utf8proc_is_punctuation(before_char) &&
+	                   !utf8proc_is_space(after_char) &&
+	                   !utf8proc_is_punctuation(after_char));
 	if (c == '_') {
-		*can_open = *can_open && !(before_char < 128 &&
-		                           cmark_isalnum((char)before_char));
-		*can_close = *can_close && !(before_char < 128 &&
-		                             cmark_isalnum((char)after_char));
+		*can_open = left_flanking &&
+		            (!right_flanking || utf8proc_is_punctuation(before_char));
+		*can_close = right_flanking &&
+		             (!left_flanking || utf8proc_is_punctuation(after_char));
+	} else if (c == '\'' || c == '"') {
+		*can_open = left_flanking && !right_flanking;
+		*can_close = right_flanking;
+	} else {
+		*can_open = left_flanking;
+		*can_close = right_flanking;
 	}
 	return numdelims;
 }
@@ -300,10 +359,10 @@ static void print_delimiters(subject *subj)
 	delimiter *delim;
 	delim = subj->last_delim;
 	while (delim != NULL) {
-		printf("Item at %p: %d %d %d next(%p) prev(%p)\n",
-		       delim, delim->delim_char,
+		printf("Item at stack pos %p, text pos %d: %d %d %d next(%p) prev(%p)\n",
+		       (void*)delim, delim->position, delim->delim_char,
 		       delim->can_open, delim->can_close,
-		       delim->next, delim->previous);
+		       (void*)delim->next, (void*)delim->previous);
 		delim = delim->previous;
 	}
 }
@@ -347,59 +406,175 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
 	subj->last_delim = delim;
 }
 
-// Parse strong/emph or a fallback.
-// Assumes the subject has '_' or '*' at the current position.
-static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
+// Assumes the subject has a c at the current position.
+static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
 {
-	int numdelims;
+	bufsize_t numdelims;
 	cmark_node * inl_text;
 	bool can_open, can_close;
+	cmark_chunk contents;
 
 	numdelims = scan_delims(subj, c, &can_open, &can_close);
 
-	inl_text = make_str(cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
+	if (c == '\'' && smart) {
+		contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
+	} else if (c == '"' && smart) {
+		contents = cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
+	} else {
+		contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
+	}
 
-	if (can_open || can_close) {
+	inl_text = make_str(contents);
+
+	if ((can_open || can_close) &&
+	    (!(c == '\'' || c == '"') || smart)) {
 		push_delimiter(subj, c, can_open, can_close, inl_text);
 	}
 
 	return inl_text;
 }
 
-static void process_emphasis(subject *subj, delimiter *start_delim)
+// Assumes we have a hyphen at the current position.
+static cmark_node* handle_hyphen(subject* subj, bool smart)
+{
+	int startpos = subj->pos;
+
+	advance(subj);
+
+	if (!smart || peek_char(subj) != '-') {
+		return make_str(cmark_chunk_literal("-"));
+	}
+
+	while (smart && peek_char(subj) == '-') {
+		advance(subj);
+	}
+
+	int numhyphens = subj->pos - startpos;
+	int en_count = 0;
+	int em_count = 0;
+	int i;
+	cmark_strbuf buf = GH_BUF_INIT;
+
+	if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
+		em_count = numhyphens / 3;
+	} else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
+		en_count = numhyphens / 2;
+	} else if (numhyphens % 3 == 2) { // use one en dash at end
+		en_count = 1;
+		em_count = (numhyphens - 2) / 3;
+	} else { // use two en dashes at the end
+		en_count = 2;
+		em_count = (numhyphens - 4) / 3;
+	}
+
+	for (i = em_count; i > 0; i--) {
+		cmark_strbuf_puts(&buf, EMDASH);
+	}
+
+	for (i = en_count; i > 0; i--) {
+		cmark_strbuf_puts(&buf, ENDASH);
+	}
+
+	return make_str(cmark_chunk_buf_detach(&buf));
+}
+
+// Assumes we have a period at the current position.
+static cmark_node* handle_period(subject* subj, bool smart)
+{
+	advance(subj);
+	if (smart && peek_char(subj) == '.') {
+		advance(subj);
+		if (peek_char(subj) == '.') {
+			advance(subj);
+			return make_str(cmark_chunk_literal(ELLIPSES));
+		} else {
+			return make_str(cmark_chunk_literal(".."));
+		}
+	} else {
+		return make_str(cmark_chunk_literal("."));
+	}
+}
+
+static void process_emphasis(subject *subj, delimiter *stack_bottom)
 {
 	delimiter *closer = subj->last_delim;
 	delimiter *opener;
+	delimiter *old_closer;
+	bool opener_found;
+	delimiter *openers_bottom[128];
+
+	// initialize openers_bottom:
+	openers_bottom['*'] = stack_bottom;
+	openers_bottom['_'] = stack_bottom;
+	openers_bottom['\''] = stack_bottom;
+	openers_bottom['"'] = stack_bottom;
 
 	// move back to first relevant delim.
-	while (closer != NULL && closer->previous != start_delim) {
+	while (closer != NULL && closer->previous != stack_bottom) {
 		closer = closer->previous;
 	}
 
 	// now move forward, looking for closers, and handling each
 	while (closer != NULL) {
 		if (closer->can_close &&
-		    (closer->delim_char == '*' || closer->delim_char == '_')) {
+		    (closer->delim_char == '*' || closer->delim_char == '_' ||
+		     closer->delim_char == '"' || closer->delim_char == '\'')) {
 			// Now look backwards for first matching opener:
 			opener = closer->previous;
-			while (opener != NULL && opener != start_delim) {
+			opener_found = false;
+			while (opener != NULL && opener != stack_bottom &&
+			       opener != openers_bottom[closer->delim_char]) {
 				if (opener->delim_char == closer->delim_char &&
 				    opener->can_open) {
+					opener_found = true;
 					break;
 				}
 				opener = opener->previous;
 			}
-			if (opener != NULL && opener != start_delim) {
-				closer = S_insert_emph(subj, opener, closer);
-			} else {
+			old_closer = closer;
+			if (closer->delim_char == '*' || closer->delim_char == '_') {
+				if (opener_found) {
+					closer = S_insert_emph(subj, opener, closer);
+				} else {
+					closer = closer->next;
+				}
+			} else if (closer->delim_char == '\'') {
+				cmark_chunk_free(&closer->inl_text->as.literal);
+				closer->inl_text->as.literal =
+				    cmark_chunk_literal(RIGHTSINGLEQUOTE);
+				if (opener_found) {
+					cmark_chunk_free(&opener->inl_text->as.literal);
+					opener->inl_text->as.literal =
+					    cmark_chunk_literal(LEFTSINGLEQUOTE);
+				}
 				closer = closer->next;
+			} else if (closer->delim_char == '"') {
+				cmark_chunk_free(&closer->inl_text->as.literal);
+				closer->inl_text->as.literal =
+				    cmark_chunk_literal(RIGHTDOUBLEQUOTE);
+				if (opener_found) {
+					cmark_chunk_free(&opener->inl_text->as.literal);
+					opener->inl_text->as.literal =
+					    cmark_chunk_literal(LEFTDOUBLEQUOTE);
+				}
+				closer = closer->next;
+			}
+			if (!opener_found) {
+				// set lower bound for future searches for openers:
+				openers_bottom[old_closer->delim_char] = old_closer->previous;
+				if (!old_closer->can_open) {
+					// we can remove a closer that can't be an
+					// opener, once we've seen there's no
+					// matching opener:
+					remove_delimiter(subj, old_closer);
+				}
 			}
 		} else {
 			closer = closer->next;
 		}
 	}
-	// free all delimiters in list until start_delim:
-	while (subj->last_delim != start_delim) {
+	// free all delimiters in list until stack_bottom:
+	while (subj->last_delim != stack_bottom) {
 		remove_delimiter(subj, subj->last_delim);
 	}
 }
@@ -408,11 +583,11 @@ static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 {
 	delimiter *delim, *tmp_delim;
-	int use_delims;
+	bufsize_t use_delims;
 	cmark_node *opener_inl = opener->inl_text;
 	cmark_node *closer_inl = closer->inl_text;
-	int opener_num_chars = opener_inl->as.literal.len;
-	int closer_num_chars = closer_inl->as.literal.len;
+	bufsize_t opener_num_chars = opener_inl->as.literal.len;
+	bufsize_t closer_num_chars = closer_inl->as.literal.len;
 	cmark_node *tmp, *emph, *first_child, *last_child;
 
 	// calculate the actual number of characters used from this closer
@@ -491,8 +666,7 @@ static cmark_node* handle_backslash(subject *subj)
 	if (cmark_ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
 		advance(subj);
 		return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
-	} else if (nextchar == '\n') {
-		advance(subj);
+	} else if (!is_eof(subj) && skip_line_end(subj)) {
 		return make_linebreak();
 	} else {
 		return make_str(cmark_chunk_literal("\\"));
@@ -504,7 +678,7 @@ static cmark_node* handle_backslash(subject *subj)
 static cmark_node* handle_entity(subject* subj)
 {
 	cmark_strbuf ent = GH_BUF_INIT;
-	size_t len;
+	bufsize_t len;
 
 	advance(subj);
 
@@ -526,7 +700,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 {
 	cmark_strbuf unescaped = GH_BUF_INIT;
 
-	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+	if (houdini_unescape_html(&unescaped, content->data, content->len)) {
 		return make_str(cmark_chunk_buf_detach(&unescaped));
 	} else {
 		return make_str(*content);
@@ -535,14 +709,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-unsigned char *cmark_clean_url(cmark_chunk *url)
+cmark_chunk cmark_clean_url(cmark_chunk *url)
 {
 	cmark_strbuf buf = GH_BUF_INIT;
 
 	cmark_chunk_trim(url);
 
-	if (url->len == 0)
-		return NULL;
+	if (url->len == 0) {
+		cmark_chunk result = CMARK_CHUNK_EMPTY;
+		return result;
+	}
 
 	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
 		houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
@@ -551,16 +727,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url)
 	}
 
 	cmark_strbuf_unescape(&buf);
-	return cmark_strbuf_detach(&buf);
+	return cmark_chunk_buf_detach(&buf);
 }
 
-unsigned char *cmark_clean_title(cmark_chunk *title)
+cmark_chunk cmark_clean_title(cmark_chunk *title)
 {
 	cmark_strbuf buf = GH_BUF_INIT;
 	unsigned char first, last;
 
-	if (title->len == 0)
-		return NULL;
+	if (title->len == 0) {
+		cmark_chunk result = CMARK_CHUNK_EMPTY;
+		return result;
+	}
 
 	first = title->data[0];
 	last = title->data[title->len - 1];
@@ -575,14 +753,14 @@ unsigned char *cmark_clean_title(cmark_chunk *title)
 	}
 
 	cmark_strbuf_unescape(&buf);
-	return cmark_strbuf_detach(&buf);
+	return cmark_chunk_buf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
 static cmark_node* handle_pointy_brace(subject* subj)
 {
-	int matchlen = 0;
+	bufsize_t matchlen = 0;
 	cmark_chunk contents;
 
 	advance(subj);  // advance past first <
@@ -629,7 +807,7 @@ static cmark_node* handle_pointy_brace(subject* subj)
 // encountered.  Backticks in labels do not start code spans.
 static int link_label(subject* subj, cmark_chunk *raw_label)
 {
-	int startpos = subj->pos;
+	bufsize_t startpos = subj->pos;
 	int length = 0;
 	unsigned char c;
 
@@ -659,6 +837,7 @@ static int link_label(subject* subj, cmark_chunk *raw_label)
 
 	if (c == ']') { // match found
 		*raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+		cmark_chunk_trim(raw_label);
 		advance(subj);  // advance past ]
 		return 1;
 	}
@@ -672,14 +851,14 @@ noMatch:
 // Return a link, an image, or a literal close bracket.
 static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 {
-	int initial_pos;
-	int starturl, endurl, starttitle, endtitle, endall;
-	int n;
-	int sps;
+	bufsize_t initial_pos;
+	bufsize_t starturl, endurl, starttitle, endtitle, endall;
+	bufsize_t n;
+	bufsize_t sps;
 	cmark_reference *ref;
 	bool is_image = false;
 	cmark_chunk url_chunk, title_chunk;
-	unsigned char *url, *title;
+	cmark_chunk url, title;
 	delimiter *opener;
 	cmark_node *link_text;
 	cmark_node *inl;
@@ -767,8 +946,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 	cmark_chunk_free(&raw_label);
 
 	if (ref != NULL) { // found
-		url = bufdup(ref->url);
-		title = bufdup(ref->title);
+		url   = chunk_clone(&ref->url);
+		title = chunk_clone(&ref->title);
 		goto match;
 	} else {
 		goto noMatch;
@@ -785,7 +964,7 @@ match:
 	inl->type = is_image ? NODE_IMAGE : NODE_LINK;
 	cmark_chunk_free(&inl->as.literal);
 	inl->first_child = link_text;
-	process_emphasis(subj, opener->previous);
+	process_emphasis(subj, opener);
 	inl->as.link.url   = url;
 	inl->as.link.title = title;
 	inl->next = NULL;
@@ -800,10 +979,10 @@ match:
 	}
 	parent->last_child = inl;
 
-	// process_emphasis will remove this delimiter and all later ones.
 	// Now, if we have a link, we also want to deactivate earlier link
 	// delimiters. (This code can be removed if we decide to allow links
 	// inside links.)
+	remove_delimiter(subj, opener);
 	if (!is_image) {
 		opener = subj->last_delim;
 		while (opener != NULL) {
@@ -825,13 +1004,11 @@ match:
 // Assumes the subject has a newline at the current position.
 static cmark_node* handle_newline(subject *subj)
 {
-	int nlpos = subj->pos;
+	bufsize_t nlpos = subj->pos;
 	// skip over newline
 	advance(subj);
 	// skip spaces at beginning of line
-	while (peek_char(subj) == ' ') {
-		advance(subj);
-	}
+	skip_spaces(subj);
 	if (nlpos > 1 &&
 	    peek_at(subj, nlpos - 1) == ' ' &&
 	    peek_at(subj, nlpos - 2) == ' ') {
@@ -841,11 +1018,11 @@ static cmark_node* handle_newline(subject *subj)
 	}
 }
 
-static int subject_find_special_char(subject *subj)
+static bufsize_t subject_find_special_char(subject *subj, int options)
 {
-	// "\n\\`&_*[]<!"
+	// "\r\n\\`&_*[]<!"
 	static const int8_t SPECIAL_CHARS[256] = {
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 		0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@@ -863,11 +1040,34 @@ static int subject_find_special_char(subject *subj)
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	};
 
-	int n = subj->pos + 1;
+	// " ' . -
+	static const char SMART_PUNCT_CHARS[] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	};
+
+	bufsize_t n = subj->pos + 1;
 
 	while (n < subj->input.len) {
 		if (SPECIAL_CHARS[subj->input.data[n]])
 			return n;
+		if (options & CMARK_OPT_SMART &&
+		    SMART_PUNCT_CHARS[subj->input.data[n]])
+			return n;
 		n++;
 	}
 
@@ -876,17 +1076,18 @@ static int subject_find_special_char(subject *subj)
 
 // Parse an inline, advancing subject, and add it as a child of parent.
 // Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, cmark_node * parent)
+static int parse_inline(subject* subj, cmark_node * parent, int options)
 {
 	cmark_node* new_inl = NULL;
 	cmark_chunk contents;
 	unsigned char c;
-	int endpos;
+	bufsize_t endpos;
 	c = peek_char(subj);
 	if (c == 0) {
 		return 0;
 	}
 	switch(c) {
+	case '\r':
 	case '\n':
 		new_inl = handle_newline(subj);
 		break;
@@ -904,7 +1105,15 @@ static int parse_inline(subject* subj, cmark_node * parent)
 		break;
 	case '*':
 	case '_':
-		new_inl = handle_strong_emph(subj, c);
+	case '\'':
+	case '"':
+		new_inl = handle_delim(subj, c, options & CMARK_OPT_SMART);
+		break;
+	case '-':
+		new_inl = handle_hyphen(subj, options & CMARK_OPT_SMART);
+		break;
+	case '.':
+		new_inl = handle_period(subj, options & CMARK_OPT_SMART);
 		break;
 	case '[':
 		advance(subj);
@@ -925,12 +1134,12 @@ static int parse_inline(subject* subj, cmark_node * parent)
 		}
 		break;
 	default:
-		endpos = subject_find_special_char(subj);
+		endpos = subject_find_special_char(subj, options);
 		contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
 		subj->pos = endpos;
 
 		// if we're at a newline, strip trailing spaces.
-		if (peek_char(subj) == '\n') {
+		if (S_is_line_end_char(peek_char(subj))) {
 			cmark_chunk_rtrim(&contents);
 		}
 
@@ -944,12 +1153,13 @@ static int parse_inline(subject* subj, cmark_node * parent)
 }
 
 // Parse inlines from parent's string_content, adding as children of parent.
-extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
+extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options)
 {
 	subject subj;
 	subject_from_buf(&subj, &parent->string_content, refmap);
+	cmark_chunk_rtrim(&subj.input);
 
-	while (!is_eof(&subj) && parse_inline(&subj, parent)) ;
+	while (!is_eof(&subj) && parse_inline(&subj, parent, options)) ;
 
 	process_emphasis(&subj, NULL);
 }
@@ -957,11 +1167,9 @@ extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
 // Parse zero or more space characters, including at most one newline.
 static void spnl(subject* subj)
 {
-	bool seen_newline = false;
-	while (peek_char(subj) == ' ' ||
-	       (!seen_newline &&
-	        (seen_newline = peek_char(subj) == '\n'))) {
-		advance(subj);
+	skip_spaces(subj);
+	if (skip_line_end(subj)) {
+		skip_spaces(subj);
 	}
 }
 
@@ -969,7 +1177,7 @@ static void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
 {
 	subject subj;
 
@@ -977,13 +1185,13 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 	cmark_chunk url;
 	cmark_chunk title;
 
-	int matchlen = 0;
-	int beforetitle;
+	bufsize_t matchlen = 0;
+	bufsize_t beforetitle;
 
 	subject_from_buf(&subj, input, NULL);
 
 	// parse label:
-	if (!link_label(&subj, &lab))
+	if (!link_label(&subj, &lab) || lab.len == 0)
 		return 0;
 
 	// colon:
@@ -1014,14 +1222,19 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 		subj.pos = beforetitle;
 		title = cmark_chunk_literal("");
 	}
+
 	// parse final spaces and newline:
-	while (peek_char(&subj) == ' ') {
-		advance(&subj);
-	}
-	if (peek_char(&subj) == '\n') {
-		advance(&subj);
-	} else if (peek_char(&subj) != 0) {
-		return 0;
+	skip_spaces(&subj);
+	if (!skip_line_end(&subj)) {
+		if (matchlen) { // try rewinding before title
+			subj.pos = beforetitle;
+			skip_spaces(&subj);
+			if (!skip_line_end(&subj)) {
+				return 0;
+			}
+		} else {
+			return 0;
+		}
 	}
 	// insert reference into refmap
 	cmark_reference_create(refmap, &lab, &url, &title);

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/inlines.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.h b/compiler/modules/CommonMark/src/inlines.h
index d2ccfb4..f8847fc 100644
--- a/compiler/modules/CommonMark/src/inlines.h
+++ b/compiler/modules/CommonMark/src/inlines.h
@@ -5,12 +5,12 @@
 extern "C" {
 #endif
 
-unsigned char *cmark_clean_url(cmark_chunk *url);
-unsigned char *cmark_clean_title(cmark_chunk *title);
+cmark_chunk cmark_clean_url(cmark_chunk *url);
+cmark_chunk cmark_clean_title(cmark_chunk *title);
 
-void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
+void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options);
 
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
 
 #ifdef __cplusplus
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/iterator.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/iterator.c b/compiler/modules/CommonMark/src/iterator.c
index 4daec2d..f18e3bf 100644
--- a/compiler/modules/CommonMark/src/iterator.c
+++ b/compiler/modules/CommonMark/src/iterator.c
@@ -108,6 +108,12 @@ cmark_iter_get_event_type(cmark_iter *iter)
 	return iter->cur.ev_type;
 }
 
+cmark_node*
+cmark_iter_get_root(cmark_iter *iter)
+{
+	return iter->root;
+}
+
 
 void cmark_consolidate_text_nodes(cmark_node *root)
 {
@@ -123,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root)
 		    cur->next &&
 		    cur->next->type == CMARK_NODE_TEXT) {
 			cmark_strbuf_clear(&buf);
-			cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+			cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
 			tmp = cur->next;
 			while (tmp && tmp->type == CMARK_NODE_TEXT) {
-				cmark_iter_get_node(iter); // advance pointer
-				cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+				cmark_iter_next(iter); // advance pointer
+				cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
 				next = tmp->next;
 				cmark_node_free(tmp);
 				tmp = next;
 			}
-			cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf));
+			cmark_chunk_free(&cur->as.literal);
+			cur->as.literal = cmark_chunk_buf_detach(&buf);
 		}
 	}
 
+	cmark_strbuf_free(&buf);
 	cmark_iter_free(iter);
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/latex.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/latex.c b/compiler/modules/CommonMark/src/latex.c
new file mode 100644
index 0000000..782b0c0
--- /dev/null
+++ b/compiler/modules/CommonMark/src/latex.c
@@ -0,0 +1,430 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "utf8.h"
+#include "scanners.h"
+#include "render.h"
+
+#define safe_strlen(s) cmark_strbuf_safe_strlen(s)
+#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define CR() renderer->cr(renderer)
+#define BLANKLINE() renderer->blankline(renderer)
+
+// Emit one code point of text into the LaTeX output. Under LITERAL escaping
+// the code point is copied through untouched; otherwise characters that are
+// special to LaTeX are escaped or replaced by a macro, and (under NORMAL
+// escaping only) typographic quotes and dashes become plain TeX input.
+// nextc is consulted only to keep "--" from forming a dash ligature.
+static inline void outc(cmark_renderer *renderer,
+                        cmark_escaping escape,
+                        int32_t c,
+                        unsigned char nextc)
+{
+	const char *before = NULL;       // ASCII written ahead of c
+	const char *after = NULL;        // ASCII written behind c
+	const char *replacement = NULL;  // ASCII written instead of c
+	const char *normal_only = NULL;  // replacement honoured for NORMAL only
+
+	if (escape == LITERAL) {
+		cmark_render_code_point(renderer, c);
+		return;
+	}
+
+	switch(c) {
+	case 123: // '{'
+	case 125: // '}'
+	case 35: // '#'
+	case 37: // '%'
+	case 38: // '&'
+		before = "\\";
+		break;
+	case 36: // '$'
+	case 95: // '_'
+		// backslash-escaped only under NORMAL escaping
+		before = (escape == NORMAL) ? "\\" : NULL;
+		break;
+	case 45 : // '-'
+		// prevent ligature when another '-' follows
+		replacement = (nextc == 45) ? "\\-" : "-";
+		break;
+	case 126: // '~'
+		normal_only = "\\textasciitilde{}";
+		break;
+	case 94: // '^'
+		replacement = "\\^{}";
+		break;
+	case 92: // '\\'
+		// / acts as path sep even on windows:
+		replacement = (escape == URL) ? "/" : "\\textbackslash{}";
+		break;
+	case 124: // '|'
+		replacement = "\\textbar{}";
+		break;
+	case 60: // '<'
+		replacement = "\\textless{}";
+		break;
+	case 62: // '>'
+		replacement = "\\textgreater{}";
+		break;
+	case 91: // '['
+	case 93: // ']'
+		before = "{";
+		after = "}";
+		break;
+	case 34: // '"'
+		// requires \usepackage[T1]{fontenc}
+		replacement = "\\textquotedbl{}";
+		break;
+	case 39: // '\''
+		// requires \usepackage{textcomp}
+		replacement = "\\textquotesingle{}";
+		break;
+	case 160: // nbsp
+		replacement = "~";
+		break;
+	case 8230: // hellip
+		replacement = "\\ldots{}";
+		break;
+	case 8216: // lsquo
+		normal_only = "`";
+		break;
+	case 8217: // rsquo
+		normal_only = "\'";
+		break;
+	case 8220: // ldquo
+		normal_only = "``";
+		break;
+	case 8221: // rdquo
+		normal_only = "''";
+		break;
+	case 8212: // emdash
+		normal_only = "---";
+		break;
+	case 8211: // endash
+		normal_only = "--";
+		break;
+	default:
+		break;
+	}
+
+	if (normal_only != NULL && escape == NORMAL) {
+		replacement = normal_only;
+	}
+	if (replacement != NULL) {
+		cmark_render_ascii(renderer, replacement);
+		return;
+	}
+
+	// No replacement: write c itself, wrapped in whatever was requested.
+	if (before != NULL) {
+		cmark_render_ascii(renderer, before);
+	}
+	cmark_render_code_point(renderer, c);
+	if (after != NULL) {
+		cmark_render_ascii(renderer, after);
+	}
+}
+
+// Classification of a node for LaTeX link output, as computed by
+// get_link_type().
+typedef enum  {
+	NO_LINK,         // not a link node, or URL empty / without a scheme
+	URL_AUTOLINK,    // untitled link whose text equals its URL
+	EMAIL_AUTOLINK,  // untitled "mailto:" link whose text equals the address
+	NORMAL_LINK      // every other link
+} link_type;
+
+// Classify node for LaTeX link output.
+//
+// Returns NO_LINK when node is not a link, or when its URL is empty or has
+// no scheme. Returns NORMAL_LINK when the link has a title, has no child
+// node, or has text that differs from its URL. Otherwise returns
+// URL_AUTOLINK, or EMAIL_AUTOLINK when the URL starts with "mailto:" and the
+// link text equals the remainder of the URL.
+//
+// Side effect: adjacent text nodes starting at the link's first child are
+// merged via cmark_consolidate_text_nodes().
+static link_type
+get_link_type(cmark_node *node)
+{
+	size_t title_len, url_len;
+	cmark_node *link_text;
+	char *realurl;
+	int realurllen;
+	bool isemail = false;
+
+	if (node->type != CMARK_NODE_LINK) {
+		return NO_LINK;
+	}
+
+	const char* url = cmark_node_get_url(node);
+	cmark_chunk url_chunk = cmark_chunk_literal(url);
+
+	url_len = safe_strlen(url);
+	if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
+		return NO_LINK;
+	}
+
+	const char* title = cmark_node_get_title(node);
+	title_len = safe_strlen(title);
+	// if it has a title, we can't treat it as an autolink:
+	if (title_len > 0) {
+		return NORMAL_LINK;
+	}
+
+	link_text = node->first_child;
+	// A link with empty text, e.g. "[](http://example.com)", has no child.
+	// It cannot be an autolink, and link_text must not be dereferenced.
+	if (link_text == NULL) {
+		return NORMAL_LINK;
+	}
+	cmark_consolidate_text_nodes(link_text);
+	realurl = (char*)url;
+	realurllen = url_len;
+	if (strncmp(realurl, "mailto:", 7) == 0) {
+		// compare the link text against the bare address
+		realurl += 7;
+		realurllen -= 7;
+		isemail = true;
+	}
+	if (realurllen == link_text->as.literal.len &&
+	    strncmp(realurl,
+	            (char*)link_text->as.literal.data,
+	            link_text->as.literal.len) == 0) {
+		if (isemail) {
+			return EMAIL_AUTOLINK;
+		} else {
+			return URL_AUTOLINK;
+		}
+	} else {
+		return NORMAL_LINK;
+	}
+}
+
+// Count the ordered lists on the path from node (inclusive) up to the root.
+// The result selects the LaTeX enumerate counter (1 -> enumi, 2 -> enumii).
+static int
+S_get_enumlevel(cmark_node *node)
+{
+	int enumlevel = 0;
+	cmark_node *tmp = node;
+	while (tmp) {
+		// Inspect the ancestor being visited, not the starting node:
+		// enclosing bullet lists must not raise the level.
+		if (tmp->type == CMARK_NODE_LIST &&
+		    cmark_node_get_list_type(tmp) == CMARK_ORDERED_LIST) {
+			enumlevel++;
+		}
+		tmp = tmp->parent;
+	}
+	return enumlevel;
+}
+
+// Render one iterator event for node as LaTeX through renderer.
+//
+// ev_type tells whether node is being entered or exited; container nodes
+// open their LaTeX construct on entry and close it on exit. options is
+// unused. Returns 1 normally, and 0 after an image has been written in full
+// on entry, which makes cmark_render() skip the image's children.
+static int
+S_render_node(cmark_renderer *renderer,
+              cmark_node *node,
+              cmark_event_type ev_type,
+              int options)
+{
+	int list_number;
+	int enumlevel;
+	char list_number_string[20];
+	bool entering = (ev_type == CMARK_EVENT_ENTER);
+	cmark_list_type list_type;
+	// Suffixes of the enumerate counters, indexed by nesting level.
+	const char* roman_numerals[] = { "", "i", "ii", "iii", "iv", "v",
+	                                 "vi", "vii", "viii", "ix", "x"
+	                               };
+	const int max_enumlevel =
+	    (int)(sizeof(roman_numerals) / sizeof(roman_numerals[0])) - 1;
+
+	// avoid warning about unused parameter:
+	(void)(options);
+
+	switch (node->type) {
+	case CMARK_NODE_DOCUMENT:
+		break;
+
+	case CMARK_NODE_BLOCK_QUOTE:
+		if (entering) {
+			LIT("\\begin{quote}");
+			CR();
+		} else {
+			LIT("\\end{quote}");
+			BLANKLINE();
+		}
+		break;
+
+	case CMARK_NODE_LIST:
+		list_type = cmark_node_get_list_type(node);
+		if (entering) {
+			LIT("\\begin{");
+			LIT(list_type == CMARK_ORDERED_LIST ?
+			    "enumerate" : "itemize");
+			LIT("}");
+			CR();
+			list_number = cmark_node_get_list_start(node);
+			if (list_number > 1) {
+				enumlevel = S_get_enumlevel(node);
+				// Only levels present in the table name a counter;
+				// anything else would index out of bounds, so the
+				// \setcounter is omitted for it.
+				if (enumlevel >= 1 && enumlevel <= max_enumlevel) {
+					snprintf(list_number_string,
+					         sizeof(list_number_string),
+					         "%d", list_number);
+					LIT("\\setcounter{enum");
+					LIT((char *)roman_numerals[enumlevel]);
+					LIT("}{");
+					OUT(list_number_string, false, NORMAL);
+					LIT("}");
+				}
+				CR();
+			}
+		} else {
+			LIT("\\end{");
+			LIT(list_type == CMARK_ORDERED_LIST ?
+			    "enumerate" : "itemize");
+			LIT("}");
+			BLANKLINE();
+		}
+		break;
+
+	case CMARK_NODE_ITEM:
+		if (entering) {
+			LIT("\\item ");
+		} else {
+			CR();
+		}
+		break;
+
+	case CMARK_NODE_HEADER:
+		if (entering) {
+			// levels other than 1-5 get a bare brace group
+			switch (cmark_node_get_header_level(node)) {
+			case 1:
+				LIT("\\section");
+				break;
+			case 2:
+				LIT("\\subsection");
+				break;
+			case 3:
+				LIT("\\subsubsection");
+				break;
+			case 4:
+				LIT("\\paragraph");
+				break;
+			case 5:
+				LIT("\\subparagraph");
+				break;
+			}
+			LIT("{");
+		} else {
+			LIT("}");
+			BLANKLINE();
+		}
+		break;
+
+	case CMARK_NODE_CODE_BLOCK:
+		CR();
+		LIT("\\begin{verbatim}");
+		CR();
+		OUT(cmark_node_get_literal(node), false, LITERAL);
+		CR();
+		LIT("\\end{verbatim}");
+		BLANKLINE();
+		break;
+
+	case CMARK_NODE_HTML:
+		// raw HTML is dropped from LaTeX output
+		break;
+
+	case CMARK_NODE_HRULE:
+		BLANKLINE();
+		LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
+		BLANKLINE();
+		break;
+
+	case CMARK_NODE_PARAGRAPH:
+		if (!entering) {
+			BLANKLINE();
+		}
+		break;
+
+	case CMARK_NODE_TEXT:
+		OUT(cmark_node_get_literal(node), true, NORMAL);
+		break;
+
+	case CMARK_NODE_LINEBREAK:
+		LIT("\\\\");
+		CR();
+		break;
+
+	case CMARK_NODE_SOFTBREAK:
+		if (renderer->width == 0) {
+			CR();
+		} else {
+			OUT(" ", true, NORMAL);
+		}
+		break;
+
+	case CMARK_NODE_CODE:
+		LIT("\\texttt{");
+		OUT(cmark_node_get_literal(node), false, NORMAL);
+		LIT("}");
+		break;
+
+	case CMARK_NODE_INLINE_HTML:
+		// raw HTML is dropped from LaTeX output
+		break;
+
+	case CMARK_NODE_STRONG:
+		if (entering) {
+			LIT("\\textbf{");
+		} else {
+			LIT("}");
+		}
+		break;
+
+	case CMARK_NODE_EMPH:
+		if (entering) {
+			LIT("\\emph{");
+		} else {
+			LIT("}");
+		}
+		break;
+
+	case CMARK_NODE_LINK:
+		if (entering) {
+			const char* url = cmark_node_get_url(node);
+			// requires \usepackage{hyperref}
+			// Every branch leaves one brace group open; the link's
+			// children fill it and the exit event closes it.
+			switch(get_link_type(node)) {
+			case URL_AUTOLINK:
+				LIT("\\url{");
+				OUT(url, false, URL);
+				break;
+			case EMAIL_AUTOLINK:
+				LIT("\\href{");
+				OUT(url, false, URL);
+				LIT("}\\nolinkurl{");
+				break;
+			case NORMAL_LINK:
+				LIT("\\href{");
+				OUT(url, false, URL);
+				LIT("}{");
+				break;
+			case NO_LINK:
+				LIT("{");  // error?
+			}
+		} else {
+			LIT("}");
+		}
+
+		break;
+
+	case CMARK_NODE_IMAGE:
+		if (entering) {
+			LIT("\\protect\\includegraphics{");
+			// requires \usepackage{graphicx}
+			OUT(cmark_node_get_url(node), false, URL);
+			LIT("}");
+			// image fully written: tell the caller to skip children
+			return 0;
+		}
+		break;
+
+	default:
+		assert(false);
+		break;
+	}
+
+	return 1;
+}
+
+// Render the tree under root as LaTeX by driving cmark_render() with this
+// file's character escaper (outc) and node renderer (S_render_node).
+// options and width are forwarded unchanged.
+char *cmark_render_latex(cmark_node *root, int options, int width)
+{
+	char *latex = cmark_render(root, options, width, outc, S_render_node);
+
+	return latex;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/libcmark.pc.in
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/libcmark.pc.in b/compiler/modules/CommonMark/src/libcmark.pc.in
deleted file mode 100644
index 9c3a9a9..0000000
--- a/compiler/modules/CommonMark/src/libcmark.pc.in
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=@CMAKE_INSTALL_PREFIX@
-libdir=@CMAKE_INSTALL_PREFIX@/lib
-includedir=@CMAKE_INSTALL_PREFIX@/include
-
-Name: libcmark
-Description: CommonMark parsing, rendering, and manipulation
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lcmark
-Cflags: -I${includedir}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/man.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/man.c b/compiler/modules/CommonMark/src/man.c
index 2c8a3a5..6ff33f5 100644
--- a/compiler/modules/CommonMark/src/man.c
+++ b/compiler/modules/CommonMark/src/man.c
@@ -7,72 +7,84 @@
 #include "cmark.h"
 #include "node.h"
 #include "buffer.h"
+#include "utf8.h"
+#include "render.h"
 
-// Functions to convert cmark_nodes to groff man strings.
+#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define CR() renderer->cr(renderer)
+#define BLANKLINE() renderer->blankline(renderer)
 
-static void escape_man(cmark_strbuf *dest, const unsigned char *source, int length)
+// Functions to convert cmark_nodes to groff man strings.
+static
+void S_outc(cmark_renderer *renderer,
+            cmark_escaping escape,
+            int32_t c,
+            unsigned char nextc)
 {
-	int i;
-	unsigned char c;
-
-	for (i = 0; i < length; i++) {
-		c = source[i];
-		if (c == '.' && i == 0) {
-			cmark_strbuf_puts(dest, "\\&.");
-		} else if (c == '\'' && i == 0) {
-			cmark_strbuf_puts(dest, "\\&'");
-		} else if (c == '-') {
-			cmark_strbuf_puts(dest, "\\-");
-		} else if (c == '\\') {
-			cmark_strbuf_puts(dest, "\\e");
+	(void)(nextc);
+
+	if (escape == LITERAL) {
+		cmark_render_code_point(renderer, c);
+		return;
+	}
+
+	switch(c) {
+	case 46:
+		if (renderer->begin_line) {
+			cmark_render_ascii(renderer, "\\&.");
+		} else {
+			cmark_render_code_point(renderer, c);
+		}
+		break;
+	case 39:
+		if (renderer->begin_line) {
+			cmark_render_ascii(renderer, "\\&'");
 		} else {
-			cmark_strbuf_putc(dest, source[i]);
+			cmark_render_code_point(renderer, c);
 		}
+		break;
+	case 45:
+		cmark_render_ascii(renderer, "\\-");
+		break;
+	case 92:
+		cmark_render_ascii(renderer, "\\e");
+		break;
+	case 8216: // left single quote
+		cmark_render_ascii(renderer, "\\[oq]");
+		break;
+	case 8217: // right single quote
+		cmark_render_ascii(renderer, "\\[cq]");
+		break;
+	case 8220: // left double quote
+		cmark_render_ascii(renderer, "\\[lq]");
+		break;
+	case 8221: // right double quote
+		cmark_render_ascii(renderer, "\\[rq]");
+		break;
+	case 8212: // em dash
+		cmark_render_ascii(renderer, "\\[em]");
+		break;
+	case 8211: // en dash
+		cmark_render_ascii(renderer, "\\[en]");
+		break;
+	default:
+		cmark_render_code_point(renderer, c);
 	}
 }
 
-static inline void cr(cmark_strbuf *man)
-{
-	if (man->size && man->ptr[man->size - 1] != '\n')
-		cmark_strbuf_putc(man, '\n');
-}
-
-struct render_state {
-	cmark_strbuf* man;
-	cmark_node *plain;
-};
-
 static int
-S_render_node(cmark_node *node, cmark_event_type ev_type,
-              struct render_state *state)
+S_render_node(cmark_renderer *renderer,
+              cmark_node *node,
+              cmark_event_type ev_type,
+              int options)
 {
 	cmark_node *tmp;
-	cmark_strbuf *man = state->man;
 	int list_number;
 	bool entering = (ev_type == CMARK_EVENT_ENTER);
 
-	if (state->plain == node) { // back at original node
-		state->plain = NULL;
-	}
-
-	if (state->plain != NULL) {
-		switch(node->type) {
-		case CMARK_NODE_TEXT:
-		case CMARK_NODE_CODE:
-			escape_man(man, node->as.literal.data,
-			           node->as.literal.len);
-			break;
-
-		case CMARK_NODE_LINEBREAK:
-		case CMARK_NODE_SOFTBREAK:
-			cmark_strbuf_putc(man, ' ');
-			break;
-
-		default:
-			break;
-		}
-		return 1;
-	}
+	// avoid unused parameter error:
+	(void)(options);
 
 	switch (node->type) {
 	case CMARK_NODE_DOCUMENT:
@@ -80,13 +92,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
 	case CMARK_NODE_BLOCK_QUOTE:
 		if (entering) {
-			cr(man);
-			cmark_strbuf_puts(man, ".RS");
-			cr(man);
+			CR();
+			LIT(".RS");
+			CR();
 		} else {
-			cr(man);
-			cmark_strbuf_puts(man, ".RE");
-			cr(man);
+			CR();
+			LIT(".RE");
+			CR();
 		}
 		break;
 
@@ -95,11 +107,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
 	case CMARK_NODE_ITEM:
 		if (entering) {
-			cr(man);
-			cmark_strbuf_puts(man, ".IP ");
+			CR();
+			LIT(".IP ");
 			if (cmark_node_get_list_type(node->parent) ==
 			    CMARK_BULLET_LIST) {
-				cmark_strbuf_puts(man, "\\[bu] 2");
+				LIT("\\[bu] 2");
 			} else {
 				list_number = cmark_node_get_list_start(node->parent);
 				tmp = node;
@@ -107,43 +119,45 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 					tmp = tmp->prev;
 					list_number += 1;
 				}
-				cmark_strbuf_printf(man, "\"%d.\" 4", list_number);
+				char list_number_s[20];
+				sprintf(list_number_s, "\"%d.\" 4", list_number);
+				LIT(list_number_s);
 			}
-			cr(man);
+			CR();
 		} else {
-			cr(man);
+			CR();
 		}
 		break;
 
 	case CMARK_NODE_HEADER:
 		if (entering) {
-			cr(man);
-			cmark_strbuf_puts(man,
-			                  cmark_node_get_header_level(node) == 1 ?
-			                  ".SH" : ".SS");
-			cr(man);
+			CR();
+			LIT(cmark_node_get_header_level(node) == 1 ?
+			    ".SH" : ".SS");
+			CR();
 		} else {
-			cr(man);
+			CR();
 		}
 		break;
 
 	case CMARK_NODE_CODE_BLOCK:
-		cr(man);
-		cmark_strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
-		escape_man(man, node->as.code.literal.data,
-		           node->as.code.literal.len);
-		cr(man);
-		cmark_strbuf_puts(man, "\\f[]\n.fi");
-		cr(man);
+		CR();
+		LIT(".IP\n.nf\n\\f[C]\n");
+		OUT(cmark_node_get_literal(node),
+		    false,
+		    NORMAL);
+		CR();
+		LIT("\\f[]\n.fi");
+		CR();
 		break;
 
 	case CMARK_NODE_HTML:
 		break;
 
 	case CMARK_NODE_HRULE:
-		cr(man);
-		cmark_strbuf_puts(man, ".PP\n  *  *  *  *  *");
-		cr(man);
+		CR();
+		LIT(".PP\n  *  *  *  *  *");
+		CR();
 		break;
 
 	case CMARK_NODE_PARAGRAPH:
@@ -154,32 +168,36 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 			    node->prev == NULL) {
 				// no blank line or .PP
 			} else {
-				cr(man);
-				cmark_strbuf_puts(man, ".PP\n");
+				CR();
+				LIT(".PP");
+				CR();
 			}
 		} else {
-			cr(man);
+			CR();
 		}
 		break;
 
 	case CMARK_NODE_TEXT:
-		escape_man(man, node->as.literal.data,
-		           node->as.literal.len);
+		OUT(cmark_node_get_literal(node), true, NORMAL);
 		break;
 
 	case CMARK_NODE_LINEBREAK:
-		cmark_strbuf_puts(man, ".PD 0\n.P\n.PD");
-		cr(man);
+		LIT(".PD 0\n.P\n.PD");
+		CR();
 		break;
 
 	case CMARK_NODE_SOFTBREAK:
-		cmark_strbuf_putc(man, '\n');
+		if (renderer->width == 0) {
+			CR();
+		} else {
+			OUT(" ", true, LITERAL);
+		}
 		break;
 
 	case CMARK_NODE_CODE:
-		cmark_strbuf_puts(man, "\\f[C]");
-		escape_man(man, node->as.literal.data, node->as.literal.len);
-		cmark_strbuf_puts(man, "\\f[]");
+		LIT("\\f[C]");
+		OUT(cmark_node_get_literal(node), true, NORMAL);
+		LIT("\\f[]");
 		break;
 
 	case CMARK_NODE_INLINE_HTML:
@@ -187,33 +205,33 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
 	case CMARK_NODE_STRONG:
 		if (entering) {
-			cmark_strbuf_puts(man, "\\f[B]");
+			LIT("\\f[B]");
 		} else {
-			cmark_strbuf_puts(man, "\\f[]");
+			LIT("\\f[]");
 		}
 		break;
 
 	case CMARK_NODE_EMPH:
 		if (entering) {
-			cmark_strbuf_puts(man, "\\f[I]");
+			LIT("\\f[I]");
 		} else {
-			cmark_strbuf_puts(man, "\\f[]");
+			LIT("\\f[]");
 		}
 		break;
 
 	case CMARK_NODE_LINK:
 		if (!entering) {
-			cmark_strbuf_printf(man, " (%s)",
-			                    cmark_node_get_url(node));
+			LIT(" (");
+			OUT(cmark_node_get_url(node), true, URL);
+			LIT(")");
 		}
 		break;
 
 	case CMARK_NODE_IMAGE:
 		if (entering) {
-			cmark_strbuf_puts(man, "[IMAGE: ");
-			state->plain = node;
+			LIT("[IMAGE: ");
 		} else {
-			cmark_strbuf_puts(man, "]");
+			LIT("]");
 		}
 		break;
 
@@ -222,28 +240,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		break;
 	}
 
-	// cmark_strbuf_putc(man, 'x');
 	return 1;
 }
 
-char *cmark_render_man(cmark_node *root, long options)
+char *cmark_render_man(cmark_node *root, int options, int width)
 {
-	char *result;
-	cmark_strbuf man = GH_BUF_INIT;
-	struct render_state state = { &man, NULL };
-	cmark_node *cur;
-	cmark_event_type ev_type;
-	cmark_iter *iter = cmark_iter_new(root);
-
-	if (options == 0) options = 0; // avoid warning about unused parameters
-
-	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
-		cur = cmark_iter_get_node(iter);
-		S_render_node(cur, ev_type, &state);
-	}
-	result = (char *)cmark_strbuf_detach(&man);
-
-	cmark_iter_free(iter);
-	cmark_strbuf_free(&man);
-	return result;
+	return cmark_render(root, options, width, S_outc, S_render_node);
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/node.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c
index 3785a27..7b1bb10 100644
--- a/compiler/modules/CommonMark/src/node.c
+++ b/compiler/modules/CommonMark/src/node.c
@@ -7,6 +7,73 @@
 static void
 S_node_unlink(cmark_node *node);
 
+static inline bool
+S_is_block(cmark_node *node)
+{
+	if (node == NULL) {
+		return false;
+	}
+	return node->type >= CMARK_NODE_FIRST_BLOCK
+	       && node->type <= CMARK_NODE_LAST_BLOCK;
+}
+
+static inline bool
+S_is_inline(cmark_node *node)
+{
+	if (node == NULL) {
+		return false;
+	}
+	return node->type >= CMARK_NODE_FIRST_INLINE
+	       && node->type <= CMARK_NODE_LAST_INLINE;
+}
+
+static bool
+S_can_contain(cmark_node *node, cmark_node *child)
+{
+	cmark_node *cur;
+
+	if (node == NULL || child == NULL) {
+		return false;
+	}
+
+	// Verify that child is not an ancestor of node or equal to node.
+	cur = node;
+	do {
+		if (cur == child) {
+			return false;
+		}
+		cur = cur->parent;
+	} while (cur != NULL);
+
+	if (child->type == CMARK_NODE_DOCUMENT) {
+		return false;
+	}
+
+	switch (node->type) {
+	case CMARK_NODE_DOCUMENT:
+	case CMARK_NODE_BLOCK_QUOTE:
+	case CMARK_NODE_ITEM:
+		return S_is_block(child)
+		       && child->type != CMARK_NODE_ITEM;
+
+	case CMARK_NODE_LIST:
+		return child->type == CMARK_NODE_ITEM;
+
+	case CMARK_NODE_PARAGRAPH:
+	case CMARK_NODE_HEADER:
+	case CMARK_NODE_EMPH:
+	case CMARK_NODE_STRONG:
+	case CMARK_NODE_LINK:
+	case CMARK_NODE_IMAGE:
+		return S_is_inline(child);
+
+	default:
+		break;
+	}
+
+	return false;
+}
+
 cmark_node*
 cmark_node_new(cmark_node_type type)
 {
@@ -39,7 +106,9 @@ void S_free_nodes(cmark_node *e)
 {
 	cmark_node *next;
 	while (e != NULL) {
-		cmark_strbuf_free(&e->string_content);
+		if (S_is_block(e)) {
+			cmark_strbuf_free(&e->string_content);
+		}
 		switch (e->type) {
 		case NODE_CODE_BLOCK:
 			cmark_chunk_free(&e->as.code.info);
@@ -53,8 +122,8 @@ void S_free_nodes(cmark_node *e)
 			break;
 		case NODE_LINK:
 		case NODE_IMAGE:
-			free(e->as.link.url);
-			free(e->as.link.title);
+			cmark_chunk_free(&e->as.link.url);
+			cmark_chunk_free(&e->as.link.title);
 			break;
 		default:
 			break;
@@ -189,13 +258,24 @@ cmark_node_last_child(cmark_node *node)
 	}
 }
 
-static char*
-S_strdup(const char *str)
+void*
+cmark_node_get_user_data(cmark_node *node)
 {
-	size_t size = strlen(str) + 1;
-	char *dup = (char *)malloc(size);
-	memcpy(dup, str, size);
-	return dup;
+	if (node == NULL) {
+		return NULL;
+	} else {
+		return node->user_data;
+	}
+}
+
+int
+cmark_node_set_user_data(cmark_node *node, void *user_data)
+{
+	if (node == NULL) {
+		return 0;
+	}
+	node->user_data = user_data;
+	return 1;
 }
 
 const char*
@@ -448,7 +528,7 @@ cmark_node_get_url(cmark_node *node)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		return (char *)node->as.link.url;
+		return cmark_chunk_to_cstr(&node->as.link.url);
 	default:
 		break;
 	}
@@ -466,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		free(node->as.link.url);
-		node->as.link.url = (unsigned char *)S_strdup(url);
+		cmark_chunk_set_cstr(&node->as.link.url, url);
 		return 1;
 	default:
 		break;
@@ -486,7 +565,7 @@ cmark_node_get_title(cmark_node *node)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		return (char *)node->as.link.title;
+		return cmark_chunk_to_cstr(&node->as.link.title);
 	default:
 		break;
 	}
@@ -504,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title)
 	switch (node->type) {
 	case NODE_LINK:
 	case NODE_IMAGE:
-		free(node->as.link.title);
-		node->as.link.title = (unsigned char *)S_strdup(title);
+		cmark_chunk_set_cstr(&node->as.link.title, title);
 		return 1;
 	default:
 		break;
@@ -550,73 +628,6 @@ cmark_node_get_end_column(cmark_node *node)
 	return node->end_column;
 }
 
-static inline bool
-S_is_block(cmark_node *node)
-{
-	if (node == NULL) {
-		return false;
-	}
-	return node->type >= CMARK_NODE_FIRST_BLOCK
-	       && node->type <= CMARK_NODE_LAST_BLOCK;
-}
-
-static inline bool
-S_is_inline(cmark_node *node)
-{
-	if (node == NULL) {
-		return false;
-	}
-	return node->type >= CMARK_NODE_FIRST_INLINE
-	       && node->type <= CMARK_NODE_LAST_INLINE;
-}
-
-static bool
-S_can_contain(cmark_node *node, cmark_node *child)
-{
-	cmark_node *cur;
-
-	if (node == NULL || child == NULL) {
-		return false;
-	}
-
-	// Verify that child is not an ancestor of node or equal to node.
-	cur = node;
-	do {
-		if (cur == child) {
-			return false;
-		}
-		cur = cur->parent;
-	} while (cur != NULL);
-
-	if (child->type == CMARK_NODE_DOCUMENT) {
-		return false;
-	}
-
-	switch (node->type) {
-	case CMARK_NODE_DOCUMENT:
-	case CMARK_NODE_BLOCK_QUOTE:
-	case CMARK_NODE_ITEM:
-		return S_is_block(child)
-		       && child->type != CMARK_NODE_ITEM;
-
-	case CMARK_NODE_LIST:
-		return child->type == CMARK_NODE_ITEM;
-
-	case CMARK_NODE_PARAGRAPH:
-	case CMARK_NODE_HEADER:
-	case CMARK_NODE_EMPH:
-	case CMARK_NODE_STRONG:
-	case CMARK_NODE_LINK:
-	case CMARK_NODE_IMAGE:
-		return S_is_inline(child);
-
-	default:
-		break;
-	}
-
-	return false;
-}
-
 // Unlink a node without adjusting its next, prev, and parent pointers.
 static void
 S_node_unlink(cmark_node *node)

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/node.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h
index c0c43d3..b579408 100644
--- a/compiler/modules/CommonMark/src/node.h
+++ b/compiler/modules/CommonMark/src/node.h
@@ -6,6 +6,7 @@ extern "C" {
 #endif
 
 #include <stdio.h>
+#include <stdint.h>
 
 #include "cmark.h"
 #include "buffer.h"
@@ -22,12 +23,13 @@ typedef struct {
 } cmark_list;
 
 typedef struct {
-	bool              fenced;
-	int               fence_length;
-	int               fence_offset;
-	unsigned char     fence_char;
 	cmark_chunk       info;
 	cmark_chunk       literal;
+	int               fence_length;
+	/* fence_offset must be 0-3, so we can use int8_t */
+	int8_t            fence_offset;
+	unsigned char     fence_char;
+	bool              fenced;
 } cmark_code;
 
 typedef struct {
@@ -36,23 +38,26 @@ typedef struct {
 } cmark_header;
 
 typedef struct {
-	unsigned char *url;
-	unsigned char *title;
+	cmark_chunk url;
+	cmark_chunk title;
 } cmark_link;
 
 struct cmark_node {
-	cmark_node_type type;
-
 	struct cmark_node *next;
 	struct cmark_node *prev;
 	struct cmark_node *parent;
 	struct cmark_node *first_child;
 	struct cmark_node *last_child;
 
+	void *user_data;
+
 	int start_line;
 	int start_column;
 	int end_line;
 	int end_column;
+
+	cmark_node_type type;
+
 	bool open;
 	bool last_line_blank;
 
@@ -64,6 +69,7 @@ struct cmark_node {
 		cmark_code        code;
 		cmark_header      header;
 		cmark_link        link;
+		int               html_block_type;
 	} as;
 };
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/parser.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h
index 3c8def9..01a7aeb 100644
--- a/compiler/modules/CommonMark/src/parser.h
+++ b/compiler/modules/CommonMark/src/parser.h
@@ -16,9 +16,16 @@ struct cmark_parser {
 	struct cmark_node* root;
 	struct cmark_node* current;
 	int line_number;
+	bufsize_t offset;
+	bufsize_t column;
+	bufsize_t first_nonspace;
+	bufsize_t first_nonspace_column;
+	int indent;
+	bool blank;
 	cmark_strbuf *curline;
-	int last_line_length;
+	bufsize_t last_line_length;
 	cmark_strbuf *linebuf;
+	int options;
 };
 
 #ifdef __cplusplus

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/references.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c
index 37bf4cb..1d3d56d 100644
--- a/compiler/modules/CommonMark/src/references.c
+++ b/compiler/modules/CommonMark/src/references.c
@@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref)
 {
 	if(ref != NULL) {
 		free(ref->label);
-		free(ref->url);
-		free(ref->title);
+		cmark_chunk_free(&ref->url);
+		cmark_chunk_free(&ref->title);
 		free(ref);
 	}
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/references.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.h b/compiler/modules/CommonMark/src/references.h
index 69325bb..a360cd5 100644
--- a/compiler/modules/CommonMark/src/references.h
+++ b/compiler/modules/CommonMark/src/references.h
@@ -12,8 +12,8 @@ extern "C" {
 struct cmark_reference {
 	struct cmark_reference *next;
 	unsigned char *label;
-	unsigned char *url;
-	unsigned char *title;
+	cmark_chunk url;
+	cmark_chunk title;
 	unsigned int hash;
 };
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/render.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/render.c b/compiler/modules/CommonMark/src/render.c
new file mode 100644
index 0000000..2f1faac
--- /dev/null
+++ b/compiler/modules/CommonMark/src/render.c
@@ -0,0 +1,186 @@
+#include <stdlib.h>
+#include "buffer.h"
+#include "chunk.h"
+#include "cmark.h"
+#include "utf8.h"
+#include "render.h"
+
+// Ask for at least one pending line break; a larger pending request is
+// kept. The break is written by the next S_out() call.
+static inline
+void S_cr(cmark_renderer *renderer)
+{
+	if (renderer->need_cr >= 1) {
+		return;
+	}
+	renderer->need_cr = 1;
+}
+
+// Ask for at least two pending line breaks (a blank line); a larger
+// pending request is kept. The breaks are written by the next S_out() call.
+static inline
+void S_blankline(cmark_renderer *renderer)
+{
+	if (renderer->need_cr >= 2) {
+		return;
+	}
+	renderer->need_cr = 2;
+}
+
+// Append source to the renderer's output buffer.
+//
+// Pending line breaks recorded in need_cr are flushed first. Each line that
+// receives text is started with renderer->prefix. When wrap is true (and
+// the renderer's no_wrap flag is clear) a space becomes a possible break
+// point and lines longer than renderer->width are broken at the last such
+// point. With escape == LITERAL code points are copied as they are;
+// otherwise each one is handed to renderer->outc together with the byte
+// that follows it. Rendering stops silently at the first invalid UTF-8
+// sequence.
+static
+void S_out(cmark_renderer *renderer,
+           const char *source,
+           bool wrap,
+           cmark_escaping escape)
+{
+	int length = cmark_strbuf_safe_strlen(source);
+	unsigned char nextc;
+	int32_t c;
+	int i = 0;
+	int len;
+	cmark_chunk remainder = cmark_chunk_literal("");
+	// index of the last byte already in the buffer (-1 when empty)
+	int k = renderer->buffer->size - 1;
+
+	wrap = wrap && !renderer->no_wrap;
+
+	// inside a tight list item a blank line is reduced to a line break
+	if (renderer->in_tight_list_item && renderer->need_cr > 1) {
+		renderer->need_cr = 1;
+	}
+	// Flush pending breaks. Newlines already at the end of the buffer (or
+	// an empty buffer) count towards the request: k walks backwards over
+	// them instead of emitting another newline.
+	while (renderer->need_cr) {
+		if (k < 0 || renderer->buffer->ptr[k] == '\n') {
+			k -= 1;
+		} else {
+			cmark_strbuf_putc(renderer->buffer, '\n');
+			// more breaks follow, so the line just started stays
+			// empty apart from the prefix
+			if (renderer->need_cr > 1) {
+				cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+				                 renderer->prefix->size);
+			}
+		}
+		renderer->column = 0;
+		renderer->begin_line = true;
+		renderer->need_cr -= 1;
+	}
+
+	while (i < length) {
+		if (renderer->begin_line) {
+			cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+			                 renderer->prefix->size);
+			// note: this assumes prefix is ascii:
+			renderer->column = renderer->prefix->size;
+		}
+
+		len = utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
+		if (len == -1) { // error condition
+			return;  // return without rendering rest of string
+		}
+		// byte after the current code point (the terminating NUL at the
+		// end of source)
+		nextc = source[i + len];
+		if (c == 32 && wrap) {
+			// a space at the start of a line is dropped
+			if (!renderer->begin_line) {
+				cmark_strbuf_putc(renderer->buffer, ' ');
+				renderer->column += 1;
+				renderer->begin_line = false;
+				// remember the buffer offset of this space as the
+				// latest place where the line may be broken
+				renderer->last_breakable = renderer->buffer->size -
+				                           1;
+				// skip following spaces
+				while (source[i + 1] == ' ') {
+					i++;
+				}
+			}
+
+		} else if (c == 10) {
+			// newline in the input: start a fresh output line
+			cmark_strbuf_putc(renderer->buffer, '\n');
+			renderer->column = 0;
+			renderer->begin_line = true;
+			renderer->last_breakable = 0;
+		} else if (escape == LITERAL) {
+			cmark_render_code_point(renderer, c);
+			renderer->begin_line = false;
+		} else {
+			(renderer->outc)(renderer, escape, c, nextc);
+			renderer->begin_line = false;
+		}
+
+		// If adding the character went beyond width, look for an
+		// earlier place where the line could be broken:
+		if (renderer->width > 0 &&
+		    renderer->column > renderer->width &&
+		    !renderer->begin_line &&
+		    renderer->last_breakable > 0) {
+
+			// copy from last_breakable to remainder
+			cmark_chunk_set_cstr(&remainder, (char *) renderer->buffer->ptr + renderer->last_breakable + 1);
+			// truncate at last_breakable
+			cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
+			// add newline, prefix, and remainder
+			cmark_strbuf_putc(renderer->buffer, '\n');
+			cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+			                 renderer->prefix->size);
+			cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
+			// column is counted in bytes here (prefix plus moved text)
+			renderer->column = renderer->prefix->size + remainder.len;
+			cmark_chunk_free(&remainder);
+			renderer->last_breakable = 0;
+			renderer->begin_line = false;
+		}
+
+		i += len;
+	}
+}
+
+// Append the string s to the output buffer and advance the column by the
+// number of bytes the buffer grew. Assumes no newlines, assumes ascii
+// content.
+void
+cmark_render_ascii(cmark_renderer* renderer, const char* s)
+{
+	cmark_strbuf *out = renderer->buffer;
+	const int size_before = out->size;
+
+	cmark_strbuf_puts(out, s);
+	renderer->column += out->size - size_before;
+}
+
+// Write code point c to the output buffer through utf8proc_encode_char()
+// and count it as one column, whatever its encoded length.
+void
+cmark_render_code_point(cmark_renderer *renderer, uint32_t c)
+{
+	cmark_strbuf *out = renderer->buffer;
+
+	utf8proc_encode_char(c, out);
+	renderer->column++;
+}
+
+// Generic rendering driver shared by the text-based writers.
+//
+// Walks the tree under root with an iterator and calls render_node for
+// every event. When render_node returns 0 the iterator is moved to the
+// exit event of the current node, so its children are not visited. outc
+// is stored in the renderer and called by S_out() for each code point that
+// needs escaping. width is the wrap column (0 disables wrapping). The
+// output always ends with a newline, even for an empty document. Returns
+// the string detached from the output buffer.
+char*
+cmark_render(cmark_node *root,
+             int options,
+             int width,
+             void (*outc)(cmark_renderer*,
+                          cmark_escaping,
+                          int32_t,
+                          unsigned char),
+             int (*render_node)(cmark_renderer *renderer,
+                                cmark_node *node,
+                                cmark_event_type ev_type,
+                                int options))
+{
+	cmark_strbuf pref = GH_BUF_INIT;
+	cmark_strbuf buf = GH_BUF_INIT;
+	cmark_node *cur;
+	cmark_event_type ev_type;
+	char *result;
+	cmark_iter *iter = cmark_iter_new(root);
+
+	// field order: buffer, prefix, column, width, need_cr, last_breakable,
+	// begin_line, no_wrap, in_tight_list_item, outc, cr, blankline, out
+	cmark_renderer renderer = { &buf, &pref, 0, width,
+	                            0, 0, true, false, false,
+	                            outc, S_cr, S_blankline, S_out
+	                          };
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		if (!render_node(&renderer, cur, ev_type, options)) {
+			// a false value causes us to skip processing
+			// the node's contents.  this is used for
+			// autolinks.
+			cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
+		}
+	}
+
+	// ensure final newline; an empty buffer has no last byte to inspect,
+	// so test the size before indexing ptr[size - 1]
+	if (renderer.buffer->size == 0 ||
+	    renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
+		cmark_strbuf_putc(renderer.buffer, '\n');
+	}
+
+	result = (char *)cmark_strbuf_detach(renderer.buffer);
+
+	cmark_iter_free(iter);
+	cmark_strbuf_free(renderer.prefix);
+	cmark_strbuf_free(renderer.buffer);
+
+	return result;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/render.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/render.h b/compiler/modules/CommonMark/src/render.h
new file mode 100644
index 0000000..ca541bc
--- /dev/null
+++ b/compiler/modules/CommonMark/src/render.h
@@ -0,0 +1,66 @@
+#ifndef CMARK_RENDER_H
+#define CMARK_RENDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include "buffer.h"
+#include "chunk.h"
+
+// Escaping context passed to a renderer's out() and outc() callbacks.
+typedef enum  {
+	LITERAL,  // code points are written unchanged; outc is bypassed
+	NORMAL,   // ordinary text: the writer's full escaping applies
+	TITLE,    // presumably link titles; not used by the code shown here
+	URL       // link and image destinations
+} cmark_escaping;
+
+// State shared between cmark_render() and a format-specific writer.
+struct cmark_renderer {
+	cmark_strbuf* buffer;        // output accumulated so far
+	cmark_strbuf* prefix;        // written at the start of each output line
+	int column;                  // width of the current output line so far
+	int width;                   // wrap column; 0 disables wrapping
+	int need_cr;                 // line breaks owed before the next output
+	bufsize_t last_breakable;    // buffer offset of the last wrappable space
+	bool begin_line;             // true while nothing is on the current line
+	bool no_wrap;                // overrides a wrap request made to out()
+	bool in_tight_list_item;     // limits need_cr to a single line break
+	// writes one code point with format-specific escaping; the last
+	// argument is the byte following it in the input
+	void (*outc)(struct cmark_renderer*,
+		     cmark_escaping,
+		     int32_t,
+		     unsigned char);
+	// requests at least one pending line break
+	void (*cr)(struct cmark_renderer*);
+	// requests at least two pending line breaks (a blank line)
+	void (*blankline)(struct cmark_renderer*);
+	// writes a string; the bool enables line wrapping for it
+	void (*out)(struct cmark_renderer*,
+		    const char *,
+		    bool,
+		    cmark_escaping);
+};
+
+typedef struct cmark_renderer cmark_renderer;
+
+// Append the ASCII string s (no newlines) to the renderer's buffer and
+// advance its column by the number of bytes added.
+void
+cmark_render_ascii(cmark_renderer *renderer, const char* s);
+
+// Append the code point c to the renderer's buffer and advance its column
+// by one.
+void
+cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
+
+// Render the tree under root to a string. render_node is called for every
+// iterator event and returns 0 to skip the current node's children; outc
+// escapes single code points for the target format. width is the wrap
+// column, 0 meaning no wrapping.
+char*
+cmark_render(cmark_node *root,
+	     int options,
+	     int width,
+	     void (*outc)(cmark_renderer*,
+			  cmark_escaping,
+			  int32_t,
+			  unsigned char),
+	     int (*render_node)(cmark_renderer *renderer,
+				cmark_node *node,
+				cmark_event_type ev_type,
+				int options));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif