You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2015/08/06 18:19:43 UTC
[06/20] lucy-clownfish git commit: Upgrade libcmark to 0.21.0
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/inlines.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c
index 2487f63..7ea308d 100644
--- a/compiler/modules/CommonMark/src/inlines.c
+++ b/compiler/modules/CommonMark/src/inlines.c
@@ -14,6 +14,15 @@
#include "inlines.h"
+static const char *EMDASH = "\xE2\x80\x94"; // UTF-8 bytes of U+2014 EM DASH
+static const char *ENDASH = "\xE2\x80\x93"; // UTF-8 bytes of U+2013 EN DASH
+static const char *ELLIPSES = "\xE2\x80\xA6"; // UTF-8 bytes of U+2026 HORIZONTAL ELLIPSIS
+static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C"; // UTF-8 bytes of U+201C LEFT DOUBLE QUOTATION MARK
+static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // UTF-8 bytes of U+201D RIGHT DOUBLE QUOTATION MARK
+static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98"; // UTF-8 bytes of U+2018 LEFT SINGLE QUOTATION MARK
+static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // UTF-8 bytes of U+2019 RIGHT SINGLE QUOTATION MARK
+
+
// Macros for creating various kinds of simple inline nodes.
#define make_str(s) make_literal(CMARK_NODE_TEXT, s)
#define make_code(s) make_literal(CMARK_NODE_CODE, s)
@@ -27,8 +36,8 @@ typedef struct delimiter {
struct delimiter *previous;
struct delimiter *next;
cmark_node *inl_text;
+ bufsize_t position;
unsigned char delim_char;
- int position;
bool can_open;
bool can_close;
bool active;
@@ -36,45 +45,53 @@ typedef struct delimiter {
+// Parsing state shared by the inline-parsing helpers in this file.
typedef struct {
+ // text being parsed
cmark_chunk input;
- int pos;
+ // current read offset into input (read by peek_char, bumped by advance)
+ bufsize_t pos;
+ // reference map as handed to subject_from_buf
cmark_reference_map *refmap;
+ // most recently pushed delimiter; process_emphasis walks ->previous from here
delimiter *last_delim;
} subject;
+// True when c is one of the two bytes that can end a line:
+// line feed ('\n') or carriage return ('\r').
+static inline bool
+S_is_line_end_char(char c)
+{
+    switch (c) {
+    case '\n':
+    case '\r':
+        return true;
+    default:
+        return false;
+    }
+}
+
static delimiter*
S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
-static int parse_inline(subject* subj, cmark_node * parent);
+static int parse_inline(subject* subj, cmark_node * parent, int options);
static void subject_from_buf(subject *e, cmark_strbuf *buffer,
cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj);
+static bufsize_t subject_find_special_char(subject *subj, int options);
-static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
+// Build the destination chunk for an autolink.
+//
+// url is passed to cmark_chunk_trim first. If it is empty afterwards the
+// result is CMARK_CHUNK_EMPTY (previously: NULL). Otherwise the result is
+// detached from a fresh buffer holding "mailto:" (only when is_email is
+// non-zero) followed by the output of houdini_unescape_html_f on the URL.
+static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
{
cmark_strbuf buf = GH_BUF_INIT;
cmark_chunk_trim(url);
- if (url->len == 0)
- return NULL;
+ if (url->len == 0) {
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
+ return result;
+ }
if (is_email)
cmark_strbuf_puts(&buf, "mailto:");
houdini_unescape_html_f(&buf, url->data, url->len);
- return cmark_strbuf_detach(&buf);
+ return cmark_chunk_buf_detach(&buf);
}
-static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title)
{
cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
if(e != NULL) {
e->type = CMARK_NODE_LINK;
e->first_child = label;
e->last_child = label;
- e->as.link.url = url;
- e->as.link.title = title;
+ e->as.link.url = *url;
+ e->as.link.title = *title;
e->next = NULL;
label->parent = e;
}
@@ -83,7 +100,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig
+// Wrap label in a link node whose URL is the cleaned autolink target and
+// whose title is empty.
static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
{
- return make_link(label, cmark_clean_autolink(&url, is_email), NULL);
+ // make_link takes both chunks by pointer and copies them into the node,
+ // so the cleaned URL and the empty title live in locals for the call.
+ cmark_chunk clean_url = cmark_clean_autolink(&url, is_email);
+ cmark_chunk title = CMARK_CHUNK_EMPTY;
+ return make_link(label, &clean_url, &title);
}
// Create an inline with a literal string value.
@@ -125,19 +144,20 @@ static inline cmark_node* make_simple(cmark_node_type t)
return e;
}
-static unsigned char *bufdup(const unsigned char *buf)
+// Duplicate a chunk by creating a copy of the buffer not by reusing the
+// buffer like cmark_chunk_dup does.
+//
+// On success the result holds a freshly malloc'ed, NUL-terminated copy of
+// src's bytes with alloc set to 1. If the allocation fails, an empty chunk
+// (CMARK_CHUNK_EMPTY) is returned instead of writing through a NULL
+// pointer; the removed bufdup likewise tolerated allocation failure.
+// A zero-length source never reaches memcpy, because src->data may be
+// NULL in that case (e.g. CMARK_CHUNK_EMPTY).
+static cmark_chunk chunk_clone(cmark_chunk *src)
{
- unsigned char *new_buf = NULL;
+    cmark_chunk c = CMARK_CHUNK_EMPTY;
+    bufsize_t len = src->len;
+    unsigned char *data = (unsigned char *)malloc(len + 1);
- if (buf) {
- int len = strlen((char *)buf);
- new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
- if(new_buf != NULL) {
- memcpy(new_buf, buf, len + 1);
- }
- }
+    if (data == NULL) {
+        return c; // out of memory: hand back an empty chunk
+    }
+    if (len > 0) {
+        memcpy(data, src->data, len);
+    }
+    data[len] = '\0';
+
+    c.data = data;
+    c.len = len;
+    c.alloc = 1;
- return new_buf;
+    return c;
}
static void subject_from_buf(subject *e, cmark_strbuf *buffer,
@@ -149,8 +169,6 @@ static void subject_from_buf(subject *e, cmark_strbuf *buffer,
e->pos = 0;
e->refmap = refmap;
e->last_delim = NULL;
-
- cmark_chunk_rtrim(&e->input);
}
static inline int isbacktick(int c)
@@ -160,10 +178,13 @@ static inline int isbacktick(int c)
+// Return the byte at the subject's current position without consuming it,
+// or 0 when the position is at or past the end of the input.
static inline unsigned char peek_char(subject *subj)
{
+ // NUL bytes should have been stripped out by now. If they're
+ // present, it's a programming error (a 0 return must only mean
+ // "end of input"):
+ assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
}
-static inline unsigned char peek_at(subject *subj, int pos)
+// Return the input byte at absolute offset pos. No bounds check is made
+// here; the caller must pass an offset inside the input.
+static inline unsigned char peek_at(subject *subj, bufsize_t pos)
{
return subj->input.data[pos];
}
@@ -177,12 +198,38 @@ static inline int is_eof(subject* subj)
// Advance the subject. Doesn't check for eof.
#define advance(subj) (subj)->pos += 1
+// Advance past a run of spaces and tabs at the current position.
+// Returns true when at least one character was consumed.
+static inline bool
+skip_spaces(subject *subj)
+{
+    bufsize_t start = subj->pos;
+
+    for (;;) {
+        unsigned char ch = peek_char(subj);
+        if (ch != ' ' && ch != '\t') {
+            break;
+        }
+        advance(subj);
+    }
+    return subj->pos != start;
+}
+
+// Consume one line ending at the current position: an optional '\r'
+// followed by an optional '\n'. Returns true when either byte was
+// consumed, or when the subject is at end of input afterwards.
+static inline bool
+skip_line_end(subject *subj)
+{
+    bufsize_t start = subj->pos;
+
+    if (peek_char(subj) == '\r') {
+        advance(subj);
+    }
+    if (peek_char(subj) == '\n') {
+        advance(subj);
+    }
+    return subj->pos != start || is_eof(subj);
+}
+
// Take characters while a predicate holds, and return a string.
static inline cmark_chunk take_while(subject* subj, int (*f)(int))
{
unsigned char c;
- int startpos = subj->pos;
- int len = 0;
+ bufsize_t startpos = subj->pos;
+ bufsize_t len = 0;
while ((c = peek_char(subj)) && (*f)(c)) {
advance(subj);
@@ -197,7 +244,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
// parsed). Return 0 if you don't find matching closing
// backticks, otherwise return the position in the subject
// after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
{
// read non backticks
unsigned char c;
@@ -207,7 +254,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
if (is_eof(subj)) {
return 0; // did not find closing ticks, return 0
}
- int numticks = 0;
+ bufsize_t numticks = 0;
while (peek_char(subj) == '`') {
advance(subj);
numticks++;
@@ -223,8 +270,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
static cmark_node* handle_backticks(subject *subj)
{
cmark_chunk openticks = take_while(subj, isbacktick);
- int startpos = subj->pos;
- int endpos = scan_to_closing_backticks(subj, openticks.len);
+ bufsize_t startpos = subj->pos;
+ bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
if (endpos == 0) { // not found
subj->pos = startpos; // rewind
@@ -246,10 +293,11 @@ static int
scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
{
int numdelims = 0;
- int before_char_pos;
+ bufsize_t before_char_pos;
int32_t after_char = 0;
int32_t before_char = 0;
int len;
+ bool left_flanking, right_flanking;
if (subj->pos == 0) {
before_char = 10;
@@ -267,9 +315,14 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
}
}
- while (peek_char(subj) == c) {
+ if (c == '\'' || c == '"') {
numdelims++;
- advance(subj);
+ advance(subj); // limit to 1 delim for quotes
+ } else {
+ while (peek_char(subj) == c) {
+ numdelims++;
+ advance(subj);
+ }
}
len = utf8proc_iterate(subj->input.data + subj->pos,
@@ -277,19 +330,25 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
if (len == -1) {
after_char = 10;
}
- *can_open = numdelims > 0 && !utf8proc_is_space(after_char) &&
- !(utf8proc_is_punctuation(after_char) &&
- !utf8proc_is_space(before_char) &&
- !utf8proc_is_punctuation(before_char));
- *can_close = numdelims > 0 && !utf8proc_is_space(before_char) &&
- !(utf8proc_is_punctuation(before_char) &&
- !utf8proc_is_space(after_char) &&
- !utf8proc_is_punctuation(after_char));
+ left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) &&
+ !(utf8proc_is_punctuation(after_char) &&
+ !utf8proc_is_space(before_char) &&
+ !utf8proc_is_punctuation(before_char));
+ right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) &&
+ !(utf8proc_is_punctuation(before_char) &&
+ !utf8proc_is_space(after_char) &&
+ !utf8proc_is_punctuation(after_char));
if (c == '_') {
- *can_open = *can_open && !(before_char < 128 &&
- cmark_isalnum((char)before_char));
- *can_close = *can_close && !(before_char < 128 &&
- cmark_isalnum((char)after_char));
+ *can_open = left_flanking &&
+ (!right_flanking || utf8proc_is_punctuation(before_char));
+ *can_close = right_flanking &&
+ (!left_flanking || utf8proc_is_punctuation(after_char));
+ } else if (c == '\'' || c == '"') {
+ *can_open = left_flanking && !right_flanking;
+ *can_close = right_flanking;
+ } else {
+ *can_open = left_flanking;
+ *can_close = right_flanking;
}
return numdelims;
}
@@ -300,10 +359,10 @@ static void print_delimiters(subject *subj)
delimiter *delim;
delim = subj->last_delim;
while (delim != NULL) {
- printf("Item at %p: %d %d %d next(%p) prev(%p)\n",
- delim, delim->delim_char,
+ printf("Item at stack pos %p, text pos %d: %d %d %d next(%p) prev(%p)\n",
+ (void*)delim, delim->position, delim->delim_char,
delim->can_open, delim->can_close,
- delim->next, delim->previous);
+ (void*)delim->next, (void*)delim->previous);
delim = delim->previous;
}
}
@@ -347,59 +406,175 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
subj->last_delim = delim;
}
-// Parse strong/emph or a fallback.
-// Assumes the subject has '_' or '*' at the current position.
-static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
+// Scan the delimiter run at the current position and return a text node
+// for it. Assumes the subject has the character c at the current position.
+//
+// With smart set, a quote is emitted as a curly-quote literal: a single
+// quote always starts out as a right quote, a double quote as right or
+// left depending on can_close. process_emphasis may later replace these
+// literals once openers and closers are paired. Without smart, the node
+// holds the delimiter characters exactly as they appear in the input.
+static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
{
- int numdelims;
+ bufsize_t numdelims;
cmark_node * inl_text;
bool can_open, can_close;
+ cmark_chunk contents;
numdelims = scan_delims(subj, c, &can_open, &can_close);
- inl_text = make_str(cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
+ if (c == '\'' && smart) {
+ contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
+ } else if (c == '"' && smart) {
+ contents = cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
+ } else {
+ contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
+ }
- if (can_open || can_close) {
+ inl_text = make_str(contents);
+
+ // Quote characters go on the delimiter stack only in smart mode;
+ // '*' and '_' are pushed whenever they can open or close.
+ if ((can_open || can_close) &&
+ (!(c == '\'' || c == '"') || smart)) {
push_delimiter(subj, c, can_open, can_close, inl_text);
}
return inl_text;
}
-static void process_emphasis(subject *subj, delimiter *start_delim)
+// Assumes we have a hyphen at the current position.
+//
+// Without smart punctuation, or when the hyphen is not followed by another
+// hyphen, exactly one '-' is consumed and returned as a literal text node.
+// Otherwise the whole run of hyphens is consumed and returned as a text
+// node made of em dashes followed by en dashes:
+//   - run length divisible by 3: all em dashes
+//   - else divisible by 2:       all en dashes
+//   - else remainder 2 (mod 3):  em dashes, then one en dash
+//   - else:                      em dashes, then two en dashes
+static cmark_node* handle_hyphen(subject* subj, bool smart)
+{
+    // Positions and counts use bufsize_t, the type of subj->pos.
+    bufsize_t startpos = subj->pos;
+
+    advance(subj);
+
+    if (!smart || peek_char(subj) != '-') {
+        return make_str(cmark_chunk_literal("-"));
+    }
+
+    // smart is known to be true from here on.
+    while (peek_char(subj) == '-') {
+        advance(subj);
+    }
+
+    bufsize_t numhyphens = subj->pos - startpos;
+    bufsize_t en_count = 0;
+    bufsize_t em_count = 0;
+    bufsize_t i;
+    cmark_strbuf buf = GH_BUF_INIT;
+
+    if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
+        em_count = numhyphens / 3;
+    } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
+        en_count = numhyphens / 2;
+    } else if (numhyphens % 3 == 2) { // use one en dash at end
+        en_count = 1;
+        em_count = (numhyphens - 2) / 3;
+    } else { // use two en dashes at the end
+        en_count = 2;
+        em_count = (numhyphens - 4) / 3;
+    }
+
+    for (i = em_count; i > 0; i--) {
+        cmark_strbuf_puts(&buf, EMDASH);
+    }
+
+    for (i = en_count; i > 0; i--) {
+        cmark_strbuf_puts(&buf, ENDASH);
+    }
+
+    return make_str(cmark_chunk_buf_detach(&buf));
+}
+
+// Assumes we have a period at the current position.
+//
+// In smart mode three consecutive periods become a single ellipsis
+// character, and exactly two are consumed together as the literal "..".
+// Otherwise only the one period is consumed and returned as ".".
+static cmark_node* handle_period(subject* subj, bool smart)
+{
+    advance(subj);
+
+    if (!smart || peek_char(subj) != '.') {
+        return make_str(cmark_chunk_literal("."));
+    }
+
+    advance(subj);
+    if (peek_char(subj) != '.') {
+        return make_str(cmark_chunk_literal(".."));
+    }
+
+    advance(subj);
+    return make_str(cmark_chunk_literal(ELLIPSES));
+}
+
+// Resolve the delimiters that sit above stack_bottom on the subject's
+// delimiter list (pass NULL to process the whole list).
+//
+// The list is walked forward from the first delimiter above stack_bottom.
+// For every delimiter that can close and is one of * _ " ' the list is
+// searched backwards for an opener with the same character:
+//   - '*' / '_': a found opener is handed to S_insert_emph, which returns
+//     the delimiter to continue from.
+//   - quotes: the closer's text becomes a right curly quote and, when an
+//     opener was found, the opener's text becomes the matching left quote.
+// When no opener is found, the position is remembered in openers_bottom
+// so later searches for that character stop there. Finally every
+// delimiter above stack_bottom is removed.
+static void process_emphasis(subject *subj, delimiter *stack_bottom)
{
delimiter *closer = subj->last_delim;
delimiter *opener;
+ delimiter *old_closer;
+ bool opener_found;
+ // Indexed by delimiter character. Only the four entries initialized
+ // below are ever read, because the lookup happens only for closers
+ // whose delim_char is one of * _ ' ".
+ delimiter *openers_bottom[128];
+
+ // initialize openers_bottom:
+ openers_bottom['*'] = stack_bottom;
+ openers_bottom['_'] = stack_bottom;
+ openers_bottom['\''] = stack_bottom;
+ openers_bottom['"'] = stack_bottom;
// move back to first relevant delim.
- while (closer != NULL && closer->previous != start_delim) {
+ while (closer != NULL && closer->previous != stack_bottom) {
closer = closer->previous;
}
// now move forward, looking for closers, and handling each
while (closer != NULL) {
if (closer->can_close &&
- (closer->delim_char == '*' || closer->delim_char == '_')) {
+ (closer->delim_char == '*' || closer->delim_char == '_' ||
+ closer->delim_char == '"' || closer->delim_char == '\'')) {
// Now look backwards for first matching opener:
opener = closer->previous;
- while (opener != NULL && opener != start_delim) {
+ opener_found = false;
+ while (opener != NULL && opener != stack_bottom &&
+ opener != openers_bottom[closer->delim_char]) {
if (opener->delim_char == closer->delim_char &&
opener->can_open) {
+ opener_found = true;
break;
}
opener = opener->previous;
}
- if (opener != NULL && opener != start_delim) {
- closer = S_insert_emph(subj, opener, closer);
- } else {
+ // closer is advanced below; keep the original for the
+ // bookkeeping in the !opener_found branch.
+ old_closer = closer;
+ if (closer->delim_char == '*' || closer->delim_char == '_') {
+ if (opener_found) {
+ closer = S_insert_emph(subj, opener, closer);
+ } else {
+ closer = closer->next;
+ }
+ } else if (closer->delim_char == '\'') {
+ cmark_chunk_free(&closer->inl_text->as.literal);
+ closer->inl_text->as.literal =
+ cmark_chunk_literal(RIGHTSINGLEQUOTE);
+ if (opener_found) {
+ cmark_chunk_free(&opener->inl_text->as.literal);
+ opener->inl_text->as.literal =
+ cmark_chunk_literal(LEFTSINGLEQUOTE);
+ }
closer = closer->next;
+ } else if (closer->delim_char == '"') {
+ cmark_chunk_free(&closer->inl_text->as.literal);
+ closer->inl_text->as.literal =
+ cmark_chunk_literal(RIGHTDOUBLEQUOTE);
+ if (opener_found) {
+ cmark_chunk_free(&opener->inl_text->as.literal);
+ opener->inl_text->as.literal =
+ cmark_chunk_literal(LEFTDOUBLEQUOTE);
+ }
+ closer = closer->next;
+ }
+ if (!opener_found) {
+ // set lower bound for future searches for openers:
+ openers_bottom[old_closer->delim_char] = old_closer->previous;
+ if (!old_closer->can_open) {
+ // we can remove a closer that can't be an
+ // opener, once we've seen there's no
+ // matching opener:
+ remove_delimiter(subj, old_closer);
+ }
}
} else {
closer = closer->next;
}
}
- // free all delimiters in list until start_delim:
- while (subj->last_delim != start_delim) {
+ // free all delimiters in list until stack_bottom:
+ while (subj->last_delim != stack_bottom) {
remove_delimiter(subj, subj->last_delim);
}
}
@@ -408,11 +583,11 @@ static delimiter*
S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
{
delimiter *delim, *tmp_delim;
- int use_delims;
+ bufsize_t use_delims;
cmark_node *opener_inl = opener->inl_text;
cmark_node *closer_inl = closer->inl_text;
- int opener_num_chars = opener_inl->as.literal.len;
- int closer_num_chars = closer_inl->as.literal.len;
+ bufsize_t opener_num_chars = opener_inl->as.literal.len;
+ bufsize_t closer_num_chars = closer_inl->as.literal.len;
cmark_node *tmp, *emph, *first_child, *last_child;
// calculate the actual number of characters used from this closer
@@ -491,8 +666,7 @@ static cmark_node* handle_backslash(subject *subj)
if (cmark_ispunct(nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
- } else if (nextchar == '\n') {
- advance(subj);
+ } else if (!is_eof(subj) && skip_line_end(subj)) {
return make_linebreak();
} else {
return make_str(cmark_chunk_literal("\\"));
@@ -504,7 +678,7 @@ static cmark_node* handle_backslash(subject *subj)
static cmark_node* handle_entity(subject* subj)
{
cmark_strbuf ent = GH_BUF_INIT;
- size_t len;
+ bufsize_t len;
advance(subj);
@@ -526,7 +700,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
{
cmark_strbuf unescaped = GH_BUF_INIT;
- if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+ if (houdini_unescape_html(&unescaped, content->data, content->len)) {
return make_str(cmark_chunk_buf_detach(&unescaped));
} else {
return make_str(*content);
@@ -535,14 +709,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-unsigned char *cmark_clean_url(cmark_chunk *url)
+cmark_chunk cmark_clean_url(cmark_chunk *url)
{
cmark_strbuf buf = GH_BUF_INIT;
cmark_chunk_trim(url);
- if (url->len == 0)
- return NULL;
+ if (url->len == 0) {
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
+ return result;
+ }
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
@@ -551,16 +727,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url)
}
cmark_strbuf_unescape(&buf);
- return cmark_strbuf_detach(&buf);
+ return cmark_chunk_buf_detach(&buf);
}
-unsigned char *cmark_clean_title(cmark_chunk *title)
+cmark_chunk cmark_clean_title(cmark_chunk *title)
{
cmark_strbuf buf = GH_BUF_INIT;
unsigned char first, last;
- if (title->len == 0)
- return NULL;
+ if (title->len == 0) {
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
+ return result;
+ }
first = title->data[0];
last = title->data[title->len - 1];
@@ -575,14 +753,14 @@ unsigned char *cmark_clean_title(cmark_chunk *title)
}
cmark_strbuf_unescape(&buf);
- return cmark_strbuf_detach(&buf);
+ return cmark_chunk_buf_detach(&buf);
}
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
static cmark_node* handle_pointy_brace(subject* subj)
{
- int matchlen = 0;
+ bufsize_t matchlen = 0;
cmark_chunk contents;
advance(subj); // advance past first <
@@ -629,7 +807,7 @@ static cmark_node* handle_pointy_brace(subject* subj)
// encountered. Backticks in labels do not start code spans.
static int link_label(subject* subj, cmark_chunk *raw_label)
{
- int startpos = subj->pos;
+ bufsize_t startpos = subj->pos;
int length = 0;
unsigned char c;
@@ -659,6 +837,7 @@ static int link_label(subject* subj, cmark_chunk *raw_label)
if (c == ']') { // match found
*raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+ cmark_chunk_trim(raw_label);
advance(subj); // advance past ]
return 1;
}
@@ -672,14 +851,14 @@ noMatch:
// Return a link, an image, or a literal close bracket.
static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
{
- int initial_pos;
- int starturl, endurl, starttitle, endtitle, endall;
- int n;
- int sps;
+ bufsize_t initial_pos;
+ bufsize_t starturl, endurl, starttitle, endtitle, endall;
+ bufsize_t n;
+ bufsize_t sps;
cmark_reference *ref;
bool is_image = false;
cmark_chunk url_chunk, title_chunk;
- unsigned char *url, *title;
+ cmark_chunk url, title;
delimiter *opener;
cmark_node *link_text;
cmark_node *inl;
@@ -767,8 +946,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
cmark_chunk_free(&raw_label);
if (ref != NULL) { // found
- url = bufdup(ref->url);
- title = bufdup(ref->title);
+ url = chunk_clone(&ref->url);
+ title = chunk_clone(&ref->title);
goto match;
} else {
goto noMatch;
@@ -785,7 +964,7 @@ match:
inl->type = is_image ? NODE_IMAGE : NODE_LINK;
cmark_chunk_free(&inl->as.literal);
inl->first_child = link_text;
- process_emphasis(subj, opener->previous);
+ process_emphasis(subj, opener);
inl->as.link.url = url;
inl->as.link.title = title;
inl->next = NULL;
@@ -800,10 +979,10 @@ match:
}
parent->last_child = inl;
- // process_emphasis will remove this delimiter and all later ones.
// Now, if we have a link, we also want to deactivate earlier link
// delimiters. (This code can be removed if we decide to allow links
// inside links.)
+ remove_delimiter(subj, opener);
if (!is_image) {
opener = subj->last_delim;
while (opener != NULL) {
@@ -825,13 +1004,11 @@ match:
// Assumes the subject has a newline at the current position.
static cmark_node* handle_newline(subject *subj)
{
- int nlpos = subj->pos;
+ bufsize_t nlpos = subj->pos;
// skip over newline
advance(subj);
// skip spaces at beginning of line
- while (peek_char(subj) == ' ') {
- advance(subj);
- }
+ skip_spaces(subj);
if (nlpos > 1 &&
peek_at(subj, nlpos - 1) == ' ' &&
peek_at(subj, nlpos - 2) == ' ') {
@@ -841,11 +1018,11 @@ static cmark_node* handle_newline(subject *subj)
}
}
-static int subject_find_special_char(subject *subj)
+static bufsize_t subject_find_special_char(subject *subj, int options)
{
- // "\n\\`&_*[]<!"
+ // "\r\n\\`&_*[]<!"
static const int8_t SPECIAL_CHARS[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@@ -863,11 +1040,34 @@ static int subject_find_special_char(subject *subj)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
- int n = subj->pos + 1;
+ // " ' . -
+ static const char SMART_PUNCT_CHARS[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
+ bufsize_t n = subj->pos + 1;
while (n < subj->input.len) {
if (SPECIAL_CHARS[subj->input.data[n]])
return n;
+ if (options & CMARK_OPT_SMART &&
+ SMART_PUNCT_CHARS[subj->input.data[n]])
+ return n;
n++;
}
@@ -876,17 +1076,18 @@ static int subject_find_special_char(subject *subj)
// Parse an inline, advancing subject, and add it as a child of parent.
// Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, cmark_node * parent)
+static int parse_inline(subject* subj, cmark_node * parent, int options)
{
cmark_node* new_inl = NULL;
cmark_chunk contents;
unsigned char c;
- int endpos;
+ bufsize_t endpos;
c = peek_char(subj);
if (c == 0) {
return 0;
}
switch(c) {
+ case '\r':
case '\n':
new_inl = handle_newline(subj);
break;
@@ -904,7 +1105,15 @@ static int parse_inline(subject* subj, cmark_node * parent)
break;
case '*':
case '_':
- new_inl = handle_strong_emph(subj, c);
+ case '\'':
+ case '"':
+ new_inl = handle_delim(subj, c, options & CMARK_OPT_SMART);
+ break;
+ case '-':
+ new_inl = handle_hyphen(subj, options & CMARK_OPT_SMART);
+ break;
+ case '.':
+ new_inl = handle_period(subj, options & CMARK_OPT_SMART);
break;
case '[':
advance(subj);
@@ -925,12 +1134,12 @@ static int parse_inline(subject* subj, cmark_node * parent)
}
break;
default:
- endpos = subject_find_special_char(subj);
+ endpos = subject_find_special_char(subj, options);
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
subj->pos = endpos;
// if we're at a newline, strip trailing spaces.
- if (peek_char(subj) == '\n') {
+ if (S_is_line_end_char(peek_char(subj))) {
cmark_chunk_rtrim(&contents);
}
@@ -944,12 +1153,13 @@ static int parse_inline(subject* subj, cmark_node * parent)
}
// Parse inlines from parent's string_content, adding as children of parent.
-extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
+// options is forwarded unchanged to parse_inline, which tests it for
+// CMARK_OPT_SMART.
+extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options)
{
subject subj;
subject_from_buf(&subj, &parent->string_content, refmap);
+ // subject_from_buf no longer right-trims the input, so do it here.
+ cmark_chunk_rtrim(&subj.input);
- while (!is_eof(&subj) && parse_inline(&subj, parent)) ;
+ while (!is_eof(&subj) && parse_inline(&subj, parent, options)) ;
process_emphasis(&subj, NULL);
}
@@ -957,11 +1167,9 @@ extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
// Parse zero or more space characters, including at most one newline.
+// Spaces and tabs are both skipped (see skip_spaces), and the line ending
+// may be "\r", "\n" or "\r\n" (see skip_line_end).
static void spnl(subject* subj)
{
- bool seen_newline = false;
- while (peek_char(subj) == ' ' ||
- (!seen_newline &&
- (seen_newline = peek_char(subj) == '\n'))) {
- advance(subj);
+ skip_spaces(subj);
+ if (skip_line_end(subj)) {
+ skip_spaces(subj);
}
}
@@ -969,7 +1177,7 @@ static void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
{
subject subj;
@@ -977,13 +1185,13 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
cmark_chunk url;
cmark_chunk title;
- int matchlen = 0;
- int beforetitle;
+ bufsize_t matchlen = 0;
+ bufsize_t beforetitle;
subject_from_buf(&subj, input, NULL);
// parse label:
- if (!link_label(&subj, &lab))
+ if (!link_label(&subj, &lab) || lab.len == 0)
return 0;
// colon:
@@ -1014,14 +1222,19 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
subj.pos = beforetitle;
title = cmark_chunk_literal("");
}
+
// parse final spaces and newline:
- while (peek_char(&subj) == ' ') {
- advance(&subj);
- }
- if (peek_char(&subj) == '\n') {
- advance(&subj);
- } else if (peek_char(&subj) != 0) {
- return 0;
+ skip_spaces(&subj);
+ if (!skip_line_end(&subj)) {
+ if (matchlen) { // try rewinding before title
+ subj.pos = beforetitle;
+ skip_spaces(&subj);
+ if (!skip_line_end(&subj)) {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
}
// insert reference into refmap
cmark_reference_create(refmap, &lab, &url, &title);
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/inlines.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.h b/compiler/modules/CommonMark/src/inlines.h
index d2ccfb4..f8847fc 100644
--- a/compiler/modules/CommonMark/src/inlines.h
+++ b/compiler/modules/CommonMark/src/inlines.h
@@ -5,12 +5,12 @@
extern "C" {
#endif
-unsigned char *cmark_clean_url(cmark_chunk *url);
-unsigned char *cmark_clean_title(cmark_chunk *title);
+cmark_chunk cmark_clean_url(cmark_chunk *url);
+cmark_chunk cmark_clean_title(cmark_chunk *title);
-void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
+void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options);
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
#ifdef __cplusplus
}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/iterator.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/iterator.c b/compiler/modules/CommonMark/src/iterator.c
index 4daec2d..f18e3bf 100644
--- a/compiler/modules/CommonMark/src/iterator.c
+++ b/compiler/modules/CommonMark/src/iterator.c
@@ -108,6 +108,12 @@ cmark_iter_get_event_type(cmark_iter *iter)
return iter->cur.ev_type;
}
+// Return the root node stored in the iterator (iter->root).
+cmark_node*
+cmark_iter_get_root(cmark_iter *iter)
+{
+ return iter->root;
+}
+
void cmark_consolidate_text_nodes(cmark_node *root)
{
@@ -123,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root)
cur->next &&
cur->next->type == CMARK_NODE_TEXT) {
cmark_strbuf_clear(&buf);
- cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+ cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
tmp = cur->next;
while (tmp && tmp->type == CMARK_NODE_TEXT) {
- cmark_iter_get_node(iter); // advance pointer
- cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+ cmark_iter_next(iter); // advance pointer
+ cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
next = tmp->next;
cmark_node_free(tmp);
tmp = next;
}
- cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf));
+ cmark_chunk_free(&cur->as.literal);
+ cur->as.literal = cmark_chunk_buf_detach(&buf);
}
}
+ cmark_strbuf_free(&buf);
cmark_iter_free(iter);
}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/latex.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/latex.c b/compiler/modules/CommonMark/src/latex.c
new file mode 100644
index 0000000..782b0c0
--- /dev/null
+++ b/compiler/modules/CommonMark/src/latex.c
@@ -0,0 +1,430 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "utf8.h"
+#include "scanners.h"
+#include "render.h"
+
+#define safe_strlen(s) cmark_strbuf_safe_strlen(s)
+#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define CR() renderer->cr(renderer)
+#define BLANKLINE() renderer->blankline(renderer)
+
+// Character-level output callback of the LaTeX renderer; it is the outc
+// function that cmark_render_latex() hands to cmark_render().
+//
+// renderer: renderer receiving the output.
+// escape:   escaping mode.  LITERAL writes c unchanged; the other modes
+//           replace characters that are special to LaTeX.
+// c:        code point to emit.
+// nextc:    character following c, as supplied by the caller; it is only
+//           examined for '-' so that "--" is not typeset as a ligature.
+static inline void outc(cmark_renderer *renderer,
+ cmark_escaping escape,
+ int32_t c,
+ unsigned char nextc)
+{
+ if (escape == LITERAL) {
+ cmark_render_code_point(renderer, c);
+ return;
+ }
+
+ switch(c) {
+ // these are backslash-escaped in every non-LITERAL mode
+ case 123: // '{'
+ case 125: // '}'
+ case 35: // '#'
+ case 37: // '%'
+ case 38: // '&'
+ cmark_render_ascii(renderer, "\\");
+ cmark_render_code_point(renderer, c);
+ break;
+ // these get a backslash only in NORMAL mode
+ case 36: // '$'
+ case 95: // '_'
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "\\");
+ }
+ cmark_render_code_point(renderer, c);
+ break;
+ case 45 : // '-'
+ if (nextc == 45) { // prevent ligature
+ cmark_render_ascii(renderer, "\\-");
+ } else {
+ cmark_render_ascii(renderer, "-");
+ }
+ break;
+ case 126: // '~'
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "\\textasciitilde{}");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 94: // '^'
+ cmark_render_ascii(renderer, "\\^{}");
+ break;
+ case 92: // '\\'
+ if (escape == URL) {
+ // / acts as path sep even on windows:
+ cmark_render_ascii(renderer, "/");
+ } else {
+ cmark_render_ascii(renderer, "\\textbackslash{}");
+ }
+ break;
+ case 124: // '|'
+ cmark_render_ascii(renderer, "\\textbar{}");
+ break;
+ case 60: // '<'
+ cmark_render_ascii(renderer, "\\textless{}");
+ break;
+ case 62: // '>'
+ cmark_render_ascii(renderer, "\\textgreater{}");
+ break;
+ // square brackets are wrapped in a brace group
+ case 91: // '['
+ case 93: // ']'
+ cmark_render_ascii(renderer, "{");
+ cmark_render_code_point(renderer, c);
+ cmark_render_ascii(renderer, "}");
+ break;
+ case 34: // '"'
+ cmark_render_ascii(renderer, "\\textquotedbl{}");
+ // requires \usepackage[T1]{fontenc}
+ break;
+ case 39: // '\''
+ cmark_render_ascii(renderer, "\\textquotesingle{}");
+ // requires \usepackage{textcomp}
+ break;
+ case 160: // nbsp
+ cmark_render_ascii(renderer, "~");
+ break;
+ case 8230: // hellip
+ cmark_render_ascii(renderer, "\\ldots{}");
+ break;
+ // The typographic quotes and dashes below are written as their
+ // ASCII TeX input forms in NORMAL mode and passed through
+ // unchanged in the other modes.
+ case 8216: // lsquo
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "`");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 8217: // rsquo
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "\'");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 8220: // ldquo
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "``");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 8221: // rdquo
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "''");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 8212: // emdash
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "---");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 8211: // endash
+ if (escape == NORMAL) {
+ cmark_render_ascii(renderer, "--");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ default:
+ cmark_render_code_point(renderer, c);
+ }
+}
+
+// Classification of a node as computed by get_link_type().
+typedef enum {
+ NO_LINK, // not a link node, or its URL is empty or fails scan_scheme()
+ URL_AUTOLINK, // untitled link whose text equals its URL
+ EMAIL_AUTOLINK, // untitled "mailto:" link whose text equals the rest of the URL
+ NORMAL_LINK // any other link
+} link_type;
+
+// Classify `node` for LaTeX output.
+//
+// Returns NO_LINK when node is not a link, or when its URL is empty or
+// scan_scheme() finds no match at its start.  A link that has a title, or
+// whose content is anything other than a single text node, is a
+// NORMAL_LINK.  Otherwise the link text is compared with the URL (with a
+// leading "mailto:" removed): an exact match yields EMAIL_AUTOLINK for
+// mailto URLs and URL_AUTOLINK for the rest; a mismatch yields NORMAL_LINK.
+//
+// Side effect: adjacent text nodes at the start of the link content are
+// merged by cmark_consolidate_text_nodes().
+static link_type
+get_link_type(cmark_node *node)
+{
+    size_t title_len, url_len;
+    cmark_node *link_text;
+    const char *realurl;
+    int realurllen;
+    bool isemail = false;
+
+    if (node->type != CMARK_NODE_LINK) {
+        return NO_LINK;
+    }
+
+    const char* url = cmark_node_get_url(node);
+    cmark_chunk url_chunk = cmark_chunk_literal(url);
+
+    url_len = safe_strlen(url);
+    if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
+        return NO_LINK;
+    }
+
+    const char* title = cmark_node_get_title(node);
+    title_len = safe_strlen(title);
+    // if it has a title, we can't treat it as an autolink:
+    if (title_len > 0) {
+        return NORMAL_LINK;
+    }
+
+    link_text = node->first_child;
+    // A link with no content cannot be an autolink; returning here also
+    // avoids dereferencing a NULL child below.
+    if (link_text == NULL) {
+        return NORMAL_LINK;
+    }
+    cmark_consolidate_text_nodes(link_text);
+    // as.literal is only meaningful for text nodes (for an image child the
+    // same storage holds the image URL), and an autolink consists of
+    // exactly one text node.
+    if (link_text->type != CMARK_NODE_TEXT || link_text->next != NULL) {
+        return NORMAL_LINK;
+    }
+
+    realurl = url;
+    realurllen = url_len;
+    if (strncmp(realurl, "mailto:", 7) == 0) {
+        realurl += 7;
+        realurllen -= 7;
+        isemail = true;
+    }
+
+    // The lengths are compared first, so strncmp never reads past either
+    // string even though the literal is not NUL-terminated.
+    if (realurllen == link_text->as.literal.len &&
+        strncmp(realurl,
+                (char*)link_text->as.literal.data,
+                link_text->as.literal.len) == 0) {
+        return isemail ? EMAIL_AUTOLINK : URL_AUTOLINK;
+    }
+
+    return NORMAL_LINK;
+}
+
+// Return the enumerate nesting depth of `node`: the number of ordered
+// lists on the path from `node` up to the root, `node` itself included.
+// Returns 0 when node is NULL or no ordered list lies on that path.
+static int
+S_get_enumlevel(cmark_node *node)
+{
+    int enumlevel = 0;
+    cmark_node *tmp;
+
+    for (tmp = node; tmp != NULL; tmp = tmp->parent) {
+        // Inspect the ancestor being visited.  Testing `node` here would
+        // count every enclosing list, bullet lists included, whenever
+        // the starting list is ordered.
+        if (tmp->type == CMARK_NODE_LIST &&
+            cmark_node_get_list_type(tmp) == CMARK_ORDERED_LIST) {
+            enumlevel++;
+        }
+    }
+    return enumlevel;
+}
+
+// Render one traversal event for `node` as LaTeX through `renderer`.
+//
+// ev_type tells whether the node is being entered or left; `options` is
+// unused.  Returns 1 for normal traversal.  Returns 0 to make
+// cmark_render() skip the node's contents; a case that returns 0 writes
+// its own closing brace first, as the image case does.
+static int
+S_render_node(cmark_renderer *renderer,
+              cmark_node *node,
+              cmark_event_type ev_type,
+              int options)
+{
+    int list_number;
+    int enumlevel;
+    // large enough for any int printed with "%d"
+    char list_number_string[20];
+    bool entering = (ev_type == CMARK_EVENT_ENTER);
+    cmark_list_type list_type;
+    // Suffixes of the enumerate counters (enumi, enumii, ...), indexed by
+    // nesting level; index 0 is a placeholder.
+    static const char *const roman_numerals[] = {
+        "", "i", "ii", "iii", "iv", "v",
+        "vi", "vii", "viii", "ix", "x"
+    };
+    const int max_enumlevel =
+        (int)(sizeof(roman_numerals) / sizeof(roman_numerals[0])) - 1;
+
+    // avoid warning about unused parameter:
+    (void)(options);
+
+    switch (node->type) {
+    case CMARK_NODE_DOCUMENT:
+        break;
+
+    case CMARK_NODE_BLOCK_QUOTE:
+        if (entering) {
+            LIT("\\begin{quote}");
+            CR();
+        } else {
+            LIT("\\end{quote}");
+            BLANKLINE();
+        }
+        break;
+
+    case CMARK_NODE_LIST:
+        list_type = cmark_node_get_list_type(node);
+        if (entering) {
+            LIT("\\begin{");
+            LIT(list_type == CMARK_ORDERED_LIST ?
+                "enumerate" : "itemize");
+            LIT("}");
+            CR();
+            list_number = cmark_node_get_list_start(node);
+            if (list_type == CMARK_ORDERED_LIST && list_number > 1) {
+                enumlevel = S_get_enumlevel(node);
+                // Only emit \setcounter when the nesting level has an
+                // entry in the table; indexing beyond it would read
+                // out of bounds.
+                if (enumlevel >= 1 && enumlevel <= max_enumlevel) {
+                    sprintf(list_number_string,
+                            "%d", list_number);
+                    LIT("\\setcounter{enum");
+                    LIT(roman_numerals[enumlevel]);
+                    LIT("}{");
+                    OUT(list_number_string, false, NORMAL);
+                    LIT("}");
+                    CR();
+                }
+            }
+        } else {
+            LIT("\\end{");
+            LIT(list_type == CMARK_ORDERED_LIST ?
+                "enumerate" : "itemize");
+            LIT("}");
+            BLANKLINE();
+        }
+        break;
+
+    case CMARK_NODE_ITEM:
+        if (entering) {
+            LIT("\\item ");
+        } else {
+            CR();
+        }
+        break;
+
+    case CMARK_NODE_HEADER:
+        if (entering) {
+            switch (cmark_node_get_header_level(node)) {
+            case 1:
+                LIT("\\section");
+                break;
+            case 2:
+                LIT("\\subsection");
+                break;
+            case 3:
+                LIT("\\subsubsection");
+                break;
+            case 4:
+                LIT("\\paragraph");
+                break;
+            case 5:
+                LIT("\\subparagraph");
+                break;
+            default:
+                // other levels have no sectioning command; the text
+                // is emitted inside a plain brace group
+                break;
+            }
+            LIT("{");
+        } else {
+            LIT("}");
+            BLANKLINE();
+        }
+        break;
+
+    case CMARK_NODE_CODE_BLOCK:
+        CR();
+        LIT("\\begin{verbatim}");
+        CR();
+        OUT(cmark_node_get_literal(node), false, LITERAL);
+        CR();
+        LIT("\\end{verbatim}");
+        BLANKLINE();
+        break;
+
+    case CMARK_NODE_HTML:
+        break;
+
+    case CMARK_NODE_HRULE:
+        BLANKLINE();
+        LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
+        BLANKLINE();
+        break;
+
+    case CMARK_NODE_PARAGRAPH:
+        if (!entering) {
+            BLANKLINE();
+        }
+        break;
+
+    case CMARK_NODE_TEXT:
+        OUT(cmark_node_get_literal(node), true, NORMAL);
+        break;
+
+    case CMARK_NODE_LINEBREAK:
+        LIT("\\\\");
+        CR();
+        break;
+
+    case CMARK_NODE_SOFTBREAK:
+        if (renderer->width == 0) {
+            CR();
+        } else {
+            OUT(" ", true, NORMAL);
+        }
+        break;
+
+    case CMARK_NODE_CODE:
+        LIT("\\texttt{");
+        OUT(cmark_node_get_literal(node), false, NORMAL);
+        LIT("}");
+        break;
+
+    case CMARK_NODE_INLINE_HTML:
+        break;
+
+    case CMARK_NODE_STRONG:
+        if (entering) {
+            LIT("\\textbf{");
+        } else {
+            LIT("}");
+        }
+        break;
+
+    case CMARK_NODE_EMPH:
+        if (entering) {
+            LIT("\\emph{");
+        } else {
+            LIT("}");
+        }
+        break;
+
+    case CMARK_NODE_LINK:
+        if (entering) {
+            const char* url = cmark_node_get_url(node);
+            // requires \usepackage{hyperref}
+            switch(get_link_type(node)) {
+            case URL_AUTOLINK:
+                LIT("\\url{");
+                OUT(url, false, URL);
+                LIT("}");
+                // The link text repeats the URL; skip the children
+                // so it is not printed a second time inside \url{}.
+                return 0;
+            case EMAIL_AUTOLINK:
+                LIT("\\href{");
+                OUT(url, false, URL);
+                LIT("}\\nolinkurl{");
+                break;
+            case NORMAL_LINK:
+                LIT("\\href{");
+                OUT(url, false, URL);
+                LIT("}{");
+                break;
+            case NO_LINK:
+                LIT("{"); // error?
+                break;
+            }
+        } else {
+            LIT("}");
+        }
+
+        break;
+
+    case CMARK_NODE_IMAGE:
+        if (entering) {
+            LIT("\\protect\\includegraphics{");
+            // requires \usepackage{graphicx}
+            OUT(cmark_node_get_url(node), false, URL);
+            LIT("}");
+            // the image description (child nodes) is not rendered
+            return 0;
+        }
+        break;
+
+    default:
+        assert(false);
+        break;
+    }
+
+    return 1;
+}
+
+// Render the tree rooted at `root` as LaTeX by calling cmark_render()
+// with this file's outc and S_render_node callbacks; `options` and
+// `width` are forwarded unchanged.  Returns whatever cmark_render() returns.
+char *cmark_render_latex(cmark_node *root, int options, int width)
+{
+ return cmark_render(root, options, width, outc, S_render_node);
+}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/libcmark.pc.in
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/libcmark.pc.in b/compiler/modules/CommonMark/src/libcmark.pc.in
deleted file mode 100644
index 9c3a9a9..0000000
--- a/compiler/modules/CommonMark/src/libcmark.pc.in
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=@CMAKE_INSTALL_PREFIX@
-libdir=@CMAKE_INSTALL_PREFIX@/lib
-includedir=@CMAKE_INSTALL_PREFIX@/include
-
-Name: libcmark
-Description: CommonMark parsing, rendering, and manipulation
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lcmark
-Cflags: -I${includedir}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/man.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/man.c b/compiler/modules/CommonMark/src/man.c
index 2c8a3a5..6ff33f5 100644
--- a/compiler/modules/CommonMark/src/man.c
+++ b/compiler/modules/CommonMark/src/man.c
@@ -7,72 +7,84 @@
#include "cmark.h"
#include "node.h"
#include "buffer.h"
+#include "utf8.h"
+#include "render.h"
-// Functions to convert cmark_nodes to groff man strings.
+#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define CR() renderer->cr(renderer)
+#define BLANKLINE() renderer->blankline(renderer)
-static void escape_man(cmark_strbuf *dest, const unsigned char *source, int length)
+// Functions to convert cmark_nodes to groff man strings.
+static
+void S_outc(cmark_renderer *renderer,
+ cmark_escaping escape,
+ int32_t c,
+ unsigned char nextc)
{
- int i;
- unsigned char c;
-
- for (i = 0; i < length; i++) {
- c = source[i];
- if (c == '.' && i == 0) {
- cmark_strbuf_puts(dest, "\\&.");
- } else if (c == '\'' && i == 0) {
- cmark_strbuf_puts(dest, "\\&'");
- } else if (c == '-') {
- cmark_strbuf_puts(dest, "\\-");
- } else if (c == '\\') {
- cmark_strbuf_puts(dest, "\\e");
+ (void)(nextc);
+
+ if (escape == LITERAL) {
+ cmark_render_code_point(renderer, c);
+ return;
+ }
+
+ switch(c) {
+ case 46:
+ if (renderer->begin_line) {
+ cmark_render_ascii(renderer, "\\&.");
+ } else {
+ cmark_render_code_point(renderer, c);
+ }
+ break;
+ case 39:
+ if (renderer->begin_line) {
+ cmark_render_ascii(renderer, "\\&'");
} else {
- cmark_strbuf_putc(dest, source[i]);
+ cmark_render_code_point(renderer, c);
}
+ break;
+ case 45:
+ cmark_render_ascii(renderer, "\\-");
+ break;
+ case 92:
+ cmark_render_ascii(renderer, "\\e");
+ break;
+ case 8216: // left single quote
+ cmark_render_ascii(renderer, "\\[oq]");
+ break;
+ case 8217: // right single quote
+ cmark_render_ascii(renderer, "\\[cq]");
+ break;
+ case 8220: // left double quote
+ cmark_render_ascii(renderer, "\\[lq]");
+ break;
+ case 8221: // right double quote
+ cmark_render_ascii(renderer, "\\[rq]");
+ break;
+ case 8212: // em dash
+ cmark_render_ascii(renderer, "\\[em]");
+ break;
+ case 8211: // en dash
+ cmark_render_ascii(renderer, "\\[en]");
+ break;
+ default:
+ cmark_render_code_point(renderer, c);
}
}
-static inline void cr(cmark_strbuf *man)
-{
- if (man->size && man->ptr[man->size - 1] != '\n')
- cmark_strbuf_putc(man, '\n');
-}
-
-struct render_state {
- cmark_strbuf* man;
- cmark_node *plain;
-};
-
static int
-S_render_node(cmark_node *node, cmark_event_type ev_type,
- struct render_state *state)
+S_render_node(cmark_renderer *renderer,
+ cmark_node *node,
+ cmark_event_type ev_type,
+ int options)
{
cmark_node *tmp;
- cmark_strbuf *man = state->man;
int list_number;
bool entering = (ev_type == CMARK_EVENT_ENTER);
- if (state->plain == node) { // back at original node
- state->plain = NULL;
- }
-
- if (state->plain != NULL) {
- switch(node->type) {
- case CMARK_NODE_TEXT:
- case CMARK_NODE_CODE:
- escape_man(man, node->as.literal.data,
- node->as.literal.len);
- break;
-
- case CMARK_NODE_LINEBREAK:
- case CMARK_NODE_SOFTBREAK:
- cmark_strbuf_putc(man, ' ');
- break;
-
- default:
- break;
- }
- return 1;
- }
+ // avoid unused parameter error:
+ (void)(options);
switch (node->type) {
case CMARK_NODE_DOCUMENT:
@@ -80,13 +92,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
- cr(man);
- cmark_strbuf_puts(man, ".RS");
- cr(man);
+ CR();
+ LIT(".RS");
+ CR();
} else {
- cr(man);
- cmark_strbuf_puts(man, ".RE");
- cr(man);
+ CR();
+ LIT(".RE");
+ CR();
}
break;
@@ -95,11 +107,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_ITEM:
if (entering) {
- cr(man);
- cmark_strbuf_puts(man, ".IP ");
+ CR();
+ LIT(".IP ");
if (cmark_node_get_list_type(node->parent) ==
CMARK_BULLET_LIST) {
- cmark_strbuf_puts(man, "\\[bu] 2");
+ LIT("\\[bu] 2");
} else {
list_number = cmark_node_get_list_start(node->parent);
tmp = node;
@@ -107,43 +119,45 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
tmp = tmp->prev;
list_number += 1;
}
- cmark_strbuf_printf(man, "\"%d.\" 4", list_number);
+ char list_number_s[20];
+ sprintf(list_number_s, "\"%d.\" 4", list_number);
+ LIT(list_number_s);
}
- cr(man);
+ CR();
} else {
- cr(man);
+ CR();
}
break;
case CMARK_NODE_HEADER:
if (entering) {
- cr(man);
- cmark_strbuf_puts(man,
- cmark_node_get_header_level(node) == 1 ?
- ".SH" : ".SS");
- cr(man);
+ CR();
+ LIT(cmark_node_get_header_level(node) == 1 ?
+ ".SH" : ".SS");
+ CR();
} else {
- cr(man);
+ CR();
}
break;
case CMARK_NODE_CODE_BLOCK:
- cr(man);
- cmark_strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
- escape_man(man, node->as.code.literal.data,
- node->as.code.literal.len);
- cr(man);
- cmark_strbuf_puts(man, "\\f[]\n.fi");
- cr(man);
+ CR();
+ LIT(".IP\n.nf\n\\f[C]\n");
+ OUT(cmark_node_get_literal(node),
+ false,
+ NORMAL);
+ CR();
+ LIT("\\f[]\n.fi");
+ CR();
break;
case CMARK_NODE_HTML:
break;
case CMARK_NODE_HRULE:
- cr(man);
- cmark_strbuf_puts(man, ".PP\n * * * * *");
- cr(man);
+ CR();
+ LIT(".PP\n * * * * *");
+ CR();
break;
case CMARK_NODE_PARAGRAPH:
@@ -154,32 +168,36 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
node->prev == NULL) {
// no blank line or .PP
} else {
- cr(man);
- cmark_strbuf_puts(man, ".PP\n");
+ CR();
+ LIT(".PP");
+ CR();
}
} else {
- cr(man);
+ CR();
}
break;
case CMARK_NODE_TEXT:
- escape_man(man, node->as.literal.data,
- node->as.literal.len);
+ OUT(cmark_node_get_literal(node), true, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
- cmark_strbuf_puts(man, ".PD 0\n.P\n.PD");
- cr(man);
+ LIT(".PD 0\n.P\n.PD");
+ CR();
break;
case CMARK_NODE_SOFTBREAK:
- cmark_strbuf_putc(man, '\n');
+ if (renderer->width == 0) {
+ CR();
+ } else {
+ OUT(" ", true, LITERAL);
+ }
break;
case CMARK_NODE_CODE:
- cmark_strbuf_puts(man, "\\f[C]");
- escape_man(man, node->as.literal.data, node->as.literal.len);
- cmark_strbuf_puts(man, "\\f[]");
+ LIT("\\f[C]");
+ OUT(cmark_node_get_literal(node), true, NORMAL);
+ LIT("\\f[]");
break;
case CMARK_NODE_INLINE_HTML:
@@ -187,33 +205,33 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_STRONG:
if (entering) {
- cmark_strbuf_puts(man, "\\f[B]");
+ LIT("\\f[B]");
} else {
- cmark_strbuf_puts(man, "\\f[]");
+ LIT("\\f[]");
}
break;
case CMARK_NODE_EMPH:
if (entering) {
- cmark_strbuf_puts(man, "\\f[I]");
+ LIT("\\f[I]");
} else {
- cmark_strbuf_puts(man, "\\f[]");
+ LIT("\\f[]");
}
break;
case CMARK_NODE_LINK:
if (!entering) {
- cmark_strbuf_printf(man, " (%s)",
- cmark_node_get_url(node));
+ LIT(" (");
+ OUT(cmark_node_get_url(node), true, URL);
+ LIT(")");
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
- cmark_strbuf_puts(man, "[IMAGE: ");
- state->plain = node;
+ LIT("[IMAGE: ");
} else {
- cmark_strbuf_puts(man, "]");
+ LIT("]");
}
break;
@@ -222,28 +240,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
break;
}
- // cmark_strbuf_putc(man, 'x');
return 1;
}
-char *cmark_render_man(cmark_node *root, long options)
+char *cmark_render_man(cmark_node *root, int options, int width)
{
- char *result;
- cmark_strbuf man = GH_BUF_INIT;
- struct render_state state = { &man, NULL };
- cmark_node *cur;
- cmark_event_type ev_type;
- cmark_iter *iter = cmark_iter_new(root);
-
- if (options == 0) options = 0; // avoid warning about unused parameters
-
- while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
- cur = cmark_iter_get_node(iter);
- S_render_node(cur, ev_type, &state);
- }
- result = (char *)cmark_strbuf_detach(&man);
-
- cmark_iter_free(iter);
- cmark_strbuf_free(&man);
- return result;
+ return cmark_render(root, options, width, S_outc, S_render_node);
}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/node.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c
index 3785a27..7b1bb10 100644
--- a/compiler/modules/CommonMark/src/node.c
+++ b/compiler/modules/CommonMark/src/node.c
@@ -7,6 +7,73 @@
static void
S_node_unlink(cmark_node *node);
+// True when `node` is non-NULL and its type falls within the block-level
+// range CMARK_NODE_FIRST_BLOCK..CMARK_NODE_LAST_BLOCK (inclusive).
+static inline bool
+S_is_block(cmark_node *node)
+{
+    return node != NULL
+           && node->type >= CMARK_NODE_FIRST_BLOCK
+           && node->type <= CMARK_NODE_LAST_BLOCK;
+}
+
+// True when `node` is non-NULL and its type falls within the inline
+// range CMARK_NODE_FIRST_INLINE..CMARK_NODE_LAST_INLINE (inclusive).
+static inline bool
+S_is_inline(cmark_node *node)
+{
+    return node != NULL
+           && node->type >= CMARK_NODE_FIRST_INLINE
+           && node->type <= CMARK_NODE_LAST_INLINE;
+}
+
+// Decide whether `child` may be placed inside `node`.
+//
+// Returns false when either pointer is NULL, when `child` is `node`
+// itself or one of its ancestors, or when `child` is a document.
+// Otherwise the answer depends on the container type: document, block
+// quote and item accept any block except an item; a list accepts only
+// items; paragraph, header, emph, strong, link and image accept inlines;
+// every other node type accepts nothing.
+static bool
+S_can_contain(cmark_node *node, cmark_node *child)
+{
+    cmark_node *ancestor;
+
+    if (node == NULL || child == NULL) {
+        return false;
+    }
+
+    // Walk from node up to the root; meeting child on the way would
+    // create a cycle.
+    for (ancestor = node; ancestor != NULL; ancestor = ancestor->parent) {
+        if (ancestor == child) {
+            return false;
+        }
+    }
+
+    if (child->type == CMARK_NODE_DOCUMENT) {
+        return false;
+    }
+
+    switch (node->type) {
+    case CMARK_NODE_DOCUMENT:
+    case CMARK_NODE_BLOCK_QUOTE:
+    case CMARK_NODE_ITEM:
+        if (!S_is_block(child)) {
+            return false;
+        }
+        return child->type != CMARK_NODE_ITEM;
+
+    case CMARK_NODE_LIST:
+        return child->type == CMARK_NODE_ITEM;
+
+    case CMARK_NODE_PARAGRAPH:
+    case CMARK_NODE_HEADER:
+    case CMARK_NODE_EMPH:
+    case CMARK_NODE_STRONG:
+    case CMARK_NODE_LINK:
+    case CMARK_NODE_IMAGE:
+        return S_is_inline(child);
+
+    default:
+        return false;
+    }
+}
+
cmark_node*
cmark_node_new(cmark_node_type type)
{
@@ -39,7 +106,9 @@ void S_free_nodes(cmark_node *e)
{
cmark_node *next;
while (e != NULL) {
- cmark_strbuf_free(&e->string_content);
+ if (S_is_block(e)) {
+ cmark_strbuf_free(&e->string_content);
+ }
switch (e->type) {
case NODE_CODE_BLOCK:
cmark_chunk_free(&e->as.code.info);
@@ -53,8 +122,8 @@ void S_free_nodes(cmark_node *e)
break;
case NODE_LINK:
case NODE_IMAGE:
- free(e->as.link.url);
- free(e->as.link.title);
+ cmark_chunk_free(&e->as.link.url);
+ cmark_chunk_free(&e->as.link.title);
break;
default:
break;
@@ -189,13 +258,24 @@ cmark_node_last_child(cmark_node *node)
}
}
-static char*
-S_strdup(const char *str)
+void*
+cmark_node_get_user_data(cmark_node *node)
{
- size_t size = strlen(str) + 1;
- char *dup = (char *)malloc(size);
- memcpy(dup, str, size);
- return dup;
+ if (node == NULL) {
+ return NULL;
+ } else {
+ return node->user_data;
+ }
+}
+
+int
+cmark_node_set_user_data(cmark_node *node, void *user_data)
+{
+ if (node == NULL) {
+ return 0;
+ }
+ node->user_data = user_data;
+ return 1;
}
const char*
@@ -448,7 +528,7 @@ cmark_node_get_url(cmark_node *node)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- return (char *)node->as.link.url;
+ return cmark_chunk_to_cstr(&node->as.link.url);
default:
break;
}
@@ -466,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- free(node->as.link.url);
- node->as.link.url = (unsigned char *)S_strdup(url);
+ cmark_chunk_set_cstr(&node->as.link.url, url);
return 1;
default:
break;
@@ -486,7 +565,7 @@ cmark_node_get_title(cmark_node *node)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- return (char *)node->as.link.title;
+ return cmark_chunk_to_cstr(&node->as.link.title);
default:
break;
}
@@ -504,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- free(node->as.link.title);
- node->as.link.title = (unsigned char *)S_strdup(title);
+ cmark_chunk_set_cstr(&node->as.link.title, title);
return 1;
default:
break;
@@ -550,73 +628,6 @@ cmark_node_get_end_column(cmark_node *node)
return node->end_column;
}
-static inline bool
-S_is_block(cmark_node *node)
-{
- if (node == NULL) {
- return false;
- }
- return node->type >= CMARK_NODE_FIRST_BLOCK
- && node->type <= CMARK_NODE_LAST_BLOCK;
-}
-
-static inline bool
-S_is_inline(cmark_node *node)
-{
- if (node == NULL) {
- return false;
- }
- return node->type >= CMARK_NODE_FIRST_INLINE
- && node->type <= CMARK_NODE_LAST_INLINE;
-}
-
-static bool
-S_can_contain(cmark_node *node, cmark_node *child)
-{
- cmark_node *cur;
-
- if (node == NULL || child == NULL) {
- return false;
- }
-
- // Verify that child is not an ancestor of node or equal to node.
- cur = node;
- do {
- if (cur == child) {
- return false;
- }
- cur = cur->parent;
- } while (cur != NULL);
-
- if (child->type == CMARK_NODE_DOCUMENT) {
- return false;
- }
-
- switch (node->type) {
- case CMARK_NODE_DOCUMENT:
- case CMARK_NODE_BLOCK_QUOTE:
- case CMARK_NODE_ITEM:
- return S_is_block(child)
- && child->type != CMARK_NODE_ITEM;
-
- case CMARK_NODE_LIST:
- return child->type == CMARK_NODE_ITEM;
-
- case CMARK_NODE_PARAGRAPH:
- case CMARK_NODE_HEADER:
- case CMARK_NODE_EMPH:
- case CMARK_NODE_STRONG:
- case CMARK_NODE_LINK:
- case CMARK_NODE_IMAGE:
- return S_is_inline(child);
-
- default:
- break;
- }
-
- return false;
-}
-
// Unlink a node without adjusting its next, prev, and parent pointers.
static void
S_node_unlink(cmark_node *node)
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/node.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h
index c0c43d3..b579408 100644
--- a/compiler/modules/CommonMark/src/node.h
+++ b/compiler/modules/CommonMark/src/node.h
@@ -6,6 +6,7 @@ extern "C" {
#endif
#include <stdio.h>
+#include <stdint.h>
#include "cmark.h"
#include "buffer.h"
@@ -22,12 +23,13 @@ typedef struct {
} cmark_list;
typedef struct {
- bool fenced;
- int fence_length;
- int fence_offset;
- unsigned char fence_char;
cmark_chunk info;
cmark_chunk literal;
+ int fence_length;
+ /* fence_offset must be 0-3, so we can use int8_t */
+ int8_t fence_offset;
+ unsigned char fence_char;
+ bool fenced;
} cmark_code;
typedef struct {
@@ -36,23 +38,26 @@ typedef struct {
} cmark_header;
typedef struct {
- unsigned char *url;
- unsigned char *title;
+ cmark_chunk url;
+ cmark_chunk title;
} cmark_link;
struct cmark_node {
- cmark_node_type type;
-
struct cmark_node *next;
struct cmark_node *prev;
struct cmark_node *parent;
struct cmark_node *first_child;
struct cmark_node *last_child;
+ void *user_data;
+
int start_line;
int start_column;
int end_line;
int end_column;
+
+ cmark_node_type type;
+
bool open;
bool last_line_blank;
@@ -64,6 +69,7 @@ struct cmark_node {
cmark_code code;
cmark_header header;
cmark_link link;
+ int html_block_type;
} as;
};
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/parser.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h
index 3c8def9..01a7aeb 100644
--- a/compiler/modules/CommonMark/src/parser.h
+++ b/compiler/modules/CommonMark/src/parser.h
@@ -16,9 +16,16 @@ struct cmark_parser {
struct cmark_node* root;
struct cmark_node* current;
int line_number;
+ bufsize_t offset;
+ bufsize_t column;
+ bufsize_t first_nonspace;
+ bufsize_t first_nonspace_column;
+ int indent;
+ bool blank;
cmark_strbuf *curline;
- int last_line_length;
+ bufsize_t last_line_length;
cmark_strbuf *linebuf;
+ int options;
};
#ifdef __cplusplus
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/references.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c
index 37bf4cb..1d3d56d 100644
--- a/compiler/modules/CommonMark/src/references.c
+++ b/compiler/modules/CommonMark/src/references.c
@@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref)
{
if(ref != NULL) {
free(ref->label);
- free(ref->url);
- free(ref->title);
+ cmark_chunk_free(&ref->url);
+ cmark_chunk_free(&ref->title);
free(ref);
}
}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/references.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.h b/compiler/modules/CommonMark/src/references.h
index 69325bb..a360cd5 100644
--- a/compiler/modules/CommonMark/src/references.h
+++ b/compiler/modules/CommonMark/src/references.h
@@ -12,8 +12,8 @@ extern "C" {
struct cmark_reference {
struct cmark_reference *next;
unsigned char *label;
- unsigned char *url;
- unsigned char *title;
+ cmark_chunk url;
+ cmark_chunk title;
unsigned int hash;
};
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/render.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/render.c b/compiler/modules/CommonMark/src/render.c
new file mode 100644
index 0000000..2f1faac
--- /dev/null
+++ b/compiler/modules/CommonMark/src/render.c
@@ -0,0 +1,186 @@
+#include <stdlib.h>
+#include "buffer.h"
+#include "chunk.h"
+#include "cmark.h"
+#include "utf8.h"
+#include "render.h"
+
+// Request a line break before the next output: raises need_cr to at
+// least 1 and never lowers it.
+static inline
+void S_cr(cmark_renderer *renderer)
+{
+    const int wanted = 1;
+
+    if (renderer->need_cr >= wanted) {
+        return;
+    }
+    renderer->need_cr = wanted;
+}
+
+// Request a blank line before the next output: raises need_cr to at
+// least 2 and never lowers it.
+static inline
+void S_blankline(cmark_renderer *renderer)
+{
+    const int wanted = 2;
+
+    if (renderer->need_cr >= wanted) {
+        return;
+    }
+    renderer->need_cr = wanted;
+}
+
+// Write the NUL-terminated string `source` to renderer->buffer.
+//
+// Line breaks requested earlier through need_cr are flushed first.  The
+// string is then processed one code point at a time: a newline resets the
+// column, LITERAL text is copied with cmark_render_code_point(), and
+// everything else is passed to the renderer's outc callback.  When `wrap`
+// is true and renderer->no_wrap is false, each written space is recorded
+// as a break point, and a line that grows beyond renderer->width is
+// broken at the last recorded one.  Processing stops without any report
+// as soon as utf8proc_iterate() signals an error.
+static
+void S_out(cmark_renderer *renderer,
+ const char *source,
+ bool wrap,
+ cmark_escaping escape)
+{
+ int length = cmark_strbuf_safe_strlen(source);
+ unsigned char nextc;
+ int32_t c;
+ int i = 0;
+ int len;
+ cmark_chunk remainder = cmark_chunk_literal("");
+ // index of the last byte currently in the buffer; stepped backwards
+ // over existing trailing newlines in the loop below
+ int k = renderer->buffer->size - 1;
+
+ wrap = wrap && !renderer->no_wrap;
+
+ // inside a tight list item a blank-line request is reduced to a
+ // single line break
+ if (renderer->in_tight_list_item && renderer->need_cr > 1) {
+ renderer->need_cr = 1;
+ }
+ // Flush pending line breaks.  A newline already at position k (or the
+ // start of the buffer) satisfies one request without writing anything.
+ while (renderer->need_cr) {
+ if (k < 0 || renderer->buffer->ptr[k] == '\n') {
+ k -= 1;
+ } else {
+ cmark_strbuf_putc(renderer->buffer, '\n');
+ // more breaks still pending: write the prefix onto the line
+ // just started
+ if (renderer->need_cr > 1) {
+ cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+ renderer->prefix->size);
+ }
+ }
+ renderer->column = 0;
+ renderer->begin_line = true;
+ renderer->need_cr -= 1;
+ }
+
+ while (i < length) {
+ if (renderer->begin_line) {
+ cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+ renderer->prefix->size);
+ // note: this assumes prefix is ascii:
+ renderer->column = renderer->prefix->size;
+ }
+
+ len = utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
+ if (len == -1) { // error condition
+ return; // return without rendering rest of string
+ }
+ // byte right after the current code point (the terminating NUL at
+ // the end of the string)
+ nextc = source[i + len];
+ if (c == 32 && wrap) {
+ // a space at the beginning of a line is dropped
+ if (!renderer->begin_line) {
+ cmark_strbuf_putc(renderer->buffer, ' ');
+ renderer->column += 1;
+ renderer->begin_line = false;
+ // remember the offset of this space as a possible break point
+ renderer->last_breakable = renderer->buffer->size -
+ 1;
+ // skip following spaces
+ while (source[i + 1] == ' ') {
+ i++;
+ }
+ }
+
+ } else if (c == 10) {
+ cmark_strbuf_putc(renderer->buffer, '\n');
+ renderer->column = 0;
+ renderer->begin_line = true;
+ renderer->last_breakable = 0;
+ } else if (escape == LITERAL) {
+ cmark_render_code_point(renderer, c);
+ renderer->begin_line = false;
+ } else {
+ (renderer->outc)(renderer, escape, c, nextc);
+ renderer->begin_line = false;
+ }
+
+ // If adding the character went beyond width, look for an
+ // earlier place where the line could be broken:
+ if (renderer->width > 0 &&
+ renderer->column > renderer->width &&
+ !renderer->begin_line &&
+ renderer->last_breakable > 0) {
+
+ // copy from last_breakable to remainder
+ // (the source pointer is read as a C string, so this relies on
+ // the buffer contents being NUL-terminated)
+ cmark_chunk_set_cstr(&remainder, (char *) renderer->buffer->ptr + renderer->last_breakable + 1);
+ // truncate at last_breakable
+ cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
+ // add newline, prefix, and remainder
+ cmark_strbuf_putc(renderer->buffer, '\n');
+ cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+ renderer->prefix->size);
+ cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
+ renderer->column = renderer->prefix->size + remainder.len;
+ cmark_chunk_free(&remainder);
+ renderer->last_breakable = 0;
+ renderer->begin_line = false;
+ }
+
+ i += len;
+ }
+}
+
+// Append the NUL-terminated string `s` to the output buffer and advance
+// the column by the number of bytes the buffer grew.
+// Assumes no newlines, assumes ascii content.
+void
+cmark_render_ascii(cmark_renderer* renderer, const char* s)
+{
+    cmark_strbuf *out = renderer->buffer;
+    int size_before = out->size;
+
+    cmark_strbuf_puts(out, s);
+    renderer->column += out->size - size_before;
+}
+
+// Append code point `c` to the output buffer via utf8proc_encode_char()
+// and advance the column by one, whatever the encoded length.
+void
+cmark_render_code_point(cmark_renderer *renderer, uint32_t c)
+{
+    cmark_strbuf *out = renderer->buffer;
+
+    utf8proc_encode_char(c, out);
+    renderer->column++;
+}
+
+// Generic rendering driver shared by the output formats.
+//
+// Walks the tree under `root` with an iterator and calls `render_node`
+// for every event.  The renderer given to the callback writes into a
+// local buffer, uses `outc` as its per-character callback and wraps at
+// `width` (a value of 0 or less disables line breaking in S_out).
+// `options` is passed through to `render_node`.
+//
+// Returns the rendered text, always ending in a newline, detached from
+// the internal buffer (presumably to be freed by the caller; confirm
+// against cmark_strbuf_detach).
+char*
+cmark_render(cmark_node *root,
+             int options,
+             int width,
+             void (*outc)(cmark_renderer*,
+                          cmark_escaping,
+                          int32_t,
+                          unsigned char),
+             int (*render_node)(cmark_renderer *renderer,
+                                cmark_node *node,
+                                cmark_event_type ev_type,
+                                int options))
+{
+    cmark_strbuf pref = GH_BUF_INIT;
+    cmark_strbuf buf = GH_BUF_INIT;
+    cmark_node *cur;
+    cmark_event_type ev_type;
+    char *result;
+    cmark_iter *iter = cmark_iter_new(root);
+
+    // field order: buffer, prefix, column, width, need_cr,
+    // last_breakable, begin_line, no_wrap, in_tight_list_item,
+    // outc, cr, blankline, out
+    cmark_renderer renderer = { &buf, &pref, 0, width,
+                                0, 0, true, false, false,
+                                outc, S_cr, S_blankline, S_out
+                              };
+
+    while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+        cur = cmark_iter_get_node(iter);
+        if (!render_node(&renderer, cur, ev_type, options)) {
+            // a false value causes us to skip processing
+            // the node's contents. this is used for
+            // autolinks.
+            cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
+        }
+    }
+
+    // ensure final newline; the size check keeps an empty buffer (e.g.
+    // an empty document) from being indexed at ptr[-1]
+    if (renderer.buffer->size == 0 ||
+        renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
+        cmark_strbuf_putc(renderer.buffer, '\n');
+    }
+
+    result = (char *)cmark_strbuf_detach(renderer.buffer);
+
+    cmark_iter_free(iter);
+    cmark_strbuf_free(renderer.prefix);
+    cmark_strbuf_free(renderer.buffer);
+
+    return result;
+}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/render.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/render.h b/compiler/modules/CommonMark/src/render.h
new file mode 100644
index 0000000..ca541bc
--- /dev/null
+++ b/compiler/modules/CommonMark/src/render.h
@@ -0,0 +1,66 @@
+#ifndef CMARK_RENDER_H
+#define CMARK_RENDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include "buffer.h"
+#include "chunk.h"
+
+// Escaping mode handed to a renderer's out and outc callbacks.
+typedef enum {
+ LITERAL, // text is written unchanged
+ NORMAL, // ordinary text content
+ TITLE,
+ URL // link and image destinations
+} cmark_escaping;
+
+// State shared between cmark_render() and the format-specific callbacks.
+struct cmark_renderer {
+ // output accumulated so far
+ cmark_strbuf* buffer;
+ // text written at the start of each output line
+ cmark_strbuf* prefix;
+ // current output column; reset to 0 after a newline
+ int column;
+ // wrap width; line breaking is only attempted when this is > 0
+ int width;
+ // pending line breaks: 1 after cr(), 2 after blankline()
+ int need_cr;
+ // buffer offset of the last space usable as a break point; 0 = none
+ bufsize_t last_breakable;
+ // true when the next character starts a new line
+ bool begin_line;
+ // when true, wrapping is disabled regardless of the caller's request
+ bool no_wrap;
+ // when true, a pending blank line is reduced to a single line break
+ bool in_tight_list_item;
+ // per-character output callback (escaping mode, code point, next char)
+ void (*outc)(struct cmark_renderer*,
+ cmark_escaping,
+ int32_t,
+ unsigned char);
+ // request a line break before the next output
+ void (*cr)(struct cmark_renderer*);
+ // request a blank line before the next output
+ void (*blankline)(struct cmark_renderer*);
+ // write a string (text, wrap flag, escaping mode)
+ void (*out)(struct cmark_renderer*,
+ const char *,
+ bool,
+ cmark_escaping);
+};
+
+typedef struct cmark_renderer cmark_renderer;
+
+void
+cmark_render_ascii(cmark_renderer *renderer, const char* s);
+
+void
+cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
+
+char*
+cmark_render(cmark_node *root,
+ int options,
+ int width,
+ void (*outc)(cmark_renderer*,
+ cmark_escaping,
+ int32_t,
+ unsigned char),
+ int (*render_node)(cmark_renderer *renderer,
+ cmark_node *node,
+ cmark_event_type ev_type,
+ int options));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif