You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2014/11/23 18:43:24 UTC

[12/16] lucy-clownfish git commit: Add CommonMark source files

Add CommonMark source files

Add the following files from the CommonMark source tree as of commit
4570eb2bff2e1b71fa5b6408abbc69c98ff5ff24 from Sat Nov 22 22:39:26 2014
-0800:

* LICENSE
* README.md
* All files in src excluding main.c and bench.h
* scanners.c generated from scanners.re with re2c
* Bug fix in blocks.c
* Add custom versions of config.h and cmark_export.h


Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/144f0b22
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/144f0b22
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/144f0b22

Branch: refs/heads/markdown
Commit: 144f0b229e9677d5dc2ab699b541451aa0e3cbc2
Parents: 3c66772
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sun Nov 9 13:29:15 2014 +0100
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sun Nov 23 17:33:56 2014 +0100

----------------------------------------------------------------------
 compiler/modules/CommonMark/LICENSE             |    70 +
 compiler/modules/CommonMark/README.md           |   290 +
 compiler/modules/CommonMark/src/blocks.c        |   832 ++
 compiler/modules/CommonMark/src/buffer.c        |   375 +
 compiler/modules/CommonMark/src/buffer.h        |   178 +
 .../modules/CommonMark/src/case_fold_switch.inc |  2637 ++++
 compiler/modules/CommonMark/src/chunk.h         |   123 +
 compiler/modules/CommonMark/src/cmark.c         |    22 +
 compiler/modules/CommonMark/src/cmark.h         |   245 +
 compiler/modules/CommonMark/src/cmark_export.h  |     7 +
 compiler/modules/CommonMark/src/config.h        |    11 +
 compiler/modules/CommonMark/src/config.h.in     |    17 +
 compiler/modules/CommonMark/src/debug.h         |    36 +
 compiler/modules/CommonMark/src/html/houdini.h  |    52 +
 .../CommonMark/src/html/houdini_href_e.c        |   107 +
 .../CommonMark/src/html/houdini_html_e.c        |    81 +
 .../CommonMark/src/html/houdini_html_u.c        |   112 +
 compiler/modules/CommonMark/src/html/html.c     |   361 +
 .../CommonMark/src/html/html_unescape.gperf     |  2131 +++
 .../modules/CommonMark/src/html/html_unescape.h |  9736 +++++++++++++
 compiler/modules/CommonMark/src/inlines.c       |   993 ++
 compiler/modules/CommonMark/src/inlines.h       |    26 +
 compiler/modules/CommonMark/src/node.c          |   657 +
 compiler/modules/CommonMark/src/node.h          |    74 +
 compiler/modules/CommonMark/src/parser.h        |    27 +
 compiler/modules/CommonMark/src/print.c         |   182 +
 compiler/modules/CommonMark/src/references.c    |   153 +
 compiler/modules/CommonMark/src/references.h    |    46 +
 compiler/modules/CommonMark/src/scanners.c      | 12398 +++++++++++++++++
 compiler/modules/CommonMark/src/scanners.h      |    38 +
 compiler/modules/CommonMark/src/scanners.re     |   235 +
 compiler/modules/CommonMark/src/utf8.c          |   403 +
 compiler/modules/CommonMark/src/utf8.h          |    23 +
 33 files changed, 32678 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/LICENSE
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/LICENSE b/compiler/modules/CommonMark/LICENSE
new file mode 100644
index 0000000..c8377be
--- /dev/null
+++ b/compiler/modules/CommonMark/LICENSE
@@ -0,0 +1,70 @@
+Copyright (c) 2014, John MacFarlane
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of John MacFarlane nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-----
+
+The polyfill for String.fromCodePoint included in commonmark.js is
+Copyright Mathias Bynens <http://mathiasbynens.be/>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-----
+
+The normalization code in runtests.py was derived from the
+markdowntest project, Copyright 2013 Karl Dubost:
+
+The MIT License (MIT)
+
+Copyright (c) 2013 Karl Dubost
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/README.md
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/README.md b/compiler/modules/CommonMark/README.md
new file mode 100644
index 0000000..32ed91d
--- /dev/null
+++ b/compiler/modules/CommonMark/README.md
@@ -0,0 +1,290 @@
+CommonMark
+==========
+
+CommonMark is a rationalized version of Markdown syntax,
+with a [spec][the spec] and BSD3-licensed reference
+implementations in C and JavaScript.
+
+[Try it now!](http://spec.commonmark.org/dingus.html)
+
+The implementations
+-------------------
+
+The C implementation provides both a shared library (`libcmark`) and a
+standalone program `cmark` that converts CommonMark to HTML.  It is
+written in standard C99 and has no library dependencies.  The parser is
+very fast (see [benchmarks](benchmarks.md)).
+
+It is easy to use `libcmark` in python or ruby code:  see `wrapper.py`
+and `wrapper.rb` in the repository for simple examples.
+
+The JavaScript implementation is a single JavaScript file, with
+no dependencies, that can be linked to in an HTML page.  Here
+is a simple usage example:
+
+``` javascript
+var reader = new commonmark.DocParser();
+var writer = new commonmark.HtmlRenderer();
+var parsed = reader.parse("Hello *world*");
+var result = writer.render(parsed);
+```
+
+A node package is also available; it includes a command-line tool called
+`commonmark`.
+
+**A note on security:**
+Neither implementation attempts to sanitize link attributes or
+raw HTML.  If you use these libraries in applications that accept
+untrusted user input, you must run the output through an HTML
+sanitizer to protect against
+[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
+
+Installing (C)
+--------------
+
+Building the C program (`cmark`) and shared library (`libcmark`)
+requires [cmake] and [re2c], which is used to generate `scanners.c` from
+`scanners.re`.  (Note that [re2c] is only a build dependency for
+developers, since `scanners.c` can be provided in a released source
+tarball.)
+
+If you have GNU make, you can simply `make`, `make test`, and `make
+install`.  This calls [cmake] to create a `Makefile` in the `build`
+directory, then uses that `Makefile` to create the executable and
+library.
+
+For a more portable method, you can use [cmake] manually. [cmake] knows
+how to create build environments for many build systems.  For example,
+on FreeBSD:
+
+    mkdir build
+    cd build
+    cmake ..  # optionally: -DCMAKE_INSTALL_PREFIX=path
+    make      # executable will be created as build/src/cmark
+    make test
+    make install
+
+Or, to create Xcode project files on OSX:
+
+    mkdir build
+    cd build
+    cmake -G Xcode ..
+    make
+    make test
+    make install
+
+Tests can also be run manually on any executable `$PROG` using:
+
+    python runtests.py --program $PROG
+
+The GNU Makefile also provides a few other targets for developers.
+To run a "fuzz test" against ten long randomly generated inputs:
+
+    make fuzztest
+
+To run a test for memory leaks using valgrind:
+
+    make leakcheck
+
+To make a release tarball and zip archive:
+
+    make archive
+
+To test the archives:
+
+    make testarchive
+
+Compiling for Windows
+---------------------
+
+You can cross-compile a Windows binary and dll on linux if you have the
+`mingw32` compiler:
+
+    make mingw
+
+The binaries will be in `build-mingw/windows/bin`.
+
+Installing (JavaScript)
+-----------------------
+
+The JavaScript library can be installed through `npm`:
+
+    npm install commonmark
+
+To build the JavaScript library as a single standalone file:
+
+    browserify --standalone commonmark js/lib/index.js -o js/commonmark.js
+
+Or fetch a pre-built copy from
+<http://spec.commonmark.org/js/commonmark.js>.
+
+To run tests for the JavaScript library:
+
+    make testjs
+
+or
+
+    node js/test.js
+
+The spec
+--------
+
+[The spec] contains over 500 embedded examples which serve as conformance
+tests.  To run the tests for `cmark`, do `make test`.  To run them for
+another Markdown program, say `myprog`, do `make test PROG=myprog`.  To
+run the tests for `commonmark.js`, do `make testjs`.
+
+[The spec]:  http://jgm.github.io/CommonMark/spec.html
+
+The source of [the spec] is `spec.txt`.  This is basically a Markdown
+file, with code examples written in a shorthand form:
+
+    .
+    Markdown source
+    .
+    expected HTML output
+    .
+
+To build an HTML version of the spec, do `make spec.html`.  To build a
+PDF version, do `make spec.pdf`.  Both these commands require that
+[pandoc] is installed, and creating a PDF requires a latex installation.
+
+The spec is written from the point of view of the human writer, not
+the computer reader.  It is not an algorithm---an English translation of
+a computer program---but a declarative description of what counts as a block
+quote, a code block, and each of the other structural elements that can
+make up a Markdown document.
+
+Because John Gruber's [canonical syntax
+description](http://daringfireball.net/projects/markdown/syntax) leaves
+many aspects of the syntax undetermined, writing a precise spec requires
+making a large number of decisions, many of them somewhat arbitrary.
+In making them, we have appealed to existing conventions and
+considerations of simplicity, readability, expressive power, and
+consistency.  We have tried to ensure that "normal" documents in the many
+incompatible existing implementations of Markdown will render, as far as
+possible, as their authors intended.  And we have tried to make the rules
+for different elements work together harmoniously.  In places where
+different decisions could have been made (for example, the rules
+governing list indentation), we have explained the rationale for
+our choices.  In a few cases, we have departed slightly from the canonical
+syntax description, in ways that we think further the goals of Markdown
+as stated in that description.
+
+For the most part, we have limited ourselves to the basic elements
+described in Gruber's canonical syntax description, eschewing extensions
+like footnotes and definition lists.  It is important to get the core
+right before considering such things. However, we have included a visible
+syntax for line breaks and fenced code blocks.
+
+Differences from original Markdown
+----------------------------------
+
+There are only a few places where this spec says things that contradict
+the canonical syntax description:
+
+-   It [allows all punctuation symbols to be
+    backslash-escaped](http://jgm.github.io/CommonMark/spec.html#backslash-escapes),
+    not just the symbols with special meanings in Markdown. We found
+    that it was just too hard to remember which symbols could be
+    escaped.
+
+-   It introduces an [alternative syntax for hard line
+    breaks](http://jgm.github.io/CommonMark/spec.html#hard-line-breaks), a
+    backslash at the end of the line, supplementing the
+    two-spaces-at-the-end-of-line rule. This is motivated by persistent
+    complaints about the “invisible” nature of the two-space rule.
+
+-   Link syntax has been made a bit more predictable (in a
+    backwards-compatible way). For example, `Markdown.pl` allows single
+    quotes around a title in inline links, but not in reference links.
+    This kind of difference is really hard for users to remember, so the
+    spec [allows single quotes in both
+    contexts](http://jgm.github.io/CommonMark/spec.html#links).
+
+-   The rule for HTML blocks differs, though in most real cases it
+    shouldn't make a difference. (See
+    [here](http://jgm.github.io/CommonMark/spec.html#html-blocks) for
+    details.) The spec's proposal makes it easy to include Markdown
+    inside HTML block-level tags, if you want to, but also allows you to
+    exclude this. It also makes parsing much easier, avoiding
+    expensive backtracking.
+
+-   It does not collapse adjacent bird-track blocks into a single
+    blockquote:
+
+        > this is two
+
+        > blockquotes
+
+        > this is a single
+        >
+        > blockquote with two paragraphs
+
+-   Rules for content in lists differ in a few respects, though (as with
+    HTML blocks), most lists in existing documents should render as
+    intended. There is some discussion of the choice points and
+    differences [here](http://jgm.github.io/CommonMark/spec.html#motivation).
+    We think that the spec's proposal does better than any existing
+    implementation in rendering lists the way a human writer or reader
+    would intuitively understand them. (We could give numerous examples
+    of perfectly natural looking lists that nearly every existing
+    implementation flubs up.)
+
+-   The spec stipulates that two blank lines break out of all list
+    contexts.  This is an attempt to deal with issues that often come up
+    when someone wants to have two adjacent lists, or a list followed by
+    an indented code block.
+
+-   Changing bullet characters, or changing from bullets to numbers or
+    vice versa, starts a new list. We think that is almost always going
+    to be the writer's intent.
+
+-   The number that begins an ordered list item may be followed by
+    either `.` or `)`. Changing the delimiter style starts a new
+    list.
+
+-   The start number of an ordered list is significant.
+
+-   [Fenced code blocks](http://jgm.github.io/CommonMark/spec.html#fenced-code-blocks) are supported, delimited by either
+    backticks (```` ``` ````) or tildes (` ~~~ `).
+
+Contributing
+------------
+
+There is a [forum for discussing
+CommonMark](http://talk.commonmark.org); you should use it instead of
+github issues for questions and possibly open-ended discussions.
+Use the [github issue tracker](http://github.com/jgm/CommonMark/issues)
+only for simple, clear, actionable issues.
+
+Authors
+-------
+
+The spec was written by John MacFarlane, drawing on
+
+- his experience writing and maintaining Markdown implementations in several
+  languages, including the first Markdown parser not based on regular
+  expression substitutions ([pandoc](http://github.com/jgm/pandoc)) and
+  the first markdown parsers based on PEG grammars
+  ([peg-markdown](http://github.com/jgm/peg-markdown),
+  [lunamark](http://github.com/jgm/lunamark))
+- a detailed examination of the differences between existing Markdown
+  implementations using [BabelMark 2](http://johnmacfarlane.net/babelmark2/),
+  and
+- extensive discussions with David Greenspan, Jeff Atwood, Vicent
+  Marti, Neil Williams, and Benjamin Dumke-von der Ehe.
+
+John MacFarlane was also responsible for the original versions of the
+C and JavaScript implementations.  The block parsing algorithm was
+worked out together with David Greenspan.  Vicent Marti
+optimized the C implementation for performance, increasing its speed
+tenfold.  Kārlis Gaņģis helped work out a better parsing algorithm
+for links and emphasis, eliminating several worst-case performance
+issues.  Nick Wellnhofer contributed many improvements, including
+most of the C library's API and its test harness.
+
+[cmake]: http://www.cmake.org/download/
+[pandoc]: http://johnmacfarlane.net/pandoc/
+[re2c]: http://re2c.org
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/blocks.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/blocks.c b/compiler/modules/CommonMark/src/blocks.c
new file mode 100644
index 0000000..c74b425
--- /dev/null
+++ b/compiler/modules/CommonMark/src/blocks.c
@@ -0,0 +1,832 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "parser.h"
+#include "cmark.h"
+#include "node.h"
+#include "references.h"
+#include "utf8.h"
+#include "scanners.h"
+#include "inlines.h"
+#include "html/houdini.h"
+#include "buffer.h"
+#include "debug.h"
+
+#define CODE_INDENT 4
+#define peek_at(i, n) (i)->data[n]
+
+// Allocate a zero-filled block node of type `tag`.  The new node is marked
+// open, starts at start_line/start_column, has end_line equal to start_line
+// and gets an initialised string_content buffer.  Returns NULL when the
+// allocation fails.
+static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
+{
+	cmark_node *block = (cmark_node *)calloc(1, sizeof(*block));
+
+	if (block == NULL)
+		return NULL;
+
+	block->type = tag;
+	block->open = true;
+	block->start_line = start_line;
+	block->start_column = start_column;
+	block->end_line = start_line;
+	strbuf_init(&block->string_content, 32);
+
+	return block;
+}
+
+// Create the root document node, positioned at line 1, column 1.
+// Returns whatever make_block returns (NULL on allocation failure).
+static cmark_node* make_document()
+{
+	return make_block(NODE_DOCUMENT, 1, 1);
+}
+
+// Allocate a parser together with its root document node and the buffer
+// that holds the line currently being processed.
+//
+// Returns NULL if the parser, the root node or the line buffer cannot be
+// allocated; anything allocated before the failure is released first.
+// The result of reference_map_new() is stored as-is.
+cmark_doc_parser *cmark_new_doc_parser()
+{
+	cmark_doc_parser *parser = (cmark_doc_parser*)malloc(sizeof(*parser));
+	cmark_node *document = make_document();
+	strbuf *line = (strbuf*)malloc(sizeof(*line));
+
+	if (parser == NULL || document == NULL || line == NULL) {
+		if (document != NULL) {
+			// make_block initialised the node's string buffer
+			cmark_strbuf_free(&document->string_content);
+			free(document);
+		}
+		free(line);
+		free(parser);
+		return NULL;
+	}
+
+	cmark_strbuf_init(line, 256);
+
+	parser->refmap = reference_map_new();
+	parser->root = document;
+	parser->current = document;
+	parser->line_number = 0;
+	parser->curline = line;
+
+	return parser;
+}
+
+// Release a parser: its current-line buffer, its reference map and the
+// parser structure itself.  The document tree (parser->root) is not freed
+// here.  Passing NULL is a no-op, mirroring free().
+void cmark_free_doc_parser(cmark_doc_parser *parser)
+{
+	if (parser == NULL)
+		return;
+
+	cmark_strbuf_free(parser->curline);
+	free(parser->curline);
+	cmark_reference_map_free(parser->refmap);
+	free(parser);
+}
+
+static void finalize(cmark_doc_parser *parser, cmark_node* b, int line_number);
+
+// Returns true if, starting at offset, the buffer holds only spaces up to
+// the first newline or up to the end of the buffer; false as soon as any
+// other character is met.
+static bool is_blank(strbuf *s, int offset)
+{
+	for (; offset < s->size; offset++) {
+		if (s->ptr[offset] == '\n')
+			return true;
+		if (s->ptr[offset] != ' ')
+			return false;
+	}
+
+	return true;
+}
+
+// Returns true if a block of type parent_type may hold a child of type
+// child_type: documents, block quotes and list items accept any child,
+// while a list accepts only list items.
+static inline bool can_contain(cmark_node_type parent_type, cmark_node_type child_type)
+{
+	if (parent_type == NODE_LIST)
+		return child_type == NODE_LIST_ITEM;
+
+	return parent_type == NODE_DOCUMENT ||
+		parent_type == NODE_BLOCK_QUOTE ||
+		parent_type == NODE_LIST_ITEM;
+}
+
+// Returns true for the block types that take raw text lines: paragraphs,
+// ATX headers, indented code and fenced code.
+static inline bool accepts_lines(cmark_node_type block_type)
+{
+	switch (block_type) {
+		case NODE_PARAGRAPH:
+		case NODE_ATX_HEADER:
+		case NODE_INDENTED_CODE:
+		case NODE_FENCED_CODE:
+			return true;
+
+		default:
+			return false;
+	}
+}
+
+// Append the part of ch that starts at offset to the node's string
+// content.  The node must still be open.
+static void add_line(cmark_node* cmark_node, chunk *ch, int offset)
+{
+	assert(cmark_node->open);
+
+	strbuf_put(&cmark_node->string_content,
+			ch->data + offset,
+			ch->len - offset);
+}
+
+// Drop trailing blank lines from ln.  If the buffer holds nothing but
+// spaces, tabs, CRs and LFs it is cleared; otherwise it is truncated at the
+// first newline found at or after the last non-whitespace character.
+static void remove_trailing_blank_lines(strbuf *ln)
+{
+	int last = ln->size - 1;
+	int newline_pos;
+
+	// step back over trailing whitespace
+	while (last >= 0) {
+		unsigned char c = ln->ptr[last];
+
+		if (!(c == ' ' || c == '\t' || c == '\r' || c == '\n'))
+			break;
+		last--;
+	}
+
+	if (last < 0) {
+		strbuf_clear(ln);
+		return;
+	}
+
+	newline_pos = strbuf_strchr(ln, '\n', last);
+	if (newline_pos >= 0)
+		strbuf_truncate(ln, newline_pos);
+}
+
+// Returns true if the node ends with a blank line.  Lists and list items
+// that are not themselves flagged are checked through their last child,
+// repeatedly, until a flagged node or a non-list node is reached.
+static bool ends_with_blank_line(cmark_node* cmark_node)
+{
+	for (;;) {
+		if (cmark_node->last_line_blank)
+			return true;
+
+		if (cmark_node->type != NODE_LIST && cmark_node->type != NODE_LIST_ITEM)
+			return false;
+
+		if (!cmark_node->last_child)
+			return false;
+
+		// descend into the last child of the list / list item
+		cmark_node = cmark_node->last_child;
+	}
+}
+
+// Break out of all containing lists.  Follows the last_child chain from the
+// document root to the first NODE_LIST; if one is found, finalizes *bptr
+// and its ancestors up to that list, finalizes the list itself and stores
+// the list's parent in *bptr.  Always returns 0.
+static int break_out_of_lists(cmark_doc_parser *parser, cmark_node ** bptr, int line_number)
+{
+	cmark_node *node = *bptr;
+	cmark_node *outer_list;
+
+	// locate the first list on the last-child spine of the tree
+	for (outer_list = parser->root;
+			outer_list && outer_list->type != NODE_LIST;
+			outer_list = outer_list->last_child) {
+	}
+
+	if (!outer_list)
+		return 0;
+
+	for (; node && node != outer_list; node = node->parent)
+		finalize(parser, node, line_number);
+
+	finalize(parser, outer_list, line_number);
+	*bptr = outer_list->parent;
+
+	return 0;
+}
+
+
+// Returns true if the list is loose, i.e. a blank line separates two of its
+// items or two blocks inside one of its items.
+static bool list_has_blank_between_items(cmark_node *list)
+{
+	cmark_node *item;
+	cmark_node *subitem;
+
+	for (item = list->first_child; item; item = item->next) {
+		// non-final list item ending with a blank line:
+		if (item->last_line_blank && item->next)
+			return true;
+
+		// look at the children of the list item, to see if there are
+		// blank lines between them:
+		for (subitem = item->first_child; subitem; subitem = subitem->next) {
+			if (ends_with_blank_line(subitem) &&
+					(item->next || subitem->next))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+// Close block b and do the type-specific post-processing:
+//  - paragraph: leading reference definitions are parsed into the parser's
+//    reference map and removed; a paragraph left blank becomes a
+//    NODE_REFERENCE_DEF;
+//  - indented code: trailing blank lines are removed and one newline added;
+//  - fenced code: the first line of the content becomes the info string;
+//  - list: the tight/loose flag is computed.
+// Does nothing if b is already closed.
+static void finalize(cmark_doc_parser *parser, cmark_node* b, int line_number)
+{
+	int info_len;
+	int consumed;
+
+	if (!b->open)
+		return; // already closed
+
+	b->open = false;
+	b->end_line = (line_number > b->start_line) ? line_number - 1 : line_number;
+
+	switch (b->type) {
+		case NODE_PARAGRAPH:
+			for (;;) {
+				if (strbuf_at(&b->string_content, 0) != '[')
+					break;
+				consumed = parse_reference_inline(&b->string_content, parser->refmap);
+				if (!consumed)
+					break;
+				strbuf_drop(&b->string_content, consumed);
+			}
+			if (is_blank(&b->string_content, 0))
+				b->type = NODE_REFERENCE_DEF;
+			break;
+
+		case NODE_INDENTED_CODE:
+			remove_trailing_blank_lines(&b->string_content);
+			strbuf_putc(&b->string_content, '\n');
+			break;
+
+		case NODE_FENCED_CODE:
+			// everything before the first newline is the info string
+			info_len = strbuf_strchr(&b->string_content, '\n', 0);
+
+			strbuf_init(&b->as.code.info, 0);
+			houdini_unescape_html_f(&b->as.code.info,
+					b->string_content.ptr,
+					info_len);
+
+			// remove the info line and its newline from the content
+			strbuf_drop(&b->string_content, info_len + 1);
+
+			strbuf_trim(&b->as.code.info);
+			strbuf_unescape(&b->as.code.info);
+			break;
+
+		case NODE_LIST:
+			// tight unless a blank line separates items or their blocks
+			b->as.list.tight = !list_has_blank_between_items(b);
+			break;
+
+		default:
+			break;
+	}
+}
+
+// Create a block of type block_type and append it as the last child of
+// parent.  If parent cannot contain that type, parent and then its
+// ancestors are finalized until one that can is reached.  Returns the new
+// child.
+static cmark_node* add_child(cmark_doc_parser *parser, cmark_node* parent,
+		cmark_node_type block_type, int start_line, int start_column)
+{
+	cmark_node *child;
+	cmark_node *previous;
+
+	assert(parent);
+
+	// back up until we hit a block that accepts this kind of child
+	for (; !can_contain(parent->type, block_type); parent = parent->parent)
+		finalize(parser, parent, start_line);
+
+	child = make_block(block_type, start_line, start_column);
+	child->parent = parent;
+
+	// link the child in after the current last child, if there is one
+	previous = parent->last_child;
+	child->prev = previous;
+	if (previous)
+		previous->next = child;
+	else
+		parent->first_child = child;
+
+	parent->last_child = child;
+	return child;
+}
+
+
+// One frame of the explicit stack used by process_inlines: remembers which
+// node to resume with once the subtree being visited is exhausted.
+typedef struct BlockStack {
+	struct BlockStack *previous;  // frame below this one, NULL at the bottom
+	cmark_node *next_sibling;     // node to visit after the current subtree
+} block_stack;
+
+// Walk through cmark_node and all children, parsing string content into
+// inline content for paragraphs and headers.  The traversal is iterative
+// and keeps its own heap-allocated stack.  If a stack frame cannot be
+// allocated the walk stops early; every frame allocated so far is still
+// released before returning.
+static void process_inlines(cmark_node* cur, reference_map *refmap)
+{
+	block_stack* stack = NULL;
+	block_stack* newstack = NULL;
+
+	while (cur != NULL) {
+		switch (cur->type) {
+			case NODE_PARAGRAPH:
+			case NODE_ATX_HEADER:
+			case NODE_SETEXT_HEADER:
+				parse_inlines(cur, refmap);
+				break;
+
+			default:
+				break;
+		}
+
+		if (cur->first_child) {
+			// descend, remembering where to continue afterwards
+			newstack = (block_stack*)malloc(sizeof(block_stack));
+			if (newstack == NULL)
+				break; // out of memory: fall through to the cleanup loop
+			newstack->previous = stack;
+			stack = newstack;
+			stack->next_sibling = cur->next;
+			cur = cur->first_child;
+		} else {
+			cur = cur->next;
+		}
+
+		// end of a sibling chain: pop frames until there is a node to visit
+		while (cur == NULL && stack != NULL) {
+			cur = stack->next_sibling;
+			newstack = stack->previous;
+			free(stack);
+			stack = newstack;
+		}
+	}
+
+	// release whatever is left on the stack (only after an early exit)
+	while (stack != NULL) {
+		newstack = stack->previous;
+		free(stack);
+		stack = newstack;
+	}
+}
+
+// Attempts to parse a list item marker (bullet or enumerated) at
+// input[pos].
+//
+// A bullet marker is one of '*', '-' or '+' that does not start a
+// horizontal rule; an ordered marker is one to nine digits followed by '.'
+// or ')'.  Either kind must be followed by a whitespace character.
+//
+// On success, returns the length of the marker and stores a newly
+// calloc'ed cmark_list describing it in *dataptr.  On failure (no marker,
+// or allocation failure) returns 0 and leaves *dataptr untouched.
+static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr)
+{
+	unsigned char c;
+	int startpos;
+	cmark_list *data;
+
+	startpos = pos;
+	c = peek_at(input, pos);
+
+	if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) {
+		pos++;
+		if (!isspace(peek_at(input, pos))) {
+			return 0;
+		}
+		data = (cmark_list *)calloc(1, sizeof(*data));
+		if(data == NULL) {
+			return 0;
+		} else {
+			data->marker_offset = 0; // will be adjusted later
+			data->list_type = CMARK_BULLET_LIST;
+			data->bullet_char = c;
+			data->start = 1;
+			data->delimiter = CMARK_PERIOD_DELIM;
+			data->tight = false;
+		}
+	} else if (isdigit(c)) {
+		int start = 0;
+		int digits = 0;
+
+		// Read at most 9 digits so that `start` cannot overflow a 32-bit
+		// int (signed overflow is undefined behaviour).  A longer run of
+		// digits leaves a digit where the delimiter is expected and is
+		// therefore rejected below.
+		do {
+			start = (10 * start) + (peek_at(input, pos) - '0');
+			pos++;
+			digits++;
+		} while (digits < 9 && isdigit(peek_at(input, pos)));
+
+		c = peek_at(input, pos);
+		if (c == '.' || c == ')') {
+			pos++;
+			if (!isspace(peek_at(input, pos))) {
+				return 0;
+			}
+			data = (cmark_list *)calloc(1, sizeof(*data));
+			if(data == NULL) {
+				return 0;
+			} else {
+				data->marker_offset = 0; // will be adjusted later
+				data->list_type = CMARK_ORDERED_LIST;
+				data->bullet_char = 0;
+				data->start = start;
+				data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM);
+				data->tight = false;
+			}
+		} else {
+			return 0;
+		}
+
+	} else {
+		return 0;
+	}
+
+	*dataptr = data;
+	return (pos - startpos);
+}
+
+// Return 1 if a list item with item_data belongs in a list with list_data,
+// else 0.  List type, delimiter and bullet character must all agree; the
+// marker offset is not compared.
+static int lists_match(cmark_list *list_data, cmark_list *item_data)
+{
+	if (list_data->list_type != item_data->list_type)
+		return 0;
+
+	if (list_data->delimiter != item_data->delimiter)
+		return 0;
+
+	return list_data->bullet_char == item_data->bullet_char;
+}
+
+// Close every block that is still open, from the parser's current block up
+// to and including the root, then parse inline content throughout the
+// tree.  Returns the root node.
+static cmark_node *finalize_document(cmark_doc_parser *parser)
+{
+	cmark_node *open_block;
+
+	for (open_block = parser->current;
+			open_block != parser->root;
+			open_block = parser->current) {
+		finalize(parser, open_block, parser->line_number);
+		parser->current = open_block->parent;
+	}
+
+	finalize(parser, parser->root, parser->line_number);
+	process_inlines(parser->root, parser->refmap);
+
+	return parser->root;
+}
+
+// Parse the contents of stream f and return the resulting document tree.
+//
+// The stream is read with fgets into a fixed-size buffer.  A line longer
+// than the buffer arrives in several pieces; the pieces are joined on the
+// heap and handed to cmark_process_line as ONE line, because that function
+// appends a newline to any input that lacks one and would otherwise turn
+// each piece into a separate line.  If the joining buffer cannot be grown,
+// the pieces collected so far and the current piece are passed on
+// separately rather than dropped.
+//
+// Line lengths are measured with strlen, so a NUL byte in the input cuts
+// the piece it appears in short.
+extern cmark_node *cmark_parse_file(FILE *f)
+{
+	char buffer[4096];
+	char *pending = NULL;     // pieces of a line that did not fit in buffer
+	size_t pending_len = 0;
+	cmark_doc_parser *parser = cmark_new_doc_parser();
+	cmark_node *document;
+
+	while (fgets(buffer, sizeof(buffer), f)) {
+		size_t len = strlen(buffer);
+		bool complete = (len > 0 && buffer[len - 1] == '\n');
+		char *grown;
+
+		if (pending == NULL && complete) {
+			// common case: the whole line fits in the buffer
+			cmark_process_line(parser, buffer, len);
+			continue;
+		}
+
+		// +1 keeps the request non-zero even for an empty piece
+		grown = (char *)realloc(pending, pending_len + len + 1);
+		if (grown == NULL) {
+			// cannot join: pass on what we have piece by piece
+			if (pending_len > 0)
+				cmark_process_line(parser, pending, pending_len);
+			free(pending);
+			pending = NULL;
+			pending_len = 0;
+			if (len > 0)
+				cmark_process_line(parser, buffer, len);
+			continue;
+		}
+
+		memcpy(grown + pending_len, buffer, len);
+		pending = grown;
+		pending_len += len;
+
+		if (complete) {
+			cmark_process_line(parser, pending, pending_len);
+			free(pending);
+			pending = NULL;
+			pending_len = 0;
+		}
+	}
+
+	// input ended in the middle of a long, unterminated last line
+	if (pending_len > 0)
+		cmark_process_line(parser, pending, pending_len);
+	free(pending);
+
+	document = cmark_finish(parser);
+	cmark_free_doc_parser(parser);
+	return document;
+}
+
+// Parse the len bytes at buffer and return the resulting document tree.
+// The input is split after each '\n'; every piece, newline included, is
+// passed to cmark_process_line.  Bytes after the last newline form a final
+// line without a terminator.
+extern cmark_node *cmark_parse_document(const char *buffer, size_t len)
+{
+	const char *end = buffer + len;
+	size_t offset;
+	cmark_doc_parser *parser = cmark_new_doc_parser();
+	cmark_node *document;
+
+	while (buffer < end) {
+		const char *eol = memchr(buffer, '\n', end - buffer);
+		// length of this line, including its newline when there is one
+		offset = eol ? (eol - buffer) + 1 : end - buffer;
+		cmark_process_line(parser, buffer, offset);
+		buffer += offset;
+	}
+
+	document = cmark_finish(parser);
+	cmark_free_doc_parser(parser);
+	return document;
+}
+
+// Right-trim ch, then, if it ends in a space followed by one or more '#'
+// characters, cut off that space and the '#'s and right-trim again.
+// A run of '#' that is not preceded by a space, or that makes up the whole
+// chunk, is left in place.
+static void chop_trailing_hashtags(chunk *ch)
+{
+	int last;
+	int idx;
+
+	chunk_rtrim(ch);
+	last = ch->len - 1;
+
+	// step back over the closing run of '#'
+	for (idx = last; idx >= 0 && peek_at(ch, idx) == '#'; idx--) {
+	}
+
+	if (idx == last || idx < 0)
+		return; // no trailing '#', or nothing but '#'
+
+	// there has to be a space before the final '#'s:
+	if (peek_at(ch, idx) != ' ')
+		return;
+
+	ch->len = idx;
+	chunk_rtrim(ch);
+}
+
+// Incorporate one line of input into the block tree held by `parser`.
+//
+// `buffer` points at `bytes` bytes of raw input for a single line, with or
+// without a trailing newline.  The line is passed through utf8proc_detab()
+// into parser->curline, newline-terminated, and then handled in three steps:
+//   1. walk down the chain of open blocks, checking which of them the
+//      line continues;
+//   2. unless the innermost match is a code or HTML block, look for
+//      starts of new blocks at the current offset;
+//   3. add whatever text remains to the appropriate block, finalizing the
+//      blocks that were not matched (or leaving them open when the line
+//      is a lazy paragraph continuation).
+// parser->line_number is incremented and parser->curline is cleared again
+// before returning.
+void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
+		 size_t bytes)
+{
+	cmark_node* last_matched_container;
+	int offset = 0;
+	int matched = 0;
+	int lev = 0;
+	int i;
+	cmark_list *data = NULL;
+	bool all_matched = true;
+	cmark_node* container;
+	cmark_node* cur = parser->current;
+	bool blank = false;
+	int first_nonspace;
+	int indent;
+	chunk input;
+
+	utf8proc_detab(parser->curline, (unsigned char *)buffer, bytes);
+
+	// Add a newline to the end if not present.  An empty line has no last
+	// character to inspect, so that case is checked for explicitly:
+	// TODO this breaks abstraction:
+	if (parser->curline->size == 0 ||
+			parser->curline->ptr[parser->curline->size - 1] != '\n') {
+		strbuf_putc(parser->curline, '\n');
+	}
+	input.data = parser->curline->ptr;
+	input.len = parser->curline->size;
+
+	// container starts at the document root.
+	container = parser->root;
+
+	parser->line_number++;
+
+	// for each containing cmark_node, try to parse the associated line start.
+	// bail out on failure:  container will point to the last matching cmark_node.
+
+	while (container->last_child && container->last_child->open) {
+		container = container->last_child;
+
+		first_nonspace = offset;
+		while (peek_at(&input, first_nonspace) == ' ') {
+			first_nonspace++;
+		}
+
+		indent = first_nonspace - offset;
+		blank = peek_at(&input, first_nonspace) == '\n';
+
+		if (container->type == NODE_BLOCK_QUOTE) {
+			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
+			if (matched) {
+				offset = first_nonspace + 1;
+				if (peek_at(&input, offset) == ' ')
+					offset++;
+			} else {
+				all_matched = false;
+			}
+
+		} else if (container->type == NODE_LIST_ITEM) {
+
+			if (indent >= container->as.list.marker_offset +
+					container->as.list.padding) {
+				offset += container->as.list.marker_offset +
+					container->as.list.padding;
+			} else if (blank) {
+				offset = first_nonspace;
+			} else {
+				all_matched = false;
+			}
+
+		} else if (container->type == NODE_INDENTED_CODE) {
+
+			if (indent >= CODE_INDENT) {
+				offset += CODE_INDENT;
+			} else if (blank) {
+				offset = first_nonspace;
+			} else {
+				all_matched = false;
+			}
+
+		} else if (container->type == NODE_ATX_HEADER ||
+				container->type == NODE_SETEXT_HEADER) {
+
+			// a header can never contain more than one line
+			all_matched = false;
+			if (blank) {
+				container->last_line_blank = true;
+			}
+
+		} else if (container->type == NODE_FENCED_CODE) {
+
+			// skip optional spaces of fence offset
+			i = container->as.code.fence_offset;
+			while (i > 0 && peek_at(&input, offset) == ' ') {
+				offset++;
+				i--;
+			}
+
+		} else if (container->type == NODE_HTML) {
+
+			if (blank) {
+				container->last_line_blank = true;
+				all_matched = false;
+			}
+
+		} else if (container->type == NODE_PARAGRAPH) {
+
+			if (blank) {
+				container->last_line_blank = true;
+				all_matched = false;
+			}
+
+		}
+
+		if (!all_matched) {
+			container = container->parent;  // back up to last matching cmark_node
+			break;
+		}
+	}
+
+	last_matched_container = container;
+
+	// check to see if we've hit 2nd blank line, break out of list:
+	if (blank && container->last_line_blank) {
+		break_out_of_lists(parser, &container, parser->line_number);
+	}
+
+	// unless last matched container is code cmark_node, try new container starts:
+	while (container->type != NODE_FENCED_CODE && container->type != NODE_INDENTED_CODE &&
+			container->type != NODE_HTML) {
+
+		first_nonspace = offset;
+		while (peek_at(&input, first_nonspace) == ' ')
+			first_nonspace++;
+
+		indent = first_nonspace - offset;
+		blank = peek_at(&input, first_nonspace) == '\n';
+
+		if (indent >= CODE_INDENT) {
+			if (cur->type != NODE_PARAGRAPH && !blank) {
+				offset += CODE_INDENT;
+				container = add_child(parser, container, NODE_INDENTED_CODE, parser->line_number, offset + 1);
+			} else { // indent > 4 in lazy line
+				break;
+			}
+
+		} else if (peek_at(&input, first_nonspace) == '>') {
+
+			offset = first_nonspace + 1;
+			// optional following character
+			if (peek_at(&input, offset) == ' ')
+				offset++;
+			container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->line_number, offset + 1);
+
+		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
+
+			offset = first_nonspace + matched;
+			container = add_child(parser, container, NODE_ATX_HEADER, parser->line_number, offset + 1);
+
+			// the header level is the length of the opening run of '#':
+			int hashpos = chunk_strchr(&input, '#', first_nonspace);
+			int level = 0;
+
+			while (peek_at(&input, hashpos) == '#') {
+				level++;
+				hashpos++;
+			}
+			container->as.header.level = level;
+
+		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
+
+			container = add_child(parser, container, NODE_FENCED_CODE, parser->line_number, first_nonspace + 1);
+			container->as.code.fence_char = peek_at(&input, first_nonspace);
+			container->as.code.fence_length = matched;
+			container->as.code.fence_offset = first_nonspace - offset;
+			offset = first_nonspace + matched;
+
+		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
+
+			container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1);
+			// note, we don't adjust offset because the tag is part of the text
+
+		} else if (container->type == NODE_PARAGRAPH &&
+				(lev = scan_setext_header_line(&input, first_nonspace)) &&
+				// check that there is only one line in the paragraph:
+				strbuf_strrchr(&container->string_content, '\n',
+					strbuf_len(&container->string_content) - 2) < 0) {
+
+			container->type = NODE_SETEXT_HEADER;
+			container->as.header.level = lev;
+			offset = input.len - 1;
+
+		} else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
+				(matched = scan_hrule(&input, first_nonspace))) {
+
+			// it's only now that we know the line is not part of a setext header:
+			container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1);
+			finalize(parser, container, parser->line_number);
+			container = container->parent;
+			offset = input.len - 1;
+
+		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
+
+			// compute padding:
+			offset = first_nonspace + matched;
+			i = 0;
+			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
+				i++;
+			}
+			// i = number of spaces after marker, up to 5
+			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
+				data->padding = matched + 1;
+				if (i > 0) {
+					offset += 1;
+				}
+			} else {
+				data->padding = matched + i;
+				offset += i;
+			}
+
+			// check container; if it's a list, see if this list item
+			// can continue the list; otherwise, create a list container.
+
+			data->marker_offset = indent;
+
+			if (container->type != NODE_LIST ||
+					!lists_match(&container->as.list, data)) {
+				container = add_child(parser, container, NODE_LIST, parser->line_number,
+						first_nonspace + 1);
+
+				memcpy(&container->as.list, data, sizeof(*data));
+			}
+
+			// add the list item
+			container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number,
+					first_nonspace + 1);
+			/* TODO: static */
+			memcpy(&container->as.list, data, sizeof(*data));
+			free(data);
+		} else {
+			break;
+		}
+
+		if (accepts_lines(container->type)) {
+			// if it's a line container, it can't contain other containers
+			break;
+		}
+	}
+
+	// what remains at offset is a text line.  add the text to the
+	// appropriate container.
+
+	first_nonspace = offset;
+	while (peek_at(&input, first_nonspace) == ' ')
+		first_nonspace++;
+
+	indent = first_nonspace - offset;
+	blank = peek_at(&input, first_nonspace) == '\n';
+
+	// cmark_node quote lines are never blank as they start with >
+	// and we don't count blanks in fenced code for purposes of tight/loose
+	// lists or breaking out of lists.  we also don't set last_line_blank
+	// on an empty list item.
+	container->last_line_blank = (blank &&
+			container->type != NODE_BLOCK_QUOTE &&
+			container->type != NODE_SETEXT_HEADER &&
+			container->type != NODE_FENCED_CODE &&
+			!(container->type == NODE_LIST_ITEM &&
+				container->first_child == NULL &&
+				container->start_line == parser->line_number));
+
+	// only the innermost container keeps the flag; clear it on all ancestors:
+	cmark_node *cont = container;
+	while (cont->parent) {
+		cont->parent->last_line_blank = false;
+		cont = cont->parent;
+	}
+
+	// lazy continuation: no new block was started, the line is not blank and
+	// the previously current block is a non-empty paragraph that was not
+	// matched, so the text is appended to that paragraph.
+	if (cur != last_matched_container &&
+			container == last_matched_container &&
+			!blank &&
+			cur->type == NODE_PARAGRAPH &&
+			strbuf_len(&cur->string_content) > 0) {
+
+		add_line(cur, &input, offset);
+
+	} else { // not a lazy continuation
+
+		// finalize any blocks that were not matched and set cur to container:
+		while (cur != last_matched_container) {
+			finalize(parser, cur, parser->line_number);
+			cur = cur->parent;
+			assert(cur != NULL);
+		}
+
+		if (container->type == NODE_INDENTED_CODE) {
+
+			add_line(container, &input, offset);
+
+		} else if (container->type == NODE_FENCED_CODE) {
+			matched = 0;
+
+			if (indent <= 3 &&
+					peek_at(&input, first_nonspace) == container->as.code.fence_char) {
+				int fence_len = scan_close_code_fence(&input, first_nonspace);
+				if (fence_len > container->as.code.fence_length)
+					matched = 1;
+			}
+
+			if (matched) {
+				// if closing fence, don't add line to container; instead, close it:
+				finalize(parser, container, parser->line_number);
+				container = container->parent; // back up to parent
+			} else {
+				add_line(container, &input, offset);
+			}
+
+		} else if (container->type == NODE_HTML) {
+
+			add_line(container, &input, offset);
+
+		} else if (blank) {
+
+			// ??? do nothing
+
+		} else if (container->type == NODE_ATX_HEADER) {
+
+			chop_trailing_hashtags(&input);
+			add_line(container, &input, first_nonspace);
+			finalize(parser, container, parser->line_number);
+			container = container->parent;
+
+		} else if (accepts_lines(container->type)) {
+
+			add_line(container, &input, first_nonspace);
+
+		} else if (container->type != NODE_HRULE && container->type != NODE_SETEXT_HEADER) {
+
+			// create paragraph container for line
+			container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1);
+			add_line(container, &input, first_nonspace);
+
+		} else {
+			assert(false);
+		}
+
+		parser->current = container;
+	}
+	strbuf_clear(parser->curline);
+
+}
+
+// Finish parsing: finalize the document, release the parser's line
+// buffer and return the root node of the parsed tree.  When
+// CMARK_DEBUG_NODES is enabled the tree is checked first and the process
+// aborts if cmark_node_check() reports a non-zero result.
+cmark_node *cmark_finish(cmark_doc_parser *parser)
+{
+	cmark_node *root;
+
+	finalize_document(parser);
+	strbuf_free(parser->curline);
+	root = parser->root;
+#if CMARK_DEBUG_NODES
+	if (cmark_node_check(root, stderr) != 0) {
+		abort();
+	}
+#endif
+	return root;
+}
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/buffer.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/buffer.c b/compiler/modules/CommonMark/src/buffer.c
new file mode 100644
index 0000000..45b6984
--- /dev/null
+++ b/compiler/modules/CommonMark/src/buffer.c
@@ -0,0 +1,375 @@
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "buffer.h"
+
+/* Used as default value for strbuf->ptr so that people can always
+ * assume ptr is non-NULL and zero terminated even for new strbufs.
+ */
+unsigned char cmark_strbuf__initbuf[1];
+unsigned char cmark_strbuf__oom[1];
+
+#define ENSURE_SIZE(b, d)					\
+	if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)	\
+		return -1;
+
+#ifndef MIN
+#define MIN(x,y)  ((x<y) ? x : y)
+#endif
+
+/* Put `buf` into the empty state (ptr refers to the shared static
+ * placeholder, no storage allocated) and, when `initial_size` is
+ * non-zero, ask for that many bytes up front.  The result of that
+ * request is not returned to the caller.
+ */
+void cmark_strbuf_init(strbuf *buf, int initial_size)
+{
+	buf->ptr = cmark_strbuf__initbuf;
+	buf->size = 0;
+	buf->asize = 0;
+
+	if (initial_size != 0)
+		cmark_strbuf_grow(buf, initial_size);
+}
+
+/* Grow `buf` so that it can hold at least `target_size` bytes.
+ *
+ * Returns 0 on success (including when the buffer is already large
+ * enough) and -1 on failure.  A buffer that is already marked
+ * out-of-memory always fails.  If the allocation fails and `mark_oom` is
+ * false, the buffer is left untouched.  If `mark_oom` is true, the old
+ * storage is released and the buffer is put into the out-of-memory state
+ * with zero size and capacity, so that later writes cannot land in the
+ * one-byte static sentinel.
+ */
+int cmark_strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
+{
+	unsigned char *new_ptr;
+	int new_size;
+
+	if (buf->ptr == cmark_strbuf__oom)
+		return -1;
+
+	if (target_size <= buf->asize)
+		return 0;
+
+	if (buf->asize == 0) {
+		/* no storage allocated yet: start from scratch */
+		new_size = target_size;
+		new_ptr = NULL;
+	} else {
+		new_size = buf->asize;
+		new_ptr = buf->ptr;
+	}
+
+	/* grow the buffer size by 1.5, until it's big enough
+	 * to fit our target size */
+	while (new_size < target_size)
+		new_size = (new_size << 1) - (new_size >> 1);
+
+	/* round allocation up to multiple of 8 */
+	new_size = (new_size + 7) & ~7;
+
+	new_ptr = (unsigned char *)realloc(new_ptr, new_size);
+
+	if (!new_ptr) {
+		if (mark_oom) {
+			/* a failed realloc leaves the old block allocated and
+			 * buf->ptr still refers to it; release it before it is
+			 * replaced by the sentinel */
+			if (buf->asize > 0)
+				free(buf->ptr);
+			buf->ptr = cmark_strbuf__oom;
+			buf->asize = 0;
+			buf->size = 0;
+		}
+		return -1;
+	}
+
+	buf->asize = new_size;
+	buf->ptr   = new_ptr;
+
+	/* truncate the existing buffer size if necessary */
+	if (buf->size >= buf->asize)
+		buf->size = buf->asize - 1;
+	buf->ptr[buf->size] = '\0';
+
+	return 0;
+}
+
+/* Grow `buf` to at least `target_size` bytes, marking the buffer as
+ * out-of-memory if the allocation fails.  Returns the result of
+ * cmark_strbuf_try_grow().
+ */
+int cmark_strbuf_grow(cmark_strbuf *buf, int target_size)
+{
+	const bool mark_oom = true;
+
+	return cmark_strbuf_try_grow(buf, target_size, mark_oom);
+}
+
+/* True when `buf` carries the out-of-memory sentinel. */
+bool cmark_strbuf_oom(const cmark_strbuf *buf)
+{
+	const unsigned char *data = buf->ptr;
+
+	return data == cmark_strbuf__oom;
+}
+
+/* Number of bytes currently stored in `buf` (its `size` field). */
+size_t cmark_strbuf_len(const cmark_strbuf *buf)
+{
+	const int used = buf->size;
+
+	return used;
+}
+
+/* Release the storage held by `buf`, if any, and reset it to the empty
+ * state.  A NULL `buf` is ignored.
+ */
+void cmark_strbuf_free(strbuf *buf)
+{
+	bool heap_backed;
+
+	if (buf == NULL)
+		return;
+
+	/* the two static sentinels must never be handed to free() */
+	heap_backed = buf->ptr != cmark_strbuf__initbuf &&
+		buf->ptr != cmark_strbuf__oom;
+	if (heap_backed)
+		free(buf->ptr);
+
+	cmark_strbuf_init(buf, 0);
+}
+
+/* Empty the buffer without releasing its storage. */
+void cmark_strbuf_clear(strbuf *buf)
+{
+	/* the terminator is only written when storage has been allocated */
+	if (buf->asize > 0)
+		buf->ptr[0] = '\0';
+
+	buf->size = 0;
+}
+
+/* Replace the contents of `buf` with the first `len` bytes of `data`.
+ * A NULL `data` or a non-positive `len` clears the buffer.  Returns 0 on
+ * success and -1 if the buffer could not be grown.
+ */
+int cmark_strbuf_set(strbuf *buf, const unsigned char *data, int len)
+{
+	if (len <= 0 || data == NULL) {
+		cmark_strbuf_clear(buf);
+		return 0;
+	}
+
+	/* when data already is the buffer's own storage nothing is copied */
+	if (data != buf->ptr) {
+		ENSURE_SIZE(buf, len + 1);
+		memmove(buf->ptr, data, len);
+	}
+
+	buf->size = len;
+	buf->ptr[len] = '\0';
+	return 0;
+}
+
+/* Replace the contents of `buf` with the C string `string`; a NULL
+ * string is treated as empty.  Returns the result of cmark_strbuf_set().
+ */
+int cmark_strbuf_sets(strbuf *buf, const char *string)
+{
+	int len = 0;
+
+	if (string != NULL)
+		len = strlen(string);
+
+	return cmark_strbuf_set(buf, (const unsigned char *)string, len);
+}
+
+/* Append the single byte `c` to `buf`, keeping it NUL-terminated.
+ * Returns 0 on success and -1 if the buffer could not be grown.
+ */
+int cmark_strbuf_putc(strbuf *buf, int c)
+{
+	unsigned char *tail;
+
+	/* room for the new byte plus the terminator */
+	ENSURE_SIZE(buf, buf->size + 2);
+
+	tail = buf->ptr + buf->size;
+	tail[0] = c;
+	tail[1] = '\0';
+	buf->size++;
+	return 0;
+}
+
+/* Append `len` bytes from `data` to `buf`, keeping it NUL-terminated.
+ * A non-positive `len` is a no-op.  Returns 0 on success and -1 if the
+ * buffer could not be grown.
+ */
+int cmark_strbuf_put(strbuf *buf, const unsigned char *data, int len)
+{
+	if (len > 0) {
+		ENSURE_SIZE(buf, buf->size + len + 1);
+		memmove(buf->ptr + buf->size, data, len);
+		buf->size += len;
+		buf->ptr[buf->size] = '\0';
+	}
+
+	return 0;
+}
+
+/* Append the C string `string` to `buf`.  A NULL string appends nothing
+ * (matching cmark_strbuf_sets(), which also accepts NULL) instead of
+ * being passed to strlen().  Returns 0 on success and -1 if the buffer
+ * could not be grown.
+ */
+int cmark_strbuf_puts(strbuf *buf, const char *string)
+{
+	if (string == NULL)
+		return 0;
+
+	return cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string));
+}
+
+/* Append printf-style formatted output to `buf`.
+ *
+ * The output is formatted directly into the free space of the buffer; if
+ * it does not fit, the buffer is grown to the size vsnprintf() reported
+ * and the formatting is repeated.  `ap` itself is never consumed: each
+ * attempt works on its own va_copy, because a va_list may not be reused
+ * after it has been passed to vsnprintf().
+ *
+ * Returns 0 on success.  Returns -1 if the buffer could not be grown or
+ * if vsnprintf() reports an error; in the latter case the storage is
+ * released and the buffer is marked out-of-memory.
+ */
+int cmark_strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
+{
+	const int expected_size = buf->size + (strlen(format) * 2);
+	int len;
+
+	ENSURE_SIZE(buf, expected_size);
+
+	while (1) {
+		va_list args;
+
+		va_copy(args, ap);
+		len = vsnprintf(
+			(char *)buf->ptr + buf->size,
+			buf->asize - buf->size,
+			format, args
+			);
+		va_end(args);
+
+		if (len < 0) {
+			/* only heap storage may be freed; with asize == 0 ptr
+			 * still refers to a static sentinel */
+			if (buf->asize > 0)
+				free(buf->ptr);
+			buf->ptr = cmark_strbuf__oom;
+			buf->asize = 0;
+			buf->size = 0;
+			return -1;
+		}
+
+		/* done once the output plus its terminator fitted */
+		if (len + 1 <= buf->asize - buf->size) {
+			buf->size += len;
+			break;
+		}
+
+		ENSURE_SIZE(buf, buf->size + len + 1);
+	}
+
+	return 0;
+}
+
+/* Variadic front end for cmark_strbuf_vprintf(); returns its result. */
+int cmark_strbuf_printf(strbuf *buf, const char *format, ...)
+{
+	va_list args;
+	int status;
+
+	va_start(args, format);
+	status = cmark_strbuf_vprintf(buf, format, args);
+	va_end(args);
+
+	return status;
+}
+
+/* Copy the contents of `buf` into the caller's array `data` of
+ * `datasize` bytes, truncating if necessary.  `data` always ends up
+ * NUL-terminated; it is set to the empty string when the buffer is empty
+ * or has no storage.
+ */
+void cmark_strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
+{
+	int room, copylen;
+
+	assert(data && datasize && buf);
+
+	data[0] = '\0';
+
+	if (buf->size == 0 || buf->asize <= 0)
+		return;
+
+	/* one byte of the destination is reserved for the terminator */
+	room = datasize - 1;
+	copylen = (buf->size > room) ? room : buf->size;
+
+	memmove(data, buf->ptr, copylen);
+	data[copylen] = '\0';
+}
+
+/* Exchange the complete state (pointer and both sizes) of two buffers. */
+void cmark_strbuf_swap(strbuf *buf_a, strbuf *buf_b)
+{
+	strbuf saved_b = *buf_b;
+
+	*buf_b = *buf_a;
+	*buf_a = saved_b;
+}
+
+/* Return the buffer's storage and reset `buf` to the empty state, so
+ * that the buffer no longer refers to the returned memory.  If the
+ * buffer has no storage, or is marked out-of-memory, a freshly
+ * calloc'ed empty string is returned instead and `buf` is left as is.
+ */
+unsigned char *cmark_strbuf_detach(strbuf *buf)
+{
+	unsigned char *data;
+
+	if (buf->asize == 0 || buf->ptr == cmark_strbuf__oom) {
+		/* return an empty string */
+		return (unsigned char *)calloc(1, 1);
+	}
+
+	data = buf->ptr;
+	cmark_strbuf_init(buf, 0);
+	return data;
+}
+
+/* Free whatever `buf` currently holds and make it use `ptr`, a
+ * NUL-terminated string, as its storage.  `asize` is the allocated size
+ * of `ptr`.  With a NULL `ptr` the buffer is simply grown to `asize`
+ * bytes.
+ */
+void cmark_strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
+{
+	cmark_strbuf_free(buf);
+
+	if (ptr == NULL) {
+		cmark_strbuf_grow(buf, asize);
+		return;
+	}
+
+	buf->ptr = ptr;
+	buf->size = strlen((char *)ptr);
+
+	/* pass 0 to fall back on strlen + 1; the same fallback applies when
+	 * the stated allocation is smaller than the string itself */
+	if (asize == 0 || asize < buf->size)
+		buf->asize = buf->size + 1;
+	else
+		buf->asize = asize;
+}
+
+/* Compare two buffers byte-wise over their common length; when that
+ * part is equal the shorter buffer orders first.  Returns a negative
+ * value, zero or a positive value.
+ */
+int cmark_strbuf_cmp(const strbuf *a, const strbuf *b)
+{
+	int common = (a->size < b->size) ? a->size : b->size;
+	int result = memcmp(a->ptr, b->ptr, common);
+
+	if (result != 0)
+		return result;
+	if (a->size < b->size)
+		return -1;
+	if (a->size > b->size)
+		return 1;
+	return 0;
+}
+
+/* Find the first occurrence of byte `c` at or after index `pos`.
+ *
+ * Returns the index of the match, or -1 if there is none.  A `pos` at or
+ * beyond the end of the contents finds nothing, and a negative `pos` is
+ * treated as 0, so the search never leaves the buffer.
+ */
+int cmark_strbuf_strchr(const strbuf *buf, int c, int pos)
+{
+	const unsigned char *p;
+
+	if (pos >= buf->size)
+		return -1;
+	if (pos < 0)
+		pos = 0;
+
+	p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
+	if (!p)
+		return -1;
+
+	return (int)(p - (const unsigned char *)buf->ptr);
+}
+
+/* Find the last occurrence of byte `c` at or before index `pos`.
+ *
+ * Returns the index of the match, or -1 if there is none.  A negative
+ * `pos` or an empty buffer finds nothing; a `pos` beyond the last byte
+ * is clamped to the last byte so the search never reads past the
+ * contents.
+ */
+int cmark_strbuf_strrchr(const strbuf *buf, int c, int pos)
+{
+	int i;
+
+	if (pos < 0 || buf->size == 0)
+		return -1;
+	if (pos >= buf->size)
+		pos = buf->size - 1;
+
+	for (i = pos; i >= 0; i--) {
+		if (buf->ptr[i] == (unsigned char) c)
+			return i;
+	}
+
+	return -1;
+}
+
+/* Shorten `buf` to `len` bytes.  Nothing happens when the buffer is
+ * already that short; a negative `len` is treated as 0 so that the size
+ * can never become negative.
+ */
+void cmark_strbuf_truncate(strbuf *buf, int len)
+{
+	if (len < 0)
+		len = 0;
+
+	if (len < buf->size) {
+		buf->size = len;
+		buf->ptr[buf->size] = '\0';
+	}
+}
+
+/* Remove the first `n` bytes of `buf`, moving the rest to the front.
+ * A non-positive `n` is a no-op; an `n` larger than the contents drops
+ * everything instead of producing a negative size.
+ */
+void cmark_strbuf_drop(strbuf *buf, int n)
+{
+	if (n > 0) {
+		if (n > buf->size)
+			n = buf->size;
+
+		buf->size = buf->size - n;
+		if (buf->size)
+			memmove(buf->ptr, buf->ptr + n, buf->size);
+
+		buf->ptr[buf->size] = '\0';
+	}
+}
+
+/* Remove trailing bytes for which isspace() is true. */
+void cmark_strbuf_rtrim(strbuf *buf)
+{
+	int end = buf->size;
+
+	if (end == 0)
+		return;
+
+	while (end > 0 && isspace(buf->ptr[end - 1]))
+		end--;
+
+	buf->size = end;
+	buf->ptr[end] = '\0';
+}
+
+/* Remove bytes for which isspace() is true from both ends of `buf`. */
+void cmark_strbuf_trim(strbuf *buf)
+{
+	int lead;
+
+	if (!buf->size)
+		return;
+
+	/* count the leading whitespace, then drop it in one go */
+	for (lead = 0; lead < buf->size; lead++) {
+		if (!isspace(buf->ptr[lead]))
+			break;
+	}
+
+	cmark_strbuf_drop(buf, lead);
+	cmark_strbuf_rtrim(buf);
+}
+
+// Destructively rewrite the string so that every run of consecutive
+// space and newline characters becomes a single space.
+void cmark_strbuf_normalize_whitespace(strbuf *s)
+{
+	bool in_run = false;
+	int src, dst = 0;
+
+	for (src = 0; src < s->size; ++src) {
+		unsigned char ch = s->ptr[src];
+
+		if (ch == ' ' || ch == '\n') {
+			// only the first character of a run produces output
+			if (!in_run)
+				s->ptr[dst++] = ' ';
+			in_run = true;
+		} else {
+			s->ptr[dst++] = ch;
+			in_run = false;
+		}
+	}
+
+	cmark_strbuf_truncate(s, dst);
+}
+
+// Destructively unescape a string: remove backslashes before punctuation chars.
+// The character that follows a removed backslash is always copied
+// literally, so an escaped backslash ("\\") yields one backslash and is
+// never taken as the start of a further escape.
+extern void cmark_strbuf_unescape(strbuf *buf)
+{
+	int r, w;
+
+	for (r = 0, w = 0; r < buf->size; ++r) {
+		// skip the backslash and step onto the escaped character
+		if (buf->ptr[r] == '\\' && r + 1 < buf->size &&
+				ispunct(buf->ptr[r + 1]))
+			r++;
+
+		buf->ptr[w++] = buf->ptr[r];
+	}
+
+	cmark_strbuf_truncate(buf, w);
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/144f0b22/compiler/modules/CommonMark/src/buffer.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/buffer.h b/compiler/modules/CommonMark/src/buffer.h
new file mode 100644
index 0000000..be888e1
--- /dev/null
+++ b/compiler/modules/CommonMark/src/buffer.h
@@ -0,0 +1,178 @@
+#ifndef CMARK_BUFFER_H
+#define CMARK_BUFFER_H
+
+#include <stddef.h>
+#include <stdarg.h>
+#include "config.h"
+#include "cmark_export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Growable byte buffer whose contents are kept NUL-terminated. */
+typedef struct {
+	unsigned char *ptr; /* contents; refers to a static placeholder while empty */
+	int asize, size;    /* asize: bytes allocated for ptr (0 right after
+	                     * initialization); size: bytes in use, not counting
+	                     * the terminating NUL */
+} cmark_strbuf;
+
+/* Shared placeholder that the ptr of a newly initialized strbuf refers to. */
+CMARK_EXPORT
+extern unsigned char cmark_strbuf__initbuf[];
+
+/* Sentinel that ptr is set to after a failed allocation; see
+ * cmark_strbuf_oom(). */
+CMARK_EXPORT
+extern unsigned char cmark_strbuf__oom[];
+
+/* Static initializer for an empty strbuf, in field order { ptr, asize, size }. */
+#define CMARK_GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 }
+
+/**
+ * Initialize a strbuf structure.
+ *
+ * For the cases where GH_BUF_INIT cannot be used to do static
+ * initialization.  If `initial_size` is non-zero, the buffer is grown to
+ * that size right away.
+ */
+CMARK_EXPORT
+void cmark_strbuf_init(cmark_strbuf *buf, int initial_size);
+
+/**
+ * Attempt to grow the buffer to hold at least `target_size` bytes.
+ *
+ * If the allocation fails, this will return an error.  If mark_oom is true,
+ * this will mark the buffer as invalid for future operations; if false,
+ * existing buffer content will be preserved, but calling code must handle
+ * that buffer was not expanded.
+ *
+ * @return 0 on success or -1 on failure
+ */
+CMARK_EXPORT
+int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom);
+
+/**
+ * Grow the buffer to hold at least `target_size` bytes.
+ *
+ * If the allocation fails, this will return an error and the buffer will be
+ * marked as invalid for future operations, invalidating its contents.
+ *
+ * @return 0 on success or -1 on failure
+ */
+CMARK_EXPORT
+int cmark_strbuf_grow(cmark_strbuf *buf, int target_size);
+
+/** Release the buffer's storage, if any, and reset it to the empty state. */
+CMARK_EXPORT
+void cmark_strbuf_free(cmark_strbuf *buf);
+/** Exchange the contents of two buffers. */
+CMARK_EXPORT
+void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
+
+/**
+ * Test if there have been any reallocation failures with this strbuf.
+ *
+ * Any function that writes to a strbuf can fail due to memory allocation
+ * issues.  If one fails, the strbuf will be marked with an OOM error and
+ * further calls to modify the buffer will fail.  Check strbuf_oom() at the
+ * end of your sequence and it will be true if you ran out of memory at any
+ * point with that buffer.
+ *
+ * @return false if no error, true if allocation error
+ */
+CMARK_EXPORT
+bool cmark_strbuf_oom(const cmark_strbuf *buf);
+
+/** Number of bytes currently stored in the buffer. */
+CMARK_EXPORT
+size_t cmark_strbuf_len(const cmark_strbuf *buf);
+
+/**
+ * Compare two buffers byte-wise; if one is a prefix of the other, the
+ * shorter one orders first.
+ *
+ * @return a negative value, 0 or a positive value
+ */
+CMARK_EXPORT
+int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
+
+/**
+ * Free the buffer and make it use the NUL-terminated string `ptr` as its
+ * storage; `asize` is the allocated size of `ptr`, or 0 to assume
+ * strlen + 1.  If `ptr` is NULL, the buffer is grown to `asize` instead.
+ */
+CMARK_EXPORT
+void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
+/**
+ * Return the buffer's storage and reset the buffer to the empty state.
+ * A newly allocated empty string is returned if the buffer has no storage
+ * or is marked out-of-memory.
+ */
+CMARK_EXPORT
+unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
+/**
+ * Copy the buffer contents into `data`, truncated to `datasize` - 1 bytes
+ * and always NUL-terminated.
+ */
+CMARK_EXPORT
+void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
+
+/* View the buffer's bytes as a read-only C string; nothing is copied. */
+static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
+{
+	const unsigned char *bytes = buf->ptr;
+
+	return (const char *)bytes;
+}
+
+/* Byte at index `n` of the buffer; no bounds checking is performed. */
+#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
+
+/*
+ * Functions below that return int value error codes will return 0 on
+ * success or -1 on failure (which generally means an allocation failed).
+ * Using a strbuf where the allocation has failed will result in -1 from
+ * all further calls using that buffer.  As a result, you can ignore the
+ * return code of these functions and call them in a series then just call
+ * strbuf_oom at the end.
+ */
+/* Replace the contents with `len` bytes from `data`. */
+CMARK_EXPORT
+int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len);
+/* Replace the contents with a C string (NULL is treated as empty). */
+CMARK_EXPORT
+int cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
+/* Append a single byte. */
+CMARK_EXPORT
+int cmark_strbuf_putc(cmark_strbuf *buf, int c);
+/* Append `len` bytes from `data`. */
+CMARK_EXPORT
+int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len);
+/* Append a C string. */
+CMARK_EXPORT
+int cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
+/* Append printf-style formatted output. */
+CMARK_EXPORT
+int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...)
+	CMARK_ATTRIBUTE((format (printf, 2, 3)));
+/* Like cmark_strbuf_printf(), taking a va_list. */
+CMARK_EXPORT
+int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap);
+/* Empty the buffer without releasing its storage. */
+CMARK_EXPORT
+void cmark_strbuf_clear(cmark_strbuf *buf);
+
+/* Index of the first occurrence of `c` at or after `pos`, or -1. */
+CMARK_EXPORT
+int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos);
+/* Index of the last occurrence of `c` at or before `pos`, or -1. */
+CMARK_EXPORT
+int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos);
+/* Remove the first `n` bytes. */
+CMARK_EXPORT
+void cmark_strbuf_drop(cmark_strbuf *buf, int n);
+/* Shorten the buffer to `len` bytes if it is longer. */
+CMARK_EXPORT
+void cmark_strbuf_truncate(cmark_strbuf *buf, int len);
+/* Remove trailing whitespace. */
+CMARK_EXPORT
+void cmark_strbuf_rtrim(cmark_strbuf *buf);
+/* Remove leading and trailing whitespace. */
+CMARK_EXPORT
+void cmark_strbuf_trim(cmark_strbuf *buf);
+/* Collapse runs of spaces and newlines into a single space. */
+CMARK_EXPORT
+void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
+/* Remove backslashes that precede punctuation characters. */
+CMARK_EXPORT
+void cmark_strbuf_unescape(cmark_strbuf *s);
+
+/* Unprefixed aliases for the names declared above.  Define
+ * CMARK_NO_SHORT_NAMES before including this header to suppress them. */
+#ifndef CMARK_NO_SHORT_NAMES
+  #define strbuf                        cmark_strbuf
+  #define strbuf__initbuf               cmark_strbuf__initbuf
+  #define strbuf__oom                   cmark_strbuf__oom
+  #define GH_BUF_INIT                   CMARK_GH_BUF_INIT
+  #define strbuf_init                   cmark_strbuf_init
+  #define strbuf_try_grow               cmark_strbuf_try_grow
+  #define strbuf_grow                   cmark_strbuf_grow
+  #define strbuf_free                   cmark_strbuf_free
+  #define strbuf_swap                   cmark_strbuf_swap
+  #define strbuf_oom                    cmark_strbuf_oom
+  #define strbuf_len                    cmark_strbuf_len
+  #define strbuf_cmp                    cmark_strbuf_cmp
+  #define strbuf_attach                 cmark_strbuf_attach
+  #define strbuf_detach                 cmark_strbuf_detach
+  #define strbuf_copy_cstr              cmark_strbuf_copy_cstr
+  #define strbuf_at                     cmark_strbuf_at
+  #define strbuf_set                    cmark_strbuf_set
+  #define strbuf_sets                   cmark_strbuf_sets
+  #define strbuf_putc                   cmark_strbuf_putc
+  #define strbuf_put                    cmark_strbuf_put
+  #define strbuf_puts                   cmark_strbuf_puts
+  #define strbuf_printf                 cmark_strbuf_printf
+  #define strbuf_vprintf                cmark_strbuf_vprintf
+  #define strbuf_clear                  cmark_strbuf_clear
+  #define strbuf_strchr                 cmark_strbuf_strchr
+  #define strbuf_strrchr                cmark_strbuf_strrchr
+  #define strbuf_drop                   cmark_strbuf_drop
+  #define strbuf_truncate               cmark_strbuf_truncate
+  #define strbuf_rtrim                  cmark_strbuf_rtrim
+  #define strbuf_trim                   cmark_strbuf_trim
+  #define strbuf_normalize_whitespace   cmark_strbuf_normalize_whitespace
+  #define strbuf_unescape               cmark_strbuf_unescape
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif