Posted to commits@nifi.apache.org by al...@apache.org on 2016/07/21 18:44:21 UTC

[02/52] [abbrv] [partial] nifi-minifi-cpp git commit: MINIFI-6: More infra works

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/7956696e/thirdparty/libxml2/doc/apibuild.py
----------------------------------------------------------------------
diff --git a/thirdparty/libxml2/doc/apibuild.py b/thirdparty/libxml2/doc/apibuild.py
new file mode 100755
index 0000000..b5b669a
--- /dev/null
+++ b/thirdparty/libxml2/doc/apibuild.py
@@ -0,0 +1,2151 @@
+#!/usr/bin/python -u
+#
+# This is the API builder: it parses the C sources and builds the
+# formal API description in XML.
+#
+# See Copyright for the status of this software.
+#
+# daniel@veillard.com
+#
+import os, sys
+import string
+import glob
+
+debug=0
+#debugsym='ignorableWhitespaceSAXFunc'
+debugsym=None
+
+#
+# C parser analysis code
+#
+ignored_files = {
+  "trio": "too many non standard macros",
+  "trio.c": "too many non standard macros",
+  "trionan.c": "too many non standard macros",
+  "triostr.c": "too many non standard macros",
+  "acconfig.h": "generated portability layer",
+  "config.h": "generated portability layer",
+  "libxml.h": "internal only",
+  "testOOM.c": "out of memory tester",
+  "testOOMlib.h": "out of memory tester",
+  "testOOMlib.c": "out of memory tester",
+  "rngparser.c": "not yet integrated",
+  "rngparser.h": "not yet integrated",
+  "elfgcchack.h": "not a normal header",
+  "testHTML.c": "test tool",
+  "testReader.c": "test tool",
+  "testSchemas.c": "test tool",
+  "testXPath.c": "test tool",
+  "testAutomata.c": "test tool",
+  "testModule.c": "test tool",
+  "testRegexp.c": "test tool",
+  "testThreads.c": "test tool",
+  "testC14N.c": "test tool",
+  "testRelax.c": "test tool",
+  "testThreadsWin32.c": "test tool",
+  "testSAX.c": "test tool",
+  "testURI.c": "test tool",
+  "testapi.c": "generated regression tests",
+  "runtest.c": "regression tests program",
+  "runsuite.c": "regression tests program",
+  "tst.c": "not part of the library",
+  "test.c": "not part of the library",
+  "testdso.c": "test for dynamid shared libraries",
+  "testrecurse.c": "test for entities recursions",
+  "xzlib.h": "Internal API only 2.8.0",
+  "buf.h": "Internal API only 2.9.0",
+  "enc.h": "Internal API only 2.9.0",
+  "/save.h": "Internal API only 2.9.0",
+  "timsort.h": "Internal header only for xpath.c 2.9.0",
+}
+
+ignored_words = {
+  "WINAPI": (0, "Windows keyword"),
+  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
+  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
+  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
+  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
+  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
+  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
+  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
+  "XMLCALL": (0, "Special macro for win32 calls"),
+  "XSLTCALL": (0, "Special macro for win32 calls"),
+  "XMLCDECL": (0, "Special macro for win32 calls"),
+  "EXSLTCALL": (0, "Special macro for win32 calls"),
+  "__declspec": (3, "Windows keyword"),
+  "__stdcall": (0, "Windows keyword"),
+  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
+  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
+  "X_IN_Y": (5, "macro function builder"),
+  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
+  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
+  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
+  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
+}
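+# Each ignored_words entry maps a token to (number of following tokens to
+# skip, reason): e.g. "__declspec": (3, ...) makes the parser drop
+# "__declspec" together with the three tokens that follow it, such as
+# "(", "dllexport" and ")".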
+
+def escape(raw):
+    raw = raw.replace('&', '&amp;')
+    raw = raw.replace('<', '&lt;')
+    raw = raw.replace('>', '&gt;')
+    raw = raw.replace("'", '&apos;')
+    raw = raw.replace('"', '&quot;')
+    return raw
+
+def uniq(items):
+    d = {}
+    for item in items:
+        d[item]=1
+    return list(d.keys())
+
+class identifier:
+    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
+                 info=None, extra=None, conditionals = None):
+        self.name = name
+        self.header = header
+        self.module = module
+        self.type = type
+        self.info = info
+        self.extra = extra
+        self.lineno = lineno
+        self.static = 0
+        if conditionals == None or len(conditionals) == 0:
+            self.conditionals = None
+        else:
+            self.conditionals = conditionals[:]
+        if self.name == debugsym:
+            print("=> define %s : %s" % (debugsym, (module, type, info,
+                                         extra, conditionals)))
+
+    def __repr__(self):
+        r = "%s %s:" % (self.type, self.name)
+        if self.static:
+            r = r + " static"
+        if self.module != None:
+            r = r + " from %s" % (self.module)
+        if self.info != None:
+            r = r + " " +  repr(self.info)
+        if self.extra != None:
+            r = r + " " + repr(self.extra)
+        if self.conditionals != None:
+            r = r + " " + repr(self.conditionals)
+        return r
+
+
+    def set_header(self, header):
+        self.header = header
+    def set_module(self, module):
+        self.module = module
+    def set_type(self, type):
+        self.type = type
+    def set_info(self, info):
+        self.info = info
+    def set_extra(self, extra):
+        self.extra = extra
+    def set_lineno(self, lineno):
+        self.lineno = lineno
+    def set_static(self, static):
+        self.static = static
+    def set_conditionals(self, conditionals):
+        if conditionals == None or len(conditionals) == 0:
+            self.conditionals = None
+        else:
+            self.conditionals = conditionals[:]
+
+    def get_name(self):
+        return self.name
+    def get_header(self):
+        return self.header
+    def get_module(self):
+        return self.module
+    def get_type(self):
+        return self.type
+    def get_info(self):
+        return self.info
+    def get_lineno(self):
+        return self.lineno
+    def get_extra(self):
+        return self.extra
+    def get_static(self):
+        return self.static
+    def get_conditionals(self):
+        return self.conditionals
+
+    def update(self, header, module, type = None, info = None, extra=None,
+               conditionals=None):
+        if self.name == debugsym:
+            print("=> update %s : %s" % (debugsym, (module, type, info,
+                                         extra, conditionals)))
+        if header != None and self.header == None:
+            self.set_header(module)
+        if module != None and (self.module == None or self.header == self.module):
+            self.set_module(module)
+        if type != None and self.type == None:
+            self.set_type(type)
+        if info != None:
+            self.set_info(info)
+        if extra != None:
+            self.set_extra(extra)
+        if conditionals != None:
+            self.set_conditionals(conditionals)
+
+class index:
+    def __init__(self, name = "noname"):
+        self.name = name
+        self.identifiers = {}
+        self.functions = {}
+        self.variables = {}
+        self.includes = {}
+        self.structs = {}
+        self.enums = {}
+        self.typedefs = {}
+        self.macros = {}
+        self.references = {}
+        self.info = {}
+
+    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
+        if name[0:2] == '__':
+            return None
+        d = None
+        try:
+           d = self.identifiers[name]
+           d.update(header, module, type, info, extra, conditionals)
+        except:
+           d = identifier(name, header, module, type, lineno, info, extra, conditionals)
+           self.identifiers[name] = d
+
+        if d != None and static == 1:
+            d.set_static(1)
+
+        if d != None and name != None and type != None:
+            self.references[name] = d
+
+        if name == debugsym:
+            print("New ref: %s" % (d))
+
+        return d
+
+    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
+        if name[0:2] == '__':
+            return None
+        d = None
+        try:
+           d = self.identifiers[name]
+           d.update(header, module, type, info, extra, conditionals)
+        except:
+           d = identifier(name, header, module, type, lineno, info, extra, conditionals)
+           self.identifiers[name] = d
+
+        if d != None and static == 1:
+            d.set_static(1)
+
+        if d != None and name != None and type != None:
+            if type == "function":
+                self.functions[name] = d
+            elif type == "functype":
+                self.functions[name] = d
+            elif type == "variable":
+                self.variables[name] = d
+            elif type == "include":
+                self.includes[name] = d
+            elif type == "struct":
+                self.structs[name] = d
+            elif type == "enum":
+                self.enums[name] = d
+            elif type == "typedef":
+                self.typedefs[name] = d
+            elif type == "macro":
+                self.macros[name] = d
+            else:
+                print("Unable to register type ", type)
+
+        if name == debugsym:
+            print("New symbol: %s" % (d))
+
+        return d
+
+    def merge(self, idx):
+        for id in list(idx.functions.keys()):
+              #
+              # macro might be used to override functions or variables
+              # definitions
+              #
+             if id in self.macros:
+                 del self.macros[id]
+             if id in self.functions:
+                 print("function %s from %s redeclared in %s" % (
+                    id, self.functions[id].header, idx.functions[id].header))
+             else:
+                 self.functions[id] = idx.functions[id]
+                 self.identifiers[id] = idx.functions[id]
+        for id in list(idx.variables.keys()):
+              #
+              # macro might be used to override functions or variables
+              # definitions
+              #
+             if id in self.macros:
+                 del self.macros[id]
+             if id in self.variables:
+                 print("variable %s from %s redeclared in %s" % (
+                    id, self.variables[id].header, idx.variables[id].header))
+             else:
+                 self.variables[id] = idx.variables[id]
+                 self.identifiers[id] = idx.variables[id]
+        for id in list(idx.structs.keys()):
+             if id in self.structs:
+                 print("struct %s from %s redeclared in %s" % (
+                    id, self.structs[id].header, idx.structs[id].header))
+             else:
+                 self.structs[id] = idx.structs[id]
+                 self.identifiers[id] = idx.structs[id]
+        for id in list(idx.typedefs.keys()):
+             if id in self.typedefs:
+                 print("typedef %s from %s redeclared in %s" % (
+                    id, self.typedefs[id].header, idx.typedefs[id].header))
+             else:
+                 self.typedefs[id] = idx.typedefs[id]
+                 self.identifiers[id] = idx.typedefs[id]
+        for id in list(idx.macros.keys()):
+              #
+              # macro might be used to override functions or variables
+              # definitions
+              #
+             if id in self.variables:
+                 continue
+             if id in self.functions:
+                 continue
+             if id in self.enums:
+                 continue
+             if id in self.macros:
+                 print("macro %s from %s redeclared in %s" % (
+                    id, self.macros[id].header, idx.macros[id].header))
+             else:
+                 self.macros[id] = idx.macros[id]
+                 self.identifiers[id] = idx.macros[id]
+        for id in list(idx.enums.keys()):
+             if id in self.enums:
+                 print("enum %s from %s redeclared in %s" % (
+                    id, self.enums[id].header, idx.enums[id].header))
+             else:
+                 self.enums[id] = idx.enums[id]
+                 self.identifiers[id] = idx.enums[id]
+
+    def merge_public(self, idx):
+        for id in list(idx.functions.keys()):
+             if id in self.functions:
+                 # check that function condition agrees with header
+                 if idx.functions[id].conditionals != \
+                    self.functions[id].conditionals:
+                     print("Header condition differs from Function for %s:" \
+                        % id)
+                     print("  H: %s" % self.functions[id].conditionals)
+                     print("  C: %s" % idx.functions[id].conditionals)
+                 up = idx.functions[id]
+                 self.functions[id].update(None, up.module, up.type, up.info, up.extra)
+         #     else:
+         #         print "Function %s from %s is not declared in headers" % (
+         #                id, idx.functions[id].module)
+         # TODO: do the same for variables.
+
+    def analyze_dict(self, type, dict):
+        count = 0
+        public = 0
+        for name in list(dict.keys()):
+            id = dict[name]
+            count = count + 1
+            if id.static == 0:
+                public = public + 1
+        if count != public:
+            print("  %d %s , %d public" % (count, type, public))
+        elif count != 0:
+            print("  %d public %s" % (count, type))
+
+
+    def analyze(self):
+        self.analyze_dict("functions", self.functions)
+        self.analyze_dict("variables", self.variables)
+        self.analyze_dict("structs", self.structs)
+        self.analyze_dict("typedefs", self.typedefs)
+        self.analyze_dict("macros", self.macros)
+
+class CLexer:
+    """A lexer for the C language, tokenize the input by reading and
+       analyzing it line by line"""
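+    # For example (illustrative): the line 'int xmlFoo (void);' is tokenized
+    # as ('name', 'int'), ('name', 'xmlFoo'), ('sep', '('), ('name', 'void'),
+    # ('sep', ')'), ('sep', ';').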
+    def __init__(self, input):
+        self.input = input
+        self.tokens = []
+        self.line = ""
+        self.lineno = 0
+
+    def getline(self):
+        line = ''
+        while line == '':
+            line = self.input.readline()
+            if not line:
+                return None
+            self.lineno = self.lineno + 1
+            line = line.lstrip()
+            line = line.rstrip()
+            if line == '':
+                continue
+            while line[-1] == '\\':
+                line = line[:-1]
+                n = self.input.readline()
+                self.lineno = self.lineno + 1
+                n = n.lstrip()
+                n = n.rstrip()
+                if not n:
+                    break
+                else:
+                    line = line + n
+        return line
+
+    def getlineno(self):
+        return self.lineno
+
+    def push(self, token):
+        self.tokens.insert(0, token);
+
+    def debug(self):
+        print("Last token: ", self.last)
+        print("Token queue: ", self.tokens)
+        print("Line %d end: " % (self.lineno), self.line)
+
+    def token(self):
+        while self.tokens == []:
+            if self.line == "":
+                line = self.getline()
+            else:
+                line = self.line
+                self.line = ""
+            if line == None:
+                return None
+
+            if line[0] == '#':
+                self.tokens = list(map((lambda x: ('preproc', x)),
+                                  line.split()))
+                break;
+            l = len(line)
+            if line[0] == '"' or line[0] == "'":
+                end = line[0]
+                line = line[1:]
+                found = 0
+                tok = ""
+                while found == 0:
+                    i = 0
+                    l = len(line)
+                    while i < l:
+                        if line[i] == end:
+                            self.line = line[i+1:]
+                            line = line[:i]
+                            l = i
+                            found = 1
+                            break
+                        if line[i] == '\\':
+                            i = i + 1
+                        i = i + 1
+                    tok = tok + line
+                    if found == 0:
+                        line = self.getline()
+                        if line == None:
+                            return None
+                self.last = ('string', tok)
+                return self.last
+
+            if l >= 2 and line[0] == '/' and line[1] == '*':
+                line = line[2:]
+                found = 0
+                tok = ""
+                while found == 0:
+                    i = 0
+                    l = len(line)
+                    while i < l:
+                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
+                            self.line = line[i+2:]
+                            line = line[:i-1]
+                            l = i
+                            found = 1
+                            break
+                        i = i + 1
+                    if tok != "":
+                        tok = tok + "\n"
+                    tok = tok + line
+                    if found == 0:
+                        line = self.getline()
+                        if line == None:
+                            return None
+                self.last = ('comment', tok)
+                return self.last
+            if l >= 2 and line[0] == '/' and line[1] == '/':
+                line = line[2:]
+                self.last = ('comment', line)
+                return self.last
+            i = 0
+            while i < l:
+                if line[i] == '/' and i+1 < l and line[i+1] == '/':
+                    self.line = line[i:]
+                    line = line[:i]
+                    break
+                if line[i] == '/' and i+1 < l and line[i+1] == '*':
+                    self.line = line[i:]
+                    line = line[:i]
+                    break
+                if line[i] == '"' or line[i] == "'":
+                    self.line = line[i:]
+                    line = line[:i]
+                    break
+                i = i + 1
+            l = len(line)
+            i = 0
+            while i < l:
+                if line[i] == ' ' or line[i] == '\t':
+                    i = i + 1
+                    continue
+                o = ord(line[i])
+                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
+                   (o >= 48 and o <= 57):
+                    s = i
+                    while i < l:
+                        o = ord(line[i])
+                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
+                           (o >= 48 and o <= 57) or \
+			   (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1:
+                            i = i + 1
+                        else:
+                            break
+                    self.tokens.append(('name', line[s:i]))
+                    continue
+                if "(){}:;,[]".find(line[i]) != -1:
+#                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
+#                    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
+#                    line[i] == ',' or line[i] == '[' or line[i] == ']':
+                    self.tokens.append(('sep', line[i]))
+                    i = i + 1
+                    continue
+                if "+-*><=/%&!|.".find(line[i]) != -1:
+#                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
+#                    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
+#                    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
+#                    line[i] == '!' or line[i] == '|' or line[i] == '.':
+                    if line[i] == '.' and  i + 2 < l and \
+                       line[i+1] == '.' and line[i+2] == '.':
+                        self.tokens.append(('name', '...'))
+                        i = i + 3
+                        continue
+
+                    j = i + 1
+                    if j < l and (
+                       "+-*><=/%&!|".find(line[j]) != -1):
+#                        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
+#                        line[j] == '>' or line[j] == '<' or line[j] == '=' or \
+#                        line[j] == '/' or line[j] == '%' or line[j] == '&' or \
+#                        line[j] == '!' or line[j] == '|'):
+                        self.tokens.append(('op', line[i:j+1]))
+                        i = j + 1
+                    else:
+                        self.tokens.append(('op', line[i]))
+                        i = i + 1
+                    continue
+                s = i
+                while i < l:
+                    o = ord(line[i])
+                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
+                       (o >= 48 and o <= 57) or (
+                        " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1):
+#                         line[i] != ' ' and line[i] != '\t' and
+#                         line[i] != '(' and line[i] != ')' and
+#                         line[i] != '{'  and line[i] != '}' and
+#                         line[i] != ':' and line[i] != ';' and
+#                         line[i] != ',' and line[i] != '+' and
+#                         line[i] != '-' and line[i] != '*' and
+#                         line[i] != '/' and line[i] != '%' and
+#                         line[i] != '&' and line[i] != '!' and
+#                         line[i] != '|' and line[i] != '[' and
+#                         line[i] != ']' and line[i] != '=' and
+#                         line[i] != '*' and line[i] != '>' and
+#                         line[i] != '<'):
+                        i = i + 1
+                    else:
+                        break
+                self.tokens.append(('name', line[s:i]))
+
+        tok = self.tokens[0]
+        self.tokens = self.tokens[1:]
+        self.last = tok
+        return tok
+
+class CParser:
+    """The C module parser"""
+    def __init__(self, filename, idx = None):
+        self.filename = filename
+        if len(filename) > 2 and filename[-2:] == '.h':
+            self.is_header = 1
+        else:
+            self.is_header = 0
+        self.input = open(filename)
+        self.lexer = CLexer(self.input)
+        if idx == None:
+            self.index = index()
+        else:
+            self.index = idx
+        self.top_comment = ""
+        self.last_comment = ""
+        self.comment = None
+        self.collect_ref = 0
+        self.no_error = 0
+        self.conditionals = []
+        self.defines = []
+
+    def collect_references(self):
+        self.collect_ref = 1
+
+    def stop_error(self):
+        self.no_error = 1
+
+    def start_error(self):
+        self.no_error = 0
+
+    def lineno(self):
+        return self.lexer.getlineno()
+
+    def index_add(self, name, module, static, type, info=None, extra = None):
+        if self.is_header == 1:
+            self.index.add(name, module, module, static, type, self.lineno(),
+                           info, extra, self.conditionals)
+        else:
+            self.index.add(name, None, module, static, type, self.lineno(),
+                           info, extra, self.conditionals)
+
+    def index_add_ref(self, name, module, static, type, info=None,
+                      extra = None):
+        if self.is_header == 1:
+            self.index.add_ref(name, module, module, static, type,
+                               self.lineno(), info, extra, self.conditionals)
+        else:
+            self.index.add_ref(name, None, module, static, type, self.lineno(),
+                               info, extra, self.conditionals)
+
+    def warning(self, msg):
+        if self.no_error:
+            return
+        print(msg)
+
+    def error(self, msg, token=-1):
+        if self.no_error:
+            return
+
+        print("Parse Error: " + msg)
+        if token != -1:
+            print("Got token ", token)
+        self.lexer.debug()
+        sys.exit(1)
+
+    def debug(self, msg, token=-1):
+        print("Debug: " + msg)
+        if token != -1:
+            print("Got token ", token)
+        self.lexer.debug()
+
+    def parseTopComment(self, comment):
+        res = {}
+        lines = comment.split("\n")
+        item = None
+        for line in lines:
+            while line != "" and (line[0] == ' ' or line[0] == '\t'):
+                line = line[1:]
+            while line != "" and line[0] == '*':
+                line = line[1:]
+            while line != "" and (line[0] == ' ' or line[0] == '\t'):
+                line = line[1:]
+            try:
+                (it, line) = line.split(":", 1)
+                item = it
+                while line != "" and (line[0] == ' ' or line[0] == '\t'):
+                    line = line[1:]
+                if item in res:
+                    res[item] = res[item] + " " + line
+                else:
+                    res[item] = line
+            except:
+                if item != None:
+                    if item in res:
+                        res[item] = res[item] + " " + line
+                    else:
+                        res[item] = line
+        self.index.info = res
+
+    def parseComment(self, token):
+        if self.top_comment == "":
+            self.top_comment = token[1]
+        if self.comment == None or token[1][0] == '*':
+            self.comment = token[1];
+        else:
+            self.comment = self.comment + token[1]
+        token = self.lexer.token()
+
+        if self.comment.find("DOC_DISABLE") != -1:
+            self.stop_error()
+
+        if self.comment.find("DOC_ENABLE") != -1:
+            self.start_error()
+
+        return token
+
+    #
+    # Parse a comment block associated with a typedef
+    #
+    def parseTypeComment(self, name, quiet = 0):
+        if name[0:2] == '__':
+            quiet = 1
+
+        args = []
+        desc = ""
+
+        if self.comment == None:
+            if not quiet:
+                self.warning("Missing comment for type %s" % (name))
+            return((args, desc))
+        if self.comment[0] != '*':
+            if not quiet:
+                self.warning("Missing * in type comment for %s" % (name))
+            return((args, desc))
+        lines = self.comment.split('\n')
+        if lines[0] == '*':
+            del lines[0]
+        if lines[0] != "* %s:" % (name):
+            if not quiet:
+                self.warning("Misformatted type comment for %s" % (name))
+                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
+            return((args, desc))
+        del lines[0]
+        while len(lines) > 0 and lines[0] == '*':
+            del lines[0]
+        desc = ""
+        while len(lines) > 0:
+            l = lines[0]
+            while len(l) > 0 and l[0] == '*':
+                l = l[1:]
+            l = l.strip()
+            desc = desc + " " + l
+            del lines[0]
+
+        desc = desc.strip()
+
+        if quiet == 0:
+            if desc == "":
+                self.warning("Type comment for %s lack description of the macro" % (name))
+
+        return(desc)
+    #
+    # Parse a comment block associated with a macro
+    #
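+    # The comment block this expects looks roughly like (illustration only):
+    #    /**
+    #     * NAME:
+    #     * @arg: description of the argument
+    #     *
+    #     * description of the macro
+    #     */
+    #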
+    def parseMacroComment(self, name, quiet = 0):
+        if name[0:2] == '__':
+            quiet = 1
+
+        args = []
+        desc = ""
+
+        if self.comment == None:
+            if not quiet:
+                self.warning("Missing comment for macro %s" % (name))
+            return((args, desc))
+        if self.comment[0] != '*':
+            if not quiet:
+                self.warning("Missing * in macro comment for %s" % (name))
+            return((args, desc))
+        lines = self.comment.split('\n')
+        if lines[0] == '*':
+            del lines[0]
+        if lines[0] != "* %s:" % (name):
+            if not quiet:
+                self.warning("Misformatted macro comment for %s" % (name))
+                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
+            return((args, desc))
+        del lines[0]
+        while lines[0] == '*':
+            del lines[0]
+        while len(lines) > 0 and lines[0][0:3] == '* @':
+            l = lines[0][3:]
+            try:
+                (arg, desc) = l.split(':', 1)
+                desc=desc.strip()
+                arg=arg.strip()
+            except:
+                if not quiet:
+                    self.warning("Misformatted macro comment for %s" % (name))
+                    self.warning("  problem with '%s'" % (lines[0]))
+                del lines[0]
+                continue
+            del lines[0]
+            l = lines[0].strip()
+            while len(l) > 2 and l[0:3] != '* @':
+                while l[0] == '*':
+                    l = l[1:]
+                desc = desc + ' ' + l.strip()
+                del lines[0]
+                if len(lines) == 0:
+                    break
+                l = lines[0]
+            args.append((arg, desc))
+        while len(lines) > 0 and lines[0] == '*':
+            del lines[0]
+        desc = ""
+        while len(lines) > 0:
+            l = lines[0]
+            while len(l) > 0 and l[0] == '*':
+                l = l[1:]
+            l = l.strip()
+            desc = desc + " " + l
+            del lines[0]
+
+        desc = desc.strip()
+
+        if quiet == 0:
+            if desc == "":
+                self.warning("Macro comment for %s lack description of the macro" % (name))
+
+        return((args, desc))
+
+     #
+     # Parse a comment block and merge the information found in the
+     # parameter descriptions, finally returning a block as complete
+     # as possible
+     #
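+     # The result is a tuple of the form (illustration):
+     #    ((return_type, return_description),
+     #     [(arg_type, arg_name, arg_description), ...],
+     #     function_description)
+     #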
+    def mergeFunctionComment(self, name, description, quiet = 0):
+        if name == 'main':
+            quiet = 1
+        if name[0:2] == '__':
+            quiet = 1
+
+        (ret, args) = description
+        desc = ""
+        retdesc = ""
+
+        if self.comment == None:
+            if not quiet:
+                self.warning("Missing comment for function %s" % (name))
+            return(((ret[0], retdesc), args, desc))
+        if self.comment[0] != '*':
+            if not quiet:
+                self.warning("Missing * in function comment for %s" % (name))
+            return(((ret[0], retdesc), args, desc))
+        lines = self.comment.split('\n')
+        if lines[0] == '*':
+            del lines[0]
+        if lines[0] != "* %s:" % (name):
+            if not quiet:
+                self.warning("Misformatted function comment for %s" % (name))
+                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
+            return(((ret[0], retdesc), args, desc))
+        del lines[0]
+        while lines[0] == '*':
+            del lines[0]
+        nbargs = len(args)
+        while len(lines) > 0 and lines[0][0:3] == '* @':
+            l = lines[0][3:]
+            try:
+                (arg, desc) = l.split(':', 1)
+                desc=desc.strip()
+                arg=arg.strip()
+            except:
+                if not quiet:
+                    self.warning("Misformatted function comment for %s" % (name))
+                    self.warning("  problem with '%s'" % (lines[0]))
+                del lines[0]
+                continue
+            del lines[0]
+            l = lines[0].strip()
+            while len(l) > 2 and l[0:3] != '* @':
+                while l[0] == '*':
+                    l = l[1:]
+                desc = desc + ' ' + l.strip()
+                del lines[0]
+                if len(lines) == 0:
+                    break
+                l = lines[0]
+            i = 0
+            while i < nbargs:
+                if args[i][1] == arg:
+                    args[i] = (args[i][0], arg, desc)
+                    break;
+                i = i + 1
+            if i >= nbargs:
+                if not quiet:
+                    self.warning("Unable to find arg %s from function comment for %s" % (
+                       arg, name))
+        while len(lines) > 0 and lines[0] == '*':
+            del lines[0]
+        desc = ""
+        while len(lines) > 0:
+            l = lines[0]
+            while len(l) > 0 and l[0] == '*':
+                l = l[1:]
+            l = l.strip()
+            if len(l) >= 6 and (l[0:6] == "return" or l[0:6] == "Return"):
+                try:
+                    l = l.split(' ', 1)[1]
+                except:
+                    l = ""
+                retdesc = l.strip()
+                del lines[0]
+                while len(lines) > 0:
+                    l = lines[0]
+                    while len(l) > 0 and l[0] == '*':
+                        l = l[1:]
+                    l = l.strip()
+                    retdesc = retdesc + " " + l
+                    del lines[0]
+            else:
+                desc = desc + " " + l
+                del lines[0]
+
+        retdesc = retdesc.strip()
+        desc = desc.strip()
+
+        if quiet == 0:
+             #
+             # report missing comments
+             #
+            i = 0
+            while i < nbargs:
+                if args[i][2] == None and args[i][0] != "void" and \
+                   ((args[i][1] != None) or (args[i][1] == '')):
+                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
+                i = i + 1
+            if retdesc == "" and ret[0] != "void":
+                self.warning("Function comment for %s lacks description of return value" % (name))
+            if desc == "":
+                self.warning("Function comment for %s lacks description of the function" % (name))
+
+        return(((ret[0], retdesc), args, desc))
+
+    def parsePreproc(self, token):
+        if debug:
+            print("=> preproc ", token, self.lexer.tokens)
+        name = token[1]
+        if name == "#include":
+            token = self.lexer.token()
+            if token == None:
+                return None
+            if token[0] == 'preproc':
+                self.index_add(token[1], self.filename, not self.is_header,
+                                "include")
+                return self.lexer.token()
+            return token
+        if name == "#define":
+            token = self.lexer.token()
+            if token == None:
+                return None
+            if token[0] == 'preproc':
+                 # TODO macros with arguments
+                name = token[1]
+                lst = []
+                token = self.lexer.token()
+                while token != None and token[0] == 'preproc' and \
+                      token[1][0] != '#':
+                    lst.append(token[1])
+                    token = self.lexer.token()
+                try:
+                    name = name.split('(') [0]
+                except:
+                    pass
+                info = self.parseMacroComment(name, not self.is_header)
+                self.index_add(name, self.filename, not self.is_header,
+                                "macro", info)
+                return token
+
+        #
+        # Processing of conditionals modified by Bill 1/1/05
+        #
+        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
+        # #if, #else and #endif) for headers and mainline code,
+        # store the ones from the header in libxml2-api.xml, and later
+        # (in the routine merge_public) verify that the two (header and
+        # mainline code) agree.
+        #
+        # There is a small problem with processing the headers. Some of
+        # the variables are not concerned with enabling / disabling of
+        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
+        # them to be included in libxml2-api.xml, or involved in
+        # the check between the header and the mainline code.  To
+        # accomplish this, we ignore any conditional which doesn't include
+        # the string 'ENABLED'
+        #
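+        # For instance (illustration only): '#ifdef LIBXML_XPATH_ENABLED'
+        # appends "defined(LIBXML_XPATH_ENABLED)" to self.conditionals,
+        # while '#ifndef __XML_PARSER_H__' is only recorded in self.defines.
+        #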
+        if name == "#ifdef":
+            apstr = self.lexer.tokens[0][1]
+            try:
+                self.defines.append(apstr)
+                if apstr.find('ENABLED') != -1:
+                    self.conditionals.append("defined(%s)" % apstr)
+            except:
+                pass
+        elif name == "#ifndef":
+            apstr = self.lexer.tokens[0][1]
+            try:
+                self.defines.append(apstr)
+                if apstr.find('ENABLED') != -1:
+                    self.conditionals.append("!defined(%s)" % apstr)
+            except:
+                pass
+        elif name == "#if":
+            apstr = ""
+            for tok in self.lexer.tokens:
+                if apstr != "":
+                    apstr = apstr + " "
+                apstr = apstr + tok[1]
+            try:
+                self.defines.append(apstr)
+                if apstr.find('ENABLED') != -1:
+                    self.conditionals.append(apstr)
+            except:
+                pass
+        elif name == "#else":
+            if self.conditionals != [] and \
+               self.defines[-1].find('ENABLED') != -1:
+                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
+        elif name == "#endif":
+            if self.conditionals != [] and \
+               self.defines[-1].find('ENABLED') != -1:
+                self.conditionals = self.conditionals[:-1]
+            self.defines = self.defines[:-1]
+        token = self.lexer.token()
+        while token != None and token[0] == 'preproc' and \
+            token[1][0] != '#':
+            token = self.lexer.token()
+        return token
+
+     #
+     # Token acquisition on top of the lexer; it handles preprocessor
+     # directives and comments internally since they are logically not
+     # part of the program structure.
+     #
+    def token(self):
+        global ignored_words
+
+        token = self.lexer.token()
+        while token != None:
+            if token[0] == 'comment':
+                token = self.parseComment(token)
+                continue
+            elif token[0] == 'preproc':
+                token = self.parsePreproc(token)
+                continue
+            elif token[0] == "name" and token[1] == "__const":
+                token = ("name", "const")
+                return token
+            elif token[0] == "name" and token[1] == "__attribute":
+                token = self.lexer.token()
+                while token != None and token[1] != ";":
+                    token = self.lexer.token()
+                return token
+            elif token[0] == "name" and token[1] in ignored_words:
+                (n, info) = ignored_words[token[1]]
+                i = 0
+                while i < n:
+                    token = self.lexer.token()
+                    i = i + 1
+                token = self.lexer.token()
+                continue
+            else:
+                if debug:
+                    print("=> ", token)
+                return token
+        return None
+
+     #
+     # Parse a typedef; it records the type and its name.
+     #
+    def parseTypedef(self, token):
+        if token == None:
+            return None
+        token = self.parseType(token)
+        if token == None:
+            self.error("parsing typedef")
+            return None
+        base_type = self.type
+        type = base_type
+         #self.debug("end typedef type", token)
+        while token != None:
+            if token[0] == "name":
+                name = token[1]
+                signature = self.signature
+                if signature != None:
+                    type = type.split('(')[0]
+                    d = self.mergeFunctionComment(name,
+                            ((type, None), signature), 1)
+                    self.index_add(name, self.filename, not self.is_header,
+                                    "functype", d)
+                else:
+                    if base_type == "struct":
+                        self.index_add(name, self.filename, not self.is_header,
+                                        "struct", type)
+                        base_type = "struct " + name
+                    else:
+                        # TODO report missing or misformatted comments
+                        info = self.parseTypeComment(name, 1)
+                        self.index_add(name, self.filename, not self.is_header,
+                                    "typedef", type, info)
+                token = self.token()
+            else:
+                self.error("parsing typedef: expecting a name")
+                return token
+             #self.debug("end typedef", token)
+            if token != None and token[0] == 'sep' and token[1] == ',':
+                type = base_type
+                token = self.token()
+                while token != None and token[0] == "op":
+                    type = type + token[1]
+                    token = self.token()
+            elif token != None and token[0] == 'sep' and token[1] == ';':
+                break;
+            elif token != None and token[0] == 'name':
+                type = base_type
+                continue;
+            else:
+                self.error("parsing typedef: expecting ';'", token)
+                return token
+        token = self.token()
+        return token
+
+     #
+     # Parse a C code block; used for function bodies, it parses up to
+     # and including the balancing }
+     #
+    def parseBlock(self, token):
+        while token != None:
+            if token[0] == "sep" and token[1] == "{":
+                token = self.token()
+                token = self.parseBlock(token)
+            elif token[0] == "sep" and token[1] == "}":
+                self.comment = None
+                token = self.token()
+                return token
+            else:
+                if self.collect_ref == 1:
+                    oldtok = token
+                    token = self.token()
+                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
+                        if token[0] == "sep" and token[1] == "(":
+                            self.index_add_ref(oldtok[1], self.filename,
+                                                0, "function")
+                            token = self.token()
+                        elif token[0] == "name":
+                            token = self.token()
+                            if token[0] == "sep" and (token[1] == ";" or
+                               token[1] == "," or token[1] == "="):
+                                self.index_add_ref(oldtok[1], self.filename,
+                                                    0, "type")
+                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
+                        self.index_add_ref(oldtok[1], self.filename,
+                                            0, "typedef")
+                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
+                        self.index_add_ref(oldtok[1], self.filename,
+                                            0, "typedef")
+
+                else:
+                    token = self.token()
+        return token
+
+     #
+     # Parse a C struct definition till the balancing }
+     #
+    def parseStruct(self, token):
+        fields = []
+         #self.debug("start parseStruct", token)
+        while token != None:
+            if token[0] == "sep" and token[1] == "{":
+                token = self.token()
+                token = self.parseTypeBlock(token)
+            elif token[0] == "sep" and token[1] == "}":
+                self.struct_fields = fields
+                 #self.debug("end parseStruct", token)
+                 #print fields
+                token = self.token()
+                return token
+            else:
+                base_type = self.type
+                 #self.debug("before parseType", token)
+                token = self.parseType(token)
+                 #self.debug("after parseType", token)
+                if token != None and token[0] == "name":
+                    fname = token[1]
+                    token = self.token()
+                    if token[0] == "sep" and token[1] == ";":
+                        self.comment = None
+                        token = self.token()
+                        fields.append((self.type, fname, self.comment))
+                        self.comment = None
+                    else:
+                        self.error("parseStruct: expecting ;", token)
+                elif token != None and token[0] == "sep" and token[1] == "{":
+                    token = self.token()
+                    token = self.parseTypeBlock(token)
+                    if token != None and token[0] == "name":
+                        token = self.token()
+                    if token != None and token[0] == "sep" and token[1] == ";":
+                        token = self.token()
+                    else:
+                        self.error("parseStruct: expecting ;", token)
+                else:
+                    self.error("parseStruct: name", token)
+                    token = self.token()
+                self.type = base_type;
+        self.struct_fields = fields
+         #self.debug("end parseStruct", token)
+         #print fields
+        return token
+
+     #
+     # Parse a C enum block, parsing till the balancing }
+     #
+    def parseEnumBlock(self, token):
+        self.enums = []
+        name = None
+        self.comment = None
+        comment = ""
+        value = "0"
+        while token != None:
+            if token[0] == "sep" and token[1] == "{":
+                token = self.token()
+                token = self.parseTypeBlock(token)
+            elif token[0] == "sep" and token[1] == "}":
+                if name != None:
+                    if self.comment != None:
+                        comment = self.comment
+                        self.comment = None
+                    self.enums.append((name, value, comment))
+                token = self.token()
+                return token
+            elif token[0] == "name":
+                    if name != None:
+                        if self.comment != None:
+                            comment = self.comment.strip()
+                            self.comment = None
+                        self.enums.append((name, value, comment))
+                    name = token[1]
+                    comment = ""
+                    token = self.token()
+                    if token[0] == "op" and token[1][0] == "=":
+                        value = ""
+                        if len(token[1]) > 1:
+                            value = token[1][1:]
+                        token = self.token()
+                        while token[0] != "sep" or (token[1] != ',' and
+                              token[1] != '}'):
+                            value = value + token[1]
+                            token = self.token()
+                    else:
+                        try:
+                            value = "%d" % (int(value) + 1)
+                        except:
+                            self.warning("Failed to compute value of enum %s" % (name))
+                            value=""
+                    if token[0] == "sep" and token[1] == ",":
+                        token = self.token()
+            else:
+                token = self.token()
+        return token
+
+     #
+     # Parse a C definition block; used for structs, it parses up to
+     # the balancing }
+     #
+    def parseTypeBlock(self, token):
+        while token != None:
+            if token[0] == "sep" and token[1] == "{":
+                token = self.token()
+                token = self.parseTypeBlock(token)
+            elif token[0] == "sep" and token[1] == "}":
+                token = self.token()
+                return token
+            else:
+                token = self.token()
+        return token
+
+     #
+     # Parse a type: the fact that the type name can occur either after
+     #    the definition or within the definition makes it a little harder;
+     #    if it is inside, the name token is pushed back before returning
+     #
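+     # For example (illustration): given 'int xmlGetFoo (...)', self.type
+     # ends up as 'int', the ('name', 'xmlGetFoo') token is returned and
+     # the following '(' is pushed back onto the lexer.
+     #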
+    def parseType(self, token):
+        self.type = ""
+        self.struct_fields = []
+        self.signature = None
+        if token == None:
+            return token
+
+        while token[0] == "name" and (
+              token[1] == "const" or \
+              token[1] == "unsigned" or \
+              token[1] == "signed"):
+            if self.type == "":
+                self.type = token[1]
+            else:
+                self.type = self.type + " " + token[1]
+            token = self.token()
+
+        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
+            if self.type == "":
+                self.type = token[1]
+            else:
+                self.type = self.type + " " + token[1]
+            if token[0] == "name" and token[1] == "int":
+                if self.type == "":
+                    self.type = tmp[1]
+                else:
+                    self.type = self.type + " " + tmp[1]
+
+        elif token[0] == "name" and token[1] == "struct":
+            if self.type == "":
+                self.type = token[1]
+            else:
+                self.type = self.type + " " + token[1]
+            token = self.token()
+            nametok = None
+            if token[0] == "name":
+                nametok = token
+                token = self.token()
+            if token != None and token[0] == "sep" and token[1] == "{":
+                token = self.token()
+                token = self.parseStruct(token)
+            elif token != None and token[0] == "op" and token[1] == "*":
+                self.type = self.type + " " + nametok[1] + " *"
+                token = self.token()
+                while token != None and token[0] == "op" and token[1] == "*":
+                    self.type = self.type + " *"
+                    token = self.token()
+                if token[0] == "name":
+                    nametok = token
+                    token = self.token()
+                else:
+                    self.error("struct : expecting name", token)
+                    return token
+            elif token != None and token[0] == "name" and nametok != None:
+                self.type = self.type + " " + nametok[1]
+                return token
+
+            if nametok != None:
+                self.lexer.push(token)
+                token = nametok
+            return token
+
+        elif token[0] == "name" and token[1] == "enum":
+            if self.type == "":
+                self.type = token[1]
+            else:
+                self.type = self.type + " " + token[1]
+            self.enums = []
+            token = self.token()
+            if token != None and token[0] == "sep" and token[1] == "{":
+                token = self.token()
+                token = self.parseEnumBlock(token)
+            else:
+                self.error("parsing enum: expecting '{'", token)
+            enum_type = None
+            if token != None and token[0] != "name":
+                self.lexer.push(token)
+                token = ("name", "enum")
+            else:
+                enum_type = token[1]
+            for enum in self.enums:
+                self.index_add(enum[0], self.filename,
+                               not self.is_header, "enum",
+                               (enum[1], enum[2], enum_type))
+            return token
+
+        elif token[0] == "name":
+            if self.type == "":
+                self.type = token[1]
+            else:
+                self.type = self.type + " " + token[1]
+        else:
+            self.error("parsing type %s: expecting a name" % (self.type),
+                       token)
+            return token
+        token = self.token()
+        while token != None and (token[0] == "op" or
+              token[0] == "name" and token[1] == "const"):
+            self.type = self.type + " " + token[1]
+            token = self.token()
+
+         #
+         # if there is a parenthesis here, this means a function type
+         #
+        if token != None and token[0] == "sep" and token[1] == '(':
+            self.type = self.type + token[1]
+            token = self.token()
+            while token != None and token[0] == "op" and token[1] == '*':
+                self.type = self.type + token[1]
+                token = self.token()
+            if token == None or token[0] != "name" :
+                self.error("parsing function type, name expected", token);
+                return token
+            self.type = self.type + token[1]
+            nametok = token
+            token = self.token()
+            if token != None and token[0] == "sep" and token[1] == ')':
+                self.type = self.type + token[1]
+                token = self.token()
+                if token != None and token[0] == "sep" and token[1] == '(':
+                    token = self.token()
+                    type = self.type;
+                    token = self.parseSignature(token);
+                    self.type = type;
+                else:
+                    self.error("parsing function type, '(' expected", token);
+                    return token
+            else:
+                self.error("parsing function type, ')' expected", token);
+                return token
+            self.lexer.push(token)
+            token = nametok
+            return token
+
+         #
+         # do some lookahead for arrays
+         #
+        if token != None and token[0] == "name":
+            nametok = token
+            token = self.token()
+            if token != None and token[0] == "sep" and token[1] == '[':
+                self.type = self.type + nametok[1]
+                while token != None and token[0] == "sep" and token[1] == '[':
+                    self.type = self.type + token[1]
+                    token = self.token()
+                    while token != None and token[0] != 'sep' and \
+                          token[1] != ']' and token[1] != ';':
+                        self.type = self.type + token[1]
+                        token = self.token()
+                if token != None and token[0] == 'sep' and token[1] == ']':
+                    self.type = self.type + token[1]
+                    token = self.token()
+                else:
+                    self.error("parsing array type, ']' expected", token);
+                    return token
+            elif token != None and token[0] == "sep" and token[1] == ':':
+                 # remove :12 in case it's a limited int size
+                token = self.token()
+                token = self.token()
+            self.lexer.push(token)
+            token = nametok
+
+        return token
+
+     #
+     # Parse a signature: '(' has been parsed and we scan the type definition
+     #    up to and including the ')'
+     #
+    def parseSignature(self, token):
+        signature = []
+        if token != None and token[0] == "sep" and token[1] == ')':
+            self.signature = []
+            token = self.token()
+            return token
+        while token != None:
+            token = self.parseType(token)
+            if token != None and token[0] == "name":
+                signature.append((self.type, token[1], None))
+                token = self.token()
+            elif token != None and token[0] == "sep" and token[1] == ',':
+                token = self.token()
+                continue
+            elif token != None and token[0] == "sep" and token[1] == ')':
+                 # only the type was provided
+                if self.type == "...":
+                    signature.append((self.type, "...", None))
+                else:
+                    signature.append((self.type, None, None))
+            if token != None and token[0] == "sep":
+                if token[1] == ',':
+                    token = self.token()
+                    continue
+                elif token[1] == ')':
+                    token = self.token()
+                    break
+        self.signature = signature
+        return token
+
+     #
+     # Parse a global definition, be it a type, variable or function.
+     # The extern "C" blocks are a bit nasty and require it to recurse.
+     #
+    def parseGlobal(self, token):
+        static = 0
+        if token[1] == 'extern':
+            token = self.token()
+            if token == None:
+                return token
+            if token[0] == 'string':
+                if token[1] == 'C':
+                    token = self.token()
+                    if token == None:
+                        return token
+                    if token[0] == 'sep' and token[1] == "{":
+                        token = self.token()
+#                         print 'Entering extern "C" line ', self.lineno()
+                        while token != None and (token[0] != 'sep' or
+                              token[1] != "}"):
+                            if token[0] == 'name':
+                                token = self.parseGlobal(token)
+                            else:
+                                self.error(
+                                 "token %s %s unexpected at the top level" % (
+                                        token[0], token[1]))
+                                token = self.parseGlobal(token)
+#                         print 'Exiting extern "C" line', self.lineno()
+                        token = self.token()
+                        return token
+                else:
+                    return token
+        elif token[1] == 'static':
+            static = 1
+            token = self.token()
+            if token == None or token[0] != 'name':
+                return token
+
+        if token[1] == 'typedef':
+            token = self.token()
+            return self.parseTypedef(token)
+        else:
+            token = self.parseType(token)
+            type_orig = self.type
+        if token == None or token[0] != "name":
+            return token
+        type = type_orig
+        self.name = token[1]
+        token = self.token()
+        while token != None and (token[0] == "sep" or token[0] == "op"):
+            if token[0] == "sep":
+                if token[1] == "[":
+                    type = type + token[1]
+                    token = self.token()
+                    while token != None and (token[0] != "sep" or \
+                          token[1] != ";"):
+                        type = type + token[1]
+                        token = self.token()
+
+            if token != None and token[0] == "op" and token[1] == "=":
+                #
+                # Skip the initialization of the variable
+                #
+                token = self.token()
+                if token[0] == 'sep' and token[1] == '{':
+                    token = self.token()
+                    token = self.parseBlock(token)
+                else:
+                    self.comment = None
+                    while token != None and (token[0] != "sep" or \
+                          (token[1] != ';' and token[1] != ',')):
+                            token = self.token()
+                self.comment = None
+                if token == None or token[0] != "sep" or (token[1] != ';' and
+                   token[1] != ','):
+                    self.error("missing ';' or ',' after value")
+
+            if token != None and token[0] == "sep":
+                if token[1] == ";":
+                    self.comment = None
+                    token = self.token()
+                    if type == "struct":
+                        self.index_add(self.name, self.filename,
+                             not self.is_header, "struct", self.struct_fields)
+                    else:
+                        self.index_add(self.name, self.filename,
+                             not self.is_header, "variable", type)
+                    break
+                elif token[1] == "(":
+                    token = self.token()
+                    token = self.parseSignature(token)
+                    if token == None:
+                        return None
+                    if token[0] == "sep" and token[1] == ";":
+                        d = self.mergeFunctionComment(self.name,
+                                ((type, None), self.signature), 1)
+                        self.index_add(self.name, self.filename, static,
+                                        "function", d)
+                        token = self.token()
+                    elif token[0] == "sep" and token[1] == "{":
+                        d = self.mergeFunctionComment(self.name,
+                                ((type, None), self.signature), static)
+                        self.index_add(self.name, self.filename, static,
+                                        "function", d)
+                        token = self.token()
+                        token = self.parseBlock(token)
+                elif token[1] == ',':
+                    self.comment = None
+                    self.index_add(self.name, self.filename, static,
+                                    "variable", type)
+                    type = type_orig
+                    token = self.token()
+                    while token != None and token[0] == "sep":
+                        type = type + token[1]
+                        token = self.token()
+                    if token != None and token[0] == "name":
+                        self.name = token[1]
+                        token = self.token()
+                else:
+                    break
+
+        return token
+
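+    #
+    # Entry point: parse the whole file, dispatching every top-level name
+    # token to parseGlobal(), then process the file's top comment.
+    #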
+    def parse(self):
+        self.warning("Parsing %s" % (self.filename))
+        token = self.token()
+        while token != None:
+            if token[0] == 'name':
+                token = self.parseGlobal(token)
+            else:
+                self.error("token %s %s unexpected at the top level" % (
+                       token[0], token[1]))
+                token = self.parseGlobal(token)
+                return
+        self.parseTopComment(self.top_comment)
+        return self.index
+
+
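+#
+# The documentation builder: scans the configured directories, parses each
+# header and module with CParser, merges the per-file indexes and finally
+# serializes the result as <name>-api.xml and <name>-refs.xml.
+#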
+class docBuilder:
+    """A documentation builder"""
+    def __init__(self, name, directories=['.'], excludes=[]):
+        self.name = name
+        self.directories = directories
+        self.excludes = excludes + list(ignored_files.keys())
+        self.modules = {}
+        self.headers = {}
+        self.idx = index()
+        self.xref = {}
+        self.index = {}
+        if name == 'libxml2':
+            self.basename = 'libxml'
+        else:
+            self.basename = name
+
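+    #
+    # Strip punctuation from a description string and record every word of
+    # three letters or more (except a few stop words) in self.xref, mapping
+    # word -> list of symbol ids whose documentation mentions it.
+    #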
+    def indexString(self, id, str):
+        if str == None:
+            return
+        str = str.replace("'", ' ')
+        str = str.replace('"', ' ')
+        str = str.replace("/", ' ')
+        str = str.replace('*', ' ')
+        str = str.replace("[", ' ')
+        str = str.replace("]", ' ')
+        str = str.replace("(", ' ')
+        str = str.replace(")", ' ')
+        str = str.replace("<", ' ')
+        str = str.replace('>', ' ')
+        str = str.replace("&", ' ')
+        str = str.replace('#', ' ')
+        str = str.replace(",", ' ')
+        str = str.replace('.', ' ')
+        str = str.replace(';', ' ')
+        tokens = str.split()
+        for token in tokens:
+            try:
+                c = token[0]
+                if string.ascii_letters.find(c) < 0:
+                    pass
+                elif len(token) < 3:
+                    pass
+                else:
+                    lower = token.lower()
+                    # TODO: generalize this a bit
+                    if lower == 'and' or lower == 'the':
+                        pass
+                    elif token in self.xref:
+                        self.xref[token].append(id)
+                    else:
+                        self.xref[token] = [id]
+            except:
+                pass
+
+    def analyze(self):
+        print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys()))))
+        self.idx.analyze()
+
+    def scanHeaders(self):
+        for header in list(self.headers.keys()):
+            parser = CParser(header)
+            idx = parser.parse()
+            self.headers[header] = idx
+            self.idx.merge(idx)
+
+    def scanModules(self):
+        for module in list(self.modules.keys()):
+            parser = CParser(module)
+            idx = parser.parse()
+            # idx.analyze()
+            self.modules[module] = idx
+            self.idx.merge_public(idx)
+
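+    #
+    # Collect the *.c and *.h files of every configured directory, skipping
+    # anything matching an exclude pattern, then parse headers and modules.
+    #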
+    def scan(self):
+        for directory in self.directories:
+            files = glob.glob(directory + "/*.c")
+            for file in files:
+                skip = 0
+                for excl in self.excludes:
+                    if file.find(excl) != -1:
+                        print("Skipping %s" % file)
+                        skip = 1
+                        break
+                if skip == 0:
+                    self.modules[file] = None
+            files = glob.glob(directory + "/*.h")
+            for file in files:
+                skip = 0
+                for excl in self.excludes:
+                    if file.find(excl) != -1:
+                        print("Skipping %s" % file)
+                        skip = 1
+                        break
+                if skip == 0:
+                    self.headers[file] = None
+        self.scanHeaders()
+        self.scanModules()
+
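+    #
+    # Derive the module name from a path: the basename with its .c or .h
+    # suffix stripped.
+    #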
+    def modulename_file(self, file):
+        module = os.path.basename(file)
+        if module[-2:] == '.h':
+            module = module[:-2]
+        elif module[-2:] == '.c':
+            module = module[:-2]
+        return module
+
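+    #
+    # The serialize_* helpers below each emit one kind of symbol as an XML
+    # element of the <name>-api.xml file, using the merged index self.idx.
+    #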
+    def serialize_enum(self, output, name):
+        id = self.idx.enums[name]
+        output.write("    <enum name='%s' file='%s'" % (name,
+                     self.modulename_file(id.header)))
+        if id.info != None:
+            info = id.info
+            if info[0] != None and info[0] != '':
+                try:
+                    val = eval(info[0])
+                except:
+                    val = info[0]
+                output.write(" value='%s'" % (val))
+            if info[2] != None and info[2] != '':
+                output.write(" type='%s'" % info[2])
+            if info[1] != None and info[1] != '':
+                output.write(" info='%s'" % escape(info[1]))
+        output.write("/>\n")
+
+    def serialize_macro(self, output, name):
+        id = self.idx.macros[name]
+        output.write("    <macro name='%s' file='%s'>\n" % (name,
+                     self.modulename_file(id.header)))
+        if id.info != None:
+            try:
+                (args, desc) = id.info
+                if desc != None and desc != "":
+                    output.write("      <info>%s</info>\n" % (escape(desc)))
+                    self.indexString(name, desc)
+                for arg in args:
+                    (name, desc) = arg
+                    if desc != None and desc != "":
+                        output.write("      <arg name='%s' info='%s'/>\n" % (
+                                     name, escape(desc)))
+                        self.indexString(name, desc)
+                    else:
+                        output.write("      <arg name='%s'/>\n" % (name))
+            except:
+                pass
+        output.write("    </macro>\n")
+
+    def serialize_typedef(self, output, name):
+        id = self.idx.typedefs[name]
+        if id.info[0:7] == 'struct ':
+            output.write("    <struct name='%s' file='%s' type='%s'" % (
+                     name, self.modulename_file(id.header), id.info))
+            name = id.info[7:]
+            if name in self.idx.structs and ( \
+               type(self.idx.structs[name].info) == type(()) or
+                type(self.idx.structs[name].info) == type([])):
+                output.write(">\n")
+                try:
+                    for field in self.idx.structs[name].info:
+                        desc = field[2]
+                        self.indexString(name, desc)
+                        if desc == None:
+                            desc = ''
+                        else:
+                            desc = escape(desc)
+                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
+                except:
+                    print("Failed to serialize struct %s" % (name))
+                output.write("    </struct>\n")
+            else:
+                output.write("/>\n")
+        else:
+            output.write("    <typedef name='%s' file='%s' type='%s'" % (
+                         name, self.modulename_file(id.header), id.info))
+            try:
+                desc = id.extra
+                if desc != None and desc != "":
+                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
+                    output.write("    </typedef>\n")
+                else:
+                    output.write("/>\n")
+            except:
+                output.write("/>\n")
+
+    def serialize_variable(self, output, name):
+        id = self.idx.variables[name]
+        if id.info != None:
+            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
+                    name, self.modulename_file(id.header), id.info))
+        else:
+            output.write("    <variable name='%s' file='%s'/>\n" % (
+                    name, self.modulename_file(id.header)))
+
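+    #
+    # Emit the element for one entry of the function index (the XML tag is
+    # taken from the entry's type): conditionals, description, return type
+    # and arguments; also warn when a public function has no description.
+    #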
+    def serialize_function(self, output, name):
+        id = self.idx.functions[name]
+        if name == debugsym:
+            print("=>", id)
+
+        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
+                     name, self.modulename_file(id.header),
+                     self.modulename_file(id.module)))
+        #
+        # Processing of conditionals modified by Bill 1/1/05
+        #
+        if id.conditionals != None:
+            apstr = ""
+            for cond in id.conditionals:
+                if apstr != "":
+                    apstr = apstr + " &amp;&amp; "
+                apstr = apstr + cond
+            output.write("      <cond>%s</cond>\n" % (apstr))
+        try:
+            (ret, params, desc) = id.info
+            if (desc == None or desc == '') and \
+               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
+                print("%s %s from %s has no description" % (id.type, name,
+                       self.modulename_file(id.module)))
+
+            output.write("      <info>%s</info>\n" % (escape(desc)))
+            self.indexString(name, desc)
+            if ret[0] != None:
+                if ret[0] == "void":
+                    output.write("      <return type='void'/>\n")
+                else:
+                    output.write("      <return type='%s' info='%s'/>\n" % (
+                             ret[0], escape(ret[1])))
+                    self.indexString(name, ret[1])
+            for param in params:
+                if param[0] == 'void':
+                    continue
+                if param[2] == None:
+                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
+                else:
+                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
+                    self.indexString(name, param[2])
+        except:
+            print("Failed to save function %s info: " % name, repr(id.info))
+        output.write("    </%s>\n" % (id.type))
+
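+    #
+    # Emit the <file> element for one header: its Summary/Description/Author
+    # information plus one <exports> line per public symbol it declares.
+    #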
+    def serialize_exports(self, output, file):
+        module = self.modulename_file(file)
+        output.write("    <file name='%s'>\n" % (module))
+        dict = self.headers[file]
+        if dict.info != None:
+            for data in ('Summary', 'Description', 'Author'):
+                try:
+                    output.write("     <%s>%s</%s>\n" % (
+                                 data.lower(),
+                                 escape(dict.info[data]),
+                                 data.lower()))
+                except:
+                    print("Header %s lacks a %s description" % (module, data))
+            if 'Description' in dict.info:
+                desc = dict.info['Description']
+                if desc.find("DEPRECATED") != -1:
+                    output.write("     <deprecated/>\n")
+
+        ids = list(dict.macros.keys())
+        ids.sort()
+        for id in uniq(ids):
+            # Macros are sometimes used to masquerade as other types.
+            if id in dict.functions:
+                continue
+            if id in dict.variables:
+                continue
+            if id in dict.typedefs:
+                continue
+            if id in dict.structs:
+                continue
+            if id in dict.enums:
+                continue
+            output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
+        ids = list(dict.enums.keys())
+        ids.sort()
+        for id in uniq(ids):
+            output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
+        ids = list(dict.typedefs.keys())
+        ids.sort()
+        for id in uniq(ids):
+            output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
+        ids = list(dict.structs.keys())
+        ids.sort()
+        for id in uniq(ids):
+            output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
+        ids = list(dict.variables.keys())
+        ids.sort()
+        for id in uniq(ids):
+            output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
+        ids = list(dict.functions.keys())
+        ids.sort()
+        for id in uniq(ids):
+            output.write("     <exports symbol='%s' type='function'/>\n" % (id))
+        output.write("    </file>\n")
+
+    def serialize_xrefs_files(self, output):
+        headers = list(self.headers.keys())
+        headers.sort()
+        for file in headers:
+            module = self.modulename_file(file)
+            output.write("    <file name='%s'>\n" % (module))
+            dict = self.headers[file]
+            ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \
+                  list(dict.macros.keys()) + list(dict.typedefs.keys()) + \
+                  list(dict.structs.keys()) + list(dict.enums.keys()))
+            ids.sort()
+            for id in ids:
+                output.write("      <ref name='%s'/>\n" % (id))
+            output.write("    </file>\n")
+        pass
+
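+    #
+    # Build the <functions> cross-reference: group function names by the
+    # type of their parameters, ignoring trivial types like int or char *.
+    #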
+    def serialize_xrefs_functions(self, output):
+        funcs = {}
+        for name in list(self.idx.functions.keys()):
+            id = self.idx.functions[name]
+            try:
+                (ret, params, desc) = id.info
+                for param in params:
+                    if param[0] == 'void':
+                        continue
+                    if param[0] in funcs:
+                        funcs[param[0]].append(name)
+                    else:
+                        funcs[param[0]] = [name]
+            except:
+                pass
+        typ = list(funcs.keys())
+        typ.sort()
+        for type in typ:
+            if type == '' or type == 'void' or type == "int" or \
+               type == "char *" or type == "const char *":
+                continue
+            output.write("    <type name='%s'>\n" % (type))
+            ids = funcs[type]
+            ids.sort()
+            pid = ''        # not sure why we have dups, but get rid of them!
+            for id in ids:
+                if id != pid:
+                    output.write("      <ref name='%s'/>\n" % (id))
+                    pid = id
+            output.write("    </type>\n")
+
+    def serialize_xrefs_constructors(self, output):
+        funcs = {}
+        for name in list(self.idx.functions.keys()):
+            id = self.idx.functions[name]
+            try:
+                (ret, params, desc) = id.info
+                if ret[0] == "void":
+                    continue
+                if ret[0] in funcs:
+                    funcs[ret[0]].append(name)
+                else:
+                    funcs[ret[0]] = [name]
+            except:
+                pass
+        typ = list(funcs.keys())
+        typ.sort()
+        for type in typ:
+            if type == '' or type == 'void' or type == "int" or \
+               type == "char *" or type == "const char *":
+                continue
+            output.write("    <type name='%s'>\n" % (type))
+            ids = funcs[type]
+            ids.sort()
+            for id in ids:
+                output.write("      <ref name='%s'/>\n" % (id))
+            output.write("    </type>\n")
+
+    def serialize_xrefs_alpha(self, output):
+        letter = None
+        ids = list(self.idx.identifiers.keys())
+        ids.sort()
+        for id in ids:
+            if id[0] != letter:
+                if letter != None:
+                    output.write("    </letter>\n")
+                letter = id[0]
+                output.write("    <letter name='%s'>\n" % (letter))
+            output.write("      <ref name='%s'/>\n" % (id))
+        if letter != None:
+            output.write("    </letter>\n")
+
+    def serialize_xrefs_references(self, output):
+        typ = list(self.idx.identifiers.keys())
+        typ.sort()
+        for id in typ:
+            idf = self.idx.identifiers[id]
+            module = idf.header
+            output.write("    <reference name='%s' href='%s'/>\n" % (id,
+                         'html/' + self.basename + '-' +
+                         self.modulename_file(module) + '.html#' +
+                         id))
+
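+    #
+    # Emit the full-text index: words are grouped by first letter and split
+    # into <chunk> elements of roughly 200 references each; words referenced
+    # by more than 30 symbols are dropped as too common to be useful.
+    #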
+    def serialize_xrefs_index(self, output):
+        index = self.xref
+        typ = list(index.keys())
+        typ.sort()
+        letter = None
+        count = 0
+        chunk = 0
+        chunks = []
+        for id in typ:
+            if len(index[id]) > 30:
+                continue
+            if id[0] != letter:
+                if letter == None or count > 200:
+                    if letter != None:
+                        output.write("      </letter>\n")
+                        output.write("    </chunk>\n")
+                        count = 0
+                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
+                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
+                    first_letter = id[0]
+                    chunk = chunk + 1
+                elif letter != None:
+                    output.write("      </letter>\n")
+                letter = id[0]
+                output.write("      <letter name='%s'>\n" % (letter))
+            output.write("        <word name='%s'>\n" % (id))
+            tokens = index[id]
+            tokens.sort()
+            tok = None
+            for token in tokens:
+                if tok == token:
+                    continue
+                tok = token
+                output.write("          <ref name='%s'/>\n" % (token))
+                count = count + 1
+            output.write("        </word>\n")
+        if letter != None:
+            output.write("      </letter>\n")
+            output.write("    </chunk>\n")
+            if count != 0:
+                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
+            output.write("    <chunks>\n")
+            for ch in chunks:
+                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
+                             ch[0], ch[1], ch[2]))
+            output.write("    </chunks>\n")
+
+    def serialize_xrefs(self, output):
+        output.write("  <references>\n")
+        self.serialize_xrefs_references(output)
+        output.write("  </references>\n")
+        output.write("  <alpha>\n")
+        self.serialize_xrefs_alpha(output)
+        output.write("  </alpha>\n")
+        output.write("  <constructors>\n")
+        self.serialize_xrefs_constructors(output)
+        output.write("  </constructors>\n")
+        output.write("  <functions>\n")
+        self.serialize_xrefs_functions(output)
+        output.write("  </functions>\n")
+        output.write("  <files>\n")
+        self.serialize_xrefs_files(output)
+        output.write("  </files>\n")
+        output.write("  <index>\n")
+        self.serialize_xrefs_index(output)
+        output.write("  </index>\n")
+
+    def serialize(self):
+        filename = "%s-api.xml" % self.name
+        print("Saving XML description %s" % (filename))
+        output = open(filename, "w")
+        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
+        output.write("<api name='%s'>\n" % self.name)
+        output.write("  <files>\n")
+        headers = list(self.headers.keys())
+        headers.sort()
+        for file in headers:
+            self.serialize_exports(output, file)
+        output.write("  </files>\n")
+        output.write("  <symbols>\n")
+        macros = list(self.idx.macros.keys())
+        macros.sort()
+        for macro in macros:
+            self.serialize_macro(output, macro)
+        enums = list(self.idx.enums.keys())
+        enums.sort()
+        for enum in enums:
+            self.serialize_enum(output, enum)
+        typedefs = list(self.idx.typedefs.keys())
+        typedefs.sort()
+        for typedef in typedefs:
+            self.serialize_typedef(output, typedef)
+        variables = list(self.idx.variables.keys())
+        variables.sort()
+        for variable in variables:
+            self.serialize_variable(output, variable)
+        functions = list(self.idx.functions.keys())
+        functions.sort()
+        for function in functions:
+            self.serialize_function(output, function)
+        output.write("  </symbols>\n")
+        output.write("</api>\n")
+        output.close()
+
+        filename = "%s-refs.xml" % self.name
+        print("Saving XML Cross References %s" % (filename))
+        output = open(filename, "w")
+        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
+        output.write("<apirefs name='%s'>\n" % self.name)
+        self.serialize_xrefs(output)
+        output.write("</apirefs>\n")
+        output.close()
+
+
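+#
+# Guess which project the script is run from by probing for well-known
+# source files, then scan, analyze and serialize its API description.
+#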
+def rebuild():
+    builder = None
+    if glob.glob("parser.c") != []:
+        print("Rebuilding API description for libxml2")
+        builder = docBuilder("libxml2", [".", "."],
+                             ["xmlwin32version.h", "tst.c"])
+    elif glob.glob("../parser.c") != []:
+        print("Rebuilding API description for libxml2")
+        builder = docBuilder("libxml2", ["..", "../include/libxml"],
+                             ["xmlwin32version.h", "tst.c"])
+    elif glob.glob("../libxslt/transform.c") != []:
+        print("Rebuilding API description for libxslt")
+        builder = docBuilder("libxslt", ["../libxslt"],
+                             ["win32config.h", "libxslt.h", "tst.c"])
+    else:
+        print("rebuild() failed, unable to guess the module")
+        return None
+    builder.scan()
+    builder.analyze()
+    builder.serialize()
+    if glob.glob("../libexslt/exslt.c") != []:
+        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
+        extra.scan()
+        extra.analyze()
+        extra.serialize()
+    return builder
+
+#
+# for debugging the parser
+#
+def parse(filename):
+    parser = CParser(filename)
+    idx = parser.parse()
+    return idx
+
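+#
+# Invocation: with a file argument the script switches on the global debug
+# flag and parses just that file; with no argument it rebuilds the full API
+# description (e.g. libxml2-api.xml and libxml2-refs.xml in a libxml2 tree).
+#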
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        debug = 1
+        parse(sys.argv[1])
+    else:
+        rebuild()