Posted to commits@hawq.apache.org by rv...@apache.org on 2015/09/22 21:14:10 UTC

[07/35] incubator-hawq git commit: SGA import. Now with files previously missing because of the .gitignore issue

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/yaml/scanner.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/scanner.py b/tools/bin/ext/yaml/scanner.py
new file mode 100644
index 0000000..a3ecdd0
--- /dev/null
+++ b/tools/bin/ext/yaml/scanner.py
@@ -0,0 +1,1456 @@
+
+# Scanner produces tokens of the following types:
+# STREAM-START
+# STREAM-END
+# DIRECTIVE(name, value)
+# DOCUMENT-START
+# DOCUMENT-END
+# BLOCK-SEQUENCE-START
+# BLOCK-MAPPING-START
+# BLOCK-END
+# FLOW-SEQUENCE-START
+# FLOW-MAPPING-START
+# FLOW-SEQUENCE-END
+# FLOW-MAPPING-END
+# BLOCK-ENTRY
+# FLOW-ENTRY
+# KEY
+# VALUE
+# ALIAS(value)
+# ANCHOR(value)
+# TAG(value)
+# SCALAR(value, plain, style)
+#
+# Read comments in the Scanner code for more details.
+#
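+# For example (a sketch): scanning the document "a: 1" produces roughly
+#   STREAM-START, BLOCK-MAPPING-START, KEY, SCALAR('a', plain),
+#   VALUE, SCALAR('1', plain), BLOCK-END, STREAM-END.
+#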
+
+__all__ = ['Scanner', 'ScannerError']
+
+from error import MarkedYAMLError
+from tokens import *
+
+class ScannerError(MarkedYAMLError):
+    pass
+
+class SimpleKey(object):
+    # See the simple keys treatment below.
+
+    def __init__(self, token_number, required, index, line, column, mark):
+        self.token_number = token_number
+        self.required = required
+        self.index = index
+        self.line = line
+        self.column = column
+        self.mark = mark
+
+class Scanner(object):
+
+    def __init__(self):
+        """Initialize the scanner."""
+        # It is assumed that Scanner and Reader will have a common descendant.
+        # Reader does the dirty work of checking for BOM and converting the
+        # input data to Unicode. It also adds NUL to the end.
+        #
+        # Reader supports the following methods:
+        #   self.peek(i=0)       # peek at the i-th character ahead
+        #   self.prefix(l=1)     # peek at the next l characters
+        #   self.forward(l=1)    # read the next l characters and move the pointer.
+
+        # Have we reached the end of the stream?
+        self.done = False
+
+        # The number of unclosed '{' and '['. `flow_level == 0` means block
+        # context.
+        self.flow_level = 0
+
+        # List of processed tokens that are not yet emitted.
+        self.tokens = []
+
+        # Add the STREAM-START token.
+        self.fetch_stream_start()
+
+        # Number of tokens that were emitted through the `get_token` method.
+        self.tokens_taken = 0
+
+        # The current indentation level.
+        self.indent = -1
+
+        # Past indentation levels.
+        self.indents = []
+
+        # Variables related to simple keys treatment.
+
+        # A simple key is a key that is not denoted by the '?' indicator.
+        # Examples of simple keys:
+        #   ---
+        #   block simple key: value
+        #   ? not a simple key:
+        #   : { flow simple key: value }
+        # We emit the KEY token before all keys, so when we find a potential
+        # simple key, we try to locate the corresponding ':' indicator.
+        # Simple keys should be limited to a single line and 1024 characters.
+
+        # Can a simple key start at the current position? A simple key may
+        # start:
+        # - at the beginning of the line, not counting indentation spaces
+        #       (in block context),
+        # - after '{', '[', ',' (in the flow context),
+        # - after '?', ':', '-' (in the block context).
+        # In the block context, this flag also signifies if a block collection
+        # may start at the current position.
+        self.allow_simple_key = True
+
+        # Keep track of possible simple keys. This is a dictionary. The key
+        # is `flow_level`; there can be no more than one possible simple key
+        # for each level. The value is a SimpleKey record:
+        #   (token_number, required, index, line, column, mark)
+        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
+        # '[', or '{' tokens.
+        self.possible_simple_keys = {}
+
+    # Public methods.
+
+    def check_token(self, *choices):
+        # Check if the next token is one of the given types.
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if self.tokens:
+            if not choices:
+                return True
+            for choice in choices:
+                if isinstance(self.tokens[0], choice):
+                    return True
+        return False
+
+    def peek_token(self):
+        # Return the next token, but do not delete it from the queue.
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if self.tokens:
+            return self.tokens[0]
+
+    def get_token(self):
+        # Return the next token.
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if self.tokens:
+            self.tokens_taken += 1
+            return self.tokens.pop(0)
+
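+    # A typical driver loop over the public methods above (a sketch; in
+    # practice Scanner is mixed into a Loader that also provides Reader):
+    #   while not scanner.check_token(StreamEndToken):
+    #       token = scanner.get_token()
+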
+    # Private methods.
+
+    def need_more_tokens(self):
+        if self.done:
+            return False
+        if not self.tokens:
+            return True
+        # The current token may be a potential simple key, so we
+        # need to look further.
+        self.stale_possible_simple_keys()
+        if self.next_possible_simple_key() == self.tokens_taken:
+            return True
+
+    def fetch_more_tokens(self):
+
+        # Eat whitespaces and comments until we reach the next token.
+        self.scan_to_next_token()
+
+        # Remove obsolete possible simple keys.
+        self.stale_possible_simple_keys()
+
+        # Compare the current indentation and column. It may add some tokens
+        # and decrease the current indentation level.
+        self.unwind_indent(self.column)
+
+        # Peek the next character.
+        ch = self.peek()
+
+        # Is it the end of stream?
+        if ch == u'\0':
+            return self.fetch_stream_end()
+
+        # Is it a directive?
+        if ch == u'%' and self.check_directive():
+            return self.fetch_directive()
+
+        # Is it the document start?
+        if ch == u'-' and self.check_document_start():
+            return self.fetch_document_start()
+
+        # Is it the document end?
+        if ch == u'.' and self.check_document_end():
+            return self.fetch_document_end()
+
+        # TODO: support for BOM within a stream.
+        #if ch == u'\uFEFF':
+        #    return self.fetch_bom()    <-- issue BOMToken
+
+        # Note: the order of the following checks is NOT significant.
+
+        # Is it the flow sequence start indicator?
+        if ch == u'[':
+            return self.fetch_flow_sequence_start()
+
+        # Is it the flow mapping start indicator?
+        if ch == u'{':
+            return self.fetch_flow_mapping_start()
+
+        # Is it the flow sequence end indicator?
+        if ch == u']':
+            return self.fetch_flow_sequence_end()
+
+        # Is it the flow mapping end indicator?
+        if ch == u'}':
+            return self.fetch_flow_mapping_end()
+
+        # Is it the flow entry indicator?
+        if ch == u',':
+            return self.fetch_flow_entry()
+
+        # Is it the block entry indicator?
+        if ch == u'-' and self.check_block_entry():
+            return self.fetch_block_entry()
+
+        # Is it the key indicator?
+        if ch == u'?' and self.check_key():
+            return self.fetch_key()
+
+        # Is it the value indicator?
+        if ch == u':' and self.check_value():
+            return self.fetch_value()
+
+        # Is it an alias?
+        if ch == u'*':
+            return self.fetch_alias()
+
+        # Is it an anchor?
+        if ch == u'&':
+            return self.fetch_anchor()
+
+        # Is it a tag?
+        if ch == u'!':
+            return self.fetch_tag()
+
+        # Is it a literal scalar?
+        if ch == u'|' and not self.flow_level:
+            return self.fetch_literal()
+
+        # Is it a folded scalar?
+        if ch == u'>' and not self.flow_level:
+            return self.fetch_folded()
+
+        # Is it a single quoted scalar?
+        if ch == u'\'':
+            return self.fetch_single()
+
+        # Is it a double quoted scalar?
+        if ch == u'\"':
+            return self.fetch_double()
+
+        # It must be a plain scalar then.
+        if self.check_plain():
+            return self.fetch_plain()
+
+        # No? It's an error. Let's produce a nice error message.
+        raise ScannerError("while scanning for the next token", None,
+                "found character %r that cannot start any token"
+                % ch.encode('utf-8'), self.get_mark())
+
+    # Simple keys treatment.
+
+    def next_possible_simple_key(self):
+        # Return the number of the nearest possible simple key. Actually we
+        # don't need to loop through the whole dictionary. We may replace it
+        # with the following code:
+        #   if not self.possible_simple_keys:
+        #       return None
+        #   return self.possible_simple_keys[
+        #           min(self.possible_simple_keys.keys())].token_number
+        min_token_number = None
+        for level in self.possible_simple_keys:
+            key = self.possible_simple_keys[level]
+            if min_token_number is None or key.token_number < min_token_number:
+                min_token_number = key.token_number
+        return min_token_number
+
+    def stale_possible_simple_keys(self):
+        # Remove entries that are no longer possible simple keys. According to
+        # the YAML specification, simple keys
+        # - should be limited to a single line,
+        # - should be no longer than 1024 characters.
+        # Disabling this procedure will allow simple keys of any length and
+        # height (may cause problems if indentation is broken though).
+        for level in self.possible_simple_keys.keys():
+            key = self.possible_simple_keys[level]
+            if key.line != self.line  \
+                    or self.index-key.index > 1024:
+                if key.required:
+                    raise ScannerError("while scanning a simple key", key.mark,
+                            "could not found expected ':'", self.get_mark())
+                del self.possible_simple_keys[level]
+
+    def save_possible_simple_key(self):
+        # The next token may start a simple key. We check if it's possible
+        # and save its position. This function is called for
+        #   ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
+
+        # Check if a simple key is required at the current position.
+        required = not self.flow_level and self.indent == self.column
+
+        # A simple key is required only if it is the first token on the
+        # current line; in that case starting a simple key must be allowed,
+        # which the assertion below checks.
+        assert self.allow_simple_key or not required
+
+        # The next token might be a simple key. Let's save its number and
+        # position.
+        if self.allow_simple_key:
+            self.remove_possible_simple_key()
+            token_number = self.tokens_taken+len(self.tokens)
+            key = SimpleKey(token_number, required,
+                    self.index, self.line, self.column, self.get_mark())
+            self.possible_simple_keys[self.flow_level] = key
+
+    def remove_possible_simple_key(self):
+        # Remove the saved possible key position at the current flow level.
+        if self.flow_level in self.possible_simple_keys:
+            key = self.possible_simple_keys[self.flow_level]
+            
+            if key.required:
+                raise ScannerError("while scanning a simple key", key.mark,
+                        "could not found expected ':'", self.get_mark())
+
+            del self.possible_simple_keys[self.flow_level]
+
+    # Indentation functions.
+
+    def unwind_indent(self, column):
+
+        ## In flow context, tokens should respect indentation.
+        ## Actually the condition should be `self.indent >= column` according to
+        ## the spec. But this condition will prohibit intuitively correct
+        ## constructions such as
+        ## key : {
+        ## }
+        #if self.flow_level and self.indent > column:
+        #    raise ScannerError(None, None,
+        #            "invalid intendation or unclosed '[' or '{'",
+        #            self.get_mark())
+
+        # In the flow context, indentation is ignored. We make the scanner
+        # less restrictive than the specification requires.
+        if self.flow_level:
+            return
+
+        # In block context, we may need to issue the BLOCK-END tokens.
+        while self.indent > column:
+            mark = self.get_mark()
+            self.indent = self.indents.pop()
+            self.tokens.append(BlockEndToken(mark, mark))
+
+    def add_indent(self, column):
+        # Check if we need to increase indentation.
+        if self.indent < column:
+            self.indents.append(self.indent)
+            self.indent = column
+            return True
+        return False
+
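+    # For example (a sketch): scanning
+    #   a:
+    #     b: 1
+    # calls add_indent(0) for 'a' and add_indent(2) for 'b' (from
+    # fetch_value below), leaving indents == [-1, 0] and indent == 2;
+    # unwind_indent(-1) at the end of the stream then emits two BLOCK-END
+    # tokens.
+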
+    # Fetchers.
+
+    def fetch_stream_start(self):
+        # We always add STREAM-START as the first token and STREAM-END as the
+        # last token.
+
+        # Read the token.
+        mark = self.get_mark()
+        
+        # Add STREAM-START.
+        self.tokens.append(StreamStartToken(mark, mark,
+            encoding=self.encoding))
+        
+
+    def fetch_stream_end(self):
+
+        # Set the current indentation to -1.
+        self.unwind_indent(-1)
+
+        # Reset everything (not really needed).
+        self.allow_simple_key = False
+        self.possible_simple_keys = {}
+
+        # Read the token.
+        mark = self.get_mark()
+        
+        # Add STREAM-END.
+        self.tokens.append(StreamEndToken(mark, mark))
+
+        # The stream is finished.
+        self.done = True
+
+    def fetch_directive(self):
+        
+        # Set the current indentation to -1.
+        self.unwind_indent(-1)
+
+        # Reset simple keys.
+        self.remove_possible_simple_key()
+        self.allow_simple_key = False
+
+        # Scan and add DIRECTIVE.
+        self.tokens.append(self.scan_directive())
+
+    def fetch_document_start(self):
+        self.fetch_document_indicator(DocumentStartToken)
+
+    def fetch_document_end(self):
+        self.fetch_document_indicator(DocumentEndToken)
+
+    def fetch_document_indicator(self, TokenClass):
+
+        # Set the current indentation to -1.
+        self.unwind_indent(-1)
+
+        # Reset simple keys. Note that there cannot be a block collection
+        # after '---'.
+        self.remove_possible_simple_key()
+        self.allow_simple_key = False
+
+        # Add DOCUMENT-START or DOCUMENT-END.
+        start_mark = self.get_mark()
+        self.forward(3)
+        end_mark = self.get_mark()
+        self.tokens.append(TokenClass(start_mark, end_mark))
+
+    def fetch_flow_sequence_start(self):
+        self.fetch_flow_collection_start(FlowSequenceStartToken)
+
+    def fetch_flow_mapping_start(self):
+        self.fetch_flow_collection_start(FlowMappingStartToken)
+
+    def fetch_flow_collection_start(self, TokenClass):
+
+        # '[' and '{' may start a simple key.
+        self.save_possible_simple_key()
+
+        # Increase the flow level.
+        self.flow_level += 1
+
+        # Simple keys are allowed after '[' and '{'.
+        self.allow_simple_key = True
+
+        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
+        start_mark = self.get_mark()
+        self.forward()
+        end_mark = self.get_mark()
+        self.tokens.append(TokenClass(start_mark, end_mark))
+
+    def fetch_flow_sequence_end(self):
+        self.fetch_flow_collection_end(FlowSequenceEndToken)
+
+    def fetch_flow_mapping_end(self):
+        self.fetch_flow_collection_end(FlowMappingEndToken)
+
+    def fetch_flow_collection_end(self, TokenClass):
+
+        # Reset possible simple key on the current level.
+        self.remove_possible_simple_key()
+
+        # Decrease the flow level.
+        self.flow_level -= 1
+
+        # No simple keys after ']' or '}'.
+        self.allow_simple_key = False
+
+        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
+        start_mark = self.get_mark()
+        self.forward()
+        end_mark = self.get_mark()
+        self.tokens.append(TokenClass(start_mark, end_mark))
+
+    def fetch_flow_entry(self):
+
+        # Simple keys are allowed after ','.
+        self.allow_simple_key = True
+
+        # Reset possible simple key on the current level.
+        self.remove_possible_simple_key()
+
+        # Add FLOW-ENTRY.
+        start_mark = self.get_mark()
+        self.forward()
+        end_mark = self.get_mark()
+        self.tokens.append(FlowEntryToken(start_mark, end_mark))
+
+    def fetch_block_entry(self):
+
+        # Block context needs additional checks.
+        if not self.flow_level:
+
+            # Are we allowed to start a new entry?
+            if not self.allow_simple_key:
+                raise ScannerError(None, None,
+                        "sequence entries are not allowed here",
+                        self.get_mark())
+
+            # We may need to add BLOCK-SEQUENCE-START.
+            if self.add_indent(self.column):
+                mark = self.get_mark()
+                self.tokens.append(BlockSequenceStartToken(mark, mark))
+
+        # It's an error for the block entry to occur in the flow context,
+        # but we let the parser detect this.
+        else:
+            pass
+
+        # Simple keys are allowed after '-'.
+        self.allow_simple_key = True
+
+        # Reset possible simple key on the current level.
+        self.remove_possible_simple_key()
+
+        # Add BLOCK-ENTRY.
+        start_mark = self.get_mark()
+        self.forward()
+        end_mark = self.get_mark()
+        self.tokens.append(BlockEntryToken(start_mark, end_mark))
+
+    def fetch_key(self):
+        
+        # Block context needs additional checks.
+        if not self.flow_level:
+
+            # Are we allowed to start a key (not necessarily a simple one)?
+            if not self.allow_simple_key:
+                raise ScannerError(None, None,
+                        "mapping keys are not allowed here",
+                        self.get_mark())
+
+            # We may need to add BLOCK-MAPPING-START.
+            if self.add_indent(self.column):
+                mark = self.get_mark()
+                self.tokens.append(BlockMappingStartToken(mark, mark))
+
+        # Simple keys are allowed after '?' in the block context.
+        self.allow_simple_key = not self.flow_level
+
+        # Reset possible simple key on the current level.
+        self.remove_possible_simple_key()
+
+        # Add KEY.
+        start_mark = self.get_mark()
+        self.forward()
+        end_mark = self.get_mark()
+        self.tokens.append(KeyToken(start_mark, end_mark))
+
+    def fetch_value(self):
+
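+        # Worked example (a sketch): for "a: 1" the ':' arrives while
+        # SCALAR('a') is a saved possible simple key, so KEY (and, at a new
+        # indentation level, BLOCK-MAPPING-START) are inserted *before* the
+        # already-queued scalar token rather than appended.
+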
+        # Do we have a pending possible simple key?
+        if self.flow_level in self.possible_simple_keys:
+
+            # Add KEY.
+            key = self.possible_simple_keys[self.flow_level]
+            del self.possible_simple_keys[self.flow_level]
+            self.tokens.insert(key.token_number-self.tokens_taken,
+                    KeyToken(key.mark, key.mark))
+
+            # If this key starts a new block mapping, we need to add
+            # BLOCK-MAPPING-START.
+            if not self.flow_level:
+                if self.add_indent(key.column):
+                    self.tokens.insert(key.token_number-self.tokens_taken,
+                            BlockMappingStartToken(key.mark, key.mark))
+
+            # There cannot be two simple keys one after another.
+            self.allow_simple_key = False
+
+        # It must be a part of a complex key.
+        else:
+            
+            # Block context needs additional checks.
+            # (Do we really need them? They will be caught by the parser
+            # anyway.)
+            if not self.flow_level:
+
+                # We are allowed to start a complex value if and only if
+                # we can start a simple key.
+                if not self.allow_simple_key:
+                    raise ScannerError(None, None,
+                            "mapping values are not allowed here",
+                            self.get_mark())
+
+            # If this value starts a new block mapping, we need to add
+            # BLOCK-MAPPING-START.  It will be detected as an error later by
+            # the parser.
+            if not self.flow_level:
+                if self.add_indent(self.column):
+                    mark = self.get_mark()
+                    self.tokens.append(BlockMappingStartToken(mark, mark))
+
+            # Simple keys are allowed after ':' in the block context.
+            self.allow_simple_key = not self.flow_level
+
+            # Reset possible simple key on the current level.
+            self.remove_possible_simple_key()
+
+        # Add VALUE.
+        start_mark = self.get_mark()
+        self.forward()
+        end_mark = self.get_mark()
+        self.tokens.append(ValueToken(start_mark, end_mark))
+
+    def fetch_alias(self):
+
+        # ALIAS could be a simple key.
+        self.save_possible_simple_key()
+
+        # No simple keys after ALIAS.
+        self.allow_simple_key = False
+
+        # Scan and add ALIAS.
+        self.tokens.append(self.scan_anchor(AliasToken))
+
+    def fetch_anchor(self):
+
+        # ANCHOR could start a simple key.
+        self.save_possible_simple_key()
+
+        # No simple keys after ANCHOR.
+        self.allow_simple_key = False
+
+        # Scan and add ANCHOR.
+        self.tokens.append(self.scan_anchor(AnchorToken))
+
+    def fetch_tag(self):
+
+        # TAG could start a simple key.
+        self.save_possible_simple_key()
+
+        # No simple keys after TAG.
+        self.allow_simple_key = False
+
+        # Scan and add TAG.
+        self.tokens.append(self.scan_tag())
+
+    def fetch_literal(self):
+        self.fetch_block_scalar(style='|')
+
+    def fetch_folded(self):
+        self.fetch_block_scalar(style='>')
+
+    def fetch_block_scalar(self, style):
+
+        # A simple key may follow a block scalar.
+        self.allow_simple_key = True
+
+        # Reset possible simple key on the current level.
+        self.remove_possible_simple_key()
+
+        # Scan and add SCALAR.
+        self.tokens.append(self.scan_block_scalar(style))
+
+    def fetch_single(self):
+        self.fetch_flow_scalar(style='\'')
+
+    def fetch_double(self):
+        self.fetch_flow_scalar(style='"')
+
+    def fetch_flow_scalar(self, style):
+
+        # A flow scalar could be a simple key.
+        self.save_possible_simple_key()
+
+        # No simple keys after flow scalars.
+        self.allow_simple_key = False
+
+        # Scan and add SCALAR.
+        self.tokens.append(self.scan_flow_scalar(style))
+
+    def fetch_plain(self):
+
+        # A plain scalar could be a simple key.
+        self.save_possible_simple_key()
+
+        # No simple keys after plain scalars. But note that `scan_plain` will
+        # change this flag if the scan is finished at the beginning of the
+        # line.
+        self.allow_simple_key = False
+
+        # Scan and add SCALAR. May change `allow_simple_key`.
+        self.tokens.append(self.scan_plain())
+
+    # Checkers.
+
+    def check_directive(self):
+
+        # DIRECTIVE:        ^ '%' ...
+        # The '%' indicator is already checked.
+        if self.column == 0:
+            return True
+
+    def check_document_start(self):
+
+        # DOCUMENT-START:   ^ '---' (' '|'\n')
+        if self.column == 0:
+            if self.prefix(3) == u'---'  \
+                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                return True
+
+    def check_document_end(self):
+
+        # DOCUMENT-END:     ^ '...' (' '|'\n')
+        if self.column == 0:
+            if self.prefix(3) == u'...'  \
+                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                return True
+
+    def check_block_entry(self):
+
+        # BLOCK-ENTRY:      '-' (' '|'\n')
+        return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+
+    def check_key(self):
+
+        # KEY(flow context):    '?'
+        if self.flow_level:
+            return True
+
+        # KEY(block context):   '?' (' '|'\n')
+        else:
+            return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+
+    def check_value(self):
+
+        # VALUE(flow context):  ':'
+        if self.flow_level:
+            return True
+
+        # VALUE(block context): ':' (' '|'\n')
+        else:
+            return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+
+    def check_plain(self):
+
+        # A plain scalar may start with any non-space character except:
+        #   '-', '?', ':', ',', '[', ']', '{', '}',
+        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
+        #   '%', '@', '`'.
+        #
+        # It may also start with
+        #   '-', '?', ':'
+        # if it is followed by a non-space character.
+        #
+        # Note that we limit the last rule to the block context (except the
+        # '-' character) because we want the flow context to be space
+        # independent.
+        ch = self.peek()
+        return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`'  \
+                or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
+                        and (ch == u'-' or (not self.flow_level and ch in u'?:')))
+
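+    # For example (a sketch): '-foo' starts a plain scalar because the '-'
+    # is followed by a non-space, while '- foo' is a block entry; in the
+    # flow context '?foo' is not a plain scalar (the '?' starts a KEY
+    # token), since the '?'/':' exception above is limited to the block
+    # context.
+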
+    # Scanners.
+
+    def scan_to_next_token(self):
+        # We ignore spaces, line breaks and comments.
+        # If we find a line break in the block context, we set the flag
+        # `allow_simple_key` on.
+        # The byte order mark is stripped if it's the first character in the
+        # stream. We do not yet support BOM inside the stream as the
+        # specification requires. Any such mark will be considered as a part
+        # of the document.
+        #
+        # TODO: We need to make tab handling rules more sane. A good rule is
+        #   Tabs cannot precede tokens
+        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
+        #   KEY(block), VALUE(block), BLOCK-ENTRY
+        # So the checking code is
+        #   if <TAB>:
+        #       self.allow_simple_keys = False
+        # We also need to add the check for `allow_simple_keys == True` to
+        # `unwind_indent` before issuing BLOCK-END.
+        # Scanners for block, flow, and plain scalars need to be modified.
+
+        if self.index == 0 and self.peek() == u'\uFEFF':
+            self.forward()
+        found = False
+        while not found:
+            while self.peek() == u' ':
+                self.forward()
+            if self.peek() == u'#':
+                while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+                    self.forward()
+            if self.scan_line_break():
+                if not self.flow_level:
+                    self.allow_simple_key = True
+            else:
+                found = True
+
+    def scan_directive(self):
+        # See the specification for details.
+        start_mark = self.get_mark()
+        self.forward()
+        name = self.scan_directive_name(start_mark)
+        value = None
+        if name == u'YAML':
+            value = self.scan_yaml_directive_value(start_mark)
+            end_mark = self.get_mark()
+        elif name == u'TAG':
+            value = self.scan_tag_directive_value(start_mark)
+            end_mark = self.get_mark()
+        else:
+            end_mark = self.get_mark()
+            while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+                self.forward()
+        self.scan_directive_ignored_line(start_mark)
+        return DirectiveToken(name, value, start_mark, end_mark)
+
+    def scan_directive_name(self, start_mark):
+        # See the specification for details.
+        length = 0
+        ch = self.peek(length)
+        while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
+                or ch in u'-_':
+            length += 1
+            ch = self.peek(length)
+        if not length:
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected alphabetic or numeric character, but found %r"
+                    % ch.encode('utf-8'), self.get_mark())
+        value = self.prefix(length)
+        self.forward(length)
+        ch = self.peek()
+        if ch not in u'\0 \r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected alphabetic or numeric character, but found %r"
+                    % ch.encode('utf-8'), self.get_mark())
+        return value
+
+    def scan_yaml_directive_value(self, start_mark):
+        # See the specification for details.
+        while self.peek() == u' ':
+            self.forward()
+        major = self.scan_yaml_directive_number(start_mark)
+        if self.peek() != '.':
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected a digit or '.', but found %r"
+                    % self.peek().encode('utf-8'),
+                    self.get_mark())
+        self.forward()
+        minor = self.scan_yaml_directive_number(start_mark)
+        if self.peek() not in u'\0 \r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected a digit or ' ', but found %r"
+                    % self.peek().encode('utf-8'),
+                    self.get_mark())
+        return (major, minor)
+
+    def scan_yaml_directive_number(self, start_mark):
+        # See the specification for details.
+        ch = self.peek()
+        if not (u'0' <= ch <= '9'):
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected a digit, but found %r" % ch.encode('utf-8'),
+                    self.get_mark())
+        length = 0
+        while u'0' <= self.peek(length) <= u'9':
+            length += 1
+        value = int(self.prefix(length))
+        self.forward(length)
+        return value
+
+    def scan_tag_directive_value(self, start_mark):
+        # See the specification for details.
+        while self.peek() == u' ':
+            self.forward()
+        handle = self.scan_tag_directive_handle(start_mark)
+        while self.peek() == u' ':
+            self.forward()
+        prefix = self.scan_tag_directive_prefix(start_mark)
+        return (handle, prefix)
+
+    def scan_tag_directive_handle(self, start_mark):
+        # See the specification for details.
+        value = self.scan_tag_handle('directive', start_mark)
+        ch = self.peek()
+        if ch != u' ':
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected ' ', but found %r" % ch.encode('utf-8'),
+                    self.get_mark())
+        return value
+
+    def scan_tag_directive_prefix(self, start_mark):
+        # See the specification for details.
+        value = self.scan_tag_uri('directive', start_mark)
+        ch = self.peek()
+        if ch not in u'\0 \r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected ' ', but found %r" % ch.encode('utf-8'),
+                    self.get_mark())
+        return value
+
+    def scan_directive_ignored_line(self, start_mark):
+        # See the specification for details.
+        while self.peek() == u' ':
+            self.forward()
+        if self.peek() == u'#':
+            while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+                self.forward()
+        ch = self.peek()
+        if ch not in u'\0\r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a directive", start_mark,
+                    "expected a comment or a line break, but found %r"
+                        % ch.encode('utf-8'), self.get_mark())
+        self.scan_line_break()
+
+    def scan_anchor(self, TokenClass):
+        # The specification does not restrict characters for anchors and
+        # aliases. This may lead to problems, for instance, the document:
+        #   [ *alias, value ]
+        # can be interpreted in two ways, as
+        #   [ "value" ]
+        # and
+        #   [ *alias , "value" ]
+        # Therefore we restrict aliases to numbers and ASCII letters.
+        start_mark = self.get_mark()
+        indicator = self.peek()
+        if indicator == '*':
+            name = 'alias'
+        else:
+            name = 'anchor'
+        self.forward()
+        length = 0
+        ch = self.peek(length)
+        while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
+                or ch in u'-_':
+            length += 1
+            ch = self.peek(length)
+        if not length:
+            raise ScannerError("while scanning an %s" % name, start_mark,
+                    "expected alphabetic or numeric character, but found %r"
+                    % ch.encode('utf-8'), self.get_mark())
+        value = self.prefix(length)
+        self.forward(length)
+        ch = self.peek()
+        if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
+            raise ScannerError("while scanning an %s" % name, start_mark,
+                    "expected alphabetic or numeric character, but found %r"
+                    % ch.encode('utf-8'), self.get_mark())
+        end_mark = self.get_mark()
+        return TokenClass(value, start_mark, end_mark)
+
+    def scan_tag(self):
+        # See the specification for details.
+        start_mark = self.get_mark()
+        ch = self.peek(1)
+        if ch == u'<':
+            handle = None
+            self.forward(2)
+            suffix = self.scan_tag_uri('tag', start_mark)
+            if self.peek() != u'>':
+                raise ScannerError("while parsing a tag", start_mark,
+                        "expected '>', but found %r" % self.peek().encode('utf-8'),
+                        self.get_mark())
+            self.forward()
+        elif ch in u'\0 \t\r\n\x85\u2028\u2029':
+            handle = None
+            suffix = u'!'
+            self.forward()
+        else:
+            length = 1
+            use_handle = False
+            while ch not in u'\0 \r\n\x85\u2028\u2029':
+                if ch == u'!':
+                    use_handle = True
+                    break
+                length += 1
+                ch = self.peek(length)
+            if use_handle:
+                handle = self.scan_tag_handle('tag', start_mark)
+            else:
+                handle = u'!'
+                self.forward()
+            suffix = self.scan_tag_uri('tag', start_mark)
+        ch = self.peek()
+        if ch not in u'\0 \r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a tag", start_mark,
+                    "expected ' ', but found %r" % ch.encode('utf-8'),
+                    self.get_mark())
+        value = (handle, suffix)
+        end_mark = self.get_mark()
+        return TagToken(value, start_mark, end_mark)
+
+    def scan_block_scalar(self, style):
+        # See the specification for details.
+
+        if style == '>':
+            folded = True
+        else:
+            folded = False
+
+        chunks = []
+        start_mark = self.get_mark()
+
+        # Scan the header.
+        self.forward()
+        chomping, increment = self.scan_block_scalar_indicators(start_mark)
+        self.scan_block_scalar_ignored_line(start_mark)
+
+        # Determine the indentation level and go to the first non-empty line.
+        min_indent = self.indent+1
+        if min_indent < 1:
+            min_indent = 1
+        if increment is None:
+            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
+            indent = max(min_indent, max_indent)
+        else:
+            indent = min_indent+increment-1
+            breaks, end_mark = self.scan_block_scalar_breaks(indent)
+        line_break = u''
+
+        # Scan the inner part of the block scalar.
+        while self.column == indent and self.peek() != u'\0':
+            chunks.extend(breaks)
+            leading_non_space = self.peek() not in u' \t'
+            length = 0
+            while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':
+                length += 1
+            chunks.append(self.prefix(length))
+            self.forward(length)
+            line_break = self.scan_line_break()
+            breaks, end_mark = self.scan_block_scalar_breaks(indent)
+            if self.column == indent and self.peek() != u'\0':
+
+                # Unfortunately, folding rules are ambiguous.
+                #
+                # This is the folding according to the specification:
+                
+                if folded and line_break == u'\n'   \
+                        and leading_non_space and self.peek() not in u' \t':
+                    if not breaks:
+                        chunks.append(u' ')
+                else:
+                    chunks.append(line_break)
+                
+                # This is Clark Evans's interpretation (also in the spec
+                # examples):
+                #
+                #if folded and line_break == u'\n':
+                #    if not breaks:
+                #        if self.peek() not in ' \t':
+                #            chunks.append(u' ')
+                #        else:
+                #            chunks.append(line_break)
+                #else:
+                #    chunks.append(line_break)
+            else:
+                break
+
+        # Chomp the tail.
+        if chomping is not False:
+            chunks.append(line_break)
+        if chomping is True:
+            chunks.extend(breaks)
+
+        # We are done.
+        return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
+                style)
+
+    def scan_block_scalar_indicators(self, start_mark):
+        # See the specification for details.
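+        # For example (a sketch): a '|+2' header yields (chomping=True,
+        # increment=2), '>-' yields (False, None), and a bare '|' yields
+        # (None, None).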
+        chomping = None
+        increment = None
+        ch = self.peek()
+        if ch in u'+-':
+            if ch == '+':
+                chomping = True
+            else:
+                chomping = False
+            self.forward()
+            ch = self.peek()
+            if ch in u'0123456789':
+                increment = int(ch)
+                if increment == 0:
+                    raise ScannerError("while scanning a block scalar", start_mark,
+                            "expected indentation indicator in the range 1-9, but found 0",
+                            self.get_mark())
+                self.forward()
+        elif ch in u'0123456789':
+            increment = int(ch)
+            if increment == 0:
+                raise ScannerError("while scanning a block scalar", start_mark,
+                        "expected indentation indicator in the range 1-9, but found 0",
+                        self.get_mark())
+            self.forward()
+            ch = self.peek()
+            if ch in u'+-':
+                if ch == '+':
+                    chomping = True
+                else:
+                    chomping = False
+                self.forward()
+        ch = self.peek()
+        if ch not in u'\0 \r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a block scalar", start_mark,
+                    "expected chomping or indentation indicators, but found %r"
+                        % ch.encode('utf-8'), self.get_mark())
+        return chomping, increment
+
+    def scan_block_scalar_ignored_line(self, start_mark):
+        # See the specification for details.
+        while self.peek() == u' ':
+            self.forward()
+        if self.peek() == u'#':
+            while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+                self.forward()
+        ch = self.peek()
+        if ch not in u'\0\r\n\x85\u2028\u2029':
+            raise ScannerError("while scanning a block scalar", start_mark,
+                    "expected a comment or a line break, but found %r"
+                        % ch.encode('utf-8'), self.get_mark())
+        self.scan_line_break()
+
+    def scan_block_scalar_indentation(self):
+        # See the specification for details.
+        chunks = []
+        max_indent = 0
+        end_mark = self.get_mark()
+        while self.peek() in u' \r\n\x85\u2028\u2029':
+            if self.peek() != u' ':
+                chunks.append(self.scan_line_break())
+                end_mark = self.get_mark()
+            else:
+                self.forward()
+                if self.column > max_indent:
+                    max_indent = self.column
+        return chunks, max_indent, end_mark
+
+    def scan_block_scalar_breaks(self, indent):
+        # See the specification for details.
+        chunks = []
+        end_mark = self.get_mark()
+        while self.column < indent and self.peek() == u' ':
+            self.forward()
+        while self.peek() in u'\r\n\x85\u2028\u2029':
+            chunks.append(self.scan_line_break())
+            end_mark = self.get_mark()
+            while self.column < indent and self.peek() == u' ':
+                self.forward()
+        return chunks, end_mark
+
+    def scan_flow_scalar(self, style):
+        # See the specification for details.
+        # Note that we loosen the indentation rules for quoted scalars.
+        # Quoted scalars don't need to adhere to indentation because '"'
+        # and "'" clearly mark where they begin and end. Therefore we are
+        # less restrictive than the specification requires. We only need to check
+        # that document separators are not included in scalars.
+        if style == '"':
+            double = True
+        else:
+            double = False
+        chunks = []
+        start_mark = self.get_mark()
+        quote = self.peek()
+        self.forward()
+        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
+        while self.peek() != quote:
+            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
+            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
+        self.forward()
+        end_mark = self.get_mark()
+        return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
+                style)
+
+    ESCAPE_REPLACEMENTS = {
+        u'0':   u'\0',
+        u'a':   u'\x07',
+        u'b':   u'\x08',
+        u't':   u'\x09',
+        u'\t':  u'\x09',
+        u'n':   u'\x0A',
+        u'v':   u'\x0B',
+        u'f':   u'\x0C',
+        u'r':   u'\x0D',
+        u'e':   u'\x1B',
+        u' ':   u'\x20',
+        u'\"':  u'\"',
+        u'\\':  u'\\',
+        u'N':   u'\x85',
+        u'_':   u'\xA0',
+        u'L':   u'\u2028',
+        u'P':   u'\u2029',
+    }
+
+    ESCAPE_CODES = {
+        u'x':   2,
+        u'u':   4,
+        u'U':   8,
+    }
+
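+    # For example (a sketch): in a double-quoted scalar, '\t' maps through
+    # ESCAPE_REPLACEMENTS to a tab, while '\x41' and '\u263A' map through
+    # ESCAPE_CODES to u'A' and u'\u263a' in scan_flow_scalar_non_spaces
+    # below.
+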
+    def scan_flow_scalar_non_spaces(self, double, start_mark):
+        # See the specification for details.
+        chunks = []
+        while True:
+            length = 0
+            while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
+                length += 1
+            if length:
+                chunks.append(self.prefix(length))
+                self.forward(length)
+            ch = self.peek()
+            if not double and ch == u'\'' and self.peek(1) == u'\'':
+                chunks.append(u'\'')
+                self.forward(2)
+            elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
+                chunks.append(ch)
+                self.forward()
+            elif double and ch == u'\\':
+                self.forward()
+                ch = self.peek()
+                if ch in self.ESCAPE_REPLACEMENTS:
+                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
+                    self.forward()
+                elif ch in self.ESCAPE_CODES:
+                    length = self.ESCAPE_CODES[ch]
+                    self.forward()
+                    for k in range(length):
+                        if self.peek(k) not in u'0123456789ABCDEFabcdef':
+                            raise ScannerError("while scanning a double-quoted scalar", start_mark,
+                                    "expected escape sequence of %d hexdecimal numbers, but found %r" %
+                                        (length, self.peek(k).encode('utf-8')), self.get_mark())
+                    code = int(self.prefix(length), 16)
+                    chunks.append(unichr(code))
+                    self.forward(length)
+                elif ch in u'\r\n\x85\u2028\u2029':
+                    self.scan_line_break()
+                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
+                else:
+                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
+                            "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark())
+            else:
+                return chunks
+
+    def scan_flow_scalar_spaces(self, double, start_mark):
+        # See the specification for details.
+        chunks = []
+        length = 0
+        while self.peek(length) in u' \t':
+            length += 1
+        whitespaces = self.prefix(length)
+        self.forward(length)
+        ch = self.peek()
+        if ch == u'\0':
+            raise ScannerError("while scanning a quoted scalar", start_mark,
+                    "found unexpected end of stream", self.get_mark())
+        elif ch in u'\r\n\x85\u2028\u2029':
+            line_break = self.scan_line_break()
+            breaks = self.scan_flow_scalar_breaks(double, start_mark)
+            if line_break != u'\n':
+                chunks.append(line_break)
+            elif not breaks:
+                chunks.append(u' ')
+            chunks.extend(breaks)
+        else:
+            chunks.append(whitespaces)
+        return chunks
+
+    def scan_flow_scalar_breaks(self, double, start_mark):
+        # See the specification for details.
+        chunks = []
+        while True:
+            # Instead of checking indentation, we check for document
+            # separators.
+            prefix = self.prefix(3)
+            if (prefix == u'---' or prefix == u'...')   \
+                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                raise ScannerError("while scanning a quoted scalar", start_mark,
+                        "found unexpected document separator", self.get_mark())
+            while self.peek() in u' \t':
+                self.forward()
+            if self.peek() in u'\r\n\x85\u2028\u2029':
+                chunks.append(self.scan_line_break())
+            else:
+                return chunks
+
+    def scan_plain(self):
+        # See the specification for details.
+        # We add an additional restriction for the flow context:
+        #   plain scalars in the flow context cannot contain ',', ':' and '?'.
+        # We also keep track of the `allow_simple_key` flag here.
+        # Indentation rules are loosened for the flow context.
+        chunks = []
+        start_mark = self.get_mark()
+        end_mark = start_mark
+        indent = self.indent+1
+        # We allow zero indentation for scalars, but then we need to check for
+        # document separators at the beginning of the line.
+        #if indent == 0:
+        #    indent = 1
+        spaces = []
+        while True:
+            length = 0
+            if self.peek() == u'#':
+                break
+            while True:
+                ch = self.peek(length)
+                if ch in u'\0 \t\r\n\x85\u2028\u2029'   \
+                        or (not self.flow_level and ch == u':' and
+                                self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \
+                        or (self.flow_level and ch in u',:?[]{}'):
+                    break
+                length += 1
+            # It's not clear what we should do with ':' in the flow context.
+            if (self.flow_level and ch == u':'
+                    and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):
+                self.forward(length)
+                raise ScannerError("while scanning a plain scalar", start_mark,
+                    "found unexpected ':'", self.get_mark(),
+                    "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
+            if length == 0:
+                break
+            self.allow_simple_key = False
+            chunks.extend(spaces)
+            chunks.append(self.prefix(length))
+            self.forward(length)
+            end_mark = self.get_mark()
+            spaces = self.scan_plain_spaces(indent, start_mark)
+            if not spaces or self.peek() == u'#' \
+                    or (not self.flow_level and self.column < indent):
+                break
+        return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
+
+    def scan_plain_spaces(self, indent, start_mark):
+        # See the specification for details.
+        # The specification is really confusing about tabs in plain scalars.
+        # We just forbid them completely. Do not use tabs in YAML!
+        chunks = []
+        length = 0
+        while self.peek(length) in u' ':
+            length += 1
+        whitespaces = self.prefix(length)
+        self.forward(length)
+        ch = self.peek()
+        if ch in u'\r\n\x85\u2028\u2029':
+            line_break = self.scan_line_break()
+            self.allow_simple_key = True
+            prefix = self.prefix(3)
+            if (prefix == u'---' or prefix == u'...')   \
+                    and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                return
+            breaks = []
+            while self.peek() in u' \r\n\x85\u2028\u2029':
+                if self.peek() == ' ':
+                    self.forward()
+                else:
+                    breaks.append(self.scan_line_break())
+                    prefix = self.prefix(3)
+                    if (prefix == u'---' or prefix == u'...')   \
+                            and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                        return
+            if line_break != u'\n':
+                chunks.append(line_break)
+            elif not breaks:
+                chunks.append(u' ')
+            chunks.extend(breaks)
+        elif whitespaces:
+            chunks.append(whitespaces)
+        return chunks
+
+    def scan_tag_handle(self, name, start_mark):
+        # See the specification for details.
+        # For some strange reason, the specification does not allow '_' in
+        # tag handles. I have allowed it anyway.
+        ch = self.peek()
+        if ch != u'!':
+            raise ScannerError("while scanning a %s" % name, start_mark,
+                    "expected '!', but found %r" % ch.encode('utf-8'),
+                    self.get_mark())
+        length = 1
+        ch = self.peek(length)
+        if ch != u' ':
+            while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
+                    or ch in u'-_':
+                length += 1
+                ch = self.peek(length)
+            if ch != u'!':
+                self.forward(length)
+                raise ScannerError("while scanning a %s" % name, start_mark,
+                        "expected '!', but found %r" % ch.encode('utf-8'),
+                        self.get_mark())
+            length += 1
+        value = self.prefix(length)
+        self.forward(length)
+        return value
+
+    def scan_tag_uri(self, name, start_mark):
+        # See the specification for details.
+        # Note: we do not check whether the URI is well-formed.
+        chunks = []
+        length = 0
+        ch = self.peek(length)
+        while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
+                or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
+            if ch == u'%':
+                chunks.append(self.prefix(length))
+                self.forward(length)
+                length = 0
+                chunks.append(self.scan_uri_escapes(name, start_mark))
+            else:
+                length += 1
+            ch = self.peek(length)
+        if length:
+            chunks.append(self.prefix(length))
+            self.forward(length)
+            length = 0
+        if not chunks:
+            raise ScannerError("while parsing a %s" % name, start_mark,
+                    "expected URI, but found %r" % ch.encode('utf-8'),
+                    self.get_mark())
+        return u''.join(chunks)
+
+    def scan_uri_escapes(self, name, start_mark):
+        # See the specification for details.
+        bytes = []
+        mark = self.get_mark()
+        while self.peek() == u'%':
+            self.forward()
+            for k in range(2):
+                if self.peek(k) not in u'0123456789ABCDEFabcdef':
+                    raise ScannerError("while scanning a %s" % name, start_mark,
+                            "expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
+                                (self.peek(k).encode('utf-8')), self.get_mark())
+            bytes.append(chr(int(self.prefix(2), 16)))
+            self.forward(2)
+        try:
+            value = unicode(''.join(bytes), 'utf-8')
+        except UnicodeDecodeError, exc:
+            raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
+        return value
+
+    def scan_line_break(self):
+        # Transforms:
+        #   '\r\n'      :   '\n'
+        #   '\r'        :   '\n'
+        #   '\n'        :   '\n'
+        #   '\x85'      :   '\n'
+        #   '\u2028'    :   '\u2028'
+        #   '\u2029'    :   '\u2029'
+        #   default     :   ''
+        ch = self.peek()
+        if ch in u'\r\n\x85':
+            if self.prefix(2) == u'\r\n':
+                self.forward(2)
+            else:
+                self.forward()
+            return u'\n'
+        elif ch in u'\u2028\u2029':
+            self.forward()
+            return ch
+        return u''
+
+#try:
+#    import psyco
+#    psyco.bind(Scanner)
+#except ImportError:
+#    pass
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/yaml/scanner.pyc
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/scanner.pyc b/tools/bin/ext/yaml/scanner.pyc
new file mode 100644
index 0000000..3ae15e8
Binary files /dev/null and b/tools/bin/ext/yaml/scanner.pyc differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/yaml/serializer.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/serializer.py b/tools/bin/ext/yaml/serializer.py
new file mode 100644
index 0000000..2101f95
--- /dev/null
+++ b/tools/bin/ext/yaml/serializer.py
@@ -0,0 +1,111 @@
+
+__all__ = ['Serializer', 'SerializerError']
+
+from error import YAMLError
+from events import *
+from nodes import *
+
+class SerializerError(YAMLError):
+    pass
+
+class Serializer(object):
+
+    ANCHOR_TEMPLATE = u'id%03d'
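+    # e.g. the first generated anchor is u'id001' (see generate_anchor below)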
+
+    def __init__(self, encoding=None,
+            explicit_start=None, explicit_end=None, version=None, tags=None):
+        self.use_encoding = encoding
+        self.use_explicit_start = explicit_start
+        self.use_explicit_end = explicit_end
+        self.use_version = version
+        self.use_tags = tags
+        self.serialized_nodes = {}
+        self.anchors = {}
+        self.last_anchor_id = 0
+        self.closed = None
+
+    def open(self):
+        if self.closed is None:
+            self.emit(StreamStartEvent(encoding=self.use_encoding))
+            self.closed = False
+        elif self.closed:
+            raise SerializerError("serializer is closed")
+        else:
+            raise SerializerError("serializer is already opened")
+
+    def close(self):
+        if self.closed is None:
+            raise SerializerError("serializer is not opened")
+        elif not self.closed:
+            self.emit(StreamEndEvent())
+            self.closed = True
+
+    #def __del__(self):
+    #    self.close()
+
+    def serialize(self, node):
+        if self.closed is None:
+            raise SerializerError("serializer is not opened")
+        elif self.closed:
+            raise SerializerError("serializer is closed")
+        self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
+            version=self.use_version, tags=self.use_tags))
+        self.anchor_node(node)
+        self.serialize_node(node, None, None)
+        self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
+        self.serialized_nodes = {}
+        self.anchors = {}
+        self.last_anchor_id = 0
+
+    def anchor_node(self, node):
+        if node in self.anchors:
+            if self.anchors[node] is None:
+                self.anchors[node] = self.generate_anchor(node)
+        else:
+            self.anchors[node] = None
+            if isinstance(node, SequenceNode):
+                for item in node.value:
+                    self.anchor_node(item)
+            elif isinstance(node, MappingNode):
+                for key, value in node.value:
+                    self.anchor_node(key)
+                    self.anchor_node(value)
+
+    def generate_anchor(self, node):
+        self.last_anchor_id += 1
+        return self.ANCHOR_TEMPLATE % self.last_anchor_id
+
+    def serialize_node(self, node, parent, index):
+        alias = self.anchors[node]
+        if node in self.serialized_nodes:
+            self.emit(AliasEvent(alias))
+        else:
+            self.serialized_nodes[node] = True
+            self.descend_resolver(parent, index)
+            if isinstance(node, ScalarNode):
+                detected_tag = self.resolve(ScalarNode, node.value, (True, False))
+                default_tag = self.resolve(ScalarNode, node.value, (False, True))
+                implicit = (node.tag == detected_tag), (node.tag == default_tag)
+                self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
+                    style=node.style))
+            elif isinstance(node, SequenceNode):
+                implicit = (node.tag
+                            == self.resolve(SequenceNode, node.value, True))
+                self.emit(SequenceStartEvent(alias, node.tag, implicit,
+                    flow_style=node.flow_style))
+                index = 0
+                for item in node.value:
+                    self.serialize_node(item, node, index)
+                    index += 1
+                self.emit(SequenceEndEvent())
+            elif isinstance(node, MappingNode):
+                implicit = (node.tag
+                            == self.resolve(MappingNode, node.value, True))
+                self.emit(MappingStartEvent(alias, node.tag, implicit,
+                    flow_style=node.flow_style))
+                for key, value in node.value:
+                    self.serialize_node(key, node, None)
+                    self.serialize_node(value, node, key)
+                self.emit(MappingEndEvent())
+            self.ascend_resolver()
+

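The Serializer above is a mixin: the Dumper classes combine it with an
emitter, a representer and a resolver, and its open()/serialize()/close()
lifecycle is normally driven through the package-level serialize() helper.
A brief sketch in Python 2, assuming the package imports as plain `yaml`:

    import yaml

    # compose() builds the node graph; the aliased sequence is a single
    # shared node, so anchor_node() assigns it a generated anchor and
    # serialize_node() emits it once and references it afterwards.
    node = yaml.compose("x: &a [1, 2]\ny: *a\n")
    print yaml.serialize(node)

With the default ANCHOR_TEMPLATE this should print the shared list once,
roughly as `x: &id001 [1, 2]` followed by `y: *id001`.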
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/yaml/serializer.pyc
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/serializer.pyc b/tools/bin/ext/yaml/serializer.pyc
new file mode 100644
index 0000000..24fce03
Binary files /dev/null and b/tools/bin/ext/yaml/serializer.pyc differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/yaml/tokens.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/tokens.py b/tools/bin/ext/yaml/tokens.py
new file mode 100644
index 0000000..4d0b48a
--- /dev/null
+++ b/tools/bin/ext/yaml/tokens.py
@@ -0,0 +1,104 @@
+
+class Token(object):
+    def __init__(self, start_mark, end_mark):
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+    def __repr__(self):
+        attributes = [key for key in self.__dict__
+                if not key.endswith('_mark')]
+        attributes.sort()
+        arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
+                for key in attributes])
+        return '%s(%s)' % (self.__class__.__name__, arguments)
+
+#class BOMToken(Token):
+#    id = '<byte order mark>'
+
+class DirectiveToken(Token):
+    id = '<directive>'
+    def __init__(self, name, value, start_mark, end_mark):
+        self.name = name
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+
+class DocumentStartToken(Token):
+    id = '<document start>'
+
+class DocumentEndToken(Token):
+    id = '<document end>'
+
+class StreamStartToken(Token):
+    id = '<stream start>'
+    def __init__(self, start_mark=None, end_mark=None,
+            encoding=None):
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+        self.encoding = encoding
+
+class StreamEndToken(Token):
+    id = '<stream end>'
+
+class BlockSequenceStartToken(Token):
+    id = '<block sequence start>'
+
+class BlockMappingStartToken(Token):
+    id = '<block mapping start>'
+
+class BlockEndToken(Token):
+    id = '<block end>'
+
+class FlowSequenceStartToken(Token):
+    id = '['
+
+class FlowMappingStartToken(Token):
+    id = '{'
+
+class FlowSequenceEndToken(Token):
+    id = ']'
+
+class FlowMappingEndToken(Token):
+    id = '}'
+
+class KeyToken(Token):
+    id = '?'
+
+class ValueToken(Token):
+    id = ':'
+
+class BlockEntryToken(Token):
+    id = '-'
+
+class FlowEntryToken(Token):
+    id = ','
+
+class AliasToken(Token):
+    id = '<alias>'
+    def __init__(self, value, start_mark, end_mark):
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+
+class AnchorToken(Token):
+    id = '<anchor>'
+    def __init__(self, value, start_mark, end_mark):
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+
+class TagToken(Token):
+    id = '<tag>'
+    def __init__(self, value, start_mark, end_mark):
+        self.value = value
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+
+class ScalarToken(Token):
+    id = '<scalar>'
+    def __init__(self, value, plain, start_mark, end_mark, style=None):
+        self.value = value
+        self.plain = plain
+        self.start_mark = start_mark
+        self.end_mark = end_mark
+        self.style = style
+

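Every token carries the start_mark/end_mark pair set above, and the
class-level `id` is the name the parser uses in its error messages. A
small sketch in Python 2 showing both, assuming the package imports as
plain `yaml`:

    import yaml

    # start_mark locates each token in the source stream; Mark exposes
    # zero-based line and column attributes.
    for token in yaml.scan("- item\n"):
        print "%-22s line %d, col %d" % (
            token.id, token.start_mark.line, token.start_mark.column)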
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/yaml/tokens.pyc
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/tokens.pyc b/tools/bin/ext/yaml/tokens.pyc
new file mode 100644
index 0000000..47b5986
Binary files /dev/null and b/tools/bin/ext/yaml/tokens.pyc differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html
new file mode 100644
index 0000000..2683a15
--- /dev/null
+++ b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Docutils 0.4: http://docutils.sourceforge.net/" />
+<title>PyGreSQL Announcements</title>
+<link rel="stylesheet" href="docs.css" type="text/css" />
+</head>
+<body>
+<div class="document" id="pygresql-announcements">
+<h1 class="title">PyGreSQL Announcements</h1>
+<h2 class="subtitle" id="release-of-pygresql-version-4-0">Release of PyGreSQL version 4.0</h2>
+<p>PyGreSQL v4.0 has been released.</p>
+<p>It is available at: <a class="reference" href="ftp://ftp.PyGreSQL.org/pub/distrib/PyGreSQL-4.0.tgz">ftp://ftp.PyGreSQL.org/pub/distrib/PyGreSQL-4.0.tgz</a>.</p>
+<p>If you are running NetBSD, look in the packages directory under databases.
+There is also a package in the FreeBSD ports collection.</p>
+<p>Please refer to <a class="reference" href="changelog.html">changelog.txt</a>
+for things that have changed in this version.</p>
+<p>Please refer to <a class="reference" href="readme.html">readme.txt</a>
+for general information.</p>
+<div class="line-block">
+<div class="line">D'Arcy J.M. Cain</div>
+<div class="line"><a class="reference" href="mailto:darcy&#64;PyGreSQL.org">darcy&#64;PyGreSQL.org</a></div>
+</div>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt
new file mode 100644
index 0000000..fa08458
--- /dev/null
+++ b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt
@@ -0,0 +1,23 @@
+======================
+PyGreSQL Announcements
+======================
+
+-------------------------------
+Release of PyGreSQL version 4.0
+-------------------------------
+
+PyGreSQL v4.0 has been released.
+
+It is available at: ftp://ftp.PyGreSQL.org/pub/distrib/PyGreSQL-4.0.tgz.
+
+If you are running NetBSD, look in the packages directory under databases.
+There is also a package in the FreeBSD ports collection.
+
+Please refer to `changelog.txt <changelog.html>`_
+for things that have changed in this version.
+
+Please refer to `readme.txt <readme.html>`_
+for general information.
+
+| D'Arcy J.M. Cain
+| darcy@PyGreSQL.org

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/pythonSrc/PyGreSQL-4.0/docs/changelog.html
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/changelog.html b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/changelog.html
new file mode 100644
index 0000000..24c38e9
--- /dev/null
+++ b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/changelog.html
@@ -0,0 +1,333 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Docutils 0.4: http://docutils.sourceforge.net/" />
+<title>PyGreSQL ChangeLog</title>
+<link rel="stylesheet" href="docs.css" type="text/css" />
+</head>
+<body>
+<div class="document" id="pygresql-changelog">
+<h1 class="title">PyGreSQL ChangeLog</h1>
+<div class="section">
+<h1><a id="version-4-0-2009-01-01" name="version-4-0-2009-01-01">Version 4.0 (2009-01-01)</a></h1>
+<ul class="simple">
+<li>Dropped support for Python below 2.3 and PostgreSQL below 7.4.</li>
+<li>Improved performance of fetchall() for large result sets
+by speeding up the type casts (as suggested by Peter Schuller).</li>
+<li>Exposed exceptions as attributes of the connection object.</li>
+<li>Exposed connection as attribute of the cursor object.</li>
+<li>Cursors now support the iteration protocol.</li>
+<li>Added new method to get parameter settings.</li>
+<li>Added customizable row_factory as suggested by Simon Pamies.</li>
+<li>Separated between mandatory and additional type objects.</li>
+<li>Added keyword args to insert, update and delete methods.</li>
+<li>Added exception handling for direct copy.</li>
+<li>Release the GIL while making a connection
+(as suggested by Peter Schuller).</li>
+<li>If available, use decimal.Decimal for numeric types.</li>
+<li>Allow DB wrapper to be used with DB-API 2 connections
+(as suggested by Chris Hilton).</li>
+<li>Made private attributes of DB wrapper accessible.</li>
+<li>Dropped dependence on mx.DateTime module.</li>
+<li>Support for PQescapeStringConn() and PQescapeByteaConn();
+these are now also used by the internal _quote() functions.</li>
+<li>Added 'int8' to INTEGER types. New SMALLINT type.</li>
+<li>Added a way to find the number of rows affected by a query()
+with the classic pg module by returning it as a string.
+For single inserts, query() still returns the oid as an integer.
+The pgdb module already provides the &quot;rowcount&quot; cursor attribute
+for the same purpose.</li>
+<li>Improved getnotify() by calling PQconsumeInput() instead of
+submitting an empty command.</li>
+<li>Removed compatibility code for old OID munging style.</li>
+<li>The insert() and update() methods now use the &quot;returning&quot; clause
+if possible to get all changed values, and they also check in advance
+whether a subsequent select is possible, so that ongoing transactions
+won't break if there is no select privilege.</li>
+<li>Added &quot;protocol_version&quot; and &quot;server_version&quot; attributes.</li>
+<li>Revived the &quot;user&quot; attribute.</li>
+<li>The pg module now works correctly with composite primary keys;
+these are represented as frozensets.</li>
+<li>Removed the undocumented and actually unnecessary &quot;view&quot; parameter
+from the get() method.</li>
+<li>get() raises a nicer ProgrammingError instead of a KeyError
+if no primary key was found.</li>
+<li>delete() now also works based on the primary key if no oid available
+and returns whether the row existed or not.</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-8-1-2006-06-05" name="version-3-8-1-2006-06-05">Version 3.8.1 (2006-06-05)</a></h1>
+<ul class="simple">
+<li>Use string methods instead of deprecated string functions.</li>
+<li>Only use SQL-standard way of escaping quotes.</li>
+<li>Added the functions escape_string() and escape/unescape_bytea()
+(as suggested by Charlie Dyson and Kavous Bojnourdi a long time ago).</li>
+<li>Reverted code in clear() method that set date to current.</li>
+<li>Added code for backwards compatibility in OID munging code.</li>
+<li>Reorder attnames tests so that &quot;interval&quot; is checked for before &quot;int.&quot;</li>
+<li>If caller supplies key dictionary, make sure that all has a namespace.</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-8-2006-02-17" name="version-3-8-2006-02-17">Version 3.8 (2006-02-17)</a></h1>
+<ul class="simple">
+<li>Installed new favicon.ico from Matthew Sporleder &lt;<a class="reference" href="mailto:mspo&#64;mspo.com">mspo&#64;mspo.com</a>&gt;</li>
+<li>Replaced snprintf by PyOS_snprintf.</li>
+<li>Removed NO_SNPRINTF switch which is not needed any longer</li>
+<li>Clean up some variable names and namespace</li>
+<li>Add get_relations() method to get any type of relation</li>
+<li>Rewrite get_tables() to use get_relations()</li>
+<li>Use new method in get_attnames method to get attributes of views as well</li>
+<li>Add Binary type</li>
+<li>Number of rows is now -1 after executing no-result statements</li>
+<li>Fix some number handling</li>
+<li>Non-simple types do not raise an error any more</li>
+<li>Improvements to documentation framework</li>
+<li>Take into account that nowadays not every table must have an oid column</li>
+<li>Simplification and improvement of the inserttable() function</li>
+<li>Fix up unit tests</li>
+<li>The usual assortment of minor fixes and enhancements</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-7-2005-09-07" name="version-3-7-2005-09-07">Version 3.7 (2005-09-07)</a></h1>
+<p>Improvement of pgdb module:</p>
+<ul class="simple">
+<li>Use Python standard <cite>datetime</cite> if <cite>mxDateTime</cite> is not available</li>
+</ul>
+<p>Major improvements and clean-up in classic pg module:</p>
+<ul class="simple">
+<li>All members of the underlying connection directly available in <cite>DB</cite></li>
+<li>Fixes to quoting function</li>
+<li>Add checks for valid database connection to methods</li>
+<li>Improved namespace support, handle <cite>search_path</cite> correctly</li>
+<li>Removed old dust and unnecessary imports, added docstrings</li>
+<li>Internal sql statements as one-liners, smoothed out ugly code</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-6-2-2005-02-23" name="version-3-6-2-2005-02-23">Version 3.6.2 (2005-02-23)</a></h1>
+<ul class="simple">
+<li>Further fixes to namespace handling</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-6-1-2005-01-11" name="version-3-6-1-2005-01-11">Version 3.6.1 (2005-01-11)</a></h1>
+<ul class="simple">
+<li>Fixes to namespace handling</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-6-2004-12-17" name="version-3-6-2004-12-17">Version 3.6 (2004-12-17)</a></h1>
+<ul class="simple">
+<li>Better DB-API 2.0 compliance</li>
+<li>Exception hierarchy moved into C module and made available to both APIs</li>
+<li>Fix error in update method that caused false exceptions</li>
+<li>Moved to standard exception hierarchy in classic API</li>
+<li>Added new method to get transaction state</li>
+<li>Use proper Python constants where appropriate</li>
+<li>Use Python versions of strtol, etc. Allows Win32 build.</li>
+<li>Bug fixes and cleanups</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-5-2004-08-29" name="version-3-5-2004-08-29">Version 3.5 (2004-08-29)</a></h1>
+<p>Fixes and enhancements:</p>
+<ul class="simple">
+<li>Add interval to list of data types</li>
+<li>fix up method wrapping especially close()</li>
+<li>retry pkeys once if table missing in case it was just added</li>
+<li>wrap query method separately to handle debug better</li>
+<li>use isinstance instead of type</li>
+<li>fix free/PQfreemem issue - finally</li>
+<li>miscellaneous cleanups and formatting</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-4-2004-06-02" name="version-3-4-2004-06-02">Version 3.4 (2004-06-02)</a></h1>
+<p>Some cleanups and fixes.
+This is the first version where PyGreSQL is moved back out of the
+PostgreSQL tree. A lot of the changes mentioned below were actually
+made while in the PostgreSQL tree since their last release.</p>
+<ul class="simple">
+<li>Allow for larger integer returns</li>
+<li>Return proper strings for true and false</li>
+<li>Cleanup convenience method creation</li>
+<li>Enhance debugging method</li>
+<li>Add reopen method</li>
+<li>Allow programs to preload field names for speedup</li>
+<li>Move OID handling so that it returns long instead of int</li>
+<li>Miscellaneous cleanups and formatting</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-3-2001-12-03" name="version-3-3-2001-12-03">Version 3.3 (2001-12-03)</a></h1>
+<p>A few cleanups.  Mostly there was some confusion about the latest version
+and so I am bumping the number to keep it straight.</p>
+<ul class="simple">
+<li>Added NUMERICOID to list of returned types. This fixes a bug when
+returning aggregates in the latest version of PostgreSQL.</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-2-2001-06-20" name="version-3-2-2001-06-20">Version 3.2 (2001-06-20)</a></h1>
+<p>Note that there are very few changes to PyGreSQL between 3.1 and 3.2.
+The main reason for the release is the move into the PostgreSQL
+development tree.  Even the WIN32 changes are pretty minor.</p>
+<ul class="simple">
+<li>Add Win32 support (<a class="reference" href="mailto:gerhard&#64;bigfoot.de">gerhard&#64;bigfoot.de</a>)</li>
+<li>Fix some DB-API quoting problems (<a class="reference" href="mailto:niall.smart&#64;ebeon.com">niall.smart&#64;ebeon.com</a>)</li>
+<li>Moved development into PostgreSQL development tree.</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-1-2000-11-06" name="version-3-1-2000-11-06">Version 3.1 (2000-11-06)</a></h1>
+<ul class="simple">
+<li>Fix some quoting functions.  In particular handle NULLs better.</li>
+<li>Use a method to add primary key information rather than direct
+manipulation of the class structures</li>
+<li>Break decimal out in <cite>_quote</cite> (in pg.py) and treat it as float</li>
+<li>Treat timestamp like date for quoting purposes</li>
+<li>Remove a redundant SELECT from the <cite>get</cite> method speeding it,
+and <cite>insert</cite> (since it calls <cite>get</cite>) up a little.</li>
+<li>Add test for BOOL type in typecast method to <cite>pgdbTypeCache</cite> class
+(<a class="reference" href="mailto:tv&#64;beamnet.de">tv&#64;beamnet.de</a>)</li>
+<li>Fix pgdb.py to send port as integer to lower level function
+(<a class="reference" href="mailto:dildog&#64;l0pht.com">dildog&#64;l0pht.com</a>)</li>
+<li>Change pg.py to speed up some operations</li>
+<li>Allow updates on tables with no primary keys</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-3-0-2000-05-30" name="version-3-0-2000-05-30">Version 3.0 (2000-05-30)</a></h1>
+<ul class="simple">
+<li>Remove strlen() call from pglarge_write() and get size from object
+(<a class="reference" href="mailto:Richard&#64;Bouska.cz">Richard&#64;Bouska.cz</a>)</li>
+<li>Add a little more error checking to the quote function in the wrapper</li>
+<li>Add extra checking in <cite>_quote</cite> function</li>
+<li>Wrap query in pg.py for debugging</li>
+<li>Add DB-API 2.0 support to pgmodule.c (<a class="reference" href="mailto:andre&#64;via.ecp.fr">andre&#64;via.ecp.fr</a>)</li>
+<li>Add DB-API 2.0 wrapper pgdb.py (<a class="reference" href="mailto:andre&#64;via.ecp.fr">andre&#64;via.ecp.fr</a>)</li>
+<li>Correct keyword clash (temp) in tutorial</li>
+<li>Clean up layout of tutorial</li>
+<li>Return NULL values as None (<a class="reference" href="mailto:rlawrence&#64;lastfoot.com">rlawrence&#64;lastfoot.com</a>)
+(WARNING: This will cause backwards compatibility issues)</li>
+<li>Change None to NULL in insert and update</li>
+<li>Change hash-bang lines to use /usr/bin/env</li>
+<li>Clearing date should be blank (NULL) not TODAY</li>
+<li>Quote backslashes in strings in <cite>_quote</cite> (<a class="reference" href="mailto:brian&#64;CSUA.Berkeley.EDU">brian&#64;CSUA.Berkeley.EDU</a>)</li>
+<li>Expanded and clarified build instructions (<a class="reference" href="mailto:tbryan&#64;starship.python.net">tbryan&#64;starship.python.net</a>)</li>
+<li>Make code thread safe (<a class="reference" href="mailto:Jerome.Alet&#64;unice.fr">Jerome.Alet&#64;unice.fr</a>)</li>
+<li>Add README.distutils (<a class="reference" href="mailto:mwa&#64;gate.net">mwa&#64;gate.net</a> &amp; <a class="reference" href="mailto:jeremy&#64;cnri.reston.va.us">jeremy&#64;cnri.reston.va.us</a>)</li>
+<li>Many fixes and increased DB-API compliance by <a class="reference" href="mailto:chifungfan&#64;yahoo.com">chifungfan&#64;yahoo.com</a>,
+<a class="reference" href="mailto:tony&#64;printra.net">tony&#64;printra.net</a>, <a class="reference" href="mailto:jeremy&#64;alum.mit.edu">jeremy&#64;alum.mit.edu</a> and others to get the final
+version ready to release.</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-2-4-1999-06-15" name="version-2-4-1999-06-15">Version 2.4 (1999-06-15)</a></h1>
+<ul class="simple">
+<li>Insert returns None if the user doesn't have select permissions
+on the table.  It can (and does) happen that one has insert but
+not select permissions on a table.</li>
+<li>Added ntuples() method to query object (<a class="reference" href="mailto:brit&#64;druid.net">brit&#64;druid.net</a>)</li>
+<li>Corrected a bug related to getresult() and the money type</li>
+<li>Corrected a bug related to negative money amounts</li>
+<li>Allow update based on primary key if munged oid not available and
+table has a primary key</li>
+<li>Add many __doc__ strings (<a class="reference" href="mailto:andre&#64;via.ecp.fr">andre&#64;via.ecp.fr</a>)</li>
+<li>Get method works with views if key specified</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-2-3-1999-04-17" name="version-2-3-1999-04-17">Version 2.3 (1999-04-17)</a></h1>
+<ul class="simple">
+<li>connect.host returns &quot;localhost&quot; when connected to Unix socket
+(<a class="reference" href="mailto:torppa&#64;tuhnu.cutery.fi">torppa&#64;tuhnu.cutery.fi</a>)</li>
+<li>Use <cite>PyArg_ParseTupleAndKeywords</cite> in connect() (<a class="reference" href="mailto:torppa&#64;tuhnu.cutery.fi">torppa&#64;tuhnu.cutery.fi</a>)</li>
+<li>fixes and cleanups (<a class="reference" href="mailto:torppa&#64;tuhnu.cutery.fi">torppa&#64;tuhnu.cutery.fi</a>)</li>
+<li>Fixed memory leak in dictresult() (<a class="reference" href="mailto:terekhov&#64;emc.com">terekhov&#64;emc.com</a>)</li>
+<li>Deprecated pgext.py - functionality now in pg.py</li>
+<li>More cleanups to the tutorial</li>
+<li>Added fileno() method - <a class="reference" href="mailto:terekhov&#64;emc.com">terekhov&#64;emc.com</a> (Mikhail Terekhov)</li>
+<li>added money type to quoting function</li>
+<li>Compiles cleanly with more warnings turned on</li>
+<li>Returns PostgreSQL error message on error</li>
+<li>Init accepts keywords (Jarkko Torppa)</li>
+<li>Convenience functions can be overridden (Jarkko Torppa)</li>
+<li>added close() method</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-2-2-1998-12-21" name="version-2-2-1998-12-21">Version 2.2 (1998-12-21)</a></h1>
+<ul class="simple">
+<li>Added user and password support thanks to Ng Pheng Siong (<a class="reference" href="mailto:ngps&#64;post1.com">ngps&#64;post1.com</a>)</li>
+<li>Insert queries return the inserted oid</li>
+<li>Add new <cite>pg</cite> wrapper (C module renamed to _pg)</li>
+<li>Wrapped database connection in a class</li>
+<li>Cleaned up some of the tutorial.  (More work needed.)</li>
+<li>Added <cite>version</cite> and <cite>__version__</cite>.
+Thanks to <a class="reference" href="mailto:thilo&#64;eevolute.com">thilo&#64;eevolute.com</a> for the suggestion.</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-2-1-1998-03-07" name="version-2-1-1998-03-07">Version 2.1 (1998-03-07)</a></h1>
+<ul class="simple">
+<li>return fields as proper Python objects for field type</li>
+<li>Cleaned up pgext.py</li>
+<li>Added dictresult method</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-2-0-1997-12-23" name="version-2-0-1997-12-23">Version 2.0  (1997-12-23)</a></h1>
+<ul class="simple">
+<li>Updated code for PostgreSQL 6.2.1 and Python 1.5</li>
+<li>Reformatted code and converted to use full ANSI style prototypes</li>
+<li>Changed name to PyGreSQL (from PyGres95)</li>
+<li>Changed order of arguments to connect function</li>
+<li>Created new type <cite>pgqueryobject</cite> and moved certain methods to it</li>
+<li>Added a print function for pgqueryobject</li>
+<li>Various code changes - mostly stylistic</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-1-0b-1995-11-04" name="version-1-0b-1995-11-04">Version 1.0b (1995-11-04)</a></h1>
+<ul class="simple">
+<li>Keyword support for connect function moved from library file to C code
+and taken away from library</li>
+<li>Rewrote documentation</li>
+<li>Bug fix in connect function</li>
+<li>Enhancements in large objects interface methods</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-1-0a-1995-10-30" name="version-1-0a-1995-10-30">Version 1.0a (1995-10-30)</a></h1>
+<p>A limited release.</p>
+<ul class="simple">
+<li>Module adapted to standard Python syntax</li>
+<li>Keyword support for connect function in library file</li>
+<li>Rewrote default parameters interface (internal use of strings)</li>
+<li>Fixed minor bugs in module interface</li>
+<li>Redefinition of error messages</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-0-9b-1995-10-10" name="version-0-9b-1995-10-10">Version 0.9b (1995-10-10)</a></h1>
+<p>The first public release.</p>
+<ul class="simple">
+<li>Large objects implementation</li>
+<li>Many bug fixes, enhancements, ...</li>
+</ul>
+</div>
+<div class="section">
+<h1><a id="version-0-1a-1995-10-07" name="version-0-1a-1995-10-07">Version 0.1a (1995-10-07)</a></h1>
+<ul class="simple">
+<li>Basic libpq functions (SQL access)</li>
+</ul>
+</div>
+</div>
+</body>
+</html>