Posted to commits@hawq.apache.org by rl...@apache.org on 2016/01/06 09:56:08 UTC
[07/13] incubator-hawq git commit: HAWQ-271. Remove external python modules.
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/ext/yaml/scanner.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/scanner.py b/tools/bin/ext/yaml/scanner.py
deleted file mode 100644
index 369c027..0000000
--- a/tools/bin/ext/yaml/scanner.py
+++ /dev/null
@@ -1,1472 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Scanner produces tokens of the following types:
-# STREAM-START
-# STREAM-END
-# DIRECTIVE(name, value)
-# DOCUMENT-START
-# DOCUMENT-END
-# BLOCK-SEQUENCE-START
-# BLOCK-MAPPING-START
-# BLOCK-END
-# FLOW-SEQUENCE-START
-# FLOW-MAPPING-START
-# FLOW-SEQUENCE-END
-# FLOW-MAPPING-END
-# BLOCK-ENTRY
-# FLOW-ENTRY
-# KEY
-# VALUE
-# ALIAS(value)
-# ANCHOR(value)
-# TAG(value)
-# SCALAR(value, plain, style)
-#
-# Read comments in the Scanner code for more details.
-#
-
-__all__ = ['Scanner', 'ScannerError']
-
-from error import MarkedYAMLError
-from tokens import *
-
-class ScannerError(MarkedYAMLError):
- pass
-
-class SimpleKey(object):
- # See the simple keys treatment below.
-
- def __init__(self, token_number, required, index, line, column, mark):
- self.token_number = token_number
- self.required = required
- self.index = index
- self.line = line
- self.column = column
- self.mark = mark
-
-class Scanner(object):
-
- def __init__(self):
- """Initialize the scanner."""
- # It is assumed that Scanner and Reader will have a common descendant.
- # Reader does the dirty work of checking for BOM and converting the
- # input data to Unicode. It also adds NUL to the end.
- #
- # Reader supports the following methods
- # self.peek(i=0) # peek the next i-th character
- # self.prefix(l=1) # peek the next l characters
- # self.forward(l=1) # read the next l characters and move the pointer.
-
- # Have we reached the end of the stream?
- self.done = False
-
- # The number of unclosed '{' and '['. `flow_level == 0` means block
- # context.
- self.flow_level = 0
-
- # List of processed tokens that are not yet emitted.
- self.tokens = []
-
- # Add the STREAM-START token.
- self.fetch_stream_start()
-
- # Number of tokens that were emitted through the `get_token` method.
- self.tokens_taken = 0
-
- # The current indentation level.
- self.indent = -1
-
- # Past indentation levels.
- self.indents = []
-
- # Variables related to simple keys treatment.
-
- # A simple key is a key that is not denoted by the '?' indicator.
- # Examples of simple keys:
- # ---
- # block simple key: value
- # ? not a simple key:
- # : { flow simple key: value }
- # We emit the KEY token before all keys, so when we find a potential
- # simple key, we try to locate the corresponding ':' indicator.
- # Simple keys should be limited to a single line and 1024 characters.
-
- # Can a simple key start at the current position? A simple key may
- # start:
- # - at the beginning of the line, not counting indentation spaces
- # (in block context),
- # - after '{', '[', ',' (in the flow context),
- # - after '?', ':', '-' (in the block context).
- # In the block context, this flag also signifies if a block collection
- # may start at the current position.
- self.allow_simple_key = True
-
- # Keep track of possible simple keys. This is a dictionary. The key
- # is `flow_level`; there can be no more than one possible simple key
- # for each level. The value is a SimpleKey record:
- # (token_number, required, index, line, column, mark)
- # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
- # '[', or '{' tokens.
- self.possible_simple_keys = {}
-
- # Public methods.
-
- def check_token(self, *choices):
- # Check if the next token is one of the given types.
- while self.need_more_tokens():
- self.fetch_more_tokens()
- if self.tokens:
- if not choices:
- return True
- for choice in choices:
- if isinstance(self.tokens[0], choice):
- return True
- return False
-
- def peek_token(self):
- # Return the next token, but do not delete it from the queue.
- while self.need_more_tokens():
- self.fetch_more_tokens()
- if self.tokens:
- return self.tokens[0]
-
- def get_token(self):
- # Return the next token.
- while self.need_more_tokens():
- self.fetch_more_tokens()
- if self.tokens:
- self.tokens_taken += 1
- return self.tokens.pop(0)
-
- # Private methods.
-
- def need_more_tokens(self):
- if self.done:
- return False
- if not self.tokens:
- return True
- # The current token may be a potential simple key, so we
- # need to look further.
- self.stale_possible_simple_keys()
- if self.next_possible_simple_key() == self.tokens_taken:
- return True
-
- def fetch_more_tokens(self):
-
- # Eat whitespaces and comments until we reach the next token.
- self.scan_to_next_token()
-
- # Remove obsolete possible simple keys.
- self.stale_possible_simple_keys()
-
- # Compare the current indentation and column. It may add some tokens
- # and decrease the current indentation level.
- self.unwind_indent(self.column)
-
- # Peek the next character.
- ch = self.peek()
-
- # Is it the end of stream?
- if ch == u'\0':
- return self.fetch_stream_end()
-
- # Is it a directive?
- if ch == u'%' and self.check_directive():
- return self.fetch_directive()
-
- # Is it the document start?
- if ch == u'-' and self.check_document_start():
- return self.fetch_document_start()
-
- # Is it the document end?
- if ch == u'.' and self.check_document_end():
- return self.fetch_document_end()
-
- # TODO: support for BOM within a stream.
- #if ch == u'\uFEFF':
- # return self.fetch_bom() <-- issue BOMToken
-
- # Note: the order of the following checks is NOT significant.
-
- # Is it the flow sequence start indicator?
- if ch == u'[':
- return self.fetch_flow_sequence_start()
-
- # Is it the flow mapping start indicator?
- if ch == u'{':
- return self.fetch_flow_mapping_start()
-
- # Is it the flow sequence end indicator?
- if ch == u']':
- return self.fetch_flow_sequence_end()
-
- # Is it the flow mapping end indicator?
- if ch == u'}':
- return self.fetch_flow_mapping_end()
-
- # Is it the flow entry indicator?
- if ch == u',':
- return self.fetch_flow_entry()
-
- # Is it the block entry indicator?
- if ch == u'-' and self.check_block_entry():
- return self.fetch_block_entry()
-
- # Is it the key indicator?
- if ch == u'?' and self.check_key():
- return self.fetch_key()
-
- # Is it the value indicator?
- if ch == u':' and self.check_value():
- return self.fetch_value()
-
- # Is it an alias?
- if ch == u'*':
- return self.fetch_alias()
-
- # Is it an anchor?
- if ch == u'&':
- return self.fetch_anchor()
-
- # Is it a tag?
- if ch == u'!':
- return self.fetch_tag()
-
- # Is it a literal scalar?
- if ch == u'|' and not self.flow_level:
- return self.fetch_literal()
-
- # Is it a folded scalar?
- if ch == u'>' and not self.flow_level:
- return self.fetch_folded()
-
- # Is it a single quoted scalar?
- if ch == u'\'':
- return self.fetch_single()
-
- # Is it a double quoted scalar?
- if ch == u'\"':
- return self.fetch_double()
-
- # It must be a plain scalar then.
- if self.check_plain():
- return self.fetch_plain()
-
- # No? It's an error. Let's produce a nice error message.
- raise ScannerError("while scanning for the next token", None,
- "found character %r that cannot start any token"
- % ch.encode('utf-8'), self.get_mark())
-
- # Simple keys treatment.
-
- def next_possible_simple_key(self):
- # Return the number of the nearest possible simple key. Actually we
- # don't need to loop through the whole dictionary. We may replace it
- # with the following code:
- # if not self.possible_simple_keys:
- # return None
- # return self.possible_simple_keys[
- # min(self.possible_simple_keys.keys())].token_number
- min_token_number = None
- for level in self.possible_simple_keys:
- key = self.possible_simple_keys[level]
- if min_token_number is None or key.token_number < min_token_number:
- min_token_number = key.token_number
- return min_token_number
-
- def stale_possible_simple_keys(self):
- # Remove entries that are no longer possible simple keys. According to
- # the YAML specification, simple keys
- # - should be limited to a single line,
- # - should be no longer than 1024 characters.
- # Disabling this procedure will allow simple keys of any length and
- # height (may cause problems if indentation is broken though).
- for level in self.possible_simple_keys.keys():
- key = self.possible_simple_keys[level]
- if key.line != self.line \
- or self.index-key.index > 1024:
- if key.required:
- raise ScannerError("while scanning a simple key", key.mark,
- "could not find expected ':'", self.get_mark())
- del self.possible_simple_keys[level]
-
- def save_possible_simple_key(self):
- # The next token may start a simple key. We check if it's possible
- # and save its position. This function is called for
- # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
-
- # Check if a simple key is required at the current position.
- required = not self.flow_level and self.indent == self.column
-
- # A simple key is required only if it is the first token in the current
- # line. Therefore it is always allowed.
- assert self.allow_simple_key or not required
-
- # The next token might be a simple key. Let's save its number and
- # position.
- if self.allow_simple_key:
- self.remove_possible_simple_key()
- token_number = self.tokens_taken+len(self.tokens)
- key = SimpleKey(token_number, required,
- self.index, self.line, self.column, self.get_mark())
- self.possible_simple_keys[self.flow_level] = key
-
- def remove_possible_simple_key(self):
- # Remove the saved possible key position at the current flow level.
- if self.flow_level in self.possible_simple_keys:
- key = self.possible_simple_keys[self.flow_level]
-
- if key.required:
- raise ScannerError("while scanning a simple key", key.mark,
- "could not find expected ':'", self.get_mark())
-
- del self.possible_simple_keys[self.flow_level]
-
- # Indentation functions.
-
- def unwind_indent(self, column):
-
- ## In flow context, tokens should respect indentation.
- ## Actually the condition should be `self.indent >= column` according to
- ## the spec. But this condition will prohibit intuitively correct
- ## constructions such as
- ## key : {
- ## }
- #if self.flow_level and self.indent > column:
- # raise ScannerError(None, None,
- # "invalid indentation or unclosed '[' or '{'",
- # self.get_mark())
-
- # In the flow context, indentation is ignored. We make the scanner less
- # restrictive than the specification requires.
- if self.flow_level:
- return
-
- # In block context, we may need to issue the BLOCK-END tokens.
- while self.indent > column:
- mark = self.get_mark()
- self.indent = self.indents.pop()
- self.tokens.append(BlockEndToken(mark, mark))
-
- def add_indent(self, column):
- # Check if we need to increase indentation.
- if self.indent < column:
- self.indents.append(self.indent)
- self.indent = column
- return True
- return False
-
- # Fetchers.
-
- def fetch_stream_start(self):
- # We always add STREAM-START as the first token and STREAM-END as the
- # last token.
-
- # Read the token.
- mark = self.get_mark()
-
- # Add STREAM-START.
- self.tokens.append(StreamStartToken(mark, mark,
- encoding=self.encoding))
-
-
- def fetch_stream_end(self):
-
- # Set the current indentation to -1.
- self.unwind_indent(-1)
-
- # Reset everything (not really needed).
- self.allow_simple_key = False
- self.possible_simple_keys = {}
-
- # Read the token.
- mark = self.get_mark()
-
- # Add STREAM-END.
- self.tokens.append(StreamEndToken(mark, mark))
-
- # The stream is finished.
- self.done = True
-
- def fetch_directive(self):
-
- # Set the current indentation to -1.
- self.unwind_indent(-1)
-
- # Reset simple keys.
- self.remove_possible_simple_key()
- self.allow_simple_key = False
-
- # Scan and add DIRECTIVE.
- self.tokens.append(self.scan_directive())
-
- def fetch_document_start(self):
- self.fetch_document_indicator(DocumentStartToken)
-
- def fetch_document_end(self):
- self.fetch_document_indicator(DocumentEndToken)
-
- def fetch_document_indicator(self, TokenClass):
-
- # Set the current indentation to -1.
- self.unwind_indent(-1)
-
- # Reset simple keys. Note that there could not be a block collection
- # after '---'.
- self.remove_possible_simple_key()
- self.allow_simple_key = False
-
- # Add DOCUMENT-START or DOCUMENT-END.
- start_mark = self.get_mark()
- self.forward(3)
- end_mark = self.get_mark()
- self.tokens.append(TokenClass(start_mark, end_mark))
-
- def fetch_flow_sequence_start(self):
- self.fetch_flow_collection_start(FlowSequenceStartToken)
-
- def fetch_flow_mapping_start(self):
- self.fetch_flow_collection_start(FlowMappingStartToken)
-
- def fetch_flow_collection_start(self, TokenClass):
-
- # '[' and '{' may start a simple key.
- self.save_possible_simple_key()
-
- # Increase the flow level.
- self.flow_level += 1
-
- # Simple keys are allowed after '[' and '{'.
- self.allow_simple_key = True
-
- # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
- start_mark = self.get_mark()
- self.forward()
- end_mark = self.get_mark()
- self.tokens.append(TokenClass(start_mark, end_mark))
-
- def fetch_flow_sequence_end(self):
- self.fetch_flow_collection_end(FlowSequenceEndToken)
-
- def fetch_flow_mapping_end(self):
- self.fetch_flow_collection_end(FlowMappingEndToken)
-
- def fetch_flow_collection_end(self, TokenClass):
-
- # Reset possible simple key on the current level.
- self.remove_possible_simple_key()
-
- # Decrease the flow level.
- self.flow_level -= 1
-
- # No simple keys after ']' or '}'.
- self.allow_simple_key = False
-
- # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
- start_mark = self.get_mark()
- self.forward()
- end_mark = self.get_mark()
- self.tokens.append(TokenClass(start_mark, end_mark))
-
- def fetch_flow_entry(self):
-
- # Simple keys are allowed after ','.
- self.allow_simple_key = True
-
- # Reset possible simple key on the current level.
- self.remove_possible_simple_key()
-
- # Add FLOW-ENTRY.
- start_mark = self.get_mark()
- self.forward()
- end_mark = self.get_mark()
- self.tokens.append(FlowEntryToken(start_mark, end_mark))
-
- def fetch_block_entry(self):
-
- # Block context needs additional checks.
- if not self.flow_level:
-
- # Are we allowed to start a new entry?
- if not self.allow_simple_key:
- raise ScannerError(None, None,
- "sequence entries are not allowed here",
- self.get_mark())
-
- # We may need to add BLOCK-SEQUENCE-START.
- if self.add_indent(self.column):
- mark = self.get_mark()
- self.tokens.append(BlockSequenceStartToken(mark, mark))
-
- # It's an error for the block entry to occur in the flow context,
- # but we let the parser detect this.
- else:
- pass
-
- # Simple keys are allowed after '-'.
- self.allow_simple_key = True
-
- # Reset possible simple key on the current level.
- self.remove_possible_simple_key()
-
- # Add BLOCK-ENTRY.
- start_mark = self.get_mark()
- self.forward()
- end_mark = self.get_mark()
- self.tokens.append(BlockEntryToken(start_mark, end_mark))
-
- def fetch_key(self):
-
- # Block context needs additional checks.
- if not self.flow_level:
-
- # Are we allowed to start a key (not necessarily a simple one)?
- if not self.allow_simple_key:
- raise ScannerError(None, None,
- "mapping keys are not allowed here",
- self.get_mark())
-
- # We may need to add BLOCK-MAPPING-START.
- if self.add_indent(self.column):
- mark = self.get_mark()
- self.tokens.append(BlockMappingStartToken(mark, mark))
-
- # Simple keys are allowed after '?' in the block context.
- self.allow_simple_key = not self.flow_level
-
- # Reset possible simple key on the current level.
- self.remove_possible_simple_key()
-
- # Add KEY.
- start_mark = self.get_mark()
- self.forward()
- end_mark = self.get_mark()
- self.tokens.append(KeyToken(start_mark, end_mark))
-
- def fetch_value(self):
-
- # Do we have a pending simple key?
- if self.flow_level in self.possible_simple_keys:
-
- # Add KEY.
- key = self.possible_simple_keys[self.flow_level]
- del self.possible_simple_keys[self.flow_level]
- self.tokens.insert(key.token_number-self.tokens_taken,
- KeyToken(key.mark, key.mark))
-
- # If this key starts a new block mapping, we need to add
- # BLOCK-MAPPING-START.
- if not self.flow_level:
- if self.add_indent(key.column):
- self.tokens.insert(key.token_number-self.tokens_taken,
- BlockMappingStartToken(key.mark, key.mark))
-
- # There cannot be two simple keys one after another.
- self.allow_simple_key = False
-
- # It must be a part of a complex key.
- else:
-
- # Block context needs additional checks.
- # (Do we really need them? They will be caught by the parser
- # anyway.)
- if not self.flow_level:
-
- # We are allowed to start a complex value if and only if
- # we can start a simple key.
- if not self.allow_simple_key:
- raise ScannerError(None, None,
- "mapping values are not allowed here",
- self.get_mark())
-
- # If this value starts a new block mapping, we need to add
- # BLOCK-MAPPING-START. It will be detected as an error later by
- # the parser.
- if not self.flow_level:
- if self.add_indent(self.column):
- mark = self.get_mark()
- self.tokens.append(BlockMappingStartToken(mark, mark))
-
- # Simple keys are allowed after ':' in the block context.
- self.allow_simple_key = not self.flow_level
-
- # Reset possible simple key on the current level.
- self.remove_possible_simple_key()
-
- # Add VALUE.
- start_mark = self.get_mark()
- self.forward()
- end_mark = self.get_mark()
- self.tokens.append(ValueToken(start_mark, end_mark))
-
- def fetch_alias(self):
-
- # ALIAS could be a simple key.
- self.save_possible_simple_key()
-
- # No simple keys after ALIAS.
- self.allow_simple_key = False
-
- # Scan and add ALIAS.
- self.tokens.append(self.scan_anchor(AliasToken))
-
- def fetch_anchor(self):
-
- # ANCHOR could start a simple key.
- self.save_possible_simple_key()
-
- # No simple keys after ANCHOR.
- self.allow_simple_key = False
-
- # Scan and add ANCHOR.
- self.tokens.append(self.scan_anchor(AnchorToken))
-
- def fetch_tag(self):
-
- # TAG could start a simple key.
- self.save_possible_simple_key()
-
- # No simple keys after TAG.
- self.allow_simple_key = False
-
- # Scan and add TAG.
- self.tokens.append(self.scan_tag())
-
- def fetch_literal(self):
- self.fetch_block_scalar(style='|')
-
- def fetch_folded(self):
- self.fetch_block_scalar(style='>')
-
- def fetch_block_scalar(self, style):
-
- # A simple key may follow a block scalar.
- self.allow_simple_key = True
-
- # Reset possible simple key on the current level.
- self.remove_possible_simple_key()
-
- # Scan and add SCALAR.
- self.tokens.append(self.scan_block_scalar(style))
-
- def fetch_single(self):
- self.fetch_flow_scalar(style='\'')
-
- def fetch_double(self):
- self.fetch_flow_scalar(style='"')
-
- def fetch_flow_scalar(self, style):
-
- # A flow scalar could be a simple key.
- self.save_possible_simple_key()
-
- # No simple keys after flow scalars.
- self.allow_simple_key = False
-
- # Scan and add SCALAR.
- self.tokens.append(self.scan_flow_scalar(style))
-
- def fetch_plain(self):
-
- # A plain scalar could be a simple key.
- self.save_possible_simple_key()
-
- # No simple keys after plain scalars. But note that `scan_plain` will
- # change this flag if the scan is finished at the beginning of the
- # line.
- self.allow_simple_key = False
-
- # Scan and add SCALAR. May change `allow_simple_key`.
- self.tokens.append(self.scan_plain())
-
- # Checkers.
-
- def check_directive(self):
-
- # DIRECTIVE: ^ '%' ...
- # The '%' indicator is already checked.
- if self.column == 0:
- return True
-
- def check_document_start(self):
-
- # DOCUMENT-START: ^ '---' (' '|'\n')
- if self.column == 0:
- if self.prefix(3) == u'---' \
- and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
- return True
-
- def check_document_end(self):
-
- # DOCUMENT-END: ^ '...' (' '|'\n')
- if self.column == 0:
- if self.prefix(3) == u'...' \
- and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
- return True
-
- def check_block_entry(self):
-
- # BLOCK-ENTRY: '-' (' '|'\n')
- return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
-
- def check_key(self):
-
- # KEY(flow context): '?'
- if self.flow_level:
- return True
-
- # KEY(block context): '?' (' '|'\n')
- else:
- return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
-
- def check_value(self):
-
- # VALUE(flow context): ':'
- if self.flow_level:
- return True
-
- # VALUE(block context): ':' (' '|'\n')
- else:
- return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
-
- def check_plain(self):
-
- # A plain scalar may start with any non-space character except:
- # '-', '?', ':', ',', '[', ']', '{', '}',
- # '#', '&', '*', '!', '|', '>', '\'', '\"',
- # '%', '@', '`'.
- #
- # It may also start with
- # '-', '?', ':'
- # if it is followed by a non-space character.
- #
- # Note that we limit the last rule to the block context (except the
- # '-' character) because we want the flow context to be space
- # independent.
- ch = self.peek()
- return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \
- or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
- and (ch == u'-' or (not self.flow_level and ch in u'?:')))
-
- # Scanners.
-
- def scan_to_next_token(self):
- # We ignore spaces, line breaks and comments.
- # If we find a line break in the block context, we set the flag
- # `allow_simple_key` on.
- # The byte order mark is stripped if it's the first character in the
- # stream. We do not yet support BOM inside the stream as the
- # specification requires. Any such mark will be considered as a part
- # of the document.
- #
- # TODO: We need to make tab handling rules more sane. A good rule is
- # Tabs cannot precede tokens
- # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
- # KEY(block), VALUE(block), BLOCK-ENTRY
- # So the checking code is
- # if <TAB>:
- # self.allow_simple_keys = False
- # We also need to add the check for `allow_simple_keys == True` to
- # `unwind_indent` before issuing BLOCK-END.
- # Scanners for block, flow, and plain scalars need to be modified.
-
- if self.index == 0 and self.peek() == u'\uFEFF':
- self.forward()
- found = False
- while not found:
- while self.peek() == u' ':
- self.forward()
- if self.peek() == u'#':
- while self.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.forward()
- if self.scan_line_break():
- if not self.flow_level:
- self.allow_simple_key = True
- else:
- found = True
-
- def scan_directive(self):
- # See the specification for details.
- start_mark = self.get_mark()
- self.forward()
- name = self.scan_directive_name(start_mark)
- value = None
- if name == u'YAML':
- value = self.scan_yaml_directive_value(start_mark)
- end_mark = self.get_mark()
- elif name == u'TAG':
- value = self.scan_tag_directive_value(start_mark)
- end_mark = self.get_mark()
- else:
- end_mark = self.get_mark()
- while self.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.forward()
- self.scan_directive_ignored_line(start_mark)
- return DirectiveToken(name, value, start_mark, end_mark)
-
- def scan_directive_name(self, start_mark):
- # See the specification for details.
- length = 0
- ch = self.peek(length)
- while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
- or ch in u'-_':
- length += 1
- ch = self.peek(length)
- if not length:
- raise ScannerError("while scanning a directive", start_mark,
- "expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- value = self.prefix(length)
- self.forward(length)
- ch = self.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_mark,
- "expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- return value
-
- def scan_yaml_directive_value(self, start_mark):
- # See the specification for details.
- while self.peek() == u' ':
- self.forward()
- major = self.scan_yaml_directive_number(start_mark)
- if self.peek() != '.':
- raise ScannerError("while scanning a directive", start_mark,
- "expected a digit or '.', but found %r"
- % self.peek().encode('utf-8'),
- self.get_mark())
- self.forward()
- minor = self.scan_yaml_directive_number(start_mark)
- if self.peek() not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_mark,
- "expected a digit or ' ', but found %r"
- % self.peek().encode('utf-8'),
- self.get_mark())
- return (major, minor)
-
- def scan_yaml_directive_number(self, start_mark):
- # See the specification for details.
- ch = self.peek()
- if not (u'0' <= ch <= '9'):
- raise ScannerError("while scanning a directive", start_mark,
- "expected a digit, but found %r" % ch.encode('utf-8'),
- self.get_mark())
- length = 0
- while u'0' <= self.peek(length) <= u'9':
- length += 1
- value = int(self.prefix(length))
- self.forward(length)
- return value
-
- def scan_tag_directive_value(self, start_mark):
- # See the specification for details.
- while self.peek() == u' ':
- self.forward()
- handle = self.scan_tag_directive_handle(start_mark)
- while self.peek() == u' ':
- self.forward()
- prefix = self.scan_tag_directive_prefix(start_mark)
- return (handle, prefix)
-
- def scan_tag_directive_handle(self, start_mark):
- # See the specification for details.
- value = self.scan_tag_handle('directive', start_mark)
- ch = self.peek()
- if ch != u' ':
- raise ScannerError("while scanning a directive", start_mark,
- "expected ' ', but found %r" % ch.encode('utf-8'),
- self.get_mark())
- return value
-
- def scan_tag_directive_prefix(self, start_mark):
- # See the specification for details.
- value = self.scan_tag_uri('directive', start_mark)
- ch = self.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_mark,
- "expected ' ', but found %r" % ch.encode('utf-8'),
- self.get_mark())
- return value
-
- def scan_directive_ignored_line(self, start_mark):
- # See the specification for details.
- while self.peek() == u' ':
- self.forward()
- if self.peek() == u'#':
- while self.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.forward()
- ch = self.peek()
- if ch not in u'\0\r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_mark,
- "expected a comment or a line break, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- self.scan_line_break()
-
- def scan_anchor(self, TokenClass):
- # The specification does not restrict characters for anchors and
- # aliases. This may lead to problems, for instance, the document:
- # [ *alias, value ]
- # can be interpreted in two ways, as
- # [ "value" ]
- # and
- # [ *alias , "value" ]
- # Therefore we restrict aliases to numbers and ASCII letters.
- start_mark = self.get_mark()
- indicator = self.peek()
- if indicator == '*':
- name = 'alias'
- else:
- name = 'anchor'
- self.forward()
- length = 0
- ch = self.peek(length)
- while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
- or ch in u'-_':
- length += 1
- ch = self.peek(length)
- if not length:
- raise ScannerError("while scanning an %s" % name, start_mark,
- "expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- value = self.prefix(length)
- self.forward(length)
- ch = self.peek()
- if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
- raise ScannerError("while scanning an %s" % name, start_mark,
- "expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- end_mark = self.get_mark()
- return TokenClass(value, start_mark, end_mark)
-
- def scan_tag(self):
- # See the specification for details.
- start_mark = self.get_mark()
- ch = self.peek(1)
- if ch == u'<':
- handle = None
- self.forward(2)
- suffix = self.scan_tag_uri('tag', start_mark)
- if self.peek() != u'>':
- raise ScannerError("while parsing a tag", start_mark,
- "expected '>', but found %r" % self.peek().encode('utf-8'),
- self.get_mark())
- self.forward()
- elif ch in u'\0 \t\r\n\x85\u2028\u2029':
- handle = None
- suffix = u'!'
- self.forward()
- else:
- length = 1
- use_handle = False
- while ch not in u'\0 \r\n\x85\u2028\u2029':
- if ch == u'!':
- use_handle = True
- break
- length += 1
- ch = self.peek(length)
- handle = u'!'
- if use_handle:
- handle = self.scan_tag_handle('tag', start_mark)
- else:
- handle = u'!'
- self.forward()
- suffix = self.scan_tag_uri('tag', start_mark)
- ch = self.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a tag", start_mark,
- "expected ' ', but found %r" % ch.encode('utf-8'),
- self.get_mark())
- value = (handle, suffix)
- end_mark = self.get_mark()
- return TagToken(value, start_mark, end_mark)
-
- def scan_block_scalar(self, style):
- # See the specification for details.
-
- if style == '>':
- folded = True
- else:
- folded = False
-
- chunks = []
- start_mark = self.get_mark()
-
- # Scan the header.
- self.forward()
- chomping, increment = self.scan_block_scalar_indicators(start_mark)
- self.scan_block_scalar_ignored_line(start_mark)
-
- # Determine the indentation level and go to the first non-empty line.
- min_indent = self.indent+1
- if min_indent < 1:
- min_indent = 1
- if increment is None:
- breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
- indent = max(min_indent, max_indent)
- else:
- indent = min_indent+increment-1
- breaks, end_mark = self.scan_block_scalar_breaks(indent)
- line_break = u''
-
- # Scan the inner part of the block scalar.
- while self.column == indent and self.peek() != u'\0':
- chunks.extend(breaks)
- leading_non_space = self.peek() not in u' \t'
- length = 0
- while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':
- length += 1
- chunks.append(self.prefix(length))
- self.forward(length)
- line_break = self.scan_line_break()
- breaks, end_mark = self.scan_block_scalar_breaks(indent)
- if self.column == indent and self.peek() != u'\0':
-
- # Unfortunately, folding rules are ambiguous.
- #
- # This is the folding according to the specification:
-
- if folded and line_break == u'\n' \
- and leading_non_space and self.peek() not in u' \t':
- if not breaks:
- chunks.append(u' ')
- else:
- chunks.append(line_break)
-
- # This is Clark Evans's interpretation (also in the spec
- # examples):
- #
- #if folded and line_break == u'\n':
- # if not breaks:
- # if self.peek() not in ' \t':
- # chunks.append(u' ')
- # else:
- # chunks.append(line_break)
- #else:
- # chunks.append(line_break)
- else:
- break
-
- # Chomp the tail.
- if chomping is not False:
- chunks.append(line_break)
- if chomping is True:
- chunks.extend(breaks)
-
- # We are done.
- return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
- style)
-
- def scan_block_scalar_indicators(self, start_mark):
- # See the specification for details.
- chomping = None
- increment = None
- ch = self.peek()
- if ch in u'+-':
- if ch == '+':
- chomping = True
- else:
- chomping = False
- self.forward()
- ch = self.peek()
- if ch in u'0123456789':
- increment = int(ch)
- if increment == 0:
- raise ScannerError("while scanning a block scalar", start_mark,
- "expected indentation indicator in the range 1-9, but found 0",
- self.get_mark())
- self.forward()
- elif ch in u'0123456789':
- increment = int(ch)
- if increment == 0:
- raise ScannerError("while scanning a block scalar", start_mark,
- "expected indentation indicator in the range 1-9, but found 0",
- self.get_mark())
- self.forward()
- ch = self.peek()
- if ch in u'+-':
- if ch == '+':
- chomping = True
- else:
- chomping = False
- self.forward()
- ch = self.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a block scalar", start_mark,
- "expected chomping or indentation indicators, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- return chomping, increment
-
- def scan_block_scalar_ignored_line(self, start_mark):
- # See the specification for details.
- while self.peek() == u' ':
- self.forward()
- if self.peek() == u'#':
- while self.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.forward()
- ch = self.peek()
- if ch not in u'\0\r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a block scalar", start_mark,
- "expected a comment or a line break, but found %r"
- % ch.encode('utf-8'), self.get_mark())
- self.scan_line_break()
-
- def scan_block_scalar_indentation(self):
- # See the specification for details.
- chunks = []
- max_indent = 0
- end_mark = self.get_mark()
- while self.peek() in u' \r\n\x85\u2028\u2029':
- if self.peek() != u' ':
- chunks.append(self.scan_line_break())
- end_mark = self.get_mark()
- else:
- self.forward()
- if self.column > max_indent:
- max_indent = self.column
- return chunks, max_indent, end_mark
-
- def scan_block_scalar_breaks(self, indent):
- # See the specification for details.
- chunks = []
- end_mark = self.get_mark()
- while self.column < indent and self.peek() == u' ':
- self.forward()
- while self.peek() in u'\r\n\x85\u2028\u2029':
- chunks.append(self.scan_line_break())
- end_mark = self.get_mark()
- while self.column < indent and self.peek() == u' ':
- self.forward()
- return chunks, end_mark
-
- def scan_flow_scalar(self, style):
- # See the specification for details.
- # Note that we loosen indentation rules for quoted scalars. Quoted
- # scalars don't need to adhere to indentation because " and ' clearly
- # mark their beginning and end. Therefore we are less
- # restrictive than the specification requires. We only need to check
- # that document separators are not included in scalars.
- if style == '"':
- double = True
- else:
- double = False
- chunks = []
- start_mark = self.get_mark()
- quote = self.peek()
- self.forward()
- chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
- while self.peek() != quote:
- chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
- chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
- self.forward()
- end_mark = self.get_mark()
- return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
- style)
-
- ESCAPE_REPLACEMENTS = {
- u'0': u'\0',
- u'a': u'\x07',
- u'b': u'\x08',
- u't': u'\x09',
- u'\t': u'\x09',
- u'n': u'\x0A',
- u'v': u'\x0B',
- u'f': u'\x0C',
- u'r': u'\x0D',
- u'e': u'\x1B',
- u' ': u'\x20',
- u'\"': u'\"',
- u'\\': u'\\',
- u'N': u'\x85',
- u'_': u'\xA0',
- u'L': u'\u2028',
- u'P': u'\u2029',
- }
-
- ESCAPE_CODES = {
- u'x': 2,
- u'u': 4,
- u'U': 8,
- }
-
- def scan_flow_scalar_non_spaces(self, double, start_mark):
- # See the specification for details.
- chunks = []
- while True:
- length = 0
- while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
- length += 1
- if length:
- chunks.append(self.prefix(length))
- self.forward(length)
- ch = self.peek()
- if not double and ch == u'\'' and self.peek(1) == u'\'':
- chunks.append(u'\'')
- self.forward(2)
- elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
- chunks.append(ch)
- self.forward()
- elif double and ch == u'\\':
- self.forward()
- ch = self.peek()
- if ch in self.ESCAPE_REPLACEMENTS:
- chunks.append(self.ESCAPE_REPLACEMENTS[ch])
- self.forward()
- elif ch in self.ESCAPE_CODES:
- length = self.ESCAPE_CODES[ch]
- self.forward()
- for k in range(length):
- if self.peek(k) not in u'0123456789ABCDEFabcdef':
- raise ScannerError("while scanning a double-quoted scalar", start_mark,
- "expected escape sequence of %d hexadecimal numbers, but found %r" %
- (length, self.peek(k).encode('utf-8')), self.get_mark())
- code = int(self.prefix(length), 16)
- chunks.append(unichr(code))
- self.forward(length)
- elif ch in u'\r\n\x85\u2028\u2029':
- self.scan_line_break()
- chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
- else:
- raise ScannerError("while scanning a double-quoted scalar", start_mark,
- "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark())
- else:
- return chunks
-
- def scan_flow_scalar_spaces(self, double, start_mark):
- # See the specification for details.
- chunks = []
- length = 0
- while self.peek(length) in u' \t':
- length += 1
- whitespaces = self.prefix(length)
- self.forward(length)
- ch = self.peek()
- if ch == u'\0':
- raise ScannerError("while scanning a quoted scalar", start_mark,
- "found unexpected end of stream", self.get_mark())
- elif ch in u'\r\n\x85\u2028\u2029':
- line_break = self.scan_line_break()
- breaks = self.scan_flow_scalar_breaks(double, start_mark)
- if line_break != u'\n':
- chunks.append(line_break)
- elif not breaks:
- chunks.append(u' ')
- chunks.extend(breaks)
- else:
- chunks.append(whitespaces)
- return chunks
-
- def scan_flow_scalar_breaks(self, double, start_mark):
- # See the specification for details.
- chunks = []
- while True:
- # Instead of checking indentation, we check for document
- # separators.
- prefix = self.prefix(3)
- if (prefix == u'---' or prefix == u'...') \
- and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a quoted scalar", start_mark,
- "found unexpected document separator", self.get_mark())
- while self.peek() in u' \t':
- self.forward()
- if self.peek() in u'\r\n\x85\u2028\u2029':
- chunks.append(self.scan_line_break())
- else:
- return chunks
-
- def scan_plain(self):
- # See the specification for details.
- # We add an additional restriction for the flow context:
- # plain scalars in the flow context cannot contain ',', ':', or '?'.
- # We also keep track of the `allow_simple_key` flag here.
- # Indentation rules are loosened for the flow context.
- chunks = []
- start_mark = self.get_mark()
- end_mark = start_mark
- indent = self.indent+1
- # We allow zero indentation for scalars, but then we need to check for
- # document separators at the beginning of the line.
- #if indent == 0:
- # indent = 1
- spaces = []
- while True:
- length = 0
- if self.peek() == u'#':
- break
- while True:
- ch = self.peek(length)
- if ch in u'\0 \t\r\n\x85\u2028\u2029' \
- or (not self.flow_level and ch == u':' and
- self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \
- or (self.flow_level and ch in u',:?[]{}'):
- break
- length += 1
- # It's not clear what we should do with ':' in the flow context.
- if (self.flow_level and ch == u':'
- and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):
- self.forward(length)
- raise ScannerError("while scanning a plain scalar", start_mark,
- "found unexpected ':'", self.get_mark(),
- "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
- if length == 0:
- break
- self.allow_simple_key = False
- chunks.extend(spaces)
- chunks.append(self.prefix(length))
- self.forward(length)
- end_mark = self.get_mark()
- spaces = self.scan_plain_spaces(indent, start_mark)
- if not spaces or self.peek() == u'#' \
- or (not self.flow_level and self.column < indent):
- break
- return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
-
- def scan_plain_spaces(self, indent, start_mark):
- # See the specification for details.
- # The specification is really confusing about tabs in plain scalars.
- # We just forbid them completely. Do not use tabs in YAML!
- chunks = []
- length = 0
- while self.peek(length) in u' ':
- length += 1
- whitespaces = self.prefix(length)
- self.forward(length)
- ch = self.peek()
- if ch in u'\r\n\x85\u2028\u2029':
- line_break = self.scan_line_break()
- self.allow_simple_key = True
- prefix = self.prefix(3)
- if (prefix == u'---' or prefix == u'...') \
- and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
- return
- breaks = []
- while self.peek() in u' \r\n\x85\u2028\u2029':
- if self.peek() == ' ':
- self.forward()
- else:
- breaks.append(self.scan_line_break())
- prefix = self.prefix(3)
- if (prefix == u'---' or prefix == u'...') \
- and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
- return
- if line_break != u'\n':
- chunks.append(line_break)
- elif not breaks:
- chunks.append(u' ')
- chunks.extend(breaks)
- elif whitespaces:
- chunks.append(whitespaces)
- return chunks
-
- def scan_tag_handle(self, name, start_mark):
- # See the specification for details.
- # For some strange reason, the specification does not allow '_' in
- # tag handles. I have allowed it anyway.
- ch = self.peek()
- if ch != u'!':
- raise ScannerError("while scanning a %s" % name, start_mark,
- "expected '!', but found %r" % ch.encode('utf-8'),
- self.get_mark())
- length = 1
- ch = self.peek(length)
- if ch != u' ':
- while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
- or ch in u'-_':
- length += 1
- ch = self.peek(length)
- if ch != u'!':
- self.forward(length)
- raise ScannerError("while scanning a %s" % name, start_mark,
- "expected '!', but found %r" % ch.encode('utf-8'),
- self.get_mark())
- length += 1
- value = self.prefix(length)
- self.forward(length)
- return value
-
- def scan_tag_uri(self, name, start_mark):
- # See the specification for details.
- # Note: we do not check if the URI is well-formed.
- chunks = []
- length = 0
- ch = self.peek(length)
- while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
- or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
- if ch == u'%':
- chunks.append(self.prefix(length))
- self.forward(length)
- length = 0
- chunks.append(self.scan_uri_escapes(name, start_mark))
- else:
- length += 1
- ch = self.peek(length)
- if length:
- chunks.append(self.prefix(length))
- self.forward(length)
- length = 0
- if not chunks:
- raise ScannerError("while parsing a %s" % name, start_mark,
- "expected URI, but found %r" % ch.encode('utf-8'),
- self.get_mark())
- return u''.join(chunks)
-
- def scan_uri_escapes(self, name, start_mark):
- # See the specification for details.
- bytes = []
- mark = self.get_mark()
- while self.peek() == u'%':
- self.forward()
- for k in range(2):
- if self.peek(k) not in u'0123456789ABCDEFabcdef':
- raise ScannerError("while scanning a %s" % name, start_mark,
- "expected URI escape sequence of 2 hexadecimal numbers, but found %r" %
- (self.peek(k).encode('utf-8')), self.get_mark())
- bytes.append(chr(int(self.prefix(2), 16)))
- self.forward(2)
- try:
- value = unicode(''.join(bytes), 'utf-8')
- except UnicodeDecodeError, exc:
- raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
- return value
-
- def scan_line_break(self):
- # Transforms:
- # '\r\n' : '\n'
- # '\r' : '\n'
- # '\n' : '\n'
- # '\x85' : '\n'
- # '\u2028' : '\u2028'
- # '\u2029' : '\u2029'
- # default : ''
- ch = self.peek()
- if ch in u'\r\n\x85':
- if self.prefix(2) == u'\r\n':
- self.forward(2)
- else:
- self.forward()
- return u'\n'
- elif ch in u'\u2028\u2029':
- self.forward()
- return ch
- return u''
-
-#try:
-# import psyco
-# psyco.bind(Scanner)
-#except ImportError:
-# pass
-
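For reference, the removed Scanner is the tokenizer stage of this vendored PyYAML copy; a stock PyYAML install (the premise of this commit) exposes the same token stream through yaml.scan(). A minimal sketch, assuming PyYAML is installed system-wide:

    import yaml

    # yaml.scan() drives the Scanner and yields the token types listed at the
    # top of scanner.py: STREAM-START, BLOCK-MAPPING-START, KEY, SCALAR, ...
    for token in yaml.scan("key: [a, b]"):
        print type(token).__name__
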
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/ext/yaml/serializer.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/serializer.py b/tools/bin/ext/yaml/serializer.py
deleted file mode 100644
index eb70372..0000000
--- a/tools/bin/ext/yaml/serializer.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-__all__ = ['Serializer', 'SerializerError']
-
-from error import YAMLError
-from events import *
-from nodes import *
-
-class SerializerError(YAMLError):
- pass
-
-class Serializer(object):
-
- ANCHOR_TEMPLATE = u'id%03d'
-
- def __init__(self, encoding=None,
- explicit_start=None, explicit_end=None, version=None, tags=None):
- self.use_encoding = encoding
- self.use_explicit_start = explicit_start
- self.use_explicit_end = explicit_end
- self.use_version = version
- self.use_tags = tags
- self.serialized_nodes = {}
- self.anchors = {}
- self.last_anchor_id = 0
- self.closed = None
-
- def open(self):
- if self.closed is None:
- self.emit(StreamStartEvent(encoding=self.use_encoding))
- self.closed = False
- elif self.closed:
- raise SerializerError("serializer is closed")
- else:
- raise SerializerError("serializer is already opened")
-
- def close(self):
- if self.closed is None:
- raise SerializerError("serializer is not opened")
- elif not self.closed:
- self.emit(StreamEndEvent())
- self.closed = True
-
- #def __del__(self):
- # self.close()
-
- def serialize(self, node):
- if self.closed is None:
- raise SerializerError("serializer is not opened")
- elif self.closed:
- raise SerializerError("serializer is closed")
- self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
- version=self.use_version, tags=self.use_tags))
- self.anchor_node(node)
- self.serialize_node(node, None, None)
- self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
- self.serialized_nodes = {}
- self.anchors = {}
- self.last_alias_id = 0
-
- def anchor_node(self, node):
- if node in self.anchors:
- if self.anchors[node] is None:
- self.anchors[node] = self.generate_anchor(node)
- else:
- self.anchors[node] = None
- if isinstance(node, SequenceNode):
- for item in node.value:
- self.anchor_node(item)
- elif isinstance(node, MappingNode):
- for key, value in node.value:
- self.anchor_node(key)
- self.anchor_node(value)
-
- def generate_anchor(self, node):
- self.last_anchor_id += 1
- return self.ANCHOR_TEMPLATE % self.last_anchor_id
-
- def serialize_node(self, node, parent, index):
- alias = self.anchors[node]
- if node in self.serialized_nodes:
- self.emit(AliasEvent(alias))
- else:
- self.serialized_nodes[node] = True
- self.descend_resolver(parent, index)
- if isinstance(node, ScalarNode):
- detected_tag = self.resolve(ScalarNode, node.value, (True, False))
- default_tag = self.resolve(ScalarNode, node.value, (False, True))
- implicit = (node.tag == detected_tag), (node.tag == default_tag)
- self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
- style=node.style))
- elif isinstance(node, SequenceNode):
- implicit = (node.tag
- == self.resolve(SequenceNode, node.value, True))
- self.emit(SequenceStartEvent(alias, node.tag, implicit,
- flow_style=node.flow_style))
- index = 0
- for item in node.value:
- self.serialize_node(item, node, index)
- index += 1
- self.emit(SequenceEndEvent())
- elif isinstance(node, MappingNode):
- implicit = (node.tag
- == self.resolve(MappingNode, node.value, True))
- self.emit(MappingStartEvent(alias, node.tag, implicit,
- flow_style=node.flow_style))
- for key, value in node.value:
- self.serialize_node(key, node, None)
- self.serialize_node(value, node, key)
- self.emit(MappingEndEvent())
- self.ascend_resolver()
-
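The removed Serializer is the node-to-event stage of the same vendored copy. With stock PyYAML it is driven through yaml.serialize(); a small sketch showing the anchor generation described by ANCHOR_TEMPLATE above (shared nodes come back as &id001 / *id001):

    import yaml

    # compose() parses a document into a node graph; serialize() walks the
    # graph, anchoring nodes that occur more than once and emitting events.
    node = yaml.compose("- &a 1\n- *a\n")
    print yaml.serialize(node)
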
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/ext/yaml/tokens.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/yaml/tokens.py b/tools/bin/ext/yaml/tokens.py
deleted file mode 100644
index 41ee0fb..0000000
--- a/tools/bin/ext/yaml/tokens.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class Token(object):
- def __init__(self, start_mark, end_mark):
- self.start_mark = start_mark
- self.end_mark = end_mark
- def __repr__(self):
- attributes = [key for key in self.__dict__
- if not key.endswith('_mark')]
- attributes.sort()
- arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
- for key in attributes])
- return '%s(%s)' % (self.__class__.__name__, arguments)
-
-#class BOMToken(Token):
-# id = '<byte order mark>'
-
-class DirectiveToken(Token):
- id = '<directive>'
- def __init__(self, name, value, start_mark, end_mark):
- self.name = name
- self.value = value
- self.start_mark = start_mark
- self.end_mark = end_mark
-
-class DocumentStartToken(Token):
- id = '<document start>'
-
-class DocumentEndToken(Token):
- id = '<document end>'
-
-class StreamStartToken(Token):
- id = '<stream start>'
- def __init__(self, start_mark=None, end_mark=None,
- encoding=None):
- self.start_mark = start_mark
- self.end_mark = end_mark
- self.encoding = encoding
-
-class StreamEndToken(Token):
- id = '<stream end>'
-
-class BlockSequenceStartToken(Token):
- id = '<block sequence start>'
-
-class BlockMappingStartToken(Token):
- id = '<block mapping start>'
-
-class BlockEndToken(Token):
- id = '<block end>'
-
-class FlowSequenceStartToken(Token):
- id = '['
-
-class FlowMappingStartToken(Token):
- id = '{'
-
-class FlowSequenceEndToken(Token):
- id = ']'
-
-class FlowMappingEndToken(Token):
- id = '}'
-
-class KeyToken(Token):
- id = '?'
-
-class ValueToken(Token):
- id = ':'
-
-class BlockEntryToken(Token):
- id = '-'
-
-class FlowEntryToken(Token):
- id = ','
-
-class AliasToken(Token):
- id = '<alias>'
- def __init__(self, value, start_mark, end_mark):
- self.value = value
- self.start_mark = start_mark
- self.end_mark = end_mark
-
-class AnchorToken(Token):
- id = '<anchor>'
- def __init__(self, value, start_mark, end_mark):
- self.value = value
- self.start_mark = start_mark
- self.end_mark = end_mark
-
-class TagToken(Token):
- id = '<tag>'
- def __init__(self, value, start_mark, end_mark):
- self.value = value
- self.start_mark = start_mark
- self.end_mark = end_mark
-
-class ScalarToken(Token):
- id = '<scalar>'
- def __init__(self, value, plain, start_mark, end_mark, style=None):
- self.value = value
- self.plain = plain
- self.start_mark = start_mark
- self.end_mark = end_mark
- self.style = style
-
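As the __repr__ above shows, tokens print only their payload attributes and skip the *_mark positions. A quick illustration with stock PyYAML:

    import yaml

    for tok in yaml.scan("a: 1"):
        print tok   # e.g. ScalarToken(plain=True, style=None, value=u'a')
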
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/generate-greenplum-path.sh
----------------------------------------------------------------------
diff --git a/tools/bin/generate-greenplum-path.sh b/tools/bin/generate-greenplum-path.sh
index 92c458d..3fcd2fb 100755
--- a/tools/bin/generate-greenplum-path.sh
+++ b/tools/bin/generate-greenplum-path.sh
@@ -91,7 +91,7 @@ fi
#setup PYTHONPATH
cat <<EOF
-PYTHONPATH=\$GPHOME/lib/python:\$PYTHONPATH
+PYTHONPATH=\$GPHOME/lib/python:\$GPHOME/lib/python/pygresql:\$PYTHONPATH
EOF
# openssl configuration file path
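The added pygresql path entry is what lets the tools changed below drop the pygresql. package prefix. A sketch of the equivalent resolution at runtime; the GPHOME fallback here is only an assumption for illustration, in practice greenplum_path.sh exports it:

    import os, sys

    gphome = os.environ.get("GPHOME", "/usr/local/hawq")  # hypothetical default
    # With .../lib/python/pygresql on sys.path, pg.py and pgdb.py resolve as
    # top-level modules, matching the bare "import pg" / "import pgdb" below.
    sys.path.insert(0, os.path.join(gphome, "lib", "python", "pygresql"))
    import pg, pgdb
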
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gpcheck
----------------------------------------------------------------------
diff --git a/tools/bin/gpcheck b/tools/bin/gpcheck
index 5b7dcff..4125ed3 100755
--- a/tools/bin/gpcheck
+++ b/tools/bin/gpcheck
@@ -26,8 +26,8 @@ try:
from gppylib.commands.unix import getLocalHostname, getUserName, SYSTEM
from gppylib.commands.base import WorkerPool, Command, REMOTE
from gppylib.gpcheckutil import HostType, hosttype_str
- from pygresql.pgdb import DatabaseError
- from pygresql import pg
+ from pgdb import DatabaseError
+ import pg
except ImportError, e:
sys.exit('Cannot import modules. Please check that you have sourced greenplum_path.sh. Detail: ' + str(e))
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gpextract
----------------------------------------------------------------------
diff --git a/tools/bin/gpextract b/tools/bin/gpextract
index 9a2e182..be08117 100644
--- a/tools/bin/gpextract
+++ b/tools/bin/gpextract
@@ -93,8 +93,8 @@ try:
from gppylib.db import dbconn
from gppylib.gplog import get_default_logger, setup_tool_logging
from gppylib.gpparseopts import OptParser, OptChecker
- from pygresql import pg
- from pygresql.pgdb import DatabaseError
+ import pg
+ from pgdb import DatabaseError
import yaml
except ImportError, e:
print e
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gpload.py
----------------------------------------------------------------------
diff --git a/tools/bin/gpload.py b/tools/bin/gpload.py
index bde5d6c..454da7a 100755
--- a/tools/bin/gpload.py
+++ b/tools/bin/gpload.py
@@ -49,7 +49,7 @@ except ImportError:
sys.exit(2)
try:
- from pygresql import pg
+ import pg
except Exception, e:
errorMsg = "gpload was unable to import The PyGreSQL Python module (pg.py) - %s\n" % str(e)
sys.stderr.write(str(errorMsg))
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gppylib/commands/base.py
----------------------------------------------------------------------
diff --git a/tools/bin/gppylib/commands/base.py b/tools/bin/gppylib/commands/base.py
index 801d85f..132c108 100755
--- a/tools/bin/gppylib/commands/base.py
+++ b/tools/bin/gppylib/commands/base.py
@@ -42,7 +42,7 @@ import time
from gppylib import gplog
from gppylib import gpsubprocess
-from pygresql.pg import DB
+#from pg import DB
# paramiko prints deprecation warnings which are ugly to the end-user
import warnings
@@ -697,11 +697,19 @@ class Command:
def run(self,validateAfter=False):
faultPoint = os.getenv('GP_COMMAND_FAULT_POINT')
+ print "### DEBUG: ENV[GP_COMMAND_FAULT_POINT] = %s" % (faultPoint if faultPoint else "None")
+ print "### DEBUG: self.name = %s" % ("SomeName" if self.name else "None")
if not faultPoint or (self.name and not self.name.startswith(faultPoint)):
+ print "### DEBUG: EXECUTE name = %s" % ("SomeName" if self.name else "None")
+ print "### DEBUG: EXECUTE cmdStr = %s" % ("SomeCmdStr" if self.cmdStr else "None")
+ print "### DEBUG: EXECUTE context = %s" % ("SomeExecContext" if self.exec_context else "None")
+ print "### DEBUG: EXECUTE remoteHost = %s" % ("SomeRemoteHost" if self.remoteHost else "None")
self.exec_context.execute(self)
else:
# simulate error
+ print "### DEBUG: CommandResult"
self.results = CommandResult(1,'Fault Injection','Fault Injection' ,False,True)
+ print self.results
if validateAfter:
self.validate()
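The fault-injection branch above is keyed off the GP_COMMAND_FAULT_POINT environment variable. A standalone sketch of the same guard logic, with a hypothetical command name:

    import os

    # Mirrors the guard in Command.run(): the command executes unless its name
    # starts with the value of GP_COMMAND_FAULT_POINT, in which case a
    # synthetic failing CommandResult is injected instead.
    faultPoint = os.getenv('GP_COMMAND_FAULT_POINT')
    name = 'CopySegmentFiles'   # hypothetical command name
    if not faultPoint or (name and not name.startswith(faultPoint)):
        print "would execute the command"
    else:
        print "would inject CommandResult(1, 'Fault Injection', ...)"
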
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gppylib/db/dbconn.py
----------------------------------------------------------------------
diff --git a/tools/bin/gppylib/db/dbconn.py b/tools/bin/gppylib/db/dbconn.py
index 6a474ce..0d2209a 100644
--- a/tools/bin/gppylib/db/dbconn.py
+++ b/tools/bin/gppylib/db/dbconn.py
@@ -24,7 +24,7 @@ import os
import stat
try:
- from pygresql import pgdb
+ import pgdb
from gppylib.commands.unix import UserId
except ImportError, e:
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gppylib/operations/gpMigratorUtil.py
----------------------------------------------------------------------
diff --git a/tools/bin/gppylib/operations/gpMigratorUtil.py b/tools/bin/gppylib/operations/gpMigratorUtil.py
index a6c8c36..73c5637 100644
--- a/tools/bin/gppylib/operations/gpMigratorUtil.py
+++ b/tools/bin/gppylib/operations/gpMigratorUtil.py
@@ -27,7 +27,7 @@ from threading import Thread
from Queue import Queue
import urllib # for passing strings across gpssh
import shutil # for copying
-from pygresql import pg # Database interaction
+import pg # Database interaction
from gppylib.gplog import * # Greenplum logging facility
from gppylib.commands import base # Greenplum layer for worker pools
from gppylib.commands import unix # Greenplum layer for unix interaction
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gppylib/operations/test_utils_helper.py
----------------------------------------------------------------------
diff --git a/tools/bin/gppylib/operations/test_utils_helper.py b/tools/bin/gppylib/operations/test_utils_helper.py
index 51be194..3cefe97 100755
--- a/tools/bin/gppylib/operations/test_utils_helper.py
+++ b/tools/bin/gppylib/operations/test_utils_helper.py
@@ -58,5 +58,5 @@ class ExceptionWithArgsUnsafe(Exception):
class RaiseOperation_Unpicklable(Operation):
def execute(self):
- from pygresql import pg
+ import pg
raise pg.DatabaseError()
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/gppylib/test/regress/test_regress_pygresql.py
----------------------------------------------------------------------
diff --git a/tools/bin/gppylib/test/regress/test_regress_pygresql.py b/tools/bin/gppylib/test/regress/test_regress_pygresql.py
index e086265..5b5a8b0 100755
--- a/tools/bin/gppylib/test/regress/test_regress_pygresql.py
+++ b/tools/bin/gppylib/test/regress/test_regress_pygresql.py
@@ -22,8 +22,8 @@ import logging
import unittest2 as unittest
-from pygresql import pg
-from pygresql import pgdb
+import pg
+import pgdb
from gppylib import gplog
from gppylib.db.dbconn import *
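
The regression test above exercises both halves of PyGreSQL, which survive
the rename unchanged: "pg" is the classic interface and "pgdb" the DB-API
2.0 one. A hedged sketch of the two side by side (the database name is a
placeholder):

    import pg    # classic PyGreSQL interface
    import pgdb  # DB-API 2.0 interface

    # classic: pg.DB wraps a connection with convenience methods
    db = pg.DB(dbname='template1')
    print(db.query('SELECT version()').getresult())

    # DB-API: connect / cursor / execute / fetch
    con = pgdb.connect(database='template1')
    cur = con.cursor()
    cur.execute('SELECT version()')
    print(cur.fetchone()[0])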
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 036f8ea..375c7fa 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -38,7 +38,7 @@ try:
check_syncmaster_running, create_cluster_directory
from hawqpylib.HAWQ_HELP import *
from gppylib.db import dbconn
- from pygresql.pg import DatabaseError
+ from pg import DatabaseError
except ImportError, e:
sys.exit('ERROR: Cannot import modules. Please check that you '
'have sourced greenplum_path.sh. Detail: ' + str(e))
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/hawqconfig
----------------------------------------------------------------------
diff --git a/tools/bin/hawqconfig b/tools/bin/hawqconfig
index f425972..9bbdf5d 100755
--- a/tools/bin/hawqconfig
+++ b/tools/bin/hawqconfig
@@ -20,7 +20,7 @@ import os, sys, re
from optparse import Option, OptionParser
from hawqpylib.hawqlib import HawqXMLParser, parse_hosts_file, remove_property_xml
from gppylib.db import dbconn
-from pygresql.pg import DatabaseError
+from pg import DatabaseError
def parseargs():
parser = OptionParser(usage="HAWQ config options.")
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
index 9a2e182..be08117 100755
--- a/tools/bin/hawqextract
+++ b/tools/bin/hawqextract
@@ -93,8 +93,8 @@ try:
from gppylib.db import dbconn
from gppylib.gplog import get_default_logger, setup_tool_logging
from gppylib.gpparseopts import OptParser, OptChecker
- from pygresql import pg
- from pygresql.pgdb import DatabaseError
+ import pg
+ from pgdb import DatabaseError
import yaml
except ImportError, e:
print e
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/hawqfilespace
----------------------------------------------------------------------
diff --git a/tools/bin/hawqfilespace b/tools/bin/hawqfilespace
index eba7c5b..ab9dc9c 100755
--- a/tools/bin/hawqfilespace
+++ b/tools/bin/hawqfilespace
@@ -63,7 +63,7 @@ try:
from gppylib.commands import unix
from gppylib.db import dbconn
from gppylib.db import catalog
- from pygresql import pg # Database interaction
+ import pg # Database interaction
from gppylib.gplog import * # Greenplum logging facility
from getpass import getpass
from gppylib.parseutils import line_reader, parse_fspacename, parse_dfs_url, parse_fspacesys, parse_fspacereplica, parse_gpfilespace_line, \
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/hawqstate
----------------------------------------------------------------------
diff --git a/tools/bin/hawqstate b/tools/bin/hawqstate
index 3344f20..feffb89 100755
--- a/tools/bin/hawqstate
+++ b/tools/bin/hawqstate
@@ -21,7 +21,7 @@ import sys
from optparse import Option, OptionParser
from hawqpylib.hawqlib import HawqXMLParser, parse_hosts_file, check_file_exist_list
from gppylib.db import dbconn
-from pygresql.pg import DatabaseError
+from pg import DatabaseError
from gppylib.gplog import get_default_logger, setup_hawq_tool_logging, quiet_stdout_logging, enable_verbose_logging
from gppylib.commands.unix import getLocalHostname, getUserName
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/lib/gpcheckcat
----------------------------------------------------------------------
diff --git a/tools/bin/lib/gpcheckcat b/tools/bin/lib/gpcheckcat
index a7c2f16..186f9c5 100755
--- a/tools/bin/lib/gpcheckcat
+++ b/tools/bin/lib/gpcheckcat
@@ -46,8 +46,8 @@ try:
from gppylib.gplog import *
from gppylib.gpcatalog import *
from gppylib.commands.unix import *
- from pygresql.pgdb import DatabaseError
- from pygresql import pg
+ from pgdb import DatabaseError
+ import pg
except ImportError, e:
sys.exit('Error: unable to import module: ' + str(e))
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/pythonSrc/.gitignore
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/.gitignore b/tools/bin/pythonSrc/.gitignore
deleted file mode 100644
index df40d44..0000000
--- a/tools/bin/pythonSrc/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-lockfile-0.9.1
-paramiko-1.7.6-9
-PSI-0.3b2_gp
-pychecker-0.8.18
-pycrypto-2.0.1
-PyGreSQL-4.0
-unittest2-0.5.1
-._paramiko-1.7.6-9
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/pythonSrc/PSI-0.3b2_gp.tar.gz
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/PSI-0.3b2_gp.tar.gz b/tools/bin/pythonSrc/PSI-0.3b2_gp.tar.gz
deleted file mode 100644
index 303d225..0000000
Binary files a/tools/bin/pythonSrc/PSI-0.3b2_gp.tar.gz and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html
deleted file mode 100644
index 2683a15..0000000
--- a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.html
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.4: http://docutils.sourceforge.net/" />
-<title>PyGreSQL Announcements</title>
-<link rel="stylesheet" href="docs.css" type="text/css" />
-</head>
-<body>
-<div class="document" id="pygresql-announcements">
-<h1 class="title">PyGreSQL Announcements</h1>
-<h2 class="subtitle" id="release-of-pygresql-version-4-0">Release of PyGreSQL version 4.0</h2>
-<p>PyGreSQL v4.0 has been released.</p>
-<p>It is available at: <a class="reference" href="ftp://ftp.PyGreSQL.org/pub/distrib/PyGreSQL-4.0.tgz">ftp://ftp.PyGreSQL.org/pub/distrib/PyGreSQL-4.0.tgz</a>.</p>
-<p>If you are running NetBSD, look in the packages directory under databases.
-There is also a package in the FreeBSD ports collection.</p>
-<p>Please refer to <a class="reference" href="changelog.html">changelog.txt</a>
-for things that have changed in this version.</p>
-<p>Please refer to <a class="reference" href="readme.html">readme.txt</a>
-for general information.</p>
-<div class="line-block">
-<div class="line">D'Arcy J.M. Cain</div>
-<div class="line"><a class="reference" href="mailto:darcy@PyGreSQL.org">darcy@PyGreSQL.org</a></div>
-</div>
-</div>
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0672292f/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt
----------------------------------------------------------------------
diff --git a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt b/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt
deleted file mode 100644
index fa08458..0000000
--- a/tools/bin/pythonSrc/PyGreSQL-4.0/docs/announce.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-======================
-PyGreSQL Announcements
-======================
-
--------------------------------
-Release of PyGreSQL version 4.0
--------------------------------
-
-PyGreSQL v4.0 has been released.
-
-It is available at: ftp://ftp.PyGreSQL.org/pub/distrib/PyGreSQL-4.0.tgz.
-
-If you are running NetBSD, look in the packages directory under databases.
-There is also a package in the FreeBSD ports collection.
-
-Please refer to `changelog.txt <changelog.html>`_
-for things that have changed in this version.
-
-Please refer to `readme.txt <readme.html>`_
-for general information.
-
-| D'Arcy J.M. Cain
-| darcy@PyGreSQL.org