Posted to commits@impala.apache.org by jo...@apache.org on 2018/05/10 21:48:56 UTC
[4/8] impala git commit: IMPALA-6999: Upgrade to sqlparse-0.1.19 for Impala shell
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py
new file mode 100644
index 0000000..676344f
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/filters.py
@@ -0,0 +1,728 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from os.path import abspath, join
+
+from sqlparse import sql, tokens as T
+from sqlparse.engine import FilterStack
+from sqlparse.lexer import tokenize
+from sqlparse.pipeline import Pipeline
+from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation,
+ String, Whitespace)
+from sqlparse.utils import memoize_generator
+from sqlparse.utils import split_unquoted_newlines
+
+
+# --------------------------
+# token process
+
+class _CaseFilter:
+
+ ttype = None
+
+ def __init__(self, case=None):
+ if case is None:
+ case = 'upper'
+ assert case in ['lower', 'upper', 'capitalize']
+ self.convert = getattr(unicode, case)
+
+ def process(self, stack, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype:
+ value = self.convert(value)
+ yield ttype, value
+
+
+class KeywordCaseFilter(_CaseFilter):
+ ttype = T.Keyword
+
+
+class IdentifierCaseFilter(_CaseFilter):
+ ttype = (T.Name, T.String.Symbol)
+
+ def process(self, stack, stream):
+ for ttype, value in stream:
+ if ttype in self.ttype and not value.strip()[0] == '"':
+ value = self.convert(value)
+ yield ttype, value
+
+
+class TruncateStringFilter:
+
+ def __init__(self, width, char):
+ self.width = max(width, 1)
+ self.char = unicode(char)
+
+ def process(self, stack, stream):
+ for ttype, value in stream:
+ if ttype is T.Literal.String.Single:
+ if value[:2] == '\'\'':
+ inner = value[2:-2]
+ quote = u'\'\''
+ else:
+ inner = value[1:-1]
+ quote = u'\''
+ if len(inner) > self.width:
+ value = u''.join((quote, inner[:self.width], self.char,
+ quote))
+ yield ttype, value
+
+
+class GetComments:
+ """Get the comments from a stack"""
+ def process(self, stack, stream):
+ for token_type, value in stream:
+ if token_type in Comment:
+ yield token_type, value
+
+
+class StripComments:
+ """Strip the comments from a stack"""
+ def process(self, stack, stream):
+ for token_type, value in stream:
+ if token_type not in Comment:
+ yield token_type, value
+
+
+def StripWhitespace(stream):
+ "Strip the useless whitespaces from a stream leaving only the minimal ones"
+ last_type = None
+ has_space = False
+ ignore_group = frozenset((Comparison, Punctuation))
+
+ for token_type, value in stream:
+ # We have already seen a previous non-empty token
+ if last_type:
+ if token_type in Whitespace:
+ has_space = True
+ continue
+
+ # Ignore leading whitespace and punctuation
+ elif token_type in (Whitespace, Whitespace.Newline, ignore_group):
+ continue
+
+ # Yield a whitespace if it can't be ignored
+ if has_space:
+ if not ignore_group.intersection((last_type, token_type)):
+ yield Whitespace, ' '
+ has_space = False
+
+ # Yield the token and set its type for checking with the next one
+ yield token_type, value
+ last_type = token_type
+
+
+class IncludeStatement:
+ """Filter that enable a INCLUDE statement"""
+
+ def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False):
+ if maxrecursive <= 0:
+ raise ValueError('Max recursion limit reached')
+
+ self.dirpath = abspath(dirpath)
+ self.maxRecursive = maxrecursive
+ self.raiseexceptions = raiseexceptions
+
+ self.detected = False
+
+ @memoize_generator
+ def process(self, stack, stream):
+ # Run over all tokens in the stream
+ for token_type, value in stream:
+ # INCLUDE statement found, set detected mode
+ if token_type in Name and value.upper() == 'INCLUDE':
+ self.detected = True
+ continue
+
+ # INCLUDE statement was found, parse it
+ elif self.detected:
+ # Omit whitespaces
+ if token_type in Whitespace:
+ continue
+
+ # Found file path to include
+ if token_type in String.Symbol:
+# if token_type in tokens.String.Symbol:
+
+ # Get path of file to include
+ path = join(self.dirpath, value[1:-1])
+
+ try:
+ f = open(path)
+ raw_sql = f.read()
+ f.close()
+
+ # There was a problem loading the include file
+ except IOError, err:
+ # Raise the exception to the interpreter
+ if self.raiseexceptions:
+ raise
+
+ # Put the exception as a comment on the SQL code
+ yield Comment, u'-- IOError: %s\n' % err
+
+ else:
+ # Create a new FilterStack to parse the included file
+ # and add all its tokens to the main stack recursively
+ try:
+ filtr = IncludeStatement(self.dirpath,
+ self.maxRecursive - 1,
+ self.raiseexceptions)
+
+ # Max recursion limit reached
+ except ValueError, err:
+ # Raise the exception to the interpreter
+ if self.raiseexceptions:
+ raise
+
+ # Put the exception as a comment on the SQL code
+ yield Comment, u'-- ValueError: %s\n' % err
+
+ stack = FilterStack()
+ stack.preprocess.append(filtr)
+
+ for tv in stack.run(raw_sql):
+ yield tv
+
+ # Set normal mode
+ self.detected = False
+
+ # Don't include any token while in detected mode
+ continue
+
+ # Normal token
+ yield token_type, value
+
+
+# ----------------------
+# statement process
+
+class StripCommentsFilter:
+
+ def _get_next_comment(self, tlist):
+ # TODO(andi) Comment types should be unified, see related issue38
+ token = tlist.token_next_by_instance(0, sql.Comment)
+ if token is None:
+ token = tlist.token_next_by_type(0, T.Comment)
+ return token
+
+ def _process(self, tlist):
+ token = self._get_next_comment(tlist)
+ while token:
+ tidx = tlist.token_index(token)
+ prev = tlist.token_prev(tidx, False)
+ next_ = tlist.token_next(tidx, False)
+ # Replace with whitespace if prev and next exist and are not
+ # whitespace themselves. This doesn't apply if prev or next is a
+ # parenthesis.
+ if (prev is not None and next_ is not None
+ and not prev.is_whitespace() and not next_.is_whitespace()
+ and not (prev.match(T.Punctuation, '(')
+ or next_.match(T.Punctuation, ')'))):
+ tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
+ else:
+ tlist.tokens.pop(tidx)
+ token = self._get_next_comment(tlist)
+
+ def process(self, stack, stmt):
+ [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
+ self._process(stmt)
+
+
+class StripWhitespaceFilter:
+
+ def _stripws(self, tlist):
+ func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._stripws_default)
+ func(tlist)
+
+ def _stripws_default(self, tlist):
+ last_was_ws = False
+ for token in tlist.tokens:
+ if token.is_whitespace():
+ if last_was_ws:
+ token.value = ''
+ else:
+ token.value = ' '
+ last_was_ws = token.is_whitespace()
+
+ def _stripws_identifierlist(self, tlist):
+ # Removes newlines before commas, see issue140
+ last_nl = None
+ for token in tlist.tokens[:]:
+ if (token.ttype is T.Punctuation
+ and token.value == ','
+ and last_nl is not None):
+ tlist.tokens.remove(last_nl)
+ if token.is_whitespace():
+ last_nl = token
+ else:
+ last_nl = None
+ return self._stripws_default(tlist)
+
+ def _stripws_parenthesis(self, tlist):
+ if tlist.tokens[1].is_whitespace():
+ tlist.tokens.pop(1)
+ if tlist.tokens[-2].is_whitespace():
+ tlist.tokens.pop(-2)
+ self._stripws_default(tlist)
+
+ def process(self, stack, stmt, depth=0):
+ [self.process(stack, sgroup, depth + 1)
+ for sgroup in stmt.get_sublists()]
+ self._stripws(stmt)
+ if (
+ depth == 0
+ and stmt.tokens
+ and stmt.tokens[-1].is_whitespace()
+ ):
+ stmt.tokens.pop(-1)
+
+
+class ReindentFilter:
+
+ def __init__(self, width=2, char=' ', line_width=None):
+ self.width = width
+ self.char = char
+ self.indent = 0
+ self.offset = 0
+ self.line_width = line_width
+ self._curr_stmt = None
+ self._last_stmt = None
+
+ def _flatten_up_to_token(self, token):
+ """Yields all tokens up to token plus the next one."""
+ # helper for _get_offset
+ iterator = self._curr_stmt.flatten()
+ for t in iterator:
+ yield t
+ if t == token:
+ raise StopIteration
+
+ def _get_offset(self, token):
+ raw = ''.join(map(unicode, self._flatten_up_to_token(token)))
+ line = raw.splitlines()[-1]
+ # Now take current offset into account and return relative offset.
+ full_offset = len(line) - len(self.char * (self.width * self.indent))
+ return full_offset - self.offset
+
+ def nl(self):
+ # TODO: newline character should be configurable
+ space = (self.char * ((self.indent * self.width) + self.offset))
+ # Detect runaway indenting due to parsing errors
+ if len(space) > 200:
+ # something seems to be wrong, flip back
+ self.indent = self.offset = 0
+ space = (self.char * ((self.indent * self.width) + self.offset))
+ ws = '\n' + space
+ return sql.Token(T.Whitespace, ws)
+
+ def _split_kwds(self, tlist):
+ split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
+ 'GROUP', 'ORDER', 'UNION', 'VALUES',
+ 'SET', 'BETWEEN', 'EXCEPT', 'HAVING')
+
+ def _next_token(i):
+ t = tlist.token_next_match(i, T.Keyword, split_words,
+ regex=True)
+ if t and t.value.upper() == 'BETWEEN':
+ t = _next_token(tlist.token_index(t) + 1)
+ if t and t.value.upper() == 'AND':
+ t = _next_token(tlist.token_index(t) + 1)
+ return t
+
+ idx = 0
+ token = _next_token(idx)
+ added = set()
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ offset = 1
+ if prev and prev.is_whitespace() and prev not in added:
+ tlist.tokens.pop(tlist.token_index(prev))
+ offset += 1
+ uprev = unicode(prev)
+ if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))):
+ nl = tlist.token_next(token)
+ else:
+ nl = self.nl()
+ added.add(nl)
+ tlist.insert_before(token, nl)
+ offset += 1
+ token = _next_token(tlist.token_index(nl) + offset)
+
+ def _split_statements(self, tlist):
+ idx = 0
+ token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
+ while token:
+ prev = tlist.token_prev(tlist.token_index(token), False)
+ if prev and prev.is_whitespace():
+ tlist.tokens.pop(tlist.token_index(prev))
+ # only break if it's not the first token
+ if prev:
+ nl = self.nl()
+ tlist.insert_before(token, nl)
+ token = tlist.token_next_by_type(tlist.token_index(token) + 1,
+ (T.Keyword.DDL, T.Keyword.DML))
+
+ def _process(self, tlist):
+ func_name = '_process_%s' % tlist.__class__.__name__.lower()
+ func = getattr(self, func_name, self._process_default)
+ func(tlist)
+
+ def _process_where(self, tlist):
+ token = tlist.token_next_match(0, T.Keyword, 'WHERE')
+ try:
+ tlist.insert_before(token, self.nl())
+ except ValueError: # issue121, errors in statement
+ pass
+ self.indent += 1
+ self._process_default(tlist)
+ self.indent -= 1
+
+ def _process_having(self, tlist):
+ token = tlist.token_next_match(0, T.Keyword, 'HAVING')
+ try:
+ tlist.insert_before(token, self.nl())
+ except ValueError: # issue121, errors in statement
+ pass
+ self.indent += 1
+ self._process_default(tlist)
+ self.indent -= 1
+
+ def _process_parenthesis(self, tlist):
+ first = tlist.token_next(0)
+ indented = False
+ if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ self.indent += 1
+ tlist.tokens.insert(0, self.nl())
+ indented = True
+ num_offset = self._get_offset(
+ tlist.token_next_match(0, T.Punctuation, '('))
+ self.offset += num_offset
+ self._process_default(tlist, stmts=not indented)
+ if indented:
+ self.indent -= 1
+ self.offset -= num_offset
+
+ def _process_identifierlist(self, tlist):
+ identifiers = list(tlist.get_identifiers())
+ if len(identifiers) > 1 and not tlist.within(sql.Function):
+ first = list(identifiers[0].flatten())[0]
+ if self.char == '\t':
+ # when using tabs we don't count the actual word length
+ # in spaces.
+ num_offset = 1
+ else:
+ num_offset = self._get_offset(first) - len(first.value)
+ self.offset += num_offset
+ for token in identifiers[1:]:
+ tlist.insert_before(token, self.nl())
+ self.offset -= num_offset
+ self._process_default(tlist)
+
+ def _process_case(self, tlist):
+ is_first = True
+ num_offset = None
+ case = tlist.tokens[0]
+ outer_offset = self._get_offset(case) - len(case.value)
+ self.offset += outer_offset
+ for cond, value in tlist.get_cases():
+ if is_first:
+ tcond = list(cond[0].flatten())[0]
+ is_first = False
+ num_offset = self._get_offset(tcond) - len(tcond.value)
+ self.offset += num_offset
+ continue
+ if cond is None:
+ token = value[0]
+ else:
+ token = cond[0]
+ tlist.insert_before(token, self.nl())
+ # Line breaks on group level are done. Now let's add an offset of
+ # 5 (=length of "when", "then", "else") and process subgroups.
+ self.offset += 5
+ self._process_default(tlist)
+ self.offset -= 5
+ if num_offset is not None:
+ self.offset -= num_offset
+ end = tlist.token_next_match(0, T.Keyword, 'END')
+ tlist.insert_before(end, self.nl())
+ self.offset -= outer_offset
+
+ def _process_default(self, tlist, stmts=True, kwds=True):
+ if stmts:
+ self._split_statements(tlist)
+ if kwds:
+ self._split_kwds(tlist)
+ [self._process(sgroup) for sgroup in tlist.get_sublists()]
+
+ def process(self, stack, stmt):
+ if isinstance(stmt, sql.Statement):
+ self._curr_stmt = stmt
+ self._process(stmt)
+ if isinstance(stmt, sql.Statement):
+ if self._last_stmt is not None:
+ if unicode(self._last_stmt).endswith('\n'):
+ nl = '\n'
+ else:
+ nl = '\n\n'
+ stmt.tokens.insert(
+ 0, sql.Token(T.Whitespace, nl))
+ if self._last_stmt != stmt:
+ self._last_stmt = stmt
+
+
+# FIXME: Doesn't work ;)
+class RightMarginFilter:
+
+ keep_together = (
+ # sql.TypeCast, sql.Identifier, sql.Alias,
+ )
+
+ def __init__(self, width=79):
+ self.width = width
+ self.line = ''
+
+ def _process(self, stack, group, stream):
+ for token in stream:
+ if token.is_whitespace() and '\n' in token.value:
+ if token.value.endswith('\n'):
+ self.line = ''
+ else:
+ self.line = token.value.splitlines()[-1]
+ elif (token.is_group()
+ and not token.__class__ in self.keep_together):
+ token.tokens = self._process(stack, token, token.tokens)
+ else:
+ val = unicode(token)
+ if len(self.line) + len(val) > self.width:
+ match = re.search('^ +', self.line)
+ if match is not None:
+ indent = match.group()
+ else:
+ indent = ''
+ yield sql.Token(T.Whitespace, '\n%s' % indent)
+ self.line = indent
+ self.line += val
+ yield token
+
+ def process(self, stack, group):
+ return
+ group.tokens = self._process(stack, group, group.tokens)
+
+
+class ColumnsSelect:
+ """Get the columns names of a SELECT query"""
+ def process(self, stack, stream):
+ mode = 0
+ oldValue = ""
+ parenthesis = 0
+
+ for token_type, value in stream:
+ # Ignore comments
+ if token_type in Comment:
+ continue
+
+ # We have not detected a SELECT statement
+ if mode == 0:
+ if token_type in Keyword and value == 'SELECT':
+ mode = 1
+
+ # We have detected a SELECT statement
+ elif mode == 1:
+ if value == 'FROM':
+ if oldValue:
+ yield oldValue
+
+ mode = 3 # Columns have been checked
+
+ elif value == 'AS':
+ oldValue = ""
+ mode = 2
+
+ elif (token_type == Punctuation
+ and value == ',' and not parenthesis):
+ if oldValue:
+ yield oldValue
+ oldValue = ""
+
+ elif token_type not in Whitespace:
+ if value == '(':
+ parenthesis += 1
+ elif value == ')':
+ parenthesis -= 1
+
+ oldValue += value
+
+ # We are processing an AS keyword
+ elif mode == 2:
+ # Also check for Keywords because of a bug in SQLParse
+ if token_type == Name or token_type == Keyword:
+ yield value
+ mode = 1
+
+
+# ---------------------------
+# postprocess
+
+class SerializerUnicode:
+
+ def process(self, stack, stmt):
+ raw = unicode(stmt)
+ lines = split_unquoted_newlines(raw)
+ res = '\n'.join(line.rstrip() for line in lines)
+ return res
+
+
+def Tokens2Unicode(stream):
+ result = ""
+
+ for _, value in stream:
+ result += unicode(value)
+
+ return result
+
+
+class OutputFilter:
+ varname_prefix = ''
+
+ def __init__(self, varname='sql'):
+ self.varname = self.varname_prefix + varname
+ self.count = 0
+
+ def _process(self, stream, varname, has_nl):
+ raise NotImplementedError
+
+ def process(self, stack, stmt):
+ self.count += 1
+ if self.count > 1:
+ varname = '%s%d' % (self.varname, self.count)
+ else:
+ varname = self.varname
+
+ has_nl = len(unicode(stmt).strip().splitlines()) > 1
+ stmt.tokens = self._process(stmt.tokens, varname, has_nl)
+ return stmt
+
+
+class OutputPythonFilter(OutputFilter):
+ def _process(self, stream, varname, has_nl):
+ # SQL query assignment to varname
+ if self.count > 1:
+ yield sql.Token(T.Whitespace, '\n')
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '=')
+ yield sql.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield sql.Token(T.Operator, '(')
+ yield sql.Token(T.Text, "'")
+
+ # Emit the tokens inside the quoted string
+ for token in stream:
+ # Token is a new line separator
+ if token.is_whitespace() and '\n' in token.value:
+ # Close quote and add a new line
+ yield sql.Token(T.Text, " '")
+ yield sql.Token(T.Whitespace, '\n')
+
+ # Quote header on secondary lines
+ yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
+ yield sql.Token(T.Text, "'")
+
+ # Indentation
+ after_lb = token.value.split('\n', 1)[1]
+ if after_lb:
+ yield sql.Token(T.Whitespace, after_lb)
+ continue
+
+ # Token has escape chars
+ elif "'" in token.value:
+ token.value = token.value.replace("'", "\\'")
+
+ # Put the token
+ yield sql.Token(T.Text, token.value)
+
+ # Close quote
+ yield sql.Token(T.Text, "'")
+ if has_nl:
+ yield sql.Token(T.Operator, ')')
+
+
+class OutputPHPFilter(OutputFilter):
+ varname_prefix = '$'
+
+ def _process(self, stream, varname, has_nl):
+ # SQL query assignment to varname (quote header)
+ if self.count > 1:
+ yield sql.Token(T.Whitespace, '\n')
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ if has_nl:
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '=')
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Text, '"')
+
+ # Emit the tokens inside the quoted string
+ for token in stream:
+ # Token is a new line separator
+ if token.is_whitespace() and '\n' in token.value:
+ # Close quote and add a new line
+ yield sql.Token(T.Text, ' ";')
+ yield sql.Token(T.Whitespace, '\n')
+
+ # Quote header on secondary lines
+ yield sql.Token(T.Name, varname)
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Operator, '.=')
+ yield sql.Token(T.Whitespace, ' ')
+ yield sql.Token(T.Text, '"')
+
+ # Indentation
+ after_lb = token.value.split('\n', 1)[1]
+ if after_lb:
+ yield sql.Token(T.Whitespace, after_lb)
+ continue
+
+ # Token has escape chars
+ elif '"' in token.value:
+ token.value = token.value.replace('"', '\\"')
+
+ # Put the token
+ yield sql.Token(T.Text, token.value)
+
+ # Close quote
+ yield sql.Token(T.Text, '"')
+ yield sql.Token(T.Punctuation, ';')
+
+
+class Limit:
+ """Get the LIMIT of a query.
+
+ If no LIMIT clause is found, return -1.
+ """
+ def process(self, stack, stream):
+ index = 7
+ stream = list(stream)
+ stream.reverse()
+
+ # Run over all tokens in the stream from the end
+ for token_type, value in stream:
+ index -= 1
+
+# if index and token_type in Keyword:
+ if index and token_type in Keyword and value == 'LIMIT':
+ return stream[4 - index][1]
+
+ return -1
+
+
+def compact(stream):
+ """Function that return a compacted version of the stream"""
+ pipe = Pipeline()
+
+ pipe.append(StripComments())
+ pipe.append(StripWhitespace)
+
+ return pipe(stream)
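
For orientation, a minimal usage sketch of the token-level filters above, assuming the tokenize() entry point from sqlparse/lexer.py elsewhere in this commit (illustrative Python 2, not part of the diff itself):

    from sqlparse.lexer import tokenize
    from sqlparse.filters import Tokens2Unicode, compact

    stream = tokenize('SELECT   id ,  name  -- trailing comment\nFROM t;')
    # compact() chains StripComments and StripWhitespace in a Pipeline
    # and returns a list of (token_type, value) pairs.
    print Tokens2Unicode(compact(stream))
    # Roughly: u'SELECT id,name FROM t;' -- StripWhitespace's Punctuation
    # ignore group drops the spaces around the comma.
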
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py
new file mode 100644
index 0000000..811f5af
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/formatter.py
@@ -0,0 +1,137 @@
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL formatter"""
+
+from sqlparse import filters
+from sqlparse.exceptions import SQLParseError
+
+
+def validate_options(options):
+ """Validates options."""
+ kwcase = options.get('keyword_case', None)
+ if kwcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
+
+ idcase = options.get('identifier_case', None)
+ if idcase not in [None, 'upper', 'lower', 'capitalize']:
+ raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
+
+ ofrmt = options.get('output_format', None)
+ if ofrmt not in [None, 'sql', 'python', 'php']:
+ raise SQLParseError('Unknown output format: %r' % ofrmt)
+
+ strip_comments = options.get('strip_comments', False)
+ if strip_comments not in [True, False]:
+ raise SQLParseError('Invalid value for strip_comments: %r'
+ % strip_comments)
+
+ strip_ws = options.get('strip_whitespace', False)
+ if strip_ws not in [True, False]:
+ raise SQLParseError('Invalid value for strip_whitespace: %r'
+ % strip_ws)
+
+ truncate_strings = options.get('truncate_strings', None)
+ if truncate_strings is not None:
+ try:
+ truncate_strings = int(truncate_strings)
+ except (ValueError, TypeError):
+ raise SQLParseError('Invalid value for truncate_strings: %r'
+ % truncate_strings)
+ if truncate_strings <= 1:
+ raise SQLParseError('Invalid value for truncate_strings: %r'
+ % truncate_strings)
+ options['truncate_strings'] = truncate_strings
+ options['truncate_char'] = options.get('truncate_char', '[...]')
+
+ reindent = options.get('reindent', False)
+ if reindent not in [True, False]:
+ raise SQLParseError('Invalid value for reindent: %r'
+ % reindent)
+ elif reindent:
+ options['strip_whitespace'] = True
+ indent_tabs = options.get('indent_tabs', False)
+ if indent_tabs not in [True, False]:
+ raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
+ elif indent_tabs:
+ options['indent_char'] = '\t'
+ else:
+ options['indent_char'] = ' '
+ indent_width = options.get('indent_width', 2)
+ try:
+ indent_width = int(indent_width)
+ except (TypeError, ValueError):
+ raise SQLParseError('indent_width requires an integer')
+ if indent_width < 1:
+ raise SQLParseError('indent_width requires a positive integer')
+ options['indent_width'] = indent_width
+
+ right_margin = options.get('right_margin', None)
+ if right_margin is not None:
+ try:
+ right_margin = int(right_margin)
+ except (TypeError, ValueError):
+ raise SQLParseError('right_margin requires an integer')
+ if right_margin < 10:
+ raise SQLParseError('right_margin requires an integer >= 10')
+ options['right_margin'] = right_margin
+
+ return options
+
+
+def build_filter_stack(stack, options):
+ """Setup and return a filter stack.
+
+ Args:
+ stack: :class:`~sqlparse.engine.FilterStack` instance
+ options: Dictionary with options validated by validate_options.
+ """
+ # Token filter
+ if options.get('keyword_case', None):
+ stack.preprocess.append(
+ filters.KeywordCaseFilter(options['keyword_case']))
+
+ if options.get('identifier_case', None):
+ stack.preprocess.append(
+ filters.IdentifierCaseFilter(options['identifier_case']))
+
+ if options.get('truncate_strings', None) is not None:
+ stack.preprocess.append(filters.TruncateStringFilter(
+ width=options['truncate_strings'], char=options['truncate_char']))
+
+ # After grouping
+ if options.get('strip_comments', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripCommentsFilter())
+
+ if (options.get('strip_whitespace', False)
+ or options.get('reindent', False)):
+ stack.enable_grouping()
+ stack.stmtprocess.append(filters.StripWhitespaceFilter())
+
+ if options.get('reindent', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.ReindentFilter(char=options['indent_char'],
+ width=options['indent_width']))
+
+ if options.get('right_margin', False):
+ stack.enable_grouping()
+ stack.stmtprocess.append(
+ filters.RightMarginFilter(width=options['right_margin']))
+
+ # Serializer
+ if options.get('output_format'):
+ frmt = options['output_format']
+ if frmt.lower() == 'php':
+ fltr = filters.OutputPHPFilter()
+ elif frmt.lower() == 'python':
+ fltr = filters.OutputPythonFilter()
+ else:
+ fltr = None
+ if fltr is not None:
+ stack.postprocess.append(fltr)
+
+ return stack
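
For orientation (not part of the diff): validate_options() and build_filter_stack() are driven by the package's top-level format() helper, defined in sqlparse/__init__.py and not shown in this message. A minimal Python 2 sketch:

    import sqlparse

    raw = 'select * from foo where bar = 1'
    print sqlparse.format(raw, keyword_case='upper', reindent=True)
    # Roughly:
    # SELECT *
    # FROM foo
    # WHERE bar = 1
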
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
new file mode 100644
index 0000000..e54457e
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/functions.py
@@ -0,0 +1,44 @@
+'''
+Created on 17/05/2012
+
+@author: piranna
+
+Several utility functions to extract info from SQL statements
+'''
+
+from sqlparse.filters import ColumnsSelect, Limit
+from sqlparse.pipeline import Pipeline
+from sqlparse.tokens import Keyword, Whitespace
+
+
+def getlimit(stream):
+ """Function that return the LIMIT of a input SQL """
+ pipe = Pipeline()
+
+ pipe.append(Limit())
+
+ result = pipe(stream)
+ try:
+ return int(result)
+ except ValueError:
+ return result
+
+
+def getcolumns(stream):
+ """Function that return the colums of a SELECT query"""
+ pipe = Pipeline()
+
+ pipe.append(ColumnsSelect())
+
+ return pipe(stream)
+
+
+class IsType(object):
+ """Functor that return is the statement is of a specific type"""
+ def __init__(self, type):
+ self.type = type
+
+ def __call__(self, stream):
+ for token_type, value in stream:
+ if token_type not in Whitespace:
+ return token_type in Keyword and value == self.type
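
A hypothetical usage sketch for the helpers above (Python 2, assuming the tokenize() entry point from sqlparse/lexer.py; not part of the diff):

    from sqlparse.lexer import tokenize
    from sqlparse.functions import IsType, getcolumns, getlimit

    sql = 'SELECT id, name FROM t LIMIT 10'
    print getcolumns(tokenize(sql))      # -> [u'id', u'name']
    print getlimit(tokenize(sql))        # -> 10
    is_select = IsType('SELECT')
    print is_select(tokenize(sql))       # -> True
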
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
new file mode 100644
index 0000000..1595aa8
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/keywords.py
@@ -0,0 +1,574 @@
+from sqlparse import tokens
+
+KEYWORDS = {
+ 'ABORT': tokens.Keyword,
+ 'ABS': tokens.Keyword,
+ 'ABSOLUTE': tokens.Keyword,
+ 'ACCESS': tokens.Keyword,
+ 'ADA': tokens.Keyword,
+ 'ADD': tokens.Keyword,
+ 'ADMIN': tokens.Keyword,
+ 'AFTER': tokens.Keyword,
+ 'AGGREGATE': tokens.Keyword,
+ 'ALIAS': tokens.Keyword,
+ 'ALL': tokens.Keyword,
+ 'ALLOCATE': tokens.Keyword,
+ 'ANALYSE': tokens.Keyword,
+ 'ANALYZE': tokens.Keyword,
+ 'ANY': tokens.Keyword,
+ 'ARE': tokens.Keyword,
+ 'ASC': tokens.Keyword.Order,
+ 'ASENSITIVE': tokens.Keyword,
+ 'ASSERTION': tokens.Keyword,
+ 'ASSIGNMENT': tokens.Keyword,
+ 'ASYMMETRIC': tokens.Keyword,
+ 'AT': tokens.Keyword,
+ 'ATOMIC': tokens.Keyword,
+ 'AUTHORIZATION': tokens.Keyword,
+ 'AVG': tokens.Keyword,
+
+ 'BACKWARD': tokens.Keyword,
+ 'BEFORE': tokens.Keyword,
+ 'BEGIN': tokens.Keyword,
+ 'BETWEEN': tokens.Keyword,
+ 'BITVAR': tokens.Keyword,
+ 'BIT_LENGTH': tokens.Keyword,
+ 'BOTH': tokens.Keyword,
+ 'BREADTH': tokens.Keyword,
+
+ # 'C': tokens.Keyword, # most likely this is an alias
+ 'CACHE': tokens.Keyword,
+ 'CALL': tokens.Keyword,
+ 'CALLED': tokens.Keyword,
+ 'CARDINALITY': tokens.Keyword,
+ 'CASCADE': tokens.Keyword,
+ 'CASCADED': tokens.Keyword,
+ 'CAST': tokens.Keyword,
+ 'CATALOG': tokens.Keyword,
+ 'CATALOG_NAME': tokens.Keyword,
+ 'CHAIN': tokens.Keyword,
+ 'CHARACTERISTICS': tokens.Keyword,
+ 'CHARACTER_LENGTH': tokens.Keyword,
+ 'CHARACTER_SET_CATALOG': tokens.Keyword,
+ 'CHARACTER_SET_NAME': tokens.Keyword,
+ 'CHARACTER_SET_SCHEMA': tokens.Keyword,
+ 'CHAR_LENGTH': tokens.Keyword,
+ 'CHECK': tokens.Keyword,
+ 'CHECKED': tokens.Keyword,
+ 'CHECKPOINT': tokens.Keyword,
+ 'CLASS': tokens.Keyword,
+ 'CLASS_ORIGIN': tokens.Keyword,
+ 'CLOB': tokens.Keyword,
+ 'CLOSE': tokens.Keyword,
+ 'CLUSTER': tokens.Keyword,
+ 'COALESCE': tokens.Keyword,
+ 'COBOL': tokens.Keyword,
+ 'COLLATE': tokens.Keyword,
+ 'COLLATION': tokens.Keyword,
+ 'COLLATION_CATALOG': tokens.Keyword,
+ 'COLLATION_NAME': tokens.Keyword,
+ 'COLLATION_SCHEMA': tokens.Keyword,
+ 'COLLECT': tokens.Keyword,
+ 'COLUMN': tokens.Keyword,
+ 'COLUMN_NAME': tokens.Keyword,
+ 'COMMAND_FUNCTION': tokens.Keyword,
+ 'COMMAND_FUNCTION_CODE': tokens.Keyword,
+ 'COMMENT': tokens.Keyword,
+ 'COMMIT': tokens.Keyword.DML,
+ 'COMMITTED': tokens.Keyword,
+ 'COMPLETION': tokens.Keyword,
+ 'CONDITION_NUMBER': tokens.Keyword,
+ 'CONNECT': tokens.Keyword,
+ 'CONNECTION': tokens.Keyword,
+ 'CONNECTION_NAME': tokens.Keyword,
+ 'CONSTRAINT': tokens.Keyword,
+ 'CONSTRAINTS': tokens.Keyword,
+ 'CONSTRAINT_CATALOG': tokens.Keyword,
+ 'CONSTRAINT_NAME': tokens.Keyword,
+ 'CONSTRAINT_SCHEMA': tokens.Keyword,
+ 'CONSTRUCTOR': tokens.Keyword,
+ 'CONTAINS': tokens.Keyword,
+ 'CONTINUE': tokens.Keyword,
+ 'CONVERSION': tokens.Keyword,
+ 'CONVERT': tokens.Keyword,
+ 'COPY': tokens.Keyword,
+ 'CORRESPONTING': tokens.Keyword,
+ 'COUNT': tokens.Keyword,
+ 'CREATEDB': tokens.Keyword,
+ 'CREATEUSER': tokens.Keyword,
+ 'CROSS': tokens.Keyword,
+ 'CUBE': tokens.Keyword,
+ 'CURRENT': tokens.Keyword,
+ 'CURRENT_DATE': tokens.Keyword,
+ 'CURRENT_PATH': tokens.Keyword,
+ 'CURRENT_ROLE': tokens.Keyword,
+ 'CURRENT_TIME': tokens.Keyword,
+ 'CURRENT_TIMESTAMP': tokens.Keyword,
+ 'CURRENT_USER': tokens.Keyword,
+ 'CURSOR': tokens.Keyword,
+ 'CURSOR_NAME': tokens.Keyword,
+ 'CYCLE': tokens.Keyword,
+
+ 'DATA': tokens.Keyword,
+ 'DATABASE': tokens.Keyword,
+ 'DATETIME_INTERVAL_CODE': tokens.Keyword,
+ 'DATETIME_INTERVAL_PRECISION': tokens.Keyword,
+ 'DAY': tokens.Keyword,
+ 'DEALLOCATE': tokens.Keyword,
+ 'DECLARE': tokens.Keyword,
+ 'DEFAULT': tokens.Keyword,
+ 'DEFAULTS': tokens.Keyword,
+ 'DEFERRABLE': tokens.Keyword,
+ 'DEFERRED': tokens.Keyword,
+ 'DEFINED': tokens.Keyword,
+ 'DEFINER': tokens.Keyword,
+ 'DELIMITER': tokens.Keyword,
+ 'DELIMITERS': tokens.Keyword,
+ 'DEREF': tokens.Keyword,
+ 'DESC': tokens.Keyword.Order,
+ 'DESCRIBE': tokens.Keyword,
+ 'DESCRIPTOR': tokens.Keyword,
+ 'DESTROY': tokens.Keyword,
+ 'DESTRUCTOR': tokens.Keyword,
+ 'DETERMINISTIC': tokens.Keyword,
+ 'DIAGNOSTICS': tokens.Keyword,
+ 'DICTIONARY': tokens.Keyword,
+ 'DISCONNECT': tokens.Keyword,
+ 'DISPATCH': tokens.Keyword,
+ 'DO': tokens.Keyword,
+ 'DOMAIN': tokens.Keyword,
+ 'DYNAMIC': tokens.Keyword,
+ 'DYNAMIC_FUNCTION': tokens.Keyword,
+ 'DYNAMIC_FUNCTION_CODE': tokens.Keyword,
+
+ 'EACH': tokens.Keyword,
+ 'ENCODING': tokens.Keyword,
+ 'ENCRYPTED': tokens.Keyword,
+ 'END-EXEC': tokens.Keyword,
+ 'EQUALS': tokens.Keyword,
+ 'ESCAPE': tokens.Keyword,
+ 'EVERY': tokens.Keyword,
+ 'EXCEPT': tokens.Keyword,
+ 'ESCEPTION': tokens.Keyword,
+ 'EXCLUDING': tokens.Keyword,
+ 'EXCLUSIVE': tokens.Keyword,
+ 'EXEC': tokens.Keyword,
+ 'EXECUTE': tokens.Keyword,
+ 'EXISTING': tokens.Keyword,
+ 'EXISTS': tokens.Keyword,
+ 'EXTERNAL': tokens.Keyword,
+ 'EXTRACT': tokens.Keyword,
+
+ 'FALSE': tokens.Keyword,
+ 'FETCH': tokens.Keyword,
+ 'FINAL': tokens.Keyword,
+ 'FIRST': tokens.Keyword,
+ 'FORCE': tokens.Keyword,
+ 'FOREACH': tokens.Keyword,
+ 'FOREIGN': tokens.Keyword,
+ 'FORTRAN': tokens.Keyword,
+ 'FORWARD': tokens.Keyword,
+ 'FOUND': tokens.Keyword,
+ 'FREE': tokens.Keyword,
+ 'FREEZE': tokens.Keyword,
+ 'FULL': tokens.Keyword,
+ 'FUNCTION': tokens.Keyword,
+
+ # 'G': tokens.Keyword,
+ 'GENERAL': tokens.Keyword,
+ 'GENERATED': tokens.Keyword,
+ 'GET': tokens.Keyword,
+ 'GLOBAL': tokens.Keyword,
+ 'GO': tokens.Keyword,
+ 'GOTO': tokens.Keyword,
+ 'GRANT': tokens.Keyword,
+ 'GRANTED': tokens.Keyword,
+ 'GROUPING': tokens.Keyword,
+
+ 'HANDLER': tokens.Keyword,
+ 'HAVING': tokens.Keyword,
+ 'HIERARCHY': tokens.Keyword,
+ 'HOLD': tokens.Keyword,
+ 'HOST': tokens.Keyword,
+
+ 'IDENTITY': tokens.Keyword,
+ 'IGNORE': tokens.Keyword,
+ 'ILIKE': tokens.Keyword,
+ 'IMMEDIATE': tokens.Keyword,
+ 'IMMUTABLE': tokens.Keyword,
+
+ 'IMPLEMENTATION': tokens.Keyword,
+ 'IMPLICIT': tokens.Keyword,
+ 'INCLUDING': tokens.Keyword,
+ 'INCREMENT': tokens.Keyword,
+ 'INDEX': tokens.Keyword,
+
+ 'INDITCATOR': tokens.Keyword,
+ 'INFIX': tokens.Keyword,
+ 'INHERITS': tokens.Keyword,
+ 'INITIALIZE': tokens.Keyword,
+ 'INITIALLY': tokens.Keyword,
+ 'INOUT': tokens.Keyword,
+ 'INPUT': tokens.Keyword,
+ 'INSENSITIVE': tokens.Keyword,
+ 'INSTANTIABLE': tokens.Keyword,
+ 'INSTEAD': tokens.Keyword,
+ 'INTERSECT': tokens.Keyword,
+ 'INTO': tokens.Keyword,
+ 'INVOKER': tokens.Keyword,
+ 'IS': tokens.Keyword,
+ 'ISNULL': tokens.Keyword,
+ 'ISOLATION': tokens.Keyword,
+ 'ITERATE': tokens.Keyword,
+
+ # 'K': tokens.Keyword,
+ 'KEY': tokens.Keyword,
+ 'KEY_MEMBER': tokens.Keyword,
+ 'KEY_TYPE': tokens.Keyword,
+
+ 'LANCOMPILER': tokens.Keyword,
+ 'LANGUAGE': tokens.Keyword,
+ 'LARGE': tokens.Keyword,
+ 'LAST': tokens.Keyword,
+ 'LATERAL': tokens.Keyword,
+ 'LEADING': tokens.Keyword,
+ 'LENGTH': tokens.Keyword,
+ 'LESS': tokens.Keyword,
+ 'LEVEL': tokens.Keyword,
+ 'LIMIT': tokens.Keyword,
+ 'LISTEN': tokens.Keyword,
+ 'LOAD': tokens.Keyword,
+ 'LOCAL': tokens.Keyword,
+ 'LOCALTIME': tokens.Keyword,
+ 'LOCALTIMESTAMP': tokens.Keyword,
+ 'LOCATION': tokens.Keyword,
+ 'LOCATOR': tokens.Keyword,
+ 'LOCK': tokens.Keyword,
+ 'LOWER': tokens.Keyword,
+
+ # 'M': tokens.Keyword,
+ 'MAP': tokens.Keyword,
+ 'MATCH': tokens.Keyword,
+ 'MAXVALUE': tokens.Keyword,
+ 'MESSAGE_LENGTH': tokens.Keyword,
+ 'MESSAGE_OCTET_LENGTH': tokens.Keyword,
+ 'MESSAGE_TEXT': tokens.Keyword,
+ 'METHOD': tokens.Keyword,
+ 'MINUTE': tokens.Keyword,
+ 'MINVALUE': tokens.Keyword,
+ 'MOD': tokens.Keyword,
+ 'MODE': tokens.Keyword,
+ 'MODIFIES': tokens.Keyword,
+ 'MODIFY': tokens.Keyword,
+ 'MONTH': tokens.Keyword,
+ 'MORE': tokens.Keyword,
+ 'MOVE': tokens.Keyword,
+ 'MUMPS': tokens.Keyword,
+
+ 'NAMES': tokens.Keyword,
+ 'NATIONAL': tokens.Keyword,
+ 'NATURAL': tokens.Keyword,
+ 'NCHAR': tokens.Keyword,
+ 'NCLOB': tokens.Keyword,
+ 'NEW': tokens.Keyword,
+ 'NEXT': tokens.Keyword,
+ 'NO': tokens.Keyword,
+ 'NOCREATEDB': tokens.Keyword,
+ 'NOCREATEUSER': tokens.Keyword,
+ 'NONE': tokens.Keyword,
+ 'NOT': tokens.Keyword,
+ 'NOTHING': tokens.Keyword,
+ 'NOTIFY': tokens.Keyword,
+ 'NOTNULL': tokens.Keyword,
+ 'NULL': tokens.Keyword,
+ 'NULLABLE': tokens.Keyword,
+ 'NULLIF': tokens.Keyword,
+
+ 'OBJECT': tokens.Keyword,
+ 'OCTET_LENGTH': tokens.Keyword,
+ 'OF': tokens.Keyword,
+ 'OFF': tokens.Keyword,
+ 'OFFSET': tokens.Keyword,
+ 'OIDS': tokens.Keyword,
+ 'OLD': tokens.Keyword,
+ 'ONLY': tokens.Keyword,
+ 'OPEN': tokens.Keyword,
+ 'OPERATION': tokens.Keyword,
+ 'OPERATOR': tokens.Keyword,
+ 'OPTION': tokens.Keyword,
+ 'OPTIONS': tokens.Keyword,
+ 'ORDINALITY': tokens.Keyword,
+ 'OUT': tokens.Keyword,
+ 'OUTPUT': tokens.Keyword,
+ 'OVERLAPS': tokens.Keyword,
+ 'OVERLAY': tokens.Keyword,
+ 'OVERRIDING': tokens.Keyword,
+ 'OWNER': tokens.Keyword,
+
+ 'PAD': tokens.Keyword,
+ 'PARAMETER': tokens.Keyword,
+ 'PARAMETERS': tokens.Keyword,
+ 'PARAMETER_MODE': tokens.Keyword,
+ 'PARAMATER_NAME': tokens.Keyword,
+ 'PARAMATER_ORDINAL_POSITION': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
+ 'PARAMETER_SPECIFIC_NAME': tokens.Keyword,
+ 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword,
+ 'PARTIAL': tokens.Keyword,
+ 'PASCAL': tokens.Keyword,
+ 'PENDANT': tokens.Keyword,
+ 'PLACING': tokens.Keyword,
+ 'PLI': tokens.Keyword,
+ 'POSITION': tokens.Keyword,
+ 'POSTFIX': tokens.Keyword,
+ 'PRECISION': tokens.Keyword,
+ 'PREFIX': tokens.Keyword,
+ 'PREORDER': tokens.Keyword,
+ 'PREPARE': tokens.Keyword,
+ 'PRESERVE': tokens.Keyword,
+ 'PRIMARY': tokens.Keyword,
+ 'PRIOR': tokens.Keyword,
+ 'PRIVILEGES': tokens.Keyword,
+ 'PROCEDURAL': tokens.Keyword,
+ 'PROCEDURE': tokens.Keyword,
+ 'PUBLIC': tokens.Keyword,
+
+ 'RAISE': tokens.Keyword,
+ 'READ': tokens.Keyword,
+ 'READS': tokens.Keyword,
+ 'RECHECK': tokens.Keyword,
+ 'RECURSIVE': tokens.Keyword,
+ 'REF': tokens.Keyword,
+ 'REFERENCES': tokens.Keyword,
+ 'REFERENCING': tokens.Keyword,
+ 'REINDEX': tokens.Keyword,
+ 'RELATIVE': tokens.Keyword,
+ 'RENAME': tokens.Keyword,
+ 'REPEATABLE': tokens.Keyword,
+ 'RESET': tokens.Keyword,
+ 'RESTART': tokens.Keyword,
+ 'RESTRICT': tokens.Keyword,
+ 'RESULT': tokens.Keyword,
+ 'RETURN': tokens.Keyword,
+ 'RETURNED_LENGTH': tokens.Keyword,
+ 'RETURNED_OCTET_LENGTH': tokens.Keyword,
+ 'RETURNED_SQLSTATE': tokens.Keyword,
+ 'RETURNS': tokens.Keyword,
+ 'REVOKE': tokens.Keyword,
+ 'RIGHT': tokens.Keyword,
+ 'ROLE': tokens.Keyword,
+ 'ROLLBACK': tokens.Keyword.DML,
+ 'ROLLUP': tokens.Keyword,
+ 'ROUTINE': tokens.Keyword,
+ 'ROUTINE_CATALOG': tokens.Keyword,
+ 'ROUTINE_NAME': tokens.Keyword,
+ 'ROUTINE_SCHEMA': tokens.Keyword,
+ 'ROW': tokens.Keyword,
+ 'ROWS': tokens.Keyword,
+ 'ROW_COUNT': tokens.Keyword,
+ 'RULE': tokens.Keyword,
+
+ 'SAVE_POINT': tokens.Keyword,
+ 'SCALE': tokens.Keyword,
+ 'SCHEMA': tokens.Keyword,
+ 'SCHEMA_NAME': tokens.Keyword,
+ 'SCOPE': tokens.Keyword,
+ 'SCROLL': tokens.Keyword,
+ 'SEARCH': tokens.Keyword,
+ 'SECOND': tokens.Keyword,
+ 'SECURITY': tokens.Keyword,
+ 'SELF': tokens.Keyword,
+ 'SENSITIVE': tokens.Keyword,
+ 'SERIALIZABLE': tokens.Keyword,
+ 'SERVER_NAME': tokens.Keyword,
+ 'SESSION': tokens.Keyword,
+ 'SESSION_USER': tokens.Keyword,
+ 'SETOF': tokens.Keyword,
+ 'SETS': tokens.Keyword,
+ 'SHARE': tokens.Keyword,
+ 'SHOW': tokens.Keyword,
+ 'SIMILAR': tokens.Keyword,
+ 'SIMPLE': tokens.Keyword,
+ 'SIZE': tokens.Keyword,
+ 'SOME': tokens.Keyword,
+ 'SOURCE': tokens.Keyword,
+ 'SPACE': tokens.Keyword,
+ 'SPECIFIC': tokens.Keyword,
+ 'SPECIFICTYPE': tokens.Keyword,
+ 'SPECIFIC_NAME': tokens.Keyword,
+ 'SQL': tokens.Keyword,
+ 'SQLCODE': tokens.Keyword,
+ 'SQLERROR': tokens.Keyword,
+ 'SQLEXCEPTION': tokens.Keyword,
+ 'SQLSTATE': tokens.Keyword,
+ 'SQLWARNING': tokens.Keyword,
+ 'STABLE': tokens.Keyword,
+ 'START': tokens.Keyword.DML,
+ 'STATE': tokens.Keyword,
+ 'STATEMENT': tokens.Keyword,
+ 'STATIC': tokens.Keyword,
+ 'STATISTICS': tokens.Keyword,
+ 'STDIN': tokens.Keyword,
+ 'STDOUT': tokens.Keyword,
+ 'STORAGE': tokens.Keyword,
+ 'STRICT': tokens.Keyword,
+ 'STRUCTURE': tokens.Keyword,
+ 'STYPE': tokens.Keyword,
+ 'SUBCLASS_ORIGIN': tokens.Keyword,
+ 'SUBLIST': tokens.Keyword,
+ 'SUBSTRING': tokens.Keyword,
+ 'SUM': tokens.Keyword,
+ 'SYMMETRIC': tokens.Keyword,
+ 'SYSID': tokens.Keyword,
+ 'SYSTEM': tokens.Keyword,
+ 'SYSTEM_USER': tokens.Keyword,
+
+ 'TABLE': tokens.Keyword,
+ 'TABLE_NAME': tokens.Keyword,
+ 'TEMP': tokens.Keyword,
+ 'TEMPLATE': tokens.Keyword,
+ 'TEMPORARY': tokens.Keyword,
+ 'TERMINATE': tokens.Keyword,
+ 'THAN': tokens.Keyword,
+ 'TIMESTAMP': tokens.Keyword,
+ 'TIMEZONE_HOUR': tokens.Keyword,
+ 'TIMEZONE_MINUTE': tokens.Keyword,
+ 'TO': tokens.Keyword,
+ 'TOAST': tokens.Keyword,
+ 'TRAILING': tokens.Keyword,
+ 'TRANSATION': tokens.Keyword,
+ 'TRANSACTIONS_COMMITTED': tokens.Keyword,
+ 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword,
+ 'TRANSATION_ACTIVE': tokens.Keyword,
+ 'TRANSFORM': tokens.Keyword,
+ 'TRANSFORMS': tokens.Keyword,
+ 'TRANSLATE': tokens.Keyword,
+ 'TRANSLATION': tokens.Keyword,
+ 'TREAT': tokens.Keyword,
+ 'TRIGGER': tokens.Keyword,
+ 'TRIGGER_CATALOG': tokens.Keyword,
+ 'TRIGGER_NAME': tokens.Keyword,
+ 'TRIGGER_SCHEMA': tokens.Keyword,
+ 'TRIM': tokens.Keyword,
+ 'TRUE': tokens.Keyword,
+ 'TRUNCATE': tokens.Keyword,
+ 'TRUSTED': tokens.Keyword,
+ 'TYPE': tokens.Keyword,
+
+ 'UNCOMMITTED': tokens.Keyword,
+ 'UNDER': tokens.Keyword,
+ 'UNENCRYPTED': tokens.Keyword,
+ 'UNION': tokens.Keyword,
+ 'UNIQUE': tokens.Keyword,
+ 'UNKNOWN': tokens.Keyword,
+ 'UNLISTEN': tokens.Keyword,
+ 'UNNAMED': tokens.Keyword,
+ 'UNNEST': tokens.Keyword,
+ 'UNTIL': tokens.Keyword,
+ 'UPPER': tokens.Keyword,
+ 'USAGE': tokens.Keyword,
+ 'USE': tokens.Keyword,
+ 'USER': tokens.Keyword,
+ 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword,
+ 'USER_DEFINED_TYPE_NAME': tokens.Keyword,
+ 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword,
+ 'USING': tokens.Keyword,
+
+ 'VACUUM': tokens.Keyword,
+ 'VALID': tokens.Keyword,
+ 'VALIDATOR': tokens.Keyword,
+ 'VALUES': tokens.Keyword,
+ 'VARIABLE': tokens.Keyword,
+ 'VERBOSE': tokens.Keyword,
+ 'VERSION': tokens.Keyword,
+ 'VIEW': tokens.Keyword,
+ 'VOLATILE': tokens.Keyword,
+
+ 'WHENEVER': tokens.Keyword,
+ 'WITH': tokens.Keyword,
+ 'WITHOUT': tokens.Keyword,
+ 'WORK': tokens.Keyword,
+ 'WRITE': tokens.Keyword,
+
+ 'YEAR': tokens.Keyword,
+
+ 'ZONE': tokens.Keyword,
+
+ # Name.Builtin
+ 'ARRAY': tokens.Name.Builtin,
+ 'BIGINT': tokens.Name.Builtin,
+ 'BINARY': tokens.Name.Builtin,
+ 'BIT': tokens.Name.Builtin,
+ 'BLOB': tokens.Name.Builtin,
+ 'BOOLEAN': tokens.Name.Builtin,
+ 'CHAR': tokens.Name.Builtin,
+ 'CHARACTER': tokens.Name.Builtin,
+ 'DATE': tokens.Name.Builtin,
+ 'DEC': tokens.Name.Builtin,
+ 'DECIMAL': tokens.Name.Builtin,
+ 'FLOAT': tokens.Name.Builtin,
+ 'INT': tokens.Name.Builtin,
+ 'INT8': tokens.Name.Builtin,
+ 'INTEGER': tokens.Name.Builtin,
+ 'INTERVAL': tokens.Name.Builtin,
+ 'LONG': tokens.Name.Builtin,
+ 'NUMBER': tokens.Name.Builtin,
+ 'NUMERIC': tokens.Name.Builtin,
+ 'REAL': tokens.Name.Builtin,
+ 'SERIAL': tokens.Name.Builtin,
+ 'SERIAL8': tokens.Name.Builtin,
+ 'SIGNED': tokens.Name.Builtin,
+ 'SMALLINT': tokens.Name.Builtin,
+ 'TEXT': tokens.Name.Builtin,
+ 'TINYINT': tokens.Name.Builtin,
+ 'UNSIGNED': tokens.Name.Builtin,
+ 'VARCHAR': tokens.Name.Builtin,
+ 'VARCHAR2': tokens.Name.Builtin,
+ 'VARYING': tokens.Name.Builtin,
+}
+
+
+KEYWORDS_COMMON = {
+ 'SELECT': tokens.Keyword.DML,
+ 'INSERT': tokens.Keyword.DML,
+ 'DELETE': tokens.Keyword.DML,
+ 'UPDATE': tokens.Keyword.DML,
+ 'REPLACE': tokens.Keyword.DML,
+ 'MERGE': tokens.Keyword.DML,
+ 'DROP': tokens.Keyword.DDL,
+ 'CREATE': tokens.Keyword.DDL,
+ 'ALTER': tokens.Keyword.DDL,
+
+ 'WHERE': tokens.Keyword,
+ 'FROM': tokens.Keyword,
+ 'INNER': tokens.Keyword,
+ 'JOIN': tokens.Keyword,
+ 'STRAIGHT_JOIN': tokens.Keyword,
+ 'AND': tokens.Keyword,
+ 'OR': tokens.Keyword,
+ 'LIKE': tokens.Keyword,
+ 'ON': tokens.Keyword,
+ 'IN': tokens.Keyword,
+ 'SET': tokens.Keyword,
+
+ 'BY': tokens.Keyword,
+ 'GROUP': tokens.Keyword,
+ 'ORDER': tokens.Keyword,
+ 'LEFT': tokens.Keyword,
+ 'OUTER': tokens.Keyword,
+ 'FULL': tokens.Keyword,
+
+ 'IF': tokens.Keyword,
+ 'END': tokens.Keyword,
+ 'THEN': tokens.Keyword,
+ 'LOOP': tokens.Keyword,
+ 'AS': tokens.Keyword,
+ 'ELSE': tokens.Keyword,
+ 'FOR': tokens.Keyword,
+
+ 'CASE': tokens.Keyword,
+ 'WHEN': tokens.Keyword,
+ 'MIN': tokens.Keyword,
+ 'MAX': tokens.Keyword,
+ 'DISTINCT': tokens.Keyword,
+}
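
For orientation (not part of the diff): these two tables are consulted by is_keyword() in sqlparse/lexer.py below, with KEYWORDS_COMMON taking precedence and unknown words falling back to a plain Name token. A small Python 2 sketch of that lookup:

    from sqlparse import tokens
    from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON

    def classify(word):
        # Mirrors is_keyword(): common keywords win, then the big table,
        # then Name as the fallback for ordinary identifiers.
        test = word.upper()
        return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name))

    print classify('select')   # -> Token.Keyword.DML
    print classify('foo')      # -> Token.Name
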
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py
new file mode 100644
index 0000000..fd29f5c
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/lexer.py
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+"""SQL Lexer"""
+
+# This code is based on the SqlLexer in pygments.
+# http://pygments.org/
+# It's separated from the rest of pygments to increase performance
+# and to allow some customizations.
+
+import re
+import sys
+
+from sqlparse import tokens
+from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
+from cStringIO import StringIO
+
+
+class include(str):
+ pass
+
+
+class combined(tuple):
+ """Indicates a state combined from multiple states."""
+
+ def __new__(cls, *args):
+ return tuple.__new__(cls, args)
+
+ def __init__(self, *args):
+ # tuple.__init__ doesn't do anything
+ pass
+
+
+def is_keyword(value):
+ test = value.upper()
+ return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
+
+
+def apply_filters(stream, filters, lexer=None):
+ """
+ Use this method to apply an iterable of filters to
+ a stream. If lexer is given it's forwarded to the
+ filter, otherwise the filter receives `None`.
+ """
+
+ def _apply(filter_, stream):
+ for token in filter_.filter(lexer, stream):
+ yield token
+
+ for filter_ in filters:
+ stream = _apply(filter_, stream)
+ return stream
+
+
+class LexerMeta(type):
+ """
+ Metaclass for Lexer, creates the self._tokens attribute from
+ self.tokens on the first instantiation.
+ """
+
+ def _process_state(cls, unprocessed, processed, state):
+ assert type(state) is str, "wrong state name %r" % state
+ assert state[0] != '#', "invalid state name %r" % state
+ if state in processed:
+ return processed[state]
+ tokenlist = processed[state] = []
+ rflags = cls.flags
+ for tdef in unprocessed[state]:
+ if isinstance(tdef, include):
+ # it's a state reference
+ assert tdef != state, "circular state reference %r" % state
+ tokenlist.extend(cls._process_state(
+ unprocessed, processed, str(tdef)))
+ continue
+
+ assert type(tdef) is tuple, "wrong rule def %r" % tdef
+
+ try:
+ rex = re.compile(tdef[0], rflags).match
+ except Exception, err:
+ raise ValueError(("uncompilable regex %r in state"
+ " %r of %r: %s"
+ % (tdef[0], state, cls, err)))
+
+ assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
+ ('token type must be simple type or callable, not %r'
+ % (tdef[1],))
+
+ if len(tdef) == 2:
+ new_state = None
+ else:
+ tdef2 = tdef[2]
+ if isinstance(tdef2, str):
+ # an existing state
+ if tdef2 == '#pop':
+ new_state = -1
+ elif tdef2 in unprocessed:
+ new_state = (tdef2,)
+ elif tdef2 == '#push':
+ new_state = tdef2
+ elif tdef2[:5] == '#pop:':
+ new_state = -int(tdef2[5:])
+ else:
+ assert False, 'unknown new state %r' % tdef2
+ elif isinstance(tdef2, combined):
+ # combine a new state from existing ones
+ new_state = '_tmp_%d' % cls._tmpname
+ cls._tmpname += 1
+ itokens = []
+ for istate in tdef2:
+ assert istate != state, \
+ 'circular state ref %r' % istate
+ itokens.extend(cls._process_state(unprocessed,
+ processed, istate))
+ processed[new_state] = itokens
+ new_state = (new_state,)
+ elif isinstance(tdef2, tuple):
+ # push more than one state
+ for state in tdef2:
+ assert (state in unprocessed or
+ state in ('#pop', '#push')), \
+ 'unknown new state ' + state
+ new_state = tdef2
+ else:
+ assert False, 'unknown new state def %r' % tdef2
+ tokenlist.append((rex, tdef[1], new_state))
+ return tokenlist
+
+ def process_tokendef(cls):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ processed = cls._all_tokens[cls.__name__] = {}
+ #tokendefs = tokendefs or cls.tokens[name]
+ for state in cls.tokens.keys():
+ cls._process_state(cls.tokens, processed, state)
+ return processed
+
+ def __call__(cls, *args, **kwds):
+ if not hasattr(cls, '_tokens'):
+ cls._all_tokens = {}
+ cls._tmpname = 0
+ if hasattr(cls, 'token_variants') and cls.token_variants:
+ # don't process yet
+ pass
+ else:
+ cls._tokens = cls.process_tokendef()
+
+ return type.__call__(cls, *args, **kwds)
+
+
+class Lexer(object):
+
+ __metaclass__ = LexerMeta
+
+ encoding = 'utf-8'
+ stripall = False
+ stripnl = False
+ tabsize = 0
+ flags = re.IGNORECASE | re.UNICODE
+
+ tokens = {
+ 'root': [
+ (r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single),
+ # $ matches *before* newline, therefore we have two patterns
+ # to match Comment.Single
+ (r'(--|# ).*?$', tokens.Comment.Single),
+ (r'(\r\n|\r|\n)', tokens.Newline),
+ (r'\s+', tokens.Whitespace),
+ (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
+ (r':=', tokens.Assignment),
+ (r'::', tokens.Punctuation),
+ (r'[*]', tokens.Wildcard),
+ (r'CASE\b', tokens.Keyword), # extended CASE(foo)
+ (r"`(``|[^`])*`", tokens.Name),
+ (r"´(´´|[^´])*´", tokens.Name),
+ (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
+ (r'\?{1}', tokens.Name.Placeholder),
+ (r'%\(\w+\)s', tokens.Name.Placeholder),
+ (r'%s', tokens.Name.Placeholder),
+ (r'[$:?]\w+', tokens.Name.Placeholder),
+ # FIXME(andi): VALUES shouldn't be listed here
+ # see https://github.com/andialbrecht/sqlparse/pull/64
+ (r'VALUES', tokens.Keyword),
+ (r'(@|##|#)[^\W\d_]\w+', tokens.Name),
+ # IN is special, it may be followed by a parenthesis, but
+ # is never a function, see issue183
+ (r'in\b(?=[ (])?', tokens.Keyword),
+ (r'[^\W\d_]\w*(?=[.(])', tokens.Name), # see issue39
+ (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
+ (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
+ (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
+ (r'[-]?[0-9]+', tokens.Number.Integer),
+ (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
+ # not a real string literal in ANSI SQL:
+ (r'(""|".*?[^\\]")', tokens.String.Symbol),
+ # sqlite names can be escaped with [square brackets]. left bracket
+ # cannot be preceded by word character or a right bracket --
+ # otherwise it's probably an array index
+ (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
+ (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
+ (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
+ (r'NOT NULL\b', tokens.Keyword),
+ (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
+ (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
+ (r'(?<=\.)[^\W\d_]\w*', tokens.Name),
+ (r'[^\W\d]\w*', is_keyword),
+ (r'[;:()\[\],\.]', tokens.Punctuation),
+ (r'[<>=~!]+', tokens.Operator.Comparison),
+ (r'[+/@#%^&|`?^-]+', tokens.Operator),
+ ],
+ 'multiline-comments': [
+ (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
+ (r'\*/', tokens.Comment.Multiline, '#pop'),
+ (r'[^/\*]+', tokens.Comment.Multiline),
+ (r'[/*]', tokens.Comment.Multiline),
+ ]}
+
+ def __init__(self):
+ self.filters = []
+
+ def add_filter(self, filter_, **options):
+ from sqlparse.filters import Filter
+ if not isinstance(filter_, Filter):
+ filter_ = filter_(**options)
+ self.filters.append(filter_)
+
+ def _decode(self, text):
+ if sys.version_info[0] == 3:
+ if isinstance(text, str):
+ return text
+ if self.encoding == 'guess':
+ try:
+ text = text.decode('utf-8')
+ if text.startswith(u'\ufeff'):
+ text = text[len(u'\ufeff'):]
+ except UnicodeDecodeError:
+ text = text.decode('latin1')
+ else:
+ try:
+ text = text.decode(self.encoding)
+ except UnicodeDecodeError:
+ text = text.decode('unicode-escape')
+
+ if self.tabsize > 0:
+ text = text.expandtabs(self.tabsize)
+ return text
+
+ def get_tokens(self, text, unfiltered=False):
+ """
+ Return an iterable of (tokentype, value) pairs generated from
+ `text`. If `unfiltered` is set to `True`, the filtering mechanism
+ is bypassed even if filters are defined.
+
+ Also preprocesses the text, i.e. expands tabs and strips it if
+ wanted, and applies registered filters.
+ """
+ if isinstance(text, basestring):
+ if self.stripall:
+ text = text.strip()
+ elif self.stripnl:
+ text = text.strip('\n')
+
+ if sys.version_info[0] < 3 and isinstance(text, unicode):
+ text = StringIO(text.encode('utf-8'))
+ self.encoding = 'utf-8'
+ else:
+ text = StringIO(text)
+
+ def streamer():
+ for i, t, v in self.get_tokens_unprocessed(text):
+ yield t, v
+ stream = streamer()
+ if not unfiltered:
+ stream = apply_filters(stream, self.filters, self)
+ return stream
+
+ def get_tokens_unprocessed(self, stream, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+ ``stack`` is the initial stack (default: ``['root']``)
+ """
+ pos = 0
+ tokendefs = self._tokens # see __call__, pylint:disable=E1101
+ statestack = list(stack)
+ statetokens = tokendefs[statestack[-1]]
+ known_names = {}
+
+ text = stream.read()
+ text = self._decode(text)
+
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ m = rexmatch(text, pos)
+ if m:
+ value = m.group()
+ if value in known_names:
+ yield pos, known_names[value], value
+ elif type(action) is tokens._TokenType:
+ yield pos, action, value
+ elif hasattr(action, '__call__'):
+ ttype, value = action(value)
+ known_names[value] = ttype
+ yield pos, ttype, value
+ else:
+ for item in action(self, m):
+ yield item
+ pos = m.end()
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ statestack.pop()
+ elif state == '#push':
+ statestack.append(statestack[-1])
+ elif (
+ # Ugly hack - multiline-comments
+ # are not stackable
+ state != 'multiline-comments'
+ or not statestack
+ or statestack[-1] != 'multiline-comments'
+ ):
+ statestack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del statestack[new_state:]
+ elif new_state == '#push':
+ statestack.append(statestack[-1])
+ else:
+ assert False, "wrong state def: %r" % new_state
+ statetokens = tokendefs[statestack[-1]]
+ break
+ else:
+ try:
+ if text[pos] == '\n':
+ # at EOL, reset state to "root"
+ pos += 1
+ statestack = ['root']
+ statetokens = tokendefs['root']
+ yield pos, tokens.Text, u'\n'
+ continue
+ yield pos, tokens.Error, text[pos]
+ pos += 1
+ except IndexError:
+ break
+
+
+def tokenize(sql, encoding=None):
+ """Tokenize sql.
+
+ Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
+ of ``(token type, value)`` items.
+ """
+ lexer = Lexer()
+ if encoding is not None:
+ lexer.encoding = encoding
+ return lexer.get_tokens(sql)
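
A minimal sketch of the tokenize() entry point above (Python 2; illustrative only, not part of the diff):

    from sqlparse.lexer import tokenize

    for ttype, value in tokenize('SELECT 1 -- one\n'):
        print ttype, repr(value)
    # Roughly:
    # Token.Keyword.DML u'SELECT'
    # Token.Text.Whitespace u' '
    # Token.Literal.Number.Integer u'1'
    # Token.Text.Whitespace u' '
    # Token.Comment.Single u'-- one\n'
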
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py
new file mode 100644
index 0000000..34dad19
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com
+#
+# This module is part of python-sqlparse and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
+
+from types import GeneratorType
+
+
+class Pipeline(list):
+ """Pipeline to process filters sequentially"""
+
+ def __call__(self, stream):
+ """Run the pipeline
+
+ Return a static (non-generator) version of the result
+ """
+
+ # Run the stream over all the filters on the pipeline
+ for filter in self:
+ # Functions and callable objects (objects with '__call__' method)
+ if callable(filter):
+ stream = filter(stream)
+
+ # Normal filters (objects with 'process' method)
+ else:
+ stream = filter.process(None, stream)
+
+ # If the last filter returned a generator, materialize it into a list
+ if isinstance(stream, GeneratorType):
+ return list(stream)
+ return stream
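
A small sketch of both call styles the loop above supports (Python 2; illustrative, not part of the diff):

    from sqlparse.filters import StripComments, StripWhitespace
    from sqlparse.lexer import tokenize
    from sqlparse.pipeline import Pipeline

    pipe = Pipeline()
    pipe.append(StripComments())   # no __call__: run via .process(None, stream)
    pipe.append(StripWhitespace)   # plain function: called directly
    print pipe(tokenize('SELECT  1  -- c\n'))
    # -> a list of (token_type, value) pairs, comments and redundant
    #    whitespace removed
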
http://git-wip-us.apache.org/repos/asf/impala/blob/49413d9c/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py
----------------------------------------------------------------------
diff --git a/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py b/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py
new file mode 100644
index 0000000..7325712
--- /dev/null
+++ b/shell/ext-py/sqlparse-0.1.19/sqlparse/sql.py
@@ -0,0 +1,684 @@
+# -*- coding: utf-8 -*-
+
+"""This module contains classes representing syntactical elements of SQL."""
+
+import re
+import sys
+
+from sqlparse import tokens as T
+
+
+class Token(object):
+ """Base class for all other classes in this module.
+
+ It represents a single token and has two instance attributes:
+ ``value`` is the unchanged value of the token and ``ttype`` is
+ the type of the token.
+ """
+
+ __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword')
+
+ def __init__(self, ttype, value):
+ self.value = value
+ if ttype in T.Keyword:
+ self.normalized = value.upper()
+ else:
+ self.normalized = value
+ self.ttype = ttype
+ self.is_keyword = ttype in T.Keyword
+ self.parent = None
+
+ def __str__(self):
+ if sys.version_info[0] == 3:
+ return self.value
+ else:
+ return unicode(self).encode('utf-8')
+
+ def __repr__(self):
+ short = self._get_repr_value()
+ if sys.version_info[0] < 3:
+ short = short.encode('utf-8')
+ return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
+ short, id(self))
+
+ def __unicode__(self):
+ """Returns a unicode representation of this object."""
+ return self.value or ''
+
+ def to_unicode(self):
+ """Returns a unicode representation of this object.
+
+ .. deprecated:: 0.1.5
+ Use ``unicode(token)`` (for Python 3: ``str(token)``) instead.
+ """
+ return unicode(self)
+
+ def _get_repr_name(self):
+ return str(self.ttype).split('.')[-1]
+
+ def _get_repr_value(self):
+ raw = unicode(self)
+ if len(raw) > 7:
+ raw = raw[:6] + u'...'
+ return re.sub('\s+', ' ', raw)
+
+ def flatten(self):
+ """Resolve subgroups."""
+ yield self
+
+ def match(self, ttype, values, regex=False):
+ """Checks whether the token matches the given arguments.
+
+ *ttype* is a token type. If this token doesn't match the given token
+ type, ``False`` is returned.
+ *values* is a list of possible values for this token. The values
+ are OR'ed together so if only one of the values matches ``True``
+ is returned. Except for keyword tokens the comparison is
+ case-sensitive. For convenience it's ok to pass in a single string.
+ If *regex* is ``True`` (default is ``False``) the given values are
+ treated as regular expressions.
+ """
+ type_matched = self.ttype is ttype
+ if not type_matched or values is None:
+ return type_matched
+
+ if regex:
+ if isinstance(values, basestring):
+ values = set([values])
+
+ if self.ttype is T.Keyword:
+ values = set(re.compile(v, re.IGNORECASE) for v in values)
+ else:
+ values = set(re.compile(v) for v in values)
+
+ for pattern in values:
+ if pattern.search(self.value):
+ return True
+ return False
+
+ if isinstance(values, basestring):
+ if self.is_keyword:
+ return values.upper() == self.normalized
+ return values == self.value
+
+ if self.is_keyword:
+ for v in values:
+ if v.upper() == self.normalized:
+ return True
+ return False
+
+ return self.value in values
+
+ def is_group(self):
+ """Returns ``True`` if this object has children."""
+ return False
+
+ def is_whitespace(self):
+ """Return ``True`` if this token is a whitespace token."""
+ return self.ttype and self.ttype in T.Whitespace
+
+ def within(self, group_cls):
+ """Returns ``True`` if this token is within *group_cls*.
+
+ Use this method for example to check if an identifier is within
+ a function: ``t.within(sql.Function)``.
+ """
+ parent = self.parent
+ while parent:
+ if isinstance(parent, group_cls):
+ return True
+ parent = parent.parent
+ return False
+
+ def is_child_of(self, other):
+ """Returns ``True`` if this token is a direct child of *other*."""
+ return self.parent == other
+
+ def has_ancestor(self, other):
+ """Returns ``True`` if *other* is in this tokens ancestry."""
+ parent = self.parent
+ while parent:
+ if parent == other:
+ return True
+ parent = parent.parent
+ return False
+
+
+class TokenList(Token):
+ """A group of tokens.
+
+ It has an additional instance attribute ``tokens`` which holds a
+ list of child-tokens.
+ """
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def __init__(self, tokens=None):
+ if tokens is None:
+ tokens = []
+ self.tokens = tokens
+ Token.__init__(self, None, self._to_string())
+
+ def __unicode__(self):
+ return self._to_string()
+
+ def __str__(self):
+ str_ = self._to_string()
+ if sys.version_info[0] < 3:
+ str_ = str_.encode('utf-8')
+ return str_
+
+ def _to_string(self):
+ if sys.version_info[0] == 3:
+ return ''.join(x.value for x in self.flatten())
+ else:
+ return ''.join(unicode(x) for x in self.flatten())
+
+ def _get_repr_name(self):
+ return self.__class__.__name__
+
+ def _pprint_tree(self, max_depth=None, depth=0):
+ """Pretty-print the object tree."""
+ indent = ' ' * (depth * 2)
+ for idx, token in enumerate(self.tokens):
+ if token.is_group():
+ pre = ' +-'
+ else:
+ pre = ' | '
+ print '%s%s%d %s \'%s\'' % (indent, pre, idx,
+ token._get_repr_name(),
+ token._get_repr_value())
+ if (token.is_group() and (max_depth is None or depth < max_depth)):
+ token._pprint_tree(max_depth, depth + 1)
+
+ def _remove_quotes(self, val):
+ """Helper that removes surrounding quotes from strings."""
+ if not val:
+ return val
+ if val[0] in ('"', '\'') and val[-1] == val[0]:
+ val = val[1:-1]
+ return val
+
+ def get_token_at_offset(self, offset):
+ """Returns the token that is on position offset."""
+ idx = 0
+ for token in self.flatten():
+ end = idx + len(token.value)
+ if idx <= offset <= end:
+ return token
+ idx = end
+
+ def flatten(self):
+ """Generator yielding ungrouped tokens.
+
+ This method is recursively called for all child tokens.
+ """
+ for token in self.tokens:
+ if isinstance(token, TokenList):
+ for item in token.flatten():
+ yield item
+ else:
+ yield token
+
+# def __iter__(self):
+# return self
+#
+# def next(self):
+# for token in self.tokens:
+# yield token
+
+ def is_group(self):
+ return True
+
+ def get_sublists(self):
+# return [x for x in self.tokens if isinstance(x, TokenList)]
+ for x in self.tokens:
+ if isinstance(x, TokenList):
+ yield x
+
+ @property
+ def _groupable_tokens(self):
+ return self.tokens
+
+ def token_first(self, ignore_whitespace=True, ignore_comments=False):
+ """Returns the first child token.
+
+ If *ignore_whitespace* is ``True`` (the default), whitespace
+ tokens are ignored.
+
+ If *ignore_comments* is ``True`` (default: ``False``), comments are
+ ignored too.
+ """
+ for token in self.tokens:
+ if ignore_whitespace and token.is_whitespace():
+ continue
+ if ignore_comments and isinstance(token, Comment):
+ continue
+ return token
+
+ def token_next_by_instance(self, idx, clss, end=None):
+ """Returns the next token matching a class.
+
+ *idx* is where to start searching in the list of child tokens.
+ *clss* is a list of classes the token should be an instance of.
+
+ If no matching token can be found ``None`` is returned.
+ """
+ if not isinstance(clss, (list, tuple)):
+ clss = (clss,)
+
+ for token in self.tokens[idx:end]:
+ if isinstance(token, clss):
+ return token
+
+ def token_next_by_type(self, idx, ttypes):
+ """Returns next matching token by it's token type."""
+ if not isinstance(ttypes, (list, tuple)):
+ ttypes = [ttypes]
+
+ for token in self.tokens[idx:]:
+ if token.ttype in ttypes:
+ return token
+
+ def token_next_match(self, idx, ttype, value, regex=False):
+ """Returns next token where it's ``match`` method returns ``True``."""
+ if not isinstance(idx, int):
+ idx = self.token_index(idx)
+
+ for n in xrange(idx, len(self.tokens)):
+ token = self.tokens[n]
+ if token.match(ttype, value, regex):
+ return token
+
+ def token_not_matching(self, idx, funcs):
+ for token in self.tokens[idx:]:
+ passed = False
+ for func in funcs:
+ if func(token):
+ passed = True
+ break
+
+ if not passed:
+ return token
+
+ def token_matching(self, idx, funcs):
+ for token in self.tokens[idx:]:
+ for func in funcs:
+ if func(token):
+ return token
+
+ def token_prev(self, idx, skip_ws=True):
+ """Returns the previous token relative to *idx*.
+
+ If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+ ``None`` is returned if there's no previous token.
+ """
+ if idx is None:
+ return None
+
+ if not isinstance(idx, int):
+ idx = self.token_index(idx)
+
+ while idx:
+ idx -= 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_next(self, idx, skip_ws=True):
+ """Returns the next token relative to *idx*.
+
+ If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+ ``None`` is returned if there's no next token.
+ """
+ if idx is None:
+ return None
+
+ if not isinstance(idx, int):
+ idx = self.token_index(idx)
+
+ while idx < len(self.tokens) - 1:
+ idx += 1
+ if self.tokens[idx].is_whitespace() and skip_ws:
+ continue
+ return self.tokens[idx]
+
+ def token_index(self, token, start=0):
+ """Return list index of token."""
+ if start > 0:
+ # Performing `index` manually is much faster when starting in the
+ # middle of the list of tokens and expecting to find the token near
+ # the starting index.
+ for i in xrange(start, len(self.tokens)):
+ if self.tokens[i] == token:
+ return i
+ return -1
+ return self.tokens.index(token)
+
+ def tokens_between(self, start, end, exclude_end=False):
+ """Return all tokens between (and including) start and end.
+
+ If *exclude_end* is ``True`` (default is ``False``) the end token
+ is excluded.
+ """
+ # FIXME(andi): rename exclude_end to include_end
+ if exclude_end:
+ offset = 0
+ else:
+ offset = 1
+ end_idx = self.token_index(end) + offset
+ start_idx = self.token_index(start)
+ return self.tokens[start_idx:end_idx]
+
+ def group_tokens(self, grp_cls, tokens, ignore_ws=False):
+ """Replace tokens by an instance of *grp_cls*."""
+ idx = self.token_index(tokens[0])
+ if ignore_ws:
+ while tokens and tokens[-1].is_whitespace():
+ tokens = tokens[:-1]
+ for t in tokens:
+ self.tokens.remove(t)
+ grp = grp_cls(tokens)
+ for token in tokens:
+ token.parent = grp
+ grp.parent = self
+ self.tokens.insert(idx, grp)
+ return grp
+
+ def insert_before(self, where, token):
+ """Inserts *token* before *where*."""
+ self.tokens.insert(self.token_index(where), token)
+
+ def insert_after(self, where, token, skip_ws=True):
+ """Inserts *token* after *where*."""
+ next_token = self.token_next(where, skip_ws=skip_ws)
+ if next_token is None:
+ self.tokens.append(token)
+ else:
+ self.tokens.insert(self.token_index(next_token), token)
+
+ def has_alias(self):
+ """Returns ``True`` if an alias is present."""
+ return self.get_alias() is not None
+
+ def get_alias(self):
+ """Returns the alias for this identifier or ``None``."""
+
+ # "name AS alias"
+ kw = self.token_next_match(0, T.Keyword, 'AS')
+ if kw is not None:
+ return self._get_first_name(kw, keywords=True)
+
+ # "name alias" or "complicated column expression alias"
+ if len(self.tokens) > 2 \
+ and self.token_next_by_type(0, T.Whitespace) is not None:
+ return self._get_first_name(reverse=True)
+
+ return None
+
+ def get_name(self):
+ """Returns the name of this identifier.
+
+ This is either its alias or its real name. The returned value can
+ be considered as the name under which the object corresponding to
+ this identifier is known within the current statement.
+ """
+ alias = self.get_alias()
+ if alias is not None:
+ return alias
+ return self.get_real_name()
+
+ def get_real_name(self):
+ """Returns the real name (object name) of this identifier."""
+ # a.b
+ dot = self.token_next_match(0, T.Punctuation, '.')
+ if dot is not None:
+ return self._get_first_name(self.token_index(dot))
+
+ return self._get_first_name()
+
+ def get_parent_name(self):
+ """Return name of the parent object if any.
+
+ A parent object is identified by the first occurring dot.
+ """
+ dot = self.token_next_match(0, T.Punctuation, '.')
+ if dot is None:
+ return None
+ prev_ = self.token_prev(self.token_index(dot))
+ if prev_ is None: # something must be very wrong here...
+ return None
+ return self._remove_quotes(prev_.value)
+
+ def _get_first_name(self, idx=None, reverse=False, keywords=False):
+ """Returns the name of the first token with a name"""
+
+ if idx and not isinstance(idx, int):
+ idx = self.token_index(idx) + 1
+
+ tokens = self.tokens[idx:] if idx else self.tokens
+ tokens = reversed(tokens) if reverse else tokens
+ types = [T.Name, T.Wildcard, T.String.Symbol]
+
+ if keywords:
+ types.append(T.Keyword)
+
+ for tok in tokens:
+ if tok.ttype in types:
+ return self._remove_quotes(tok.value)
+ elif isinstance(tok, (Identifier, Function)):
+ return tok.get_name()
+ return None
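A short sketch of the alias helpers above (same Python 2 / vendored-package
assumptions; the commented outputs are what the code above implies for this
input, not verified claims about every input):

    import sqlparse
    from sqlparse.sql import Identifier

    stmt = sqlparse.parse(u'SELECT col AS c FROM tbl')[0]
    ident = stmt.token_next_by_instance(0, Identifier)
    print ident.get_real_name()   # col
    print ident.get_alias()       # c
    print ident.get_name()        # c -- the alias wins when one is present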
+
+
+class Statement(TokenList):
+ """Represents a SQL statement."""
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_type(self):
+ """Returns the type of a statement.
+
+ The returned value is a string holding an upper-cased version of
+ the first DML or DDL keyword. If the first token in this group
+ isn't a DML or DDL keyword "UNKNOWN" is returned.
+
+ Whitespaces and comments at the beginning of the statement
+ are ignored.
+ """
+ first_token = self.token_first(ignore_comments=True)
+ if first_token is None:
+ # An "empty" statement that either has not tokens at all
+ # or only whitespace tokens.
+ return 'UNKNOWN'
+
+ elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
+ return first_token.normalized
+
+ return 'UNKNOWN'
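For example, under the same assumptions as the sketches above:

    import sqlparse

    print sqlparse.parse(u'  UPDATE t SET x = 1')[0].get_type()  # UPDATE
    print sqlparse.parse(u'foo')[0].get_type()                   # UNKNOWN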
+
+
+class Identifier(TokenList):
+ """Represents an identifier.
+
+ Identifiers may have aliases or typecasts.
+ """
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def is_wildcard(self):
+ """Return ``True`` if this identifier contains a wildcard."""
+ token = self.token_next_by_type(0, T.Wildcard)
+ return token is not None
+
+ def get_typecast(self):
+ """Returns the typecast or ``None`` of this object as a string."""
+ marker = self.token_next_match(0, T.Punctuation, '::')
+ if marker is None:
+ return None
+ next_ = self.token_next(self.token_index(marker), False)
+ if next_ is None:
+ return None
+ return unicode(next_)
+
+ def get_ordering(self):
+ """Returns the ordering or ``None`` as uppercase string."""
+ ordering = self.token_next_by_type(0, T.Keyword.Order)
+ if ordering is None:
+ return None
+ return ordering.value.upper()
+
+ def get_array_indices(self):
+ """Returns an iterator of index token lists"""
+
+ for tok in self.tokens:
+ if isinstance(tok, SquareBrackets):
+ # Use [1:-1] index to discard the square brackets
+ yield tok.tokens[1:-1]
+
+
+class IdentifierList(TokenList):
+ """A list of :class:`~sqlparse.sql.Identifier`\'s."""
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_identifiers(self):
+ """Returns the identifiers.
+
+ Whitespace and punctuation tokens are not included in this generator.
+ """
+ for x in self.tokens:
+ if not x.is_whitespace() and not x.match(T.Punctuation, ','):
+ yield x
+
+
+class Parenthesis(TokenList):
+ """Tokens between parenthesis."""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ @property
+ def _groupable_tokens(self):
+ return self.tokens[1:-1]
+
+
+class SquareBrackets(TokenList):
+ """Tokens between square brackets"""
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ @property
+ def _groupable_tokens(self):
+ return self.tokens[1:-1]
+
+
+class Assignment(TokenList):
+ """An assignment like 'var := val;'"""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class If(TokenList):
+ """An 'if' clause with possible 'else if' or 'else' parts."""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class For(TokenList):
+ """A 'FOR' loop."""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Comparison(TokenList):
+ """A comparison used for example in WHERE clauses."""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ @property
+ def left(self):
+ return self.tokens[0]
+
+ @property
+ def right(self):
+ return self.tokens[-1]
+
+
+class Comment(TokenList):
+ """A comment."""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def is_multiline(self):
+ return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
+
+
+class Where(TokenList):
+ """A WHERE clause."""
+ __slots__ = ('value', 'ttype', 'tokens')
+
+
+class Case(TokenList):
+ """A CASE statement with one or more WHEN and possibly an ELSE part."""
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_cases(self):
+ """Returns a list of 2-tuples (condition, value).
+
+ If an ELSE part exists, the condition is ``None``.
+ """
+ CONDITION = 1
+ VALUE = 2
+
+ ret = []
+ mode = CONDITION
+
+ for token in self.tokens:
+ # Set the mode from the current token
+ if token.match(T.Keyword, 'CASE'):
+ continue
+
+ elif token.match(T.Keyword, 'WHEN'):
+ ret.append(([], []))
+ mode = CONDITION
+
+ elif token.match(T.Keyword, 'THEN'):
+ mode = VALUE
+
+ elif token.match(T.Keyword, 'ELSE'):
+ ret.append((None, []))
+ mode = VALUE
+
+ elif token.match(T.Keyword, 'END'):
+ mode = None
+
+ # First condition without preceding WHEN
+ if mode and not ret:
+ ret.append(([], []))
+
+ # Append the token depending on the current mode
+ if mode == CONDITION:
+ ret[-1][0].append(token)
+
+ elif mode == VALUE:
+ ret[-1][1].append(token)
+
+ # Return cases list
+ return ret
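A sketch of ``get_cases`` on a parsed CASE expression. ``find_case`` is a
hypothetical helper (not part of this module) that searches the grouped tree,
since how deeply the Case node is nested depends on the grouping engine:

    import sqlparse
    from sqlparse.sql import Case

    def find_case(token_list):
        # Depth-first search for the first grouped Case node.
        for tok in token_list.tokens:
            if isinstance(tok, Case):
                return tok
            if tok.is_group():
                found = find_case(tok)
                if found is not None:
                    return found

    stmt = sqlparse.parse(u'SELECT CASE WHEN x = 1 THEN 2 ELSE 3 END FROM t')[0]
    for condition, value in find_case(stmt).get_cases():
        # condition is None for the ELSE branch; both sides are token lists.
        print condition, value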
+
+
+class Function(TokenList):
+ """A function or procedure call."""
+
+ __slots__ = ('value', 'ttype', 'tokens')
+
+ def get_parameters(self):
+ """Return a list of parameters."""
+ parenthesis = self.tokens[-1]
+ for t in parenthesis.tokens:
+ if isinstance(t, IdentifierList):
+ return t.get_identifiers()
+ elif isinstance(t, (Identifier, Function)) or \
+ t.ttype in T.Literal:
+ return [t]
+ return []
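And a matching sketch for ``get_parameters``, reusing the same hypothetical
depth-first lookup (same Python 2 / vendored-package assumptions; the
commented output assumes the arguments group into an IdentifierList):

    import sqlparse
    from sqlparse.sql import Function

    def find_function(token_list):
        # Depth-first search for the first grouped Function node.
        for tok in token_list.tokens:
            if isinstance(tok, Function):
                return tok
            if tok.is_group():
                found = find_function(tok)
                if found is not None:
                    return found

    func = find_function(sqlparse.parse(u'SELECT max(a, b) FROM t')[0])
    print [unicode(p) for p in func.get_parameters()]  # [u'a', u'b']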
+
+
+class Begin(TokenList):
+ """A BEGIN/END block."""
+
+ __slots__ = ('value', 'ttype', 'tokens')