You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2016/06/29 10:37:03 UTC

cassandra git commit: Add support for CQL in pygments (proper syntax highlighting)

Repository: cassandra
Updated Branches:
  refs/heads/trunk 582f9dc08 -> e42352763


Add support for CQL in pygments (proper syntax highlighting)


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e4235276
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e4235276
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e4235276

Branch: refs/heads/trunk
Commit: e42352763089a41d4563d67fe800fd2878da842f
Parents: 582f9dc
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Jun 29 09:23:38 2016 +0200
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Wed Jun 29 12:36:54 2016 +0200

----------------------------------------------------------------------
 doc/source/_util/cql.py        | 267 ++++++++++++++++++++++++++++++++++++
 doc/source/conf.py             |   6 +-
 doc/source/cql/appendices.rst  |   2 +-
 doc/source/cql/changes.rst     |   2 +-
 doc/source/cql/ddl.rst         |   2 +-
 doc/source/cql/definitions.rst |   4 +-
 doc/source/cql/dml.rst         |   4 +-
 doc/source/cql/functions.rst   |  25 ++--
 doc/source/cql/indexes.rst     |   2 +-
 doc/source/cql/json.rst        |   2 +-
 doc/source/cql/mvs.rst         |   4 +-
 doc/source/cql/security.rst    |   7 +-
 doc/source/cql/triggers.rst    |   2 +-
 doc/source/cql/types.rst       |   2 +-
 14 files changed, 307 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/_util/cql.py
----------------------------------------------------------------------
diff --git a/doc/source/_util/cql.py b/doc/source/_util/cql.py
new file mode 100644
index 0000000..b1c8cde
--- /dev/null
+++ b/doc/source/_util/cql.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+"""
+    CQL pygments lexer
+    ~~~~~~~~~~~~~~~~~~
+
+    Lexer for the Cassandra Query Language (CQL).
+
+    This is heavily inspired from the pygments SQL lexer (and the Postgres one in particular) but adapted to CQL
+    keywords and specificities.
+
+    TODO: This has been hacked quickly, but once it's more tested, we could submit it upstream.
+          In particular, we have a lot of keywords whose meaning depends on the context and we could potentially improve
+          their handling. For instance, SET is a keyword, but also a type name (that's why currently we also consider
+          map and list as keywords, not types; we could disambiguate by looking if there is a '<' afterwards). Or things
+          like USERS, which is used in some documentation examples as a table name but is a keyword too (we could
+          only consider it a keyword if after LIST for instance). Similarly, type names are not reserved, so they
+          are sometimes used as column identifiers (also, timestamp is both a type and a keyword). I "think" we can
+          somewhat disambiguate through "states", but unclear how far it's worth going.
+
+          We could also add the predefined functions?
+"""
+
+import re
+
+from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
+from pygments.token import Punctuation, Whitespace, Error, \
+    Text, Comment, Operator, Keyword, Name, String, Number, Generic, Literal
+from pygments.lexers import get_lexer_by_name, ClassNotFound
+from pygments.util import iteritems
+
+__all__ = [ 'CQLLexer' ]
+
+language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
+
+KEYWORDS = (
+    'SELECT',
+    'FROM',
+    'AS',
+    'WHERE',
+    'AND',
+    'KEY',
+    'KEYS',
+    'ENTRIES',
+    'FULL',
+    'INSERT',
+    'UPDATE',
+    'WITH',
+    'LIMIT',
+    'PER',
+    'PARTITION',
+    'USING',
+    'USE',
+    'DISTINCT',
+    'COUNT',
+    'SET',
+    'BEGIN',
+    'UNLOGGED',
+    'BATCH',
+    'APPLY',
+    'TRUNCATE',
+    'DELETE',
+    'IN',
+    'CREATE',
+    'KEYSPACE',
+    'SCHEMA',
+    'KEYSPACES',
+    'COLUMNFAMILY',
+    'TABLE',
+    'MATERIALIZED',
+    'VIEW',
+    'INDEX',
+    'CUSTOM',
+    'ON',
+    'TO',
+    'DROP',
+    'PRIMARY',
+    'INTO',
+    'VALUES',
+    'TIMESTAMP',
+    'TTL',
+    'CAST',
+    'ALTER',
+    'RENAME',
+    'ADD',
+    'TYPE',
+    'COMPACT',
+    'STORAGE',
+    'ORDER',
+    'BY',
+    'ASC',
+    'DESC',
+    'ALLOW',
+    'FILTERING',
+    'IF',
+    'IS',
+    'CONTAINS',
+    'GRANT',
+    'ALL',
+    'PERMISSION',
+    'PERMISSIONS',
+    'OF',
+    'REVOKE',
+    'MODIFY',
+    'AUTHORIZE',
+    'DESCRIBE',
+    'EXECUTE',
+    'NORECURSIVE',
+    'MBEAN',
+    'MBEANS',
+    'USER',
+    'USERS',
+    'ROLE',
+    'ROLES',
+    'SUPERUSER',
+    'NOSUPERUSER',
+    'PASSWORD',
+    'LOGIN',
+    'NOLOGIN',
+    'OPTIONS',
+    'CLUSTERING',
+    'TOKEN',
+    'WRITETIME',
+    'NULL',
+    'NOT',
+    'EXISTS',
+    'MAP',
+    'LIST',
+    'NAN',
+    'INFINITY',
+    'TUPLE',
+    'TRIGGER',
+    'STATIC',
+    'FROZEN',
+    'FUNCTION',
+    'FUNCTIONS',
+    'AGGREGATE',
+    'SFUNC',
+    'STYPE',
+    'FINALFUNC',
+    'INITCOND',
+    'RETURNS',
+    'CALLED',
+    'INPUT',
+    'LANGUAGE',
+    'OR',
+    'REPLACE',
+    'JSON',
+    'LIKE',
+)
+
+DATATYPES = (
+    'ASCII',
+    'BIGINT',
+    'BLOB',
+    'BOOLEAN',
+    'COUNTER',
+    'DATE',
+    'DECIMAL',
+    'DOUBLE',
+    'EMPTY',
+    'FLOAT',
+    'INET',
+    'INT',
+    'SMALLINT',
+    'TEXT',
+    'TIME',
+    'TIMESTAMP',
+    'TIMEUUID',
+    'TINYINT',
+    'UUID',
+    'VARCHAR',
+    'VARINT',
+)
+
+def language_callback(lexer, match):
+    """Parse the content of a $-string using a lexer
+
+    The lexer is chosen looking for a nearby LANGUAGE or assumed as
+    java if no LANGUAGE has been found.
+    """
+    l = None
+    m = language_re.match(lexer.text[max(0, match.start()-100):match.start()])
+    if m is not None:
+        l = lexer._get_lexer(m.group(1))
+    else:
+        l = lexer._get_lexer('java')
+
+    # 1 = $, 2 = delimiter, 3 = $
+    yield (match.start(1), String, match.group(1))
+    yield (match.start(2), String.Delimiter, match.group(2))
+    yield (match.start(3), String, match.group(3))
+    # 4 = string contents
+    if l:
+        for x in l.get_tokens_unprocessed(match.group(4)):
+            yield x
+    else:
+        yield (match.start(4), String, match.group(4))
+    # 5 = $, 6 = delimiter, 7 = $
+    yield (match.start(5), String, match.group(5))
+    yield (match.start(6), String.Delimiter, match.group(6))
+    yield (match.start(7), String, match.group(7))
+
+
+class CQLLexer(RegexLexer):
+    """
+    Lexer for the Cassandra Query Language.
+    """
+
+    name = 'Cassandra Query Language'
+    aliases = ['cql']
+    filenames = ['*.cql']
+    mimetypes = ['text/x-cql']
+
+    flags = re.IGNORECASE
+    tokens = {
+        'root': [
+            (r'\s+', Text),
+            (r'--.*\n?', Comment.Single),
+            (r'//.*\n?', Comment.Single),
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r'(' + '|'.join(s.replace(" ", "\s+")
+                             for s in DATATYPES)
+             + r')\b', Name.Builtin),
+            (words(KEYWORDS, suffix=r'\b'), Keyword),
+            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
+            (r'\$\d+', Name.Variable),
+
+            # Using Number instead of the more accurate Literal because the latter doesn't seem to be highlighted in most
+            # styles
+            (r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', Number), # UUIDs
+            (r'0x[0-9a-fA-F]+', Number), # Blobs
+
+            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
+            (r'[0-9]+', Number.Integer),
+            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
+            # quoted identifier
+            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
+            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
+            (r'[a-z_]\w*', Name),
+            (r'[;:()\[\]{},.]', Punctuation),
+        ],
+        'multiline-comments': [
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[^/*]+', Comment.Multiline),
+            (r'[/*]', Comment.Multiline)
+        ],
+        'string': [
+            (r"[^']+", String.Single),
+            (r"''", String.Single),
+            (r"'", String.Single, '#pop'),
+        ],
+        'quoted-ident': [
+            (r'[^"]+', String.Name),
+            (r'""', String.Name),
+            (r'"', String.Name, '#pop'),
+        ],
+    }
+
+    def get_tokens_unprocessed(self, text, *args):
+        # Have a copy of the entire text to be used by `language_callback`.
+        self.text = text
+        for x in RegexLexer.get_tokens_unprocessed(self, text, *args):
+            yield x
+
+    def _get_lexer(self, lang):
+        return get_lexer_by_name(lang, **self.options)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/conf.py
----------------------------------------------------------------------
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 2b36353..63697aa 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -21,7 +21,7 @@
 #
 # This file is execfile()d with the current directory set to its containing
 # dir.
-import re
+import re, sys, os
 
 # Finds out the version (so we don't have to manually edit that file every
 # time we change the version)
@@ -32,6 +32,10 @@ with open(cassandra_build_file) as f:
         raise RuntimeException("Problem finding version in build.xml file, this shouldn't happen.")
     cassandra_version = m.group(1)
 
+def setup(sphinx):
+    sys.path.insert(0, os.path.abspath('./source/_util'))
+    from cql import CQLLexer
+    sphinx.add_lexer("cql", CQLLexer())
 
 
 # -- General configuration ------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/appendices.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/appendices.rst b/doc/source/cql/appendices.rst
index c4bb839..8c63a32 100644
--- a/doc/source/cql/appendices.rst
+++ b/doc/source/cql/appendices.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 Appendices
 ----------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/changes.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/changes.rst b/doc/source/cql/changes.rst
index 263df13..d9aea85 100644
--- a/doc/source/cql/changes.rst
+++ b/doc/source/cql/changes.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 Changes
 -------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/ddl.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/ddl.rst b/doc/source/cql/ddl.rst
index 7f3431a..029c1cb 100644
--- a/doc/source/cql/ddl.rst
+++ b/doc/source/cql/ddl.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _data-definition:
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/definitions.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/definitions.rst b/doc/source/cql/definitions.rst
index 6c3b522..e54bcd7 100644
--- a/doc/source/cql/definitions.rst
+++ b/doc/source/cql/definitions.rst
@@ -16,6 +16,8 @@
 
 .. _UUID: https://en.wikipedia.org/wiki/Universally_unique_identifier
 
+.. highlight:: cql
+
 Definitions
 -----------
 
@@ -145,7 +147,7 @@ Multi-line comments are also supported through enclosure within ``/*`` and ``*/`
 
 ::
 
-    \u2014 This is a comment
+    -- This is a comment
     // This is a comment too
     /* This is
        a multi-line comment */

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/dml.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/dml.rst b/doc/source/cql/dml.rst
index 989c0ca..b5f9e9f 100644
--- a/doc/source/cql/dml.rst
+++ b/doc/source/cql/dml.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _data-manipulation:
 
@@ -202,7 +202,7 @@ The tuple notation may also be used for ``IN`` clauses on clustering columns::
 
     SELECT * FROM posts
      WHERE userid = 'john doe'
-       AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01), ('Extreme Chess', '2014-06-01'))
+       AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01'), ('Extreme Chess', '2014-06-01'))
 
 The ``CONTAINS`` operator may only be used on collection columns (lists, sets, and maps). In the case of maps,
 ``CONTAINS`` applies to the map values. The ``CONTAINS KEY`` operator may only be used on map columns and applies to the

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/functions.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/functions.rst b/doc/source/cql/functions.rst
index efcdf32..47026cd 100644
--- a/doc/source/cql/functions.rst
+++ b/doc/source/cql/functions.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _cql-functions:
 
@@ -39,6 +39,11 @@ functions.
    evil, but no sandbox is perfect so using user-defined functions is opt-in). See the ``enable_user_defined_functions``
    in ``cassandra.yaml`` to enable them.
 
+A function is identified by its name:
+
+.. productionlist::
+   function_name: [ `keyspace_name` '.' ] `name`
+
 .. _scalar-functions:
 
 Scalar functions
@@ -236,15 +241,15 @@ User-defined functions can be used in ``SELECT``, ``INSERT`` and ``UPDATE`` stat
 The implicitly available ``udfContext`` field (or binding for script UDFs) provides the necessary functionality to
 create new UDT and tuple values::
 
-    CREATE TYPE custom\_type (txt text, i int);
+    CREATE TYPE custom_type (txt text, i int);
     CREATE FUNCTION fct\_using\_udt ( somearg int )
         RETURNS NULL ON NULL INPUT
-        RETURNS custom\_type
+        RETURNS custom_type
         LANGUAGE java
         AS $$
             UDTValue udt = udfContext.newReturnUDTValue();
-            udt.setString(\u201ctxt\u201d, \u201csome string\u201d);
-            udt.setInt(\u201ci\u201d, 42);
+            udt.setString("txt", "some string");
+            udt.setInt("i", 42);
             return udt;
         $$;
 
@@ -447,25 +452,25 @@ statement)::
         CALLED ON NULL INPUT
         RETURNS tuple
         LANGUAGE java
-        AS '
+        AS $$
             if (val != null) {
                 state.setInt(0, state.getInt(0)+1);
                 state.setLong(1, state.getLong(1)+val.intValue());
             }
             return state;
-        ';
+        $$;
 
     CREATE OR REPLACE FUNCTION averageFinal (state tuple<int,bigint>)
         CALLED ON NULL INPUT
         RETURNS double
         LANGUAGE java
-        AS '
+        AS $$
             double r = 0;
             if (state.getInt(0) == 0) return null;
             r = state.getLong(1);
             r /= state.getInt(0);
-            return Double.valueOf�;
-        ';
+            return Double.valueOf(r);
+        $$;
 
     CREATE OR REPLACE AGGREGATE average(int)
         SFUNC averageState

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/indexes.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/indexes.rst b/doc/source/cql/indexes.rst
index fbe5827..81fe429 100644
--- a/doc/source/cql/indexes.rst
+++ b/doc/source/cql/indexes.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _secondary-indexes:
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/json.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/json.rst b/doc/source/cql/json.rst
index 6482fd6..f83f16c 100644
--- a/doc/source/cql/json.rst
+++ b/doc/source/cql/json.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _cql-json:
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/mvs.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/mvs.rst b/doc/source/cql/mvs.rst
index 84c18e0..aabea10 100644
--- a/doc/source/cql/mvs.rst
+++ b/doc/source/cql/mvs.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _materialized-views:
 
@@ -46,7 +46,7 @@ For instance::
         SELECT * FROM monkeySpecies
         WHERE population IS NOT NULL AND species IS NOT NULL
         PRIMARY KEY (population, species)
-        WITH comment=\u2018Allow query by population instead of species\u2019;
+        WITH comment='Allow query by population instead of species';
 
 The ``CREATE MATERIALIZED VIEW`` statement creates a new materialized view. Each such view is a set of *rows* which
 corresponds to rows which are present in the underlying, or base, table specified in the ``SELECT`` statement. A

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/security.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/security.rst b/doc/source/cql/security.rst
index aa65383..9efe27f 100644
--- a/doc/source/cql/security.rst
+++ b/doc/source/cql/security.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _cql-security:
 
@@ -26,6 +26,11 @@ Security
 Database Roles
 ^^^^^^^^^^^^^^
 
+CQL uses database roles to represent users and groups of users. Syntactically, a role is defined by:
+
+.. productionlist::
+   role_name: `identifier` | `string`
+
 .. _create-role-statement:
 
 CREATE ROLE

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/triggers.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/triggers.rst b/doc/source/cql/triggers.rst
index 3bba72d..db3f53e 100644
--- a/doc/source/cql/triggers.rst
+++ b/doc/source/cql/triggers.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _cql-triggers:
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/types.rst
----------------------------------------------------------------------
diff --git a/doc/source/cql/types.rst b/doc/source/cql/types.rst
index 80cf864..e452f35 100644
--- a/doc/source/cql/types.rst
+++ b/doc/source/cql/types.rst
@@ -14,7 +14,7 @@
 .. See the License for the specific language governing permissions and
 .. limitations under the License.
 
-.. highlight:: sql
+.. highlight:: cql
 
 .. _UUID: https://en.wikipedia.org/wiki/Universally_unique_identifier