You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by st...@apache.org on 2017/03/02 02:02:26 UTC

[2/6] cassandra git commit: Cqlsh copy-from should error out when csv contains invalid data for collections

Cqlsh copy-from should error out when csv contains invalid data for collections

patch by Stefania Alborghetti; reviewed by Paulo Motta for CASSANDRA-13071


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/496cfa8f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/496cfa8f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/496cfa8f

Branch: refs/heads/cassandra-3.11
Commit: 496cfa8f5b160d26794b346ce360b513069aa300
Parents: 45b6b5b
Author: Stefania Alborghetti <st...@datastax.com>
Authored: Wed Feb 22 17:35:57 2017 +0000
Committer: Stefania Alborghetti <st...@datastax.com>
Committed: Thu Mar 2 09:58:58 2017 +0800

----------------------------------------------------------------------
 CHANGES.txt                |  1 +
 pylib/cqlshlib/copyutil.py | 29 ++++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/496cfa8f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 2c3ac39..1c3869f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.12
+ * Cqlsh copy-from should error out when csv contains invalid data for collections (CASSANDRA-13071)
  * Update c.yaml doc for offheap memtables (CASSANDRA-13179)
  * Faster StreamingHistogram (CASSANDRA-13038)
  * Legacy deserializer can create unexpected boundary range tombstones (CASSANDRA-13237)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/496cfa8f/pylib/cqlshlib/copyutil.py
----------------------------------------------------------------------
diff --git a/pylib/cqlshlib/copyutil.py b/pylib/cqlshlib/copyutil.py
index 710a640..226fad5 100644
--- a/pylib/cqlshlib/copyutil.py
+++ b/pylib/cqlshlib/copyutil.py
@@ -1881,11 +1881,30 @@ class ImportConversion(object):
 
         def split(val, sep=','):
             """
-            Split into a list of values whenever we encounter a separator but
+            Split "val" into a list of values whenever the separator "sep" is found, but
             ignore separators inside parentheses or single quotes, except for the two
-            outermost parentheses, which will be ignored. We expect val to be at least
-            2 characters long (the two outer parentheses).
+            outermost parentheses, which will be ignored. This method is called when parsing composite
+            types, "val" should be at least 2 characters long, the first char should be an
+            open parenthesis and the last char should be a matching closing parenthesis. We could also
+            check exactly which parenthesis type depending on the caller, but I don't want to enforce
+            too many checks that don't necessarily provide any additional benefits, and risk breaking
+            data that could previously be imported, even if strictly speaking it is incorrect CQL.
+            For example, right now we accept sets that start with '[' and ']', I don't want to break this
+            by enforcing '{' and '}' in a minor release.
             """
+            def is_open_paren(cc):
+                return cc == '{' or cc == '[' or cc == '('
+
+            def is_close_paren(cc):
+                return cc == '}' or cc == ']' or cc == ')'
+
+            def paren_match(c1, c2):
+                return (c1 == '{' and c2 == '}') or (c1 == '[' and c2 == ']') or (c1 == '(' and c2 == ')')
+
+            if len(val) < 2 or not paren_match(val[0], val[-1]):
+                raise ParseError('Invalid composite string, it should start and end with matching parentheses: {}'
+                                 .format(val))
+
             ret = []
             last = 1
             level = 0
@@ -1894,9 +1913,9 @@ class ImportConversion(object):
                 if c == '\'':
                     quote = not quote
                 elif not quote:
-                    if c == '{' or c == '[' or c == '(':
+                    if is_open_paren(c):
                         level += 1
-                    elif c == '}' or c == ']' or c == ')':
+                    elif is_close_paren(c):
                         level -= 1
                     elif c == sep and level == 1:
                         ret.append(val[last:i])