You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ty...@apache.org on 2015/01/29 21:39:17 UTC

[1/2] cassandra git commit: cqlsh: handle unicode BOM at the start of files

Repository: cassandra
Updated Branches:
  refs/heads/trunk 6d2f2767e -> d76450c79


cqlsh: handle unicode BOM at the start of files

Patch by Abhishek Gupta; reviewed by Tyler Hobbs for CASSANDRA-8638


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/c49f6666
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/c49f6666
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/c49f6666

Branch: refs/heads/trunk
Commit: c49f6666e457debebc3cfe935412a2b5306208a3
Parents: 1e5a0e1
Author: Abhishek Gupta <ab...@persistent.com>
Authored: Thu Jan 29 14:37:55 2015 -0600
Committer: Tyler Hobbs <ty...@datastax.com>
Committed: Thu Jan 29 14:37:55 2015 -0600

----------------------------------------------------------------------
 CHANGES.txt            |  1 +
 bin/cqlsh              | 10 +++++++---
 pylib/cqlshlib/util.py | 29 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/c49f6666/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index fce4898..a8f8b87 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.3
+ * (cqlsh) Handle unicode BOM at start of files (CASSANDRA-8638)
  * Stop compactions before exiting offline tools (CASSANDRA-8623)
  * Update tools/stress/README.txt to match current behaviour (CASSANDRA-7933)
  * Fix schema from Thrift conversion with empty metadata (CASSANDRA-8695)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c49f6666/bin/cqlsh
----------------------------------------------------------------------
diff --git a/bin/cqlsh b/bin/cqlsh
index 0254fad..26296a3 100755
--- a/bin/cqlsh
+++ b/bin/cqlsh
@@ -121,7 +121,7 @@ from cqlshlib import cqlhandling, cql3handling, pylexotron, sslhandling, async_i
 from cqlshlib.displaying import (RED, BLUE, CYAN, ANSI_RESET, COLUMN_NAME_COLORS,
                                  FormattedValue, colorme)
 from cqlshlib.formatting import format_by_type, formatter_for, format_value_utype
-from cqlshlib.util import trim_if_present
+from cqlshlib.util import trim_if_present, get_file_encoding_bomsize
 from cqlshlib.tracing import print_trace_session, print_trace
 
 DEFAULT_HOST = '127.0.0.1'
@@ -1601,7 +1601,9 @@ class Shell(cmd.Cmd):
         fname = parsed.get_binding('fname')
         fname = os.path.expanduser(self.cql_unprotect_value(fname))
         try:
-            f = open(fname, 'r')
+            encoding, bom_size = get_file_encoding_bomsize(fname)
+            f = codecs.open(fname, 'r', encoding)
+            f.seek(bom_size)
         except IOError, e:
             self.printerr('Could not open %r: %s' % (fname, e))
             return
@@ -2013,7 +2015,9 @@ def main(options, hostname, port):
         stdin = None
     else:
         try:
-            stdin = open(options.file, 'r')
+            encoding, bom_size = get_file_encoding_bomsize(options.file)
+            stdin = codecs.open(options.file, 'r', encoding)
+            stdin.seek(bom_size)
         except IOError, e:
             sys.exit("Can't open %r: %s" % (options.file, e))
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c49f6666/pylib/cqlshlib/util.py
----------------------------------------------------------------------
diff --git a/pylib/cqlshlib/util.py b/pylib/cqlshlib/util.py
index e62ded4..bc58c8b 100644
--- a/pylib/cqlshlib/util.py
+++ b/pylib/cqlshlib/util.py
@@ -14,8 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import codecs
 from itertools import izip
 
+
 def split_list(items, pred):
     """
     Split up a list (or other iterable) on the elements which satisfy the
@@ -36,6 +38,7 @@ def split_list(items, pred):
             results.append(thisresult)
     return results
 
+
 def find_common_prefix(strs):
     """
     Given a list (iterable) of strings, return the longest common prefix.
@@ -54,6 +57,7 @@ def find_common_prefix(strs):
             break
     return ''.join(common)
 
+
 def list_bifilter(pred, iterable):
     """
     Filter an iterable into two output lists: the first containing all
@@ -70,10 +74,35 @@ def list_bifilter(pred, iterable):
         (yes_s if pred(i) else no_s).append(i)
     return yes_s, no_s
 
+
 def identity(x):
     return x
 
+
 def trim_if_present(s, prefix):
     if s.startswith(prefix):
         return s[len(prefix):]
     return s
+
+
+def get_file_encoding_bomsize(filename):
+    """
+    Checks the beginning of a file for a Unicode BOM.  Based on this check,
+    the encoding that should be used to open the file and the number of
+    bytes that should be skipped (to skip the BOM) are returned.
+    """
+    bom_encodings = ((codecs.BOM_UTF8, 'utf-8-sig'),
+                     (codecs.BOM_UTF32_LE, 'utf-32le'),
+                     (codecs.BOM_UTF32_BE, 'utf-32be'),
+                     (codecs.BOM_UTF16_LE, 'utf-16le'),
+                     (codecs.BOM_UTF16_BE, 'utf-16be'))
+
+    firstbytes = open(filename, 'rb').read(4)
+    for bom, encoding in bom_encodings:
+        if firstbytes.startswith(bom):
+            file_encoding, size = encoding, len(bom)
+            break
+    else:
+        file_encoding, size = "ascii", 0
+
+    return (file_encoding, size)


[2/2] cassandra git commit: Merge branch 'cassandra-2.1' into trunk

Posted by ty...@apache.org.
Merge branch 'cassandra-2.1' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d76450c7
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d76450c7
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d76450c7

Branch: refs/heads/trunk
Commit: d76450c7986202141f3a917b3623a4c3138c1094
Parents: 6d2f276 c49f666
Author: Tyler Hobbs <ty...@datastax.com>
Authored: Thu Jan 29 14:39:07 2015 -0600
Committer: Tyler Hobbs <ty...@datastax.com>
Committed: Thu Jan 29 14:39:07 2015 -0600

----------------------------------------------------------------------
 CHANGES.txt            |  1 +
 bin/cqlsh              | 10 +++++++---
 pylib/cqlshlib/util.py | 29 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/d76450c7/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 157550a,a8f8b87..62c18f5
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,61 -1,5 +1,62 @@@
 +3.0
 + * Upgrade Metrics library and remove deprecated metrics (CASSANDRA-5657)
 + * Serializing Row cache alternative, fully off heap (CASSANDRA-7438)
 + * Duplicate rows returned when in clause has repeated values (CASSANDRA-6707)
 + * Make CassandraException unchecked, extend RuntimeException (CASSANDRA-8560)
 + * Support direct buffer decompression for reads (CASSANDRA-8464)
 + * DirectByteBuffer compatible LZ4 methods (CASSANDRA-7039)
 + * Add role based access control (CASSANDRA-7653)
 + * Group sstables for anticompaction correctly (CASSANDRA-8578)
 + * Add ReadFailureException to native protocol, respond
 +   immediately when replicas encounter errors while handling
 +   a read request (CASSANDRA-7886)
 + * Switch CommitLogSegment from RandomAccessFile to nio (CASSANDRA-8308)
 + * Allow mixing token and partition key restrictions (CASSANDRA-7016)
 + * Support index key/value entries on map collections (CASSANDRA-8473)
 + * Modernize schema tables (CASSANDRA-8261)
 + * Support for user-defined aggregation functions (CASSANDRA-8053)
 + * Fix NPE in SelectStatement with empty IN values (CASSANDRA-8419)
 + * Refactor SelectStatement, return IN results in natural order instead
 +   of IN value list order and ignore duplicate values in partition key IN restrictions (CASSANDRA-7981)
 + * Support UDTs, tuples, and collections in user-defined
 +   functions (CASSANDRA-7563)
 + * Fix aggregate fn results on empty selection, result column name,
 +   and cqlsh parsing (CASSANDRA-8229)
 + * Mark sstables as repaired after full repair (CASSANDRA-7586)
 + * Extend Descriptor to include a format value and refactor reader/writer
 +   APIs (CASSANDRA-7443)
 + * Integrate JMH for microbenchmarks (CASSANDRA-8151)
 + * Keep sstable levels when bootstrapping (CASSANDRA-7460)
 + * Add Sigar library and perform basic OS settings check on startup (CASSANDRA-7838)
 + * Support for aggregation functions (CASSANDRA-4914)
 + * Remove cassandra-cli (CASSANDRA-7920)
 + * Accept dollar quoted strings in CQL (CASSANDRA-7769)
 + * Make assassinate a first class command (CASSANDRA-7935)
 + * Support IN clause on any clustering column (CASSANDRA-4762)
 + * Improve compaction logging (CASSANDRA-7818)
 + * Remove YamlFileNetworkTopologySnitch (CASSANDRA-7917)
 + * Do anticompaction in groups (CASSANDRA-6851)
 + * Support user-defined functions (CASSANDRA-7395, 7526, 7562, 7740, 7781, 7929,
 +   7924, 7812, 8063, 7813, 7708)
 + * Permit configurable timestamps with cassandra-stress (CASSANDRA-7416)
 + * Move sstable RandomAccessReader to nio2, which allows using the
 +   FILE_SHARE_DELETE flag on Windows (CASSANDRA-4050)
 + * Remove CQL2 (CASSANDRA-5918)
 + * Add Thrift get_multi_slice call (CASSANDRA-6757)
 + * Optimize fetching multiple cells by name (CASSANDRA-6933)
 + * Allow compilation in java 8 (CASSANDRA-7028)
 + * Make incremental repair default (CASSANDRA-7250)
 + * Enable code coverage thru JaCoCo (CASSANDRA-7226)
 + * Switch external naming of 'column families' to 'tables' (CASSANDRA-4369) 
 + * Shorten SSTable path (CASSANDRA-6962)
 + * Use unsafe mutations for most unit tests (CASSANDRA-6969)
 + * Fix race condition during calculation of pending ranges (CASSANDRA-7390)
 + * Fail on very large batch sizes (CASSANDRA-8011)
 + * Improve concurrency of repair (CASSANDRA-6455, 8208)
 +
 +
  2.1.3
+  * (cqlsh) Handle unicode BOM at start of files (CASSANDRA-8638)
   * Stop compactions before exiting offline tools (CASSANDRA-8623)
   * Update tools/stress/README.txt to match current behaviour (CASSANDRA-7933)
   * Fix schema from Thrift conversion with empty metadata (CASSANDRA-8695)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d76450c7/bin/cqlsh
----------------------------------------------------------------------