You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2010/07/27 19:46:29 UTC

svn commit: r979784 - in /cassandra/trunk: ./ conf/ src/java/org/apache/cassandra/db/marshal/ test/conf/ test/system/ test/unit/org/apache/cassandra/db/marshal/

Author: jbellis
Date: Tue Jul 27 17:46:28 2010
New Revision: 979784

URL: http://svn.apache.org/viewvc?rev=979784&view=rev
Log:
add IntegerType for arbitrary-length integers.  patch by Folke Behrens; reviewed by mdennis and jbellis for CASSANDRA-1282

Added:
    cassandra/trunk/src/java/org/apache/cassandra/db/marshal/IntegerType.java
    cassandra/trunk/test/unit/org/apache/cassandra/db/marshal/IntegerTypeTest.java
Modified:
    cassandra/trunk/CHANGES.txt
    cassandra/trunk/NEWS.txt
    cassandra/trunk/conf/cassandra.yaml
    cassandra/trunk/test/conf/cassandra.yaml
    cassandra/trunk/test/system/__init__.py
    cassandra/trunk/test/system/test_thrift_server.py

Modified: cassandra/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=979784&r1=979783&r2=979784&view=diff
==============================================================================
--- cassandra/trunk/CHANGES.txt (original)
+++ cassandra/trunk/CHANGES.txt Tue Jul 27 17:46:28 2010
@@ -41,6 +41,7 @@ dev
  * take advantage of row cache during range queries (CASSANDRA-1302)
  * make GCGraceSeconds a per-ColumnFamily value (CASSANDRA-1276)
  * keep persistent row size and column count statistics (CASSANDRA-1155)
+ * add IntegerType (CASSANDRA-1282)
 
 
 0.6.4

Modified: cassandra/trunk/NEWS.txt
URL: http://svn.apache.org/viewvc/cassandra/trunk/NEWS.txt?rev=979784&r1=979783&r2=979784&view=diff
==============================================================================
--- cassandra/trunk/NEWS.txt (original)
+++ cassandra/trunk/NEWS.txt Tue Jul 27 17:46:28 2010
@@ -22,6 +22,8 @@ Features
       clusters
     - dynamic endpoint snitch mitigates the impact of impaired nodes
     - significantly faster reads from row cache
+    - introduced IntegerType that is both faster than LongType and
+      allows integers of both less and more bits than Long's 64
 
 Configuraton
 ------------

Modified: cassandra/trunk/conf/cassandra.yaml
URL: http://svn.apache.org/viewvc/cassandra/trunk/conf/cassandra.yaml?rev=979784&r1=979783&r2=979784&view=diff
==============================================================================
--- cassandra/trunk/conf/cassandra.yaml (original)
+++ cassandra/trunk/conf/cassandra.yaml Tue Jul 27 17:46:28 2010
@@ -213,7 +213,8 @@ request_scheduler_id: keyspace
 # - compare_with: tells Cassandra how to sort the columns for slicing
 #   operations. The default is BytesType, which is a straightforward
 #   lexical comparison of the bytes in each column.  Other options are
-#   AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, and LongType.
+#   AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
+#   and IntegerType (a generic variable-length integer type).
 #   You can also specify the fully-qualified class name to a class of
 #   your choice extending org.apache.cassandra.db.marshal.AbstractType.
 #

Added: cassandra/trunk/src/java/org/apache/cassandra/db/marshal/IntegerType.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/marshal/IntegerType.java?rev=979784&view=auto
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/marshal/IntegerType.java (added)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/marshal/IntegerType.java Tue Jul 27 17:46:28 2010
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cassandra.db.marshal;
+
+public final class IntegerType extends AbstractType
+{
+    public static final IntegerType instance = new IntegerType();
+
+    private static int findMostSignificantByte(byte[] bytes)
+    {
+        int len = bytes.length - 1;
+        int i = 0;
+        for (; i < len; i++)
+        {
+            byte b0 = bytes[i];
+            if (b0 != 0 && b0 != -1)
+                break;
+            byte b1 = bytes[i + 1];
+            if (b0 == 0 && b1 != 0)
+            {
+                if (b1 > 0)
+                    i++;
+                break;
+            }
+            if (b0 == -1 && b1 != -1)
+            {
+                if (b1 < 0)
+                    i++;
+                break;
+            }
+        }
+        return i;
+    }
+
+    IntegerType() {/* singleton */}
+
+    public int compare(byte[] lhs, byte[] rhs)
+    {
+        int lhsLen = lhs.length;
+        int rhsLen = rhs.length;
+
+        if (lhsLen == 0)
+            return rhsLen == 0 ? 0 : -1;
+        if (rhsLen == 0)
+            return 1;
+
+        int lhsMsbIdx = findMostSignificantByte(lhs);
+        int rhsMsbIdx = findMostSignificantByte(rhs);
+
+        //diffs contain number of "meaningful" bytes (i.e. ignore padding)
+        int lhsLenDiff = lhsLen - lhsMsbIdx;
+        int rhsLenDiff = rhsLen - rhsMsbIdx;
+
+        byte lhsMsb = lhs[lhsMsbIdx];
+        byte rhsMsb = rhs[rhsMsbIdx];
+
+        /*         +    -
+         *      -----------
+         *    + | -d |  1 |
+         * LHS  -----------
+         *    - | -1 |  d |
+         *      -----------
+         *          RHS
+         *
+         * d = difference of length in significant bytes
+         */
+        if (lhsLenDiff != rhsLenDiff)
+        {
+            if (lhsMsb < 0)
+                return rhsMsb < 0 ? rhsLenDiff - lhsLenDiff : -1;
+            if (rhsMsb < 0)
+                return 1;
+            return lhsLenDiff - rhsLenDiff;
+        }
+
+        // msb uses signed comparison
+        if (lhsMsb != rhsMsb)
+            return lhsMsb - rhsMsb;
+        lhsMsbIdx++;
+        rhsMsbIdx++;
+
+        // remaining bytes are compared unsigned
+        while (lhsMsbIdx < lhsLen)
+        {
+            lhsMsb = lhs[lhsMsbIdx++];
+            rhsMsb = rhs[rhsMsbIdx++];
+            if (lhsMsb != rhsMsb)
+                return (lhsMsb & 0xFF) - (rhsMsb & 0xFF);
+        }
+
+        return 0;
+    }
+
+    @Override
+    public String getString(byte[] bytes)
+    {
+        if (bytes == null)
+            return "null";
+        if (bytes.length == 0)
+            return "empty";
+
+        return new java.math.BigInteger(bytes).toString(10);
+    }
+}

Modified: cassandra/trunk/test/conf/cassandra.yaml
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/conf/cassandra.yaml?rev=979784&r1=979783&r2=979784&view=diff
==============================================================================
--- cassandra/trunk/test/conf/cassandra.yaml (original)
+++ cassandra/trunk/test/conf/cassandra.yaml Tue Jul 27 17:46:28 2010
@@ -49,6 +49,9 @@ keyspaces:
         - name: StandardLong2
           compare_with: LongType
 
+        - name: StandardInteger1
+          compare_with: IntegerType
+
         - name: Super1
           column_type: Super
           compare_subcolumns_with: LongType

Modified: cassandra/trunk/test/system/__init__.py
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/system/__init__.py?rev=979784&r1=979783&r2=979784&view=diff
==============================================================================
--- cassandra/trunk/test/system/__init__.py (original)
+++ cassandra/trunk/test/system/__init__.py Tue Jul 27 17:46:28 2010
@@ -148,6 +148,7 @@ class ThriftTester(BaseTester):
             Cassandra.CfDef('Keyspace1', 'Standard2'), 
             Cassandra.CfDef('Keyspace1', 'StandardLong1', comparator_type='LongType'), 
             Cassandra.CfDef('Keyspace1', 'StandardLong2', comparator_type='LongType'), 
+            Cassandra.CfDef('Keyspace1', 'StandardInteger1', comparator_type='IntegerType'),
             Cassandra.CfDef('Keyspace1', 'Super1', column_type='Super', subcomparator_type='LongType', row_cache_size=1000, key_cache_size=0), 
             Cassandra.CfDef('Keyspace1', 'Super2', column_type='Super', subcomparator_type='LongType'), 
             Cassandra.CfDef('Keyspace1', 'Super3', column_type='Super', subcomparator_type='LongType'), 

Modified: cassandra/trunk/test/system/test_thrift_server.py
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/system/test_thrift_server.py?rev=979784&r1=979783&r2=979784&view=diff
==============================================================================
--- cassandra/trunk/test/system/test_thrift_server.py (original)
+++ cassandra/trunk/test/system/test_thrift_server.py Tue Jul 27 17:46:28 2010
@@ -278,6 +278,22 @@ class TestMutations(ThriftTester):
         slice = [result.column.name for result in _big_slice('key1', ColumnParent('StandardLong1'))]
         assert slice == L, slice
         
+    def test_integer_order(self):
+        _set_keyspace('Keyspace1')
+        def long_xrange(start, stop, step):
+            i = start
+            while i >= stop:
+                yield i
+                i -= step
+        L = []
+        for i in long_xrange(104294967296, 0, 429496729):
+            name = _i64(i)
+            client.insert('key1', ColumnParent('StandardInteger1'), Column(name, 'v', Clock(0)), ConsistencyLevel.ONE)
+            L.append(name)
+        slice = [result.column.name for result in _big_slice('key1', ColumnParent('StandardInteger1'))]
+        L.sort()
+        assert slice == L, slice
+
     def test_time_uuid(self):
         import uuid
         L = []
@@ -338,6 +354,23 @@ class TestMutations(ThriftTester):
                      for result in client.get_slice('key1', column_parent, sp, ConsistencyLevel.ONE)]
             assert slice == [Column(_i64(i), 'value2', Clock(10 * i + 2))], (slice, i)
         
+    def test_integer_remove(self):
+        column_parent = ColumnParent('StandardInteger1')
+        sp = SlicePredicate(slice_range=SliceRange('', '', False, 1))
+        _set_keyspace('Keyspace1')
+        for i in xrange(10):
+            parent = ColumnParent('StandardInteger1')
+
+            client.insert('key1', parent, Column(_i64(i), 'value1', Clock(10 * i)), ConsistencyLevel.ONE)
+            client.remove('key1', ColumnPath('StandardInteger1'), Clock(10 * i + 1), ConsistencyLevel.ONE)
+            slice = client.get_slice('key1', column_parent, sp, ConsistencyLevel.ONE)
+            assert slice == [], slice
+            # resurrect
+            client.insert('key1', parent, Column(_i64(i), 'value2', Clock(10 * i + 2)), ConsistencyLevel.ONE)
+            slice = [result.column
+                     for result in client.get_slice('key1', column_parent, sp, ConsistencyLevel.ONE)]
+            assert slice == [Column(_i64(i), 'value2', Clock(10 * i + 2))], (slice, i)
+
     def test_batch_insert(self):
         _set_keyspace('Keyspace1')
         _insert_batch(False)
@@ -1077,7 +1110,7 @@ class TestMutations(ThriftTester):
         kspaces = client.describe_keyspaces()
         assert len(kspaces) == 5, kspaces # ['system', 'Keyspace2', 'Keyspace3', 'Keyspace1', 'Keyspace4']
         ks1 = client.describe_keyspace("Keyspace1")
-        assert set(ks1.keys()) == set(['Super1', 'Standard1', 'Standard2', 'StandardLong1', 'StandardLong2', 'Super3', 'Super2', 'Super4', 'Indexed1'])
+        assert set(ks1.keys()) == set(['Super1', 'Standard1', 'Standard2', 'StandardLong1', 'StandardLong2', 'StandardInteger1', 'Super3', 'Super2', 'Super4', 'Indexed1'])
         sysks = client.describe_keyspace("system")
 
     def test_describe(self):

Added: cassandra/trunk/test/unit/org/apache/cassandra/db/marshal/IntegerTypeTest.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/unit/org/apache/cassandra/db/marshal/IntegerTypeTest.java?rev=979784&view=auto
==============================================================================
--- cassandra/trunk/test/unit/org/apache/cassandra/db/marshal/IntegerTypeTest.java (added)
+++ cassandra/trunk/test/unit/org/apache/cassandra/db/marshal/IntegerTypeTest.java Tue Jul 27 17:46:28 2010
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cassandra.db.marshal;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.junit.ComparisonFailure;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class IntegerTypeTest
+{
+    private static void assertSignum(String message, int expected, double value)
+    {
+        int signum = (int)Math.signum(value);
+        if (signum != expected)
+            throw new ComparisonFailure(message, Integer.toString(expected), Integer.toString(signum));
+    }
+
+    private final IntegerType comparator = IntegerType.instance;
+
+    @Test
+    public void testTrimming()
+    {
+        byte[] n1, n2;
+        n1 = new byte[] {0};
+        n2 = new byte[] {0, 0, 0, 0};
+        assertEquals(0, comparator.compare(n1, n2));
+        n1 = new byte[] {1, 0, 0, 1};
+        n2 = new byte[] {0, 0, 0, 1, 0, 0, 1};
+        assertEquals(0, comparator.compare(n1, n2));
+        n1 = new byte[] {-1, 0, 0, -1 };
+        n2 = new byte[] {-1, -1, -1, -1, 0, 0, -1};
+        assertEquals(0, comparator.compare(n1, n2));
+        n1 = new byte[] {-1, 0};
+        n2 = new byte[] {0, -1, 0};
+        assertSignum("", -1, comparator.compare(n1, n2));
+        n1 = new byte[] {1, 0};
+        n2 = new byte[] {0, -1, 0};
+        assertSignum("", -1, comparator.compare(n1, n2));
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullLeft()
+    {
+        comparator.compare(null, new byte[1]);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullRight()
+    {
+        comparator.compare(new byte[1], null);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testNullBoth()
+    {
+        comparator.compare(null, null);
+    }
+
+    @Test
+    public void testZeroLengthArray()
+    {
+        assertSignum("0-1", -1, comparator.compare(new byte[0], new byte[1]));
+        assertSignum("1-0", 1, comparator.compare(new byte[1], new byte[0]));
+        assertSignum("0-0", 0, comparator.compare(new byte[0], new byte[0]));
+    }
+
+    @Test
+    public void testSanity()
+    {
+        byte[] nN = new byte[] {-1};
+        byte[] nZ = new byte[] {0};
+        byte[] nP = new byte[] {1};
+        assertSignum("ZN", 1, comparator.compare(nZ, nN));
+        assertSignum("NZ", -1, comparator.compare(nN, nZ));
+        assertSignum("ZP", -1, comparator.compare(nZ, nP));
+        assertSignum("PZ", 1, comparator.compare(nP, nZ));
+        assertSignum("PN", 1, comparator.compare(nP, nN));
+        assertSignum("NP", -1, comparator.compare(nN, nP));
+    }
+
+    @Test
+    public void testSameLength()
+    {
+        byte[] n1 = new byte[] {-2, 2, -4, -5};
+        byte[] n2 = new byte[] {-2, 3, -5, -4};
+        byte[] p1 = new byte[] {2, 3, -4, -5};
+        byte[] p2 = new byte[] {2, -2, -5, -4};
+
+        assertSignum("n1n2", -1, comparator.compare(n1, n2));
+        assertSignum("n2n1", 1, comparator.compare(n2, n1));
+
+        assertSignum("p1p2", -1, comparator.compare(p1, p2));
+        assertSignum("p2p1", 1, comparator.compare(p2, p1));
+
+        assertSignum("p1n1", 1, comparator.compare(p1, n1));
+        assertSignum("p1n2", 1, comparator.compare(p1, n2));
+        assertSignum("n1p1", -1, comparator.compare(n1, p1));
+        assertSignum("n2p1", -1, comparator.compare(n2, p1));
+    }
+
+    @Test
+    public void testCommonPrefix()
+    {
+        byte[][] data = {
+                {1, 0, 0, 1},
+                {1, 0, 0, 1, 0},
+                {1, 0, 0, 1},
+                {1, 0, 0, 1, 0},
+                {-1, 0, 0, 1},
+                {-1, 0, 0, 1, 0},
+                {-1, 0, 0, 1},
+                {-1, 0, 0, 1, 0}
+        };
+
+        Arrays.sort(data, comparator);
+        assertArrayEquals(new byte[]{-1, 0, 0, 1, 0}, data[0]);
+        assertArrayEquals(new byte[]{-1, 0, 0, 1, 0}, data[1]);
+        assertArrayEquals(new byte[]{-1, 0, 0, 1}, data[2]);
+        assertArrayEquals(new byte[]{-1, 0, 0, 1}, data[3]);
+        assertArrayEquals(new byte[]{1, 0, 0, 1}, data[4]);
+        assertArrayEquals(new byte[]{1, 0, 0, 1}, data[5]);
+        assertArrayEquals(new byte[]{1, 0, 0, 1, 0}, data[6]);
+        assertArrayEquals(new byte[]{1, 0, 0, 1, 0}, data[7]);
+    }
+
+    @Test
+    public void testSorting()
+    {
+        byte[][] data = {
+                { 1, 0, 0, 0},
+                {-2, 0, 0},
+                { 3, 0},
+                {-4},
+                { 4},
+                {-3, 0},
+                { 2, 0, 0},
+                {-1, 0, 0, 0}
+        };
+
+        Arrays.sort(data, comparator);
+        assertArrayEquals("-1", new byte[] {-1, 0, 0, 0}, data[0]);
+        assertArrayEquals("-2", new byte[] {-2, 0, 0}, data[1]);
+        assertArrayEquals("-3", new byte[] {-3, 0}, data[2]);
+        assertArrayEquals("-4", new byte[] {-4}, data[3]);
+        assertArrayEquals(" 4", new byte[] { 4}, data[4]);
+        assertArrayEquals(" 3", new byte[] { 3, 0}, data[5]);
+        assertArrayEquals(" 2", new byte[] { 2, 0, 0}, data[6]);
+        assertArrayEquals(" 1", new byte[] { 1, 0, 0, 0}, data[7]);
+    }
+
+    @Test
+    public void testSortingSpecialExtendedVersion()
+    {
+        Random rng = new Random(-9078270684023566599L);
+
+        byte[][] data = new byte[10000][];
+        for (int i = 0; i < data.length; i++)
+        {
+            data[i] = new byte[rng.nextInt(32) + 1];
+            rng.nextBytes(data[i]);
+        }
+
+        Arrays.sort(data, comparator);
+
+        for (int i = 1; i < data.length; i++)
+        {
+            BigInteger i0 = new BigInteger(data[i - 1]);
+            BigInteger i1 = new BigInteger(data[i]);
+            assertTrue("#" + i, i0.compareTo(i1) <= 0);
+        }
+    }
+}