You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@cassandra.apache.org by Jack Culpepper <ja...@gmail.com> on 2010/02/08 23:01:51 UTC

get_range_slice() tester

Here's a tester program, for contrib. It generates 10 keys using uuid,
and inserts them into both the cassandra column family Keyspace1/Super1
and a python dictionary. Then, it does a range scan using whichever of
the two methods is named on the command line and marks the keys that
are returned. Finally, it goes through the python dictionary, makes
sure a cassandra get() on each key works (should throw an exception on
failure), and complains about keys that were not found in the range scan.

To run, put the contents in test_bug.py then run like this:

python test_bug.py get_key_range

(Nothing printed means it worked.)

python test_bug.py get_range_slice

(Keys that should have been found in a range scan, but were not, are printed.)

Best,

Jack



import sys
import time
import uuid

from thrift import Thrift
from thrift.transport import TTransport
from thrift.transport import TSocket
from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
from cassandra import Cassandra
from cassandra.ttypes import *

## Range-scan tester.
##
## Inserts num_keys random keys into Keyspace1/Super1 and into a local
## dictionary, scans the column family with the method named on the command
## line, and prints every inserted key that the scan did not return.
## Each key is also fetched directly (which raises if it is missing) and
## then removed again.

## number of random keys to insert and then look for in the scan
num_keys = 10

## validate the scan method before touching the cluster: an unknown method
## would otherwise leave the scan loop below spinning forever
scan_methods = ("get_key_range", "get_range_slice")
if len(sys.argv) != 2 or sys.argv[1] not in scan_methods:
    sys.exit("usage: python test_bug.py get_key_range|get_range_slice")
scan_method = sys.argv[1]


def timestamp():
    ## the thrift API takes an i64 timestamp; use integer microseconds
    ## rather than the float seconds returned by time.time()
    return int(time.time() * 1e6)


socket = TSocket.TSocket("10.212.87.165", 9160)
transport = TTransport.TBufferedTransport(socket)
## the class itself is what the file imports, not the TBinaryProtocol module
protocol = TBinaryProtocolAccelerated(transport)
client = Cassandra.Client(protocol)

ks = "Keyspace1"
cf = "Super1"
cl = ConsistencyLevel.ONE

## inserted key -> 1 while not yet seen by the scan, 0 once seen
d = {}

## flip to False to insert through pycassa instead of raw thrift
use_raw_thrift = True

transport.open()

try:
    if use_raw_thrift:
        ## insert keys using the raw thrift interface
        cpath = ColumnPath(cf, "foo", "is")
        value = "cool"

        for i in range(num_keys):
            key = uuid.uuid4().hex
            client.insert(ks, key, cpath, value, timestamp(), cl)
            d[key] = 1

    else:
        ## insert keys using pycassa!
        import pycassa

        client = pycassa.connect(["10.212.87.165:9160"])
        cf_test = pycassa.ColumnFamily(client, ks, cf, super=True)

        for i in range(num_keys):
            key = uuid.uuid4().hex
            ## write super column 'foo' so the get/remove below, which
            ## address 'foo', find what was inserted
            cf_test.insert(key, { 'foo' : { 'is' : 'cool' }})
            d[key] = 1

    cparent = ColumnParent(column_family=cf)
    slice_range = SliceRange(start="key", finish="key")
    p = SlicePredicate(slice_range=slice_range)

    done = False
    seg = 1000
    start = ""

    ## do a scan using either get_key_range() (deprecated) or get_range_slice()
    ## for every key returned that is in the dictionary, mark it as found
    while not done:
        if scan_method == "get_key_range":
            ## returns the keys themselves
            keys = client.get_key_range(ks, cf, start, "", seg, cl)
        else:
            ## returns slice objects; only their keys matter here
            result = client.get_range_slice(ks, cparent, p, start, "", seg, cl)
            keys = [r.key for r in result]

        ## a short page means the scan reached the end; otherwise resume
        ## from the last key seen (re-marking it on the next page is harmless)
        if len(keys) < seg:
            done = True
        else:
            start = keys[-1]

        for key in keys:
            if key in d:
                d[key] = 0

    cpath = ColumnPath(column_family=cf, super_column='foo')

    ## get, remove all the keys
    ## print all the keys that were not marked 0
    for k in d:
        ## called only for its side effect: raises if the key is missing
        client.get(ks, k, cpath, cl)

        if d[k] == 1:
            print("%s not marked 0" % k)

        client.remove(ks, k, cpath, timestamp(), cl)
finally:
    ## always release the connection, even if a thrift call raised
    transport.close()

Re: get_range_slice() tester

Posted by Jack Culpepper <ja...@gmail.com>.
No, dang. Obviously, get_key_range() is gone, but I get an exception
for get_range_slice():

ERROR 23:25:04,497 Internal error processing get_range_slice
java.lang.AssertionError
        at org.apache.cassandra.dht.Bounds.<init>(Bounds.java:16)
        at org.apache.cassandra.dht.Bounds.restrictTo(Bounds.java:34)
        at org.apache.cassandra.service.StorageProxy.getRangeSlice(StorageProxy.java:559)
        at org.apache.cassandra.thrift.CassandraServer.get_range_slice(CassandraServer.java:560)
        at org.apache.cassandra.thrift.Cassandra$Processor$get_range_slice.process(Cassandra.java:1189)
        at org.apache.cassandra.thrift.Cassandra$Processor.process(Cassandra.java:984)
        at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:253)
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:619)

To get trunk running I modified storage-conf.xml as follows:

Changed <Seeds> to reflect my two nodes.

Deleted "localhost" from <ThriftAddress> and <ListenAddress>.

Also, I deleted everything from my data and commitlog dirs on both
machines. That's it.

Jack

On Mon, Feb 8, 2010 at 2:58 PM, Jonathan Ellis <jb...@gmail.com> wrote:
> I'm seeing failures on 0.5 but success against trunk, is that also what you see?
>
> -Jonathan
>
> On Mon, Feb 8, 2010 at 4:42 PM, Jack Culpepper <ja...@gmail.com> wrote:
>> On Mon, Feb 8, 2010 at 2:34 PM, Jonathan Ellis <jb...@gmail.com> wrote:
>>> This is supposed to pass on a single node but fail on two, correct?
>>
>> Yep! At least, it does for me.
>>
>>> What are the tokens on your two nodes, in case that is relevant?
>>> (nodeprobe ring will tell you.)
>>
>> Heh, unfortunately this also shows the fact that I accidentally
>> blasted one of my data dirs. ;)
>>
>> $ sudo bin/nodeprobe -host localhost ring
>> [sudo] password for jack:
>> Address       Status     Load          Range
>>           Ring
>>                                       YQVhw0uDS4RMOASI
>> 10.212.87.165 Up         8.18 KB       13DyIzn2EhRAHOq9
>>           |<--|
>> 10.212.230.176Up         11.71 GB      YQVhw0uDS4RMOASI
>>           |-->|
>>
>> J
>>
>>> -Jonathan
>>>
>>> On Mon, Feb 8, 2010 at 4:01 PM, Jack Culpepper <ja...@gmail.com> wrote:
>>>> Here's a tester program, for contrib. It generates 10 keys using uuid,
>>>> inserts them both into the cassandra column family Keyspace1/Super1
>>>> and a python dictionary. Then, it does a range scan using both methods
>>>> and marks the keys that are returned. Finally, it goes through the
>>>> python dictionary, makes sure a cassandra get() on each key works
>>>> (should through an exception on failure), and complains about keys
>>>> that were not found in the range scan.
>>>>
>>>> To run, put the contents in test_bug.py then run like this:
>>>>
>>>> python test_bug.py get_key_range
>>>>
>>>> (Nothing printed means it worked.)
>>>>
>>>> python test_bug.py get_range_slice
>>>>
>>>> (Keys that should have been found in a range scan, but were not, are printed.)
>>>>
>>>> Best,
>>>>
>>>> Jack
>>>>
>>>>
>>>>
>>>> import sys
>>>> import time
>>>> import uuid
>>>>
>>>> from thrift import Thrift
>>>> from thrift.transport import TTransport
>>>> from thrift.transport import TSocket
>>>> from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
>>>> from cassandra import Cassandra
>>>> from cassandra.ttypes import *
>>>>
>>>> num_keys = 10
>>>>
>>>> socket = TSocket.TSocket("10.212.87.165", 9160)
>>>> transport = TTransport.TBufferedTransport(socket)
>>>> protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
>>>> client = Cassandra.Client(protocol)
>>>>
>>>> ks = "Keyspace1"
>>>> cf = "Super1"
>>>> cl = ConsistencyLevel.ONE
>>>>
>>>> d = {}
>>>>
>>>> transport.open()
>>>>
>>>> if 1:
>>>>    ## insert keys using the raw thrift interface
>>>>    cpath = ColumnPath(cf, "foo", "is")
>>>>    value = "cool"
>>>>
>>>>    for i in xrange(num_keys):
>>>>        ts = time.time()
>>>>        key = uuid.uuid4().hex
>>>>        client.insert(ks, key, cpath, value, ts, cl)
>>>>        d[key] = 1
>>>>
>>>> else:
>>>>    ## insert keys using pycassa!
>>>>    import pycassa
>>>>
>>>>    client = pycassa.connect(["10.212.87.165:9160"])
>>>>    cf_test = pycassa.ColumnFamily(client, ks, cf, super=True)
>>>>
>>>>    for i in xrange(num_keys):
>>>>        key = uuid.uuid4().hex
>>>>        cf_test.insert(key, { 'params' : { 'is' : 'cool' }})
>>>>        d[key] = 1
>>>>
>>>>
>>>> cparent = ColumnParent(column_family=cf)
>>>> slice_range = SliceRange(start="key", finish="key")
>>>> p = SlicePredicate(slice_range=slice_range)
>>>>
>>>> done = False
>>>> seg = 1000
>>>> start = ""
>>>>
>>>> ## do a scan using either get_key_range() (deprecated) or get_range_slice()
>>>> ## for every key returned that is in the dictionary, mark it as found
>>>> while not done:
>>>>    if sys.argv[1] == "get_key_range":
>>>>        result = client.get_key_range(ks, cf, start, "", seg, cl)
>>>>
>>>>        if len(result) < seg: done = True
>>>>        else: start = result[seg-1]
>>>>
>>>>        for r in result:
>>>>            if d.has_key(r):
>>>>                d[r] = 0
>>>>
>>>>    if sys.argv[1] == "get_range_slice":
>>>>        result = client.get_range_slice(ks, cparent, p, start, "", seg, cl)
>>>>
>>>>        if len(result) < seg: done = True
>>>>        else: start = result[seg-1].key
>>>>
>>>>        for r in result:
>>>>            if d.has_key(r.key):
>>>>                d[r.key] = 0
>>>>
>>>> cpath = ColumnPath(column_family=cf, super_column='foo')
>>>>
>>>> ## get, remove all the keys
>>>> ## print all the keys that were not marked 0
>>>> for k in d:
>>>>    result = client.get(ks, k, cpath, cl)
>>>>    #print result
>>>>
>>>>    if d[k] == 1:
>>>>        print k, "not marked 0"
>>>>    #else:
>>>>    #    print k, "was marked 0!"
>>>>
>>>>    ts = time.time()
>>>>    client.remove(ks, k, cpath, ts, cl)
>>>>
>>>
>>
>

Re: get_range_slice() tester

Posted by Jonathan Ellis <jb...@gmail.com>.
I'm seeing failures on 0.5 but success against trunk, is that also what you see?

-Jonathan

On Mon, Feb 8, 2010 at 4:42 PM, Jack Culpepper <ja...@gmail.com> wrote:
> On Mon, Feb 8, 2010 at 2:34 PM, Jonathan Ellis <jb...@gmail.com> wrote:
>> This is supposed to pass on a single node but fail on two, correct?
>
> Yep! At least, it does for me.
>
>> What are the tokens on your two nodes, in case that is relevant?
>> (nodeprobe ring will tell you.)
>
> Heh, unfortunately this also shows the fact that I accidentally
> blasted one of my data dirs. ;)
>
> $ sudo bin/nodeprobe -host localhost ring
> [sudo] password for jack:
> Address       Status     Load          Range
>           Ring
>                                       YQVhw0uDS4RMOASI
> 10.212.87.165 Up         8.18 KB       13DyIzn2EhRAHOq9
>           |<--|
> 10.212.230.176Up         11.71 GB      YQVhw0uDS4RMOASI
>           |-->|
>
> J
>
>> -Jonathan
>>
>> On Mon, Feb 8, 2010 at 4:01 PM, Jack Culpepper <ja...@gmail.com> wrote:
>>> Here's a tester program, for contrib. It generates 10 keys using uuid,
>>> inserts them both into the cassandra column family Keyspace1/Super1
>>> and a python dictionary. Then, it does a range scan using both methods
>>> and marks the keys that are returned. Finally, it goes through the
>>> python dictionary, makes sure a cassandra get() on each key works
>>> (should through an exception on failure), and complains about keys
>>> that were not found in the range scan.
>>>
>>> To run, put the contents in test_bug.py then run like this:
>>>
>>> python test_bug.py get_key_range
>>>
>>> (Nothing printed means it worked.)
>>>
>>> python test_bug.py get_range_slice
>>>
>>> (Keys that should have been found in a range scan, but were not, are printed.)
>>>
>>> Best,
>>>
>>> Jack
>>>
>>>
>>>
>>> import sys
>>> import time
>>> import uuid
>>>
>>> from thrift import Thrift
>>> from thrift.transport import TTransport
>>> from thrift.transport import TSocket
>>> from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
>>> from cassandra import Cassandra
>>> from cassandra.ttypes import *
>>>
>>> num_keys = 10
>>>
>>> socket = TSocket.TSocket("10.212.87.165", 9160)
>>> transport = TTransport.TBufferedTransport(socket)
>>> protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
>>> client = Cassandra.Client(protocol)
>>>
>>> ks = "Keyspace1"
>>> cf = "Super1"
>>> cl = ConsistencyLevel.ONE
>>>
>>> d = {}
>>>
>>> transport.open()
>>>
>>> if 1:
>>>    ## insert keys using the raw thrift interface
>>>    cpath = ColumnPath(cf, "foo", "is")
>>>    value = "cool"
>>>
>>>    for i in xrange(num_keys):
>>>        ts = time.time()
>>>        key = uuid.uuid4().hex
>>>        client.insert(ks, key, cpath, value, ts, cl)
>>>        d[key] = 1
>>>
>>> else:
>>>    ## insert keys using pycassa!
>>>    import pycassa
>>>
>>>    client = pycassa.connect(["10.212.87.165:9160"])
>>>    cf_test = pycassa.ColumnFamily(client, ks, cf, super=True)
>>>
>>>    for i in xrange(num_keys):
>>>        key = uuid.uuid4().hex
>>>        cf_test.insert(key, { 'params' : { 'is' : 'cool' }})
>>>        d[key] = 1
>>>
>>>
>>> cparent = ColumnParent(column_family=cf)
>>> slice_range = SliceRange(start="key", finish="key")
>>> p = SlicePredicate(slice_range=slice_range)
>>>
>>> done = False
>>> seg = 1000
>>> start = ""
>>>
>>> ## do a scan using either get_key_range() (deprecated) or get_range_slice()
>>> ## for every key returned that is in the dictionary, mark it as found
>>> while not done:
>>>    if sys.argv[1] == "get_key_range":
>>>        result = client.get_key_range(ks, cf, start, "", seg, cl)
>>>
>>>        if len(result) < seg: done = True
>>>        else: start = result[seg-1]
>>>
>>>        for r in result:
>>>            if d.has_key(r):
>>>                d[r] = 0
>>>
>>>    if sys.argv[1] == "get_range_slice":
>>>        result = client.get_range_slice(ks, cparent, p, start, "", seg, cl)
>>>
>>>        if len(result) < seg: done = True
>>>        else: start = result[seg-1].key
>>>
>>>        for r in result:
>>>            if d.has_key(r.key):
>>>                d[r.key] = 0
>>>
>>> cpath = ColumnPath(column_family=cf, super_column='foo')
>>>
>>> ## get, remove all the keys
>>> ## print all the keys that were not marked 0
>>> for k in d:
>>>    result = client.get(ks, k, cpath, cl)
>>>    #print result
>>>
>>>    if d[k] == 1:
>>>        print k, "not marked 0"
>>>    #else:
>>>    #    print k, "was marked 0!"
>>>
>>>    ts = time.time()
>>>    client.remove(ks, k, cpath, ts, cl)
>>>
>>
>

Re: get_range_slice() tester

Posted by Jack Culpepper <ja...@gmail.com>.
On Mon, Feb 8, 2010 at 2:34 PM, Jonathan Ellis <jb...@gmail.com> wrote:
> This is supposed to pass on a single node but fail on two, correct?

Yep! At least, it does for me.

> What are the tokens on your two nodes, in case that is relevant?
> (nodeprobe ring will tell you.)

Heh, unfortunately this also shows the fact that I accidentally
blasted one of my data dirs. ;)

$ sudo bin/nodeprobe -host localhost ring
[sudo] password for jack:
Address       Status     Load          Range
           Ring
                                       YQVhw0uDS4RMOASI
10.212.87.165 Up         8.18 KB       13DyIzn2EhRAHOq9
           |<--|
10.212.230.176Up         11.71 GB      YQVhw0uDS4RMOASI
           |-->|

J

> -Jonathan
>
> On Mon, Feb 8, 2010 at 4:01 PM, Jack Culpepper <ja...@gmail.com> wrote:
>> Here's a tester program, for contrib. It generates 10 keys using uuid,
>> inserts them both into the cassandra column family Keyspace1/Super1
>> and a python dictionary. Then, it does a range scan using both methods
>> and marks the keys that are returned. Finally, it goes through the
>> python dictionary, makes sure a cassandra get() on each key works
>> (should through an exception on failure), and complains about keys
>> that were not found in the range scan.
>>
>> To run, put the contents in test_bug.py then run like this:
>>
>> python test_bug.py get_key_range
>>
>> (Nothing printed means it worked.)
>>
>> python test_bug.py get_range_slice
>>
>> (Keys that should have been found in a range scan, but were not, are printed.)
>>
>> Best,
>>
>> Jack
>>
>>
>>
>> import sys
>> import time
>> import uuid
>>
>> from thrift import Thrift
>> from thrift.transport import TTransport
>> from thrift.transport import TSocket
>> from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
>> from cassandra import Cassandra
>> from cassandra.ttypes import *
>>
>> num_keys = 10
>>
>> socket = TSocket.TSocket("10.212.87.165", 9160)
>> transport = TTransport.TBufferedTransport(socket)
>> protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
>> client = Cassandra.Client(protocol)
>>
>> ks = "Keyspace1"
>> cf = "Super1"
>> cl = ConsistencyLevel.ONE
>>
>> d = {}
>>
>> transport.open()
>>
>> if 1:
>>    ## insert keys using the raw thrift interface
>>    cpath = ColumnPath(cf, "foo", "is")
>>    value = "cool"
>>
>>    for i in xrange(num_keys):
>>        ts = time.time()
>>        key = uuid.uuid4().hex
>>        client.insert(ks, key, cpath, value, ts, cl)
>>        d[key] = 1
>>
>> else:
>>    ## insert keys using pycassa!
>>    import pycassa
>>
>>    client = pycassa.connect(["10.212.87.165:9160"])
>>    cf_test = pycassa.ColumnFamily(client, ks, cf, super=True)
>>
>>    for i in xrange(num_keys):
>>        key = uuid.uuid4().hex
>>        cf_test.insert(key, { 'params' : { 'is' : 'cool' }})
>>        d[key] = 1
>>
>>
>> cparent = ColumnParent(column_family=cf)
>> slice_range = SliceRange(start="key", finish="key")
>> p = SlicePredicate(slice_range=slice_range)
>>
>> done = False
>> seg = 1000
>> start = ""
>>
>> ## do a scan using either get_key_range() (deprecated) or get_range_slice()
>> ## for every key returned that is in the dictionary, mark it as found
>> while not done:
>>    if sys.argv[1] == "get_key_range":
>>        result = client.get_key_range(ks, cf, start, "", seg, cl)
>>
>>        if len(result) < seg: done = True
>>        else: start = result[seg-1]
>>
>>        for r in result:
>>            if d.has_key(r):
>>                d[r] = 0
>>
>>    if sys.argv[1] == "get_range_slice":
>>        result = client.get_range_slice(ks, cparent, p, start, "", seg, cl)
>>
>>        if len(result) < seg: done = True
>>        else: start = result[seg-1].key
>>
>>        for r in result:
>>            if d.has_key(r.key):
>>                d[r.key] = 0
>>
>> cpath = ColumnPath(column_family=cf, super_column='foo')
>>
>> ## get, remove all the keys
>> ## print all the keys that were not marked 0
>> for k in d:
>>    result = client.get(ks, k, cpath, cl)
>>    #print result
>>
>>    if d[k] == 1:
>>        print k, "not marked 0"
>>    #else:
>>    #    print k, "was marked 0!"
>>
>>    ts = time.time()
>>    client.remove(ks, k, cpath, ts, cl)
>>
>

Re: get_range_slice() tester

Posted by Jonathan Ellis <jb...@gmail.com>.
This is supposed to pass on a single node but fail on two, correct?

What are the tokens on your two nodes, in case that is relevant?
(nodeprobe ring will tell you.)

-Jonathan

On Mon, Feb 8, 2010 at 4:01 PM, Jack Culpepper <ja...@gmail.com> wrote:
> Here's a tester program, for contrib. It generates 10 keys using uuid,
> inserts them both into the cassandra column family Keyspace1/Super1
> and a python dictionary. Then, it does a range scan using both methods
> and marks the keys that are returned. Finally, it goes through the
> python dictionary, makes sure a cassandra get() on each key works
> (should through an exception on failure), and complains about keys
> that were not found in the range scan.
>
> To run, put the contents in test_bug.py then run like this:
>
> python test_bug.py get_key_range
>
> (Nothing printed means it worked.)
>
> python test_bug.py get_range_slice
>
> (Keys that should have been found in a range scan, but were not, are printed.)
>
> Best,
>
> Jack
>
>
>
> import sys
> import time
> import uuid
>
> from thrift import Thrift
> from thrift.transport import TTransport
> from thrift.transport import TSocket
> from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
> from cassandra import Cassandra
> from cassandra.ttypes import *
>
> num_keys = 10
>
> socket = TSocket.TSocket("10.212.87.165", 9160)
> transport = TTransport.TBufferedTransport(socket)
> protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
> client = Cassandra.Client(protocol)
>
> ks = "Keyspace1"
> cf = "Super1"
> cl = ConsistencyLevel.ONE
>
> d = {}
>
> transport.open()
>
> if 1:
>    ## insert keys using the raw thrift interface
>    cpath = ColumnPath(cf, "foo", "is")
>    value = "cool"
>
>    for i in xrange(num_keys):
>        ts = time.time()
>        key = uuid.uuid4().hex
>        client.insert(ks, key, cpath, value, ts, cl)
>        d[key] = 1
>
> else:
>    ## insert keys using pycassa!
>    import pycassa
>
>    client = pycassa.connect(["10.212.87.165:9160"])
>    cf_test = pycassa.ColumnFamily(client, ks, cf, super=True)
>
>    for i in xrange(num_keys):
>        key = uuid.uuid4().hex
>        cf_test.insert(key, { 'params' : { 'is' : 'cool' }})
>        d[key] = 1
>
>
> cparent = ColumnParent(column_family=cf)
> slice_range = SliceRange(start="key", finish="key")
> p = SlicePredicate(slice_range=slice_range)
>
> done = False
> seg = 1000
> start = ""
>
> ## do a scan using either get_key_range() (deprecated) or get_range_slice()
> ## for every key returned that is in the dictionary, mark it as found
> while not done:
>    if sys.argv[1] == "get_key_range":
>        result = client.get_key_range(ks, cf, start, "", seg, cl)
>
>        if len(result) < seg: done = True
>        else: start = result[seg-1]
>
>        for r in result:
>            if d.has_key(r):
>                d[r] = 0
>
>    if sys.argv[1] == "get_range_slice":
>        result = client.get_range_slice(ks, cparent, p, start, "", seg, cl)
>
>        if len(result) < seg: done = True
>        else: start = result[seg-1].key
>
>        for r in result:
>            if d.has_key(r.key):
>                d[r.key] = 0
>
> cpath = ColumnPath(column_family=cf, super_column='foo')
>
> ## get, remove all the keys
> ## print all the keys that were not marked 0
> for k in d:
>    result = client.get(ks, k, cpath, cl)
>    #print result
>
>    if d[k] == 1:
>        print k, "not marked 0"
>    #else:
>    #    print k, "was marked 0!"
>
>    ts = time.time()
>    client.remove(ks, k, cpath, ts, cl)
>