You are viewing a plain text version of this content. The canonical link for it is here.

Posted to user@cassandra.apache.org by Neha Trivedi <ne...@gmail.com> on 2015/04/30 09:15:33 UTC

Data Modeling for 2.1 Cassandra

Helle all,
I was wondering which data model of the Three describe below better in
terms of performance. Seems 3 is good.

*#1. log with 3 Index*

CREATE TABLE log (
        id int PRIMARY KEY,
        first_name set<text>,
    last_name set<text>,
    dob set <text>
   );
CREATE INDEX log_firstname_index ON test.log (first_name);
CREATE INDEX log_lastname_index ON test.log (last_name);
CREATE INDEX log_dob_index ON test.log (dob);
INSERT INTO log(id, first_name,last_name) VALUES ( 3, {'rob'},{'abbate'});
INSERT INTO log(id, first_name,last_name) VALUES ( 4, {'neha'},{'dave'});
select id from log where first_name contains 'rob';
select id from log where last_name contains 'abbate';

*#2. log with UDT*

CREATE TYPE test.user_profile (
    first_name text,
    last_name text,
    dob text
);

CREATE TABLE test.log_udt (
    id int PRIMARY KEY,
    userinfo set<frozen<user_profile>>
);
CREATE INDEX log_udt1__index ON test.log_udt1 (userinfo);
INSERT INTO log_udt1 (id, userinfo ) values (
1,{first_name:'rob',last_name:'abb',dob: 'dob'});
INSERT INTO log_udt1 (id, userinfo ) values (
2,{first_name:'neha',last_name:'dave',dob: 'dob1'});

select * FROM log_udt1 where userinfo = {first_name: 'rob', last_name:
'abb', dob: 'dob'};

This will not do query like : select id from log_fname where first_name
contains 'rob';

*#3. log with different Tables for each*


CREATE TABLE log_fname (
        id int PRIMARY KEY,
        first_name set<text>,
   );
CREATE INDEX log_firstname_index ON test.log_fname (first_name);
CREATE TABLE log_lname (
        id int PRIMARY KEY,
    last_name set<text>,
   );
CREATE INDEX log_lastname_index ON test.log_lname (last_name);
CREATE TABLE log_dob (
        id int PRIMARY KEY,
    dob set <text>
   );
CREATE INDEX log_dob_index ON test.log_dob (dob);

INSERT INTO log_fname(id, first_name) VALUES ( 3, {'rob'});
INSERT INTO log_lname(id, last_name) VALUES ( 4, {'dave'});
select id from log_fname where first_name contains 'rob';
select id from log_lname where last_name contains 'abbate';


Regards
Neha

RE: Data Modeling for 2.1 Cassandra

Posted by "Peer, Oded" <Od...@rsa.com>.

In general your data model should match your queries in Cassandra.
In the examples you provided the queries are by name, not by ID, so I don’t see much use in using ID as the primary key.
Without much context, like why you are using SET or if queries must specify both first_name and last_name which is not supported in option 2 , I think it would make sense to use the following model for your data:

CREATE TABLE users (
   first_name text,
   last_name text,
   dob text,
   id int
   PRIMARY KEY ((first_name, last_name)) // Note this defines a composite partition key by using an extra set of parentheses
);
INSERT INTO users(first_name, last_name, id) values (‘neha’, ‘dave’, 1);
SELECT * FROM users where first_name = 'rob' and last_name = 'abb';



From: Neha Trivedi [mailto:nehajtrivedi@gmail.com]
Sent: Thursday, April 30, 2015 10:16 AM
To: user@cassandra.apache.org
Subject: Data Modeling for 2.1 Cassandra

Helle all,
I was wondering which data model of the Three describe below better in terms of performance. Seems 3 is good.

#1. log with 3 Index

CREATE TABLE log (
        id int PRIMARY KEY,
        first_name set<text>,
    last_name set<text>,
    dob set <text>
   );
CREATE INDEX log_firstname_index ON test.log (first_name);
CREATE INDEX log_lastname_index ON test.log (last_name);
CREATE INDEX log_dob_index ON test.log (dob);
INSERT INTO log(id, first_name,last_name) VALUES ( 3, {'rob'},{'abbate'});
INSERT INTO log(id, first_name,last_name) VALUES ( 4, {'neha'},{'dave'});
select id from log where first_name contains 'rob';
select id from log where last_name contains 'abbate';

#2. log with UDT

CREATE TYPE test.user_profile (
    first_name text,
    last_name text,
    dob text
);

CREATE TABLE test.log_udt (
    id int PRIMARY KEY,
    userinfo set<frozen<user_profile>>
);
CREATE INDEX log_udt1__index ON test.log_udt1 (userinfo);
INSERT INTO log_udt1 (id, userinfo ) values ( 1,{first_name:'rob',last_name:'abb',dob: 'dob'});
INSERT INTO log_udt1 (id, userinfo ) values ( 2,{first_name:'neha',last_name:'dave',dob: 'dob1'});

select * FROM log_udt1 where userinfo = {first_name: 'rob', last_name: 'abb', dob: 'dob'};
This will not do query like : select id from log_fname where first_name contains 'rob';

#3. log with different Tables for each


CREATE TABLE log_fname (
        id int PRIMARY KEY,
        first_name set<text>,
   );
CREATE INDEX log_firstname_index ON test.log_fname (first_name);
CREATE TABLE log_lname (
        id int PRIMARY KEY,
    last_name set<text>,
   );
CREATE INDEX log_lastname_index ON test.log_lname (last_name);
CREATE TABLE log_dob (
        id int PRIMARY KEY,
    dob set <text>
   );
CREATE INDEX log_dob_index ON test.log_dob (dob);

INSERT INTO log_fname(id, first_name) VALUES ( 3, {'rob'});
INSERT INTO log_lname(id, last_name) VALUES ( 4, {'dave'});
select id from log_fname where first_name contains 'rob';
select id from log_lname where last_name contains 'abbate';

Regards
Neha