You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by Jeremy Davis <jd...@datasong.com> on 2015/04/05 01:08:57 UTC
Is it possible to do a LEFT JOIN LATERAL in Hive?
Hello!
I would like to do a LEFT JOIN LATERAL .. Which is using values on the LHS as parameters on the RHS. Is this sort of thing possible in Hive?
-JD
---- Some example SQL:
create table lhs (
subject_id integer,
date_time BIGINT
);
—Subjects and responses at Arbitrary response times:
insert into lhs (subject_id, date_time) values (1,1000);
insert into lhs (subject_id, date_time) values (1,1100);
insert into lhs (subject_id, date_time) values (1,2000);
insert into lhs (subject_id, date_time) values (2,1002);
insert into lhs (subject_id, date_time) values (2,1998);
create table events (
subject_id integer,
date_time BIGINT,
event_val integer
);
SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum, count(event_val) as ecnt from events WHERE date_time < lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
insert into events (subject_id, date_time, event_val) values (1,999, 1);
insert into events (subject_id, date_time, event_val) values (1,1000, 2);
insert into events (subject_id, date_time, event_val) values (1,1001, 3);
insert into events (subject_id, date_time, event_val) values (1,1999, 4);
insert into events (subject_id, date_time, event_val) values (1,2000, 5);
insert into events (subject_id, date_time, event_val) values (1,2001, 6);
insert into events (subject_id, date_time, event_val) values (2,999, 10);
insert into events (subject_id, date_time, event_val) values (2,1000, 20);
insert into events (subject_id, date_time, event_val) values (2,1001, 30);
insert into events (subject_id, date_time, event_val) values (2,1999, 40);
insert into events (subject_id, date_time, event_val) values (2,2000, 50);
insert into events (subject_id, date_time, event_val) values (2,2001, 60);
SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum, count(event_val) as ecnt from events WHERE date_time < lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
—results:
subject_id;date_time;val_sum;ecnt
1;1000;1;1
1;1100;6;3
1;2000;10;4
2;1002;60;3
2;1998;60;3
Re: Is it possible to do a LEFT JOIN LATERAL in Hive?
Posted by Edward Capriolo <ed...@gmail.com>.
Lateral view does support outer if that helps.
On Sunday, April 5, 2015, @Sanjiv Singh <sa...@gmail.com> wrote:
> Hi Jeremy,
>
> Adding to my response ....
>
> 1. Hive doesn't support named insertion , so need to use other ways of
> insertion data in hive table ..
>
> 2. As you know , hive doesn't support LEFT JOIN LATERAL. Query , I given
> , is producing same result . hope that it can help you formulate things and
> achieve the same in hive.
> On Apr 5, 2015 3:55 PM, "@Sanjiv Singh" <sanjiv.is.on@gmail.com
> <javascript:_e(%7B%7D,'cvml','sanjiv.is.on@gmail.com');>> wrote:
>
>> -- create table lhs
>>
>> create table lhs (
>> subject_id int,
>> date_time BIGINT
>> );
>>
>> -- insert some records in table lhs , named insertion will not work
>> in case of hive
>>
>> insert into table lhs select 1,1000 from tmpTableWithOneRecords limit 1;
>> insert into table lhs select 1,1100 from tmpTableWithOneRecords limit 1;
>> insert into table lhs select 1,2000 from tmpTableWithOneRecords limit 1;
>> insert into table lhs select 2,1002 from tmpTableWithOneRecords limit 1;
>> insert into table lhs select 2,1998 from tmpTableWithOneRecords limit 1;
>>
>> create table events (
>> subject_id int,
>> date_time BIGINT,
>> event_val int
>> );
>>
>> insert into table events select 1,999, 1 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 1,1000, 2 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 1,1001, 3 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 1,1999, 4 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 1,2000, 5 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 1,2001, 6 from
>> tmpTableWithOneRecords limit 1;
>>
>> insert into table events select 2,999, 10 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 2,1000, 20 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 2,1001, 30 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 2,1999, 40 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 2,2000, 50 from
>> tmpTableWithOneRecords limit 1;
>> insert into table events select 2,2001, 60 from
>> tmpTableWithOneRecords limit 1;
>>
>>
>> select subject_id,adate,SUM(event_val),COUNT(event_val) from (SELECT
>> a.subject_id as subject_id ,a.date_time as adate , b.date_time as
>> bdate , b.event_val as event_val FROM events b LEFT OUTER JOIN lhs a
>> ON b.subject_id = a.subject_id) abc where bdate < adate group by
>> subject_id,adate;
>>
>>
>>
>> 1 1000 1 1
>> 1 1100 6 3
>> 1 2000 10 4
>> 2 1002 60 3
>> 2 1998 60 3
>>
>>
>> On 4/5/15, Jeremy Davis <jdavis@datasong.com
>> <javascript:_e(%7B%7D,'cvml','jdavis@datasong.com');>> wrote:
>> > Hello!
>> > I would like to do a LEFT JOIN LATERAL .. Which is using values on the
>> LHS
>> > as parameters on the RHS. Is this sort of thing possible in Hive?
>> >
>> >
>> > -JD
>> >
>> >
>> > ---- Some example SQL:
>> >
>> >
>> > create table lhs (
>> > subject_id integer,
>> > date_time BIGINT
>> > );
>> >
>> > —Subjects and responses at Arbitrary response times:
>> > insert into lhs (subject_id, date_time) values (1,1000);
>> > insert into lhs (subject_id, date_time) values (1,1100);
>> > insert into lhs (subject_id, date_time) values (1,2000);
>> > insert into lhs (subject_id, date_time) values (2,1002);
>> > insert into lhs (subject_id, date_time) values (2,1998);
>> >
>> > create table events (
>> > subject_id integer,
>> > date_time BIGINT,
>> > event_val integer
>> > );
>> >
>> > SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum,
>> > count(event_val) as ecnt from events WHERE date_time < lhs.date_time and
>> > subject_id = lhs.subject_id ) rhs1 ON true;
>> >
>> >
>> > insert into events (subject_id, date_time, event_val) values
>> (1,999,
>> > 1);
>> > insert into events (subject_id, date_time, event_val) values
>> (1,1000,
>> > 2);
>> > insert into events (subject_id, date_time, event_val) values
>> (1,1001,
>> > 3);
>> > insert into events (subject_id, date_time, event_val) values
>> (1,1999,
>> > 4);
>> > insert into events (subject_id, date_time, event_val) values
>> (1,2000,
>> > 5);
>> > insert into events (subject_id, date_time, event_val) values
>> (1,2001,
>> > 6);
>> >
>> > insert into events (subject_id, date_time, event_val) values
>> (2,999,
>> > 10);
>> > insert into events (subject_id, date_time, event_val) values
>> (2,1000,
>> > 20);
>> > insert into events (subject_id, date_time, event_val) values
>> (2,1001,
>> > 30);
>> > insert into events (subject_id, date_time, event_val) values
>> (2,1999,
>> > 40);
>> > insert into events (subject_id, date_time, event_val) values
>> (2,2000,
>> > 50);
>> > insert into events (subject_id, date_time, event_val) values
>> (2,2001,
>> > 60);
>> >
>> > SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as
>> > val_sum, count(event_val) as ecnt from events WHERE date_time <
>> > lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
>> >
>> > —results:
>> > subject_id;date_time;val_sum;ecnt
>> > 1;1000;1;1
>> > 1;1100;6;3
>> > 1;2000;10;4
>> > 2;1002;60;3
>> > 2;1998;60;3
>> >
>> >
>> >
>>
>>
>> --
>> Regards
>> Sanjiv Singh
>> Mob : +091 9990-447-339
>>
>
--
Sorry this was sent from mobile. Will do less grammar and spell check than
usual.
Re: Is it possible to do a LEFT JOIN LATERAL in Hive?
Posted by "@Sanjiv Singh" <sa...@gmail.com>.
Hi Jeremy,
Adding to my response ....
1. Hive doesn't support named insertion , so need to use other ways of
insertion data in hive table ..
2. As you know , hive doesn't support LEFT JOIN LATERAL. Query , I given
, is producing same result . hope that it can help you formulate things and
achieve the same in hive.
On Apr 5, 2015 3:55 PM, "@Sanjiv Singh" <sa...@gmail.com> wrote:
> -- create table lhs
>
> create table lhs (
> subject_id int,
> date_time BIGINT
> );
>
> -- insert some records in table lhs , named insertion will not work
> in case of hive
>
> insert into table lhs select 1,1000 from tmpTableWithOneRecords limit 1;
> insert into table lhs select 1,1100 from tmpTableWithOneRecords limit 1;
> insert into table lhs select 1,2000 from tmpTableWithOneRecords limit 1;
> insert into table lhs select 2,1002 from tmpTableWithOneRecords limit 1;
> insert into table lhs select 2,1998 from tmpTableWithOneRecords limit 1;
>
> create table events (
> subject_id int,
> date_time BIGINT,
> event_val int
> );
>
> insert into table events select 1,999, 1 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 1,1000, 2 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 1,1001, 3 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 1,1999, 4 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 1,2000, 5 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 1,2001, 6 from
> tmpTableWithOneRecords limit 1;
>
> insert into table events select 2,999, 10 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 2,1000, 20 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 2,1001, 30 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 2,1999, 40 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 2,2000, 50 from
> tmpTableWithOneRecords limit 1;
> insert into table events select 2,2001, 60 from
> tmpTableWithOneRecords limit 1;
>
>
> select subject_id,adate,SUM(event_val),COUNT(event_val) from (SELECT
> a.subject_id as subject_id ,a.date_time as adate , b.date_time as
> bdate , b.event_val as event_val FROM events b LEFT OUTER JOIN lhs a
> ON b.subject_id = a.subject_id) abc where bdate < adate group by
> subject_id,adate;
>
>
>
> 1 1000 1 1
> 1 1100 6 3
> 1 2000 10 4
> 2 1002 60 3
> 2 1998 60 3
>
>
> On 4/5/15, Jeremy Davis <jd...@datasong.com> wrote:
> > Hello!
> > I would like to do a LEFT JOIN LATERAL .. Which is using values on the
> LHS
> > as parameters on the RHS. Is this sort of thing possible in Hive?
> >
> >
> > -JD
> >
> >
> > ---- Some example SQL:
> >
> >
> > create table lhs (
> > subject_id integer,
> > date_time BIGINT
> > );
> >
> > —Subjects and responses at Arbitrary response times:
> > insert into lhs (subject_id, date_time) values (1,1000);
> > insert into lhs (subject_id, date_time) values (1,1100);
> > insert into lhs (subject_id, date_time) values (1,2000);
> > insert into lhs (subject_id, date_time) values (2,1002);
> > insert into lhs (subject_id, date_time) values (2,1998);
> >
> > create table events (
> > subject_id integer,
> > date_time BIGINT,
> > event_val integer
> > );
> >
> > SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum,
> > count(event_val) as ecnt from events WHERE date_time < lhs.date_time and
> > subject_id = lhs.subject_id ) rhs1 ON true;
> >
> >
> > insert into events (subject_id, date_time, event_val) values
> (1,999,
> > 1);
> > insert into events (subject_id, date_time, event_val) values
> (1,1000,
> > 2);
> > insert into events (subject_id, date_time, event_val) values
> (1,1001,
> > 3);
> > insert into events (subject_id, date_time, event_val) values
> (1,1999,
> > 4);
> > insert into events (subject_id, date_time, event_val) values
> (1,2000,
> > 5);
> > insert into events (subject_id, date_time, event_val) values
> (1,2001,
> > 6);
> >
> > insert into events (subject_id, date_time, event_val) values
> (2,999,
> > 10);
> > insert into events (subject_id, date_time, event_val) values
> (2,1000,
> > 20);
> > insert into events (subject_id, date_time, event_val) values
> (2,1001,
> > 30);
> > insert into events (subject_id, date_time, event_val) values
> (2,1999,
> > 40);
> > insert into events (subject_id, date_time, event_val) values
> (2,2000,
> > 50);
> > insert into events (subject_id, date_time, event_val) values
> (2,2001,
> > 60);
> >
> > SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as
> > val_sum, count(event_val) as ecnt from events WHERE date_time <
> > lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
> >
> > —results:
> > subject_id;date_time;val_sum;ecnt
> > 1;1000;1;1
> > 1;1100;6;3
> > 1;2000;10;4
> > 2;1002;60;3
> > 2;1998;60;3
> >
> >
> >
>
>
> --
> Regards
> Sanjiv Singh
> Mob : +091 9990-447-339
>
Re: Is it possible to do a LEFT JOIN LATERAL in Hive?
Posted by "@Sanjiv Singh" <sa...@gmail.com>.
-- create table lhs
create table lhs (
subject_id int,
date_time BIGINT
);
-- insert some records in table lhs , named insertion will not work
in case of hive
insert into table lhs select 1,1000 from tmpTableWithOneRecords limit 1;
insert into table lhs select 1,1100 from tmpTableWithOneRecords limit 1;
insert into table lhs select 1,2000 from tmpTableWithOneRecords limit 1;
insert into table lhs select 2,1002 from tmpTableWithOneRecords limit 1;
insert into table lhs select 2,1998 from tmpTableWithOneRecords limit 1;
create table events (
subject_id int,
date_time BIGINT,
event_val int
);
insert into table events select 1,999, 1 from
tmpTableWithOneRecords limit 1;
insert into table events select 1,1000, 2 from
tmpTableWithOneRecords limit 1;
insert into table events select 1,1001, 3 from
tmpTableWithOneRecords limit 1;
insert into table events select 1,1999, 4 from
tmpTableWithOneRecords limit 1;
insert into table events select 1,2000, 5 from
tmpTableWithOneRecords limit 1;
insert into table events select 1,2001, 6 from
tmpTableWithOneRecords limit 1;
insert into table events select 2,999, 10 from
tmpTableWithOneRecords limit 1;
insert into table events select 2,1000, 20 from
tmpTableWithOneRecords limit 1;
insert into table events select 2,1001, 30 from
tmpTableWithOneRecords limit 1;
insert into table events select 2,1999, 40 from
tmpTableWithOneRecords limit 1;
insert into table events select 2,2000, 50 from
tmpTableWithOneRecords limit 1;
insert into table events select 2,2001, 60 from
tmpTableWithOneRecords limit 1;
select subject_id,adate,SUM(event_val),COUNT(event_val) from (SELECT
a.subject_id as subject_id ,a.date_time as adate , b.date_time as
bdate , b.event_val as event_val FROM events b LEFT OUTER JOIN lhs a
ON b.subject_id = a.subject_id) abc where bdate < adate group by
subject_id,adate;
1 1000 1 1
1 1100 6 3
1 2000 10 4
2 1002 60 3
2 1998 60 3
On 4/5/15, Jeremy Davis <jd...@datasong.com> wrote:
> Hello!
> I would like to do a LEFT JOIN LATERAL .. Which is using values on the LHS
> as parameters on the RHS. Is this sort of thing possible in Hive?
>
>
> -JD
>
>
> ---- Some example SQL:
>
>
> create table lhs (
> subject_id integer,
> date_time BIGINT
> );
>
> —Subjects and responses at Arbitrary response times:
> insert into lhs (subject_id, date_time) values (1,1000);
> insert into lhs (subject_id, date_time) values (1,1100);
> insert into lhs (subject_id, date_time) values (1,2000);
> insert into lhs (subject_id, date_time) values (2,1002);
> insert into lhs (subject_id, date_time) values (2,1998);
>
> create table events (
> subject_id integer,
> date_time BIGINT,
> event_val integer
> );
>
> SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum,
> count(event_val) as ecnt from events WHERE date_time < lhs.date_time and
> subject_id = lhs.subject_id ) rhs1 ON true;
>
>
> insert into events (subject_id, date_time, event_val) values (1,999,
> 1);
> insert into events (subject_id, date_time, event_val) values (1,1000,
> 2);
> insert into events (subject_id, date_time, event_val) values (1,1001,
> 3);
> insert into events (subject_id, date_time, event_val) values (1,1999,
> 4);
> insert into events (subject_id, date_time, event_val) values (1,2000,
> 5);
> insert into events (subject_id, date_time, event_val) values (1,2001,
> 6);
>
> insert into events (subject_id, date_time, event_val) values (2,999,
> 10);
> insert into events (subject_id, date_time, event_val) values (2,1000,
> 20);
> insert into events (subject_id, date_time, event_val) values (2,1001,
> 30);
> insert into events (subject_id, date_time, event_val) values (2,1999,
> 40);
> insert into events (subject_id, date_time, event_val) values (2,2000,
> 50);
> insert into events (subject_id, date_time, event_val) values (2,2001,
> 60);
>
> SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as
> val_sum, count(event_val) as ecnt from events WHERE date_time <
> lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
>
> —results:
> subject_id;date_time;val_sum;ecnt
> 1;1000;1;1
> 1;1100;6;3
> 1;2000;10;4
> 2;1002;60;3
> 2;1998;60;3
>
>
>
--
Regards
Sanjiv Singh
Mob : +091 9990-447-339