You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by Jeremy Davis <jd...@datasong.com> on 2015/04/05 01:08:57 UTC

Is it possible to do a LEFT JOIN LATERAL in Hive?

Hello!
I would like to do a LEFT JOIN LATERAL .. Which is using values on the LHS as parameters on the RHS. Is this sort of thing possible in Hive? 


-JD


---- Some example SQL:


create table lhs (
    subject_id integer,
    date_time  BIGINT
);

       —Subjects and responses at Arbitrary response times:
       insert into lhs (subject_id, date_time) values (1,1000);
       insert into lhs (subject_id, date_time) values (1,1100);
       insert into lhs (subject_id, date_time) values (1,2000);
       insert into lhs (subject_id, date_time) values (2,1002);
       insert into lhs (subject_id, date_time) values (2,1998);

create table events (
    subject_id  integer,
    date_time   BIGINT,
    event_val   integer
);

SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum, count(event_val) as ecnt from events WHERE date_time < lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;


       insert into events (subject_id, date_time, event_val) values (1,999, 1);
       insert into events (subject_id, date_time, event_val) values (1,1000, 2);
       insert into events (subject_id, date_time, event_val) values (1,1001, 3);
       insert into events (subject_id, date_time, event_val) values (1,1999, 4);
       insert into events (subject_id, date_time, event_val) values (1,2000, 5);
       insert into events (subject_id, date_time, event_val) values (1,2001, 6);

       insert into events (subject_id, date_time, event_val) values (2,999, 10);
       insert into events (subject_id, date_time, event_val) values (2,1000, 20);
       insert into events (subject_id, date_time, event_val) values (2,1001, 30);
       insert into events (subject_id, date_time, event_val) values (2,1999, 40);
       insert into events (subject_id, date_time, event_val) values (2,2000, 50);
       insert into events (subject_id, date_time, event_val) values (2,2001, 60);

       SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum, count(event_val) as ecnt from events WHERE date_time < lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;

       —results:
       subject_id;date_time;val_sum;ecnt
       1;1000;1;1
       1;1100;6;3
       1;2000;10;4
       2;1002;60;3
       2;1998;60;3



Re: Is it possible to do a LEFT JOIN LATERAL in Hive?

Posted by Edward Capriolo <ed...@gmail.com>.
Lateral view does support outer if that helps.

On Sunday, April 5, 2015, @Sanjiv Singh <sa...@gmail.com> wrote:

> Hi Jeremy,
>
> Adding to my response ....
>
> 1. Hive doesn't support named insertion , so need to use other ways of
> insertion data in hive table ..
>
> 2.  As you know , hive doesn't support LEFT JOIN LATERAL.  Query , I given
> , is producing same result . hope that it can help you formulate things and
> achieve the same in hive.
> On Apr 5, 2015 3:55 PM, "@Sanjiv Singh" <sanjiv.is.on@gmail.com
> <javascript:_e(%7B%7D,'cvml','sanjiv.is.on@gmail.com');>> wrote:
>
>> --  create table lhs
>>
>> create table lhs (
>>     subject_id int,
>>     date_time  BIGINT
>> );
>>
>> --  insert some records in table lhs , named insertion will not work
>> in case of  hive
>>
>> insert into table lhs select 1,1000 from tmpTableWithOneRecords limit 1;
>> insert into table  lhs select 1,1100 from tmpTableWithOneRecords limit 1;
>> insert into table  lhs select 1,2000 from tmpTableWithOneRecords limit 1;
>> insert into table  lhs select 2,1002 from tmpTableWithOneRecords limit 1;
>> insert into table  lhs select 2,1998 from tmpTableWithOneRecords limit 1;
>>
>> create table events (
>>     subject_id  int,
>>     date_time   BIGINT,
>>     event_val   int
>> );
>>
>>        insert into table events select 1,999, 1 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 1,1000, 2 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 1,1001, 3 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 1,1999, 4 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 1,2000, 5 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 1,2001, 6 from
>> tmpTableWithOneRecords limit 1;
>>
>>        insert into table events select 2,999, 10 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 2,1000, 20 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 2,1001, 30 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 2,1999, 40 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 2,2000, 50 from
>> tmpTableWithOneRecords limit 1;
>>        insert into table events select 2,2001, 60 from
>> tmpTableWithOneRecords limit 1;
>>
>>
>> select subject_id,adate,SUM(event_val),COUNT(event_val) from (SELECT
>> a.subject_id as subject_id ,a.date_time as adate , b.date_time as
>> bdate , b.event_val as event_val  FROM events b LEFT OUTER JOIN lhs a
>> ON b.subject_id = a.subject_id) abc where bdate < adate group by
>> subject_id,adate;
>>
>>
>>
>> 1       1000    1       1
>> 1       1100    6       3
>> 1       2000    10      4
>> 2       1002    60      3
>> 2       1998    60      3
>>
>>
>> On 4/5/15, Jeremy Davis <jdavis@datasong.com
>> <javascript:_e(%7B%7D,'cvml','jdavis@datasong.com');>> wrote:
>> > Hello!
>> > I would like to do a LEFT JOIN LATERAL .. Which is using values on the
>> LHS
>> > as parameters on the RHS. Is this sort of thing possible in Hive?
>> >
>> >
>> > -JD
>> >
>> >
>> > ---- Some example SQL:
>> >
>> >
>> > create table lhs (
>> >     subject_id integer,
>> >     date_time  BIGINT
>> > );
>> >
>> >        —Subjects and responses at Arbitrary response times:
>> >        insert into lhs (subject_id, date_time) values (1,1000);
>> >        insert into lhs (subject_id, date_time) values (1,1100);
>> >        insert into lhs (subject_id, date_time) values (1,2000);
>> >        insert into lhs (subject_id, date_time) values (2,1002);
>> >        insert into lhs (subject_id, date_time) values (2,1998);
>> >
>> > create table events (
>> >     subject_id  integer,
>> >     date_time   BIGINT,
>> >     event_val   integer
>> > );
>> >
>> > SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum,
>> > count(event_val) as ecnt from events WHERE date_time < lhs.date_time and
>> > subject_id = lhs.subject_id ) rhs1 ON true;
>> >
>> >
>> >        insert into events (subject_id, date_time, event_val) values
>> (1,999,
>> > 1);
>> >        insert into events (subject_id, date_time, event_val) values
>> (1,1000,
>> > 2);
>> >        insert into events (subject_id, date_time, event_val) values
>> (1,1001,
>> > 3);
>> >        insert into events (subject_id, date_time, event_val) values
>> (1,1999,
>> > 4);
>> >        insert into events (subject_id, date_time, event_val) values
>> (1,2000,
>> > 5);
>> >        insert into events (subject_id, date_time, event_val) values
>> (1,2001,
>> > 6);
>> >
>> >        insert into events (subject_id, date_time, event_val) values
>> (2,999,
>> > 10);
>> >        insert into events (subject_id, date_time, event_val) values
>> (2,1000,
>> > 20);
>> >        insert into events (subject_id, date_time, event_val) values
>> (2,1001,
>> > 30);
>> >        insert into events (subject_id, date_time, event_val) values
>> (2,1999,
>> > 40);
>> >        insert into events (subject_id, date_time, event_val) values
>> (2,2000,
>> > 50);
>> >        insert into events (subject_id, date_time, event_val) values
>> (2,2001,
>> > 60);
>> >
>> >        SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as
>> > val_sum, count(event_val) as ecnt from events WHERE date_time <
>> > lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
>> >
>> >        —results:
>> >        subject_id;date_time;val_sum;ecnt
>> >        1;1000;1;1
>> >        1;1100;6;3
>> >        1;2000;10;4
>> >        2;1002;60;3
>> >        2;1998;60;3
>> >
>> >
>> >
>>
>>
>> --
>> Regards
>> Sanjiv Singh
>> Mob :  +091 9990-447-339
>>
>

-- 
Sorry this was sent from mobile. Will do less grammar and spell check than
usual.

Re: Is it possible to do a LEFT JOIN LATERAL in Hive?

Posted by "@Sanjiv Singh" <sa...@gmail.com>.
Hi Jeremy,

Adding to my response ....

1. Hive doesn't support named insertion , so need to use other ways of
insertion data in hive table ..

2.  As you know , hive doesn't support LEFT JOIN LATERAL.  Query , I given
, is producing same result . hope that it can help you formulate things and
achieve the same in hive.
On Apr 5, 2015 3:55 PM, "@Sanjiv Singh" <sa...@gmail.com> wrote:

> --  create table lhs
>
> create table lhs (
>     subject_id int,
>     date_time  BIGINT
> );
>
> --  insert some records in table lhs , named insertion will not work
> in case of  hive
>
> insert into table lhs select 1,1000 from tmpTableWithOneRecords limit 1;
> insert into table  lhs select 1,1100 from tmpTableWithOneRecords limit 1;
> insert into table  lhs select 1,2000 from tmpTableWithOneRecords limit 1;
> insert into table  lhs select 2,1002 from tmpTableWithOneRecords limit 1;
> insert into table  lhs select 2,1998 from tmpTableWithOneRecords limit 1;
>
> create table events (
>     subject_id  int,
>     date_time   BIGINT,
>     event_val   int
> );
>
>        insert into table events select 1,999, 1 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 1,1000, 2 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 1,1001, 3 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 1,1999, 4 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 1,2000, 5 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 1,2001, 6 from
> tmpTableWithOneRecords limit 1;
>
>        insert into table events select 2,999, 10 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 2,1000, 20 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 2,1001, 30 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 2,1999, 40 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 2,2000, 50 from
> tmpTableWithOneRecords limit 1;
>        insert into table events select 2,2001, 60 from
> tmpTableWithOneRecords limit 1;
>
>
> select subject_id,adate,SUM(event_val),COUNT(event_val) from (SELECT
> a.subject_id as subject_id ,a.date_time as adate , b.date_time as
> bdate , b.event_val as event_val  FROM events b LEFT OUTER JOIN lhs a
> ON b.subject_id = a.subject_id) abc where bdate < adate group by
> subject_id,adate;
>
>
>
> 1       1000    1       1
> 1       1100    6       3
> 1       2000    10      4
> 2       1002    60      3
> 2       1998    60      3
>
>
> On 4/5/15, Jeremy Davis <jd...@datasong.com> wrote:
> > Hello!
> > I would like to do a LEFT JOIN LATERAL .. Which is using values on the
> LHS
> > as parameters on the RHS. Is this sort of thing possible in Hive?
> >
> >
> > -JD
> >
> >
> > ---- Some example SQL:
> >
> >
> > create table lhs (
> >     subject_id integer,
> >     date_time  BIGINT
> > );
> >
> >        —Subjects and responses at Arbitrary response times:
> >        insert into lhs (subject_id, date_time) values (1,1000);
> >        insert into lhs (subject_id, date_time) values (1,1100);
> >        insert into lhs (subject_id, date_time) values (1,2000);
> >        insert into lhs (subject_id, date_time) values (2,1002);
> >        insert into lhs (subject_id, date_time) values (2,1998);
> >
> > create table events (
> >     subject_id  integer,
> >     date_time   BIGINT,
> >     event_val   integer
> > );
> >
> > SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum,
> > count(event_val) as ecnt from events WHERE date_time < lhs.date_time and
> > subject_id = lhs.subject_id ) rhs1 ON true;
> >
> >
> >        insert into events (subject_id, date_time, event_val) values
> (1,999,
> > 1);
> >        insert into events (subject_id, date_time, event_val) values
> (1,1000,
> > 2);
> >        insert into events (subject_id, date_time, event_val) values
> (1,1001,
> > 3);
> >        insert into events (subject_id, date_time, event_val) values
> (1,1999,
> > 4);
> >        insert into events (subject_id, date_time, event_val) values
> (1,2000,
> > 5);
> >        insert into events (subject_id, date_time, event_val) values
> (1,2001,
> > 6);
> >
> >        insert into events (subject_id, date_time, event_val) values
> (2,999,
> > 10);
> >        insert into events (subject_id, date_time, event_val) values
> (2,1000,
> > 20);
> >        insert into events (subject_id, date_time, event_val) values
> (2,1001,
> > 30);
> >        insert into events (subject_id, date_time, event_val) values
> (2,1999,
> > 40);
> >        insert into events (subject_id, date_time, event_val) values
> (2,2000,
> > 50);
> >        insert into events (subject_id, date_time, event_val) values
> (2,2001,
> > 60);
> >
> >        SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as
> > val_sum, count(event_val) as ecnt from events WHERE date_time <
> > lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
> >
> >        —results:
> >        subject_id;date_time;val_sum;ecnt
> >        1;1000;1;1
> >        1;1100;6;3
> >        1;2000;10;4
> >        2;1002;60;3
> >        2;1998;60;3
> >
> >
> >
>
>
> --
> Regards
> Sanjiv Singh
> Mob :  +091 9990-447-339
>

Re: Is it possible to do a LEFT JOIN LATERAL in Hive?

Posted by "@Sanjiv Singh" <sa...@gmail.com>.
--  create table lhs

create table lhs (
    subject_id int,
    date_time  BIGINT
);

--  insert some records in table lhs , named insertion will not work
in case of  hive

insert into table lhs select 1,1000 from tmpTableWithOneRecords limit 1;
insert into table  lhs select 1,1100 from tmpTableWithOneRecords limit 1;
insert into table  lhs select 1,2000 from tmpTableWithOneRecords limit 1;
insert into table  lhs select 2,1002 from tmpTableWithOneRecords limit 1;
insert into table  lhs select 2,1998 from tmpTableWithOneRecords limit 1;

create table events (
    subject_id  int,
    date_time   BIGINT,
    event_val   int
);

       insert into table events select 1,999, 1 from
tmpTableWithOneRecords limit 1;
       insert into table events select 1,1000, 2 from
tmpTableWithOneRecords limit 1;
       insert into table events select 1,1001, 3 from
tmpTableWithOneRecords limit 1;
       insert into table events select 1,1999, 4 from
tmpTableWithOneRecords limit 1;
       insert into table events select 1,2000, 5 from
tmpTableWithOneRecords limit 1;
       insert into table events select 1,2001, 6 from
tmpTableWithOneRecords limit 1;

       insert into table events select 2,999, 10 from
tmpTableWithOneRecords limit 1;
       insert into table events select 2,1000, 20 from
tmpTableWithOneRecords limit 1;
       insert into table events select 2,1001, 30 from
tmpTableWithOneRecords limit 1;
       insert into table events select 2,1999, 40 from
tmpTableWithOneRecords limit 1;
       insert into table events select 2,2000, 50 from
tmpTableWithOneRecords limit 1;
       insert into table events select 2,2001, 60 from
tmpTableWithOneRecords limit 1;


select subject_id,adate,SUM(event_val),COUNT(event_val) from (SELECT
a.subject_id as subject_id ,a.date_time as adate , b.date_time as
bdate , b.event_val as event_val  FROM events b LEFT OUTER JOIN lhs a
ON b.subject_id = a.subject_id) abc where bdate < adate group by
subject_id,adate;


	
1       1000    1       1
1       1100    6       3
1       2000    10      4
2       1002    60      3
2       1998    60      3


On 4/5/15, Jeremy Davis <jd...@datasong.com> wrote:
> Hello!
> I would like to do a LEFT JOIN LATERAL .. Which is using values on the LHS
> as parameters on the RHS. Is this sort of thing possible in Hive?
>
>
> -JD
>
>
> ---- Some example SQL:
>
>
> create table lhs (
>     subject_id integer,
>     date_time  BIGINT
> );
>
>        —Subjects and responses at Arbitrary response times:
>        insert into lhs (subject_id, date_time) values (1,1000);
>        insert into lhs (subject_id, date_time) values (1,1100);
>        insert into lhs (subject_id, date_time) values (1,2000);
>        insert into lhs (subject_id, date_time) values (2,1002);
>        insert into lhs (subject_id, date_time) values (2,1998);
>
> create table events (
>     subject_id  integer,
>     date_time   BIGINT,
>     event_val   integer
> );
>
> SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as val_sum,
> count(event_val) as ecnt from events WHERE date_time < lhs.date_time and
> subject_id = lhs.subject_id ) rhs1 ON true;
>
>
>        insert into events (subject_id, date_time, event_val) values (1,999,
> 1);
>        insert into events (subject_id, date_time, event_val) values (1,1000,
> 2);
>        insert into events (subject_id, date_time, event_val) values (1,1001,
> 3);
>        insert into events (subject_id, date_time, event_val) values (1,1999,
> 4);
>        insert into events (subject_id, date_time, event_val) values (1,2000,
> 5);
>        insert into events (subject_id, date_time, event_val) values (1,2001,
> 6);
>
>        insert into events (subject_id, date_time, event_val) values (2,999,
> 10);
>        insert into events (subject_id, date_time, event_val) values (2,1000,
> 20);
>        insert into events (subject_id, date_time, event_val) values (2,1001,
> 30);
>        insert into events (subject_id, date_time, event_val) values (2,1999,
> 40);
>        insert into events (subject_id, date_time, event_val) values (2,2000,
> 50);
>        insert into events (subject_id, date_time, event_val) values (2,2001,
> 60);
>
>        SELECT * from lhs LEFT JOIN LATERAL ( select SUM(event_val) as
> val_sum, count(event_val) as ecnt from events WHERE date_time <
> lhs.date_time and subject_id = lhs.subject_id ) rhs1 ON true;
>
>        —results:
>        subject_id;date_time;val_sum;ecnt
>        1;1000;1;1
>        1;1100;6;3
>        1;2000;10;4
>        2;1002;60;3
>        2;1998;60;3
>
>
>


-- 
Regards
Sanjiv Singh
Mob :  +091 9990-447-339