You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by prakash sejwani <pr...@gmail.com> on 2010/03/02 16:37:14 UTC

complex query using FROM and INSERT in hive

when i run this query from hive console

FROM (
  SELECT h.*,
    p.title AS product_sku, p.description AS product_name,
    c.name AS company_name,
    c2.id AS product_company_id,
    c2.name AS product_company_name
  FROM (
      -- Pull from the access_log
      SELECT ip, ident, user,
        -- Reformat the time from the access log
        from_unixtime(cast(unix_
timestamp(time, "dd/MMM/yyyy:hh:mm:ss Z") AS INT)) AS time,
        method, resource, protocol, status, length, referer, agent,
        -- Extract the product_id for the hit from the URL
        cast(regexp_extract(resource, '/products/(\\d+)', 1) AS INT) AS
product_id,
        -- Extract the company_id for the hit from the URL
        cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT) AS
company_id,
        -- Run our User Defined Function (see
src/com/econify/geoip/IpToCountry.java).  Takes the IP of the hit and looks
up its country
        -- ip_to_country(ip) AS ip_country
      FROM access_log
    ) h
    -- Join each hit with its product or company (if it has one)
    LEFT OUTER JOIN products p ON (h.product_id = p.id)
    LEFT OUTER JOIN companies c ON (h.company_id = c.id)
    -- If the hit was for a product, we probably didn't get the company_id
in the hit subquery,
    -- so join products.company_id with another instance of the companies
table
    LEFT OUTER JOIN companies c2 ON (p.company_id = c2.id)
    -- Filter out all hits that weren't for a company or a product
    WHERE h.product_id IS NOT NULL OR h.company_id IS NOT NULL
) hit
-- Insert the hit data into a seperate product_hits table
INSERT OVERWRITE TABLE product_hits
  SELECT ip, ident, user, time,
    method, resource, protocol, status,
    length, referer, agent,
    product_id,
    product_company_id AS company_id,
    ip_country,
    product_name,
    product_company_name AS company_name
  WHERE product_name IS NOT NULL
-- Insert the hit data insto a seperate company_hits table
INSERT OVERWRITE TABLE company_hits
  SELECT ip, ident, user, time,
    method, resource, protocol, status,
    length, referer, agent,
    company_id,
    ip_country,
    company_name
  WHERE company_name IS NOT NULL;

I get the following error

FAILED: Parse Error: line 19:6 cannot recognize input 'FROM' in select
expression

thanks,
prakash

Re: complex query using FROM and INSERT in hive

Posted by prakash sejwani <pr...@gmail.com>.
thanks Zheng

On Tue, Mar 2, 2010 at 11:30 PM, Zheng Shao <zs...@gmail.com> wrote:

> there is an extra "," before "FROM"
>
>        cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT)
> AS company_id,
>        -- Run our User Defined Function (see
> src/com/econify/geoip/IpToCountry.java).  Takes the IP of the hit and
> looks up its country
>        -- ip_to_country(ip) AS ip_country
>      FROM access_log
>
>
> On Tue, Mar 2, 2010 at 7:37 AM, prakash sejwani
> <pr...@gmail.com> wrote:
> > when i run this query from hive console
> >
> > FROM (
> >   SELECT h.*,
> >     p.title AS product_sku, p.description AS product_name,
> >     c.name AS company_name,
> >     c2.id AS product_company_id,
> >     c2.name AS product_company_name
> >   FROM (
> >       -- Pull from the access_log
> >       SELECT ip, ident, user,
> >         -- Reformat the time from the access log
> >         from_unixtime(cast(unix_
> > timestamp(time, "dd/MMM/yyyy:hh:mm:ss Z") AS INT)) AS time,
> >         method, resource, protocol, status, length, referer, agent,
> >         -- Extract the product_id for the hit from the URL
> >         cast(regexp_extract(resource, '/products/(\\d+)', 1) AS INT) AS
> > product_id,
> >         -- Extract the company_id for the hit from the URL
> >         cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT) AS
> > company_id,
> >         -- Run our User Defined Function (see
> > src/com/econify/geoip/IpToCountry.java).  Takes the IP of the hit and
> looks
> > up its country
> >         -- ip_to_country(ip) AS ip_country
> >       FROM access_log
> >     ) h
> >     -- Join each hit with its product or company (if it has one)
> >     LEFT OUTER JOIN products p ON (h.product_id = p.id)
> >     LEFT OUTER JOIN companies c ON (h.company_id = c.id)
> >     -- If the hit was for a product, we probably didn't get the
> company_id
> > in the hit subquery,
> >     -- so join products.company_id with another instance of the companies
> > table
> >     LEFT OUTER JOIN companies c2 ON (p.company_id = c2.id)
> >     -- Filter out all hits that weren't for a company or a product
> >     WHERE h.product_id IS NOT NULL OR h.company_id IS NOT NULL
> > ) hit
> > -- Insert the hit data into a seperate product_hits table
> > INSERT OVERWRITE TABLE product_hits
> >   SELECT ip, ident, user, time,
> >     method, resource, protocol, status,
> >     length, referer, agent,
> >     product_id,
> >     product_company_id AS company_id,
> >     ip_country,
> >     product_name,
> >     product_company_name AS company_name
> >   WHERE product_name IS NOT NULL
> > -- Insert the hit data insto a seperate company_hits table
> > INSERT OVERWRITE TABLE company_hits
> >   SELECT ip, ident, user, time,
> >     method, resource, protocol, status,
> >     length, referer, agent,
> >     company_id,
> >     ip_country,
> >     company_name
> >   WHERE company_name IS NOT NULL;
> >
> > I get the following error
> >
> > FAILED: Parse Error: line 19:6 cannot recognize input 'FROM' in select
> > expression
> >
> > thanks,
> > prakash
>
>
>
> --
> Yours,
> Zheng
>

Re: complex query using FROM and INSERT in hive

Posted by Zheng Shao <zs...@gmail.com>.
there is an extra "," before "FROM"

        cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT)
AS company_id,
        -- Run our User Defined Function (see
src/com/econify/geoip/IpToCountry.java).  Takes the IP of the hit and
looks up its country
        -- ip_to_country(ip) AS ip_country
      FROM access_log


On Tue, Mar 2, 2010 at 7:37 AM, prakash sejwani
<pr...@gmail.com> wrote:
> when i run this query from hive console
>
> FROM (
>   SELECT h.*,
>     p.title AS product_sku, p.description AS product_name,
>     c.name AS company_name,
>     c2.id AS product_company_id,
>     c2.name AS product_company_name
>   FROM (
>       -- Pull from the access_log
>       SELECT ip, ident, user,
>         -- Reformat the time from the access log
>         from_unixtime(cast(unix_
> timestamp(time, "dd/MMM/yyyy:hh:mm:ss Z") AS INT)) AS time,
>         method, resource, protocol, status, length, referer, agent,
>         -- Extract the product_id for the hit from the URL
>         cast(regexp_extract(resource, '/products/(\\d+)', 1) AS INT) AS
> product_id,
>         -- Extract the company_id for the hit from the URL
>         cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT) AS
> company_id,
>         -- Run our User Defined Function (see
> src/com/econify/geoip/IpToCountry.java).  Takes the IP of the hit and looks
> up its country
>         -- ip_to_country(ip) AS ip_country
>       FROM access_log
>     ) h
>     -- Join each hit with its product or company (if it has one)
>     LEFT OUTER JOIN products p ON (h.product_id = p.id)
>     LEFT OUTER JOIN companies c ON (h.company_id = c.id)
>     -- If the hit was for a product, we probably didn't get the company_id
> in the hit subquery,
>     -- so join products.company_id with another instance of the companies
> table
>     LEFT OUTER JOIN companies c2 ON (p.company_id = c2.id)
>     -- Filter out all hits that weren't for a company or a product
>     WHERE h.product_id IS NOT NULL OR h.company_id IS NOT NULL
> ) hit
> -- Insert the hit data into a seperate product_hits table
> INSERT OVERWRITE TABLE product_hits
>   SELECT ip, ident, user, time,
>     method, resource, protocol, status,
>     length, referer, agent,
>     product_id,
>     product_company_id AS company_id,
>     ip_country,
>     product_name,
>     product_company_name AS company_name
>   WHERE product_name IS NOT NULL
> -- Insert the hit data insto a seperate company_hits table
> INSERT OVERWRITE TABLE company_hits
>   SELECT ip, ident, user, time,
>     method, resource, protocol, status,
>     length, referer, agent,
>     company_id,
>     ip_country,
>     company_name
>   WHERE company_name IS NOT NULL;
>
> I get the following error
>
> FAILED: Parse Error: line 19:6 cannot recognize input 'FROM' in select
> expression
>
> thanks,
> prakash



-- 
Yours,
Zheng