You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by prakash sejwani <pr...@gmail.com> on 2010/03/02 16:37:14 UTC
complex query using FROM and INSERT in hive
when i run this query from hive console
FROM (
SELECT h.*,
p.title AS product_sku, p.description AS product_name,
c.name AS company_name,
c2.id AS product_company_id,
c2.name AS product_company_name
FROM (
-- Pull from the access_log
SELECT ip, ident, user,
-- Reformat the time from the access log
from_unixtime(cast(unix_
timestamp(time, "dd/MMM/yyyy:hh:mm:ss Z") AS INT)) AS time,
method, resource, protocol, status, length, referer, agent,
-- Extract the product_id for the hit from the URL
cast(regexp_extract(resource, '/products/(\\d+)', 1) AS INT) AS
product_id,
-- Extract the company_id for the hit from the URL
cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT) AS
company_id,
-- Run our User Defined Function (see
src/com/econify/geoip/IpToCountry.java). Takes the IP of the hit and looks
up its country
-- ip_to_country(ip) AS ip_country
FROM access_log
) h
-- Join each hit with its product or company (if it has one)
LEFT OUTER JOIN products p ON (h.product_id = p.id)
LEFT OUTER JOIN companies c ON (h.company_id = c.id)
-- If the hit was for a product, we probably didn't get the company_id
in the hit subquery,
-- so join products.company_id with another instance of the companies
table
LEFT OUTER JOIN companies c2 ON (p.company_id = c2.id)
-- Filter out all hits that weren't for a company or a product
WHERE h.product_id IS NOT NULL OR h.company_id IS NOT NULL
) hit
-- Insert the hit data into a seperate product_hits table
INSERT OVERWRITE TABLE product_hits
SELECT ip, ident, user, time,
method, resource, protocol, status,
length, referer, agent,
product_id,
product_company_id AS company_id,
ip_country,
product_name,
product_company_name AS company_name
WHERE product_name IS NOT NULL
-- Insert the hit data insto a seperate company_hits table
INSERT OVERWRITE TABLE company_hits
SELECT ip, ident, user, time,
method, resource, protocol, status,
length, referer, agent,
company_id,
ip_country,
company_name
WHERE company_name IS NOT NULL;
I get the following error
FAILED: Parse Error: line 19:6 cannot recognize input 'FROM' in select
expression
thanks,
prakash
Re: complex query using FROM and INSERT in hive
Posted by prakash sejwani <pr...@gmail.com>.
thanks Zheng
On Tue, Mar 2, 2010 at 11:30 PM, Zheng Shao <zs...@gmail.com> wrote:
> there is an extra "," before "FROM"
>
> cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT)
> AS company_id,
> -- Run our User Defined Function (see
> src/com/econify/geoip/IpToCountry.java). Takes the IP of the hit and
> looks up its country
> -- ip_to_country(ip) AS ip_country
> FROM access_log
>
>
> On Tue, Mar 2, 2010 at 7:37 AM, prakash sejwani
> <pr...@gmail.com> wrote:
> > when i run this query from hive console
> >
> > FROM (
> > SELECT h.*,
> > p.title AS product_sku, p.description AS product_name,
> > c.name AS company_name,
> > c2.id AS product_company_id,
> > c2.name AS product_company_name
> > FROM (
> > -- Pull from the access_log
> > SELECT ip, ident, user,
> > -- Reformat the time from the access log
> > from_unixtime(cast(unix_
> > timestamp(time, "dd/MMM/yyyy:hh:mm:ss Z") AS INT)) AS time,
> > method, resource, protocol, status, length, referer, agent,
> > -- Extract the product_id for the hit from the URL
> > cast(regexp_extract(resource, '/products/(\\d+)', 1) AS INT) AS
> > product_id,
> > -- Extract the company_id for the hit from the URL
> > cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT) AS
> > company_id,
> > -- Run our User Defined Function (see
> > src/com/econify/geoip/IpToCountry.java). Takes the IP of the hit and
> looks
> > up its country
> > -- ip_to_country(ip) AS ip_country
> > FROM access_log
> > ) h
> > -- Join each hit with its product or company (if it has one)
> > LEFT OUTER JOIN products p ON (h.product_id = p.id)
> > LEFT OUTER JOIN companies c ON (h.company_id = c.id)
> > -- If the hit was for a product, we probably didn't get the
> company_id
> > in the hit subquery,
> > -- so join products.company_id with another instance of the companies
> > table
> > LEFT OUTER JOIN companies c2 ON (p.company_id = c2.id)
> > -- Filter out all hits that weren't for a company or a product
> > WHERE h.product_id IS NOT NULL OR h.company_id IS NOT NULL
> > ) hit
> > -- Insert the hit data into a seperate product_hits table
> > INSERT OVERWRITE TABLE product_hits
> > SELECT ip, ident, user, time,
> > method, resource, protocol, status,
> > length, referer, agent,
> > product_id,
> > product_company_id AS company_id,
> > ip_country,
> > product_name,
> > product_company_name AS company_name
> > WHERE product_name IS NOT NULL
> > -- Insert the hit data insto a seperate company_hits table
> > INSERT OVERWRITE TABLE company_hits
> > SELECT ip, ident, user, time,
> > method, resource, protocol, status,
> > length, referer, agent,
> > company_id,
> > ip_country,
> > company_name
> > WHERE company_name IS NOT NULL;
> >
> > I get the following error
> >
> > FAILED: Parse Error: line 19:6 cannot recognize input 'FROM' in select
> > expression
> >
> > thanks,
> > prakash
>
>
>
> --
> Yours,
> Zheng
>
Re: complex query using FROM and INSERT in hive
Posted by Zheng Shao <zs...@gmail.com>.
there is an extra "," before "FROM"
cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT)
AS company_id,
-- Run our User Defined Function (see
src/com/econify/geoip/IpToCountry.java). Takes the IP of the hit and
looks up its country
-- ip_to_country(ip) AS ip_country
FROM access_log
On Tue, Mar 2, 2010 at 7:37 AM, prakash sejwani
<pr...@gmail.com> wrote:
> when i run this query from hive console
>
> FROM (
> SELECT h.*,
> p.title AS product_sku, p.description AS product_name,
> c.name AS company_name,
> c2.id AS product_company_id,
> c2.name AS product_company_name
> FROM (
> -- Pull from the access_log
> SELECT ip, ident, user,
> -- Reformat the time from the access log
> from_unixtime(cast(unix_
> timestamp(time, "dd/MMM/yyyy:hh:mm:ss Z") AS INT)) AS time,
> method, resource, protocol, status, length, referer, agent,
> -- Extract the product_id for the hit from the URL
> cast(regexp_extract(resource, '/products/(\\d+)', 1) AS INT) AS
> product_id,
> -- Extract the company_id for the hit from the URL
> cast(regexp_extract(resource, '/companies/(\\d+)', 1) AS INT) AS
> company_id,
> -- Run our User Defined Function (see
> src/com/econify/geoip/IpToCountry.java). Takes the IP of the hit and looks
> up its country
> -- ip_to_country(ip) AS ip_country
> FROM access_log
> ) h
> -- Join each hit with its product or company (if it has one)
> LEFT OUTER JOIN products p ON (h.product_id = p.id)
> LEFT OUTER JOIN companies c ON (h.company_id = c.id)
> -- If the hit was for a product, we probably didn't get the company_id
> in the hit subquery,
> -- so join products.company_id with another instance of the companies
> table
> LEFT OUTER JOIN companies c2 ON (p.company_id = c2.id)
> -- Filter out all hits that weren't for a company or a product
> WHERE h.product_id IS NOT NULL OR h.company_id IS NOT NULL
> ) hit
> -- Insert the hit data into a seperate product_hits table
> INSERT OVERWRITE TABLE product_hits
> SELECT ip, ident, user, time,
> method, resource, protocol, status,
> length, referer, agent,
> product_id,
> product_company_id AS company_id,
> ip_country,
> product_name,
> product_company_name AS company_name
> WHERE product_name IS NOT NULL
> -- Insert the hit data insto a seperate company_hits table
> INSERT OVERWRITE TABLE company_hits
> SELECT ip, ident, user, time,
> method, resource, protocol, status,
> length, referer, agent,
> company_id,
> ip_country,
> company_name
> WHERE company_name IS NOT NULL;
>
> I get the following error
>
> FAILED: Parse Error: line 19:6 cannot recognize input 'FROM' in select
> expression
>
> thanks,
> prakash
--
Yours,
Zheng