You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@pig.apache.org by Raj hadoop <ra...@gmail.com> on 2013/04/22 22:59:02 UTC
Pig script from sql query
Hi friends,
I am new to PIG script. I need to convert below sql query to PIG script.
SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,
CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM
(
SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,
MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y' ELSE
'N' END) TAC_142 FROM
(
SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
'%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
THEN 'Y' ELSE 'N' END) TAC_1,
MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,
MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
'Y' ELSE 'N' END) TAC_3
FROM
D_INSTALLATION DI,
D_INSTALLATION_PRODUCT DIP
WHERE
DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
DIP.BAC_WID = DI.BAC_WID
GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
)
GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)
T1,
D_BILLING_ACCOUNT DB
WHERE
DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND
DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)
--------------------------------------------------------------------------
I have tried to write a below inner query into PIG.
SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
'%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
THEN 'Y' ELSE 'N' END) TAC_1,
MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,
MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
'Y' ELSE 'N' END) TAC_3
FROM
D_INSTALLATION DI,
D_INSTALLATION_PRODUCT DIP
WHERE
DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
DIP.BAC_WID = DI.BAC_WID
GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
PIG Script
A = load 'D_INSTALLATION.txt';
B= load 'D_INSTALLATION_PRODUCT.txt';
C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
D= join C by BAC_WID,B by BAC_WID;
caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
UPPER(HAZARD)=='999%EMERGENCY%LINE' and
UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,
((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
('E','T')) ? 'Y':'N') As TAC_2,
(PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As TAC_3;
grouped = group caseData by
DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
Data = foreach grouped generate group as
DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3
It is giving lot of errors. Can you please help me.. attached are the
tables
Re: Pig script from sql query
Posted by "j.barrett Strausser" <j....@gmail.com>.
Can you post the errors?
On Mon, Apr 22, 2013 at 5:22 PM, Raj hadoop <ra...@gmail.com> wrote:
> Hi,
>
> Thanks for giving reply. I didn't have any issues with load statement as i
> have mentioned the correct file path while loading. Could you please check
> the rest of the logic..
>
>
>
> On Tue, Apr 23, 2013 at 2:33 AM, j.barrett Strausser <
> j.barrett.strausser@gmail.com> wrote:
>
> > You'll have more luck if you post the errors.
> >
> > Off the bat, I assume you are going to have problems given your load
> > statement.
> >
> > -b
> >
> >
> > On Mon, Apr 22, 2013 at 4:59 PM, Raj hadoop <ra...@gmail.com>
> wrote:
> >
> > > Hi friends,
> > >
> > > I am new to PIG script. I need to convert below sql query to PIG
> script.
> > >
> > >
> > > SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,
> > >
> > > CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM
> > >
> > > (
> > >
> > >
> > >
> > > SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,
> > >
> > > MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y'
> ELSE
> > > 'N' END) TAC_142 FROM
> > >
> > > (
> > >
> > > SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
> > >
> > > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD)
> LIKE
> > > '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
> > '%USE%999%ALERT%METHOD%'
> > > THEN 'Y' ELSE 'N' END) TAC_1,
> > >
> > > MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
> > > TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END)
> TAC_2,
> > >
> > > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640')
> THEN
> > > 'Y' ELSE 'N' END) TAC_3
> > >
> > > FROM
> > >
> > > D_INSTALLATION DI,
> > >
> > > D_INSTALLATION_PRODUCT DIP
> > >
> > > WHERE
> > >
> > > DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
> > >
> > > DIP.BAC_WID = DI.BAC_WID
> > >
> > > GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
> > >
> > > )
> > >
> > > GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)
> > >
> > > T1,
> > >
> > > D_BILLING_ACCOUNT DB
> > >
> > > WHERE
> > >
> > > DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND
> > >
> > > DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)
> > >
> > >
> > >
> >
> --------------------------------------------------------------------------
> > >
> > >
> > > I have tried to write a below inner query into PIG.
> > >
> > >
> > > SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
> > >
> > > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD)
> LIKE
> > > '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
> > '%USE%999%ALERT%METHOD%'
> > > THEN 'Y' ELSE 'N' END) TAC_1,
> > >
> > > MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
> > > TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END)
> TAC_2,
> > >
> > > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640')
> THEN
> > > 'Y' ELSE 'N' END) TAC_3
> > >
> > > FROM
> > >
> > > D_INSTALLATION DI,
> > >
> > > D_INSTALLATION_PRODUCT DIP
> > >
> > > WHERE
> > >
> > > DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
> > >
> > > DIP.BAC_WID = DI.BAC_WID
> > >
> > > GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
> > >
> > >
> > > PIG Script
> > >
> > >
> > > A = load 'D_INSTALLATION.txt';
> > > B= load 'D_INSTALLATION_PRODUCT.txt';
> > > C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
> > > D= join C by BAC_WID,B by BAC_WID;
> > >
> > > caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
> > > UPPER(HAZARD)=='999%EMERGENCY%LINE' and
> > > UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,
> > >
> > > ((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
> > > ('E','T')) ? 'Y':'N') As TAC_2,
> > >
> > > (PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As
> > TAC_3;
> > > grouped = group caseData by
> > > DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
> > > Data = foreach grouped generate group as
> > > DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
> > > TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3
> > >
> > >
> > > It is giving lot of errors. Can you please help me.. attached are the
> > > tables
> > >
> > >
> > >
> > >
> > >
> >
> >
> > --
> >
> >
> > https://github.com/bearrito
> > @barrettsmash
> >
>
--
https://github.com/bearrito
@barrettsmash
Re: Pig script from sql query
Posted by Raj hadoop <ra...@gmail.com>.
Hi,
Thanks for giving reply. I didn't have any issues with load statement as i
have mentioned the correct file path while loading. Could you please check
the rest of the logic..
On Tue, Apr 23, 2013 at 2:33 AM, j.barrett Strausser <
j.barrett.strausser@gmail.com> wrote:
> You'll have more luck if you post the errors.
>
> Off the bat, I assume you are going to have problems given your load
> statement.
>
> -b
>
>
> On Mon, Apr 22, 2013 at 4:59 PM, Raj hadoop <ra...@gmail.com> wrote:
>
> > Hi friends,
> >
> > I am new to PIG script. I need to convert below sql query to PIG script.
> >
> >
> > SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,
> >
> > CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM
> >
> > (
> >
> >
> >
> > SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,
> >
> > MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y' ELSE
> > 'N' END) TAC_142 FROM
> >
> > (
> >
> > SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
> >
> > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
> > '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
> '%USE%999%ALERT%METHOD%'
> > THEN 'Y' ELSE 'N' END) TAC_1,
> >
> > MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
> > TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,
> >
> > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
> > 'Y' ELSE 'N' END) TAC_3
> >
> > FROM
> >
> > D_INSTALLATION DI,
> >
> > D_INSTALLATION_PRODUCT DIP
> >
> > WHERE
> >
> > DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
> >
> > DIP.BAC_WID = DI.BAC_WID
> >
> > GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
> >
> > )
> >
> > GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)
> >
> > T1,
> >
> > D_BILLING_ACCOUNT DB
> >
> > WHERE
> >
> > DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND
> >
> > DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)
> >
> >
> >
> --------------------------------------------------------------------------
> >
> >
> > I have tried to write a below inner query into PIG.
> >
> >
> > SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
> >
> > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
> > '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
> '%USE%999%ALERT%METHOD%'
> > THEN 'Y' ELSE 'N' END) TAC_1,
> >
> > MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
> > TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,
> >
> > MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
> > 'Y' ELSE 'N' END) TAC_3
> >
> > FROM
> >
> > D_INSTALLATION DI,
> >
> > D_INSTALLATION_PRODUCT DIP
> >
> > WHERE
> >
> > DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
> >
> > DIP.BAC_WID = DI.BAC_WID
> >
> > GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
> >
> >
> > PIG Script
> >
> >
> > A = load 'D_INSTALLATION.txt';
> > B= load 'D_INSTALLATION_PRODUCT.txt';
> > C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
> > D= join C by BAC_WID,B by BAC_WID;
> >
> > caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
> > UPPER(HAZARD)=='999%EMERGENCY%LINE' and
> > UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,
> >
> > ((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
> > ('E','T')) ? 'Y':'N') As TAC_2,
> >
> > (PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As
> TAC_3;
> > grouped = group caseData by
> > DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
> > Data = foreach grouped generate group as
> > DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
> > TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3
> >
> >
> > It is giving lot of errors. Can you please help me.. attached are the
> > tables
> >
> >
> >
> >
> >
>
>
> --
>
>
> https://github.com/bearrito
> @barrettsmash
>
Re: Pig script from sql query
Posted by "j.barrett Strausser" <j....@gmail.com>.
You'll have more luck if you post the errors.
Off the bat, I assume you are going to have problems given your load
statement.
-b
On Mon, Apr 22, 2013 at 4:59 PM, Raj hadoop <ra...@gmail.com> wrote:
> Hi friends,
>
> I am new to PIG script. I need to convert below sql query to PIG script.
>
>
> SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,
>
> CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM
>
> (
>
>
>
> SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,
>
> MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y' ELSE
> 'N' END) TAC_142 FROM
>
> (
>
> SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
>
> MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
> '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
> THEN 'Y' ELSE 'N' END) TAC_1,
>
> MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
> TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,
>
> MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
> 'Y' ELSE 'N' END) TAC_3
>
> FROM
>
> D_INSTALLATION DI,
>
> D_INSTALLATION_PRODUCT DIP
>
> WHERE
>
> DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
>
> DIP.BAC_WID = DI.BAC_WID
>
> GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
>
> )
>
> GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)
>
> T1,
>
> D_BILLING_ACCOUNT DB
>
> WHERE
>
> DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND
>
> DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)
>
>
> --------------------------------------------------------------------------
>
>
> I have tried to write a below inner query into PIG.
>
>
> SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,
>
> MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
> '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
> THEN 'Y' ELSE 'N' END) TAC_1,
>
> MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
> TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,
>
> MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
> 'Y' ELSE 'N' END) TAC_3
>
> FROM
>
> D_INSTALLATION DI,
>
> D_INSTALLATION_PRODUCT DIP
>
> WHERE
>
> DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND
>
> DIP.BAC_WID = DI.BAC_WID
>
> GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO
>
>
> PIG Script
>
>
> A = load 'D_INSTALLATION.txt';
> B= load 'D_INSTALLATION_PRODUCT.txt';
> C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
> D= join C by BAC_WID,B by BAC_WID;
>
> caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
> UPPER(HAZARD)=='999%EMERGENCY%LINE' and
> UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,
>
> ((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
> ('E','T')) ? 'Y':'N') As TAC_2,
>
> (PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As TAC_3;
> grouped = group caseData by
> DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
> Data = foreach grouped generate group as
> DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
> TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3
>
>
> It is giving lot of errors. Can you please help me.. attached are the
> tables
>
>
>
>
>
--
https://github.com/bearrito
@barrettsmash