You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@sqoop.apache.org by Venkat Ranganathan <vr...@hortonworks.com> on 2014/03/14 20:17:30 UTC
Fwd: Sqoop import Issue
Moving to sqoop user mailing list.
This points to an error in your query. $CONDITIONS evaluates to the per
mapper range. In your case, your split column is not present in
every place you have $CONDITIONS, so you should remove it from places
where it does not make sense to have it. For example,
==
SELECT T.*
FROM SI_F_L0_TRADE T
WHERE T.REVENUE_DATE >= '01-JAN-2014' and \$CONDITIONS
AND
===
The $CONDITIONS will be substituted to refer to columns that do not
exist in the subquery context.
In general, you can enable the --verbose option, get the query being
executed, and try to run it with SQL*Plus or some other tool to see if
it is valid SQL.
Venkat
---------- Forwarded message ----------
From: Sethuramaswamy, Suresh <su...@credit-suisse.com>
Date: Fri, Mar 14, 2014 at 9:40 AM
Subject: RE: Sqoop import Issue
To: "vranganathan@hortonworks.com" <vr...@hortonworks.com>
Greetings Venkat,
I am trying to run a free-form SQL query on Sqoop to import data from Oracle
10g; every time, I keep getting an error that the split-by column mentioned is
invalid. Can you please help?
Sqoop version : 1.4.3
Oracle 10g
Ojdbc6.jar
Error
14/03/14 08:55:16 INFO mapred.JobClient: Task Id :
attempt_201403100830_0118_m_000009_0, Status : FAILED
java.io.IOException: SQLException in nextKeyValue
at org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:265)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:530)
at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:363)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.sql.SQLSyntaxErrorException: ORA-00904:
"SI_CUSTOMER_XREF2"."CUSTOMER_ID": invalid identifier
QUERY
sqoop import --connect <<connection parameter>> -query
"
SELECT
SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
SI_F_L0_TRADE.DATA_SRC_SYS_CD,
SI_F_L0_TRADE.SI_TRADE_ID,
SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
****
FROM
(
SELECT * FROM SI_CUSTOMER_XREF2 WHERE RUN_ID='209'
and \$CONDITIONS
) SI_CUSTOMER_XREF2 join
(
SELECT T.*
FROM SI_F_L0_TRADE T
WHERE T.REVENUE_DATE >= '01-JAN-2014' and \$CONDITIONS
AND
T.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD
D WHERE D.RUN_ID = '209' and \$CONDITIONS
)
) SI_F_L0_TRADE ON (
SI_F_L0_TRADE.CUSTOMER_SGK=SI_CUSTOMER_XREF2.CUSTOMER_SGK )
join
(
SELECT * FROM SI_F_L0_TRADE_GROUP_ID G where RUN_ID='209' and \$CONDITIONS
AND REVENUE_DATE >= '01-JAN-2014'
AND G.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM
DC_DATE_CARD D WHERE D.RUN_ID = '209' and \$CONDITIONS
)
) SI_F_L0_TRADE_GROUP_ID ON (
SI_F_L0_TRADE.TRADE_SGK=SI_F_L0_TRADE_GROUP_ID.TRADE_SGK )
join
( SELECT * FROM cpl_trade_instrument_ref WHERE RUN_ID= '209' --
and \$CONDITIONS
) CPL_INSTRUMENT_REF ON (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=SI_F_L0_TRADE.INSTRUMENT_SGK )
JOIN
(
SELECT * FROM CPL_INSTRUMENT_REF_EXT WHERE RUN_ID='209' and \$CONDITIONS
) CPL_INSTRUMENT_REF_EXT ON (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=CPL_INSTRUMENT_REF_EXT.INSTRUMENT_SGK)
join
(
SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID='209' and \$CONDITIONS
) ISSUER_BUSINESS_REGION ON (
ISSUER_BUSINESS_REGION.ISO_CNTRY_3_CHAR_CD=CPL_INSTRUMENT_REF_EXT.ISSUER_COUNTRY_CD
)
join
(
SELECT (product_line_cd ||
CASE WHEN nvl(new_issue_ind, '0') = '1' THEN 'P'
ELSE 'S' END || '_' ||
nvl(business_line_cd, 'N') || '_' ||
nvl(trade_type_cd, 'N') || '_' ||
nvl(region_cd, 'N'
)) product_cd, SI_GROUP_ID_XREF.* FROM SI_GROUP_ID_XREF
WHERE RUN_ID = '209' and \$CONDITIONS
) SI_GROUP_ID_XREF ON (
SI_F_L0_TRADE_GROUP_ID.GROUP_ID=SI_GROUP_ID_XREF.GROUP_ID )
JOIN
(
SELECT * FROM SI_PRODUCT_LINE WHERE RUN_ID= '209' and \$CONDITIONS
) SI_PRODUCT_LINE ON (
SI_GROUP_ID_XREF.PRODUCT_LINE_CD=SI_PRODUCT_LINE.PRODUCT_LINE_L6_SGK
)
join
(
SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID= '209' and \$CONDITIONS
) SI_BUSINESS_REGION ON (
SI_GROUP_ID_XREF.REGION_CD=SI_BUSINESS_REGION.REGION_L6_SGK )
JOIN
(
SELECT * FROM si_salesrep_xref WHERE run_id = '209' and \$CONDITIONS
) MARKETER ON ( MARKETER.SRC_SALESREP_ID=SI_F_L0_TRADE.SRC_SALES_REP_ID)
join
(
SELECT * FROM CDW_PRESENTATION.CPL_EMPLOYEE_REF WHERE run_id ='209'
and \$CONDITIONS
) MARKETER_EMPLOYEE_REF ON (
MARKETER.CS_EMPLOYEE_ID=MARKETER_EMPLOYEE_REF.EMPLOYEE_ID )
join
(
SELECT * FROM SI_BOOK WHERE RUN_ID= '209' and \$CONDITIONS
) SI_BOOK ON ( SI_BOOK.BOOK_SGK=SI_F_L0_TRADE.BOOK_SGK )
join
(SELECT * FROM cpl_sfo_country_xref ) DOMICILE_SFO_CNTRY_XREF ON
( SI_CUSTOMER_XREF2.DOMICILE_CNTRY_ISO_CD=DOMICILE_SFO_CNTRY_XREF.CNTRY_ISO_CD)
join
(
SELECT * FROM CPL_CLIENT_REF WHERE RUN_ID='209' and \$CONDITIONS
AND SRC_SYS_CD = 'GESC' ) CPL_CLIENT_REF
ON ( SI_CUSTOMER_XREF2.GESC_CLIENT_ID=CPL_CLIENT_REF.CLIENT_CD )
JOIN
(
select * from si_client_type where run_id = '209' and \$CONDITIONS
) CLIENT_TYPE ON ( CPL_CLIENT_REF.CLIENT_SGK=CLIENT_TYPE.CLIENT_SGK )
join
(
select * from si_client_types where run_id = '209' and \$CONDITIONS
) CLIENT_TYPES ON (
CLIENT_TYPE.CLIENT_TYPES_SGK=CLIENT_TYPES.CLIENT_TYPES_SGK )
WHERE MARKETER.SRC_SYS_CD=SI_F_L0_TRADE.SREP_SRC_SYS_CD and \$CONDITIONS
AND
(
( ( SI_F_L0_TRADE.IGNORE_REASON_CD ) IS NULL
AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) IS NOT NULL
AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) >0
AND SI_GROUP_ID_XREF.Can_Double_Count = 0
AND SI_GROUP_ID_XREF.Product_Line_Cd IS NOT NULL
AND SI_GROUP_ID_XREF.LOSS_IND >=0
)
AND
SI_PRODUCT_LINE.PRODUCT_LINE_L1_NAME = 'All (Equities)' )
GROUP BY
SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
SI_F_L0_TRADE.DATA_SRC_SYS_CD,
SI_F_L0_TRADE.SI_TRADE_ID,
SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
***
" --split-by SI_CUSTOMER_XREF2.customer_id --boundary-query "select
min(CUSTOMER_ID),max(CUSTOMER_ID) from SI_CUSTOMER_XREF2 WHERE
RUN_ID='209'" -m 48 --fields-terminated-by "|" --target-dir
"/SqoopImports/daily_eqt" --verbose;
Suresh Sethuramaswamy
Cross Div MIS & Power User Tools
From: Venkat Ranganathan [mailto:vranganathan@hortonworks.com]
Sent: Thursday, March 13, 2014 5:30 PM
To: user@sqoop.apache.org
Subject: Re: Sqoop import into a Hive table with 2 columns as partition keys
If your partition columns are dynamic, then using hcatalog import will
be enough. The Sqoop HCatalog integration section in the Sqoop user
guide has more information on the HCatalog integration (that I linked
earlier). The restriction is that you can't have more than one
static partition key.
Venkat
On Wed, Mar 12, 2014 at 11:28 PM, Richa Sharma
<ma...@gmail.com> wrote:
Thanks for the inputs.
In my case the partition columns are dynamic.. so I guess it will make
sense to load it into 2 hops.
Venkat, thanks for sharing the ppt on Hcatalog - I couldn't find much
online so it is very helpful :-)
Richa
On Thu, Mar 13, 2014 at 11:33 AM, Venkat Ranganathan
<vr...@hortonworks.com> wrote:
In Sqoop 1.4.4 we have HCatalog integration. This allows importing
data into Hive partitioned tables with multiple partition keys (but only
one can be a static partitioning key). See
http://sqoop.apache.org/docs/1.4.4/SqoopUserGuide.html#_sqoop_hcatalog_integration
There is a presentation done at Hadoop World on Hcatalog integration
that you can refer to for more information
https://cwiki.apache.org/confluence/download/attachments/27361435/SqoopHCatIntegration-HadoopWorld2013.pptx
Venkat
On Wed, Mar 12, 2014 at 9:19 PM, bejoy ks <be...@gmail.com> wrote:
AFAIK, Sqoop 1.x imports support Hive single-column partitions.
If it is multi-column partitions, you might have to take a two-hop approach:
1. Load the data into a single/non-partitioned Hive table.
2. From the above staging table, load to the final one.
On Tue, Mar 11, 2014 at 7:23 AM, Richa Sharma
<ma...@gmail.com> wrote:
Hi,
I am trying to run sqoop import into a Hive table partitioned on 2
columns (col1 string, col2 string).
I looked at the documentation for both --hive-import and --hcatalog
and could not find any such option.
It works successfully when table is partitioned on 1 column but not
when i increase number of partition columns.
Has anyone tried it and knows how to achieve it ?
Thanks,
Richa
--
Regards
Bejoy
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or
entity to which it is addressed and may contain information that is
confidential, privileged and exempt from disclosure under applicable
law. If the reader of this message is not the intended recipient, you
are hereby notified that any printing, copying, dissemination,
distribution, disclosure or forwarding of this communication is
strictly prohibited. If you have received this communication in error,
please contact the sender immediately and delete it from your system.
Thank You.
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or
entity to which it is addressed and may contain information that is
confidential, privileged and exempt from disclosure under applicable
law. If the reader of this message is not the intended recipient, you
are hereby notified that any printing, copying, dissemination,
distribution, disclosure or forwarding of this communication is
strictly prohibited. If you have received this communication in error,
please contact the sender immediately and delete it from your system.
Thank You.
==============================================================================
Please access the attached hyperlink for an important electronic
communications disclaimer:
http://www.credit-suisse.com/legal/en/disclaimer_email_ib.html
==============================================================================
--
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or entity to
which it is addressed and may contain information that is confidential,
privileged and exempt from disclosure under applicable law. If the reader
of this message is not the intended recipient, you are hereby notified that
any printing, copying, dissemination, distribution, disclosure or
forwarding of this communication is strictly prohibited. If you have
received this communication in error, please contact the sender immediately
and delete it from your system. Thank You.
RE: Sqoop import Issue
Posted by "Sethuramaswamy, Suresh " <su...@credit-suisse.com>.
Thank you so much Venkat for your prompt response, I will correct this and let you know if it resolved the issue.
Regards,
Suresh
-----Original Message-----
From: Venkat Ranganathan [mailto:vranganathan@hortonworks.com]
Sent: Friday, March 14, 2014 3:18 PM
To: user@sqoop.apache.org
Subject: Fwd: Sqoop import Issue
Moving to sqoop user mailing list.
This points to an error in your query. $CONDITIONS evaluates to the per
mapper range. In your case, your split column is not present in
every place you have $CONDITIONS, so you should remove it from places
where it does not make sense to have it. For example,
==
SELECT T.*
FROM SI_F_L0_TRADE T
WHERE T.REVENUE_DATE >= '01-JAN-2014' and \$CONDITIONS
AND
===
The $CONDITIONS will be substituted to refer to columns that do not
exist in the subquery context.
In general, you can enable the --verbose option, get the query being
executed, and try to run it with SQL*Plus or some other tool to see if
it is valid SQL.
Venkat
---------- Forwarded message ----------
From: Sethuramaswamy, Suresh <su...@credit-suisse.com>
Date: Fri, Mar 14, 2014 at 9:40 AM
Subject: RE: Sqoop import Issue
To: "vranganathan@hortonworks.com" <vr...@hortonworks.com>
Greetings Venkat,
I am trying to run a free-form SQL query on Sqoop to import data from Oracle
10g; every time, I keep getting an error that the split-by column mentioned is
invalid. Can you please help?
Sqoop version : 1.4.3
Oracle 10g
Ojdbc6.jar
Error
14/03/14 08:55:16 INFO mapred.JobClient: Task Id :
attempt_201403100830_0118_m_000009_0, Status : FAILED
java.io.IOException: SQLException in nextKeyValue
at org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:265)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:530)
at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:363)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.sql.SQLSyntaxErrorException: ORA-00904:
"SI_CUSTOMER_XREF2"."CUSTOMER_ID": invalid identifier
QUERY
sqoop import --connect <<connection parameter>> -query
"
SELECT
SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
SI_F_L0_TRADE.DATA_SRC_SYS_CD,
SI_F_L0_TRADE.SI_TRADE_ID,
SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
****
FROM
(
SELECT * FROM SI_CUSTOMER_XREF2 WHERE RUN_ID='209'
and \$CONDITIONS
) SI_CUSTOMER_XREF2 join
(
SELECT T.*
FROM SI_F_L0_TRADE T
WHERE T.REVENUE_DATE >= '01-JAN-2014' and \$CONDITIONS
AND
T.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD
D WHERE D.RUN_ID = '209' and \$CONDITIONS
)
) SI_F_L0_TRADE ON (
SI_F_L0_TRADE.CUSTOMER_SGK=SI_CUSTOMER_XREF2.CUSTOMER_SGK )
join
(
SELECT * FROM SI_F_L0_TRADE_GROUP_ID G where RUN_ID='209' and \$CONDITIONS
AND REVENUE_DATE >= '01-JAN-2014'
AND G.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM
DC_DATE_CARD D WHERE D.RUN_ID = '209' and \$CONDITIONS
)
) SI_F_L0_TRADE_GROUP_ID ON (
SI_F_L0_TRADE.TRADE_SGK=SI_F_L0_TRADE_GROUP_ID.TRADE_SGK )
join
( SELECT * FROM cpl_trade_instrument_ref WHERE RUN_ID= '209' --
and \$CONDITIONS
) CPL_INSTRUMENT_REF ON (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=SI_F_L0_TRADE.INSTRUMENT_SGK )
JOIN
(
SELECT * FROM CPL_INSTRUMENT_REF_EXT WHERE RUN_ID='209' and \$CONDITIONS
) CPL_INSTRUMENT_REF_EXT ON (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=CPL_INSTRUMENT_REF_EXT.INSTRUMENT_SGK)
join
(
SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID='209' and \$CONDITIONS
) ISSUER_BUSINESS_REGION ON (
ISSUER_BUSINESS_REGION.ISO_CNTRY_3_CHAR_CD=CPL_INSTRUMENT_REF_EXT.ISSUER_COUNTRY_CD
)
join
(
SELECT (product_line_cd ||
CASE WHEN nvl(new_issue_ind, '0') = '1' THEN 'P'
ELSE 'S' END || '_' ||
nvl(business_line_cd, 'N') || '_' ||
nvl(trade_type_cd, 'N') || '_' ||
nvl(region_cd, 'N'
)) product_cd, SI_GROUP_ID_XREF.* FROM SI_GROUP_ID_XREF
WHERE RUN_ID = '209' and \$CONDITIONS
) SI_GROUP_ID_XREF ON (
SI_F_L0_TRADE_GROUP_ID.GROUP_ID=SI_GROUP_ID_XREF.GROUP_ID )
JOIN
(
SELECT * FROM SI_PRODUCT_LINE WHERE RUN_ID= '209' and \$CONDITIONS
) SI_PRODUCT_LINE ON (
SI_GROUP_ID_XREF.PRODUCT_LINE_CD=SI_PRODUCT_LINE.PRODUCT_LINE_L6_SGK
)
join
(
SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID= '209' and \$CONDITIONS
) SI_BUSINESS_REGION ON (
SI_GROUP_ID_XREF.REGION_CD=SI_BUSINESS_REGION.REGION_L6_SGK )
JOIN
(
SELECT * FROM si_salesrep_xref WHERE run_id = '209' and \$CONDITIONS
) MARKETER ON ( MARKETER.SRC_SALESREP_ID=SI_F_L0_TRADE.SRC_SALES_REP_ID)
join
(
SELECT * FROM CDW_PRESENTATION.CPL_EMPLOYEE_REF WHERE run_id ='209'
and \$CONDITIONS
) MARKETER_EMPLOYEE_REF ON (
MARKETER.CS_EMPLOYEE_ID=MARKETER_EMPLOYEE_REF.EMPLOYEE_ID )
join
(
SELECT * FROM SI_BOOK WHERE RUN_ID= '209' and \$CONDITIONS
) SI_BOOK ON ( SI_BOOK.BOOK_SGK=SI_F_L0_TRADE.BOOK_SGK )
join
(SELECT * FROM cpl_sfo_country_xref ) DOMICILE_SFO_CNTRY_XREF ON
( SI_CUSTOMER_XREF2.DOMICILE_CNTRY_ISO_CD=DOMICILE_SFO_CNTRY_XREF.CNTRY_ISO_CD)
join
(
SELECT * FROM CPL_CLIENT_REF WHERE RUN_ID='209' and \$CONDITIONS
AND SRC_SYS_CD = 'GESC' ) CPL_CLIENT_REF
ON ( SI_CUSTOMER_XREF2.GESC_CLIENT_ID=CPL_CLIENT_REF.CLIENT_CD )
JOIN
(
select * from si_client_type where run_id = '209' and \$CONDITIONS
) CLIENT_TYPE ON ( CPL_CLIENT_REF.CLIENT_SGK=CLIENT_TYPE.CLIENT_SGK )
join
(
select * from si_client_types where run_id = '209' and \$CONDITIONS
) CLIENT_TYPES ON (
CLIENT_TYPE.CLIENT_TYPES_SGK=CLIENT_TYPES.CLIENT_TYPES_SGK )
WHERE MARKETER.SRC_SYS_CD=SI_F_L0_TRADE.SREP_SRC_SYS_CD and \$CONDITIONS
AND
(
( ( SI_F_L0_TRADE.IGNORE_REASON_CD ) IS NULL
AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) IS NOT NULL
AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) >0
AND SI_GROUP_ID_XREF.Can_Double_Count = 0
AND SI_GROUP_ID_XREF.Product_Line_Cd IS NOT NULL
AND SI_GROUP_ID_XREF.LOSS_IND >=0
)
AND
SI_PRODUCT_LINE.PRODUCT_LINE_L1_NAME = 'All (Equities)' )
GROUP BY
SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,
SI_CUSTOMER_XREF2.GESC_CLIENT_ID,
SI_F_L0_TRADE.DATA_SRC_SYS_CD,
SI_F_L0_TRADE.SI_TRADE_ID,
SI_CUSTOMER_XREF2.CUSTOMER_ID,
****
***
" --split-by SI_CUSTOMER_XREF2.customer_id --boundary-query "select
min(CUSTOMER_ID),max(CUSTOMER_ID) from SI_CUSTOMER_XREF2 WHERE
RUN_ID='209'" -m 48 --fields-terminated-by "|" --target-dir
"/SqoopImports/daily_eqt" --verbose;
Suresh Sethuramaswamy
Cross Div MIS & Power User Tools
From: Venkat Ranganathan [mailto:vranganathan@hortonworks.com]
Sent: Thursday, March 13, 2014 5:30 PM
To: user@sqoop.apache.org
Subject: Re: Sqoop import into a Hive table with 2 columns as partition keys
If your partition columns are dynamic, then using hcatalog import will
be enough. The Sqoop HCatalog integration section in the Sqoop user
guide has more information on the HCatalog integration (that I linked
earlier). The restriction is that you can't have more than one
static partition key.
Venkat
On Wed, Mar 12, 2014 at 11:28 PM, Richa Sharma
<ma...@gmail.com> wrote:
Thanks for the inputs.
In my case the partition columns are dynamic.. so I guess it will make
sense to load it into 2 hops.
Venkat, thanks for sharing the ppt on Hcatalog - I couldn't find much
online so it is very helpful :-)
Richa
On Thu, Mar 13, 2014 at 11:33 AM, Venkat Ranganathan
<vr...@hortonworks.com> wrote:
In Sqoop 1.4.4 we have HCatalog integration. This allows importing
data into Hive partitioned tables with multiple partition keys (but only
one can be a static partitioning key). See
http://sqoop.apache.org/docs/1.4.4/SqoopUserGuide.html#_sqoop_hcatalog_integration
There is a presentation done at Hadoop World on Hcatalog integration
that you can refer to for more information
https://cwiki.apache.org/confluence/download/attachments/27361435/SqoopHCatIntegration-HadoopWorld2013.pptx
Venkat
On Wed, Mar 12, 2014 at 9:19 PM, bejoy ks <be...@gmail.com> wrote:
AFAIK, Sqoop 1.x imports support Hive single-column partitions.
If it is multi-column partitions, you might have to take a two-hop approach:
1. Load the data into a single/non-partitioned Hive table.
2. From the above staging table, load to the final one.
On Tue, Mar 11, 2014 at 7:23 AM, Richa Sharma
<ma...@gmail.com> wrote:
Hi,
I am trying to run sqoop import into a Hive table partitioned on 2
columns (col1 string, col2 string).
I looked at the documentation for both --hive-import and --hcatalog
and could not find any such option.
It works successfully when table is partitioned on 1 column but not
when i increase number of partition columns.
Has anyone tried it and knows how to achieve it ?
Thanks,
Richa
--
Regards
Bejoy
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or
entity to which it is addressed and may contain information that is
confidential, privileged and exempt from disclosure under applicable
law. If the reader of this message is not the intended recipient, you
are hereby notified that any printing, copying, dissemination,
distribution, disclosure or forwarding of this communication is
strictly prohibited. If you have received this communication in error,
please contact the sender immediately and delete it from your system.
Thank You.
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or
entity to which it is addressed and may contain information that is
confidential, privileged and exempt from disclosure under applicable
law. If the reader of this message is not the intended recipient, you
are hereby notified that any printing, copying, dissemination,
distribution, disclosure or forwarding of this communication is
strictly prohibited. If you have received this communication in error,
please contact the sender immediately and delete it from your system.
Thank You.
==============================================================================
Please access the attached hyperlink for an important electronic
communications disclaimer:
http://www.credit-suisse.com/legal/en/disclaimer_email_ib.html
==============================================================================
--
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or entity to
which it is addressed and may contain information that is confidential,
privileged and exempt from disclosure under applicable law. If the reader
of this message is not the intended recipient, you are hereby notified that
any printing, copying, dissemination, distribution, disclosure or
forwarding of this communication is strictly prohibited. If you have
received this communication in error, please contact the sender immediately
and delete it from your system. Thank You.
===============================================================================
Please access the attached hyperlink for an important electronic communications disclaimer:
http://www.credit-suisse.com/legal/en/disclaimer_email_ib.html
===============================================================================