You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-user@hadoop.apache.org by manish dunani <ma...@gmail.com> on 2013/07/27 06:31:33 UTC
Want to Sort the values in one line using map reduce
Hi,
*I have input file and my data looks like:*
date country city pagePath visits
20120301 India Ahmedabad / 1
20120302 India Ahmedabad /gtuadmissionhelpline-team 1
20120302 India Mumbai / 1
20120302 India Mumbai /merit-calculator 1
* I wrote the map and reduce application to convert it into page_url by
city:*
*
*
*
*
*
*
*
*
package data.ga;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that reads
 * comma-separated records of the form {@code date,country,city,pagePath,visits}
 * and emits one (city, pagePath) pair per input record, grouped by city.
 *
 * <p>Usage: {@code pharmecy [inputPath [outputPath]]}. When an argument is
 * omitted the original hard-coded HDFS location is used.
 */
public class pharmecy
{
    /** Input file used when no input path is given on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt";

    /** Output directory used when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://localhost:54310/user/manish/gadataoutput11";

    /** Zero-based column of the city in an input record. */
    private static final int CITY_FIELD = 2;

    /** Zero-based column of the page path in an input record. */
    private static final int URL_FIELD = 3;

    /** Maps each input line to a (city, pagePath) pair. */
    public static class MapClass extends MapReduceBase implements
            Mapper<LongWritable,Text,Text,Text>
    {
        // Reused across calls to avoid allocating two Text objects per record.
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits one input line on commas and emits city as key, pagePath as value.
         *
         * @param key      input key supplied by the input format; not used
         * @param value    one line of the input file
         * @param output   collector receiving the (city, pagePath) pair
         * @param reporter used to count records that have too few columns
         * @throws IOException if the collector fails to accept the pair
         */
        public void map(LongWritable key, Text value,
                OutputCollector<Text,Text> output, Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",", 5);
            if (fields.length <= URL_FIELD)
            {
                // Malformed record: skip it explicitly and make it visible in the job
                // counters instead of relying on a blanket catch(Exception), which
                // also hid genuine I/O failures raised by output.collect().
                reporter.incrCounter("pharmecy", "MALFORMED_RECORDS", 1);
                return;
            }
            k.set(fields[CITY_FIELD]);
            v.set(fields[URL_FIELD]);
            output.collect(k, v);
        }
    }

    /** Emits every (city, pagePath) pair it receives unchanged, one per output line. */
    public static class ReduceClass extends MapReduceBase implements
            Reducer<Text,Text,Text,Text>
    {
        Text v = new Text();

        /**
         * Writes one output record per value of {@code key}.
         *
         * @param key      the city
         * @param values   all page paths seen for that city
         * @param output   collector receiving one (city, pagePath) pair per value
         * @param reporter not used
         * @throws IOException if the collector fails to accept a pair
         */
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text,Text> output, Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Legacy entry point, kept so existing launch commands that name
         * {@code pharmecy$ReduceClass} keep working. Delegates to
         * {@link pharmecy#main(String[])} and, as before, only prints a failure
         * instead of propagating it.
         *
         * @param args forwarded unchanged to {@link pharmecy#main(String[])}
         */
        public static void main(String[] args) {
            try {
                pharmecy.main(args);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Configures and runs the job, blocking until it completes.
     *
     * @param args optional {@code [inputPath [outputPath]]}; missing entries fall
     *             back to {@link #DEFAULT_INPUT} and {@link #DEFAULT_OUTPUT}
     * @throws IOException if the job cannot be submitted or fails
     */
    public static void main(String[] args) throws IOException {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputDir = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        JobConf conf = new JobConf(pharmecy.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(input));
        FileOutputFormat.setOutputPath(conf, new Path(outputDir));
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(ReduceClass.class);

        // runJob is static and builds its own client, so no JobClient instance is needed.
        JobClient.runJob(conf);
    }
}
*Output:*
*
*
*
*
*#city* * #pagepath*
"Aachen" "/medicalcollege/m-p-shah-medical-college"
"Abbottabad" "/merit-calculator"
"Abbottabad" "/merit-calculator"
"Abidjan" "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan" "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
*My question is:*
I want to convert this output in below format::
#city #pagepath
city1 url1,url2,url3
city2 url1,url2,url3
Is it possible to convert it in this format using map and reduce ???
If yes then how??
--
MANISH DUNANI
-THANX
Re: Want to Sort the values in one line using map reduce
Posted by manish dunani <ma...@gmail.com>.
*"Thanks a lot Devraj!!!!!!!!"*
On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:
> You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
> *Reducer*<Text, Text, Text, Text> {****
>
> Text v = *new* Text();****
>
> ** **
>
> *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
> *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
> *throws* IOException {****
>
> StringBuffer value = *new* StringBuffer();****
>
> *while* (values.hasNext()){****
>
> value.append(values.next().toString());****
>
> value.append(",");****
>
> }****
>
> v.set(value.toString());****
>
> output.collect(key, v);****
>
> }****
>
> }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
> date ****
>
> country****
>
> city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
> public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
> {****
>
> Text k = new Text();****
>
> Text v = new Text();****
>
> ****
>
> public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
> {****
>
> try****
>
> {****
>
> String[] line =
> value.toString().split(",",5);****
>
> ****
>
> String city = String.valueOf(line[2]);
> ****
>
> String url = String.valueOf(line[3]);*
> ***
>
> ****
>
> k.set(city);****
>
> v.set(url);****
>
> ****
>
> output.collect(k, v);****
>
> }****
>
> catch(Exception e)****
>
> {****
>
> System.out.println(e);****
>
> }****
>
> **
> **
>
> }****
>
> }****
>
> ****
>
> public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
> {****
>
> Text v = new Text();****
>
> ****
>
> public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
> {****
>
> ****
>
> ****
>
> while(values.hasNext())****
>
> ****
>
> {****
>
> String
> val=values.next().toString();****
>
> ****
>
> v.set(val);****
>
>
> ****
>
> output.collect(key,v);****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> public static void main(String[] args) {****
>
> JobClient client = new JobClient();****
>
> JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
> conf.setMapOutputKeyClass(Text.class);****
>
> conf.setMapOutputValueClass(Text.class);****
>
> // TODO: specify output types****
>
> conf.setOutputKeyClass(Text.class);****
>
> conf.setOutputValueClass(Text.class);****
>
> ** **
>
> FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
> FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
> conf.setInputFormat(TextInputFormat.class);****
>
> conf.setOutputFormat(TextOutputFormat.class);****
>
> ****
>
> conf.setMapperClass(MapClass.class);****
>
> conf.setReducerClass(ReduceClass.class);****
>
> ****
>
> client.setConf(conf);****
>
> try {****
>
> JobClient.runJob(conf);****
>
> } catch (Exception e) {****
>
> e.printStackTrace();****
>
> }****
>
> }****
>
> ** **
>
> }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city* * #pagepath*****
>
> "Aachen" "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city #pagepath****
>
> city1 url1,url2,url3****
>
> city2 url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>
--
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com
Re: Want to Sort the values in one line using map reduce
Posted by manish dunani <ma...@gmail.com>.
*"Thanks a lot Devraj!!!!!!!!"*
On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:
> You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
> *Reducer*<Text, Text, Text, Text> {****
>
> Text v = *new* Text();****
>
> ** **
>
> *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
> *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
> *throws* IOException {****
>
> StringBuffer value = *new* StringBuffer();****
>
> *while* (values.hasNext()){****
>
> value.append(values.next().toString());****
>
> value.append(",");****
>
> }****
>
> v.set(value.toString());****
>
> output.collect(key, v);****
>
> }****
>
> }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
> date ****
>
> country****
>
> city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
> public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
> {****
>
> Text k = new Text();****
>
> Text v = new Text();****
>
> ****
>
> public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
> {****
>
> try****
>
> {****
>
> String[] line =
> value.toString().split(",",5);****
>
> ****
>
> String city = String.valueOf(line[2]);
> ****
>
> String url = String.valueOf(line[3]);*
> ***
>
> ****
>
> k.set(city);****
>
> v.set(url);****
>
> ****
>
> output.collect(k, v);****
>
> }****
>
> catch(Exception e)****
>
> {****
>
> System.out.println(e);****
>
> }****
>
> **
> **
>
> }****
>
> }****
>
> ****
>
> public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
> {****
>
> Text v = new Text();****
>
> ****
>
> public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
> {****
>
> ****
>
> ****
>
> while(values.hasNext())****
>
> ****
>
> {****
>
> String
> val=values.next().toString();****
>
> ****
>
> v.set(val);****
>
>
> ****
>
> output.collect(key,v);****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> public static void main(String[] args) {****
>
> JobClient client = new JobClient();****
>
> JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
> conf.setMapOutputKeyClass(Text.class);****
>
> conf.setMapOutputValueClass(Text.class);****
>
> // TODO: specify output types****
>
> conf.setOutputKeyClass(Text.class);****
>
> conf.setOutputValueClass(Text.class);****
>
> ** **
>
> FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
> FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
> conf.setInputFormat(TextInputFormat.class);****
>
> conf.setOutputFormat(TextOutputFormat.class);****
>
> ****
>
> conf.setMapperClass(MapClass.class);****
>
> conf.setReducerClass(ReduceClass.class);****
>
> ****
>
> client.setConf(conf);****
>
> try {****
>
> JobClient.runJob(conf);****
>
> } catch (Exception e) {****
>
> e.printStackTrace();****
>
> }****
>
> }****
>
> ** **
>
> }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city* * #pagepath*****
>
> "Aachen" "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city #pagepath****
>
> city1 url1,url2,url3****
>
> city2 url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>
--
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com
Re: Want to Sort the values in one line using map reduce
Posted by manish dunani <ma...@gmail.com>.
*"Thanks a lot Devraj!!!!!!!!"*
On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:
> You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
> *Reducer*<Text, Text, Text, Text> {****
>
> Text v = *new* Text();****
>
> ** **
>
> *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
> *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
> *throws* IOException {****
>
> StringBuffer value = *new* StringBuffer();****
>
> *while* (values.hasNext()){****
>
> value.append(values.next().toString());****
>
> value.append(",");****
>
> }****
>
> v.set(value.toString());****
>
> output.collect(key, v);****
>
> }****
>
> }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
> date ****
>
> country****
>
> city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
> public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
> {****
>
> Text k = new Text();****
>
> Text v = new Text();****
>
> ****
>
> public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
> {****
>
> try****
>
> {****
>
> String[] line =
> value.toString().split(",",5);****
>
> ****
>
> String city = String.valueOf(line[2]);
> ****
>
> String url = String.valueOf(line[3]);*
> ***
>
> ****
>
> k.set(city);****
>
> v.set(url);****
>
> ****
>
> output.collect(k, v);****
>
> }****
>
> catch(Exception e)****
>
> {****
>
> System.out.println(e);****
>
> }****
>
> **
> **
>
> }****
>
> }****
>
> ****
>
> public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
> {****
>
> Text v = new Text();****
>
> ****
>
> public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
> {****
>
> ****
>
> ****
>
> while(values.hasNext())****
>
> ****
>
> {****
>
> String
> val=values.next().toString();****
>
> ****
>
> v.set(val);****
>
>
> ****
>
> output.collect(key,v);****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> public static void main(String[] args) {****
>
> JobClient client = new JobClient();****
>
> JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
> conf.setMapOutputKeyClass(Text.class);****
>
> conf.setMapOutputValueClass(Text.class);****
>
> // TODO: specify output types****
>
> conf.setOutputKeyClass(Text.class);****
>
> conf.setOutputValueClass(Text.class);****
>
> ** **
>
> FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
> FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
> conf.setInputFormat(TextInputFormat.class);****
>
> conf.setOutputFormat(TextOutputFormat.class);****
>
> ****
>
> conf.setMapperClass(MapClass.class);****
>
> conf.setReducerClass(ReduceClass.class);****
>
> ****
>
> client.setConf(conf);****
>
> try {****
>
> JobClient.runJob(conf);****
>
> } catch (Exception e) {****
>
> e.printStackTrace();****
>
> }****
>
> }****
>
> ** **
>
> }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city* * #pagepath*****
>
> "Aachen" "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city #pagepath****
>
> city1 url1,url2,url3****
>
> city2 url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>
--
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com
Re: Want to Sort the values in one line using map reduce
Posted by manish dunani <ma...@gmail.com>.
*"Thanks a lot Devraj!!!!!!!!"*
On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:
> You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
> *Reducer*<Text, Text, Text, Text> {****
>
> Text v = *new* Text();****
>
> ** **
>
> *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
> *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
> *throws* IOException {****
>
> StringBuffer value = *new* StringBuffer();****
>
> *while* (values.hasNext()){****
>
> value.append(values.next().toString());****
>
> value.append(",");****
>
> }****
>
> v.set(value.toString());****
>
> output.collect(key, v);****
>
> }****
>
> }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
> date ****
>
> country****
>
> city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
> public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
> {****
>
> Text k = new Text();****
>
> Text v = new Text();****
>
> ****
>
> public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
> {****
>
> try****
>
> {****
>
> String[] line =
> value.toString().split(",",5);****
>
> ****
>
> String city = String.valueOf(line[2]);
> ****
>
> String url = String.valueOf(line[3]);*
> ***
>
> ****
>
> k.set(city);****
>
> v.set(url);****
>
> ****
>
> output.collect(k, v);****
>
> }****
>
> catch(Exception e)****
>
> {****
>
> System.out.println(e);****
>
> }****
>
> **
> **
>
> }****
>
> }****
>
> ****
>
> public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
> {****
>
> Text v = new Text();****
>
> ****
>
> public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
> {****
>
> ****
>
> ****
>
> while(values.hasNext())****
>
> ****
>
> {****
>
> String
> val=values.next().toString();****
>
> ****
>
> v.set(val);****
>
>
> ****
>
> output.collect(key,v);****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> }****
>
> ****
>
> ****
>
> public static void main(String[] args) {****
>
> JobClient client = new JobClient();****
>
> JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
> conf.setMapOutputKeyClass(Text.class);****
>
> conf.setMapOutputValueClass(Text.class);****
>
> // TODO: specify output types****
>
> conf.setOutputKeyClass(Text.class);****
>
> conf.setOutputValueClass(Text.class);****
>
> ** **
>
> FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
> FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
> conf.setInputFormat(TextInputFormat.class);****
>
> conf.setOutputFormat(TextOutputFormat.class);****
>
> ****
>
> conf.setMapperClass(MapClass.class);****
>
> conf.setReducerClass(ReduceClass.class);****
>
> ****
>
> client.setConf(conf);****
>
> try {****
>
> JobClient.runJob(conf);****
>
> } catch (Exception e) {****
>
> e.printStackTrace();****
>
> }****
>
> }****
>
> ** **
>
> }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city* * #pagepath*****
>
> "Aachen" "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abbottabad" "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city #pagepath****
>
> city1 url1,url2,url3****
>
> city2 url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>
--
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com
RE: Want to Sort the values in one line using map reduce
Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function slightly, like this:
/**
 * Reducer that collapses all values of a key into a single comma-separated
 * line, i.e. {@code city -> url1,url2,url3}.
 */
public static class ReduceClass extends MapReduceBase implements
        Reducer<Text, Text, Text, Text> {
    Text v = new Text();

    /**
     * Joins every value of {@code key} with ',' and emits one record for the key.
     *
     * @param key      the grouping key (the city)
     * @param values   all values collected for that key (the page paths)
     * @param output   collector receiving the single (key, joined values) pair
     * @param reporter not used
     * @throws IOException if the collector fails to accept the pair
     */
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        // StringBuilder: the buffer is local to this call, so the
        // synchronization of StringBuffer buys nothing.
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        while (values.hasNext()) {
            // Write the separator BEFORE every value except the first, so the
            // line never ends with a dangling ','. A flag is used rather than
            // joined.length() so an empty first value is still delimited.
            if (!first) {
                joined.append(',');
            }
            joined.append(values.next().toString());
            first = false;
        }
        v.set(joined.toString());
        output.collect(key, v);
    }
}
In the above reduce function, you can add a condition to avoid the extra ',' at the end of each value line.
Thanks
Devaraj k
From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce
Hi,
I have input file and my data looks like:
date
country
city
pagePath
visits
20120301
India
Ahmedabad
/
1
20120302
India
Ahmedabad
/gtuadmissionhelpline-team
1
20120302
India
Mumbai
/
1
20120302
India
Mumbai
/merit-calculator
1
I wrote the map and reduce application to convert it into page_url by city:
package data.ga;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that reads
 * comma-separated records of the form {@code date,country,city,pagePath,visits}
 * and emits one (city, pagePath) pair per input record, grouped by city.
 *
 * <p>Usage: {@code pharmecy [inputPath [outputPath]]}. When an argument is
 * omitted the original hard-coded HDFS location is used.
 */
public class pharmecy
{
    /** Input file used when no input path is given on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt";

    /** Output directory used when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://localhost:54310/user/manish/gadataoutput11";

    /** Zero-based column of the city in an input record. */
    private static final int CITY_FIELD = 2;

    /** Zero-based column of the page path in an input record. */
    private static final int URL_FIELD = 3;

    /** Maps each input line to a (city, pagePath) pair. */
    public static class MapClass extends MapReduceBase implements
            Mapper<LongWritable,Text,Text,Text>
    {
        // Reused across calls to avoid allocating two Text objects per record.
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits one input line on commas and emits city as key, pagePath as value.
         *
         * @param key      input key supplied by the input format; not used
         * @param value    one line of the input file
         * @param output   collector receiving the (city, pagePath) pair
         * @param reporter used to count records that have too few columns
         * @throws IOException if the collector fails to accept the pair
         */
        public void map(LongWritable key, Text value,
                OutputCollector<Text,Text> output, Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",", 5);
            if (fields.length <= URL_FIELD)
            {
                // Malformed record: skip it explicitly and make it visible in the job
                // counters instead of relying on a blanket catch(Exception), which
                // also hid genuine I/O failures raised by output.collect().
                reporter.incrCounter("pharmecy", "MALFORMED_RECORDS", 1);
                return;
            }
            k.set(fields[CITY_FIELD]);
            v.set(fields[URL_FIELD]);
            output.collect(k, v);
        }
    }

    /** Emits every (city, pagePath) pair it receives unchanged, one per output line. */
    public static class ReduceClass extends MapReduceBase implements
            Reducer<Text,Text,Text,Text>
    {
        Text v = new Text();

        /**
         * Writes one output record per value of {@code key}.
         *
         * @param key      the city
         * @param values   all page paths seen for that city
         * @param output   collector receiving one (city, pagePath) pair per value
         * @param reporter not used
         * @throws IOException if the collector fails to accept a pair
         */
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text,Text> output, Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Legacy entry point, kept so existing launch commands that name
         * {@code pharmecy$ReduceClass} keep working. Delegates to
         * {@link pharmecy#main(String[])} and, as before, only prints a failure
         * instead of propagating it.
         *
         * @param args forwarded unchanged to {@link pharmecy#main(String[])}
         */
        public static void main(String[] args) {
            try {
                pharmecy.main(args);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Configures and runs the job, blocking until it completes.
     *
     * @param args optional {@code [inputPath [outputPath]]}; missing entries fall
     *             back to {@link #DEFAULT_INPUT} and {@link #DEFAULT_OUTPUT}
     * @throws IOException if the job cannot be submitted or fails
     */
    public static void main(String[] args) throws IOException {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputDir = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        JobConf conf = new JobConf(pharmecy.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(input));
        FileOutputFormat.setOutputPath(conf, new Path(outputDir));
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(ReduceClass.class);

        // runJob is static and builds its own client, so no JobClient instance is needed.
        JobClient.runJob(conf);
    }
}
Output:
#city #pagepath
"Aachen" "/medicalcollege/m-p-shah-medical-college"
"Abbottabad" "/merit-calculator"
"Abbottabad" "/merit-calculator"
"Abidjan" "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan" "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
My question is:
I want to convert this output in below format::
#city #pagepath
city1 url1,url2,url3
city2 url1,url2,url3
Is it possible to convert it in this format using map and reduce ???
If yes then how??
--
MANISH DUNANI
-THANX
RE: Want to Sort the values in one line using map reduce
Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function slightly, like this:
/**
 * Reducer that collapses all values of a key into a single comma-separated
 * line, i.e. {@code city -> url1,url2,url3}.
 */
public static class ReduceClass extends MapReduceBase implements
        Reducer<Text, Text, Text, Text> {
    Text v = new Text();

    /**
     * Joins every value of {@code key} with ',' and emits one record for the key.
     *
     * @param key      the grouping key (the city)
     * @param values   all values collected for that key (the page paths)
     * @param output   collector receiving the single (key, joined values) pair
     * @param reporter not used
     * @throws IOException if the collector fails to accept the pair
     */
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        // StringBuilder: the buffer is local to this call, so the
        // synchronization of StringBuffer buys nothing.
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        while (values.hasNext()) {
            // Write the separator BEFORE every value except the first, so the
            // line never ends with a dangling ','. A flag is used rather than
            // joined.length() so an empty first value is still delimited.
            if (!first) {
                joined.append(',');
            }
            joined.append(values.next().toString());
            first = false;
        }
        v.set(joined.toString());
        output.collect(key, v);
    }
}
In the reduce function above, make sure the ',' separator is added only between values, so that no extra ',' is left at the end of each output line.
Thanks
Devaraj k
From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce
Hi,
I have an input file, and my data looks like this:
date      country  city       pagePath                    visits
20120301  India    Ahmedabad  /                           1
20120302  India    Ahmedabad  /gtuadmissionhelpline-team  1
20120302  India    Mumbai     /                           1
20120302  India    Mumbai     /merit-calculator           1
I wrote the map and reduce application to convert it into page_url by city:
package data.ga;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that lists page paths
 * by city: the mapper emits one (city, pagePath) pair per comma-separated input
 * line and the reducer writes each pair to the output unchanged.
 */
public class pharmecy
{
    /** Mapper that extracts the third field (city) and fourth field (pagePath) of each line. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Key/value holders, reused for every record instead of allocating new Text objects.
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits (city, pagePath) for one line of input. Lines with fewer than four
         * comma-separated fields are reported on stderr and skipped.
         *
         * @param key      input key; not used
         * @param value    one line of input, fields separated by ','
         * @param output   collector receiving the (city, pagePath) pair
         * @param reporter not used
         * @throws IOException if the collector fails to accept the pair
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",",5);
            if (fields.length < 4)
            {
                // Not enough fields to hold a city and a page path.
                System.err.println("Skipping malformed line: " + value);
                return;
            }
            k.set(fields[2]);
            v.set(fields[3]);
            // Deliberately not wrapped in a catch-all: a failed collect must
            // surface as an IOException rather than silently lose the record.
            output.collect(k, v);
        }
    }

    /**
     * Reducer that writes every (key, value) pair it receives to the output unchanged,
     * producing one output record per value. The job driver {@code main} is also
     * declared in this class.
     */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output value holder, reused for every record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for each value grouped under {@code key}.
         *
         * @param key      the grouping key (the city emitted by MapClass)
         * @param values   all values collected for {@code key}
         * @param output   collector receiving one record per value
         * @param reporter not used
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while(values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key,v);
            }
        }

        /**
         * Configures the job (Text keys and values, text input and output, fixed HDFS
         * input and output paths) and submits it through {@code JobClient.runJob}.
         * Exits with status 1 if the submission throws an {@code IOException}.
         *
         * NOTE(review): this method is declared inside ReduceClass, so the class to
         * launch is pharmecy.ReduceClass rather than pharmecy - confirm the nesting
         * is intended.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Types emitted by the mapper and by the reducer.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // No JobClient instance is created: the job is submitted via the static runJob.
            try {
                JobClient.runJob(conf);
            } catch (IOException e) {
                // Report the failure through the exit status instead of returning normally.
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
}
Output:
#city #pagepath
"Aachen" "/medicalcollege/m-p-shah-medical-college"
"Abbottabad" "/merit-calculator"
"Abbottabad" "/merit-calculator"
"Abidjan" "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan" "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
My question is:
I want to convert this output into the format below:
#city #pagepath
city1 url1,url2,url3
city2 url1,url2,url3
Is it possible to convert it into this format using map and reduce?
If yes, then how?
--
MANISH DUNANI
-THANX
RE: Want to Sort the values in one line using map reduce
Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function a little, like this:
/**
 * Reducer that joins all values of a key into a single comma-separated output
 * record, e.g. {@code city1 url1,url2,url3}.
 */
public static class ReduceClass extends MapReduceBase implements
        Reducer<Text, Text, Text, Text> {
    // Output value holder, reused for every key.
    Text v = new Text();

    /**
     * Emits exactly one record for {@code key} whose value is every element of
     * {@code values} joined with ','.
     *
     * @param key      the grouping key
     * @param values   all values collected for {@code key}
     * @param output   collector receiving the single joined record
     * @param reporter not used
     * @throws IOException if the collector fails to accept the record
     */
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        while (values.hasNext()) {
            // The separator goes before every value except the first, so the
            // joined line never ends with a stray ','.
            if (!first) {
                joined.append(',');
            }
            joined.append(values.next().toString());
            first = false;
        }
        v.set(joined.toString());
        output.collect(key, v);
    }
}
In the reduce function above, make sure the ',' separator is added only between values, so that no extra ',' is left at the end of each output line.
Thanks
Devaraj k
From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce
Hi,
I have an input file, and my data looks like this:
date      country  city       pagePath                    visits
20120301  India    Ahmedabad  /                           1
20120302  India    Ahmedabad  /gtuadmissionhelpline-team  1
20120302  India    Mumbai     /                           1
20120302  India    Mumbai     /merit-calculator           1
I wrote the map and reduce application to convert it into page_url by city:
package data.ga;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that lists page paths
 * by city: the mapper emits one (city, pagePath) pair per comma-separated input
 * line and the reducer writes each pair to the output unchanged.
 */
public class pharmecy
{
    /** Mapper that extracts the third field (city) and fourth field (pagePath) of each line. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Key/value holders, reused for every record instead of allocating new Text objects.
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits (city, pagePath) for one line of input. Lines with fewer than four
         * comma-separated fields are reported on stderr and skipped.
         *
         * @param key      input key; not used
         * @param value    one line of input, fields separated by ','
         * @param output   collector receiving the (city, pagePath) pair
         * @param reporter not used
         * @throws IOException if the collector fails to accept the pair
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",",5);
            if (fields.length < 4)
            {
                // Not enough fields to hold a city and a page path.
                System.err.println("Skipping malformed line: " + value);
                return;
            }
            k.set(fields[2]);
            v.set(fields[3]);
            // Deliberately not wrapped in a catch-all: a failed collect must
            // surface as an IOException rather than silently lose the record.
            output.collect(k, v);
        }
    }

    /**
     * Reducer that writes every (key, value) pair it receives to the output unchanged,
     * producing one output record per value. The job driver {@code main} is also
     * declared in this class.
     */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output value holder, reused for every record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for each value grouped under {@code key}.
         *
         * @param key      the grouping key (the city emitted by MapClass)
         * @param values   all values collected for {@code key}
         * @param output   collector receiving one record per value
         * @param reporter not used
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while(values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key,v);
            }
        }

        /**
         * Configures the job (Text keys and values, text input and output, fixed HDFS
         * input and output paths) and submits it through {@code JobClient.runJob}.
         * Exits with status 1 if the submission throws an {@code IOException}.
         *
         * NOTE(review): this method is declared inside ReduceClass, so the class to
         * launch is pharmecy.ReduceClass rather than pharmecy - confirm the nesting
         * is intended.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Types emitted by the mapper and by the reducer.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // No JobClient instance is created: the job is submitted via the static runJob.
            try {
                JobClient.runJob(conf);
            } catch (IOException e) {
                // Report the failure through the exit status instead of returning normally.
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
}
Output:
#city #pagepath
"Aachen" "/medicalcollege/m-p-shah-medical-college"
"Abbottabad" "/merit-calculator"
"Abbottabad" "/merit-calculator"
"Abidjan" "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan" "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
My question is:
I want to convert this output into the format below:
#city #pagepath
city1 url1,url2,url3
city2 url1,url2,url3
Is it possible to convert it into this format using map and reduce?
If yes, then how?
--
MANISH DUNANI
-THANX
RE: Want to Sort the values in one line using map reduce
Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function a little, like this:
/**
 * Reducer that joins all values of a key into a single comma-separated output
 * record, e.g. {@code city1 url1,url2,url3}.
 */
public static class ReduceClass extends MapReduceBase implements
        Reducer<Text, Text, Text, Text> {
    // Output value holder, reused for every key.
    Text v = new Text();

    /**
     * Emits exactly one record for {@code key} whose value is every element of
     * {@code values} joined with ','.
     *
     * @param key      the grouping key
     * @param values   all values collected for {@code key}
     * @param output   collector receiving the single joined record
     * @param reporter not used
     * @throws IOException if the collector fails to accept the record
     */
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        while (values.hasNext()) {
            // The separator goes before every value except the first, so the
            // joined line never ends with a stray ','.
            if (!first) {
                joined.append(',');
            }
            joined.append(values.next().toString());
            first = false;
        }
        v.set(joined.toString());
        output.collect(key, v);
    }
}
In the reduce function above, make sure the ',' separator is added only between values, so that no extra ',' is left at the end of each output line.
Thanks
Devaraj k
From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce
Hi,
I have an input file, and my data looks like this:
date      country  city       pagePath                    visits
20120301  India    Ahmedabad  /                           1
20120302  India    Ahmedabad  /gtuadmissionhelpline-team  1
20120302  India    Mumbai     /                           1
20120302  India    Mumbai     /merit-calculator           1
I wrote the map and reduce application to convert it into page_url by city:
package data.ga;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that lists page paths
 * by city: the mapper emits one (city, pagePath) pair per comma-separated input
 * line and the reducer writes each pair to the output unchanged.
 */
public class pharmecy
{
    /** Mapper that extracts the third field (city) and fourth field (pagePath) of each line. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Key/value holders, reused for every record instead of allocating new Text objects.
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits (city, pagePath) for one line of input. Lines with fewer than four
         * comma-separated fields are reported on stderr and skipped.
         *
         * @param key      input key; not used
         * @param value    one line of input, fields separated by ','
         * @param output   collector receiving the (city, pagePath) pair
         * @param reporter not used
         * @throws IOException if the collector fails to accept the pair
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",",5);
            if (fields.length < 4)
            {
                // Not enough fields to hold a city and a page path.
                System.err.println("Skipping malformed line: " + value);
                return;
            }
            k.set(fields[2]);
            v.set(fields[3]);
            // Deliberately not wrapped in a catch-all: a failed collect must
            // surface as an IOException rather than silently lose the record.
            output.collect(k, v);
        }
    }

    /**
     * Reducer that writes every (key, value) pair it receives to the output unchanged,
     * producing one output record per value. The job driver {@code main} is also
     * declared in this class.
     */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output value holder, reused for every record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for each value grouped under {@code key}.
         *
         * @param key      the grouping key (the city emitted by MapClass)
         * @param values   all values collected for {@code key}
         * @param output   collector receiving one record per value
         * @param reporter not used
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while(values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key,v);
            }
        }

        /**
         * Configures the job (Text keys and values, text input and output, fixed HDFS
         * input and output paths) and submits it through {@code JobClient.runJob}.
         * Exits with status 1 if the submission throws an {@code IOException}.
         *
         * NOTE(review): this method is declared inside ReduceClass, so the class to
         * launch is pharmecy.ReduceClass rather than pharmecy - confirm the nesting
         * is intended.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Types emitted by the mapper and by the reducer.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // No JobClient instance is created: the job is submitted via the static runJob.
            try {
                JobClient.runJob(conf);
            } catch (IOException e) {
                // Report the failure through the exit status instead of returning normally.
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
}
Output:
#city #pagepath
"Aachen" "/medicalcollege/m-p-shah-medical-college"
"Abbottabad" "/merit-calculator"
"Abbottabad" "/merit-calculator"
"Abidjan" "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan" "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
My question is:
I want to convert this output into the format below:
#city #pagepath
city1 url1,url2,url3
city2 url1,url2,url3
Is it possible to convert it into this format using map and reduce?
If yes, then how?
--
MANISH DUNANI
-THANX