You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-user@hadoop.apache.org by manish dunani <ma...@gmail.com> on 2013/07/27 06:31:33 UTC

Want to Sort the values in one line using map reduce

Hi,

*I have input file and my data looks like:*

date      country  city       pagePath                    visits
20120301  India    Ahmedabad  /                           1
20120302  India    Ahmedabad  /gtuadmissionhelpline-team  1
20120302  India    Mumbai     /                           1
20120302  India    Mumbai     /merit-calculator           1





*I wrote a map and reduce application to convert it into page URLs grouped by
city:*

package data.ga;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that pairs each
 * city with the page paths recorded for it.
 *
 * <p>Every input line is split on commas; the third field is used as the city
 * and the fourth as the page path. The job writes one {@code city <TAB> path}
 * record per input line, grouped by city.
 *
 * <p>Usage: {@code pharmecy [inputPath [outputPath]]}. Paths that are not
 * given fall back to the defaults below.
 */
public class pharmecy
{
    /** Input path used when none is supplied on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt";

    /** Output path used when none is supplied on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://localhost:54310/user/manish/gadataoutput11";

    /** Zero-based index of the city column in a comma-separated input line. */
    private static final int CITY_FIELD = 2;

    /** Zero-based index of the page-path column in a comma-separated input line. */
    private static final int URL_FIELD = 3;

    /** Split limit: anything after the fourth comma stays in the last field. */
    private static final int MAX_FIELDS = 5;

    /** Counter group used for this job's diagnostics. */
    private static final String COUNTER_GROUP = "pharmecy";

    /**
     * Emits {@code (city, pagePath)} for every input line that has enough
     * comma-separated fields.
     */
    public static class MapClass extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, Text>
    {
        // Reused across calls to avoid allocating two Text objects per record.
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits one input line and emits its city and page path.
         *
         * @param key      key supplied by the input format; not used
         * @param value    one line of comma-separated text
         * @param output   collector receiving the {@code (city, pagePath)} pair
         * @param reporter used to count lines that are too short to parse
         * @throws IOException if the collector fails to accept the record
         */
        public void map(LongWritable key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",", MAX_FIELDS);

            // A line without a page-path column cannot be used. Count it and
            // move on instead of relying on an ArrayIndexOutOfBoundsException
            // being caught and printed.
            if (fields.length <= URL_FIELD)
            {
                reporter.incrCounter(COUNTER_GROUP, "MALFORMED_LINES", 1);
                return;
            }

            k.set(fields[CITY_FIELD]);
            v.set(fields[URL_FIELD]);

            // I/O failures are deliberately not caught here: swallowing them
            // would silently drop records while the task reports success.
            output.collect(k, v);
        }
    }

    /**
     * Writes every page path it receives for a city as its own output record,
     * so a city with several paths appears on several output lines.
     */
    public static class ReduceClass extends MapReduceBase implements
            Reducer<Text, Text, Text, Text>
    {
        // Reused output value.
        Text v = new Text();

        /**
         * Forwards each value for {@code key} unchanged.
         *
         * @param key      the city
         * @param values   page paths emitted by the mapper for this city
         * @param output   collector receiving one record per page path
         * @param reporter not used
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Delegates to {@link pharmecy#main(String[])}.
         *
         * @param args forwarded unchanged
         * @deprecated the entry point used to live here; launch
         *             {@code data.ga.pharmecy} instead.
         */
        @Deprecated
        public static void main(String[] args)
        {
            pharmecy.main(args);
        }
    }

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * <p>Exits the JVM with status 1 if the job cannot be submitted or fails,
     * so that calling scripts can detect the failure.
     *
     * @param args optional {@code inputPath} and {@code outputPath}
     */
    public static void main(String[] args)
    {
        String inputPath = argOrDefault(args, 0, DEFAULT_INPUT);
        String outputPath = argOrDefault(args, 1, DEFAULT_OUTPUT);

        JobConf conf = new JobConf(pharmecy.class);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(inputPath));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(ReduceClass.class);

        try
        {
            JobClient.runJob(conf);
        }
        catch (IOException e)
        {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback)
    {
        return (args != null && args.length > index) ? args[index] : fallback;
    }
}

*Output:*

#city          #pagepath
"Aachen"       "/medicalcollege/m-p-shah-medical-college"
"Abbottabad"   "/merit-calculator"
"Abbottabad"   "/merit-calculator"
"Abidjan"      "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan"      "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"


*My question is:*

I want to convert this output into the format below:

#city                        #pagepath
city1                        url1,url2,url3
city2                        url1,url2,url3

Is it possible to convert it into this format using map and reduce?

If yes, then how?

-- 
MANISH DUNANI
-THANX

Re: Want to Sort the values in one line using map reduce

Posted by manish dunani <ma...@gmail.com>.
     *"Thanks a lot Devraj!!!!!!!!"*


On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:

> You are almost there. To get the desired output you only need to change
> the reduce function a little, like this:
>
> ** **
>
> public static class ReduceClass extends MapReduceBase implements
>       Reducer<Text, Text, Text, Text> {
>     Text v = new Text();
>
>     public void reduce(Text key, Iterator<Text> values,
>         OutputCollector<Text, Text> output, Reporter reporter)
>         throws IOException {
>       StringBuffer value = new StringBuffer();
>       while (values.hasNext()){
>         value.append(values.next().toString());
>         value.append(",");
>       }
>       v.set(value.toString());
>       output.collect(key, v);
>     }
>   }
>
> In the above reduce function you can add a condition to avoid the extra
> ‘,’ at the end of each value line.
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
>    date ****
>
> country****
>
>  city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
>             public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
>             {****
>
>                         Text k = new Text();****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
>                         {****
>
>                                     try****
>
>                                     {****
>
>                                     String[] line =
> value.toString().split(",",5);****
>
>                                     ****
>
>                                     String city = String.valueOf(line[2]);
> ****
>
>                                     String url = String.valueOf(line[3]);*
> ***
>
>                                     ****
>
>                                     k.set(city);****
>
>                                     v.set(url);****
>
>                                     ****
>
>                                     output.collect(k, v);****
>
>                                     }****
>
>                                     catch(Exception e)****
>
>                                     {****
>
>                                                 System.out.println(e);****
>
>                                     }****
>
>                                                                         **
> **
>
>                         }****
>
>             }****
>
>             ****
>
>             public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
>             {****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
>                         {****
>
>                          ****
>
>                         ****
>
>                                     while(values.hasNext())****
>
>                                       ****
>
>                                     {****
>
>                                                 String
> val=values.next().toString();****
>
>                                                 ****
>
>                                                 v.set(val);****
>
>
> ****
>
>                                                 output.collect(key,v);****
>
>                                     ****
>
>                                     ****
>
>                                     }****
>
>                                     ****
>
>                                     ****
>
>                         }****
>
>             ****
>
>             ****
>
>             public static void main(String[] args) {****
>
>                         JobClient client = new JobClient();****
>
>                         JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
>                         conf.setMapOutputKeyClass(Text.class);****
>
>                         conf.setMapOutputValueClass(Text.class);****
>
>                         // TODO: specify output types****
>
>                         conf.setOutputKeyClass(Text.class);****
>
>                         conf.setOutputValueClass(Text.class);****
>
> ** **
>
>                         FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
>                         FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
>                         conf.setInputFormat(TextInputFormat.class);****
>
>                         conf.setOutputFormat(TextOutputFormat.class);****
>
>                         ****
>
>                         conf.setMapperClass(MapClass.class);****
>
>                         conf.setReducerClass(ReduceClass.class);****
>
>                         ****
>
>                         client.setConf(conf);****
>
>                         try {****
>
>                                     JobClient.runJob(conf);****
>
>                         } catch (Exception e) {****
>
>                                     e.printStackTrace();****
>
>                         }****
>
>             }****
>
> ** **
>
>             }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city*                 * #pagepath*****
>
> "Aachen"                 "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city                        #pagepath****
>
> city1                        url1,url2,url3****
>
> city2                        url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>



-- 
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com

Re: Want to Sort the values in one line using map reduce

Posted by manish dunani <ma...@gmail.com>.
     *"Thanks a lot Devraj!!!!!!!!"*


On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:

>  You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
>       *Reducer*<Text, Text, Text, Text> {****
>
>     Text v = *new* Text();****
>
> ** **
>
>     *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
>         *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
>         *throws* IOException {****
>
>       StringBuffer value = *new* StringBuffer();****
>
>       *while* (values.hasNext()){****
>
>         value.append(values.next().toString());****
>
>         value.append(",");****
>
>       }****
>
>       v.set(value.toString());****
>
>       output.collect(key, v);****
>
>     }****
>
>   }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
>    date ****
>
> country****
>
>  city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
>             public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
>             {****
>
>                         Text k = new Text();****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
>                         {****
>
>                                     try****
>
>                                     {****
>
>                                     String[] line =
> value.toString().split(",",5);****
>
>                                     ****
>
>                                     String city = String.valueOf(line[2]);
> ****
>
>                                     String url = String.valueOf(line[3]);*
> ***
>
>                                     ****
>
>                                     k.set(city);****
>
>                                     v.set(url);****
>
>                                     ****
>
>                                     output.collect(k, v);****
>
>                                     }****
>
>                                     catch(Exception e)****
>
>                                     {****
>
>                                                 System.out.println(e);****
>
>                                     }****
>
>                                                                         **
> **
>
>                         }****
>
>             }****
>
>             ****
>
>             public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
>             {****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
>                         {****
>
>                          ****
>
>                         ****
>
>                                     while(values.hasNext())****
>
>                                       ****
>
>                                     {****
>
>                                                 String
> val=values.next().toString();****
>
>                                                 ****
>
>                                                 v.set(val);****
>
>
> ****
>
>                                                 output.collect(key,v);****
>
>                                     ****
>
>                                     ****
>
>                                     }****
>
>                                     ****
>
>                                     ****
>
>                         }****
>
>             ****
>
>             ****
>
>             public static void main(String[] args) {****
>
>                         JobClient client = new JobClient();****
>
>                         JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
>                         conf.setMapOutputKeyClass(Text.class);****
>
>                         conf.setMapOutputValueClass(Text.class);****
>
>                         // TODO: specify output types****
>
>                         conf.setOutputKeyClass(Text.class);****
>
>                         conf.setOutputValueClass(Text.class);****
>
> ** **
>
>                         FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
>                         FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
>                         conf.setInputFormat(TextInputFormat.class);****
>
>                         conf.setOutputFormat(TextOutputFormat.class);****
>
>                         ****
>
>                         conf.setMapperClass(MapClass.class);****
>
>                         conf.setReducerClass(ReduceClass.class);****
>
>                         ****
>
>                         client.setConf(conf);****
>
>                         try {****
>
>                                     JobClient.runJob(conf);****
>
>                         } catch (Exception e) {****
>
>                                     e.printStackTrace();****
>
>                         }****
>
>             }****
>
> ** **
>
>             }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city*                 * #pagepath*****
>
> "Aachen"                 "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city                        #pagepath****
>
> city1                        url1,url2,url3****
>
> city2                        url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>



-- 
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com

Re: Want to Sort the values in one line using map reduce

Posted by manish dunani <ma...@gmail.com>.
     *"Thanks a lot Devraj!!!!!!!!"*


On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:

>  You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
>       *Reducer*<Text, Text, Text, Text> {****
>
>     Text v = *new* Text();****
>
> ** **
>
>     *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
>         *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
>         *throws* IOException {****
>
>       StringBuffer value = *new* StringBuffer();****
>
>       *while* (values.hasNext()){****
>
>         value.append(values.next().toString());****
>
>         value.append(",");****
>
>       }****
>
>       v.set(value.toString());****
>
>       output.collect(key, v);****
>
>     }****
>
>   }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
>    date ****
>
> country****
>
>  city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
>             public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
>             {****
>
>                         Text k = new Text();****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
>                         {****
>
>                                     try****
>
>                                     {****
>
>                                     String[] line =
> value.toString().split(",",5);****
>
>                                     ****
>
>                                     String city = String.valueOf(line[2]);
> ****
>
>                                     String url = String.valueOf(line[3]);*
> ***
>
>                                     ****
>
>                                     k.set(city);****
>
>                                     v.set(url);****
>
>                                     ****
>
>                                     output.collect(k, v);****
>
>                                     }****
>
>                                     catch(Exception e)****
>
>                                     {****
>
>                                                 System.out.println(e);****
>
>                                     }****
>
>                                                                         **
> **
>
>                         }****
>
>             }****
>
>             ****
>
>             public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
>             {****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
>                         {****
>
>                          ****
>
>                         ****
>
>                                     while(values.hasNext())****
>
>                                       ****
>
>                                     {****
>
>                                                 String
> val=values.next().toString();****
>
>                                                 ****
>
>                                                 v.set(val);****
>
>
> ****
>
>                                                 output.collect(key,v);****
>
>                                     ****
>
>                                     ****
>
>                                     }****
>
>                                     ****
>
>                                     ****
>
>                         }****
>
>             ****
>
>             ****
>
>             public static void main(String[] args) {****
>
>                         JobClient client = new JobClient();****
>
>                         JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
>                         conf.setMapOutputKeyClass(Text.class);****
>
>                         conf.setMapOutputValueClass(Text.class);****
>
>                         // TODO: specify output types****
>
>                         conf.setOutputKeyClass(Text.class);****
>
>                         conf.setOutputValueClass(Text.class);****
>
> ** **
>
>                         FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
>                         FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
>                         conf.setInputFormat(TextInputFormat.class);****
>
>                         conf.setOutputFormat(TextOutputFormat.class);****
>
>                         ****
>
>                         conf.setMapperClass(MapClass.class);****
>
>                         conf.setReducerClass(ReduceClass.class);****
>
>                         ****
>
>                         client.setConf(conf);****
>
>                         try {****
>
>                                     JobClient.runJob(conf);****
>
>                         } catch (Exception e) {****
>
>                                     e.printStackTrace();****
>
>                         }****
>
>             }****
>
> ** **
>
>             }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city*                 * #pagepath*****
>
> "Aachen"                 "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output in below format::****
>
> ** **
>
> #city                        #pagepath****
>
> city1                        url1,url2,url3****
>
> city2                        url1,url2,url3****
>
> ** **
>
> Is it possible to convert it in this format using map and reduce ???****
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>



-- 
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com

Re: Want to Sort the values in one line using map reduce

Posted by manish dunani <ma...@gmail.com>.
     *"Thanks a lot Devraj!!!!!!!!"*


On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <de...@huawei.com> wrote:

>  You are almost done to get the desired output. You need to change little
> in the reduce function like this, ****
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
>       *Reducer*<Text, Text, Text, Text> {****
>
>     Text v = *new* Text();****
>
> ** **
>
>     *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
>         *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
>         *throws* IOException {****
>
>       StringBuffer value = *new* StringBuffer();****
>
>       *while* (values.hasNext()){****
>
>         value.append(values.next().toString());****
>
>         value.append(",");****
>
>       }****
>
>       v.set(value.toString());****
>
>       output.collect(key, v);****
>
>     }****
>
>   }****
>
> In the above reduce function you can add logical condition to avoid extra
> ‘,’ at end of each value line.****
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have input file and my data looks like:*****
>
> ** **
>
>    date ****
>
> country****
>
>  city****
>
> pagePath****
>
> visits****
>
> 20120301 ****
>
> India****
>
> Ahmedabad****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Ahmedabad****
>
> /gtuadmissionhelpline-team****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /****
>
> 1****
>
> 20120302 ****
>
> India****
>
> Mumbai****
>
> /merit-calculator****
>
> 1****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> * I wrote the map and reduce application to convert it into page_url by
> city:*****
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
>             public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
>             {****
>
>                         Text k = new Text();****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
>                         {****
>
>                                     try****
>
>                                     {****
>
>                                     String[] line =
> value.toString().split(",",5);****
>
>                                     ****
>
>                                     String city = String.valueOf(line[2]);
> ****
>
>                                     String url = String.valueOf(line[3]);*
> ***
>
>                                     ****
>
>                                     k.set(city);****
>
>                                     v.set(url);****
>
>                                     ****
>
>                                     output.collect(k, v);****
>
>                                     }****
>
>                                     catch(Exception e)****
>
>                                     {****
>
>                                                 System.out.println(e);****
>
>                                     }****
>
>                                                                         **
> **
>
>                         }****
>
>             }****
>
>             ****
>
>             public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
>             {****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
>                         {****
>
>                          ****
>
>                         ****
>
>                                     while(values.hasNext())****
>
>                                       ****
>
>                                     {****
>
>                                                 String
> val=values.next().toString();****
>
>                                                 ****
>
>                                                 v.set(val);****
>
>
> ****
>
>                                                 output.collect(key,v);****
>
>                                     ****
>
>                                     ****
>
>                                     }****
>
>                                     ****
>
>                                     ****
>
>                         }****
>
>             ****
>
>             ****
>
>             public static void main(String[] args) {****
>
>                         JobClient client = new JobClient();****
>
>                         JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
>                         conf.setMapOutputKeyClass(Text.class);****
>
>                         conf.setMapOutputValueClass(Text.class);****
>
>                         // TODO: specify output types****
>
>                         conf.setOutputKeyClass(Text.class);****
>
>                         conf.setOutputValueClass(Text.class);****
>
> ** **
>
>                         FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
>                         FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
>                         conf.setInputFormat(TextInputFormat.class);****
>
>                         conf.setOutputFormat(TextOutputFormat.class);****
>
>                         ****
>
>                         conf.setMapperClass(MapClass.class);****
>
>                         conf.setReducerClass(ReduceClass.class);****
>
>                         ****
>
>                         client.setConf(conf);****
>
>                         try {****
>
>                                     JobClient.runJob(conf);****
>
>                         } catch (Exception e) {****
>
>                                     e.printStackTrace();****
>
>                         }****
>
>             }****
>
> ** **
>
>             }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city*                 * #pagepath*****
>
> "Aachen"                 "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output into the format below:
>
> ** **
>
> #city                        #pagepath****
>
> city1                        url1,url2,url3****
>
> city2                        url1,url2,url3****
>
> ** **
>
> Is it possible to convert it into this format using map and reduce?
>
> ** **
>
> If yes then how??****
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>



-- 
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com

RE: Want to Sort the values in one line using map reduce

Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function a little, like this:

/**
 * Reducer that collapses every value received for one key into a single
 * output record, so each key appears on exactly one output line.
 *
 * <p>Each value is followed by a ',' separator, which means the emitted
 * value ends with a trailing comma.
 */
public static class ReduceClass extends MapReduceBase implements
    Reducer<Text, Text, Text, Text> {
  // Output holder reused between reduce() calls; overwritten before each collect().
  Text v = new Text();

  /**
   * Concatenates all values for {@code key} and emits them as one record.
   *
   * @param key      the grouping key; passed through unchanged as the output key
   * @param values   all values grouped under {@code key}
   * @param output   collector that receives the single (key, joined values) record
   * @param reporter unused here
   * @throws IOException if the collector fails to accept the record
   */
  public void reduce(Text key, Iterator<Text> values,
      OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    // The buffer is local to this call, so the unsynchronized StringBuilder
    // is sufficient (StringBuffer's locking buys nothing here).
    StringBuilder joined = new StringBuilder();
    while (values.hasNext()) {
      joined.append(values.next().toString()).append(",");
    }
    v.set(joined.toString());
    output.collect(key, v);
  }
}
In the above reduce function you can add a condition to avoid the extra ',' at the end of each value line.

Thanks
Devaraj k

From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce

Hi,

I have an input file, and my data looks like this:

date

country

 city

pagePath

visits

20120301

India

Ahmedabad

/

1

20120302

India

Ahmedabad

/gtuadmissionhelpline-team

1

20120302

India

Mumbai

/

1

20120302

India

Mumbai

/merit-calculator

1





I wrote a map and reduce application to convert it into page_url by city:




package data.ga;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * MapReduce job written against the old {@code org.apache.hadoop.mapred} API.
 *
 * <p>The mapper reads comma-separated text lines and emits the third field
 * (city) as key and the fourth field (page path) as value. The reducer writes
 * every page path as its own (city, page path) record, so a city with several
 * page paths produces several output lines.
 */
public class pharmecy
{
    /** Mapper that turns one comma-separated input line into one (city, page path) pair. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Output holders reused between map() calls; both are overwritten before each collect().
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits the line on ',' into at most five fields and emits field 2 as
         * the key and field 3 as the value (zero-based indexes).
         *
         * <p>Lines with fewer than four fields are reported on stdout and skipped.
         * Collector failures are no longer caught here: they propagate to the
         * caller instead of silently dropping the record.
         *
         * @param key      unused here
         * @param value    one line of input text
         * @param output   collector that receives the (city, page path) pair
         * @param reporter unused here
         * @throws IOException if the collector fails to accept the record
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] line = value.toString().split(",",5);

            // Explicit guard instead of relying on ArrayIndexOutOfBoundsException.
            if (line.length < 4)
            {
                System.out.println("Skipping malformed record (expected at least 4 fields): " + value);
                return;
            }

            k.set(line[2]);
            v.set(line[3]);

            output.collect(k, v);
        }
    }

    /** Reducer that forwards each value unchanged, one output record per value. */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output holder reused for every emitted record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for every value grouped under {@code key}.
         *
         * @param key      the city; passed through unchanged as the output key
         * @param values   all page paths grouped under {@code key}
         * @param output   collector that receives the records
         * @param reporter unused here
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Configures the job with hard-coded HDFS input and output paths and runs it.
         * If the job fails, the stack trace is printed and the JVM exits with
         * status 1 so callers can detect the failure.
         *
         * <p>NOTE(review): this method is declared inside {@code ReduceClass}, not
         * directly in {@code pharmecy}; it looks like a misplaced brace. It is kept
         * here so the existing entry point does not move - confirm before relocating.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Map output and final output both use Text keys and Text values.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // runJob is static, so no JobClient instance is needed.
            try {
                JobClient.runJob(conf);
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }

    }
}

Output:


#city                  #pagepath
"Aachen"                 "/medicalcollege/m-p-shah-medical-college"
"Abbottabad"  "/merit-calculator"
"Abbottabad"  "/merit-calculator"
"Abidjan"                "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan"                "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"


My question is:

I want to convert this output into the format below:

#city                        #pagepath
city1                        url1,url2,url3
city2                        url1,url2,url3

Is it possible to convert it into this format using map and reduce?

If yes then how??

--
MANISH DUNANI
-THANX


RE: Want to Sort the values in one line using map reduce

Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function a little, like this:

/**
 * Reducer that collapses every value received for one key into a single
 * output record, so each key appears on exactly one output line.
 *
 * <p>Each value is followed by a ',' separator, which means the emitted
 * value ends with a trailing comma.
 */
public static class ReduceClass extends MapReduceBase implements
    Reducer<Text, Text, Text, Text> {
  // Output holder reused between reduce() calls; overwritten before each collect().
  Text v = new Text();

  /**
   * Concatenates all values for {@code key} and emits them as one record.
   *
   * @param key      the grouping key; passed through unchanged as the output key
   * @param values   all values grouped under {@code key}
   * @param output   collector that receives the single (key, joined values) record
   * @param reporter unused here
   * @throws IOException if the collector fails to accept the record
   */
  public void reduce(Text key, Iterator<Text> values,
      OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    // The buffer is local to this call, so the unsynchronized StringBuilder
    // is sufficient (StringBuffer's locking buys nothing here).
    StringBuilder joined = new StringBuilder();
    while (values.hasNext()) {
      joined.append(values.next().toString()).append(",");
    }
    v.set(joined.toString());
    output.collect(key, v);
  }
}
In the above reduce function you can add a condition to avoid the extra ',' at the end of each value line.

Thanks
Devaraj k

From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce

Hi,

I have an input file, and my data looks like this:

date

country

 city

pagePath

visits

20120301

India

Ahmedabad

/

1

20120302

India

Ahmedabad

/gtuadmissionhelpline-team

1

20120302

India

Mumbai

/

1

20120302

India

Mumbai

/merit-calculator

1





I wrote a map and reduce application to convert it into page_url by city:




package data.ga;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * MapReduce job written against the old {@code org.apache.hadoop.mapred} API.
 *
 * <p>The mapper reads comma-separated text lines and emits the third field
 * (city) as key and the fourth field (page path) as value. The reducer writes
 * every page path as its own (city, page path) record, so a city with several
 * page paths produces several output lines.
 */
public class pharmecy
{
    /** Mapper that turns one comma-separated input line into one (city, page path) pair. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Output holders reused between map() calls; both are overwritten before each collect().
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits the line on ',' into at most five fields and emits field 2 as
         * the key and field 3 as the value (zero-based indexes).
         *
         * <p>Lines with fewer than four fields are reported on stdout and skipped.
         * Collector failures are no longer caught here: they propagate to the
         * caller instead of silently dropping the record.
         *
         * @param key      unused here
         * @param value    one line of input text
         * @param output   collector that receives the (city, page path) pair
         * @param reporter unused here
         * @throws IOException if the collector fails to accept the record
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] line = value.toString().split(",",5);

            // Explicit guard instead of relying on ArrayIndexOutOfBoundsException.
            if (line.length < 4)
            {
                System.out.println("Skipping malformed record (expected at least 4 fields): " + value);
                return;
            }

            k.set(line[2]);
            v.set(line[3]);

            output.collect(k, v);
        }
    }

    /** Reducer that forwards each value unchanged, one output record per value. */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output holder reused for every emitted record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for every value grouped under {@code key}.
         *
         * @param key      the city; passed through unchanged as the output key
         * @param values   all page paths grouped under {@code key}
         * @param output   collector that receives the records
         * @param reporter unused here
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Configures the job with hard-coded HDFS input and output paths and runs it.
         * If the job fails, the stack trace is printed and the JVM exits with
         * status 1 so callers can detect the failure.
         *
         * <p>NOTE(review): this method is declared inside {@code ReduceClass}, not
         * directly in {@code pharmecy}; it looks like a misplaced brace. It is kept
         * here so the existing entry point does not move - confirm before relocating.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Map output and final output both use Text keys and Text values.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // runJob is static, so no JobClient instance is needed.
            try {
                JobClient.runJob(conf);
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }

    }
}

Output:


#city                  #pagepath
"Aachen"                 "/medicalcollege/m-p-shah-medical-college"
"Abbottabad"  "/merit-calculator"
"Abbottabad"  "/merit-calculator"
"Abidjan"                "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan"                "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"


My question is:

I want to convert this output into the format below:

#city                        #pagepath
city1                        url1,url2,url3
city2                        url1,url2,url3

Is it possible to convert it into this format using map and reduce?

If yes then how??

--
MANISH DUNANI
-THANX


RE: Want to Sort the values in one line using map reduce

Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function a little, like this:

/**
 * Reducer that collapses every value received for one key into a single
 * output record, so each key appears on exactly one output line.
 *
 * <p>Each value is followed by a ',' separator, which means the emitted
 * value ends with a trailing comma.
 */
public static class ReduceClass extends MapReduceBase implements
    Reducer<Text, Text, Text, Text> {
  // Output holder reused between reduce() calls; overwritten before each collect().
  Text v = new Text();

  /**
   * Concatenates all values for {@code key} and emits them as one record.
   *
   * @param key      the grouping key; passed through unchanged as the output key
   * @param values   all values grouped under {@code key}
   * @param output   collector that receives the single (key, joined values) record
   * @param reporter unused here
   * @throws IOException if the collector fails to accept the record
   */
  public void reduce(Text key, Iterator<Text> values,
      OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    // The buffer is local to this call, so the unsynchronized StringBuilder
    // is sufficient (StringBuffer's locking buys nothing here).
    StringBuilder joined = new StringBuilder();
    while (values.hasNext()) {
      joined.append(values.next().toString()).append(",");
    }
    v.set(joined.toString());
    output.collect(key, v);
  }
}
In the above reduce function you can add a condition to avoid the extra ',' at the end of each value line.

Thanks
Devaraj k

From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce

Hi,

I have an input file, and my data looks like this:

date

country

 city

pagePath

visits

20120301

India

Ahmedabad

/

1

20120302

India

Ahmedabad

/gtuadmissionhelpline-team

1

20120302

India

Mumbai

/

1

20120302

India

Mumbai

/merit-calculator

1





I wrote a map and reduce application to convert it into page_url by city:




package data.ga;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * MapReduce job written against the old {@code org.apache.hadoop.mapred} API.
 *
 * <p>The mapper reads comma-separated text lines and emits the third field
 * (city) as key and the fourth field (page path) as value. The reducer writes
 * every page path as its own (city, page path) record, so a city with several
 * page paths produces several output lines.
 */
public class pharmecy
{
    /** Mapper that turns one comma-separated input line into one (city, page path) pair. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Output holders reused between map() calls; both are overwritten before each collect().
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits the line on ',' into at most five fields and emits field 2 as
         * the key and field 3 as the value (zero-based indexes).
         *
         * <p>Lines with fewer than four fields are reported on stdout and skipped.
         * Collector failures are no longer caught here: they propagate to the
         * caller instead of silently dropping the record.
         *
         * @param key      unused here
         * @param value    one line of input text
         * @param output   collector that receives the (city, page path) pair
         * @param reporter unused here
         * @throws IOException if the collector fails to accept the record
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] line = value.toString().split(",",5);

            // Explicit guard instead of relying on ArrayIndexOutOfBoundsException.
            if (line.length < 4)
            {
                System.out.println("Skipping malformed record (expected at least 4 fields): " + value);
                return;
            }

            k.set(line[2]);
            v.set(line[3]);

            output.collect(k, v);
        }
    }

    /** Reducer that forwards each value unchanged, one output record per value. */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output holder reused for every emitted record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for every value grouped under {@code key}.
         *
         * @param key      the city; passed through unchanged as the output key
         * @param values   all page paths grouped under {@code key}
         * @param output   collector that receives the records
         * @param reporter unused here
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Configures the job with hard-coded HDFS input and output paths and runs it.
         * If the job fails, the stack trace is printed and the JVM exits with
         * status 1 so callers can detect the failure.
         *
         * <p>NOTE(review): this method is declared inside {@code ReduceClass}, not
         * directly in {@code pharmecy}; it looks like a misplaced brace. It is kept
         * here so the existing entry point does not move - confirm before relocating.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Map output and final output both use Text keys and Text values.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // runJob is static, so no JobClient instance is needed.
            try {
                JobClient.runJob(conf);
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }

    }
}

Output:


#city                  #pagepath
"Aachen"                 "/medicalcollege/m-p-shah-medical-college"
"Abbottabad"  "/merit-calculator"
"Abbottabad"  "/merit-calculator"
"Abidjan"                "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan"                "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"


My question is:

I want to convert this output into the format below:

#city                        #pagepath
city1                        url1,url2,url3
city2                        url1,url2,url3

Is it possible to convert it into this format using map and reduce?

If yes then how??

--
MANISH DUNANI
-THANX


RE: Want to Sort the values in one line using map reduce

Posted by Devaraj k <de...@huawei.com>.
You are almost there. To get the desired output, you only need to change the reduce function a little, like this:

/**
 * Reducer that collapses every value received for one key into a single
 * output record, so each key appears on exactly one output line.
 *
 * <p>Each value is followed by a ',' separator, which means the emitted
 * value ends with a trailing comma.
 */
public static class ReduceClass extends MapReduceBase implements
    Reducer<Text, Text, Text, Text> {
  // Output holder reused between reduce() calls; overwritten before each collect().
  Text v = new Text();

  /**
   * Concatenates all values for {@code key} and emits them as one record.
   *
   * @param key      the grouping key; passed through unchanged as the output key
   * @param values   all values grouped under {@code key}
   * @param output   collector that receives the single (key, joined values) record
   * @param reporter unused here
   * @throws IOException if the collector fails to accept the record
   */
  public void reduce(Text key, Iterator<Text> values,
      OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    // The buffer is local to this call, so the unsynchronized StringBuilder
    // is sufficient (StringBuffer's locking buys nothing here).
    StringBuilder joined = new StringBuilder();
    while (values.hasNext()) {
      joined.append(values.next().toString()).append(",");
    }
    v.set(joined.toString());
    output.collect(key, v);
  }
}
In the above reduce function you can add a condition to avoid the extra ',' at the end of each value line.

Thanks
Devaraj k

From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce

Hi,

I have an input file, and my data looks like this:

date

country

 city

pagePath

visits

20120301

India

Ahmedabad

/

1

20120302

India

Ahmedabad

/gtuadmissionhelpline-team

1

20120302

India

Mumbai

/

1

20120302

India

Mumbai

/merit-calculator

1





I wrote a map and reduce application to convert it into page_url by city:




package data.ga;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * MapReduce job written against the old {@code org.apache.hadoop.mapred} API.
 *
 * <p>The mapper reads comma-separated text lines and emits the third field
 * (city) as key and the fourth field (page path) as value. The reducer writes
 * every page path as its own (city, page path) record, so a city with several
 * page paths produces several output lines.
 */
public class pharmecy
{
    /** Mapper that turns one comma-separated input line into one (city, page path) pair. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        // Output holders reused between map() calls; both are overwritten before each collect().
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits the line on ',' into at most five fields and emits field 2 as
         * the key and field 3 as the value (zero-based indexes).
         *
         * <p>Lines with fewer than four fields are reported on stdout and skipped.
         * Collector failures are no longer caught here: they propagate to the
         * caller instead of silently dropping the record.
         *
         * @param key      unused here
         * @param value    one line of input text
         * @param output   collector that receives the (city, page path) pair
         * @param reporter unused here
         * @throws IOException if the collector fails to accept the record
         */
        public void map(LongWritable key,Text value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            String[] line = value.toString().split(",",5);

            // Explicit guard instead of relying on ArrayIndexOutOfBoundsException.
            if (line.length < 4)
            {
                System.out.println("Skipping malformed record (expected at least 4 fields): " + value);
                return;
            }

            k.set(line[2]);
            v.set(line[3]);

            output.collect(k, v);
        }
    }

    /** Reducer that forwards each value unchanged, one output record per value. */
    public static class ReduceClass extends MapReduceBase implements Reducer <Text,Text,Text,Text>
    {
        // Output holder reused for every emitted record.
        Text v = new Text();

        /**
         * Emits one (key, value) record for every value grouped under {@code key}.
         *
         * @param key      the city; passed through unchanged as the output key
         * @param values   all page paths grouped under {@code key}
         * @param output   collector that receives the records
         * @param reporter unused here
         * @throws IOException if the collector fails to accept a record
         */
        public void reduce(Text key,Iterator<Text> values,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
        {
            while (values.hasNext())
            {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }

        /**
         * Configures the job with hard-coded HDFS input and output paths and runs it.
         * If the job fails, the stack trace is printed and the JVM exits with
         * status 1 so callers can detect the failure.
         *
         * <p>NOTE(review): this method is declared inside {@code ReduceClass}, not
         * directly in {@code pharmecy}; it looks like a misplaced brace. It is kept
         * here so the existing entry point does not move - confirm before relocating.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            JobConf conf = new JobConf(data.ga.pharmecy.class);

            // Map output and final output both use Text keys and Text values.
            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(conf, new Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("hdfs://localhost:54310/user/manish/gadataoutput11"));

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            conf.setMapperClass(MapClass.class);
            conf.setReducerClass(ReduceClass.class);

            // runJob is static, so no JobClient instance is needed.
            try {
                JobClient.runJob(conf);
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }

    }
}

Output:


#city                  #pagepath
"Aachen"                 "/medicalcollege/m-p-shah-medical-college"
"Abbottabad"  "/merit-calculator"
"Abbottabad"  "/merit-calculator"
"Abidjan"                "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan"                "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"


My question is:

I want to convert this output into the format below:

#city                        #pagepath
city1                        url1,url2,url3
city2                        url1,url2,url3

Is it possible to convert it into this format using map and reduce?

If yes then how??

--
MANISH DUNANI
-THANX