You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@nutch.apache.org by Apache Wiki <wi...@apache.org> on 2015/10/03 02:41:02 UTC

[Nutch Wiki] Trivial Update of "NutchFileFormats" by LewisJohnMcgibbney

Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notification.

The "NutchFileFormats" page has been changed by LewisJohnMcgibbney:
https://wiki.apache.org/nutch/NutchFileFormats?action=diff&rev1=8&rev2=9

  
  == File Formats ==
  
- {{{#!CSV ,
+ 
- file,key datatype,value datatype,codec
+ ||file||key datatype||value datatype||codec||
- data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,
+ ||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec||
- index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- }}}
+ 
  
  = LinkDB =
  
@@ -110, +110 @@

  
  == File Formats ==
  
- {{{#!CSV ,
- file,key datatype,value datatype,codec
+ ||file||key datatype||value datatype||codec||
- data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.Inlinks,org.apache.hadoop.io.compress.DefaultCodec
+ ||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.Inlinks||org.apache.hadoop.io.compress.DefaultCodec||
- index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- }}}
  
  = Segments =
  
@@ -155, +153 @@

  
  == Description ==
  
- {{{#!CSV ,
- Subdirectory,file,key datatype,value datatype,codec
+ ||Subdirectory||file||key datatype||value datatype||codec||
- content,data,org.apache.hadoop.io.Text,org.apache.nutch.protocol.Content,org.apache.hadoop.io.compress.DefaultCodec
+ ||content||data||org.apache.hadoop.io.Text||org.apache.nutch.protocol.Content||org.apache.hadoop.io.compress.DefaultCodec||
- content,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||content||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- crawl_fetch,data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,org.apache.hadoop.io.compress.DefaultCodec
+ ||crawl_fetch||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec||
- crawl_fetch,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||crawl_fetch||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- crawl_generate,part-0000,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,org.apache.hadoop.io.compress.DefaultCodec
+ ||crawl_generate||part-0000||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec||
- crawl_parse,data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,org.apache.hadoop.io.compress.DefaultCodec
+ ||crawl_parse||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec||
- crawl_parse,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||crawl_parse||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- parse_data,data,org.apache.hadoop.io.Text,org.apache.nutch.parse.ParseData,org.apache.hadoop.io.compress.DefaultCodec
+ ||parse_data||data||org.apache.hadoop.io.Text||org.apache.nutch.parse.ParseData||org.apache.hadoop.io.compress.DefaultCodec||
- parse_data,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||parse_data||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- parse_text,data,org.apache.hadoop.io.Text,org.apache.nutch.parse.ParseText,org.apache.hadoop.io.compress.DefaultCodec
+ ||parse_text||data||org.apache.hadoop.io.Text||org.apache.nutch.parse.ParseText||org.apache.hadoop.io.compress.DefaultCodec||
- parse_text,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec
+ ||parse_text||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec||
- }}}
  
  = Old File Format Documentation =