You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/09 04:53:39 UTC

svn commit: r1650437 [3/3] - in /nutch/branches/2.x: ./ ivy/ src/gora/ src/java/org/apache/nutch/storage/

Modified: nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java?rev=1650437&r1=1650436&r2=1650437&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java Fri Jan  9 03:53:39 2015
@@ -1,19 +1,19 @@
 /*******************************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
@@ -21,36 +21,36 @@
  */
 package org.apache.nutch.storage;  
 @SuppressWarnings("all")
+/** WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time */
 public class WebPage extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AAAAAA==\"},{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used
  to represent weather or not a field is dirty.\",\"default\":\"AA==\"},{\"name\":\"code\",\"type\":\"int\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AA==\"},{\"name\":\"majorCode\",\"type\":\"int\"
 ,\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"default\":null}]}");
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\"
 :\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important 
 to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resour
 ce has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating w
 hen this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\
 "bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields
 \":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be
  provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{
 \"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system 
 specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    __G__DIRTY(0, "__g__dirty"),
-    BASE_URL(1, "baseUrl"),
-    STATUS(2, "status"),
-    FETCH_TIME(3, "fetchTime"),
-    PREV_FETCH_TIME(4, "prevFetchTime"),
-    FETCH_INTERVAL(5, "fetchInterval"),
-    RETRIES_SINCE_FETCH(6, "retriesSinceFetch"),
-    MODIFIED_TIME(7, "modifiedTime"),
-    PREV_MODIFIED_TIME(8, "prevModifiedTime"),
-    PROTOCOL_STATUS(9, "protocolStatus"),
-    CONTENT(10, "content"),
-    CONTENT_TYPE(11, "contentType"),
-    PREV_SIGNATURE(12, "prevSignature"),
-    SIGNATURE(13, "signature"),
-    TITLE(14, "title"),
-    TEXT(15, "text"),
-    PARSE_STATUS(16, "parseStatus"),
-    SCORE(17, "score"),
-    REPR_URL(18, "reprUrl"),
-    HEADERS(19, "headers"),
-    OUTLINKS(20, "outlinks"),
-    INLINKS(21, "inlinks"),
-    MARKERS(22, "markers"),
-    METADATA(23, "metadata"),
-    BATCH_ID(24, "batchId"),
+    BASE_URL(0, "baseUrl"),
+    STATUS(1, "status"),
+    FETCH_TIME(2, "fetchTime"),
+    PREV_FETCH_TIME(3, "prevFetchTime"),
+    FETCH_INTERVAL(4, "fetchInterval"),
+    RETRIES_SINCE_FETCH(5, "retriesSinceFetch"),
+    MODIFIED_TIME(6, "modifiedTime"),
+    PREV_MODIFIED_TIME(7, "prevModifiedTime"),
+    PROTOCOL_STATUS(8, "protocolStatus"),
+    CONTENT(9, "content"),
+    CONTENT_TYPE(10, "contentType"),
+    PREV_SIGNATURE(11, "prevSignature"),
+    SIGNATURE(12, "signature"),
+    TITLE(13, "title"),
+    TEXT(14, "text"),
+    PARSE_STATUS(15, "parseStatus"),
+    SCORE(16, "score"),
+    REPR_URL(17, "reprUrl"),
+    HEADERS(18, "headers"),
+    OUTLINKS(19, "outlinks"),
+    INLINKS(20, "inlinks"),
+    MARKERS(21, "markers"),
+    METADATA(22, "metadata"),
+    BATCH_ID(23, "batchId"),
     ;
     /**
      * Field's index.
@@ -89,7 +89,6 @@ public class WebPage extends org.apache.
   };
 
   public static final String[] _ALL_FIELDS = {
-  "__g__dirty",
   "baseUrl",
   "status",
   "fetchTime",
@@ -124,295 +123,312 @@ public class WebPage extends org.apache.
     return WebPage._ALL_FIELDS.length;
   }
 
-  
-  /** Bytes used to represent weather or not a field is dirty. */
-  private java.nio.ByteBuffer __g__dirty = java.nio.ByteBuffer.wrap(new byte[4]);
-  private CharSequence baseUrl;
+  /** The original URL associated with this WebPage. */
+  private java.lang.CharSequence baseUrl;
+  /** A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified */
   private int status;
+  /** The system time in milliseconds for when the page was fetched. */
   private long fetchTime;
+  /** The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation */
   private long prevFetchTime;
+  /** The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented. */
   private int fetchInterval;
+  /** The number of retried attempts at fetching the WebPage since it was last successfully fetched. */
   private int retriesSinceFetch;
+  /** The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. */
   private long modifiedTime;
+  /** The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. */
   private long prevModifiedTime;
-  private ProtocolStatus protocolStatus;
+  private org.apache.nutch.storage.ProtocolStatus protocolStatus;
+  /** The entire raw document content e.g. raw XHTML */
   private java.nio.ByteBuffer content;
-  private CharSequence contentType;
+  /** The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. */
+  private java.lang.CharSequence contentType;
+  /** An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. */
   private java.nio.ByteBuffer prevSignature;
+  /** An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time. */
   private java.nio.ByteBuffer signature;
-  private CharSequence title;
-  private CharSequence text;
-  private ParseStatus parseStatus;
+  /** The title of the WebPage. */
+  private java.lang.CharSequence title;
+  /** The textual content of the WebPage devoid from native markup. */
+  private java.lang.CharSequence text;
+  private org.apache.nutch.storage.ParseStatus parseStatus;
+  /** A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. */
   private float score;
-  private CharSequence reprUrl;
-  private java.util.Map<CharSequence,CharSequence> headers;
-  private java.util.Map<CharSequence,CharSequence> outlinks;
-  private java.util.Map<CharSequence,CharSequence> inlinks;
-  private java.util.Map<CharSequence,CharSequence> markers;
-  private java.util.Map<CharSequence,java.nio.ByteBuffer> metadata;
-  private CharSequence batchId;
+  /** In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler */
+  private java.lang.CharSequence reprUrl;
+  /** Header information returned from the web server used to serve the content which is subsequently fetched. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. */
+  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
+  /** Embedded hyperlinks which direct outside of the current domain. */
+  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
+  /** Embedded hyperlinks which link to pages within the current domain. */
+  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
+  /** Marker flags which represent user and machine decisions which have affected or influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage. */
+  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
+  /** A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage. */
+  private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
+  /** A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. */
+  private java.lang.CharSequence batchId;
   public org.apache.avro.Schema getSchema() { return SCHEMA$; }
   // Used by DatumWriter.  Applications should not call. 
-  public Object get(int field$) {
+  public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return __g__dirty;
-    case 1: return baseUrl;
-    case 2: return status;
-    case 3: return fetchTime;
-    case 4: return prevFetchTime;
-    case 5: return fetchInterval;
-    case 6: return retriesSinceFetch;
-    case 7: return modifiedTime;
-    case 8: return prevModifiedTime;
-    case 9: return protocolStatus;
-    case 10: return content;
-    case 11: return contentType;
-    case 12: return prevSignature;
-    case 13: return signature;
-    case 14: return title;
-    case 15: return text;
-    case 16: return parseStatus;
-    case 17: return score;
-    case 18: return reprUrl;
-    case 19: return headers;
-    case 20: return outlinks;
-    case 21: return inlinks;
-    case 22: return markers;
-    case 23: return metadata;
-    case 24: return batchId;
+    case 0: return baseUrl;
+    case 1: return status;
+    case 2: return fetchTime;
+    case 3: return prevFetchTime;
+    case 4: return fetchInterval;
+    case 5: return retriesSinceFetch;
+    case 6: return modifiedTime;
+    case 7: return prevModifiedTime;
+    case 8: return protocolStatus;
+    case 9: return content;
+    case 10: return contentType;
+    case 11: return prevSignature;
+    case 12: return signature;
+    case 13: return title;
+    case 14: return text;
+    case 15: return parseStatus;
+    case 16: return score;
+    case 17: return reprUrl;
+    case 18: return headers;
+    case 19: return outlinks;
+    case 20: return inlinks;
+    case 21: return markers;
+    case 22: return metadata;
+    case 23: return batchId;
     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
   
   // Used by DatumReader.  Applications should not call. 
   @SuppressWarnings(value="unchecked")
-  public void put(int field$, Object value) {
+  public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: __g__dirty = (java.nio.ByteBuffer)(value); break;
-    case 1: baseUrl = (CharSequence)(value); break;
-    case 2: status = (Integer)(value); break;
-    case 3: fetchTime = (Long)(value); break;
-    case 4: prevFetchTime = (Long)(value); break;
-    case 5: fetchInterval = (Integer)(value); break;
-    case 6: retriesSinceFetch = (Integer)(value); break;
-    case 7: modifiedTime = (Long)(value); break;
-    case 8: prevModifiedTime = (Long)(value); break;
-    case 9: protocolStatus = (ProtocolStatus)(value); break;
-    case 10: content = (java.nio.ByteBuffer)(value); break;
-    case 11: contentType = (CharSequence)(value); break;
-    case 12: prevSignature = (java.nio.ByteBuffer)(value); break;
-    case 13: signature = (java.nio.ByteBuffer)(value); break;
-    case 14: title = (CharSequence)(value); break;
-    case 15: text = (CharSequence)(value); break;
-    case 16: parseStatus = (ParseStatus)(value); break;
-    case 17: score = (Float)(value); break;
-    case 18: reprUrl = (CharSequence)(value); break;
-    case 19: headers = (java.util.Map<CharSequence,CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 20: outlinks = (java.util.Map<CharSequence,CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 21: inlinks = (java.util.Map<CharSequence,CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 22: markers = (java.util.Map<CharSequence,CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 23: metadata = (java.util.Map<CharSequence,java.nio.ByteBuffer>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 24: batchId = (CharSequence)(value); break;
+    case 0: baseUrl = (java.lang.CharSequence)(value); break;
+    case 1: status = (java.lang.Integer)(value); break;
+    case 2: fetchTime = (java.lang.Long)(value); break;
+    case 3: prevFetchTime = (java.lang.Long)(value); break;
+    case 4: fetchInterval = (java.lang.Integer)(value); break;
+    case 5: retriesSinceFetch = (java.lang.Integer)(value); break;
+    case 6: modifiedTime = (java.lang.Long)(value); break;
+    case 7: prevModifiedTime = (java.lang.Long)(value); break;
+    case 8: protocolStatus = (org.apache.nutch.storage.ProtocolStatus)(value); break;
+    case 9: content = (java.nio.ByteBuffer)(value); break;
+    case 10: contentType = (java.lang.CharSequence)(value); break;
+    case 11: prevSignature = (java.nio.ByteBuffer)(value); break;
+    case 12: signature = (java.nio.ByteBuffer)(value); break;
+    case 13: title = (java.lang.CharSequence)(value); break;
+    case 14: text = (java.lang.CharSequence)(value); break;
+    case 15: parseStatus = (org.apache.nutch.storage.ParseStatus)(value); break;
+    case 16: score = (java.lang.Float)(value); break;
+    case 17: reprUrl = (java.lang.CharSequence)(value); break;
+    case 18: headers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+    case 19: outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+    case 20: inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+    case 21: markers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+    case 22: metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+    case 23: batchId = (java.lang.CharSequence)(value); break;
     default: throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
    * Gets the value of the 'baseUrl' field.
-   */
-  public CharSequence getBaseUrl() {
+   * The original associated with this WebPage.   */
+  public java.lang.CharSequence getBaseUrl() {
     return baseUrl;
   }
 
   /**
    * Sets the value of the 'baseUrl' field.
-   * @param value the value to set.
+   * The original associated with this WebPage.   * @param value the value to set.
    */
-  public void setBaseUrl(CharSequence value) {
+  public void setBaseUrl(java.lang.CharSequence value) {
     this.baseUrl = value;
-    setDirty(1);
+    setDirty(0);
   }
   
   /**
    * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The original associated with this WebPage.   * @param value the value to set.
    */
-  public boolean isBaseUrlDirty(CharSequence value) {
-    return isDirty(1);
+  public boolean isBaseUrlDirty(java.lang.CharSequence value) {
+    return isDirty(0);
   }
 
   /**
    * Gets the value of the 'status' field.
-   */
-  public Integer getStatus() {
+   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   */
+  public java.lang.Integer getStatus() {
     return status;
   }
 
   /**
    * Sets the value of the 'status' field.
-   * @param value the value to set.
+   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   * @param value the value to set.
    */
-  public void setStatus(Integer value) {
+  public void setStatus(java.lang.Integer value) {
     this.status = value;
-    setDirty(2);
+    setDirty(1);
   }
   
   /**
    * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified   * @param value the value to set.
    */
-  public boolean isStatusDirty(Integer value) {
-    return isDirty(2);
+  public boolean isStatusDirty(java.lang.Integer value) {
+    return isDirty(1);
   }
 
   /**
    * Gets the value of the 'fetchTime' field.
-   */
-  public Long getFetchTime() {
+   * The system time in milliseconds for when the page was fetched.   */
+  public java.lang.Long getFetchTime() {
     return fetchTime;
   }
 
   /**
    * Sets the value of the 'fetchTime' field.
-   * @param value the value to set.
+   * The system time in milliseconds for when the page was fetched.   * @param value the value to set.
    */
-  public void setFetchTime(Long value) {
+  public void setFetchTime(java.lang.Long value) {
     this.fetchTime = value;
-    setDirty(3);
+    setDirty(2);
   }
   
   /**
    * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The system time in milliseconds for when the page was fetched.   * @param value the value to set.
    */
-  public boolean isFetchTimeDirty(Long value) {
-    return isDirty(3);
+  public boolean isFetchTimeDirty(java.lang.Long value) {
+    return isDirty(2);
   }
 
   /**
    * Gets the value of the 'prevFetchTime' field.
-   */
-  public Long getPrevFetchTime() {
+   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   */
+  public java.lang.Long getPrevFetchTime() {
     return prevFetchTime;
   }
 
   /**
    * Sets the value of the 'prevFetchTime' field.
-   * @param value the value to set.
+   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   * @param value the value to set.
    */
-  public void setPrevFetchTime(Long value) {
+  public void setPrevFetchTime(java.lang.Long value) {
     this.prevFetchTime = value;
-    setDirty(4);
+    setDirty(3);
   }
   
   /**
    * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation   * @param value the value to set.
    */
-  public boolean isPrevFetchTimeDirty(Long value) {
-    return isDirty(4);
+  public boolean isPrevFetchTimeDirty(java.lang.Long value) {
+    return isDirty(3);
   }
 
   /**
    * Gets the value of the 'fetchInterval' field.
-   */
-  public Integer getFetchInterval() {
+   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented.   */
+  public java.lang.Integer getFetchInterval() {
     return fetchInterval;
   }
 
   /**
    * Sets the value of the 'fetchInterval' field.
-   * @param value the value to set.
+   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented.   * @param value the value to set.
    */
-  public void setFetchInterval(Integer value) {
+  public void setFetchInterval(java.lang.Integer value) {
     this.fetchInterval = value;
-    setDirty(5);
+    setDirty(4);
   }
   
   /**
    * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented.   * @param value the value to set.
    */
-  public boolean isFetchIntervalDirty(Integer value) {
-    return isDirty(5);
+  public boolean isFetchIntervalDirty(java.lang.Integer value) {
+    return isDirty(4);
   }
 
   /**
    * Gets the value of the 'retriesSinceFetch' field.
-   */
-  public Integer getRetriesSinceFetch() {
+   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   */
+  public java.lang.Integer getRetriesSinceFetch() {
     return retriesSinceFetch;
   }
 
   /**
    * Sets the value of the 'retriesSinceFetch' field.
-   * @param value the value to set.
+   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   * @param value the value to set.
    */
-  public void setRetriesSinceFetch(Integer value) {
+  public void setRetriesSinceFetch(java.lang.Integer value) {
     this.retriesSinceFetch = value;
-    setDirty(6);
+    setDirty(5);
   }
   
   /**
    * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The number of retried attempts at fetching the WebPage since it was last successfully fetched.   * @param value the value to set.
    */
-  public boolean isRetriesSinceFetchDirty(Integer value) {
-    return isDirty(6);
+  public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
+    return isDirty(5);
   }
 
   /**
    * Gets the value of the 'modifiedTime' field.
-   */
-  public Long getModifiedTime() {
+   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   */
+  public java.lang.Long getModifiedTime() {
     return modifiedTime;
   }
 
   /**
    * Sets the value of the 'modifiedTime' field.
-   * @param value the value to set.
+   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   * @param value the value to set.
    */
-  public void setModifiedTime(Long value) {
+  public void setModifiedTime(java.lang.Long value) {
     this.modifiedTime = value;
-    setDirty(7);
+    setDirty(6);
   }
   
   /**
    * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage.   * @param value the value to set.
    */
-  public boolean isModifiedTimeDirty(Long value) {
-    return isDirty(7);
+  public boolean isModifiedTimeDirty(java.lang.Long value) {
+    return isDirty(6);
   }
 
   /**
    * Gets the value of the 'prevModifiedTime' field.
-   */
-  public Long getPrevModifiedTime() {
+   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   */
+  public java.lang.Long getPrevModifiedTime() {
     return prevModifiedTime;
   }
 
   /**
    * Sets the value of the 'prevModifiedTime' field.
-   * @param value the value to set.
+   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   * @param value the value to set.
    */
-  public void setPrevModifiedTime(Long value) {
+  public void setPrevModifiedTime(java.lang.Long value) {
     this.prevModifiedTime = value;
-    setDirty(8);
+    setDirty(7);
   }
   
   /**
    * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.   * @param value the value to set.
    */
-  public boolean isPrevModifiedTimeDirty(Long value) {
-    return isDirty(8);
+  public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
+    return isDirty(7);
   }
 
   /**
    * Gets the value of the 'protocolStatus' field.
    */
-  public ProtocolStatus getProtocolStatus() {
+  public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
     return protocolStatus;
   }
 
@@ -420,167 +436,167 @@ public class WebPage extends org.apache.
    * Sets the value of the 'protocolStatus' field.
    * @param value the value to set.
    */
-  public void setProtocolStatus(ProtocolStatus value) {
+  public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
     this.protocolStatus = value;
-    setDirty(9);
+    setDirty(8);
   }
   
   /**
    * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
    * @param value the value to set.
    */
-  public boolean isProtocolStatusDirty(ProtocolStatus value) {
-    return isDirty(9);
+  public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus value) {
+    return isDirty(8);
   }
 
   /**
    * Gets the value of the 'content' field.
-   */
+   * The entire raw document content e.g. raw XHTML   */
   public java.nio.ByteBuffer getContent() {
     return content;
   }
 
   /**
    * Sets the value of the 'content' field.
-   * @param value the value to set.
+   * The entire raw document content e.g. raw XHTML   * @param value the value to set.
    */
   public void setContent(java.nio.ByteBuffer value) {
     this.content = value;
-    setDirty(10);
+    setDirty(9);
   }
   
   /**
    * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The entire raw document content e.g. raw XHTML   * @param value the value to set.
    */
   public boolean isContentDirty(java.nio.ByteBuffer value) {
-    return isDirty(10);
+    return isDirty(9);
   }
 
   /**
    * Gets the value of the 'contentType' field.
-   */
-  public CharSequence getContentType() {
+   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   */
+  public java.lang.CharSequence getContentType() {
     return contentType;
   }
 
   /**
    * Sets the value of the 'contentType' field.
-   * @param value the value to set.
+   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to set.
    */
-  public void setContentType(CharSequence value) {
+  public void setContentType(java.lang.CharSequence value) {
     this.contentType = value;
-    setDirty(11);
+    setDirty(10);
   }
   
   /**
    * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to set.
    */
-  public boolean isContentTypeDirty(CharSequence value) {
-    return isDirty(11);
+  public boolean isContentTypeDirty(java.lang.CharSequence value) {
+    return isDirty(10);
   }
 
   /**
    * Gets the value of the 'prevSignature' field.
-   */
+   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   */
   public java.nio.ByteBuffer getPrevSignature() {
     return prevSignature;
   }
 
   /**
    * Sets the value of the 'prevSignature' field.
-   * @param value the value to set.
+   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   * @param value the value to set.
    */
   public void setPrevSignature(java.nio.ByteBuffer value) {
     this.prevSignature = value;
-    setDirty(12);
+    setDirty(11);
   }
   
   /**
    * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.   * @param value the value to set.
    */
   public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
-    return isDirty(12);
+    return isDirty(11);
   }
 
   /**
    * Gets the value of the 'signature' field.
-   */
+   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time.   */
   public java.nio.ByteBuffer getSignature() {
     return signature;
   }
 
   /**
    * Sets the value of the 'signature' field.
-   * @param value the value to set.
+   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time.   * @param value the value to set.
    */
   public void setSignature(java.nio.ByteBuffer value) {
     this.signature = value;
-    setDirty(13);
+    setDirty(12);
   }
   
   /**
    * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time.   * @param value the value to set.
    */
   public boolean isSignatureDirty(java.nio.ByteBuffer value) {
-    return isDirty(13);
+    return isDirty(12);
   }
 
   /**
    * Gets the value of the 'title' field.
-   */
-  public CharSequence getTitle() {
+   * The title of the WebPage.   */
+  public java.lang.CharSequence getTitle() {
     return title;
   }
 
   /**
    * Sets the value of the 'title' field.
-   * @param value the value to set.
+   * The title of the WebPage.   * @param value the value to set.
    */
-  public void setTitle(CharSequence value) {
+  public void setTitle(java.lang.CharSequence value) {
     this.title = value;
-    setDirty(14);
+    setDirty(13);
   }
   
   /**
    * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The title of the WebPage.   * @param value the value to set.
    */
-  public boolean isTitleDirty(CharSequence value) {
-    return isDirty(14);
+  public boolean isTitleDirty(java.lang.CharSequence value) {
+    return isDirty(13);
   }
 
   /**
    * Gets the value of the 'text' field.
-   */
-  public CharSequence getText() {
+   * The textual content of the WebPage devoid from native markup.   */
+  public java.lang.CharSequence getText() {
     return text;
   }
 
   /**
    * Sets the value of the 'text' field.
-   * @param value the value to set.
+   * The textual content of the WebPage devoid from native markup.   * @param value the value to set.
    */
-  public void setText(CharSequence value) {
+  public void setText(java.lang.CharSequence value) {
     this.text = value;
-    setDirty(15);
+    setDirty(14);
   }
   
   /**
    * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * The textual content of the WebPage devoid from native markup.   * @param value the value to set.
    */
-  public boolean isTextDirty(CharSequence value) {
-    return isDirty(15);
+  public boolean isTextDirty(java.lang.CharSequence value) {
+    return isDirty(14);
   }
 
   /**
    * Gets the value of the 'parseStatus' field.
    */
-  public ParseStatus getParseStatus() {
+  public org.apache.nutch.storage.ParseStatus getParseStatus() {
     return parseStatus;
   }
 
@@ -588,227 +604,227 @@ public class WebPage extends org.apache.
    * Sets the value of the 'parseStatus' field.
    * @param value the value to set.
    */
-  public void setParseStatus(ParseStatus value) {
+  public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
     this.parseStatus = value;
-    setDirty(16);
+    setDirty(15);
   }
   
   /**
    * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
    * @param value the value to set.
    */
-  public boolean isParseStatusDirty(ParseStatus value) {
-    return isDirty(16);
+  public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
+    return isDirty(15);
   }
 
   /**
    * Gets the value of the 'score' field.
-   */
-  public Float getScore() {
+   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   */
+  public java.lang.Float getScore() {
     return score;
   }
 
   /**
    * Sets the value of the 'score' field.
-   * @param value the value to set.
+   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   * @param value the value to set.
    */
-  public void setScore(Float value) {
+  public void setScore(java.lang.Float value) {
     this.score = value;
-    setDirty(17);
+    setDirty(16);
   }
   
   /**
    * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.   * @param value the value to set.
    */
-  public boolean isScoreDirty(Float value) {
-    return isDirty(17);
+  public boolean isScoreDirty(java.lang.Float value) {
+    return isDirty(16);
   }
 
   /**
    * Gets the value of the 'reprUrl' field.
-   */
-  public CharSequence getReprUrl() {
+   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   */
+  public java.lang.CharSequence getReprUrl() {
     return reprUrl;
   }
 
   /**
    * Sets the value of the 'reprUrl' field.
-   * @param value the value to set.
+   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param value the value to set.
    */
-  public void setReprUrl(CharSequence value) {
+  public void setReprUrl(java.lang.CharSequence value) {
     this.reprUrl = value;
-    setDirty(18);
+    setDirty(17);
   }
   
   /**
    * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param value the value to set.
    */
-  public boolean isReprUrlDirty(CharSequence value) {
-    return isDirty(18);
+  public boolean isReprUrlDirty(java.lang.CharSequence value) {
+    return isDirty(17);
   }
 
   /**
    * Gets the value of the 'headers' field.
-   */
-  public java.util.Map<CharSequence,CharSequence> getHeaders() {
+   * Header information returned from the web server used to serve the content which is subsequently fetched. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   */
+  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getHeaders() {
     return headers;
   }
 
   /**
    * Sets the value of the 'headers' field.
-   * @param value the value to set.
+   * Header information returned from the web server used to serve the content which is subsequently fetched. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   * @param value the value to set.
    */
-  public void setHeaders(java.util.Map<CharSequence,CharSequence> value) {
+  public void setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
     this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
-    setDirty(19);
+    setDirty(18);
   }
   
   /**
    * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Header information returned from the web server used to serve the content which is subsequently fetched. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.   * @param value the value to set.
    */
-  public boolean isHeadersDirty(java.util.Map<CharSequence,CharSequence> value) {
-    return isDirty(19);
+  public boolean isHeadersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    return isDirty(18);
   }
 
   /**
    * Gets the value of the 'outlinks' field.
-   */
-  public java.util.Map<CharSequence,CharSequence> getOutlinks() {
+   * Embedded hyperlinks which direct outside of the current domain.   */
+  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getOutlinks() {
     return outlinks;
   }
 
   /**
    * Sets the value of the 'outlinks' field.
-   * @param value the value to set.
+   * Embedded hyperlinks which direct outside of the current domain.   * @param value the value to set.
    */
-  public void setOutlinks(java.util.Map<CharSequence,CharSequence> value) {
+  public void setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
     this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
-    setDirty(20);
+    setDirty(19);
   }
   
   /**
    * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Embedded hyperlinks which direct outside of the current domain.   * @param value the value to set.
    */
-  public boolean isOutlinksDirty(java.util.Map<CharSequence,CharSequence> value) {
-    return isDirty(20);
+  public boolean isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    return isDirty(19);
   }
 
   /**
    * Gets the value of the 'inlinks' field.
-   */
-  public java.util.Map<CharSequence,CharSequence> getInlinks() {
+   * Embedded hyperlinks which link to pages within the current domain.   */
+  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getInlinks() {
     return inlinks;
   }
 
   /**
    * Sets the value of the 'inlinks' field.
-   * @param value the value to set.
+   * Embedded hyperlinks which link to pages within the current domain.   * @param value the value to set.
    */
-  public void setInlinks(java.util.Map<CharSequence,CharSequence> value) {
+  public void setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
     this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
-    setDirty(21);
+    setDirty(20);
   }
   
   /**
    * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Embedded hyperlinks which link to pages within the current domain.   * @param value the value to set.
    */
-  public boolean isInlinksDirty(java.util.Map<CharSequence,CharSequence> value) {
-    return isDirty(21);
+  public boolean isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    return isDirty(20);
   }
 
   /**
    * Gets the value of the 'markers' field.
-   */
-  public java.util.Map<CharSequence,CharSequence> getMarkers() {
+   * Marker flags which represent user and machine decisions which have influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage.   */
+  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> getMarkers() {
     return markers;
   }
 
   /**
    * Sets the value of the 'markers' field.
-   * @param value the value to set.
+   * Marker flags which represent user and machine decisions which have influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage.   * @param value the value to set.
    */
-  public void setMarkers(java.util.Map<CharSequence,CharSequence> value) {
+  public void setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
     this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
-    setDirty(22);
+    setDirty(21);
   }
   
   /**
    * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Marker flags which represent user and machine decisions which have influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage.   * @param value the value to set.
    */
-  public boolean isMarkersDirty(java.util.Map<CharSequence,CharSequence> value) {
-    return isDirty(22);
+  public boolean isMarkersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
+    return isDirty(21);
   }
 
   /**
    * Gets the value of the 'metadata' field.
-   */
-  public java.util.Map<CharSequence,java.nio.ByteBuffer> getMetadata() {
+   * A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage.   */
+  public java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> getMetadata() {
     return metadata;
   }
 
   /**
    * Sets the value of the 'metadata' field.
-   * @param value the value to set.
+   * A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage.   * @param value the value to set.
    */
-  public void setMetadata(java.util.Map<CharSequence,java.nio.ByteBuffer> value) {
+  public void setMetadata(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
     this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
-    setDirty(23);
+    setDirty(22);
   }
   
   /**
    * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage.   * @param value the value to set.
    */
-  public boolean isMetadataDirty(java.util.Map<CharSequence,java.nio.ByteBuffer> value) {
-    return isDirty(23);
+  public boolean isMetadataDirty(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> value) {
+    return isDirty(22);
   }
 
   /**
    * Gets the value of the 'batchId' field.
-   */
-  public CharSequence getBatchId() {
+   * A batchId that this WebPage is assigned to. WebPages are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   */
+  public java.lang.CharSequence getBatchId() {
     return batchId;
   }
 
   /**
    * Sets the value of the 'batchId' field.
-   * @param value the value to set.
+   * A batchId that this WebPage is assigned to. WebPages are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   * @param value the value to set.
    */
-  public void setBatchId(CharSequence value) {
+  public void setBatchId(java.lang.CharSequence value) {
     this.batchId = value;
-    setDirty(24);
+    setDirty(23);
   }
   
   /**
    * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * A batchId that this WebPage is assigned to. WebPages are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.   * @param value the value to set.
    */
-  public boolean isBatchIdDirty(CharSequence value) {
-    return isDirty(24);
+  public boolean isBatchIdDirty(java.lang.CharSequence value) {
+    return isDirty(23);
   }
 
   /** Creates a new WebPage RecordBuilder */
-  public static Builder newBuilder() {
-    return new Builder();
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder() {
+    return new org.apache.nutch.storage.WebPage.Builder();
   }
   
   /** Creates a new WebPage RecordBuilder by copying an existing Builder */
-  public static Builder newBuilder(Builder other) {
-    return new Builder(other);
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage.Builder other) {
+    return new org.apache.nutch.storage.WebPage.Builder(other);
   }
   
   /** Creates a new WebPage RecordBuilder by copying an existing WebPage instance */
-  public static Builder newBuilder(WebPage other) {
-    return new Builder(other);
+  public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage other) {
+    return new org.apache.nutch.storage.WebPage.Builder(other);
   }
   
-  private static java.nio.ByteBuffer deepCopyToWriteOnlyBuffer(
+  private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
       java.nio.ByteBuffer input) {
     java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
     int position = input.position();
@@ -837,8 +853,7 @@ public class WebPage extends org.apache.
   public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<WebPage>
     implements org.apache.avro.data.RecordBuilder<WebPage> {
 
-    private java.nio.ByteBuffer __g__dirty;
-    private CharSequence baseUrl;
+    private java.lang.CharSequence baseUrl;
     private int status;
     private long fetchTime;
     private long prevFetchTime;
@@ -846,353 +861,349 @@ public class WebPage extends org.apache.
     private int retriesSinceFetch;
     private long modifiedTime;
     private long prevModifiedTime;
-    private ProtocolStatus protocolStatus;
+    private org.apache.nutch.storage.ProtocolStatus protocolStatus;
     private java.nio.ByteBuffer content;
-    private CharSequence contentType;
+    private java.lang.CharSequence contentType;
     private java.nio.ByteBuffer prevSignature;
     private java.nio.ByteBuffer signature;
-    private CharSequence title;
-    private CharSequence text;
-    private ParseStatus parseStatus;
+    private java.lang.CharSequence title;
+    private java.lang.CharSequence text;
+    private org.apache.nutch.storage.ParseStatus parseStatus;
     private float score;
-    private CharSequence reprUrl;
-    private java.util.Map<CharSequence,CharSequence> headers;
-    private java.util.Map<CharSequence,CharSequence> outlinks;
-    private java.util.Map<CharSequence,CharSequence> inlinks;
-    private java.util.Map<CharSequence,CharSequence> markers;
-    private java.util.Map<CharSequence,java.nio.ByteBuffer> metadata;
-    private CharSequence batchId;
+    private java.lang.CharSequence reprUrl;
+    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
+    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> outlinks;
+    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
+    private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
+    private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
+    private java.lang.CharSequence batchId;
 
     /** Creates a new Builder */
     private Builder() {
-      super(WebPage.SCHEMA$);
+      super(org.apache.nutch.storage.WebPage.SCHEMA$);
     }
     
     /** Creates a Builder by copying an existing Builder */
-    private Builder(Builder other) {
+    private Builder(org.apache.nutch.storage.WebPage.Builder other) {
       super(other);
     }
     
     /** Creates a Builder by copying an existing WebPage instance */
-    private Builder(WebPage other) {
-            super(WebPage.SCHEMA$);
-      if (isValidValue(fields()[0], other.__g__dirty)) {
-        this.__g__dirty = (java.nio.ByteBuffer) data().deepCopy(fields()[0].schema(), other.__g__dirty);
+    private Builder(org.apache.nutch.storage.WebPage other) {
+            super(org.apache.nutch.storage.WebPage.SCHEMA$);
+      if (isValidValue(fields()[0], other.baseUrl)) {
+        this.baseUrl = (java.lang.CharSequence) data().deepCopy(fields()[0].schema(), other.baseUrl);
         fieldSetFlags()[0] = true;
       }
-      if (isValidValue(fields()[1], other.baseUrl)) {
-        this.baseUrl = (CharSequence) data().deepCopy(fields()[1].schema(), other.baseUrl);
+      if (isValidValue(fields()[1], other.status)) {
+        this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.status);
         fieldSetFlags()[1] = true;
       }
-      if (isValidValue(fields()[2], other.status)) {
-        this.status = (Integer) data().deepCopy(fields()[2].schema(), other.status);
+      if (isValidValue(fields()[2], other.fetchTime)) {
+        this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.fetchTime);
         fieldSetFlags()[2] = true;
       }
-      if (isValidValue(fields()[3], other.fetchTime)) {
-        this.fetchTime = (Long) data().deepCopy(fields()[3].schema(), other.fetchTime);
+      if (isValidValue(fields()[3], other.prevFetchTime)) {
+        this.prevFetchTime = (java.lang.Long) data().deepCopy(fields()[3].schema(), other.prevFetchTime);
         fieldSetFlags()[3] = true;
       }
-      if (isValidValue(fields()[4], other.prevFetchTime)) {
-        this.prevFetchTime = (Long) data().deepCopy(fields()[4].schema(), other.prevFetchTime);
+      if (isValidValue(fields()[4], other.fetchInterval)) {
+        this.fetchInterval = (java.lang.Integer) data().deepCopy(fields()[4].schema(), other.fetchInterval);
         fieldSetFlags()[4] = true;
       }
-      if (isValidValue(fields()[5], other.fetchInterval)) {
-        this.fetchInterval = (Integer) data().deepCopy(fields()[5].schema(), other.fetchInterval);
+      if (isValidValue(fields()[5], other.retriesSinceFetch)) {
+        this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(fields()[5].schema(), other.retriesSinceFetch);
         fieldSetFlags()[5] = true;
       }
-      if (isValidValue(fields()[6], other.retriesSinceFetch)) {
-        this.retriesSinceFetch = (Integer) data().deepCopy(fields()[6].schema(), other.retriesSinceFetch);
+      if (isValidValue(fields()[6], other.modifiedTime)) {
+        this.modifiedTime = (java.lang.Long) data().deepCopy(fields()[6].schema(), other.modifiedTime);
         fieldSetFlags()[6] = true;
       }
-      if (isValidValue(fields()[7], other.modifiedTime)) {
-        this.modifiedTime = (Long) data().deepCopy(fields()[7].schema(), other.modifiedTime);
+      if (isValidValue(fields()[7], other.prevModifiedTime)) {
+        this.prevModifiedTime = (java.lang.Long) data().deepCopy(fields()[7].schema(), other.prevModifiedTime);
         fieldSetFlags()[7] = true;
       }
-      if (isValidValue(fields()[8], other.prevModifiedTime)) {
-        this.prevModifiedTime = (Long) data().deepCopy(fields()[8].schema(), other.prevModifiedTime);
+      if (isValidValue(fields()[8], other.protocolStatus)) {
+        this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data().deepCopy(fields()[8].schema(), other.protocolStatus);
         fieldSetFlags()[8] = true;
       }
-      if (isValidValue(fields()[9], other.protocolStatus)) {
-        this.protocolStatus = (ProtocolStatus) data().deepCopy(fields()[9].schema(), other.protocolStatus);
+      if (isValidValue(fields()[9], other.content)) {
+        this.content = (java.nio.ByteBuffer) data().deepCopy(fields()[9].schema(), other.content);
         fieldSetFlags()[9] = true;
       }
-      if (isValidValue(fields()[10], other.content)) {
-        this.content = (java.nio.ByteBuffer) data().deepCopy(fields()[10].schema(), other.content);
+      if (isValidValue(fields()[10], other.contentType)) {
+        this.contentType = (java.lang.CharSequence) data().deepCopy(fields()[10].schema(), other.contentType);
         fieldSetFlags()[10] = true;
       }
-      if (isValidValue(fields()[11], other.contentType)) {
-        this.contentType = (CharSequence) data().deepCopy(fields()[11].schema(), other.contentType);
+      if (isValidValue(fields()[11], other.prevSignature)) {
+        this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(fields()[11].schema(), other.prevSignature);
         fieldSetFlags()[11] = true;
       }
-      if (isValidValue(fields()[12], other.prevSignature)) {
-        this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(fields()[12].schema(), other.prevSignature);
+      if (isValidValue(fields()[12], other.signature)) {
+        this.signature = (java.nio.ByteBuffer) data().deepCopy(fields()[12].schema(), other.signature);
         fieldSetFlags()[12] = true;
       }
-      if (isValidValue(fields()[13], other.signature)) {
-        this.signature = (java.nio.ByteBuffer) data().deepCopy(fields()[13].schema(), other.signature);
+      if (isValidValue(fields()[13], other.title)) {
+        this.title = (java.lang.CharSequence) data().deepCopy(fields()[13].schema(), other.title);
         fieldSetFlags()[13] = true;
       }
-      if (isValidValue(fields()[14], other.title)) {
-        this.title = (CharSequence) data().deepCopy(fields()[14].schema(), other.title);
+      if (isValidValue(fields()[14], other.text)) {
+        this.text = (java.lang.CharSequence) data().deepCopy(fields()[14].schema(), other.text);
         fieldSetFlags()[14] = true;
       }
-      if (isValidValue(fields()[15], other.text)) {
-        this.text = (CharSequence) data().deepCopy(fields()[15].schema(), other.text);
+      if (isValidValue(fields()[15], other.parseStatus)) {
+        this.parseStatus = (org.apache.nutch.storage.ParseStatus) data().deepCopy(fields()[15].schema(), other.parseStatus);
         fieldSetFlags()[15] = true;
       }
-      if (isValidValue(fields()[16], other.parseStatus)) {
-        this.parseStatus = (ParseStatus) data().deepCopy(fields()[16].schema(), other.parseStatus);
+      if (isValidValue(fields()[16], other.score)) {
+        this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(), other.score);
         fieldSetFlags()[16] = true;
       }
-      if (isValidValue(fields()[17], other.score)) {
-        this.score = (Float) data().deepCopy(fields()[17].schema(), other.score);
+      if (isValidValue(fields()[17], other.reprUrl)) {
+        this.reprUrl = (java.lang.CharSequence) data().deepCopy(fields()[17].schema(), other.reprUrl);
         fieldSetFlags()[17] = true;
       }
-      if (isValidValue(fields()[18], other.reprUrl)) {
-        this.reprUrl = (CharSequence) data().deepCopy(fields()[18].schema(), other.reprUrl);
+      if (isValidValue(fields()[18], other.headers)) {
+        this.headers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[18].schema(), other.headers);
         fieldSetFlags()[18] = true;
       }
-      if (isValidValue(fields()[19], other.headers)) {
-        this.headers = (java.util.Map<CharSequence,CharSequence>) data().deepCopy(fields()[19].schema(), other.headers);
+      if (isValidValue(fields()[19], other.outlinks)) {
+        this.outlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[19].schema(), other.outlinks);
         fieldSetFlags()[19] = true;
       }
-      if (isValidValue(fields()[20], other.outlinks)) {
-        this.outlinks = (java.util.Map<CharSequence,CharSequence>) data().deepCopy(fields()[20].schema(), other.outlinks);
+      if (isValidValue(fields()[20], other.inlinks)) {
+        this.inlinks = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[20].schema(), other.inlinks);
         fieldSetFlags()[20] = true;
       }
-      if (isValidValue(fields()[21], other.inlinks)) {
-        this.inlinks = (java.util.Map<CharSequence,CharSequence>) data().deepCopy(fields()[21].schema(), other.inlinks);
+      if (isValidValue(fields()[21], other.markers)) {
+        this.markers = (java.util.Map<java.lang.CharSequence,java.lang.CharSequence>) data().deepCopy(fields()[21].schema(), other.markers);
         fieldSetFlags()[21] = true;
       }
-      if (isValidValue(fields()[22], other.markers)) {
-        this.markers = (java.util.Map<CharSequence,CharSequence>) data().deepCopy(fields()[22].schema(), other.markers);
+      if (isValidValue(fields()[22], other.metadata)) {
+        this.metadata = (java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>) data().deepCopy(fields()[22].schema(), other.metadata);
         fieldSetFlags()[22] = true;
       }
-      if (isValidValue(fields()[23], other.metadata)) {
-        this.metadata = (java.util.Map<CharSequence,java.nio.ByteBuffer>) data().deepCopy(fields()[23].schema(), other.metadata);
+      if (isValidValue(fields()[23], other.batchId)) {
+        this.batchId = (java.lang.CharSequence) data().deepCopy(fields()[23].schema(), other.batchId);
         fieldSetFlags()[23] = true;
       }
-      if (isValidValue(fields()[24], other.batchId)) {
-        this.batchId = (CharSequence) data().deepCopy(fields()[24].schema(), other.batchId);
-        fieldSetFlags()[24] = true;
-      }
     }
 
     /** Gets the value of the 'baseUrl' field */
-    public CharSequence getBaseUrl() {
+    public java.lang.CharSequence getBaseUrl() {
       return baseUrl;
     }
     
     /** Sets the value of the 'baseUrl' field */
-    public Builder setBaseUrl(CharSequence value) {
-      validate(fields()[1], value);
+    public org.apache.nutch.storage.WebPage.Builder setBaseUrl(java.lang.CharSequence value) {
+      validate(fields()[0], value);
       this.baseUrl = value;
-      fieldSetFlags()[1] = true;
+      fieldSetFlags()[0] = true;
       return this; 
     }
     
     /** Checks whether the 'baseUrl' field has been set */
     public boolean hasBaseUrl() {
-      return fieldSetFlags()[1];
+      return fieldSetFlags()[0];
     }
     
     /** Clears the value of the 'baseUrl' field */
-    public Builder clearBaseUrl() {
+    public org.apache.nutch.storage.WebPage.Builder clearBaseUrl() {
       baseUrl = null;
-      fieldSetFlags()[1] = false;
+      fieldSetFlags()[0] = false;
       return this;
     }
     
     /** Gets the value of the 'status' field */
-    public Integer getStatus() {
+    public java.lang.Integer getStatus() {
       return status;
     }
     
     /** Sets the value of the 'status' field */
-    public Builder setStatus(int value) {
-      validate(fields()[2], value);
+    public org.apache.nutch.storage.WebPage.Builder setStatus(int value) {
+      validate(fields()[1], value);
       this.status = value;
-      fieldSetFlags()[2] = true;
+      fieldSetFlags()[1] = true;
       return this; 
     }
     
     /** Checks whether the 'status' field has been set */
     public boolean hasStatus() {
-      return fieldSetFlags()[2];
+      return fieldSetFlags()[1];
     }
     
     /** Clears the value of the 'status' field */
-    public Builder clearStatus() {
-      fieldSetFlags()[2] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearStatus() {
+      fieldSetFlags()[1] = false;
       return this;
     }
     
     /** Gets the value of the 'fetchTime' field */
-    public Long getFetchTime() {
+    public java.lang.Long getFetchTime() {
       return fetchTime;
     }
     
     /** Sets the value of the 'fetchTime' field */
-    public Builder setFetchTime(long value) {
-      validate(fields()[3], value);
+    public org.apache.nutch.storage.WebPage.Builder setFetchTime(long value) {
+      validate(fields()[2], value);
       this.fetchTime = value;
-      fieldSetFlags()[3] = true;
+      fieldSetFlags()[2] = true;
       return this; 
     }
     
     /** Checks whether the 'fetchTime' field has been set */
     public boolean hasFetchTime() {
-      return fieldSetFlags()[3];
+      return fieldSetFlags()[2];
     }
     
     /** Clears the value of the 'fetchTime' field */
-    public Builder clearFetchTime() {
-      fieldSetFlags()[3] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearFetchTime() {
+      fieldSetFlags()[2] = false;
       return this;
     }
     
     /** Gets the value of the 'prevFetchTime' field */
-    public Long getPrevFetchTime() {
+    public java.lang.Long getPrevFetchTime() {
       return prevFetchTime;
     }
     
     /** Sets the value of the 'prevFetchTime' field */
-    public Builder setPrevFetchTime(long value) {
-      validate(fields()[4], value);
+    public org.apache.nutch.storage.WebPage.Builder setPrevFetchTime(long value) {
+      validate(fields()[3], value);
       this.prevFetchTime = value;
-      fieldSetFlags()[4] = true;
+      fieldSetFlags()[3] = true;
       return this; 
     }
     
     /** Checks whether the 'prevFetchTime' field has been set */
     public boolean hasPrevFetchTime() {
-      return fieldSetFlags()[4];
+      return fieldSetFlags()[3];
     }
     
     /** Clears the value of the 'prevFetchTime' field */
-    public Builder clearPrevFetchTime() {
-      fieldSetFlags()[4] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearPrevFetchTime() {
+      fieldSetFlags()[3] = false;
       return this;
     }
     
     /** Gets the value of the 'fetchInterval' field */
-    public Integer getFetchInterval() {
+    public java.lang.Integer getFetchInterval() {
       return fetchInterval;
     }
     
     /** Sets the value of the 'fetchInterval' field */
-    public Builder setFetchInterval(int value) {
-      validate(fields()[5], value);
+    public org.apache.nutch.storage.WebPage.Builder setFetchInterval(int value) {
+      validate(fields()[4], value);
       this.fetchInterval = value;
-      fieldSetFlags()[5] = true;
+      fieldSetFlags()[4] = true;
       return this; 
     }
     
     /** Checks whether the 'fetchInterval' field has been set */
     public boolean hasFetchInterval() {
-      return fieldSetFlags()[5];
+      return fieldSetFlags()[4];
     }
     
     /** Clears the value of the 'fetchInterval' field */
-    public Builder clearFetchInterval() {
-      fieldSetFlags()[5] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearFetchInterval() {
+      fieldSetFlags()[4] = false;
       return this;
     }
     
     /** Gets the value of the 'retriesSinceFetch' field */
-    public Integer getRetriesSinceFetch() {
+    public java.lang.Integer getRetriesSinceFetch() {
       return retriesSinceFetch;
     }
     
     /** Sets the value of the 'retriesSinceFetch' field */
-    public Builder setRetriesSinceFetch(int value) {
-      validate(fields()[6], value);
+    public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(int value) {
+      validate(fields()[5], value);
       this.retriesSinceFetch = value;
-      fieldSetFlags()[6] = true;
+      fieldSetFlags()[5] = true;
       return this; 
     }
     
     /** Checks whether the 'retriesSinceFetch' field has been set */
     public boolean hasRetriesSinceFetch() {
-      return fieldSetFlags()[6];
+      return fieldSetFlags()[5];
     }
     
     /** Clears the value of the 'retriesSinceFetch' field */
-    public Builder clearRetriesSinceFetch() {
-      fieldSetFlags()[6] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearRetriesSinceFetch() {
+      fieldSetFlags()[5] = false;
       return this;
     }
     
     /** Gets the value of the 'modifiedTime' field */
-    public Long getModifiedTime() {
+    public java.lang.Long getModifiedTime() {
       return modifiedTime;
     }
     
     /** Sets the value of the 'modifiedTime' field */
-    public Builder setModifiedTime(long value) {
-      validate(fields()[7], value);
+    public org.apache.nutch.storage.WebPage.Builder setModifiedTime(long value) {
+      validate(fields()[6], value);
       this.modifiedTime = value;
-      fieldSetFlags()[7] = true;
+      fieldSetFlags()[6] = true;
       return this; 
     }
     
     /** Checks whether the 'modifiedTime' field has been set */
     public boolean hasModifiedTime() {
-      return fieldSetFlags()[7];
+      return fieldSetFlags()[6];
     }
     
     /** Clears the value of the 'modifiedTime' field */
-    public Builder clearModifiedTime() {
-      fieldSetFlags()[7] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearModifiedTime() {
+      fieldSetFlags()[6] = false;
       return this;
     }
     
     /** Gets the value of the 'prevModifiedTime' field */
-    public Long getPrevModifiedTime() {
+    public java.lang.Long getPrevModifiedTime() {
       return prevModifiedTime;
     }
     
     /** Sets the value of the 'prevModifiedTime' field */
-    public Builder setPrevModifiedTime(long value) {
-      validate(fields()[8], value);
+    public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(long value) {
+      validate(fields()[7], value);
       this.prevModifiedTime = value;
-      fieldSetFlags()[8] = true;
+      fieldSetFlags()[7] = true;
       return this; 
     }
     
     /** Checks whether the 'prevModifiedTime' field has been set */
     public boolean hasPrevModifiedTime() {
-      return fieldSetFlags()[8];
+      return fieldSetFlags()[7];
     }
     
     /** Clears the value of the 'prevModifiedTime' field */
-    public Builder clearPrevModifiedTime() {
-      fieldSetFlags()[8] = false;
+    public org.apache.nutch.storage.WebPage.Builder clearPrevModifiedTime() {
+      fieldSetFlags()[7] = false;
       return this;
     }
     
     /** Gets the value of the 'protocolStatus' field */
-    public ProtocolStatus getProtocolStatus() {
+    public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
       return protocolStatus;
     }
     
     /** Sets the value of the 'protocolStatus' field */
-    public Builder setProtocolStatus(ProtocolStatus value) {
-      validate(fields()[9], value);
+    public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
+      validate(fields()[8], value);
       this.protocolStatus = value;
-      fieldSetFlags()[9] = true;
+      fieldSetFlags()[8] = true;
       return this; 
     }
     
     /** Checks whether the 'protocolStatus' field has been set */
     public boolean hasProtocolStatus() {
-      return fieldSetFlags()[9];
+      return fieldSetFlags()[8];
     }
     
     /** Clears the value of the 'protocolStatus' field */
-    public Builder clearProtocolStatus() {
+    public org.apache.nutch.storage.WebPage.Builder clearProtocolStatus() {
       protocolStatus = null;
-      fieldSetFlags()[9] = false;
+      fieldSetFlags()[8] = false;
       return this;
     }
     
@@ -1202,47 +1213,47 @@ public class WebPage extends org.apache.
     }
     
     /** Sets the value of the 'content' field */
-    public Builder setContent(java.nio.ByteBuffer value) {
-      validate(fields()[10], value);
+    public org.apache.nutch.storage.WebPage.Builder setContent(java.nio.ByteBuffer value) {
+      validate(fields()[9], value);
       this.content = value;
-      fieldSetFlags()[10] = true;
+      fieldSetFlags()[9] = true;
       return this; 
     }
     
     /** Checks whether the 'content' field has been set */
     public boolean hasContent() {
-      return fieldSetFlags()[10];
+      return fieldSetFlags()[9];
     }
     
     /** Clears the value of the 'content' field */
-    public Builder clearContent() {
+    public org.apache.nutch.storage.WebPage.Builder clearContent() {
       content = null;
-      fieldSetFlags()[10] = false;
+      fieldSetFlags()[9] = false;
       return this;
     }
     
     /** Gets the value of the 'contentType' field */
-    public CharSequence getContentType() {
+    public java.lang.CharSequence getContentType() {
       return contentType;
     }
     
     /** Sets the value of the 'contentType' field */
-    public Builder setContentType(CharSequence value) {
-      validate(fields()[11], value);

[... 1263 lines stripped ...]