You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/16 19:49:07 UTC

[51/51] [partial] nutch git commit: NUTCH-2292 : Mavenize the build for nutch-core and nutch-plugins

NUTCH-2292 : Mavenize the build for nutch-core and nutch-plugins


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/0bf453e5
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/0bf453e5
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/0bf453e5

Branch: refs/heads/NUTCH-2292
Commit: 0bf453e5754967541a0798585dbe115630679c5f
Parents: 5943d11
Author: Thamme Gowda <th...@apache.org>
Authored: Sat Jul 16 12:47:08 2016 -0700
Committer: Thamme Gowda <th...@apache.org>
Committed: Sat Jul 16 12:47:08 2016 -0700

----------------------------------------------------------------------
 .gitignore                                      |   6 +
 bin/crawl                                       | 281 ++++++
 bin/nutch                                       | 324 +++++++
 nutch-core/pom.xml                              | 522 +++++++++++
 .../nutch/crawl/AbstractFetchSchedule.java      | 227 +++++
 .../nutch/crawl/AdaptiveFetchSchedule.java      | 203 +++++
 .../java/org/apache/nutch/crawl/CrawlDatum.java | 572 ++++++++++++
 .../java/org/apache/nutch/crawl/CrawlDb.java    | 349 ++++++++
 .../org/apache/nutch/crawl/CrawlDbFilter.java   | 111 +++
 .../org/apache/nutch/crawl/CrawlDbMerger.java   | 216 +++++
 .../org/apache/nutch/crawl/CrawlDbReader.java   | 887 +++++++++++++++++++
 .../org/apache/nutch/crawl/CrawlDbReducer.java  | 339 +++++++
 .../apache/nutch/crawl/DeduplicationJob.java    | 389 ++++++++
 .../nutch/crawl/DefaultFetchSchedule.java       |  45 +
 .../org/apache/nutch/crawl/FetchSchedule.java   | 208 +++++
 .../nutch/crawl/FetchScheduleFactory.java       |  53 ++
 .../java/org/apache/nutch/crawl/Generator.java  | 859 ++++++++++++++++++
 .../java/org/apache/nutch/crawl/Injector.java   | 510 +++++++++++
 .../java/org/apache/nutch/crawl/Inlink.java     |  83 ++
 .../java/org/apache/nutch/crawl/Inlinks.java    | 110 +++
 .../java/org/apache/nutch/crawl/LinkDb.java     | 428 +++++++++
 .../org/apache/nutch/crawl/LinkDbFilter.java    | 128 +++
 .../org/apache/nutch/crawl/LinkDbMerger.java    | 204 +++++
 .../org/apache/nutch/crawl/LinkDbReader.java    | 203 +++++
 .../org/apache/nutch/crawl/MD5Signature.java    |  39 +
 .../nutch/crawl/MimeAdaptiveFetchSchedule.java  | 236 +++++
 .../org/apache/nutch/crawl/NutchWritable.java   |  66 ++
 .../java/org/apache/nutch/crawl/Signature.java  |  37 +
 .../apache/nutch/crawl/SignatureComparator.java |  57 ++
 .../apache/nutch/crawl/SignatureFactory.java    |  62 ++
 .../apache/nutch/crawl/TextMD5Signature.java    |  42 +
 .../nutch/crawl/TextProfileSignature.java       | 199 +++++
 .../org/apache/nutch/crawl/URLPartitioner.java  |  96 ++
 .../java/org/apache/nutch/crawl/package.html    |   5 +
 .../org/apache/nutch/fetcher/FetchItem.java     | 118 +++
 .../apache/nutch/fetcher/FetchItemQueue.java    | 139 +++
 .../apache/nutch/fetcher/FetchItemQueues.java   | 212 +++++
 .../org/apache/nutch/fetcher/FetchNode.java     |  59 ++
 .../org/apache/nutch/fetcher/FetchNodeDb.java   |  49 +
 .../java/org/apache/nutch/fetcher/Fetcher.java  | 600 +++++++++++++
 .../nutch/fetcher/FetcherOutputFormat.java      | 123 +++
 .../org/apache/nutch/fetcher/FetcherThread.java | 768 ++++++++++++++++
 .../org/apache/nutch/fetcher/QueueFeeder.java   | 104 +++
 .../java/org/apache/nutch/fetcher/package.html  |   5 +
 .../java/org/apache/nutch/hostdb/HostDatum.java | 324 +++++++
 .../org/apache/nutch/hostdb/ReadHostDb.java     | 240 +++++
 .../org/apache/nutch/hostdb/ResolverThread.java | 121 +++
 .../org/apache/nutch/hostdb/UpdateHostDb.java   | 259 ++++++
 .../apache/nutch/hostdb/UpdateHostDbMapper.java | 239 +++++
 .../nutch/hostdb/UpdateHostDbReducer.java       | 427 +++++++++
 .../org/apache/nutch/indexer/CleaningJob.java   | 210 +++++
 .../org/apache/nutch/indexer/IndexWriter.java   |  47 +
 .../org/apache/nutch/indexer/IndexWriters.java  | 145 +++
 .../apache/nutch/indexer/IndexerMapReduce.java  | 422 +++++++++
 .../nutch/indexer/IndexerOutputFormat.java      |  57 ++
 .../apache/nutch/indexer/IndexingException.java |  39 +
 .../apache/nutch/indexer/IndexingFilter.java    |  61 ++
 .../apache/nutch/indexer/IndexingFilters.java   |  60 ++
 .../nutch/indexer/IndexingFiltersChecker.java   | 371 ++++++++
 .../org/apache/nutch/indexer/IndexingJob.java   | 358 ++++++++
 .../org/apache/nutch/indexer/NutchDocument.java | 144 +++
 .../org/apache/nutch/indexer/NutchField.java    | 137 +++
 .../apache/nutch/indexer/NutchIndexAction.java  |  58 ++
 .../java/org/apache/nutch/indexer/package.html  |  10 +
 .../apache/nutch/metadata/CreativeCommons.java  |  35 +
 .../org/apache/nutch/metadata/DublinCore.java   | 161 ++++
 .../java/org/apache/nutch/metadata/Feed.java    |  38 +
 .../org/apache/nutch/metadata/HttpHeaders.java  |  51 ++
 .../org/apache/nutch/metadata/MetaWrapper.java  | 120 +++
 .../org/apache/nutch/metadata/Metadata.java     | 280 ++++++
 .../java/org/apache/nutch/metadata/Nutch.java   |  98 ++
 .../nutch/metadata/SpellCheckedMetadata.java    | 150 ++++
 .../java/org/apache/nutch/metadata/package.html |   6 +
 .../apache/nutch/net/URLExemptionFilter.java    |  43 +
 .../apache/nutch/net/URLExemptionFilters.java   |  64 ++
 .../java/org/apache/nutch/net/URLFilter.java    |  40 +
 .../org/apache/nutch/net/URLFilterChecker.java  | 134 +++
 .../apache/nutch/net/URLFilterException.java    |  39 +
 .../java/org/apache/nutch/net/URLFilters.java   |  44 +
 .../org/apache/nutch/net/URLNormalizer.java     |  37 +
 .../apache/nutch/net/URLNormalizerChecker.java  | 117 +++
 .../org/apache/nutch/net/URLNormalizers.java    | 325 +++++++
 .../java/org/apache/nutch/net/package-info.java |  23 +
 .../nutch/net/protocols/HttpDateFormat.java     | 124 +++
 .../nutch/net/protocols/ProtocolException.java  |  47 +
 .../apache/nutch/net/protocols/Response.java    |  46 +
 .../nutch/net/protocols/package-info.java       |  23 +
 .../org/apache/nutch/parse/HTMLMetaTags.java    | 203 +++++
 .../org/apache/nutch/parse/HtmlParseFilter.java |  45 +
 .../apache/nutch/parse/HtmlParseFilters.java    |  62 ++
 .../java/org/apache/nutch/parse/Outlink.java    | 135 +++
 .../apache/nutch/parse/OutlinkExtractor.java    | 145 +++
 .../main/java/org/apache/nutch/parse/Parse.java |  38 +
 .../org/apache/nutch/parse/ParseCallable.java   |  37 +
 .../java/org/apache/nutch/parse/ParseData.java  | 255 ++++++
 .../org/apache/nutch/parse/ParseException.java  |  39 +
 .../java/org/apache/nutch/parse/ParseImpl.java  |  87 ++
 .../apache/nutch/parse/ParseOutputFormat.java   | 398 +++++++++
 .../org/apache/nutch/parse/ParsePluginList.java |  71 ++
 .../apache/nutch/parse/ParsePluginsReader.java  | 278 ++++++
 .../org/apache/nutch/parse/ParseResult.java     | 178 ++++
 .../org/apache/nutch/parse/ParseSegment.java    | 309 +++++++
 .../org/apache/nutch/parse/ParseStatus.java     | 311 +++++++
 .../java/org/apache/nutch/parse/ParseText.java  | 119 +++
 .../java/org/apache/nutch/parse/ParseUtil.java  | 181 ++++
 .../java/org/apache/nutch/parse/Parser.java     |  58 ++
 .../org/apache/nutch/parse/ParserChecker.java   | 270 ++++++
 .../org/apache/nutch/parse/ParserFactory.java   | 428 +++++++++
 .../org/apache/nutch/parse/ParserNotFound.java  |  47 +
 .../org/apache/nutch/parse/package-info.java    |  22 +
 .../plugin/CircularDependencyException.java     |  36 +
 .../java/org/apache/nutch/plugin/Extension.java | 194 ++++
 .../org/apache/nutch/plugin/ExtensionPoint.java | 123 +++
 .../plugin/MissingDependencyException.java      |  36 +
 .../java/org/apache/nutch/plugin/Pluggable.java |  31 +
 .../java/org/apache/nutch/plugin/Plugin.java    |  95 ++
 .../apache/nutch/plugin/PluginClassLoader.java  |  80 ++
 .../apache/nutch/plugin/PluginDescriptor.java   | 363 ++++++++
 .../nutch/plugin/PluginManifestParser.java      | 303 +++++++
 .../apache/nutch/plugin/PluginRepository.java   | 523 +++++++++++
 .../nutch/plugin/PluginRuntimeException.java    |  37 +
 .../java/org/apache/nutch/plugin/package.html   |  40 +
 .../java/org/apache/nutch/protocol/Content.java | 296 +++++++
 .../org/apache/nutch/protocol/Protocol.java     |  68 ++
 .../nutch/protocol/ProtocolException.java       |  39 +
 .../apache/nutch/protocol/ProtocolFactory.java  | 119 +++
 .../apache/nutch/protocol/ProtocolNotFound.java |  36 +
 .../apache/nutch/protocol/ProtocolOutput.java   |  55 ++
 .../apache/nutch/protocol/ProtocolStatus.java   | 297 +++++++
 .../apache/nutch/protocol/RobotRulesParser.java | 325 +++++++
 .../org/apache/nutch/protocol/package-info.java |  23 +
 .../nutch/scoring/AbstractScoringFilter.java    |  68 ++
 .../org/apache/nutch/scoring/ScoringFilter.java | 213 +++++
 .../nutch/scoring/ScoringFilterException.java   |  43 +
 .../apache/nutch/scoring/ScoringFilters.java    | 118 +++
 .../org/apache/nutch/scoring/package-info.java  |  22 +
 .../nutch/scoring/webgraph/LinkDatum.java       | 140 +++
 .../nutch/scoring/webgraph/LinkDumper.java      | 433 +++++++++
 .../apache/nutch/scoring/webgraph/LinkRank.java | 677 ++++++++++++++
 .../org/apache/nutch/scoring/webgraph/Node.java | 102 +++
 .../nutch/scoring/webgraph/NodeDumper.java      | 433 +++++++++
 .../nutch/scoring/webgraph/NodeReader.java      | 136 +++
 .../nutch/scoring/webgraph/ScoreUpdater.java    | 253 ++++++
 .../apache/nutch/scoring/webgraph/WebGraph.java | 783 ++++++++++++++++
 .../nutch/scoring/webgraph/package-info.java    |  24 +
 .../nutch/segment/ContentAsTextInputFormat.java | 104 +++
 .../apache/nutch/segment/SegmentChecker.java    | 136 +++
 .../nutch/segment/SegmentMergeFilter.java       |  47 +
 .../nutch/segment/SegmentMergeFilters.java      |  84 ++
 .../org/apache/nutch/segment/SegmentMerger.java | 793 +++++++++++++++++
 .../org/apache/nutch/segment/SegmentPart.java   | 113 +++
 .../org/apache/nutch/segment/SegmentReader.java | 719 +++++++++++++++
 .../org/apache/nutch/segment/package-info.java  |  23 +
 .../org/apache/nutch/service/ConfManager.java   |  39 +
 .../org/apache/nutch/service/JobManager.java    |  44 +
 .../org/apache/nutch/service/NutchReader.java   |  37 +
 .../org/apache/nutch/service/NutchServer.java   | 224 +++++
 .../nutch/service/impl/ConfManagerImpl.java     | 132 +++
 .../apache/nutch/service/impl/JobFactory.java   |  75 ++
 .../nutch/service/impl/JobManagerImpl.java      |  95 ++
 .../apache/nutch/service/impl/JobWorker.java    | 114 +++
 .../apache/nutch/service/impl/LinkReader.java   | 175 ++++
 .../apache/nutch/service/impl/NodeReader.java   | 184 ++++
 .../service/impl/NutchServerPoolExecutor.java   | 131 +++
 .../nutch/service/impl/SequenceReader.java      | 171 ++++
 .../nutch/service/model/request/DbQuery.java    |  56 ++
 .../nutch/service/model/request/JobConfig.java  |  71 ++
 .../service/model/request/NutchConfig.java      |  51 ++
 .../service/model/request/ReaderConfig.java     |  30 +
 .../nutch/service/model/request/SeedList.java   |  93 ++
 .../nutch/service/model/request/SeedUrl.java    |  89 ++
 .../service/model/response/FetchNodeDbInfo.java | 103 +++
 .../nutch/service/model/response/JobInfo.java   | 102 +++
 .../service/model/response/NutchServerInfo.java |  55 ++
 .../service/resources/AbstractResource.java     |  45 +
 .../nutch/service/resources/AdminResource.java  |  85 ++
 .../nutch/service/resources/ConfigResource.java | 137 +++
 .../nutch/service/resources/DbResource.java     | 143 +++
 .../nutch/service/resources/JobResource.java    |  99 +++
 .../nutch/service/resources/ReaderResouce.java  | 177 ++++
 .../nutch/service/resources/SeedResource.java   | 111 +++
 .../nutch/tools/AbstractCommonCrawlFormat.java  | 393 ++++++++
 .../java/org/apache/nutch/tools/Benchmark.java  | 284 ++++++
 .../apache/nutch/tools/CommonCrawlConfig.java   | 147 +++
 .../nutch/tools/CommonCrawlDataDumper.java      | 716 +++++++++++++++
 .../apache/nutch/tools/CommonCrawlFormat.java   |  87 ++
 .../nutch/tools/CommonCrawlFormatFactory.java   |  74 ++
 .../nutch/tools/CommonCrawlFormatJackson.java   | 109 +++
 .../nutch/tools/CommonCrawlFormatJettinson.java | 122 +++
 .../nutch/tools/CommonCrawlFormatSimple.java    | 174 ++++
 .../nutch/tools/CommonCrawlFormatWARC.java      | 286 ++++++
 .../java/org/apache/nutch/tools/DmozParser.java | 391 ++++++++
 .../java/org/apache/nutch/tools/FileDumper.java | 419 +++++++++
 .../org/apache/nutch/tools/FreeGenerator.java   | 214 +++++
 .../org/apache/nutch/tools/ResolveUrls.java     | 204 +++++
 .../java/org/apache/nutch/tools/WARCUtils.java  | 154 ++++
 .../apache/nutch/tools/arc/ArcInputFormat.java  |  51 ++
 .../apache/nutch/tools/arc/ArcRecordReader.java | 299 +++++++
 .../nutch/tools/arc/ArcSegmentCreator.java      | 426 +++++++++
 .../apache/nutch/tools/arc/package-info.java    |  23 +
 .../org/apache/nutch/tools/package-info.java    |  22 +
 .../apache/nutch/tools/warc/WARCExporter.java   | 333 +++++++
 .../apache/nutch/tools/warc/package-info.java   |  23 +
 .../org/apache/nutch/util/CommandRunner.java    | 291 ++++++
 .../apache/nutch/util/CrawlCompletionStats.java | 245 +++++
 .../org/apache/nutch/util/DeflateUtils.java     | 140 +++
 .../java/org/apache/nutch/util/DomUtil.java     | 104 +++
 .../org/apache/nutch/util/DumpFileUtil.java     | 147 +++
 .../org/apache/nutch/util/EncodingDetector.java | 386 ++++++++
 .../java/org/apache/nutch/util/FSUtils.java     | 106 +++
 .../java/org/apache/nutch/util/GZIPUtils.java   | 148 ++++
 .../nutch/util/GenericWritableConfigurable.java |  60 ++
 .../org/apache/nutch/util/HadoopFSUtil.java     |  72 ++
 .../java/org/apache/nutch/util/JexlUtil.java    |  76 ++
 .../java/org/apache/nutch/util/LockUtil.java    |  84 ++
 .../java/org/apache/nutch/util/MimeUtil.java    | 279 ++++++
 .../java/org/apache/nutch/util/NodeWalker.java  | 129 +++
 .../apache/nutch/util/NutchConfiguration.java   | 104 +++
 .../java/org/apache/nutch/util/NutchJob.java    |  30 +
 .../java/org/apache/nutch/util/NutchTool.java   | 109 +++
 .../java/org/apache/nutch/util/ObjectCache.java |  56 ++
 .../apache/nutch/util/PrefixStringMatcher.java  | 119 +++
 .../nutch/util/ProtocolStatusStatistics.java    | 179 ++++
 .../java/org/apache/nutch/util/StringUtil.java  | 155 ++++
 .../apache/nutch/util/SuffixStringMatcher.java  | 114 +++
 .../java/org/apache/nutch/util/TableUtil.java   | 161 ++++
 .../java/org/apache/nutch/util/TimingUtil.java  |  72 ++
 .../apache/nutch/util/TrieStringMatcher.java    | 202 +++++
 .../java/org/apache/nutch/util/URLUtil.java     | 533 +++++++++++
 .../nutch/util/domain/DomainStatistics.java     | 234 +++++
 .../apache/nutch/util/domain/DomainSuffix.java  |  79 ++
 .../nutch/util/domain/DomainSuffixes.java       |  86 ++
 .../nutch/util/domain/DomainSuffixesReader.java | 164 ++++
 .../nutch/util/domain/TopLevelDomain.java       |  67 ++
 .../org/apache/nutch/util/domain/package.html   |  14 +
 .../org/apache/nutch/util/package-info.java     |  22 +
 .../apache/nutch/webui/NutchUiApplication.java  |  75 ++
 .../nutch/webui/NutchUiApplication.properties   |  63 ++
 .../org/apache/nutch/webui/NutchUiServer.java   | 104 +++
 .../apache/nutch/webui/client/NutchClient.java  |  49 +
 .../nutch/webui/client/NutchClientFactory.java  |  52 ++
 .../nutch/webui/client/impl/CrawlingCycle.java  |  82 ++
 .../client/impl/CrawlingCycleListener.java      |  31 +
 .../webui/client/impl/NutchClientImpl.java      |  99 +++
 .../nutch/webui/client/impl/RemoteCommand.java  |  76 ++
 .../webui/client/impl/RemoteCommandBuilder.java |  64 ++
 .../client/impl/RemoteCommandExecutor.java      | 110 +++
 .../client/impl/RemoteCommandsBatchFactory.java |  97 ++
 .../webui/client/model/ConnectionStatus.java    |  21 +
 .../apache/nutch/webui/client/model/Crawl.java  | 126 +++
 .../nutch/webui/client/model/JobConfig.java     |  77 ++
 .../nutch/webui/client/model/JobInfo.java       | 104 +++
 .../nutch/webui/client/model/NutchStatus.java   |  62 ++
 .../nutch/webui/config/CustomDaoFactory.java    |  58 ++
 .../nutch/webui/config/CustomTableCreator.java  |  83 ++
 .../webui/config/NutchGuiConfiguration.java     |  33 +
 .../nutch/webui/config/SpringConfiguration.java |  91 ++
 .../apache/nutch/webui/model/NutchConfig.java   |  24 +
 .../apache/nutch/webui/model/NutchInstance.java | 118 +++
 .../org/apache/nutch/webui/model/SeedList.java  | 106 +++
 .../org/apache/nutch/webui/model/SeedUrl.java   |  96 ++
 .../nutch/webui/pages/AbstractBasePage.html     |  33 +
 .../nutch/webui/pages/AbstractBasePage.java     | 206 +++++
 .../apache/nutch/webui/pages/DashboardPage.html |  52 ++
 .../apache/nutch/webui/pages/DashboardPage.java |  65 ++
 .../apache/nutch/webui/pages/LogOutPage.java    |  21 +
 .../nutch/webui/pages/SchedulingPage.java       |  21 +
 .../apache/nutch/webui/pages/SearchPage.java    |  21 +
 .../nutch/webui/pages/StatisticsPage.java       |  21 +
 .../nutch/webui/pages/UrlsUploadPage.java       |  21 +
 .../nutch/webui/pages/UserSettingsPage.java     |  21 +
 .../webui/pages/assets/NutchUiCssReference.java |  39 +
 .../nutch/webui/pages/assets/nutch-style.css    | 149 ++++
 .../webui/pages/components/ColorEnumLabel.java  |  71 ++
 .../pages/components/ColorEnumLabelBuilder.java |  49 +
 .../pages/components/CpmIteratorAdapter.java    |  41 +
 .../nutch/webui/pages/crawls/CrawlPanel.html    |  58 ++
 .../nutch/webui/pages/crawls/CrawlPanel.java    |  98 ++
 .../nutch/webui/pages/crawls/CrawlsPage.html    |  90 ++
 .../nutch/webui/pages/crawls/CrawlsPage.java    | 139 +++
 .../webui/pages/instances/InstancePanel.html    |  46 +
 .../webui/pages/instances/InstancePanel.java    |  62 ++
 .../webui/pages/instances/InstancesPage.html    |  66 ++
 .../webui/pages/instances/InstancesPage.java    | 127 +++
 .../nutch/webui/pages/menu/VerticalMenu.html    |  48 +
 .../nutch/webui/pages/menu/VerticalMenu.java    |  27 +
 .../nutch/webui/pages/seed/SeedListsPage.html   |  75 ++
 .../nutch/webui/pages/seed/SeedListsPage.java   |  79 ++
 .../apache/nutch/webui/pages/seed/SeedPage.html |  91 ++
 .../apache/nutch/webui/pages/seed/SeedPage.java | 153 ++++
 .../webui/pages/settings/SettingsPage.html      |  43 +
 .../webui/pages/settings/SettingsPage.java      |  59 ++
 .../nutch/webui/service/CrawlService.java       |  33 +
 .../webui/service/NutchInstanceService.java     |  33 +
 .../nutch/webui/service/NutchService.java       |  31 +
 .../nutch/webui/service/SeedListService.java    |  33 +
 .../webui/service/impl/CrawlServiceImpl.java    | 132 +++
 .../service/impl/NutchInstanceServiceImpl.java  |  76 ++
 .../webui/service/impl/NutchServiceImpl.java    |  82 ++
 .../webui/service/impl/SeedListServiceImpl.java |  77 ++
 nutch-core/src/main/java/overview.html          |   9 +
 .../nutch/crawl/ContinuousCrawlTestUtil.java    | 270 ++++++
 .../org/apache/nutch/crawl/CrawlDBTestUtil.java | 179 ++++
 .../nutch/crawl/CrawlDbUpdateTestDriver.java    | 138 +++
 .../apache/nutch/crawl/CrawlDbUpdateUtil.java   | 166 ++++
 .../org/apache/nutch/crawl/DummyWritable.java   |  32 +
 .../nutch/crawl/TODOTestCrawlDbStates.java      | 171 ++++
 .../nutch/crawl/TestAdaptiveFetchSchedule.java  | 121 +++
 .../apache/nutch/crawl/TestCrawlDbFilter.java   | 148 ++++
 .../apache/nutch/crawl/TestCrawlDbMerger.java   | 163 ++++
 .../apache/nutch/crawl/TestCrawlDbStates.java   | 569 ++++++++++++
 .../org/apache/nutch/crawl/TestGenerator.java   | 373 ++++++++
 .../org/apache/nutch/crawl/TestInjector.java    | 184 ++++
 .../apache/nutch/crawl/TestLinkDbMerger.java    | 160 ++++
 .../nutch/crawl/TestSignatureFactory.java       |  35 +
 .../org/apache/nutch/fetcher/TestFetcher.java   | 210 +++++
 .../nutch/indexer/TestIndexerMapReduce.java     | 190 ++++
 .../nutch/indexer/TestIndexingFilters.java      | 113 +++
 .../org/apache/nutch/metadata/TestMetadata.java | 281 ++++++
 .../metadata/TestSpellCheckedMetadata.java      | 303 +++++++
 .../org/apache/nutch/net/TestURLFilters.java    |  44 +
 .../apache/nutch/net/TestURLNormalizers.java    |  86 ++
 .../nutch/parse/TestOutlinkExtractor.java       |  99 +++
 .../org/apache/nutch/parse/TestParseData.java   |  58 ++
 .../org/apache/nutch/parse/TestParseText.java   |  34 +
 .../apache/nutch/parse/TestParserFactory.java   | 108 +++
 .../apache/nutch/parse/parse-plugin-test.xml    |  58 ++
 .../nutch/plugin/HelloWorldExtension.java       |  36 +
 .../org/apache/nutch/plugin/ITestExtension.java |  27 +
 .../apache/nutch/plugin/SimpleTestPlugin.java   |  57 ++
 .../apache/nutch/plugin/TestPluginSystem.java   | 305 +++++++
 .../org/apache/nutch/protocol/TestContent.java  |  94 ++
 .../nutch/protocol/TestProtocolFactory.java     |  88 ++
 .../apache/nutch/segment/TestSegmentMerger.java | 131 +++
 .../segment/TestSegmentMergerCrawlDatums.java   | 427 +++++++++
 .../apache/nutch/service/TestNutchServer.java   |  65 ++
 .../org/apache/nutch/test/IntegrationTest.java  |   6 +
 .../java/org/apache/nutch/test/TestUtils.java   |  29 +
 .../nutch/tools/TestCommonCrawlDataDumper.java  | 126 +++
 .../tools/proxy/AbstractTestbedHandler.java     |  49 +
 .../apache/nutch/tools/proxy/DelayHandler.java  |  56 ++
 .../apache/nutch/tools/proxy/FakeHandler.java   | 102 +++
 .../nutch/tools/proxy/LogDebugHandler.java      |  64 ++
 .../nutch/tools/proxy/NotFoundHandler.java      |  40 +
 .../apache/nutch/tools/proxy/ProxyTestbed.java  | 156 ++++
 .../nutch/tools/proxy/SegmentHandler.java       | 255 ++++++
 .../apache/nutch/tools/proxy/package-info.java  |  22 +
 .../org/apache/nutch/util/DumpFileUtilTest.java |  68 ++
 .../apache/nutch/util/TestEncodingDetector.java |  90 ++
 .../org/apache/nutch/util/TestGZIPUtils.java    | 241 +++++
 .../org/apache/nutch/util/TestMimeUtil.java     | 135 +++
 .../org/apache/nutch/util/TestNodeWalker.java   | 107 +++
 .../nutch/util/TestPrefixStringMatcher.java     | 115 +++
 .../org/apache/nutch/util/TestStringUtil.java   |  61 ++
 .../nutch/util/TestSuffixStringMatcher.java     | 114 +++
 .../org/apache/nutch/util/TestTableUtil.java    |  75 ++
 .../java/org/apache/nutch/util/TestURLUtil.java | 281 ++++++
 .../apache/nutch/util/WritableTestUtils.java    |  55 ++
 nutch-core/src/test/resources/crawl-tests.xml   |  62 ++
 .../src/test/resources/domain-urlfilter.txt     |  22 +
 .../resources/fetch-test-site/dup_of_pagea.html |  11 +
 .../resources/fetch-test-site/exception.html    |  13 +
 .../test/resources/fetch-test-site/index.html   |  13 +
 .../fetch-test-site/nested_spider_trap.html     |  23 +
 .../test/resources/fetch-test-site/pagea.html   |  11 +
 .../test/resources/fetch-test-site/pageb.html   |  11 +
 .../test/resources/fetch-test-site/robots.txt   |   0
 nutch-core/src/test/resources/filter-all.txt    |   7 +
 nutch-core/src/test/resources/log4j.properties  |   7 +
 nutch-core/src/test/resources/nutch-site.xml    |  19 +
 .../src/test/resources/test-mime-util/test.xlsx | Bin 0 -> 3950 bytes
 .../20150309101625/content/part-00000/.data.crc | Bin 0 -> 124 bytes
 .../content/part-00000/.index.crc               | Bin 0 -> 12 bytes
 .../20150309101625/content/part-00000/data      | Bin 0 -> 14452 bytes
 .../20150309101625/content/part-00000/index     | Bin 0 -> 217 bytes
 .../crawl_fetch/part-00000/.data.crc            | Bin 0 -> 12 bytes
 .../crawl_fetch/part-00000/.index.crc           | Bin 0 -> 12 bytes
 .../20150309101625/crawl_fetch/part-00000/data  | Bin 0 -> 293 bytes
 .../20150309101625/crawl_fetch/part-00000/index | Bin 0 -> 217 bytes
 .../crawl_generate/.part-00000.crc              | Bin 0 -> 12 bytes
 .../20150309101625/crawl_generate/part-00000    | Bin 0 -> 169 bytes
 .../20150309101625/crawl_parse/.part-00000.crc  | Bin 0 -> 68 bytes
 .../20150309101625/crawl_parse/part-00000       | Bin 0 -> 7627 bytes
 .../parse_data/part-00000/.data.crc             | Bin 0 -> 24 bytes
 .../parse_data/part-00000/.index.crc            | Bin 0 -> 12 bytes
 .../20150309101625/parse_data/part-00000/data   | Bin 0 -> 1985 bytes
 .../20150309101625/parse_data/part-00000/index  | Bin 0 -> 217 bytes
 .../parse_text/part-00000/.data.crc             | Bin 0 -> 60 bytes
 .../parse_text/part-00000/.index.crc            | Bin 0 -> 12 bytes
 .../20150309101625/parse_text/part-00000/data   | Bin 0 -> 6554 bytes
 .../20150309101625/parse_text/part-00000/index  | Bin 0 -> 217 bytes
 .../20150309101656/content/part-00000/.data.crc | Bin 0 -> 3372 bytes
 .../content/part-00000/.index.crc               | Bin 0 -> 12 bytes
 .../20150309101656/content/part-00000/data      | Bin 0 -> 430250 bytes
 .../20150309101656/content/part-00000/index     | Bin 0 -> 220 bytes
 .../crawl_fetch/part-00000/.data.crc            | Bin 0 -> 104 bytes
 .../crawl_fetch/part-00000/.index.crc           | Bin 0 -> 12 bytes
 .../20150309101656/crawl_fetch/part-00000/data  | Bin 0 -> 12121 bytes
 .../20150309101656/crawl_fetch/part-00000/index | Bin 0 -> 220 bytes
 .../crawl_generate/.part-00000.crc              | Bin 0 -> 52 bytes
 .../20150309101656/crawl_generate/part-00000    | Bin 0 -> 5590 bytes
 .../20150309101656/crawl_parse/.part-00000.crc  | Bin 0 -> 1652 bytes
 .../20150309101656/crawl_parse/part-00000       | Bin 0 -> 210047 bytes
 .../parse_data/part-00000/.data.crc             | Bin 0 -> 460 bytes
 .../parse_data/part-00000/.index.crc            | Bin 0 -> 12 bytes
 .../20150309101656/parse_data/part-00000/data   | Bin 0 -> 57355 bytes
 .../20150309101656/parse_data/part-00000/index  | Bin 0 -> 220 bytes
 .../parse_text/part-00000/.data.crc             | Bin 0 -> 1260 bytes
 .../parse_text/part-00000/.index.crc            | Bin 0 -> 12 bytes
 .../20150309101656/parse_text/part-00000/data   | Bin 0 -> 159920 bytes
 .../20150309101656/parse_text/part-00000/index  | Bin 0 -> 220 bytes
 nutch-plugins/build-plugin.xml                  | 255 ++++++
 nutch-plugins/build.xml                         | 213 +++++
 nutch-plugins/creativecommons/README.txt        |   1 +
 nutch-plugins/creativecommons/build.xml         |  28 +
 .../creativecommons/conf/crawl-urlfilter.txt    |  18 +
 .../creativecommons/conf/nutch-site.xml         |  50 ++
 nutch-plugins/creativecommons/ivy.xml           |  41 +
 nutch-plugins/creativecommons/plugin.xml        |  48 +
 nutch-plugins/creativecommons/pom.xml           |  38 +
 .../creativecommons/nutch/CCIndexingFilter.java | 124 +++
 .../creativecommons/nutch/CCParseFilter.java    | 300 +++++++
 .../java/org/creativecommons/nutch/package.html |   5 +
 .../nutch/TestCCParseFilter.java                |  73 ++
 .../src/test/resources/anchor.html              |   9 +
 .../creativecommons/src/test/resources/rdf.html |  35 +
 .../creativecommons/src/test/resources/rel.html |   6 +
 nutch-plugins/feed/build.xml                    |  45 +
 nutch-plugins/feed/ivy.xml                      |  43 +
 nutch-plugins/feed/plugin.xml                   |  49 +
 nutch-plugins/feed/pom.xml                      |  45 +
 .../nutch/indexer/feed/FeedIndexingFilter.java  | 129 +++
 .../apache/nutch/indexer/feed/package-info.java |  22 +
 .../org/apache/nutch/parse/feed/FeedParser.java | 374 ++++++++
 .../apache/nutch/parse/feed/package-info.java   |  22 +
 .../apache/nutch/parse/feed/TestFeedParser.java | 124 +++
 .../feed/src/test/resources/rsstest.rss         |  36 +
 nutch-plugins/headings/build.xml                |  22 +
 nutch-plugins/headings/ivy.xml                  |  41 +
 nutch-plugins/headings/plugin.xml               |  45 +
 nutch-plugins/headings/pom.xml                  |  38 +
 .../parse/headings/HeadingsParseFilter.java     | 124 +++
 .../nutch/parse/headings/package-info.java      |  22 +
 nutch-plugins/index-anchor/build.xml            |  22 +
 nutch-plugins/index-anchor/ivy.xml              |  41 +
 nutch-plugins/index-anchor/plugin.xml           |  38 +
 nutch-plugins/index-anchor/pom.xml              |  38 +
 .../indexer/anchor/AnchorIndexingFilter.java    | 107 +++
 .../apache/nutch/indexer/anchor/package.html    |   5 +
 .../anchor/TestAnchorIndexingFilter.java        |  67 ++
 nutch-plugins/index-basic/build.xml             |  22 +
 nutch-plugins/index-basic/ivy.xml               |  41 +
 nutch-plugins/index-basic/plugin.xml            |  42 +
 nutch-plugins/index-basic/pom.xml               |  38 +
 .../indexer/basic/BasicIndexingFilter.java      | 158 ++++
 .../org/apache/nutch/indexer/basic/package.html |   5 +
 .../indexer/basic/TestBasicIndexingFilter.java  |  99 +++
 nutch-plugins/index-geoip/build-ivy.xml         |  54 ++
 nutch-plugins/index-geoip/build.xml             |  27 +
 nutch-plugins/index-geoip/ivy.xml               |  46 +
 nutch-plugins/index-geoip/plugin.xml            |  51 ++
 nutch-plugins/index-geoip/pom.xml               |  55 ++
 .../indexer/geoip/GeoIPDocumentCreator.java     | 210 +++++
 .../indexer/geoip/GeoIPIndexingFilter.java      | 241 +++++
 .../nutch/indexer/geoip/package-info.java       |  28 +
 nutch-plugins/index-links/build.xml             |  22 +
 nutch-plugins/index-links/ivy.xml               |  41 +
 nutch-plugins/index-links/plugin.xml            |  41 +
 nutch-plugins/index-links/pom.xml               |  38 +
 .../indexer/links/LinksIndexingFilter.java      | 167 ++++
 .../indexer/links/TestLinksIndexingFilter.java  | 218 +++++
 .../org/apache/nutch/parse/TestOutlinks.java    |  54 ++
 nutch-plugins/index-metadata/build.xml          |  22 +
 nutch-plugins/index-metadata/ivy.xml            |  41 +
 nutch-plugins/index-metadata/plugin.xml         |  42 +
 nutch-plugins/index-metadata/pom.xml            |  38 +
 .../nutch/indexer/metadata/MetadataIndexer.java | 104 +++
 .../nutch/indexer/metadata/package-info.java    |  23 +
 nutch-plugins/index-more/build.xml              |  22 +
 nutch-plugins/index-more/ivy.xml                |  41 +
 nutch-plugins/index-more/plugin.xml             |  42 +
 nutch-plugins/index-more/pom.xml                |  38 +
 .../nutch/indexer/more/MoreIndexingFilter.java  | 344 +++++++
 .../org/apache/nutch/indexer/more/package.html  |   6 +
 .../indexer/more/TestMoreIndexingFilter.java    | 123 +++
 nutch-plugins/index-replace/README.txt          |  95 ++
 nutch-plugins/index-replace/build.xml           |  55 ++
 nutch-plugins/index-replace/ivy.xml             |  41 +
 nutch-plugins/index-replace/plugin.xml          |  22 +
 nutch-plugins/index-replace/pom.xml             |  50 ++
 .../nutch/indexer/replace/FieldReplacer.java    | 196 ++++
 .../nutch/indexer/replace/ReplaceIndexer.java   | 330 +++++++
 .../nutch/indexer/replace/package-info.java     |  22 +
 .../nutch/indexer/replace/TestIndexReplace.java | 456 ++++++++++
 .../src/test/resources/testIndexReplace.html    |  12 +
 nutch-plugins/index-static/build.xml            |  22 +
 nutch-plugins/index-static/ivy.xml              |  41 +
 nutch-plugins/index-static/plugin.xml           |  42 +
 nutch-plugins/index-static/pom.xml              |  38 +
 .../indexer/staticfield/StaticFieldIndexer.java | 143 +++
 .../nutch/indexer/staticfield/package.html      |   5 +
 .../staticfield/TestStaticFieldIndexerTest.java | 194 ++++
 nutch-plugins/indexer-cloudsearch/README.md     |  58 ++
 nutch-plugins/indexer-cloudsearch/build.xml     |  22 +
 .../indexer-cloudsearch/createCSDomain.sh       |  22 +
 nutch-plugins/indexer-cloudsearch/ivy.xml       |  41 +
 nutch-plugins/indexer-cloudsearch/plugin.xml    |  50 ++
 nutch-plugins/indexer-cloudsearch/pom.xml       |  45 +
 .../cloudsearch/CloudSearchConstants.java       |  27 +
 .../cloudsearch/CloudSearchIndexWriter.java     | 382 ++++++++
 .../cloudsearch/CloudSearchUtils.java           |  73 ++
 nutch-plugins/indexer-dummy/build.xml           |  22 +
 nutch-plugins/indexer-dummy/ivy.xml             |  41 +
 nutch-plugins/indexer-dummy/plugin.xml          |  38 +
 nutch-plugins/indexer-dummy/pom.xml             |  38 +
 .../indexwriter/dummy/DummyIndexWriter.java     | 103 +++
 .../nutch/indexwriter/dummy/package-info.java   |  23 +
 nutch-plugins/indexer-elastic/build-ivy.xml     |  54 ++
 nutch-plugins/indexer-elastic/build.xml         |  22 +
 .../indexer-elastic/howto_upgrade_es.txt        |   6 +
 nutch-plugins/indexer-elastic/ivy.xml           |  43 +
 nutch-plugins/indexer-elastic/plugin.xml        |  71 ++
 nutch-plugins/indexer-elastic/pom.xml           |  45 +
 .../indexwriter/elastic/ElasticConstants.java   |  28 +
 .../indexwriter/elastic/ElasticIndexWriter.java | 279 ++++++
 .../nutch/indexwriter/elastic/package-info.java |  22 +
 nutch-plugins/indexer-solr/build-ivy.xml        |  54 ++
 nutch-plugins/indexer-solr/build.xml            |  22 +
 nutch-plugins/indexer-solr/ivy.xml              |  44 +
 nutch-plugins/indexer-solr/plugin.xml           |  48 +
 nutch-plugins/indexer-solr/pom.xml              |  55 ++
 .../nutch/indexwriter/solr/SolrConstants.java   |  56 ++
 .../nutch/indexwriter/solr/SolrIndexWriter.java | 277 ++++++
 .../indexwriter/solr/SolrMappingReader.java     | 147 +++
 .../nutch/indexwriter/solr/SolrUtils.java       |  97 ++
 .../nutch/indexwriter/solr/package-info.java    |  22 +
 nutch-plugins/language-identifier/build.xml     |  38 +
 nutch-plugins/language-identifier/ivy.xml       |  41 +
 nutch-plugins/language-identifier/plugin.xml    |  49 +
 nutch-plugins/language-identifier/pom.xml       |  38 +
 .../nutch/analysis/lang/HTMLLanguageParser.java | 320 +++++++
 .../analysis/lang/LanguageIndexingFilter.java   |  89 ++
 .../nutch/analysis/lang/langmappings.properties | 188 ++++
 .../org/apache/nutch/analysis/lang/package.html |   6 +
 .../analysis/lang/TestHTMLLanguageParser.java   | 149 ++++
 .../java/org/apache/nutch/analysis/lang/da.test | 108 +++
 .../java/org/apache/nutch/analysis/lang/de.test | 104 +++
 .../java/org/apache/nutch/analysis/lang/el.test | 109 +++
 .../java/org/apache/nutch/analysis/lang/en.test | 105 +++
 .../java/org/apache/nutch/analysis/lang/es.test | 107 +++
 .../java/org/apache/nutch/analysis/lang/fi.test | 106 +++
 .../java/org/apache/nutch/analysis/lang/fr.test | 105 +++
 .../java/org/apache/nutch/analysis/lang/it.test | 109 +++
 .../java/org/apache/nutch/analysis/lang/nl.test | 105 +++
 .../java/org/apache/nutch/analysis/lang/pt.test | 105 +++
 .../java/org/apache/nutch/analysis/lang/sv.test | 108 +++
 .../nutch/analysis/lang/test-referencial.txt    |  10 +
 nutch-plugins/lib-htmlunit/build-ivy.xml        |  54 ++
 nutch-plugins/lib-htmlunit/build.xml            |  28 +
 nutch-plugins/lib-htmlunit/ivy.xml              |  52 ++
 nutch-plugins/lib-htmlunit/plugin.xml           | 166 ++++
 nutch-plugins/lib-htmlunit/pom.xml              |  55 ++
 .../protocol/htmlunit/HtmlUnitWebDriver.java    | 189 ++++
 .../htmlunit/HtmlUnitWebWindowListener.java     |  53 ++
 nutch-plugins/lib-http/build.xml                |  22 +
 nutch-plugins/lib-http/ivy.xml                  |  41 +
 nutch-plugins/lib-http/plugin.xml               |  33 +
 nutch-plugins/lib-http/pom.xml                  |  38 +
 .../protocol/http/api/BlockedException.java     |  26 +
 .../nutch/protocol/http/api/HttpBase.java       | 587 ++++++++++++
 .../nutch/protocol/http/api/HttpException.java  |  40 +
 .../protocol/http/api/HttpRobotRulesParser.java | 167 ++++
 .../apache/nutch/protocol/http/api/package.html |   6 +
 .../protocol/http/api/TestRobotRulesParser.java | 123 +++
 nutch-plugins/lib-nekohtml/build.xml            |  30 +
 nutch-plugins/lib-nekohtml/ivy.xml              |  42 +
 nutch-plugins/lib-nekohtml/plugin.xml           |  38 +
 nutch-plugins/lib-nekohtml/pom.xml              |  45 +
 nutch-plugins/lib-regex-filter/build.xml        |  22 +
 nutch-plugins/lib-regex-filter/ivy.xml          |  41 +
 nutch-plugins/lib-regex-filter/plugin.xml       |  33 +
 nutch-plugins/lib-regex-filter/pom.xml          |  54 ++
 .../apache/nutch/urlfilter/api/RegexRule.java   | 102 +++
 .../nutch/urlfilter/api/RegexURLFilterBase.java | 315 +++++++
 .../nutch/urlfilter/api/package-info.java       |  23 +
 .../urlfilter/api/RegexURLFilterBaseTest.java   | 134 +++
 nutch-plugins/lib-selenium/build-ivy.xml        |  54 ++
 nutch-plugins/lib-selenium/build.xml            |  28 +
 .../lib-selenium/howto_upgrade_selenium.txt     |  15 +
 nutch-plugins/lib-selenium/ivy.xml              |  52 ++
 nutch-plugins/lib-selenium/plugin.xml           | 175 ++++
 nutch-plugins/lib-selenium/pom.xml              |  49 +
 .../nutch/protocol/selenium/HttpWebClient.java  | 236 +++++
 nutch-plugins/lib-xml/build.xml                 |  36 +
 nutch-plugins/lib-xml/ivy.xml                   |  44 +
 nutch-plugins/lib-xml/plugin.xml                |  65 ++
 nutch-plugins/lib-xml/pom.xml                   |  38 +
 nutch-plugins/microformats-reltag/build.xml     |  27 +
 nutch-plugins/microformats-reltag/ivy.xml       |  41 +
 nutch-plugins/microformats-reltag/plugin.xml    |  49 +
 nutch-plugins/microformats-reltag/pom.xml       |  38 +
 .../reltag/RelTagIndexingFilter.java            |  77 ++
 .../nutch/microformats/reltag/RelTagParser.java | 148 ++++
 .../nutch/microformats/reltag/package.html      |   8 +
 nutch-plugins/mimetype-filter/build.xml         |  28 +
 nutch-plugins/mimetype-filter/ivy.xml           |  41 +
 nutch-plugins/mimetype-filter/plugin.xml        |  37 +
 nutch-plugins/mimetype-filter/pom.xml           |  38 +
 .../indexer/filter/MimeTypeIndexingFilter.java  | 273 ++++++
 .../filter/MimeTypeIndexingFilterTest.java      | 114 +++
 .../src/test/resources/allow-images.txt         |  34 +
 .../src/test/resources/block-html.txt           |  34 +
 nutch-plugins/nutch-extensionpoints/build.xml   |  30 +
 nutch-plugins/nutch-extensionpoints/ivy.xml     |  41 +
 nutch-plugins/nutch-extensionpoints/plugin.xml  |  67 ++
 nutch-plugins/nutch-extensionpoints/pom.xml     |  38 +
 nutch-plugins/parse-ext/build.xml               |  32 +
 nutch-plugins/parse-ext/command                 |  24 +
 nutch-plugins/parse-ext/ivy.xml                 |  41 +
 nutch-plugins/parse-ext/plugin.xml              |  60 ++
 nutch-plugins/parse-ext/pom.xml                 |  38 +
 .../org/apache/nutch/parse/ext/ExtParser.java   | 183 ++++
 .../apache/nutch/parse/ext/package-info.java    |  22 +
 .../apache/nutch/parse/ext/TestExtParser.java   | 130 +++
 nutch-plugins/parse-html/build.xml              |  40 +
 nutch-plugins/parse-html/ivy.xml                |  42 +
 nutch-plugins/parse-html/plugin.xml             |  48 +
 nutch-plugins/parse-html/pom.xml                |  49 +
 .../org/apache/nutch/parse/html/DOMBuilder.java | 766 ++++++++++++++++
 .../nutch/parse/html/DOMContentUtils.java       | 400 +++++++++
 .../nutch/parse/html/HTMLMetaProcessor.java     | 214 +++++
 .../org/apache/nutch/parse/html/HtmlParser.java | 352 ++++++++
 .../parse/html/XMLCharacterRecognizer.java      | 112 +++
 .../org/apache/nutch/parse/html/package.html    |   5 +
 .../nutch/parse/html/TestDOMContentUtils.java   | 347 ++++++++
 .../apache/nutch/parse/html/TestHtmlParser.java | 122 +++
 .../parse/html/TestRobotsMetaProcessor.java     | 155 ++++
 nutch-plugins/parse-js/build.xml                |  22 +
 nutch-plugins/parse-js/ivy.xml                  |  41 +
 nutch-plugins/parse-js/plugin.xml               |  53 ++
 nutch-plugins/parse-js/pom.xml                  |  38 +
 .../apache/nutch/parse/js/JSParseFilter.java    | 301 +++++++
 .../org/apache/nutch/parse/js/package-info.java |  23 +
 nutch-plugins/parse-metatags/README.txt         |  17 +
 nutch-plugins/parse-metatags/build.xml          |  37 +
 nutch-plugins/parse-metatags/ivy.xml            |  41 +
 nutch-plugins/parse-metatags/plugin.xml         |  22 +
 nutch-plugins/parse-metatags/pom.xml            |  38 +
 .../nutch/parse/metatags/MetaTagsParser.java    | 124 +++
 .../nutch/parse/metatags/package-info.java      |  24 +
 .../nutch/parse/metatags/TestMetatagParser.java | 104 +++
 .../src/test/resources/testMetatags.html        |   9 +
 .../test/resources/testMultivalueMetatags.html  |  12 +
 nutch-plugins/parse-replace/README.txt          |  91 ++
 nutch-plugins/parse-replace/build.xml           |  37 +
 nutch-plugins/parse-replace/ivy.xml             |  41 +
 nutch-plugins/parse-replace/plugin.xml          |  22 +
 nutch-plugins/parse-replace/pom.xml             |  38 +
 .../nutch/parse/replace/ReplaceParser.java      |  74 ++
 .../nutch/parse/replace/package-info.java       |  22 +
 .../nutch/parse/replace/TestParseReplace.java   |  68 ++
 .../src/test/resources/testParseReplace.html    |  11 +
 nutch-plugins/parse-swf/build.xml               |  38 +
 nutch-plugins/parse-swf/ivy.xml                 |  41 +
 nutch-plugins/parse-swf/lib/javaswf-LICENSE.txt |  33 +
 nutch-plugins/parse-swf/lib/javaswf.jar         | Bin 0 -> 125369 bytes
 nutch-plugins/parse-swf/plugin.xml              |  44 +
 nutch-plugins/parse-swf/pom.xml                 |  46 +
 .../org/apache/nutch/parse/swf/SWFParser.java   | 685 ++++++++++++++
 .../apache/nutch/parse/swf/package-info.java    |  22 +
 .../apache/nutch/parse/swf/TestSWFParser.java   |  94 ++
 .../parse-swf/src/test/resources/test1.swf      | Bin 0 -> 21054 bytes
 .../parse-swf/src/test/resources/test1.txt      |  60 ++
 .../parse-swf/src/test/resources/test2.swf      | Bin 0 -> 42534 bytes
 .../parse-swf/src/test/resources/test2.txt      |   5 +
 .../parse-swf/src/test/resources/test3.swf      | Bin 0 -> 51562 bytes
 .../parse-swf/src/test/resources/test3.txt      |  11 +
 nutch-plugins/parse-tika/build-ivy.xml          |  54 ++
 nutch-plugins/parse-tika/build.xml              |  55 ++
 nutch-plugins/parse-tika/howto_upgrade_tika.txt |   8 +
 nutch-plugins/parse-tika/ivy.xml                |  46 +
 nutch-plugins/parse-tika/plugin.xml             | 136 +++
 nutch-plugins/parse-tika/pom.xml                |  54 ++
 .../tika/BoilerpipeExtractorRepository.java     |  62 ++
 .../org/apache/nutch/parse/tika/DOMBuilder.java | 794 +++++++++++++++++
 .../nutch/parse/tika/DOMContentUtils.java       | 402 +++++++++
 .../nutch/parse/tika/HTMLMetaProcessor.java     | 214 +++++
 .../org/apache/nutch/parse/tika/TikaParser.java | 286 ++++++
 .../parse/tika/XMLCharacterRecognizer.java      | 112 +++
 .../apache/nutch/parse/tika/package-info.java   |  23 +
 .../apache/nutch/tika/TestDOMContentUtils.java  | 337 +++++++
 .../org/apache/nutch/tika/TestFeedParser.java   | 121 +++
 .../apache/nutch/tika/TestImageMetadata.java    |  67 ++
 .../org/apache/nutch/tika/TestMSWordParser.java |  92 ++
 .../org/apache/nutch/tika/TestOOParser.java     | 107 +++
 .../org/apache/nutch/tika/TestPdfParser.java    |  73 ++
 .../org/apache/nutch/tika/TestRTFParser.java    |  81 ++
 .../nutch/tika/TestRobotsMetaProcessor.java     | 156 ++++
 .../parse-tika/src/test/resources/encrypted.pdf | Bin 0 -> 3431 bytes
 .../parse-tika/src/test/resources/nutch.html    | 519 +++++++++++
 .../src/test/resources/nutch_logo_tm.gif        | Bin 0 -> 2747 bytes
 .../parse-tika/src/test/resources/ootest.odt    | Bin 0 -> 20753 bytes
 .../parse-tika/src/test/resources/ootest.sxw    | Bin 0 -> 20125 bytes
 .../parse-tika/src/test/resources/ootest.txt    |  30 +
 .../parse-tika/src/test/resources/pdftest.pdf   | 157 ++++
 .../parse-tika/src/test/resources/rsstest.rss   |  37 +
 .../parse-tika/src/test/resources/test.rtf      |  17 +
 .../parse-tika/src/test/resources/word97.doc    | Bin 0 -> 8192 bytes
 nutch-plugins/parse-zip/build.xml               |  38 +
 nutch-plugins/parse-zip/ivy.xml                 |  41 +
 nutch-plugins/parse-zip/plugin.xml              |  46 +
 nutch-plugins/parse-zip/pom.xml                 |  38 +
 .../org/apache/nutch/parse/zip/ZipParser.java   | 144 +++
 .../nutch/parse/zip/ZipTextExtractor.java       | 120 +++
 .../apache/nutch/parse/zip/package-info.java    |  22 +
 .../apache/nutch/parse/zip/TestZipParser.java   |  71 ++
 .../parse-zip/src/test/resources/test.zip       | Bin 0 -> 182 bytes
 .../parsefilter-naivebayes/build-ivy.xml        |  54 ++
 nutch-plugins/parsefilter-naivebayes/build.xml  |  22 +
 nutch-plugins/parsefilter-naivebayes/ivy.xml    |  49 +
 nutch-plugins/parsefilter-naivebayes/plugin.xml |  56 ++
 nutch-plugins/parsefilter-naivebayes/pom.xml    |  38 +
 .../nutch/parsefilter/naivebayes/Classify.java  | 120 +++
 .../naivebayes/NaiveBayesParseFilter.java       | 197 ++++
 .../nutch/parsefilter/naivebayes/Train.java     | 148 ++++
 .../parsefilter/naivebayes/package-info.java    |  28 +
 nutch-plugins/parsefilter-regex/build.xml       |  27 +
 nutch-plugins/parsefilter-regex/ivy.xml         |  37 +
 nutch-plugins/parsefilter-regex/plugin.xml      |  42 +
 nutch-plugins/parsefilter-regex/pom.xml         |  38 +
 .../parsefilter/regex/RegexParseFilter.java     | 199 +++++
 .../nutch/parsefilter/regex/package-info.java   |  23 +
 .../parsefilter/regex/TestRegexParseFilter.java |  77 ++
 .../src/test/resources/regex-parsefilter.txt    |  10 +
 nutch-plugins/plugin.dtd                        | 206 +++++
 nutch-plugins/plugin/pom.xml                    |  38 +
 nutch-plugins/pom.xml                           | 164 ++++
 nutch-plugins/protocol-file/build.xml           |  29 +
 nutch-plugins/protocol-file/ivy.xml             |  41 +
 nutch-plugins/protocol-file/plugin.xml          |  46 +
 nutch-plugins/protocol-file/pom.xml             |  38 +
 .../org/apache/nutch/protocol/file/File.java    | 228 +++++
 .../apache/nutch/protocol/file/FileError.java   |  36 +
 .../nutch/protocol/file/FileException.java      |  40 +
 .../nutch/protocol/file/FileResponse.java       | 317 +++++++
 .../org/apache/nutch/protocol/file/package.html |   5 +
 .../nutch/protocol/file/TestProtocolFile.java   |  99 +++
 .../src/test/resources/testprotocolfile.txt     |   1 +
 .../resources/testprotocolfile_(encoded).txt    |   1 +
 nutch-plugins/protocol-ftp/build.xml            |  22 +
 nutch-plugins/protocol-ftp/ivy.xml              |  42 +
 nutch-plugins/protocol-ftp/plugin.xml           |  46 +
 nutch-plugins/protocol-ftp/pom.xml              |  38 +
 .../org/apache/nutch/protocol/ftp/Client.java   | 595 +++++++++++++
 .../java/org/apache/nutch/protocol/ftp/Ftp.java | 267 ++++++
 .../org/apache/nutch/protocol/ftp/FtpError.java |  36 +
 .../apache/nutch/protocol/ftp/FtpException.java |  46 +
 .../ftp/FtpExceptionBadSystResponse.java        |  29 +
 .../FtpExceptionCanNotHaveDataConnection.java   |  29 +
 ...ExceptionControlClosedByForcedDataClose.java |  30 +
 .../ftp/FtpExceptionUnknownForcedDataClose.java |  30 +
 .../apache/nutch/protocol/ftp/FtpResponse.java  | 521 +++++++++++
 .../nutch/protocol/ftp/FtpRobotRulesParser.java | 121 +++
 .../protocol/ftp/PrintCommandListener.java      |  71 ++
 .../org/apache/nutch/protocol/ftp/package.html  |   5 +
 nutch-plugins/protocol-htmlunit/build.xml       |  37 +
 nutch-plugins/protocol-htmlunit/ivy.xml         |  38 +
 nutch-plugins/protocol-htmlunit/plugin.xml      |  51 ++
 nutch-plugins/protocol-htmlunit/pom.xml         |  51 ++
 .../apache/nutch/protocol/htmlunit/Http.java    |  63 ++
 .../nutch/protocol/htmlunit/HttpResponse.java   | 573 ++++++++++++
 .../apache/nutch/protocol/htmlunit/package.html |  21 +
 nutch-plugins/protocol-http/build.xml           |  50 ++
 nutch-plugins/protocol-http/ivy.xml             |  41 +
 nutch-plugins/protocol-http/jsp/basic-http.jsp  |  44 +
 nutch-plugins/protocol-http/jsp/brokenpage.jsp  |  47 +
 nutch-plugins/protocol-http/jsp/redirect301.jsp |  49 +
 nutch-plugins/protocol-http/jsp/redirect302.jsp |  49 +
 nutch-plugins/protocol-http/plugin.xml          |  51 ++
 nutch-plugins/protocol-http/pom.xml             |  57 ++
 .../org/apache/nutch/protocol/http/Http.java    |  73 ++
 .../nutch/protocol/http/HttpResponse.java       | 558 ++++++++++++
 .../org/apache/nutch/protocol/http/package.html |   5 +
 .../src/test/conf/nutch-site-test.xml           |  52 ++
 .../nutch/protocol/http/TestProtocolHttp.java   | 140 +++
 nutch-plugins/protocol-httpclient/build.xml     |  45 +
 nutch-plugins/protocol-httpclient/ivy.xml       |  42 +
 nutch-plugins/protocol-httpclient/jsp/basic.jsp |  74 ++
 .../protocol-httpclient/jsp/cookies.jsp         |  63 ++
 .../protocol-httpclient/jsp/digest.jsp          |  68 ++
 .../protocol-httpclient/jsp/noauth.jsp          |  36 +
 nutch-plugins/protocol-httpclient/jsp/ntlm.jsp  |  89 ++
 nutch-plugins/protocol-httpclient/plugin.xml    |  58 ++
 nutch-plugins/protocol-httpclient/pom.xml       |  62 ++
 .../DummySSLProtocolSocketFactory.java          | 163 ++++
 .../httpclient/DummyX509TrustManager.java       |  92 ++
 .../apache/nutch/protocol/httpclient/Http.java  | 572 ++++++++++++
 .../protocol/httpclient/HttpAuthentication.java |  45 +
 .../httpclient/HttpAuthenticationException.java |  71 ++
 .../httpclient/HttpAuthenticationFactory.java   |  98 ++
 .../httpclient/HttpBasicAuthentication.java     | 199 +++++
 .../httpclient/HttpFormAuthConfigurer.java      | 106 +++
 .../httpclient/HttpFormAuthentication.java      | 223 +++++
 .../nutch/protocol/httpclient/HttpResponse.java | 216 +++++
 .../nutch/protocol/httpclient/package.html      |   9 +
 .../src/test/conf/httpclient-auth-test.xml      |  58 ++
 .../src/test/conf/nutch-site-test.xml           |  52 ++
 .../httpclient/TestProtocolHttpClient.java      | 217 +++++
 .../protocol-interactiveselenium/README.md      |  38 +
 .../protocol-interactiveselenium/build-ivy.xml  |  54 ++
 .../protocol-interactiveselenium/build.xml      |  37 +
 .../protocol-interactiveselenium/ivy.xml        |  42 +
 .../protocol-interactiveselenium/plugin.xml     |  47 +
 .../protocol-interactiveselenium/pom.xml        |  50 ++
 .../protocol/interactiveselenium/Http.java      |  59 ++
 .../interactiveselenium/HttpResponse.java       | 399 +++++++++
 .../DefalultMultiInteractionHandler.java        |  53 ++
 .../DefaultClickAllAjaxLinksHandler.java        |  88 ++
 .../handlers/DefaultHandler.java                |  30 +
 .../handlers/InteractiveSeleniumHandler.java    |  25 +
 .../protocol/interactiveselenium/package.html   |   5 +
 nutch-plugins/protocol-selenium/README.md       | 208 +++++
 nutch-plugins/protocol-selenium/build-ivy.xml   |  54 ++
 nutch-plugins/protocol-selenium/build.xml       |  36 +
 nutch-plugins/protocol-selenium/ivy.xml         |  42 +
 nutch-plugins/protocol-selenium/plugin.xml      |  47 +
 nutch-plugins/protocol-selenium/pom.xml         |  50 ++
 .../apache/nutch/protocol/selenium/Http.java    |  59 ++
 .../nutch/protocol/selenium/HttpResponse.java   | 360 ++++++++
 .../apache/nutch/protocol/selenium/package.html |   5 +
 nutch-plugins/scoring-depth/build.xml           |   6 +
 nutch-plugins/scoring-depth/ivy.xml             |  41 +
 nutch-plugins/scoring-depth/plugin.xml          |  24 +
 nutch-plugins/scoring-depth/pom.xml             |  38 +
 .../nutch/scoring/depth/DepthScoringFilter.java | 207 +++++
 .../nutch/scoring/depth/package-info.java       |  23 +
 nutch-plugins/scoring-link/build.xml            |  27 +
 nutch-plugins/scoring-link/ivy.xml              |  41 +
 nutch-plugins/scoring-link/plugin.xml           |  39 +
 nutch-plugins/scoring-link/pom.xml              |  38 +
 .../scoring/link/LinkAnalysisScoringFilter.java |  95 ++
 .../apache/nutch/scoring/link/package-info.java |  23 +
 nutch-plugins/scoring-opic/build.xml            |  27 +
 nutch-plugins/scoring-opic/ivy.xml              |  41 +
 nutch-plugins/scoring-opic/plugin.xml           |  39 +
 nutch-plugins/scoring-opic/pom.xml              |  38 +
 .../nutch/scoring/opic/OPICScoringFilter.java   | 173 ++++
 .../apache/nutch/scoring/opic/package-info.java |  23 +
 nutch-plugins/scoring-similarity/build-ivy.xml  |  54 ++
 nutch-plugins/scoring-similarity/build.xml      |  27 +
 nutch-plugins/scoring-similarity/ivy.xml        |  42 +
 nutch-plugins/scoring-similarity/plugin.xml     |  45 +
 nutch-plugins/scoring-similarity/pom.xml        |  45 +
 .../scoring/similarity/SimilarityModel.java     |  38 +
 .../similarity/SimilarityScoringFilter.java     |  70 ++
 .../similarity/cosine/CosineSimilarity.java     |  84 ++
 .../scoring/similarity/cosine/DocVector.java    |  57 ++
 .../nutch/scoring/similarity/cosine/Model.java  | 190 ++++
 .../scoring/similarity/cosine/package-info.java |   7 +
 .../similarity/util/LuceneAnalyzerUtil.java     |  93 ++
 .../similarity/util/LuceneTokenizer.java        | 166 ++++
 .../scoring/similarity/util/package-info.java   |  24 +
 nutch-plugins/subcollection/README.txt          |  10 +
 nutch-plugins/subcollection/build.xml           |  22 +
 nutch-plugins/subcollection/ivy.xml             |  41 +
 nutch-plugins/subcollection/plugin.xml          |  41 +
 nutch-plugins/subcollection/pom.xml             |  38 +
 .../nutch/collection/CollectionManager.java     | 240 +++++
 .../apache/nutch/collection/Subcollection.java  | 259 ++++++
 .../org/apache/nutch/collection/package.html    |  36 +
 .../SubcollectionIndexingFilter.java            | 101 +++
 .../indexer/subcollection/package-info.java     |  25 +
 .../nutch/collection/TestSubcollection.java     | 112 +++
 nutch-plugins/tld/build.xml                     |  22 +
 nutch-plugins/tld/ivy.xml                       |  41 +
 nutch-plugins/tld/plugin.xml                    |  51 ++
 nutch-plugins/tld/pom.xml                       |  38 +
 .../nutch/indexer/tld/TLDIndexingFilter.java    |  69 ++
 .../org/apache/nutch/indexer/tld/package.html   |   5 +
 .../nutch/scoring/tld/TLDScoringFilter.java     | 114 +++
 .../org/apache/nutch/scoring/tld/package.html   |   5 +
 nutch-plugins/urlfilter-automaton/build.xml     |  51 ++
 nutch-plugins/urlfilter-automaton/ivy.xml       |  42 +
 nutch-plugins/urlfilter-automaton/plugin.xml    |  43 +
 nutch-plugins/urlfilter-automaton/pom.xml       |  58 ++
 .../urlfilter/automaton/AutomatonURLFilter.java | 116 +++
 .../nutch/urlfilter/automaton/package.html      |   9 +
 .../automaton/TestAutomatonURLFilter.java       |  56 ++
 .../src/test/resources/Benchmarks.rules         |  26 +
 .../src/test/resources/Benchmarks.urls          | 297 +++++++
 .../src/test/resources/IntranetCrawling.rules   |  24 +
 .../src/test/resources/IntranetCrawling.urls    |   8 +
 .../src/test/resources/WholeWebCrawling.rules   |  19 +
 .../src/test/resources/WholeWebCrawling.urls    |  11 +
 nutch-plugins/urlfilter-domain/build.xml        |  28 +
 nutch-plugins/urlfilter-domain/ivy.xml          |  41 +
 nutch-plugins/urlfilter-domain/plugin.xml       |  43 +
 nutch-plugins/urlfilter-domain/pom.xml          |  38 +
 .../nutch/urlfilter/domain/DomainURLFilter.java | 212 +++++
 .../nutch/urlfilter/domain/package-info.java    |  25 +
 .../urlfilter/domain/TestDomainURLFilter.java   |  67 ++
 .../src/test/resources/hosts.txt                |   5 +
 .../urlfilter-domainblacklist/build.xml         |  28 +
 nutch-plugins/urlfilter-domainblacklist/ivy.xml |  41 +
 .../urlfilter-domainblacklist/plugin.xml        |  43 +
 nutch-plugins/urlfilter-domainblacklist/pom.xml |  38 +
 .../DomainBlacklistURLFilter.java               | 210 +++++
 .../urlfilter/domainblacklist/package-info.java |  24 +
 .../TestDomainBlacklistURLFilter.java           |  49 +
 .../src/test/resources/hosts.txt                |   5 +
 nutch-plugins/urlfilter-ignoreexempt/README.md  |  43 +
 nutch-plugins/urlfilter-ignoreexempt/build.xml  |  55 ++
 nutch-plugins/urlfilter-ignoreexempt/ivy.xml    |  41 +
 nutch-plugins/urlfilter-ignoreexempt/plugin.xml |  45 +
 nutch-plugins/urlfilter-ignoreexempt/pom.xml    |  45 +
 .../ignoreexempt/ExemptionUrlFilter.java        | 101 +++
 .../urlfilter/ignoreexempt/package-info.java    |  24 +
 nutch-plugins/urlfilter-prefix/build.xml        |  22 +
 nutch-plugins/urlfilter-prefix/ivy.xml          |  41 +
 nutch-plugins/urlfilter-prefix/plugin.xml       |  47 +
 nutch-plugins/urlfilter-prefix/pom.xml          |  38 +
 .../nutch/urlfilter/prefix/PrefixURLFilter.java | 178 ++++
 .../apache/nutch/urlfilter/prefix/package.html  |   5 +
 .../urlfilter/prefix/TestPrefixURLFilter.java   |  79 ++
 nutch-plugins/urlfilter-regex/build.xml         |  51 ++
 nutch-plugins/urlfilter-regex/ivy.xml           |  41 +
 nutch-plugins/urlfilter-regex/plugin.xml        |  48 +
 nutch-plugins/urlfilter-regex/pom.xml           |  53 ++
 .../nutch/urlfilter/regex/RegexURLFilter.java   | 111 +++
 .../apache/nutch/urlfilter/regex/package.html   |   5 +
 .../urlfilter/regex/TestRegexURLFilter.java     |  61 ++
 .../src/test/resources/Benchmarks.rules         |  26 +
 .../src/test/resources/Benchmarks.urls          | 297 +++++++
 .../src/test/resources/IntranetCrawling.rules   |  27 +
 .../src/test/resources/IntranetCrawling.urls    |   8 +
 .../src/test/resources/WholeWebCrawling.rules   |  22 +
 .../src/test/resources/WholeWebCrawling.urls    |  11 +
 .../src/test/resources/nutch1838.rules          |  12 +
 .../src/test/resources/nutch1838.urls           |   3 +
 nutch-plugins/urlfilter-suffix/build.xml        |  22 +
 nutch-plugins/urlfilter-suffix/ivy.xml          |  41 +
 nutch-plugins/urlfilter-suffix/plugin.xml       |  47 +
 nutch-plugins/urlfilter-suffix/pom.xml          |  38 +
 .../nutch/urlfilter/suffix/SuffixURLFilter.java | 331 +++++++
 .../nutch/urlfilter/suffix/package-info.java    |  23 +
 .../urlfilter/suffix/TestSuffixURLFilter.java   | 123 +++
 nutch-plugins/urlfilter-validator/build.xml     |  22 +
 nutch-plugins/urlfilter-validator/ivy.xml       |  41 +
 nutch-plugins/urlfilter-validator/plugin.xml    |  41 +
 nutch-plugins/urlfilter-validator/pom.xml       |  38 +
 .../nutch/urlfilter/validator/UrlValidator.java | 386 ++++++++
 .../nutch/urlfilter/validator/package.html      |   9 +
 .../urlfilter/validator/TestUrlValidator.java   |  79 ++
 nutch-plugins/urlmeta/build.xml                 |  22 +
 nutch-plugins/urlmeta/ivy.xml                   |  41 +
 nutch-plugins/urlmeta/plugin.xml                |  47 +
 nutch-plugins/urlmeta/pom.xml                   |  38 +
 .../indexer/urlmeta/URLMetaIndexingFilter.java  | 118 +++
 .../apache/nutch/indexer/urlmeta/package.html   |  12 +
 .../scoring/urlmeta/URLMetaScoringFilter.java   | 175 ++++
 .../apache/nutch/scoring/urlmeta/package.html   |  11 +
 nutch-plugins/urlnormalizer-ajax/build.xml      |  22 +
 nutch-plugins/urlnormalizer-ajax/ivy.xml        |  41 +
 nutch-plugins/urlnormalizer-ajax/plugin.xml     |  41 +
 nutch-plugins/urlnormalizer-ajax/pom.xml        |  38 +
 .../urlnormalizer/ajax/AjaxURLNormalizer.java   | 236 +++++
 .../ajax/TestAjaxURLNormalizer.java             |  67 ++
 nutch-plugins/urlnormalizer-basic/build.xml     |  22 +
 nutch-plugins/urlnormalizer-basic/ivy.xml       |  41 +
 nutch-plugins/urlnormalizer-basic/plugin.xml    |  41 +
 nutch-plugins/urlnormalizer-basic/pom.xml       |  38 +
 .../urlnormalizer/basic/BasicURLNormalizer.java | 290 ++++++
 .../net/urlnormalizer/basic/package-info.java   |  23 +
 .../basic/TestBasicURLNormalizer.java           | 175 ++++
 nutch-plugins/urlnormalizer-host/build.xml      |  27 +
 nutch-plugins/urlnormalizer-host/ivy.xml        |  41 +
 nutch-plugins/urlnormalizer-host/plugin.xml     |  43 +
 nutch-plugins/urlnormalizer-host/pom.xml        |  38 +
 .../urlnormalizer/host/HostURLNormalizer.java   | 198 +++++
 .../net/urlnormalizer/host/package-info.java    |  23 +
 .../host/TestHostURLNormalizer.java             |  57 ++
 .../src/test/resources/hosts.txt                |   8 +
 nutch-plugins/urlnormalizer-pass/build.xml      |  22 +
 nutch-plugins/urlnormalizer-pass/ivy.xml        |  41 +
 nutch-plugins/urlnormalizer-pass/plugin.xml     |  41 +
 nutch-plugins/urlnormalizer-pass/pom.xml        |  38 +
 .../urlnormalizer/pass/PassURLNormalizer.java   |  49 +
 .../net/urlnormalizer/pass/package-info.java    |  23 +
 .../pass/TestPassURLNormalizer.java             |  45 +
 nutch-plugins/urlnormalizer-protocol/build.xml  |  27 +
 nutch-plugins/urlnormalizer-protocol/ivy.xml    |  41 +
 nutch-plugins/urlnormalizer-protocol/plugin.xml |  43 +
 nutch-plugins/urlnormalizer-protocol/pom.xml    |  38 +
 .../protocol/ProtocolURLNormalizer.java         | 190 ++++
 .../protocol/TestProtocolURLNormalizer.java     |  55 ++
 .../src/test/resources/protocols.txt            |   7 +
 .../urlnormalizer-querystring/build.xml         |  22 +
 nutch-plugins/urlnormalizer-querystring/ivy.xml |  41 +
 .../urlnormalizer-querystring/plugin.xml        |  42 +
 nutch-plugins/urlnormalizer-querystring/pom.xml |  38 +
 .../querystring/QuerystringURLNormalizer.java   |  91 ++
 .../urlnormalizer/querystring/package-info.java |  23 +
 .../TestQuerystringURLNormalizer.java           |  49 +
 nutch-plugins/urlnormalizer-regex/build.xml     |  34 +
 nutch-plugins/urlnormalizer-regex/ivy.xml       |  41 +
 nutch-plugins/urlnormalizer-regex/plugin.xml    |  41 +
 nutch-plugins/urlnormalizer-regex/pom.xml       |  38 +
 .../urlnormalizer/regex/RegexURLNormalizer.java | 324 +++++++
 .../net/urlnormalizer/regex/package-info.java   |  23 +
 .../regex/TestRegexURLNormalizer.java           | 186 ++++
 .../test/resources/regex-normalize-default.test |  84 ++
 .../test/resources/regex-normalize-default.xml  |  66 ++
 .../test/resources/regex-normalize-scope1.test  |   8 +
 .../test/resources/regex-normalize-scope1.xml   |  21 +
 nutch-plugins/urlnormalizer-slash/build.xml     |  27 +
 nutch-plugins/urlnormalizer-slash/ivy.xml       |  41 +
 nutch-plugins/urlnormalizer-slash/plugin.xml    |  43 +
 nutch-plugins/urlnormalizer-slash/pom.xml       |  38 +
 .../urlnormalizer/slash/SlashURLNormalizer.java | 224 +++++
 .../slash/TestSlashURLNormalizer.java           |  73 ++
 .../src/test/resources/slashes.txt              |   7 +
 pom.xml                                         | 157 ++++
 src/bin/crawl                                   | 281 ------
 src/bin/nutch                                   | 324 -------
 .../nutch/crawl/AbstractFetchSchedule.java      | 227 -----
 .../nutch/crawl/AdaptiveFetchSchedule.java      | 203 -----
 src/java/org/apache/nutch/crawl/CrawlDatum.java | 572 ------------
 src/java/org/apache/nutch/crawl/CrawlDb.java    | 349 --------
 .../org/apache/nutch/crawl/CrawlDbFilter.java   | 111 ---
 .../org/apache/nutch/crawl/CrawlDbMerger.java   | 216 -----
 .../org/apache/nutch/crawl/CrawlDbReader.java   | 887 -------------------
 .../org/apache/nutch/crawl/CrawlDbReducer.java  | 339 -------
 .../apache/nutch/crawl/DeduplicationJob.java    | 389 --------
 .../nutch/crawl/DefaultFetchSchedule.java       |  45 -
 .../org/apache/nutch/crawl/FetchSchedule.java   | 208 -----
 .../nutch/crawl/FetchScheduleFactory.java       |  53 --
 src/java/org/apache/nutch/crawl/Generator.java  | 859 ------------------
 src/java/org/apache/nutch/crawl/Injector.java   | 510 -----------
 src/java/org/apache/nutch/crawl/Inlink.java     |  83 --
 src/java/org/apache/nutch/crawl/Inlinks.java    | 110 ---
 src/java/org/apache/nutch/crawl/LinkDb.java     | 428 ---------
 .../org/apache/nutch/crawl/LinkDbFilter.java    | 128 ---
 .../org/apache/nutch/crawl/LinkDbMerger.java    | 204 -----
 .../org/apache/nutch/crawl/LinkDbReader.java    | 203 -----
 .../org/apache/nutch/crawl/MD5Signature.java    |  39 -
 .../nutch/crawl/MimeAdaptiveFetchSchedule.java  | 236 -----
 .../org/apache/nutch/crawl/NutchWritable.java   |  66 --
 src/java/org/apache/nutch/crawl/Signature.java  |  37 -
 .../apache/nutch/crawl/SignatureComparator.java |  57 --
 .../apache/nutch/crawl/SignatureFactory.java    |  62 --
 .../apache/nutch/crawl/TextMD5Signature.java    |  42 -
 .../nutch/crawl/TextProfileSignature.java       | 199 -----
 .../org/apache/nutch/crawl/URLPartitioner.java  |  96 --
 src/java/org/apache/nutch/crawl/package.html    |   5 -
 .../org/apache/nutch/fetcher/FetchItem.java     | 118 ---
 .../apache/nutch/fetcher/FetchItemQueue.java    | 139 ---
 .../apache/nutch/fetcher/FetchItemQueues.java   | 212 -----
 .../org/apache/nutch/fetcher/FetchNode.java     |  59 --
 .../org/apache/nutch/fetcher/FetchNodeDb.java   |  49 -
 src/java/org/apache/nutch/fetcher/Fetcher.java  | 600 -------------
 .../nutch/fetcher/FetcherOutputFormat.java      | 123 ---
 .../org/apache/nutch/fetcher/FetcherThread.java | 768 ----------------
 .../org/apache/nutch/fetcher/QueueFeeder.java   | 104 ---
 src/java/org/apache/nutch/fetcher/package.html  |   5 -
 src/java/org/apache/nutch/hostdb/HostDatum.java | 324 -------
 .../org/apache/nutch/hostdb/ReadHostDb.java     | 240 -----
 .../org/apache/nutch/hostdb/ResolverThread.java | 121 ---
 .../org/apache/nutch/hostdb/UpdateHostDb.java   | 259 ------
 .../apache/nutch/hostdb/UpdateHostDbMapper.java | 239 -----
 .../nutch/hostdb/UpdateHostDbReducer.java       | 427 ---------
 .../org/apache/nutch/indexer/CleaningJob.java   | 210 -----
 .../org/apache/nutch/indexer/IndexWriter.java   |  47 -
 .../org/apache/nutch/indexer/IndexWriters.java  | 145 ---
 .../apache/nutch/indexer/IndexerMapReduce.java  | 422 ---------
 .../nutch/indexer/IndexerOutputFormat.java      |  57 --
 .../apache/nutch/indexer/IndexingException.java |  39 -
 .../apache/nutch/indexer/IndexingFilter.java    |  61 --
 .../apache/nutch/indexer/IndexingFilters.java   |  60 --
 .../nutch/indexer/IndexingFiltersChecker.java   | 371 --------
 .../org/apache/nutch/indexer/IndexingJob.java   | 358 --------
 .../org/apache/nutch/indexer/NutchDocument.java | 144 ---
 .../org/apache/nutch/indexer/NutchField.java    | 137 ---
 .../apache/nutch/indexer/NutchIndexAction.java  |  58 --
 src/java/org/apache/nutch/indexer/package.html  |  10 -
 .../apache/nutch/metadata/CreativeCommons.java  |  35 -
 .../org/apache/nutch/metadata/DublinCore.java   | 161 ----
 src/java/org/apache/nutch/metadata/Feed.java    |  38 -
 .../org/apache/nutch/metadata/HttpHeaders.java  |  51 --
 .../org/apache/nutch/metadata/MetaWrapper.java  | 120 ---
 .../org/apache/nutch/metadata/Metadata.java     | 280 ------
 src/java/org/apache/nutch/metadata/Nutch.java   |  98 --
 .../nutch/metadata/SpellCheckedMetadata.java    | 150 ----
 src/java/org/apache/nutch/metadata/package.html |   6 -
 .../apache/nutch/net/URLExemptionFilter.java    |  43 -
 .../apache/nutch/net/URLExemptionFilters.java   |  64 --
 src/java/org/apache/nutch/net/URLFilter.java    |  40 -
 .../org/apache/nutch/net/URLFilterChecker.java  | 134 ---
 .../apache/nutch/net/URLFilterException.java    |  39 -
 src/java/org/apache/nutch/net/URLFilters.java   |  44 -
 .../org/apache/nutch/net/URLNormalizer.java     |  37 -
 .../apache/nutch/net/URLNormalizerChecker.java  | 117 ---
 .../org/apache/nutch/net/URLNormalizers.java    | 325 -------
 src/java/org/apache/nutch/net/package-info.java |  23 -
 .../nutch/net/protocols/HttpDateFormat.java     | 124 ---
 .../nutch/net/protocols/ProtocolException.java  |  47 -
 .../apache/nutch/net/protocols/Response.java    |  46 -
 .../nutch/net/protocols/package-info.java       |  23 -
 .../org/apache/nutch/parse/HTMLMetaTags.java    | 203 -----
 .../org/apache/nutch/parse/HtmlParseFilter.java |  45 -
 .../apache/nutch/parse/HtmlParseFilters.java    |  62 --
 src/java/org/apache/nutch/parse/Outlink.java    | 135 ---
 .../apache/nutch/parse/OutlinkExtractor.java    | 145 ---
 src/java/org/apache/nutch/parse/Parse.java      |  38 -
 .../org/apache/nutch/parse/ParseCallable.java   |  37 -
 src/java/org/apache/nutch/parse/ParseData.java  | 255 ------
 .../org/apache/nutch/parse/ParseException.java  |  39 -
 src/java/org/apache/nutch/parse/ParseImpl.java  |  87 --
 .../apache/nutch/parse/ParseOutputFormat.java   | 398 ---------
 .../org/apache/nutch/parse/ParsePluginList.java |  71 --
 .../apache/nutch/parse/ParsePluginsReader.java  | 278 ------
 .../org/apache/nutch/parse/ParseResult.java     | 178 ----
 .../org/apache/nutch/parse/ParseSegment.java    | 309 -------
 .../org/apache/nutch/parse/ParseStatus.java     | 311 -------
 src/java/org/apache/nutch/parse/ParseText.java  | 119 ---
 src/java/org/apache/nutch/parse/ParseUtil.java  | 181 ----
 src/java/org/apache/nutch/parse/Parser.java     |  58 --
 .../org/apache/nutch/parse/ParserChecker.java   | 270 ------
 .../org/apache/nutch/parse/ParserFactory.java   | 428 ---------
 .../org/apache/nutch/parse/ParserNotFound.java  |  47 -
 .../org/apache/nutch/parse/package-info.java    |  22 -
 .../plugin/CircularDependencyException.java     |  36 -
 src/java/org/apache/nutch/plugin/Extension.java | 194 ----
 .../org/apache/nutch/plugin/ExtensionPoint.java | 123 ---
 .../plugin/MissingDependencyException.java      |  36 -
 src/java/org/apache/nutch/plugin/Pluggable.java |  31 -
 src/java/org/apache/nutch/plugin/Plugin.java    |  95 --
 .../apache/nutch/plugin/PluginClassLoader.java  |  80 --
 .../apache/nutch/plugin/PluginDescriptor.java   | 363 --------
 .../nutch/plugin/PluginManifestParser.java      | 303 -------
 .../apache/nutch/plugin/PluginRepository.java   | 523 -----------
 .../nutch/plugin/PluginRuntimeException.java    |  37 -
 src/java/org/apache/nutch/plugin/package.html   |  40 -
 src/java/org/apache/nutch/protocol/Content.java | 296 -------
 .../org/apache/nutch/protocol/Protocol.java     |  68 --
 .../nutch/protocol/ProtocolException.java       |  39 -
 .../apache/nutch/protocol/ProtocolFactory.java  | 119 ---
 .../apache/nutch/protocol/ProtocolNotFound.java |  36 -
 .../apache/nutch/protocol/ProtocolOutput.java   |  55 --
 .../apache/nutch/protocol/ProtocolStatus.java   | 297 -------
 .../apache/nutch/protocol/RobotRulesParser.java | 325 -------
 .../org/apache/nutch/protocol/package-info.java |  23 -
 .../nutch/scoring/AbstractScoringFilter.java    |  68 --
 .../org/apache/nutch/scoring/ScoringFilter.java | 213 -----
 .../nutch/scoring/ScoringFilterException.java   |  43 -
 .../apache/nutch/scoring/ScoringFilters.java    | 118 ---
 .../org/apache/nutch/scoring/package-info.java  |  22 -
 .../nutch/scoring/webgraph/LinkDatum.java       | 140 ---
 .../nutch/scoring/webgraph/LinkDumper.java      | 433 ---------
 .../apache/nutch/scoring/webgraph/LinkRank.java | 677 --------------
 .../org/apache/nutch/scoring/webgraph/Node.java | 102 ---
 .../nutch/scoring/webgraph/NodeDumper.java      | 433 ---------
 .../nutch/scoring/webgraph/NodeReader.java      | 136 ---
 .../nutch/scoring/webgraph/ScoreUpdater.java    | 253 ------
 .../apache/nutch/scoring/webgraph/WebGraph.java | 783 ----------------
 .../nutch/scoring/webgraph/package-info.java    |  24 -
 .../nutch/segment/ContentAsTextInputFormat.java | 104 ---
 .../apache/nutch/segment/SegmentChecker.java    | 136 ---
 .../nutch/segment/SegmentMergeFilter.java       |  47 -
 .../nutch/segment/SegmentMergeFilters.java      |  84 --
 .../org/apache/nutch/segment/SegmentMerger.java | 793 -----------------
 .../org/apache/nutch/segment/SegmentPart.java   | 113 ---
 .../org/apache/nutch/segment/SegmentReader.java | 719 ---------------
 .../org/apache/nutch/segment/package-info.java  |  23 -
 .../org/apache/nutch/service/ConfManager.java   |  39 -
 .../org/apache/nutch/service/JobManager.java    |  44 -
 .../org/apache/nutch/service/NutchReader.java   |  37 -
 .../org/apache/nutch/service/NutchServer.java   | 224 -----
 .../nutch/service/impl/ConfManagerImpl.java     | 132 ---
 .../apache/nutch/service/impl/JobFactory.java   |  75 --
 .../nutch/service/impl/JobManagerImpl.java      |  95 --
 .../apache/nutch/service/impl/JobWorker.java    | 114 ---
 .../apache/nutch/service/impl/LinkReader.java   | 175 ----
 .../apache/nutch/service/impl/NodeReader.java   | 184 ----
 .../service/impl/NutchServerPoolExecutor.java   | 131 ---
 .../nutch/service/impl/SequenceReader.java      | 171 ----
 .../nutch/service/model/request/DbQuery.java    |  56 --
 .../nutch/service/model/request/JobConfig.java  |  71 --
 .../service/model/request/NutchConfig.java      |  51 --
 .../service/model/request/ReaderConfig.java     |  30 -
 .../nutch/service/model/request/SeedList.java   |  93 --
 .../nutch/service/model/request/SeedUrl.java    |  89 --
 .../service/model/response/FetchNodeDbInfo.java | 103 ---
 .../nutch/service/model/response/JobInfo.java   | 102 ---
 .../service/model/response/NutchServerInfo.java |  55 --
 .../service/resources/AbstractResource.java     |  45 -
 .../nutch/service/resources/AdminResource.java  |  85 --
 .../nutch/service/resources/ConfigResource.java | 137 ---
 .../nutch/service/resources/DbResource.java     | 143 ---
 .../nutch/service/resources/JobResource.java    |  99 ---
 .../nutch/service/resources/ReaderResouce.java  | 177 ----
 .../nutch/service/resources/SeedResource.java   | 111 ---
 .../nutch/tools/AbstractCommonCrawlFormat.java  | 393 --------
 src/java/org/apache/nutch/tools/Benchmark.java  | 284 ------
 .../apache/nutch/tools/CommonCrawlConfig.java   | 147 ---
 .../nutch/tools/CommonCrawlDataDumper.java      | 716 ---------------
 .../apache/nutch/tools/CommonCrawlFormat.java   |  87 --
 .../nutch/tools/CommonCrawlFormatFactory.java   |  74 --
 .../nutch/tools/CommonCrawlFormatJackson.java   | 109 ---
 .../nutch/tools/CommonCrawlFormatJettinson.java | 122 ---
 .../nutch/tools/CommonCrawlFormatSimple.java    | 174 ----
 .../nutch/tools/CommonCrawlFormatWARC.java      | 286 ------
 src/java/org/apache/nutch/tools/DmozParser.java | 391 --------
 src/java/org/apache/nutch/tools/FileDumper.java | 419 ---------
 .../org/apache/nutch/tools/FreeGenerator.java   | 214 -----
 .../org/apache/nutch/tools/ResolveUrls.java     | 204 -----
 src/java/org/apache/nutch/tools/WARCUtils.java  | 154 ----
 .../apache/nutch/tools/arc/ArcInputFormat.java  |  51 --
 .../apache/nutch/tools/arc/ArcRecordReader.java | 299 -------
 .../nutch/tools/arc/ArcSegmentCreator.java      | 426 ---------
 .../apache/nutch/tools/arc/package-info.java    |  23 -
 .../org/apache/nutch/tools/package-info.java    |  22 -
 .../apache/nutch/tools/warc/WARCExporter.java   | 333 -------
 .../apache/nutch/tools/warc/package-info.java   |  23 -
 .../org/apache/nutch/util/CommandRunner.java    | 291 ------
 .../apache/nutch/util/CrawlCompletionStats.java | 245 -----
 .../org/apache/nutch/util/DeflateUtils.java     | 140 ---
 src/java/org/apache/nutch/util/DomUtil.java     | 104 ---
 .../org/apache/nutch/util/DumpFileUtil.java     | 147 ---
 .../org/apache/nutch/util/EncodingDetector.java | 386 --------
 src/java/org/apache/nutch/util/FSUtils.java     | 106 ---
 src/java/org/apache/nutch/util/GZIPUtils.java   | 148 ----
 .../nutch/util/GenericWritableConfigurable.java |  60 --
 .../org/apache/nutch/util/HadoopFSUtil.java     |  72 --
 src/java/org/apache/nutch/util/JexlUtil.java    |  76 --
 src/java/org/apache/nutch/util/LockUtil.java    |  84 --
 src/java/org/apache/nutch/util/MimeUtil.java    | 279 ------
 src/java/org/apache/nutch/util/NodeWalker.java  | 129 ---
 .../apache/nutch/util/NutchConfiguration.java   | 104 ---
 src/java/org/apache/nutch/util/NutchJob.java    |  30 -
 src/java/org/apache/nutch/util/NutchTool.java   | 109 ---
 src/java/org/apache/nutch/util/ObjectCache.java |  56 --
 .../apache/nutch/util/PrefixStringMatcher.java  | 119 ---
 .../nutch/util/ProtocolStatusStatistics.java    | 179 ----
 src/java/org/apache/nutch/util/StringUtil.java  | 155 ----
 .../apache/nutch/util/SuffixStringMatcher.java  | 114 ---
 src/java/org/apache/nutch/util/TableUtil.java   | 161 ----
 src/java/org/apache/nutch/util/TimingUtil.java  |  72 --
 .../apache/nutch/util/TrieStringMatcher.java    | 202 -----
 src/java/org/apache/nutch/util/URLUtil.java     | 533 -----------
 .../nutch/util/domain/DomainStatistics.java     | 234 -----
 .../apache/nutch/util/domain/DomainSuffix.java  |  79 --
 .../nutch/util/domain/DomainSuffixes.java       |  86 --
 .../nutch/util/domain/DomainSuffixesReader.java | 164 ----
 .../nutch/util/domain/TopLevelDomain.java       |  67 --
 .../org/apache/nutch/util/domain/package.html   |  14 -
 .../org/apache/nutch/util/package-info.java     |  22 -
 .../apache/nutch/webui/NutchUiApplication.java  |  75 --
 .../nutch/webui/NutchUiApplication.properties   |  63 --
 .../org/apache/nutch/webui/NutchUiServer.java   | 104 ---
 .../apache/nutch/webui/client/NutchClient.java  |  49 -
 .../nutch/webui/client/NutchClientFactory.java  |  52 --
 .../nutch/webui/client/impl/CrawlingCycle.java  |  82 --
 .../client/impl/CrawlingCycleListener.java      |  31 -
 .../webui/client/impl/NutchClientImpl.java      |  99 ---
 .../nutch/webui/client/impl/RemoteCommand.java  |  76 --
 .../webui/client/impl/RemoteCommandBuilder.java |  64 --
 .../client/impl/RemoteCommandExecutor.java      | 110 ---
 .../client/impl/RemoteCommandsBatchFactory.java |  97 --
 .../webui/client/model/ConnectionStatus.java    |  21 -
 .../apache/nutch/webui/client/model/Crawl.java  | 126 ---
 .../nutch/webui/client/model/JobConfig.java     |  77 --
 .../nutch/webui/client/model/JobInfo.java       | 104 ---
 .../nutch/webui/client/model/NutchStatus.java   |  62 --
 .../nutch/webui/config/CustomDaoFactory.java    |  58 --
 .../nutch/webui/config/CustomTableCreator.java  |  83 --
 .../webui/config/NutchGuiConfiguration.java     |  33 -
 .../nutch/webui/config/SpringConfiguration.java |  91 --
 .../apache/nutch/webui/model/NutchConfig.java   |  24 -
 .../apache/nutch/webui/model/NutchInstance.java | 118 ---
 .../org/apache/nutch/webui/model/SeedList.java  | 106 ---
 .../org/apache/nutch/webui/model/SeedUrl.java   |  96 --
 .../nutch/webui/pages/AbstractBasePage.html     |  33 -
 .../nutch/webui/pages/AbstractBasePage.java     | 206 -----
 .../apache/nutch/webui/pages/DashboardPage.html |  52 --
 .../apache/nutch/webui/pages/DashboardPage.java |  65 --
 .../apache/nutch/webui/pages/LogOutPage.java    |  21 -
 .../nutch/webui/pages/SchedulingPage.java       |  21 -
 .../apache/nutch/webui/pages/SearchPage.java    |  21 -
 .../nutch/webui/pages/StatisticsPage.java       |  21 -
 .../nutch/webui/pages/UrlsUploadPage.java       |  21 -
 .../nutch/webui/pages/UserSettingsPage.java     |  21 -
 .../webui/pages/assets/NutchUiCssReference.java |  39 -
 .../nutch/webui/pages/assets/nutch-style.css    | 149 ----
 .../webui/pages/components/ColorEnumLabel.java  |  71 --
 .../pages/components/ColorEnumLabelBuilder.java |  49 -
 .../pages/components/CpmIteratorAdapter.java    |  41 -
 .../nutch/webui/pages/crawls/CrawlPanel.html    |  58 --
 .../nutch/webui/pages/crawls/CrawlPanel.java    |  98 --
 .../nutch/webui/pages/crawls/CrawlsPage.html    |  90 --
 .../nutch/webui/pages/crawls/CrawlsPage.java    | 139 ---
 .../webui/pages/instances/InstancePanel.html    |  46 -
 .../webui/pages/instances/InstancePanel.java    |  62 --
 .../webui/pages/instances/InstancesPage.html    |  66 --
 .../webui/pages/instances/InstancesPage.java    | 127 ---
 .../nutch/webui/pages/menu/VerticalMenu.html    |  48 -
 .../nutch/webui/pages/menu/VerticalMenu.java    |  27 -
 .../nutch/webui/pages/seed/SeedListsPage.html   |  75 --
 .../nutch/webui/pages/seed/SeedListsPage.java   |  79 --
 .../apache/nutch/webui/pages/seed/SeedPage.html |  91 --
 .../apache/nutch/webui/pages/seed/SeedPage.java | 153 ----
 .../webui/pages/settings/SettingsPage.html      |  43 -
 .../webui/pages/settings/SettingsPage.java      |  59 --
 .../nutch/webui/service/CrawlService.java       |  33 -
 .../webui/service/NutchInstanceService.java     |  33 -
 .../nutch/webui/service/NutchService.java       |  31 -
 .../nutch/webui/service/SeedListService.java    |  33 -
 .../webui/service/impl/CrawlServiceImpl.java    | 132 ---
 .../service/impl/NutchInstanceServiceImpl.java  |  76 --
 .../webui/service/impl/NutchServiceImpl.java    |  82 --
 .../webui/service/impl/SeedListServiceImpl.java |  77 --
 src/java/overview.html                          |   9 -
 src/plugin/build-plugin.xml                     | 255 ------
 src/plugin/build.xml                            | 213 -----
 src/plugin/creativecommons/README.txt           |   1 -
 src/plugin/creativecommons/build.xml            |  28 -
 .../creativecommons/conf/crawl-urlfilter.txt    |  18 -
 src/plugin/creativecommons/conf/nutch-site.xml  |  50 --
 src/plugin/creativecommons/data/anchor.html     |   9 -
 src/plugin/creativecommons/data/rdf.html        |  35 -
 src/plugin/creativecommons/data/rel.html        |   6 -
 src/plugin/creativecommons/ivy.xml              |  41 -
 src/plugin/creativecommons/plugin.xml           |  48 -
 .../creativecommons/nutch/CCIndexingFilter.java | 124 ---
 .../creativecommons/nutch/CCParseFilter.java    | 300 -------
 .../java/org/creativecommons/nutch/package.html |   5 -
 .../nutch/TestCCParseFilter.java                |  73 --
 src/plugin/feed/build.xml                       |  45 -
 src/plugin/feed/ivy.xml                         |  43 -
 src/plugin/feed/plugin.xml                      |  49 -
 src/plugin/feed/sample/rsstest.rss              |  36 -
 .../nutch/indexer/feed/FeedIndexingFilter.java  | 129 ---
 .../apache/nutch/indexer/feed/package-info.java |  22 -
 .../org/apache/nutch/parse/feed/FeedParser.java | 374 --------
 .../apache/nutch/parse/feed/package-info.java   |  22 -
 .../apache/nutch/parse/feed/TestFeedParser.java | 124 ---
 src/plugin/headings/build.xml                   |  22 -
 src/plugin/headings/ivy.xml                     |  41 -
 src/plugin/headings/plugin.xml                  |  45 -
 .../parse/headings/HeadingsParseFilter.java     | 124 ---
 .../nutch/parse/headings/package-info.java      |  22 -
 src/plugin/index-anchor/build.xml               |  22 -
 src/plugin/index-anchor/ivy.xml                 |  41 -
 src/plugin/index-anchor/plugin.xml              |  38 -
 .../indexer/anchor/AnchorIndexingFilter.java    | 107 ---
 .../apache/nutch/indexer/anchor/package.html    |   5 -
 .../anchor/TestAnchorIndexingFilter.java        |  67 --
 src/plugin/index-basic/build.xml                |  22 -
 src/plugin/index-basic/ivy.xml                  |  41 -
 src/plugin/index-basic/plugin.xml               |  42 -
 .../indexer/basic/BasicIndexingFilter.java      | 158 ----
 .../org/apache/nutch/indexer/basic/package.html |   5 -
 .../indexer/basic/TestBasicIndexingFilter.java  |  99 ---
 src/plugin/index-geoip/build-ivy.xml            |  54 --
 src/plugin/index-geoip/build.xml                |  27 -
 src/plugin/index-geoip/ivy.xml                  |  46 -
 src/plugin/index-geoip/plugin.xml               |  51 --
 .../indexer/geoip/GeoIPDocumentCreator.java     | 210 -----
 .../indexer/geoip/GeoIPIndexingFilter.java      | 241 -----
 .../nutch/indexer/geoip/package-info.java       |  28 -
 src/plugin/index-links/build.xml                |  22 -
 src/plugin/index-links/ivy.xml                  |  41 -
 src/plugin/index-links/plugin.xml               |  41 -
 .../indexer/links/LinksIndexingFilter.java      | 167 ----
 .../indexer/links/TestLinksIndexingFilter.java  | 218 -----
 .../org/apache/nutch/parse/TestOutlinks.java    |  54 --
 src/plugin/index-metadata/build.xml             |  22 -
 src/plugin/index-metadata/ivy.xml               |  41 -
 src/plugin/index-metadata/plugin.xml            |  42 -
 .../nutch/indexer/metadata/MetadataIndexer.java | 104 ---
 .../nutch/indexer/metadata/package-info.java    |  23 -
 src/plugin/index-more/build.xml                 |  22 -
 src/plugin/index-more/ivy.xml                   |  41 -
 src/plugin/index-more/plugin.xml                |  42 -
 .../nutch/indexer/more/MoreIndexingFilter.java  | 344 -------
 .../org/apache/nutch/indexer/more/package.html  |   6 -
 .../indexer/more/TestMoreIndexingFilter.java    | 123 ---
 src/plugin/index-replace/README.txt             |  95 --
 src/plugin/index-replace/build.xml              |  55 --
 src/plugin/index-replace/ivy.xml                |  41 -
 src/plugin/index-replace/plugin.xml             |  22 -
 .../index-replace/sample/testIndexReplace.html  |  12 -
 .../nutch/indexer/replace/FieldReplacer.java    | 196 ----
 .../nutch/indexer/replace/ReplaceIndexer.java   | 330 -------
 .../nutch/indexer/replace/package-info.java     |  22 -
 .../nutch/indexer/replace/TestIndexReplace.java | 456 ----------
 src/plugin/index-static/build.xml               |  22 -
 src/plugin/index-static/ivy.xml                 |  41 -
 src/plugin/index-static/plugin.xml              |  42 -
 .../indexer/staticfield/StaticFieldIndexer.java | 143 ---
 .../nutch/indexer/staticfield/package.html      |   5 -
 .../staticfield/TestStaticFieldIndexerTest.java | 194 ----
 src/plugin/indexer-cloudsearch/README.md        |  58 --
 src/plugin/indexer-cloudsearch/build.xml        |  22 -
 .../indexer-cloudsearch/createCSDomain.sh       |  22 -
 src/plugin/indexer-cloudsearch/ivy.xml          |  41 -
 src/plugin/indexer-cloudsearch/plugin.xml       |  50 --
 .../cloudsearch/CloudSearchConstants.java       |  27 -
 .../cloudsearch/CloudSearchIndexWriter.java     | 382 --------
 .../cloudsearch/CloudSearchUtils.java           |  73 --
 src/plugin/indexer-dummy/build.xml              |  22 -
 src/plugin/indexer-dummy/ivy.xml                |  41 -
 src/plugin/indexer-dummy/plugin.xml             |  38 -
 .../indexwriter/dummy/DummyIndexWriter.java     | 103 ---
 .../nutch/indexwriter/dummy/package-info.java   |  23 -
 src/plugin/indexer-elastic/build-ivy.xml        |  54 --
 src/plugin/indexer-elastic/build.xml            |  22 -
 src/plugin/indexer-elastic/howto_upgrade_es.txt |   6 -
 src/plugin/indexer-elastic/ivy.xml              |  43 -
 src/plugin/indexer-elastic/plugin.xml           |  71 --
 .../indexwriter/elastic/ElasticConstants.java   |  28 -
 .../indexwriter/elastic/ElasticIndexWriter.java | 279 ------
 .../nutch/indexwriter/elastic/package-info.java |  22 -
 src/plugin/indexer-solr/build-ivy.xml           |  54 --
 src/plugin/indexer-solr/build.xml               |  22 -
 src/plugin/indexer-solr/ivy.xml                 |  44 -
 src/plugin/indexer-solr/plugin.xml              |  48 -
 .../nutch/indexwriter/solr/SolrConstants.java   |  56 --
 .../nutch/indexwriter/solr/SolrIndexWriter.java | 277 ------
 .../indexwriter/solr/SolrMappingReader.java     | 147 ---
 .../nutch/indexwriter/solr/SolrUtils.java       |  97 --
 .../nutch/indexwriter/solr/package-info.java    |  22 -
 src/plugin/language-identifier/build.xml        |  38 -
 src/plugin/language-identifier/ivy.xml          |  41 -
 src/plugin/language-identifier/plugin.xml       |  49 -
 .../nutch/analysis/lang/HTMLLanguageParser.java | 320 -------
 .../analysis/lang/LanguageIndexingFilter.java   |  89 --
 .../nutch/analysis/lang/langmappings.properties | 188 ----
 .../org/apache/nutch/analysis/lang/package.html |   6 -
 .../analysis/lang/TestHTMLLanguageParser.java   | 149 ----
 .../test/org/apache/nutch/analysis/lang/da.test | 108 ---
 .../test/org/apache/nutch/analysis/lang/de.test | 104 ---
 .../test/org/apache/nutch/analysis/lang/el.test | 109 ---
 .../test/org/apache/nutch/analysis/lang/en.test | 105 ---
 .../test/org/apache/nutch/analysis/lang/es.test | 107 ---
 .../test/org/apache/nutch/analysis/lang/fi.test | 106 ---
 .../test/org/apache/nutch/analysis/lang/fr.test | 105 ---
 .../test/org/apache/nutch/analysis/lang/it.test | 109 ---
 .../test/org/apache/nutch/analysis/lang/nl.test | 105 ---
 .../test/org/apache/nutch/analysis/lang/pt.test | 105 ---
 .../test/org/apache/nutch/analysis/lang/sv.test | 108 ---
 .../nutch/analysis/lang/test-referencial.txt    |  10 -
 src/plugin/lib-htmlunit/build-ivy.xml           |  54 --
 src/plugin/lib-htmlunit/build.xml               |  28 -
 src/plugin/lib-htmlunit/ivy.xml                 |  52 --
 src/plugin/lib-htmlunit/plugin.xml              | 166 ----
 .../protocol/htmlunit/HtmlUnitWebDriver.java    | 189 ----
 .../htmlunit/HtmlUnitWebWindowListener.java     |  53 --
 src/plugin/lib-http/build.xml                   |  22 -
 src/plugin/lib-http/ivy.xml                     |  41 -
 src/plugin/lib-http/plugin.xml                  |  33 -
 .../protocol/http/api/BlockedException.java     |  26 -
 .../nutch/protocol/http/api/HttpBase.java       | 587 ------------
 .../nutch/protocol/http/api/HttpException.java  |  40 -
 .../protocol/http/api/HttpRobotRulesParser.java | 167 ----
 .../apache/nutch/protocol/http/api/package.html |   6 -
 .../protocol/http/api/TestRobotRulesParser.java | 123 ---
 src/plugin/lib-nekohtml/build.xml               |  30 -
 src/plugin/lib-nekohtml/ivy.xml                 |  42 -
 src/plugin/lib-nekohtml/plugin.xml              |  38 -
 src/plugin/lib-regex-filter/build.xml           |  22 -
 src/plugin/lib-regex-filter/ivy.xml             |  41 -
 src/plugin/lib-regex-filter/plugin.xml          |  33 -
 .../apache/nutch/urlfilter/api/RegexRule.java   | 102 ---
 .../nutch/urlfilter/api/RegexURLFilterBase.java | 315 -------
 .../nutch/urlfilter/api/package-info.java       |  23 -
 .../urlfilter/api/RegexURLFilterBaseTest.java   | 134 ---
 src/plugin/lib-selenium/build-ivy.xml           |  54 --
 src/plugin/lib-selenium/build.xml               |  28 -
 .../lib-selenium/howto_upgrade_selenium.txt     |  15 -
 src/plugin/lib-selenium/ivy.xml                 |  52 --
 src/plugin/lib-selenium/plugin.xml              | 175 ----
 .../nutch/protocol/selenium/HttpWebClient.java  | 236 -----
 src/plugin/lib-xml/build.xml                    |  36 -
 src/plugin/lib-xml/ivy.xml                      |  44 -
 src/plugin/lib-xml/plugin.xml                   |  65 --
 src/plugin/microformats-reltag/build.xml        |  27 -
 src/plugin/microformats-reltag/ivy.xml          |  41 -
 src/plugin/microformats-reltag/plugin.xml       |  49 -
 .../reltag/RelTagIndexingFilter.java            |  77 --
 .../nutch/microformats/reltag/RelTagParser.java | 148 ----
 .../nutch/microformats/reltag/package.html      |   8 -
 src/plugin/mimetype-filter/build.xml            |  28 -
 src/plugin/mimetype-filter/ivy.xml              |  41 -
 src/plugin/mimetype-filter/plugin.xml           |  37 -
 .../mimetype-filter/sample/allow-images.txt     |  34 -
 .../mimetype-filter/sample/block-html.txt       |  34 -
 .../indexer/filter/MimeTypeIndexingFilter.java  | 273 ------
 .../filter/MimeTypeIndexingFilterTest.java      | 114 ---
 src/plugin/nutch-extensionpoints/build.xml      |  30 -
 src/plugin/nutch-extensionpoints/ivy.xml        |  41 -
 src/plugin/nutch-extensionpoints/plugin.xml     |  67 --
 src/plugin/parse-ext/build.xml                  |  32 -
 src/plugin/parse-ext/command                    |  24 -
 src/plugin/parse-ext/ivy.xml                    |  41 -
 src/plugin/parse-ext/plugin.xml                 |  60 --
 .../org/apache/nutch/parse/ext/ExtParser.java   | 183 ----
 .../apache/nutch/parse/ext/package-info.java    |  22 -
 .../apache/nutch/parse/ext/TestExtParser.java   | 130 ---
 src/plugin/parse-html/build.xml                 |  40 -
 src/plugin/parse-html/ivy.xml                   |  42 -
 src/plugin/parse-html/plugin.xml                |  48 -
 .../org/apache/nutch/parse/html/DOMBuilder.java | 766 ----------------
 .../nutch/parse/html/DOMContentUtils.java       | 400 ---------
 .../nutch/parse/html/HTMLMetaProcessor.java     | 214 -----
 .../org/apache/nutch/parse/html/HtmlParser.java | 352 --------
 .../parse/html/XMLCharacterRecognizer.java      | 112 ---
 .../org/apache/nutch/parse/html/package.html    |   5 -
 .../nutch/parse/html/TestDOMContentUtils.java   | 347 --------
 .../apache/nutch/parse/html/TestHtmlParser.java | 122 ---
 .../parse/html/TestRobotsMetaProcessor.java     | 155 ----
 src/plugin/parse-js/build.xml                   |  22 -
 src/plugin/parse-js/ivy.xml                     |  41 -
 src/plugin/parse-js/plugin.xml                  |  53 --
 .../apache/nutch/parse/js/JSParseFilter.java    | 301 -------
 .../org/apache/nutch/parse/js/package-info.java |  23 -
 src/plugin/parse-metatags/README.txt            |  17 -
 src/plugin/parse-metatags/build.xml             |  37 -
 src/plugin/parse-metatags/ivy.xml               |  41 -
 src/plugin/parse-metatags/plugin.xml            |  22 -
 .../parse-metatags/sample/testMetatags.html     |   9 -
 .../sample/testMultivalueMetatags.html          |  12 -
 .../nutch/parse/metatags/MetaTagsParser.java    | 124 ---
 .../nutch/parse/metatags/package-info.java      |  24 -
 .../nutch/parse/metatags/TestMetatagParser.java | 104 ---
 src/plugin/parse-replace/README.txt             |  91 --
 src/plugin/parse-replace/build.xml              |  37 -
 src/plugin/parse-replace/ivy.xml                |  41 -
 src/plugin/parse-replace/plugin.xml             |  22 -
 .../parse-replace/sample/testParseReplace.html  |  11 -
 .../nutch/parse/replace/ReplaceParser.java      |  74 --
 .../nutch/parse/replace/package-info.java       |  22 -
 .../nutch/parse/replace/TestParseReplace.java   |  68 --
 src/plugin/parse-swf/build.xml                  |  38 -
 src/plugin/parse-swf/ivy.xml                    |  41 -
 src/plugin/parse-swf/lib/javaswf-LICENSE.txt    |  33 -
 src/plugin/parse-swf/lib/javaswf.jar            | Bin 125369 -> 0 bytes
 src/plugin/parse-swf/plugin.xml                 |  44 -
 src/plugin/parse-swf/sample/test1.swf           | Bin 21054 -> 0 bytes
 src/plugin/parse-swf/sample/test1.txt           |  60 --
 src/plugin/parse-swf/sample/test2.swf           | Bin 42534 -> 0 bytes
 src/plugin/parse-swf/sample/test2.txt           |   5 -
 src/plugin/parse-swf/sample/test3.swf           | Bin 51562 -> 0 bytes
 src/plugin/parse-swf/sample/test3.txt           |  11 -
 .../org/apache/nutch/parse/swf/SWFParser.java   | 685 --------------
 .../apache/nutch/parse/swf/package-info.java    |  22 -
 .../apache/nutch/parse/swf/TestSWFParser.java   |  94 --
 src/plugin/parse-tika/build-ivy.xml             |  54 --
 src/plugin/parse-tika/build.xml                 |  55 --
 src/plugin/parse-tika/howto_upgrade_tika.txt    |   8 -
 src/plugin/parse-tika/ivy.xml                   |  46 -
 src/plugin/parse-tika/plugin.xml                | 136 ---
 src/plugin/parse-tika/sample/encrypted.pdf      | Bin 3431 -> 0 bytes
 src/plugin/parse-tika/sample/nutch.html         | 519 -----------
 src/plugin/parse-tika/sample/nutch_logo_tm.gif  | Bin 2747 -> 0 bytes
 src/plugin/parse-tika/sample/ootest.odt         | Bin 20753 -> 0 bytes
 src/plugin/parse-tika/sample/ootest.sxw         | Bin 20125 -> 0 bytes
 src/plugin/parse-tika/sample/ootest.txt         |  30 -
 src/plugin/parse-tika/sample/pdftest.pdf        | 157 ----
 src/plugin/parse-tika/sample/rsstest.rss        |  37 -
 src/plugin/parse-tika/sample/test.rtf           |  17 -
 src/plugin/parse-tika/sample/word97.doc         | Bin 8192 -> 0 bytes
 .../tika/BoilerpipeExtractorRepository.java     |  62 --
 .../org/apache/nutch/parse/tika/DOMBuilder.java | 794 -----------------
 .../nutch/parse/tika/DOMContentUtils.java       | 402 ---------
 .../nutch/parse/tika/HTMLMetaProcessor.java     | 214 -----
 .../org/apache/nutch/parse/tika/TikaParser.java | 286 ------
 .../parse/tika/XMLCharacterRecognizer.java      | 112 ---
 .../apache/nutch/parse/tika/package-info.java   |  23 -
 .../apache/nutch/tika/TestDOMContentUtils.java  | 337 -------
 .../org/apache/nutch/tika/TestFeedParser.java   | 121 ---
 .../apache/nutch/tika/TestImageMetadata.java    |  67 --
 .../org/apache/nutch/tika/TestMSWordParser.java |  92 --
 .../org/apache/nutch/tika/TestOOParser.java     | 107 ---
 .../org/apache/nutch/tika/TestPdfParser.java    |  73 --
 .../org/apache/nutch/tika/TestRTFParser.java    |  81 --
 .../nutch/tika/TestRobotsMetaProcessor.java     | 156 ----
 src/plugin/parse-zip/build.xml                  |  38 -
 src/plugin/parse-zip/ivy.xml                    |  41 -
 src/plugin/parse-zip/plugin.xml                 |  46 -
 src/plugin/parse-zip/sample/test.zip            | Bin 182 -> 0 bytes
 .../org/apache/nutch/parse/zip/ZipParser.java   | 144 ---
 .../nutch/parse/zip/ZipTextExtractor.java       | 120 ---
 .../apache/nutch/parse/zip/package-info.java    |  22 -
 .../apache/nutch/parse/zip/TestZipParser.java   |  71 --
 src/plugin/parsefilter-naivebayes/build-ivy.xml |  54 --
 src/plugin/parsefilter-naivebayes/build.xml     |  22 -
 src/plugin/parsefilter-naivebayes/ivy.xml       |  49 -
 src/plugin/parsefilter-naivebayes/plugin.xml    |  56 --
 .../nutch/parsefilter/naivebayes/Classify.java  | 120 ---
 .../naivebayes/NaiveBayesParseFilter.java       | 197 ----
 .../nutch/parsefilter/naivebayes/Train.java     | 148 ----
 .../parsefilter/naivebayes/package-info.java    |  28 -
 src/plugin/parsefilter-regex/build.xml          |  27 -
 .../data/regex-parsefilter.txt                  |  10 -
 src/plugin/parsefilter-regex/ivy.xml            |  37 -
 src/plugin/parsefilter-regex/plugin.xml         |  42 -
 .../parsefilter/regex/RegexParseFilter.java     | 199 -----
 .../nutch/parsefilter/regex/package-info.java   |  23 -
 .../parsefilter/regex/TestRegexParseFilter.java |  77 --
 src/plugin/plugin.dtd                           | 206 -----
 src/plugin/protocol-file/build.xml              |  29 -
 src/plugin/protocol-file/ivy.xml                |  41 -
 src/plugin/protocol-file/plugin.xml             |  46 -
 .../protocol-file/sample/testprotocolfile.txt   |   1 -
 .../sample/testprotocolfile_(encoded).txt       |   1 -
 .../org/apache/nutch/protocol/file/File.java    | 228 -----
 .../apache/nutch/protocol/file/FileError.java   |  36 -
 .../nutch/protocol/file/FileException.java      |  40 -
 .../nutch/protocol/file/FileResponse.java       | 317 -------
 .../org/apache/nutch/protocol/file/package.html |   5 -
 .../nutch/protocol/file/TestProtocolFile.java   |  99 ---
 src/plugin/protocol-ftp/build.xml               |  22 -
 src/plugin/protocol-ftp/ivy.xml                 |  42 -
 src/plugin/protocol-ftp/plugin.xml              |  46 -
 .../org/apache/nutch/protocol/ftp/Client.java   | 595 -------------
 .../java/org/apache/nutch/protocol/ftp/Ftp.java | 267 ------
 .../org/apache/nutch/protocol/ftp/FtpError.java |  36 -
 .../apache/nutch/protocol/ftp/FtpException.java |  46 -
 .../ftp/FtpExceptionBadSystResponse.java        |  29 -
 .../FtpExceptionCanNotHaveDataConnection.java   |  29 -
 ...ExceptionControlClosedByForcedDataClose.java |  30 -
 .../ftp/FtpExceptionUnknownForcedDataClose.java |  30 -
 .../apache/nutch/protocol/ftp/FtpResponse.java  | 521 -----------
 .../nutch/protocol/ftp/FtpRobotRulesParser.java | 121 ---
 .../protocol/ftp/PrintCommandListener.java      |  71 --
 .../org/apache/nutch/protocol/ftp/package.html  |   5 -
 src/plugin/protocol-htmlunit/build.xml          |  37 -
 src/plugin/protocol-htmlunit/ivy.xml            |  38 -
 src/plugin/protocol-htmlunit/plugin.xml         |  51 --
 .../apache/nutch/protocol/htmlunit/Http.java    |  63 --
 .../nutch/protocol/htmlunit/HttpResponse.java   | 573 ------------
 .../apache/nutch/protocol/htmlunit/package.html |  21 -
 src/plugin/protocol-http/build.xml              |  50 --
 src/plugin/protocol-http/ivy.xml                |  41 -
 src/plugin/protocol-http/jsp/basic-http.jsp     |  44 -
 src/plugin/protocol-http/jsp/brokenpage.jsp     |  47 -
 src/plugin/protocol-http/jsp/redirect301.jsp    |  49 -
 src/plugin/protocol-http/jsp/redirect302.jsp    |  49 -
 src/plugin/protocol-http/plugin.xml             |  51 --
 .../org/apache/nutch/protocol/http/Http.java    |  73 --
 .../nutch/protocol/http/HttpResponse.java       | 558 ------------
 .../org/apache/nutch/protocol/http/package.html |   5 -
 .../src/test/conf/nutch-site-test.xml           |  52 --
 .../nutch/protocol/http/TestProtocolHttp.java   | 140 ---
 src/plugin/protocol-httpclient/build.xml        |  45 -
 src/plugin/protocol-httpclient/ivy.xml          |  42 -
 src/plugin/protocol-httpclient/jsp/basic.jsp    |  74 --
 src/plugin/protocol-httpclient/jsp/cookies.jsp  |  63 --
 src/plugin/protocol-httpclient/jsp/digest.jsp   |  68 --
 src/plugin/protocol-httpclient/jsp/noauth.jsp   |  36 -
 src/plugin/protocol-httpclient/jsp/ntlm.jsp     |  89 --
 src/plugin/protocol-httpclient/plugin.xml       |  58 --
 .../DummySSLProtocolSocketFactory.java          | 163 ----
 .../httpclient/DummyX509TrustManager.java       |  92 --
 .../apache/nutch/protocol/httpclient/Http.java  | 572 ------------
 .../protocol/httpclient/HttpAuthentication.java |  45 -
 .../httpclient/HttpAuthenticationException.java |  71 --
 .../httpclient/HttpAuthenticationFactory.java   |  98 --
 .../httpclient/HttpBasicAuthentication.java     | 199 -----
 .../httpclient/HttpFormAuthConfigurer.java      | 106 ---
 .../httpclient/HttpFormAuthentication.java      | 223 -----
 .../nutch/protocol/httpclient/HttpResponse.java | 216 -----
 .../nutch/protocol/httpclient/package.html      |   9 -
 .../src/test/conf/httpclient-auth-test.xml      |  58 --
 .../src/test/conf/nutch-site-test.xml           |  52 --
 .../httpclient/TestProtocolHttpClient.java      | 217 -----
 .../protocol-interactiveselenium/README.md      |  38 -
 .../protocol-interactiveselenium/build-ivy.xml  |  54 --
 .../protocol-interactiveselenium/build.xml      |  37 -
 src/plugin/protocol-interactiveselenium/ivy.xml |  42 -
 .../protocol-interactiveselenium/plugin.xml     |  47 -
 .../protocol/interactiveselenium/Http.java      |  59 --
 .../interactiveselenium/HttpResponse.java       | 399 ---------
 .../DefalultMultiInteractionHandler.java        |  53 --
 .../DefaultClickAllAjaxLinksHandler.java        |  88 --
 .../handlers/DefaultHandler.java                |  30 -
 .../handlers/InteractiveSeleniumHandler.java    |  25 -
 .../protocol/interactiveselenium/package.html   |   5 -
 src/plugin/protocol-selenium/README.md          | 208 -----
 src/plugin/protocol-selenium/build-ivy.xml      |  54 --
 src/plugin/protocol-selenium/build.xml          |  36 -
 src/plugin/protocol-selenium/ivy.xml            |  42 -
 src/plugin/protocol-selenium/plugin.xml         |  47 -
 .../apache/nutch/protocol/selenium/Http.java    |  59 --
 .../nutch/protocol/selenium/HttpResponse.java   | 360 --------
 .../apache/nutch/protocol/selenium/package.html |   5 -
 src/plugin/scoring-depth/build.xml              |   6 -
 src/plugin/scoring-depth/ivy.xml                |  41 -
 src/plugin/scoring-depth/plugin.xml             |  24 -
 .../nutch/scoring/depth/DepthScoringFilter.java | 207 -----
 .../nutch/scoring/depth/package-info.java       |  23 -
 src/plugin/scoring-link/build.xml               |  27 -
 src/plugin/scoring-link/ivy.xml                 |  41 -
 src/plugin/scoring-link/plugin.xml              |  39 -
 .../scoring/link/LinkAnalysisScoringFilter.java |  95 --
 .../apache/nutch/scoring/link/package-info.java |  23 -
 src/plugin/scoring-opic/build.xml               |  27 -
 src/plugin/scoring-opic/ivy.xml                 |  41 -
 src/plugin/scoring-opic/plugin.xml              |  39 -
 .../nutch/scoring/opic/OPICScoringFilter.java   | 173 ----
 .../apache/nutch/scoring/opic/package-info.java |  23 -
 src/plugin/scoring-similarity/build-ivy.xml     |  54 --
 src/plugin/scoring-similarity/build.xml         |  27 -
 src/plugin/scoring-similarity/ivy.xml           |  42 -
 src/plugin/scoring-similarity/plugin.xml        |  45 -
 .../scoring/similarity/SimilarityModel.java     |  38 -
 .../similarity/SimilarityScoringFilter.java     |  70 --
 .../similarity/cosine/CosineSimilarity.java     |  84 --
 .../scoring/similarity/cosine/DocVector.java    |  57 --
 .../nutch/scoring/similarity/cosine/Model.java  | 190 ----
 .../scoring/similarity/cosine/package-info.java |   7 -
 .../similarity/util/LuceneAnalyzerUtil.java     |  93 --
 .../similarity/util/LuceneTokenizer.java        | 166 ----
 .../scoring/similarity/util/package-info.java   |  24 -
 src/plugin/subcollection/README.txt             |  10 -
 src/plugin/subcollection/build.xml              |  22 -
 src/plugin/subcollection/ivy.xml                |  41 -
 src/plugin/subcollection/plugin.xml             |  41 -
 .../nutch/collection/CollectionManager.java     | 240 -----
 .../apache/nutch/collection/Subcollection.java  | 259 ------
 .../org/apache/nutch/collection/package.html    |  36 -
 .../SubcollectionIndexingFilter.java            | 101 ---
 .../indexer/subcollection/package-info.java     |  25 -
 .../nutch/collection/TestSubcollection.java     | 112 ---
 src/plugin/tld/build.xml                        |  22 -
 src/plugin/tld/ivy.xml                          |  41 -
 src/plugin/tld/plugin.xml                       |  51 --
 .../nutch/indexer/tld/TLDIndexingFilter.java    |  69 --
 .../org/apache/nutch/indexer/tld/package.html   |   5 -
 .../nutch/scoring/tld/TLDScoringFilter.java     | 114 ---
 .../org/apache/nutch/scoring/tld/package.html   |   5 -
 src/plugin/urlfilter-automaton/build.xml        |  51 --
 src/plugin/urlfilter-automaton/ivy.xml          |  42 -
 src/plugin/urlfilter-automaton/plugin.xml       |  43 -
 .../urlfilter-automaton/sample/Benchmarks.rules |  26 -
 .../urlfilter-automaton/sample/Benchmarks.urls  | 297 -------
 .../sample/IntranetCrawling.rules               |  24 -
 .../sample/IntranetCrawling.urls                |   8 -
 .../sample/WholeWebCrawling.rules               |  19 -
 .../sample/WholeWebCrawling.urls                |  11 -
 .../urlfilter/automaton/AutomatonURLFilter.java | 116 ---
 .../nutch/urlfilter/automaton/package.html      |   9 -
 .../automaton/TestAutomatonURLFilter.java       |  56 --
 src/plugin/urlfilter-domain/build.xml           |  28 -
 src/plugin/urlfilter-domain/data/hosts.txt      |   5 -
 src/plugin/urlfilter-domain/ivy.xml             |  41 -
 src/plugin/urlfilter-domain/plugin.xml          |  43 -
 .../nutch/urlfilter/domain/DomainURLFilter.java | 212 -----
 .../nutch/urlfilter/domain/package-info.java    |  25 -
 .../urlfilter/domain/TestDomainURLFilter.java   |  67 --
 src/plugin/urlfilter-domainblacklist/build.xml  |  28 -
 .../urlfilter-domainblacklist/data/hosts.txt    |   5 -
 src/plugin/urlfilter-domainblacklist/ivy.xml    |  41 -
 src/plugin/urlfilter-domainblacklist/plugin.xml |  43 -
 .../DomainBlacklistURLFilter.java               | 210 -----
 .../urlfilter/domainblacklist/package-info.java |  24 -
 .../TestDomainBlacklistURLFilter.java           |  49 -
 src/plugin/urlfilter-ignoreexempt/README.md     |  43 -
 src/plugin/urlfilter-ignoreexempt/build.xml     |  55 --
 .../urlfilter-ignoreexempt/data/.donotdelete    |   0
 src/plugin/urlfilter-ignoreexempt/ivy.xml       |  41 -
 src/plugin/urlfilter-ignoreexempt/plugin.xml    |  45 -
 .../ignoreexempt/ExemptionUrlFilter.java        | 101 ---
 .../urlfilter/ignoreexempt/package-info.java    |  24 -
 src/plugin/urlfilter-prefix/build.xml           |  22 -
 src/plugin/urlfilter-prefix/ivy.xml             |  41 -
 src/plugin/urlfilter-prefix/plugin.xml          |  47 -
 .../nutch/urlfilter/prefix/PrefixURLFilter.java | 178 ----
 .../apache/nutch/urlfilter/prefix/package.html  |   5 -
 .../urlfilter/prefix/TestPrefixURLFilter.java   |  79 --
 src/plugin/urlfilter-regex/build.xml            |  51 --
 src/plugin/urlfilter-regex/ivy.xml              |  41 -
 src/plugin/urlfilter-regex/plugin.xml           |  48 -
 .../urlfilter-regex/sample/Benchmarks.rules     |  26 -
 .../urlfilter-regex/sample/Benchmarks.urls      | 297 -------
 .../sample/IntranetCrawling.rules               |  27 -
 .../sample/IntranetCrawling.urls                |   8 -
 .../sample/WholeWebCrawling.rules               |  22 -
 .../sample/WholeWebCrawling.urls                |  11 -
 .../urlfilter-regex/sample/nutch1838.rules      |  12 -
 .../urlfilter-regex/sample/nutch1838.urls       |   3 -
 .../nutch/urlfilter/regex/RegexURLFilter.java   | 111 ---
 .../apache/nutch/urlfilter/regex/package.html   |   5 -
 .../urlfilter/regex/TestRegexURLFilter.java     |  61 --
 src/plugin/urlfilter-suffix/build.xml           |  22 -
 src/plugin/urlfilter-suffix/ivy.xml             |  41 -
 src/plugin/urlfilter-suffix/plugin.xml          |  47 -
 .../nutch/urlfilter/suffix/SuffixURLFilter.java | 331 -------
 .../nutch/urlfilter/suffix/package-info.java    |  23 -
 .../urlfilter/suffix/TestSuffixURLFilter.java   | 123 ---
 src/plugin/urlfilter-validator/build.xml        |  22 -
 src/plugin/urlfilter-validator/ivy.xml          |  41 -
 src/plugin/urlfilter-validator/plugin.xml       |  41 -
 .../nutch/urlfilter/validator/UrlValidator.java | 386 --------
 .../nutch/urlfilter/validator/package.html      |   9 -
 .../urlfilter/validator/TestUrlValidator.java   |  79 --
 src/plugin/urlmeta/build.xml                    |  22 -
 src/plugin/urlmeta/ivy.xml                      |  41 -
 src/plugin/urlmeta/plugin.xml                   |  47 -
 .../indexer/urlmeta/URLMetaIndexingFilter.java  | 118 ---
 .../apache/nutch/indexer/urlmeta/package.html   |  12 -
 .../scoring/urlmeta/URLMetaScoringFilter.java   | 175 ----
 .../apache/nutch/scoring/urlmeta/package.html   |  11 -
 src/plugin/urlnormalizer-ajax/build.xml         |  22 -
 src/plugin/urlnormalizer-ajax/ivy.xml           |  41 -
 src/plugin/urlnormalizer-ajax/plugin.xml        |  41 -
 .../urlnormalizer/ajax/AjaxURLNormalizer.java   | 236 -----
 .../ajax/TestAjaxURLNormalizer.java             |  67 --
 src/plugin/urlnormalizer-basic/build.xml        |  22 -
 src/plugin/urlnormalizer-basic/ivy.xml          |  41 -
 src/plugin/urlnormalizer-basic/plugin.xml       |  41 -
 .../urlnormalizer/basic/BasicURLNormalizer.java | 290 ------
 .../net/urlnormalizer/basic/package-info.java   |  23 -
 .../basic/TestBasicURLNormalizer.java           | 175 ----
 src/plugin/urlnormalizer-host/build.xml         |  27 -
 src/plugin/urlnormalizer-host/data/hosts.txt    |   8 -
 src/plugin/urlnormalizer-host/ivy.xml           |  41 -
 src/plugin/urlnormalizer-host/plugin.xml        |  43 -
 .../urlnormalizer/host/HostURLNormalizer.java   | 198 -----
 .../net/urlnormalizer/host/package-info.java    |  23 -
 .../host/TestHostURLNormalizer.java             |  57 --
 src/plugin/urlnormalizer-pass/build.xml         |  22 -
 src/plugin/urlnormalizer-pass/ivy.xml           |  41 -
 src/plugin/urlnormalizer-pass/plugin.xml        |  41 -
 .../urlnormalizer/pass/PassURLNormalizer.java   |  49 -
 .../net/urlnormalizer/pass/package-info.java    |  23 -
 .../pass/TestPassURLNormalizer.java             |  45 -
 src/plugin/urlnormalizer-protocol/build.xml     |  27 -
 .../urlnormalizer-protocol/data/protocols.txt   |   7 -
 src/plugin/urlnormalizer-protocol/ivy.xml       |  41 -
 src/plugin/urlnormalizer-protocol/plugin.xml    |  43 -
 .../protocol/ProtocolURLNormalizer.java         | 190 ----
 .../protocol/TestProtocolURLNormalizer.java     |  55 --
 src/plugin/urlnormalizer-querystring/build.xml  |  22 -
 src/plugin/urlnormalizer-querystring/ivy.xml    |  41 -
 src/plugin/urlnormalizer-querystring/plugin.xml |  42 -
 .../querystring/QuerystringURLNormalizer.java   |  91 --
 .../urlnormalizer/querystring/package-info.java |  23 -
 .../TestQuerystringURLNormalizer.java           |  49 -
 src/plugin/urlnormalizer-regex/build.xml        |  34 -
 src/plugin/urlnormalizer-regex/ivy.xml          |  41 -
 src/plugin/urlnormalizer-regex/plugin.xml       |  41 -
 .../sample/regex-normalize-default.test         |  84 --
 .../sample/regex-normalize-default.xml          |  66 --
 .../sample/regex-normalize-scope1.test          |   8 -
 .../sample/regex-normalize-scope1.xml           |  21 -
 .../urlnormalizer/regex/RegexURLNormalizer.java | 324 -------
 .../net/urlnormalizer/regex/package-info.java   |  23 -
 .../regex/TestRegexURLNormalizer.java           | 186 ----
 src/plugin/urlnormalizer-slash/build.xml        |  27 -
 src/plugin/urlnormalizer-slash/data/slashes.txt |   7 -
 src/plugin/urlnormalizer-slash/ivy.xml          |  41 -
 src/plugin/urlnormalizer-slash/plugin.xml       |  43 -
 .../urlnormalizer/slash/SlashURLNormalizer.java | 224 -----
 .../slash/TestSlashURLNormalizer.java           |  73 --
 src/test/crawl-tests.xml                        |  62 --
 src/test/domain-urlfilter.txt                   |  22 -
 src/test/filter-all.txt                         |   7 -
 src/test/log4j.properties                       |   7 -
 src/test/nutch-site.xml                         |  19 -
 .../nutch/crawl/ContinuousCrawlTestUtil.java    | 270 ------
 .../org/apache/nutch/crawl/CrawlDBTestUtil.java | 179 ----
 .../nutch/crawl/CrawlDbUpdateTestDriver.java    | 138 ---
 .../apache/nutch/crawl/CrawlDbUpdateUtil.java   | 166 ----
 .../org/apache/nutch/crawl/DummyWritable.java   |  32 -
 .../nutch/crawl/TODOTestCrawlDbStates.java      | 168 ----
 .../nutch/crawl/TestAdaptiveFetchSchedule.java  | 121 ---
 .../apache/nutch/crawl/TestCrawlDbFilter.java   | 145 ---
 .../apache/nutch/crawl/TestCrawlDbMerger.java   | 160 ----
 .../apache/nutch/crawl/TestCrawlDbStates.java   | 566 ------------
 .../org/apache/nutch/crawl/TestGenerator.java   | 370 --------
 .../org/apache/nutch/crawl/TestInjector.java    | 181 ----
 .../apache/nutch/crawl/TestLinkDbMerger.java    | 160 ----
 .../nutch/crawl/TestSignatureFactory.java       |  35 -
 .../org/apache/nutch/fetcher/TestFetcher.java   | 207 -----
 .../nutch/indexer/TestIndexerMapReduce.java     | 187 ----
 .../nutch/indexer/TestIndexingFilters.java      | 110 ---
 .../org/apache/nutch/metadata/TestMetadata.java | 281 ------
 .../metadata/TestSpellCheckedMetadata.java      | 303 -------
 .../org/apache/nutch/net/TestURLFilters.java    |  41 -
 .../apache/nutch/net/TestURLNormalizers.java    |  83 --
 .../nutch/parse/TestOutlinkExtractor.java       |  99 ---
 .../org/apache/nutch/parse/TestParseData.java   |  58 --
 .../org/apache/nutch/parse/TestParseText.java   |  34 -
 .../apache/nutch/parse/TestParserFactory.java   | 105 ---
 .../apache/nutch/parse/parse-plugin-test.xml    |  58 --
 .../nutch/plugin/HelloWorldExtension.java       |  36 -
 .../org/apache/nutch/plugin/ITestExtension.java |  27 -
 .../apache/nutch/plugin/SimpleTestPlugin.java   |  57 --
 .../apache/nutch/plugin/TestPluginSystem.java   | 302 -------
 .../org/apache/nutch/protocol/TestContent.java  |  94 --
 .../nutch/protocol/TestProtocolFactory.java     |  85 --
 .../apache/nutch/segment/TestSegmentMerger.java | 131 ---
 .../segment/TestSegmentMergerCrawlDatums.java   | 427 ---------
 .../apache/nutch/service/TestNutchServer.java   |  65 --
 .../nutch/tools/TestCommonCrawlDataDumper.java  | 125 ---
 .../tools/proxy/AbstractTestbedHandler.java     |  49 -
 .../apache/nutch/tools/proxy/DelayHandler.java  |  56 --
 .../apache/nutch/tools/proxy/FakeHandler.java   | 102 ---
 .../nutch/tools/proxy/LogDebugHandler.java      |  64 --
 .../nutch/tools/proxy/NotFoundHandler.java      |  40 -
 .../apache/nutch/tools/proxy/ProxyTestbed.java  | 156 ----
 .../nutch/tools/proxy/SegmentHandler.java       | 255 ------
 .../apache/nutch/tools/proxy/package-info.java  |  22 -
 .../org/apache/nutch/util/DumpFileUtilTest.java |  68 --
 .../apache/nutch/util/TestEncodingDetector.java |  90 --
 .../org/apache/nutch/util/TestGZIPUtils.java    | 241 -----
 .../org/apache/nutch/util/TestMimeUtil.java     | 127 ---
 .../org/apache/nutch/util/TestNodeWalker.java   | 107 ---
 .../nutch/util/TestPrefixStringMatcher.java     | 115 ---
 .../org/apache/nutch/util/TestStringUtil.java   |  61 --
 .../nutch/util/TestSuffixStringMatcher.java     | 114 ---
 .../org/apache/nutch/util/TestTableUtil.java    |  75 --
 src/test/org/apache/nutch/util/TestURLUtil.java | 281 ------
 .../apache/nutch/util/WritableTestUtils.java    |  55 --
 .../fetch-test-site/dup_of_pagea.html           |  11 -
 .../fetch-test-site/exception.html              |  13 -
 src/testresources/fetch-test-site/index.html    |  13 -
 .../fetch-test-site/nested_spider_trap.html     |  23 -
 src/testresources/fetch-test-site/pagea.html    |  11 -
 src/testresources/fetch-test-site/pageb.html    |  11 -
 src/testresources/fetch-test-site/robots.txt    |   0
 src/testresources/test-mime-util/test.xlsx      | Bin 3950 -> 0 bytes
 .../20150309101625/content/part-00000/.data.crc | Bin 124 -> 0 bytes
 .../content/part-00000/.index.crc               | Bin 12 -> 0 bytes
 .../20150309101625/content/part-00000/data      | Bin 14452 -> 0 bytes
 .../20150309101625/content/part-00000/index     | Bin 217 -> 0 bytes
 .../crawl_fetch/part-00000/.data.crc            | Bin 12 -> 0 bytes
 .../crawl_fetch/part-00000/.index.crc           | Bin 12 -> 0 bytes
 .../20150309101625/crawl_fetch/part-00000/data  | Bin 293 -> 0 bytes
 .../20150309101625/crawl_fetch/part-00000/index | Bin 217 -> 0 bytes
 .../crawl_generate/.part-00000.crc              | Bin 12 -> 0 bytes
 .../20150309101625/crawl_generate/part-00000    | Bin 169 -> 0 bytes
 .../20150309101625/crawl_parse/.part-00000.crc  | Bin 68 -> 0 bytes
 .../20150309101625/crawl_parse/part-00000       | Bin 7627 -> 0 bytes
 .../parse_data/part-00000/.data.crc             | Bin 24 -> 0 bytes
 .../parse_data/part-00000/.index.crc            | Bin 12 -> 0 bytes
 .../20150309101625/parse_data/part-00000/data   | Bin 1985 -> 0 bytes
 .../20150309101625/parse_data/part-00000/index  | Bin 217 -> 0 bytes
 .../parse_text/part-00000/.data.crc             | Bin 60 -> 0 bytes
 .../parse_text/part-00000/.index.crc            | Bin 12 -> 0 bytes
 .../20150309101625/parse_text/part-00000/data   | Bin 6554 -> 0 bytes
 .../20150309101625/parse_text/part-00000/index  | Bin 217 -> 0 bytes
 .../20150309101656/content/part-00000/.data.crc | Bin 3372 -> 0 bytes
 .../content/part-00000/.index.crc               | Bin 12 -> 0 bytes
 .../20150309101656/content/part-00000/data      | Bin 430250 -> 0 bytes
 .../20150309101656/content/part-00000/index     | Bin 220 -> 0 bytes
 .../crawl_fetch/part-00000/.data.crc            | Bin 104 -> 0 bytes
 .../crawl_fetch/part-00000/.index.crc           | Bin 12 -> 0 bytes
 .../20150309101656/crawl_fetch/part-00000/data  | Bin 12121 -> 0 bytes
 .../20150309101656/crawl_fetch/part-00000/index | Bin 220 -> 0 bytes
 .../crawl_generate/.part-00000.crc              | Bin 52 -> 0 bytes
 .../20150309101656/crawl_generate/part-00000    | Bin 5590 -> 0 bytes
 .../20150309101656/crawl_parse/.part-00000.crc  | Bin 1652 -> 0 bytes
 .../20150309101656/crawl_parse/part-00000       | Bin 210047 -> 0 bytes
 .../parse_data/part-00000/.data.crc             | Bin 460 -> 0 bytes
 .../parse_data/part-00000/.index.crc            | Bin 12 -> 0 bytes
 .../20150309101656/parse_data/part-00000/data   | Bin 57355 -> 0 bytes
 .../20150309101656/parse_data/part-00000/index  | Bin 220 -> 0 bytes
 .../parse_text/part-00000/.data.crc             | Bin 1260 -> 0 bytes
 .../parse_text/part-00000/.index.crc            | Bin 12 -> 0 bytes
 .../20150309101656/parse_text/part-00000/data   | Bin 159920 -> 0 bytes
 .../20150309101656/parse_text/part-00000/index  | Bin 220 -> 0 bytes
 1973 files changed, 102499 insertions(+), 98774 deletions(-)
----------------------------------------------------------------------