You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2017/03/08 06:52:41 UTC

[nutch] 01/03: Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292

This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch NUTCH-2292
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit ecc60d7890e20ae822c848661ee2a6224f9fbe1d
Merge: 2b93a66 2175c76
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Fri Feb 24 12:13:47 2017 -0800

    Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292

 .gitignore                                         |   6 +
 {src/bin => bin}/crawl                             |   0
 {src/bin => bin}/nutch                             |   0
 nutch-core/pom.xml                                 | 522 +++++++++++++++++++++
 .../apache/nutch/crawl/AbstractFetchSchedule.java  |   0
 .../apache/nutch/crawl/AdaptiveFetchSchedule.java  |   0
 .../java/org/apache/nutch/crawl/CrawlDatum.java    |   0
 .../main}/java/org/apache/nutch/crawl/CrawlDb.java |   0
 .../java/org/apache/nutch/crawl/CrawlDbFilter.java |   0
 .../java/org/apache/nutch/crawl/CrawlDbMerger.java |   0
 .../java/org/apache/nutch/crawl/CrawlDbReader.java |   0
 .../org/apache/nutch/crawl/CrawlDbReducer.java     |   0
 .../org/apache/nutch/crawl/DeduplicationJob.java   |   0
 .../apache/nutch/crawl/DefaultFetchSchedule.java   |   0
 .../java/org/apache/nutch/crawl/FetchSchedule.java |   0
 .../apache/nutch/crawl/FetchScheduleFactory.java   |   0
 .../java/org/apache/nutch/crawl/Generator.java     |   0
 .../java/org/apache/nutch/crawl/Injector.java      |   0
 .../main}/java/org/apache/nutch/crawl/Inlink.java  |   0
 .../main}/java/org/apache/nutch/crawl/Inlinks.java |   0
 .../main}/java/org/apache/nutch/crawl/LinkDb.java  |   0
 .../java/org/apache/nutch/crawl/LinkDbFilter.java  |   0
 .../java/org/apache/nutch/crawl/LinkDbMerger.java  |   0
 .../java/org/apache/nutch/crawl/LinkDbReader.java  |   0
 .../java/org/apache/nutch/crawl/MD5Signature.java  |   0
 .../nutch/crawl/MimeAdaptiveFetchSchedule.java     |   0
 .../java/org/apache/nutch/crawl/NutchWritable.java |   0
 .../java/org/apache/nutch/crawl/Signature.java     |   0
 .../apache/nutch/crawl/SignatureComparator.java    |   0
 .../org/apache/nutch/crawl/SignatureFactory.java   |   0
 .../org/apache/nutch/crawl/TextMD5Signature.java   |   0
 .../apache/nutch/crawl/TextProfileSignature.java   |   0
 .../org/apache/nutch/crawl/URLPartitioner.java     |   0
 .../main}/java/org/apache/nutch/crawl/package.html |   0
 .../java/org/apache/nutch/fetcher/FetchItem.java   |   0
 .../org/apache/nutch/fetcher/FetchItemQueue.java   |   0
 .../org/apache/nutch/fetcher/FetchItemQueues.java  |   0
 .../java/org/apache/nutch/fetcher/FetchNode.java   |   0
 .../java/org/apache/nutch/fetcher/FetchNodeDb.java |   0
 .../java/org/apache/nutch/fetcher/Fetcher.java     |   0
 .../apache/nutch/fetcher/FetcherOutputFormat.java  |   0
 .../org/apache/nutch/fetcher/FetcherThread.java    |   0
 .../java/org/apache/nutch/fetcher/QueueFeeder.java |   0
 .../java/org/apache/nutch/fetcher/package.html     |   0
 .../java/org/apache/nutch/hostdb/HostDatum.java    |   0
 .../java/org/apache/nutch/hostdb/ReadHostDb.java   |   0
 .../org/apache/nutch/hostdb/ResolverThread.java    |   0
 .../java/org/apache/nutch/hostdb/UpdateHostDb.java |   0
 .../apache/nutch/hostdb/UpdateHostDbMapper.java    |   0
 .../apache/nutch/hostdb/UpdateHostDbReducer.java   |   0
 .../java/org/apache/nutch/indexer/CleaningJob.java |   0
 .../java/org/apache/nutch/indexer/IndexWriter.java |   0
 .../org/apache/nutch/indexer/IndexWriters.java     |   0
 .../org/apache/nutch/indexer/IndexerMapReduce.java |   0
 .../apache/nutch/indexer/IndexerOutputFormat.java  |   0
 .../apache/nutch/indexer/IndexingException.java    |   0
 .../org/apache/nutch/indexer/IndexingFilter.java   |   0
 .../org/apache/nutch/indexer/IndexingFilters.java  |   0
 .../nutch/indexer/IndexingFiltersChecker.java      |   0
 .../java/org/apache/nutch/indexer/IndexingJob.java |   0
 .../org/apache/nutch/indexer/NutchDocument.java    |   0
 .../java/org/apache/nutch/indexer/NutchField.java  |   0
 .../org/apache/nutch/indexer/NutchIndexAction.java |   0
 .../java/org/apache/nutch/indexer/package.html     |   0
 .../org/apache/nutch/metadata/CreativeCommons.java |   0
 .../java/org/apache/nutch/metadata/DublinCore.java |   0
 .../main}/java/org/apache/nutch/metadata/Feed.java |   0
 .../org/apache/nutch/metadata/HttpHeaders.java     |   0
 .../org/apache/nutch/metadata/MetaWrapper.java     |   0
 .../java/org/apache/nutch/metadata/Metadata.java   |   0
 .../java/org/apache/nutch/metadata/Nutch.java      |   0
 .../nutch/metadata/SpellCheckedMetadata.java       |   0
 .../java/org/apache/nutch/metadata/package.html    |   0
 .../org/apache/nutch/net/URLExemptionFilter.java   |   0
 .../org/apache/nutch/net/URLExemptionFilters.java  |   0
 .../main}/java/org/apache/nutch/net/URLFilter.java |   0
 .../org/apache/nutch/net/URLFilterChecker.java     |   0
 .../org/apache/nutch/net/URLFilterException.java   |   0
 .../java/org/apache/nutch/net/URLFilters.java      |   0
 .../java/org/apache/nutch/net/URLNormalizer.java   |   0
 .../org/apache/nutch/net/URLNormalizerChecker.java |   0
 .../java/org/apache/nutch/net/URLNormalizers.java  |   0
 .../java/org/apache/nutch/net/package-info.java    |   0
 .../apache/nutch/net/protocols/HttpDateFormat.java |   0
 .../nutch/net/protocols/ProtocolException.java     |   0
 .../org/apache/nutch/net/protocols/Response.java   |   0
 .../apache/nutch/net/protocols/package-info.java   |   0
 .../java/org/apache/nutch/parse/HTMLMetaTags.java  |   0
 .../org/apache/nutch/parse/HtmlParseFilter.java    |   0
 .../org/apache/nutch/parse/HtmlParseFilters.java   |   0
 .../main}/java/org/apache/nutch/parse/Outlink.java |   0
 .../org/apache/nutch/parse/OutlinkExtractor.java   |   0
 .../main}/java/org/apache/nutch/parse/Parse.java   |   0
 .../java/org/apache/nutch/parse/ParseCallable.java |   0
 .../java/org/apache/nutch/parse/ParseData.java     |   0
 .../org/apache/nutch/parse/ParseException.java     |   0
 .../java/org/apache/nutch/parse/ParseImpl.java     |   0
 .../org/apache/nutch/parse/ParseOutputFormat.java  |   0
 .../org/apache/nutch/parse/ParsePluginList.java    |   0
 .../org/apache/nutch/parse/ParsePluginsReader.java |   0
 .../java/org/apache/nutch/parse/ParseResult.java   |   0
 .../java/org/apache/nutch/parse/ParseSegment.java  |   0
 .../java/org/apache/nutch/parse/ParseStatus.java   |   0
 .../java/org/apache/nutch/parse/ParseText.java     |   0
 .../java/org/apache/nutch/parse/ParseUtil.java     |   0
 .../main}/java/org/apache/nutch/parse/Parser.java  |   0
 .../java/org/apache/nutch/parse/ParserChecker.java |   0
 .../java/org/apache/nutch/parse/ParserFactory.java |   0
 .../org/apache/nutch/parse/ParserNotFound.java     |   0
 .../java/org/apache/nutch/parse/package-info.java  |   0
 .../nutch/plugin/CircularDependencyException.java  |   0
 .../java/org/apache/nutch/plugin/Extension.java    |   0
 .../org/apache/nutch/plugin/ExtensionPoint.java    |   0
 .../nutch/plugin/MissingDependencyException.java   |   0
 .../java/org/apache/nutch/plugin/Pluggable.java    |   0
 .../main}/java/org/apache/nutch/plugin/Plugin.java |   0
 .../org/apache/nutch/plugin/PluginClassLoader.java |   0
 .../org/apache/nutch/plugin/PluginDescriptor.java  |   0
 .../apache/nutch/plugin/PluginManifestParser.java  |   0
 .../org/apache/nutch/plugin/PluginRepository.java  |   0
 .../nutch/plugin/PluginRuntimeException.java       |   0
 .../java/org/apache/nutch/plugin/package.html      |   0
 .../java/org/apache/nutch/protocol/Content.java    |   0
 .../java/org/apache/nutch/protocol/Protocol.java   |   0
 .../apache/nutch/protocol/ProtocolException.java   |   0
 .../org/apache/nutch/protocol/ProtocolFactory.java |   0
 .../apache/nutch/protocol/ProtocolNotFound.java    |   0
 .../org/apache/nutch/protocol/ProtocolOutput.java  |   0
 .../org/apache/nutch/protocol/ProtocolStatus.java  |   0
 .../apache/nutch/protocol/RobotRulesParser.java    |   0
 .../org/apache/nutch/protocol/package-info.java    |   0
 .../nutch/scoring/AbstractScoringFilter.java       |   0
 .../org/apache/nutch/scoring/ScoringFilter.java    |   0
 .../nutch/scoring/ScoringFilterException.java      |   0
 .../org/apache/nutch/scoring/ScoringFilters.java   |   0
 .../org/apache/nutch/scoring/package-info.java     |   0
 .../apache/nutch/scoring/webgraph/LinkDatum.java   |   0
 .../apache/nutch/scoring/webgraph/LinkDumper.java  |   0
 .../apache/nutch/scoring/webgraph/LinkRank.java    |   0
 .../org/apache/nutch/scoring/webgraph/Node.java    |   0
 .../apache/nutch/scoring/webgraph/NodeDumper.java  |   0
 .../apache/nutch/scoring/webgraph/NodeReader.java  |   0
 .../nutch/scoring/webgraph/ScoreUpdater.java       |   0
 .../apache/nutch/scoring/webgraph/WebGraph.java    |   0
 .../nutch/scoring/webgraph/package-info.java       |   0
 .../nutch/segment/ContentAsTextInputFormat.java    |   0
 .../org/apache/nutch/segment/SegmentChecker.java   |   0
 .../apache/nutch/segment/SegmentMergeFilter.java   |   0
 .../apache/nutch/segment/SegmentMergeFilters.java  |   0
 .../org/apache/nutch/segment/SegmentMerger.java    |   0
 .../java/org/apache/nutch/segment/SegmentPart.java |   0
 .../org/apache/nutch/segment/SegmentReader.java    |   0
 .../org/apache/nutch/segment/package-info.java     |   0
 .../java/org/apache/nutch/service/ConfManager.java |   0
 .../java/org/apache/nutch/service/JobManager.java  |   0
 .../java/org/apache/nutch/service/NutchReader.java |   0
 .../java/org/apache/nutch/service/NutchServer.java |   0
 .../apache/nutch/service/impl/ConfManagerImpl.java |   0
 .../org/apache/nutch/service/impl/JobFactory.java  |   0
 .../apache/nutch/service/impl/JobManagerImpl.java  |   0
 .../org/apache/nutch/service/impl/JobWorker.java   |   0
 .../org/apache/nutch/service/impl/LinkReader.java  |   0
 .../org/apache/nutch/service/impl/NodeReader.java  |   0
 .../service/impl/NutchServerPoolExecutor.java      |   0
 .../apache/nutch/service/impl/SequenceReader.java  |   0
 .../nutch/service/model/request/DbQuery.java       |   0
 .../nutch/service/model/request/JobConfig.java     |   0
 .../nutch/service/model/request/NutchConfig.java   |   0
 .../nutch/service/model/request/ReaderConfig.java  |   0
 .../nutch/service/model/request/SeedList.java      |   0
 .../nutch/service/model/request/SeedUrl.java       |   0
 .../service/model/response/FetchNodeDbInfo.java    |   0
 .../nutch/service/model/response/JobInfo.java      |   0
 .../service/model/response/NutchServerInfo.java    |   0
 .../nutch/service/resources/AbstractResource.java  |   0
 .../nutch/service/resources/AdminResource.java     |   0
 .../nutch/service/resources/ConfigResource.java    |   0
 .../apache/nutch/service/resources/DbResource.java |   0
 .../nutch/service/resources/JobResource.java       |   0
 .../nutch/service/resources/ReaderResouce.java     |   0
 .../nutch/service/resources/SeedResource.java      |   0
 .../nutch/tools/AbstractCommonCrawlFormat.java     |   0
 .../java/org/apache/nutch/tools/Benchmark.java     |   0
 .../org/apache/nutch/tools/CommonCrawlConfig.java  |   0
 .../apache/nutch/tools/CommonCrawlDataDumper.java  |   0
 .../org/apache/nutch/tools/CommonCrawlFormat.java  |   0
 .../nutch/tools/CommonCrawlFormatFactory.java      |   0
 .../nutch/tools/CommonCrawlFormatJackson.java      |   0
 .../nutch/tools/CommonCrawlFormatJettinson.java    |   0
 .../nutch/tools/CommonCrawlFormatSimple.java       |   0
 .../apache/nutch/tools/CommonCrawlFormatWARC.java  |   0
 .../java/org/apache/nutch/tools/DmozParser.java    |   0
 .../java/org/apache/nutch/tools/FileDumper.java    |   0
 .../java/org/apache/nutch/tools/FreeGenerator.java |   0
 .../java/org/apache/nutch/tools/ResolveUrls.java   |   0
 .../java/org/apache/nutch/tools/WARCUtils.java     |   0
 .../org/apache/nutch/tools/arc/ArcInputFormat.java |   0
 .../apache/nutch/tools/arc/ArcRecordReader.java    |   0
 .../apache/nutch/tools/arc/ArcSegmentCreator.java  |   0
 .../org/apache/nutch/tools/arc/package-info.java   |   0
 .../java/org/apache/nutch/tools/package-info.java  |   0
 .../org/apache/nutch/tools/warc/WARCExporter.java  |   0
 .../org/apache/nutch/tools/warc/package-info.java  |   0
 .../java/org/apache/nutch/util/CommandRunner.java  |   0
 .../apache/nutch/util/CrawlCompletionStats.java    |   0
 .../java/org/apache/nutch/util/DeflateUtils.java   |   0
 .../main}/java/org/apache/nutch/util/DomUtil.java  |   0
 .../java/org/apache/nutch/util/DumpFileUtil.java   |   0
 .../org/apache/nutch/util/EncodingDetector.java    |   0
 .../main}/java/org/apache/nutch/util/FSUtils.java  |   0
 .../java/org/apache/nutch/util/GZIPUtils.java      |   0
 .../nutch/util/GenericWritableConfigurable.java    |   0
 .../java/org/apache/nutch/util/HadoopFSUtil.java   |   0
 .../main}/java/org/apache/nutch/util/JexlUtil.java |   0
 .../main}/java/org/apache/nutch/util/LockUtil.java |   0
 .../main}/java/org/apache/nutch/util/MimeUtil.java |   0
 .../java/org/apache/nutch/util/NodeWalker.java     |   0
 .../org/apache/nutch/util/NutchConfiguration.java  |   0
 .../main}/java/org/apache/nutch/util/NutchJob.java |   0
 .../java/org/apache/nutch/util/NutchTool.java      |   0
 .../java/org/apache/nutch/util/ObjectCache.java    |   0
 .../org/apache/nutch/util/PrefixStringMatcher.java |   0
 .../nutch/util/ProtocolStatusStatistics.java       |   0
 .../java/org/apache/nutch/util/StringUtil.java     |   0
 .../org/apache/nutch/util/SuffixStringMatcher.java |   0
 .../java/org/apache/nutch/util/TableUtil.java      |   0
 .../java/org/apache/nutch/util/TimingUtil.java     |   0
 .../org/apache/nutch/util/TrieStringMatcher.java   |   0
 .../main}/java/org/apache/nutch/util/URLUtil.java  |   0
 .../apache/nutch/util/domain/DomainStatistics.java |   0
 .../org/apache/nutch/util/domain/DomainSuffix.java |   0
 .../apache/nutch/util/domain/DomainSuffixes.java   |   0
 .../nutch/util/domain/DomainSuffixesReader.java    |   0
 .../apache/nutch/util/domain/TopLevelDomain.java   |   0
 .../java/org/apache/nutch/util/domain/package.html |   0
 .../java/org/apache/nutch/util/package-info.java   |   0
 .../org/apache/nutch/webui/NutchUiApplication.java |   0
 .../nutch/webui/NutchUiApplication.properties      |   0
 .../java/org/apache/nutch/webui/NutchUiServer.java |   0
 .../org/apache/nutch/webui/client/NutchClient.java |   0
 .../nutch/webui/client/NutchClientFactory.java     |   0
 .../nutch/webui/client/impl/CrawlingCycle.java     |   0
 .../webui/client/impl/CrawlingCycleListener.java   |   0
 .../nutch/webui/client/impl/NutchClientImpl.java   |   0
 .../nutch/webui/client/impl/RemoteCommand.java     |   0
 .../webui/client/impl/RemoteCommandBuilder.java    |   0
 .../webui/client/impl/RemoteCommandExecutor.java   |   0
 .../client/impl/RemoteCommandsBatchFactory.java    |   0
 .../nutch/webui/client/model/ConnectionStatus.java |   0
 .../org/apache/nutch/webui/client/model/Crawl.java |   0
 .../apache/nutch/webui/client/model/JobConfig.java |   0
 .../apache/nutch/webui/client/model/JobInfo.java   |   0
 .../nutch/webui/client/model/NutchStatus.java      |   0
 .../nutch/webui/config/CustomDaoFactory.java       |   0
 .../nutch/webui/config/CustomTableCreator.java     |   0
 .../nutch/webui/config/NutchGuiConfiguration.java  |   0
 .../nutch/webui/config/SpringConfiguration.java    |   0
 .../org/apache/nutch/webui/model/NutchConfig.java  |   0
 .../apache/nutch/webui/model/NutchInstance.java    |   0
 .../org/apache/nutch/webui/model/SeedList.java     |   0
 .../java/org/apache/nutch/webui/model/SeedUrl.java |   0
 .../apache/nutch/webui/pages/AbstractBasePage.html |   0
 .../apache/nutch/webui/pages/AbstractBasePage.java |   0
 .../apache/nutch/webui/pages/DashboardPage.html    |   0
 .../apache/nutch/webui/pages/DashboardPage.java    |   0
 .../org/apache/nutch/webui/pages/LogOutPage.java   |   0
 .../apache/nutch/webui/pages/SchedulingPage.java   |   0
 .../org/apache/nutch/webui/pages/SearchPage.java   |   0
 .../apache/nutch/webui/pages/StatisticsPage.java   |   0
 .../apache/nutch/webui/pages/UrlsUploadPage.java   |   0
 .../apache/nutch/webui/pages/UserSettingsPage.java |   0
 .../webui/pages/assets/NutchUiCssReference.java    |   0
 .../nutch/webui/pages/assets/nutch-style.css       |   0
 .../webui/pages/components/ColorEnumLabel.java     |   0
 .../pages/components/ColorEnumLabelBuilder.java    |   0
 .../webui/pages/components/CpmIteratorAdapter.java |   0
 .../nutch/webui/pages/crawls/CrawlPanel.html       |   0
 .../nutch/webui/pages/crawls/CrawlPanel.java       |   0
 .../nutch/webui/pages/crawls/CrawlsPage.html       |   0
 .../nutch/webui/pages/crawls/CrawlsPage.java       |   0
 .../nutch/webui/pages/instances/InstancePanel.html |   0
 .../nutch/webui/pages/instances/InstancePanel.java |   0
 .../nutch/webui/pages/instances/InstancesPage.html |   0
 .../nutch/webui/pages/instances/InstancesPage.java |   0
 .../nutch/webui/pages/menu/VerticalMenu.html       |   0
 .../nutch/webui/pages/menu/VerticalMenu.java       |   0
 .../nutch/webui/pages/seed/SeedListsPage.html      |   0
 .../nutch/webui/pages/seed/SeedListsPage.java      |   0
 .../apache/nutch/webui/pages/seed/SeedPage.html    |   0
 .../apache/nutch/webui/pages/seed/SeedPage.java    |   0
 .../nutch/webui/pages/settings/SettingsPage.html   |   0
 .../nutch/webui/pages/settings/SettingsPage.java   |   0
 .../apache/nutch/webui/service/CrawlService.java   |   0
 .../nutch/webui/service/NutchInstanceService.java  |   0
 .../apache/nutch/webui/service/NutchService.java   |   0
 .../nutch/webui/service/SeedListService.java       |   0
 .../nutch/webui/service/impl/CrawlServiceImpl.java |   0
 .../service/impl/NutchInstanceServiceImpl.java     |   0
 .../nutch/webui/service/impl/NutchServiceImpl.java |   0
 .../webui/service/impl/SeedListServiceImpl.java    |   0
 {src => nutch-core/src/main}/java/overview.html    |   0
 .../nutch/crawl/ContinuousCrawlTestUtil.java       |   0
 .../org/apache/nutch/crawl/CrawlDBTestUtil.java    |   0
 .../nutch/crawl/CrawlDbUpdateTestDriver.java       |   0
 .../org/apache/nutch/crawl/CrawlDbUpdateUtil.java  |   0
 .../org/apache/nutch/crawl/DummyWritable.java      |   0
 .../apache/nutch/crawl/TODOTestCrawlDbStates.java  |   3 +
 .../nutch/crawl/TestAdaptiveFetchSchedule.java     |   0
 .../org/apache/nutch/crawl/TestCrawlDbFilter.java  |   3 +
 .../org/apache/nutch/crawl/TestCrawlDbMerger.java  |   3 +
 .../org/apache/nutch/crawl/TestCrawlDbStates.java  |   3 +
 .../org/apache/nutch/crawl/TestGenerator.java      |   3 +
 .../java}/org/apache/nutch/crawl/TestInjector.java |   3 +
 .../org/apache/nutch/crawl/TestLinkDbMerger.java   |   0
 .../apache/nutch/crawl/TestSignatureFactory.java   |   0
 .../org/apache/nutch/fetcher/TestFetcher.java      |   3 +
 .../apache/nutch/indexer/TestIndexerMapReduce.java |   3 +
 .../apache/nutch/indexer/TestIndexingFilters.java  |   3 +
 .../org/apache/nutch/metadata/TestMetadata.java    |   0
 .../nutch/metadata/TestSpellCheckedMetadata.java   |   0
 .../java}/org/apache/nutch/net/TestURLFilters.java |   3 +
 .../org/apache/nutch/net/TestURLNormalizers.java   |   3 +
 .../apache/nutch/parse/TestOutlinkExtractor.java   |   0
 .../org/apache/nutch/parse/TestParseData.java      |   0
 .../org/apache/nutch/parse/TestParseText.java      |   0
 .../org/apache/nutch/parse/TestParserFactory.java  |   3 +
 .../org/apache/nutch/parse/parse-plugin-test.xml   |   0
 .../apache/nutch/plugin/HelloWorldExtension.java   |   0
 .../org/apache/nutch/plugin/ITestExtension.java    |   0
 .../org/apache/nutch/plugin/SimpleTestPlugin.java  |   0
 .../org/apache/nutch/plugin/TestPluginSystem.java  |   3 +
 .../org/apache/nutch/protocol/TestContent.java     |   0
 .../apache/nutch/protocol/TestProtocolFactory.java |   3 +
 .../apache/nutch/segment/TestSegmentMerger.java    |   0
 .../segment/TestSegmentMergerCrawlDatums.java      |   0
 .../org/apache/nutch/service/TestNutchServer.java  |   0
 .../org/apache/nutch/test/IntegrationTest.java     |   6 +
 .../test/java/org/apache/nutch/test/TestUtils.java |  29 ++
 .../nutch/tools/TestCommonCrawlDataDumper.java     |   5 +-
 .../nutch/tools/proxy/AbstractTestbedHandler.java  |   0
 .../org/apache/nutch/tools/proxy/DelayHandler.java |   0
 .../org/apache/nutch/tools/proxy/FakeHandler.java  |   0
 .../apache/nutch/tools/proxy/LogDebugHandler.java  |   0
 .../apache/nutch/tools/proxy/NotFoundHandler.java  |   0
 .../org/apache/nutch/tools/proxy/ProxyTestbed.java |   0
 .../apache/nutch/tools/proxy/SegmentHandler.java   |   0
 .../org/apache/nutch/tools/proxy/package-info.java |   0
 .../org/apache/nutch/util/DumpFileUtilTest.java    |   0
 .../apache/nutch/util/TestEncodingDetector.java    |   0
 .../java}/org/apache/nutch/util/TestGZIPUtils.java |   0
 .../java}/org/apache/nutch/util/TestMimeUtil.java  |  12 +-
 .../org/apache/nutch/util/TestNodeWalker.java      |   0
 .../apache/nutch/util/TestPrefixStringMatcher.java |   0
 .../org/apache/nutch/util/TestStringUtil.java      |   0
 .../apache/nutch/util/TestSuffixStringMatcher.java |   0
 .../java}/org/apache/nutch/util/TestTableUtil.java |   0
 .../java}/org/apache/nutch/util/TestURLUtil.java   |   0
 .../org/apache/nutch/util/WritableTestUtils.java   |   0
 .../src/test/resources}/crawl-tests.xml            |   0
 .../src/test/resources}/domain-urlfilter.txt       |   0
 .../resources}/fetch-test-site/dup_of_pagea.html   |   0
 .../test/resources}/fetch-test-site/exception.html |   0
 .../src/test/resources}/fetch-test-site/index.html |   0
 .../fetch-test-site/nested_spider_trap.html        |   0
 .../src/test/resources}/fetch-test-site/pagea.html |   0
 .../src/test/resources}/fetch-test-site/pageb.html |   0
 .../src/test/resources}/fetch-test-site/robots.txt |   0
 .../src/test/resources}/filter-all.txt             |   0
 .../src/test/resources}/log4j.properties           |   0
 .../src/test/resources}/nutch-site.xml             |   0
 .../src/test/resources}/test-mime-util/test.xlsx   | Bin
 .../20150309101625/content/part-00000/.data.crc    | Bin
 .../20150309101625/content/part-00000/.index.crc   | Bin
 .../20150309101625/content/part-00000/data         | Bin
 .../20150309101625/content/part-00000/index        | Bin
 .../crawl_fetch/part-00000/.data.crc               | Bin
 .../crawl_fetch/part-00000/.index.crc              | Bin
 .../20150309101625/crawl_fetch/part-00000/data     | Bin
 .../20150309101625/crawl_fetch/part-00000/index    | Bin
 .../20150309101625/crawl_generate/.part-00000.crc  | Bin
 .../20150309101625/crawl_generate/part-00000       | Bin
 .../20150309101625/crawl_parse/.part-00000.crc     | Bin
 .../20150309101625/crawl_parse/part-00000          | Bin
 .../20150309101625/parse_data/part-00000/.data.crc | Bin
 .../parse_data/part-00000/.index.crc               | Bin
 .../20150309101625/parse_data/part-00000/data      | Bin
 .../20150309101625/parse_data/part-00000/index     | Bin
 .../20150309101625/parse_text/part-00000/.data.crc | Bin
 .../parse_text/part-00000/.index.crc               | Bin
 .../20150309101625/parse_text/part-00000/data      | Bin
 .../20150309101625/parse_text/part-00000/index     | Bin
 .../20150309101656/content/part-00000/.data.crc    | Bin
 .../20150309101656/content/part-00000/.index.crc   | Bin
 .../20150309101656/content/part-00000/data         | Bin
 .../20150309101656/content/part-00000/index        | Bin
 .../crawl_fetch/part-00000/.data.crc               | Bin
 .../crawl_fetch/part-00000/.index.crc              | Bin
 .../20150309101656/crawl_fetch/part-00000/data     | Bin
 .../20150309101656/crawl_fetch/part-00000/index    | Bin
 .../20150309101656/crawl_generate/.part-00000.crc  | Bin
 .../20150309101656/crawl_generate/part-00000       | Bin
 .../20150309101656/crawl_parse/.part-00000.crc     | Bin
 .../20150309101656/crawl_parse/part-00000          | Bin
 .../20150309101656/parse_data/part-00000/.data.crc | Bin
 .../parse_data/part-00000/.index.crc               | Bin
 .../20150309101656/parse_data/part-00000/data      | Bin
 .../20150309101656/parse_data/part-00000/index     | Bin
 .../20150309101656/parse_text/part-00000/.data.crc | Bin
 .../parse_text/part-00000/.index.crc               | Bin
 .../20150309101656/parse_text/part-00000/data      | Bin
 .../20150309101656/parse_text/part-00000/index     | Bin
 {src/plugin => nutch-plugins}/build-plugin.xml     |   0
 {src/plugin => nutch-plugins}/build.xml            |   0
 .../creativecommons/README.txt                     |   0
 .../creativecommons/build.xml                      |   0
 .../creativecommons/conf/crawl-urlfilter.txt       |   0
 .../creativecommons/conf/nutch-site.xml            |   0
 .../creativecommons}/ivy.xml                       |   0
 .../creativecommons/plugin.xml                     |   0
 nutch-plugins/creativecommons/pom.xml              |  38 ++
 .../creativecommons/nutch/CCIndexingFilter.java    |   0
 .../org/creativecommons/nutch/CCParseFilter.java   |   0
 .../java/org/creativecommons/nutch/package.html    |   0
 .../creativecommons/nutch/TestCCParseFilter.java   |  10 +-
 .../src/test/resources}/anchor.html                |   0
 .../creativecommons/src/test/resources}/rdf.html   |   0
 .../creativecommons/src/test/resources}/rel.html   |   0
 {src/plugin => nutch-plugins}/feed/build.xml       |   0
 {src/plugin => nutch-plugins}/feed/ivy.xml         |   0
 {src/plugin => nutch-plugins}/feed/plugin.xml      |   0
 nutch-plugins/feed/pom.xml                         |  45 ++
 .../nutch/indexer/feed/FeedIndexingFilter.java     |   0
 .../apache/nutch/indexer/feed/package-info.java    |   0
 .../org/apache/nutch/parse/feed/FeedParser.java    |   0
 .../org/apache/nutch/parse/feed/package-info.java  |   0
 .../apache/nutch/parse/feed/TestFeedParser.java    |   0
 .../feed/src/test/resources}/rsstest.rss           |   0
 {src/plugin => nutch-plugins}/headings/build.xml   |   0
 {src/plugin => nutch-plugins}/headings/ivy.xml     |   0
 {src/plugin => nutch-plugins}/headings/plugin.xml  |   0
 nutch-plugins/headings/pom.xml                     |  38 ++
 .../nutch/parse/headings/HeadingsParseFilter.java  |   0
 .../apache/nutch/parse/headings/package-info.java  |   0
 .../index-anchor/build.xml                         |   0
 .../index-anchor}/ivy.xml                          |   0
 .../index-anchor/plugin.xml                        |   0
 nutch-plugins/index-anchor/pom.xml                 |  38 ++
 .../nutch/indexer/anchor/AnchorIndexingFilter.java |   0
 .../org/apache/nutch/indexer/anchor/package.html   |   0
 .../indexer/anchor/TestAnchorIndexingFilter.java   |   0
 .../plugin => nutch-plugins}/index-basic/build.xml |   0
 {src/plugin => nutch-plugins}/index-basic/ivy.xml  |   0
 .../index-basic/plugin.xml                         |   0
 nutch-plugins/index-basic/pom.xml                  |  38 ++
 .../nutch/indexer/basic/BasicIndexingFilter.java   |   0
 .../org/apache/nutch/indexer/basic/package.html    |   0
 .../indexer/basic/TestBasicIndexingFilter.java     |   0
 .../index-geoip/build-ivy.xml                      |   0
 .../plugin => nutch-plugins}/index-geoip/build.xml |   0
 {src/plugin => nutch-plugins}/index-geoip/ivy.xml  |   0
 .../index-geoip/plugin.xml                         |   0
 nutch-plugins/index-geoip/pom.xml                  |  55 +++
 .../nutch/indexer/geoip/GeoIPDocumentCreator.java  |   0
 .../nutch/indexer/geoip/GeoIPIndexingFilter.java   |   0
 .../apache/nutch/indexer/geoip/package-info.java   |   0
 .../plugin => nutch-plugins}/index-links/build.xml |   0
 .../index-links}/ivy.xml                           |   0
 .../index-links/plugin.xml                         |   0
 nutch-plugins/index-links/pom.xml                  |  38 ++
 .../nutch/indexer/links/LinksIndexingFilter.java   |   0
 .../indexer/links/TestLinksIndexingFilter.java     |   0
 .../java}/org/apache/nutch/parse/TestOutlinks.java |   0
 .../index-metadata/build.xml                       |   0
 .../index-metadata}/ivy.xml                        |   0
 .../index-metadata/plugin.xml                      |   0
 nutch-plugins/index-metadata/pom.xml               |  38 ++
 .../nutch/indexer/metadata/MetadataIndexer.java    |   0
 .../nutch/indexer/metadata/package-info.java       |   0
 {src/plugin => nutch-plugins}/index-more/build.xml |   0
 .../index-more}/ivy.xml                            |   0
 .../plugin => nutch-plugins}/index-more/plugin.xml |   0
 nutch-plugins/index-more/pom.xml                   |  38 ++
 .../nutch/indexer/more/MoreIndexingFilter.java     |   0
 .../org/apache/nutch/indexer/more/package.html     |   0
 .../nutch/indexer/more/TestMoreIndexingFilter.java |   0
 .../index-replace/README.txt                       |   0
 .../index-replace/build.xml                        |   0
 .../index-replace}/ivy.xml                         |   0
 .../index-replace/plugin.xml                       |   0
 nutch-plugins/index-replace/pom.xml                |  50 ++
 .../nutch/indexer/replace/FieldReplacer.java       |   0
 .../nutch/indexer/replace/ReplaceIndexer.java      |   0
 .../apache/nutch/indexer/replace/package-info.java |   0
 .../nutch/indexer/replace/TestIndexReplace.java    |   0
 .../src/test/resources}/testIndexReplace.html      |   0
 .../index-static/build.xml                         |   0
 .../index-static}/ivy.xml                          |   0
 .../index-static/plugin.xml                        |   0
 nutch-plugins/index-static/pom.xml                 |  38 ++
 .../indexer/staticfield/StaticFieldIndexer.java    |   0
 .../apache/nutch/indexer/staticfield/package.html  |   0
 .../staticfield/TestStaticFieldIndexerTest.java    |   0
 .../indexer-cloudsearch/README.md                  |   0
 .../indexer-cloudsearch/build.xml                  |   0
 .../indexer-cloudsearch/createCSDomain.sh          |   0
 .../indexer-cloudsearch/ivy.xml                    |   0
 .../indexer-cloudsearch/plugin.xml                 |   0
 nutch-plugins/indexer-cloudsearch/pom.xml          |  45 ++
 .../cloudsearch/CloudSearchConstants.java          |   0
 .../cloudsearch/CloudSearchIndexWriter.java        |   0
 .../indexwriter/cloudsearch/CloudSearchUtils.java  |   0
 .../indexer-dummy/build.xml                        |   0
 .../indexer-dummy}/ivy.xml                         |   0
 .../indexer-dummy/plugin.xml                       |   0
 nutch-plugins/indexer-dummy/pom.xml                |  38 ++
 .../nutch/indexwriter/dummy/DummyIndexWriter.java  |   0
 .../nutch/indexwriter/dummy/package-info.java      |   0
 .../indexer-elastic/build-ivy.xml                  |   0
 .../indexer-elastic/build.xml                      |   0
 .../indexer-elastic/howto_upgrade_es.txt           |   0
 .../indexer-elastic/ivy.xml                        |   0
 .../indexer-elastic/plugin.xml                     |   0
 nutch-plugins/indexer-elastic/pom.xml              |  45 ++
 .../indexwriter/elastic/ElasticConstants.java      |   0
 .../indexwriter/elastic/ElasticIndexWriter.java    |   0
 .../nutch/indexwriter/elastic/package-info.java    |   0
 .../indexer-solr/build-ivy.xml                     |   0
 .../indexer-solr/build.xml                         |   0
 {src/plugin => nutch-plugins}/indexer-solr/ivy.xml |   0
 .../indexer-solr/plugin.xml                        |   0
 nutch-plugins/indexer-solr/pom.xml                 |  55 +++
 .../nutch/indexwriter/solr/SolrConstants.java      |   0
 .../nutch/indexwriter/solr/SolrIndexWriter.java    |   0
 .../nutch/indexwriter/solr/SolrMappingReader.java  |   0
 .../apache/nutch/indexwriter/solr/SolrUtils.java   |   0
 .../nutch/indexwriter/solr/package-info.java       |   0
 .../language-identifier/build.xml                  |   0
 .../language-identifier}/ivy.xml                   |   0
 .../language-identifier/plugin.xml                 |   0
 nutch-plugins/language-identifier/pom.xml          |  38 ++
 .../nutch/analysis/lang/HTMLLanguageParser.java    |   0
 .../analysis/lang/LanguageIndexingFilter.java      |   0
 .../nutch/analysis/lang/langmappings.properties    |   0
 .../org/apache/nutch/analysis/lang/package.html    |   0
 .../analysis/lang/TestHTMLLanguageParser.java      |   0
 .../java}/org/apache/nutch/analysis/lang/da.test   |   0
 .../java}/org/apache/nutch/analysis/lang/de.test   |   0
 .../java}/org/apache/nutch/analysis/lang/el.test   |   0
 .../java}/org/apache/nutch/analysis/lang/en.test   |   0
 .../java}/org/apache/nutch/analysis/lang/es.test   |   0
 .../java}/org/apache/nutch/analysis/lang/fi.test   |   0
 .../java}/org/apache/nutch/analysis/lang/fr.test   |   0
 .../java}/org/apache/nutch/analysis/lang/it.test   |   0
 .../java}/org/apache/nutch/analysis/lang/nl.test   |   0
 .../java}/org/apache/nutch/analysis/lang/pt.test   |   0
 .../java}/org/apache/nutch/analysis/lang/sv.test   |   0
 .../nutch/analysis/lang/test-referencial.txt       |   0
 .../lib-htmlunit/build-ivy.xml                     |   0
 .../lib-htmlunit/build.xml                         |   0
 {src/plugin => nutch-plugins}/lib-htmlunit/ivy.xml |   0
 .../lib-htmlunit/plugin.xml                        |   0
 nutch-plugins/lib-htmlunit/pom.xml                 |  55 +++
 .../nutch/protocol/htmlunit/HtmlUnitWebDriver.java |   0
 .../htmlunit/HtmlUnitWebWindowListener.java        |   0
 {src/plugin => nutch-plugins}/lib-http/build.xml   |   0
 .../lib-http}/ivy.xml                              |   0
 {src/plugin => nutch-plugins}/lib-http/plugin.xml  |   0
 nutch-plugins/lib-http/pom.xml                     |  38 ++
 .../nutch/protocol/http/api/BlockedException.java  |   0
 .../apache/nutch/protocol/http/api/HttpBase.java   |   0
 .../nutch/protocol/http/api/HttpException.java     |   0
 .../protocol/http/api/HttpRobotRulesParser.java    |   0
 .../apache/nutch/protocol/http/api/package.html    |   0
 .../protocol/http/api/TestRobotRulesParser.java    |   0
 .../lib-nekohtml/build.xml                         |   0
 {src/plugin => nutch-plugins}/lib-nekohtml/ivy.xml |   0
 .../lib-nekohtml/plugin.xml                        |   0
 nutch-plugins/lib-nekohtml/pom.xml                 |  45 ++
 .../lib-regex-filter/build.xml                     |   0
 .../lib-regex-filter}/ivy.xml                      |   0
 .../lib-regex-filter/plugin.xml                    |   0
 nutch-plugins/lib-regex-filter/pom.xml             |  54 +++
 .../org/apache/nutch/urlfilter/api/RegexRule.java  |   0
 .../nutch/urlfilter/api/RegexURLFilterBase.java    |   0
 .../apache/nutch/urlfilter/api/package-info.java   |   0
 .../urlfilter/api/RegexURLFilterBaseTest.java      |   0
 .../lib-selenium/build-ivy.xml                     |   0
 .../lib-selenium/build.xml                         |   0
 .../lib-selenium/howto_upgrade_selenium.txt        |   0
 {src/plugin => nutch-plugins}/lib-selenium/ivy.xml |   0
 .../lib-selenium/plugin.xml                        |   0
 nutch-plugins/lib-selenium/pom.xml                 |  49 ++
 .../nutch/protocol/selenium/HttpWebClient.java     |   0
 {src/plugin => nutch-plugins}/lib-xml/build.xml    |   0
 {src/plugin => nutch-plugins}/lib-xml/ivy.xml      |   0
 {src/plugin => nutch-plugins}/lib-xml/plugin.xml   |   0
 nutch-plugins/lib-xml/pom.xml                      |  38 ++
 .../microformats-reltag/build.xml                  |   0
 .../microformats-reltag}/ivy.xml                   |   0
 .../microformats-reltag/plugin.xml                 |   0
 nutch-plugins/microformats-reltag/pom.xml          |  38 ++
 .../microformats/reltag/RelTagIndexingFilter.java  |   0
 .../nutch/microformats/reltag/RelTagParser.java    |   0
 .../apache/nutch/microformats/reltag/package.html  |   0
 .../mimetype-filter/build.xml                      |   0
 .../mimetype-filter}/ivy.xml                       |   0
 .../mimetype-filter/plugin.xml                     |   0
 nutch-plugins/mimetype-filter/pom.xml              |  38 ++
 .../indexer/filter/MimeTypeIndexingFilter.java     |   0
 .../indexer/filter/MimeTypeIndexingFilterTest.java |   0
 .../src/test/resources}/allow-images.txt           |   0
 .../src/test/resources}/block-html.txt             |   0
 .../nutch-extensionpoints/build.xml                |   0
 .../nutch-extensionpoints}/ivy.xml                 |   0
 .../nutch-extensionpoints/plugin.xml               |   0
 nutch-plugins/nutch-extensionpoints/pom.xml        |  38 ++
 {src/plugin => nutch-plugins}/parse-ext/build.xml  |   0
 {src/plugin => nutch-plugins}/parse-ext/command    |   0
 .../plugin/tld => nutch-plugins/parse-ext}/ivy.xml |   0
 {src/plugin => nutch-plugins}/parse-ext/plugin.xml |   0
 nutch-plugins/parse-ext/pom.xml                    |  38 ++
 .../java/org/apache/nutch/parse/ext/ExtParser.java |   0
 .../org/apache/nutch/parse/ext/package-info.java   |   0
 .../org/apache/nutch/parse/ext/TestExtParser.java  |   0
 {src/plugin => nutch-plugins}/parse-html/build.xml |   0
 {src/plugin => nutch-plugins}/parse-html/ivy.xml   |   0
 .../plugin => nutch-plugins}/parse-html/plugin.xml |   0
 nutch-plugins/parse-html/pom.xml                   |  49 ++
 .../org/apache/nutch/parse/html/DOMBuilder.java    |   0
 .../apache/nutch/parse/html/DOMContentUtils.java   |   0
 .../apache/nutch/parse/html/HTMLMetaProcessor.java |   0
 .../org/apache/nutch/parse/html/HtmlParser.java    |   0
 .../nutch/parse/html/XMLCharacterRecognizer.java   |   0
 .../java/org/apache/nutch/parse/html/package.html  |   0
 .../nutch/parse/html/TestDOMContentUtils.java      |   0
 .../apache/nutch/parse/html/TestHtmlParser.java    |   0
 .../nutch/parse/html/TestRobotsMetaProcessor.java  |   0
 {src/plugin => nutch-plugins}/parse-js/build.xml   |   0
 .../parse-js}/ivy.xml                              |   0
 {src/plugin => nutch-plugins}/parse-js/plugin.xml  |   0
 nutch-plugins/parse-js/pom.xml                     |  38 ++
 .../org/apache/nutch/parse/js/JSParseFilter.java   |   0
 .../org/apache/nutch/parse/js/package-info.java    |   0
 .../parse-metatags/README.txt                      |   0
 .../parse-metatags/build.xml                       |   0
 .../parse-metatags}/ivy.xml                        |   0
 .../parse-metatags/plugin.xml                      |   0
 nutch-plugins/parse-metatags/pom.xml               |  38 ++
 .../nutch/parse/metatags/MetaTagsParser.java       |   0
 .../apache/nutch/parse/metatags/package-info.java  |   0
 .../nutch/parse/metatags/TestMetatagParser.java    |   0
 .../src/test/resources}/testMetatags.html          |   0
 .../test/resources}/testMultivalueMetatags.html    |   0
 .../parse-replace/README.txt                       |   0
 .../parse-replace/build.xml                        |   0
 .../parse-replace}/ivy.xml                         |   0
 .../parse-replace/plugin.xml                       |   0
 nutch-plugins/parse-replace/pom.xml                |  38 ++
 .../apache/nutch/parse/replace/ReplaceParser.java  |   0
 .../apache/nutch/parse/replace/package-info.java   |   0
 .../nutch/parse/replace/TestParseReplace.java      |   0
 .../src/test/resources}/testParseReplace.html      |   0
 {src/plugin => nutch-plugins}/parse-swf/build.xml  |   0
 .../parse-swf}/ivy.xml                             |   0
 .../parse-swf/lib/javaswf-LICENSE.txt              |   0
 .../parse-swf/lib/javaswf.jar                      | Bin
 {src/plugin => nutch-plugins}/parse-swf/plugin.xml |   0
 nutch-plugins/parse-swf/pom.xml                    |  46 ++
 .../java/org/apache/nutch/parse/swf/SWFParser.java |   0
 .../org/apache/nutch/parse/swf/package-info.java   |   0
 .../org/apache/nutch/parse/swf/TestSWFParser.java  |   0
 .../parse-swf/src/test/resources}/test1.swf        | Bin
 .../parse-swf/src/test/resources}/test1.txt        |   0
 .../parse-swf/src/test/resources}/test2.swf        | Bin
 .../parse-swf/src/test/resources}/test2.txt        |   0
 .../parse-swf/src/test/resources}/test3.swf        | Bin
 .../parse-swf/src/test/resources}/test3.txt        |   0
 .../parse-tika/build-ivy.xml                       |   0
 {src/plugin => nutch-plugins}/parse-tika/build.xml |   0
 .../parse-tika/howto_upgrade_tika.txt              |   0
 {src/plugin => nutch-plugins}/parse-tika/ivy.xml   |   0
 .../plugin => nutch-plugins}/parse-tika/plugin.xml |   0
 nutch-plugins/parse-tika/pom.xml                   |  54 +++
 .../parse/tika/BoilerpipeExtractorRepository.java  |   0
 .../org/apache/nutch/parse/tika/DOMBuilder.java    |   0
 .../apache/nutch/parse/tika/DOMContentUtils.java   |   0
 .../apache/nutch/parse/tika/HTMLMetaProcessor.java |   0
 .../org/apache/nutch/parse/tika/TikaParser.java    |   0
 .../nutch/parse/tika/XMLCharacterRecognizer.java   |   0
 .../org/apache/nutch/parse/tika/package-info.java  |   0
 .../org/apache/nutch/tika/TestDOMContentUtils.java |   0
 .../org/apache/nutch/tika/TestFeedParser.java      |   0
 .../org/apache/nutch/tika/TestImageMetadata.java   |   0
 .../org/apache/nutch/tika/TestMSWordParser.java    |   0
 .../java}/org/apache/nutch/tika/TestOOParser.java  |   0
 .../java}/org/apache/nutch/tika/TestPdfParser.java |   0
 .../java}/org/apache/nutch/tika/TestRTFParser.java |   0
 .../apache/nutch/tika/TestRobotsMetaProcessor.java |   0
 .../parse-tika/src/test/resources}/encrypted.pdf   | Bin
 .../parse-tika/src/test/resources}/nutch.html      |   0
 .../src/test/resources}/nutch_logo_tm.gif          | Bin
 .../parse-tika/src/test/resources}/ootest.odt      | Bin
 .../parse-tika/src/test/resources}/ootest.sxw      | Bin
 .../parse-tika/src/test/resources}/ootest.txt      |   0
 .../parse-tika/src/test/resources}/pdftest.pdf     |   0
 .../parse-tika/src/test/resources}/rsstest.rss     |   0
 .../parse-tika/src/test/resources}/test.rtf        |   0
 .../parse-tika/src/test/resources}/word97.doc      | Bin
 {src/plugin => nutch-plugins}/parse-zip/build.xml  |   0
 .../parse-zip}/ivy.xml                             |   0
 {src/plugin => nutch-plugins}/parse-zip/plugin.xml |   0
 nutch-plugins/parse-zip/pom.xml                    |  38 ++
 .../java/org/apache/nutch/parse/zip/ZipParser.java |   0
 .../apache/nutch/parse/zip/ZipTextExtractor.java   |   0
 .../org/apache/nutch/parse/zip/package-info.java   |   0
 .../org/apache/nutch/parse/zip/TestZipParser.java  |   0
 .../parse-zip/src/test/resources}/test.zip         | Bin
 .../parsefilter-naivebayes/build-ivy.xml           |   0
 .../parsefilter-naivebayes/build.xml               |   0
 .../parsefilter-naivebayes/ivy.xml                 |   0
 .../parsefilter-naivebayes/plugin.xml              |   0
 nutch-plugins/parsefilter-naivebayes/pom.xml       |  38 ++
 .../nutch/parsefilter/naivebayes/Classify.java     |   0
 .../naivebayes/NaiveBayesParseFilter.java          |   0
 .../apache/nutch/parsefilter/naivebayes/Train.java |   0
 .../nutch/parsefilter/naivebayes/package-info.java |   0
 .../parsefilter-regex/build.xml                    |   0
 .../parsefilter-regex/ivy.xml                      |   0
 .../parsefilter-regex/plugin.xml                   |   0
 nutch-plugins/parsefilter-regex/pom.xml            |  38 ++
 .../nutch/parsefilter/regex/RegexParseFilter.java  |   0
 .../nutch/parsefilter/regex/package-info.java      |   0
 .../parsefilter/regex/TestRegexParseFilter.java    |   0
 .../src/test/resources}/regex-parsefilter.txt      |   0
 {src/plugin => nutch-plugins}/plugin.dtd           |   0
 nutch-plugins/plugin/pom.xml                       |  38 ++
 nutch-plugins/pom.xml                              | 164 +++++++
 .../protocol-file/build.xml                        |   0
 .../protocol-file}/ivy.xml                         |   0
 .../protocol-file/plugin.xml                       |   0
 nutch-plugins/protocol-file/pom.xml                |  38 ++
 .../java/org/apache/nutch/protocol/file/File.java  |   0
 .../org/apache/nutch/protocol/file/FileError.java  |   0
 .../apache/nutch/protocol/file/FileException.java  |   0
 .../apache/nutch/protocol/file/FileResponse.java   |   0
 .../org/apache/nutch/protocol/file/package.html    |   0
 .../nutch/protocol/file/TestProtocolFile.java      |   0
 .../src/test/resources}/testprotocolfile.txt       |   0
 .../test/resources}/testprotocolfile_(encoded).txt |   0
 .../protocol-ftp/build.xml                         |   0
 {src/plugin => nutch-plugins}/protocol-ftp/ivy.xml |   0
 .../protocol-ftp/plugin.xml                        |   0
 nutch-plugins/protocol-ftp/pom.xml                 |  38 ++
 .../java/org/apache/nutch/protocol/ftp/Client.java |   0
 .../java/org/apache/nutch/protocol/ftp/Ftp.java    |   0
 .../org/apache/nutch/protocol/ftp/FtpError.java    |   0
 .../apache/nutch/protocol/ftp/FtpException.java    |   0
 .../protocol/ftp/FtpExceptionBadSystResponse.java  |   0
 .../ftp/FtpExceptionCanNotHaveDataConnection.java  |   0
 ...FtpExceptionControlClosedByForcedDataClose.java |   0
 .../ftp/FtpExceptionUnknownForcedDataClose.java    |   0
 .../org/apache/nutch/protocol/ftp/FtpResponse.java |   0
 .../nutch/protocol/ftp/FtpRobotRulesParser.java    |   0
 .../nutch/protocol/ftp/PrintCommandListener.java   |   0
 .../org/apache/nutch/protocol/ftp/package.html     |   0
 .../protocol-htmlunit/build.xml                    |   0
 .../protocol-htmlunit/ivy.xml                      |   0
 .../protocol-htmlunit/plugin.xml                   |   0
 nutch-plugins/protocol-htmlunit/pom.xml            |  51 ++
 .../org/apache/nutch/protocol/htmlunit/Http.java   |   0
 .../nutch/protocol/htmlunit/HttpResponse.java      |   0
 .../apache/nutch/protocol/htmlunit/package.html    |   0
 .../protocol-http/build.xml                        |   0
 .../protocol-http}/ivy.xml                         |   0
 .../protocol-http/jsp/basic-http.jsp               |   0
 .../protocol-http/jsp/brokenpage.jsp               |   0
 .../protocol-http/jsp/redirect301.jsp              |   0
 .../protocol-http/jsp/redirect302.jsp              |   0
 .../protocol-http/plugin.xml                       |   0
 nutch-plugins/protocol-http/pom.xml                |  57 +++
 .../java/org/apache/nutch/protocol/http/Http.java  |   0
 .../apache/nutch/protocol/http/HttpResponse.java   |   0
 .../org/apache/nutch/protocol/http/package.html    |   0
 .../src/test/conf/nutch-site-test.xml              |   0
 .../nutch/protocol/http/TestProtocolHttp.java      |   0
 .../protocol-httpclient/build.xml                  |   0
 .../protocol-httpclient/ivy.xml                    |   0
 .../protocol-httpclient/jsp/basic.jsp              |   0
 .../protocol-httpclient/jsp/cookies.jsp            |   0
 .../protocol-httpclient/jsp/digest.jsp             |   0
 .../protocol-httpclient/jsp/noauth.jsp             |   0
 .../protocol-httpclient/jsp/ntlm.jsp               |   0
 .../protocol-httpclient/plugin.xml                 |   0
 nutch-plugins/protocol-httpclient/pom.xml          |  62 +++
 .../httpclient/DummySSLProtocolSocketFactory.java  |   0
 .../protocol/httpclient/DummyX509TrustManager.java |   0
 .../org/apache/nutch/protocol/httpclient/Http.java |   0
 .../protocol/httpclient/HttpAuthentication.java    |   0
 .../httpclient/HttpAuthenticationException.java    |   0
 .../httpclient/HttpAuthenticationFactory.java      |   0
 .../httpclient/HttpBasicAuthentication.java        |   0
 .../httpclient/HttpFormAuthConfigurer.java         |   0
 .../httpclient/HttpFormAuthentication.java         |   0
 .../nutch/protocol/httpclient/HttpResponse.java    |   0
 .../apache/nutch/protocol/httpclient/package.html  |   0
 .../src/test/conf/httpclient-auth-test.xml         |   0
 .../src/test/conf/nutch-site-test.xml              |   0
 .../httpclient/TestProtocolHttpClient.java         |   0
 .../protocol-interactiveselenium/README.md         |   0
 .../protocol-interactiveselenium/build-ivy.xml     |   0
 .../protocol-interactiveselenium/build.xml         |   0
 .../protocol-interactiveselenium}/ivy.xml          |   0
 .../protocol-interactiveselenium/plugin.xml        |   0
 nutch-plugins/protocol-interactiveselenium/pom.xml |  50 ++
 .../nutch/protocol/interactiveselenium/Http.java   |   0
 .../protocol/interactiveselenium/HttpResponse.java |   0
 .../handlers/DefalultMultiInteractionHandler.java  |   0
 .../handlers/DefaultClickAllAjaxLinksHandler.java  |   0
 .../handlers/DefaultHandler.java                   |   0
 .../handlers/InteractiveSeleniumHandler.java       |   0
 .../protocol/interactiveselenium}/package.html     |   0
 .../protocol-selenium/README.md                    |   0
 .../protocol-selenium/build-ivy.xml                |   0
 .../protocol-selenium/build.xml                    |   0
 .../protocol-selenium}/ivy.xml                     |   0
 .../protocol-selenium/plugin.xml                   |   0
 nutch-plugins/protocol-selenium/pom.xml            |  50 ++
 .../org/apache/nutch/protocol/selenium/Http.java   |   0
 .../nutch/protocol/selenium/HttpResponse.java      |   0
 .../apache/nutch/protocol/selenium}/package.html   |   0
 .../scoring-depth/build.xml                        |   0
 .../plugin => nutch-plugins}/scoring-depth/ivy.xml |   0
 .../scoring-depth/plugin.xml                       |   0
 nutch-plugins/scoring-depth/pom.xml                |  38 ++
 .../nutch/scoring/depth/DepthScoringFilter.java    |   0
 .../apache/nutch/scoring/depth/package-info.java   |   0
 .../scoring-link/build.xml                         |   0
 .../scoring-link}/ivy.xml                          |   0
 .../scoring-link/plugin.xml                        |   0
 nutch-plugins/scoring-link/pom.xml                 |  38 ++
 .../scoring/link/LinkAnalysisScoringFilter.java    |   0
 .../apache/nutch/scoring/link/package-info.java    |   0
 .../scoring-opic/build.xml                         |   0
 .../scoring-opic}/ivy.xml                          |   0
 .../scoring-opic/plugin.xml                        |   0
 nutch-plugins/scoring-opic/pom.xml                 |  38 ++
 .../nutch/scoring/opic/OPICScoringFilter.java      |   0
 .../apache/nutch/scoring/opic/package-info.java    |   0
 .../scoring-similarity/build-ivy.xml               |   0
 .../scoring-similarity/build.xml                   |   0
 .../scoring-similarity/ivy.xml                     |   0
 .../scoring-similarity/plugin.xml                  |   0
 nutch-plugins/scoring-similarity/pom.xml           |  45 ++
 .../nutch/scoring/similarity/SimilarityModel.java  |   0
 .../similarity/SimilarityScoringFilter.java        |   0
 .../similarity/cosine/CosineSimilarity.java        |   0
 .../nutch/scoring/similarity/cosine/DocVector.java |   0
 .../nutch/scoring/similarity/cosine/Model.java     |   0
 .../scoring/similarity/cosine/package-info.java    |   0
 .../similarity/util/LuceneAnalyzerUtil.java        |   0
 .../scoring/similarity/util/LuceneTokenizer.java   |   0
 .../scoring/similarity/util/package-info.java      |   0
 .../subcollection/README.txt                       |   0
 .../subcollection/build.xml                        |   0
 .../subcollection}/ivy.xml                         |   0
 .../subcollection/plugin.xml                       |   0
 nutch-plugins/subcollection/pom.xml                |  38 ++
 .../apache/nutch/collection/CollectionManager.java |   0
 .../org/apache/nutch/collection/Subcollection.java |   0
 .../java/org/apache/nutch/collection/package.html  |   0
 .../subcollection/SubcollectionIndexingFilter.java |   0
 .../nutch/indexer/subcollection/package-info.java  |   0
 .../apache/nutch/collection/TestSubcollection.java |   0
 {src/plugin => nutch-plugins}/tld/build.xml        |   0
 .../plugin/parse-ext => nutch-plugins/tld}/ivy.xml |   0
 {src/plugin => nutch-plugins}/tld/plugin.xml       |   0
 nutch-plugins/tld/pom.xml                          |  38 ++
 .../nutch/indexer/tld/TLDIndexingFilter.java       |   0
 .../java/org/apache/nutch/indexer/tld/package.html |   0
 .../apache/nutch/scoring/tld/TLDScoringFilter.java |   0
 .../java/org/apache/nutch/scoring/tld/package.html |   0
 .../urlfilter-automaton/build.xml                  |   0
 .../urlfilter-automaton/ivy.xml                    |   0
 .../urlfilter-automaton/plugin.xml                 |   0
 nutch-plugins/urlfilter-automaton/pom.xml          |  58 +++
 .../urlfilter/automaton/AutomatonURLFilter.java    |   0
 .../apache/nutch/urlfilter/automaton/package.html  |   0
 .../automaton/TestAutomatonURLFilter.java          |   0
 .../src/test/resources}/Benchmarks.rules           |   0
 .../src/test/resources}/Benchmarks.urls            |   0
 .../src/test/resources}/IntranetCrawling.rules     |   0
 .../src/test/resources}/IntranetCrawling.urls      |   0
 .../src/test/resources}/WholeWebCrawling.rules     |   0
 .../src/test/resources}/WholeWebCrawling.urls      |   0
 .../urlfilter-domain/build.xml                     |   0
 .../urlfilter-domain}/ivy.xml                      |   0
 .../urlfilter-domain/plugin.xml                    |   0
 nutch-plugins/urlfilter-domain/pom.xml             |  38 ++
 .../nutch/urlfilter/domain/DomainURLFilter.java    |   0
 .../nutch/urlfilter/domain/package-info.java       |   0
 .../urlfilter/domain/TestDomainURLFilter.java      |   0
 .../urlfilter-domain/src/test/resources}/hosts.txt |   0
 .../urlfilter-domainblacklist/build.xml            |   0
 .../urlfilter-domainblacklist}/ivy.xml             |   0
 .../urlfilter-domainblacklist/plugin.xml           |   0
 nutch-plugins/urlfilter-domainblacklist/pom.xml    |  38 ++
 .../domainblacklist/DomainBlacklistURLFilter.java  |   0
 .../urlfilter/domainblacklist/package-info.java    |   0
 .../TestDomainBlacklistURLFilter.java              |   0
 .../src/test/resources}/hosts.txt                  |   0
 .../urlfilter-ignoreexempt/README.md               |   0
 .../urlfilter-ignoreexempt/build.xml               |   0
 .../urlfilter-ignoreexempt}/ivy.xml                |   0
 .../urlfilter-ignoreexempt/plugin.xml              |   0
 nutch-plugins/urlfilter-ignoreexempt/pom.xml       |  45 ++
 .../urlfilter/ignoreexempt/ExemptionUrlFilter.java |   0
 .../nutch/urlfilter/ignoreexempt/package-info.java |   0
 .../urlfilter-prefix/build.xml                     |   0
 .../urlfilter-prefix}/ivy.xml                      |   0
 .../urlfilter-prefix/plugin.xml                    |   0
 nutch-plugins/urlfilter-prefix/pom.xml             |  38 ++
 .../nutch/urlfilter/prefix/PrefixURLFilter.java    |   0
 .../org/apache/nutch/urlfilter/prefix/package.html |   0
 .../urlfilter/prefix/TestPrefixURLFilter.java      |   0
 .../urlfilter-regex/build.xml                      |   0
 .../urlfilter-regex}/ivy.xml                       |   0
 .../urlfilter-regex/plugin.xml                     |   0
 nutch-plugins/urlfilter-regex/pom.xml              |  53 +++
 .../nutch/urlfilter/regex/RegexURLFilter.java      |   0
 .../org/apache/nutch/urlfilter/regex/package.html  |   0
 .../nutch/urlfilter/regex/TestRegexURLFilter.java  |   0
 .../src/test/resources}/Benchmarks.rules           |   0
 .../src/test/resources}/Benchmarks.urls            |   0
 .../src/test/resources}/IntranetCrawling.rules     |   0
 .../src/test/resources}/IntranetCrawling.urls      |   0
 .../src/test/resources}/WholeWebCrawling.rules     |   0
 .../src/test/resources}/WholeWebCrawling.urls      |   0
 .../src/test/resources}/nutch1838.rules            |   0
 .../src/test/resources}/nutch1838.urls             |   0
 .../urlfilter-suffix/build.xml                     |   0
 .../urlfilter-suffix}/ivy.xml                      |   0
 .../urlfilter-suffix/plugin.xml                    |   0
 nutch-plugins/urlfilter-suffix/pom.xml             |  38 ++
 .../nutch/urlfilter/suffix/SuffixURLFilter.java    |   0
 .../nutch/urlfilter/suffix/package-info.java       |   0
 .../urlfilter/suffix/TestSuffixURLFilter.java      |   0
 .../urlfilter-validator/build.xml                  |   0
 .../urlfilter-validator}/ivy.xml                   |   0
 .../urlfilter-validator/plugin.xml                 |   0
 nutch-plugins/urlfilter-validator/pom.xml          |  38 ++
 .../nutch/urlfilter/validator/UrlValidator.java    |   0
 .../apache/nutch/urlfilter/validator/package.html  |   0
 .../urlfilter/validator/TestUrlValidator.java      |   0
 {src/plugin => nutch-plugins}/urlmeta/build.xml    |   0
 .../urlmeta}/ivy.xml                               |   0
 {src/plugin => nutch-plugins}/urlmeta/plugin.xml   |   0
 nutch-plugins/urlmeta/pom.xml                      |  38 ++
 .../indexer/urlmeta/URLMetaIndexingFilter.java     |   0
 .../org/apache/nutch/indexer/urlmeta/package.html  |   0
 .../scoring/urlmeta/URLMetaScoringFilter.java      |   0
 .../org/apache/nutch/scoring/urlmeta/package.html  |   0
 .../urlnormalizer-ajax/build.xml                   |   0
 .../urlnormalizer-ajax}/ivy.xml                    |   0
 .../urlnormalizer-ajax/plugin.xml                  |   0
 nutch-plugins/urlnormalizer-ajax/pom.xml           |  38 ++
 .../net/urlnormalizer/ajax/AjaxURLNormalizer.java  |   0
 .../urlnormalizer/ajax/TestAjaxURLNormalizer.java  |   0
 .../urlnormalizer-basic/build.xml                  |   0
 .../urlnormalizer-basic}/ivy.xml                   |   0
 .../urlnormalizer-basic/plugin.xml                 |   0
 nutch-plugins/urlnormalizer-basic/pom.xml          |  38 ++
 .../urlnormalizer/basic/BasicURLNormalizer.java    |   0
 .../net/urlnormalizer/basic/package-info.java      |   0
 .../basic/TestBasicURLNormalizer.java              |   0
 .../urlnormalizer-host/build.xml                   |   0
 .../urlnormalizer-host}/ivy.xml                    |   0
 .../urlnormalizer-host/plugin.xml                  |   0
 nutch-plugins/urlnormalizer-host/pom.xml           |  38 ++
 .../net/urlnormalizer/host/HostURLNormalizer.java  |   0
 .../nutch/net/urlnormalizer/host/package-info.java |   0
 .../urlnormalizer/host/TestHostURLNormalizer.java  |   0
 .../src/test/resources}/hosts.txt                  |   0
 .../urlnormalizer-pass/build.xml                   |   0
 .../urlnormalizer-pass}/ivy.xml                    |   0
 .../urlnormalizer-pass/plugin.xml                  |   0
 nutch-plugins/urlnormalizer-pass/pom.xml           |  38 ++
 .../net/urlnormalizer/pass/PassURLNormalizer.java  |   0
 .../nutch/net/urlnormalizer/pass/package-info.java |   0
 .../urlnormalizer/pass/TestPassURLNormalizer.java  |   0
 .../urlnormalizer-protocol/build.xml               |   0
 .../urlnormalizer-protocol}/ivy.xml                |   0
 .../urlnormalizer-protocol/plugin.xml              |   0
 nutch-plugins/urlnormalizer-protocol/pom.xml       |  38 ++
 .../protocol/ProtocolURLNormalizer.java            |   0
 .../protocol/TestProtocolURLNormalizer.java        |   0
 .../src/test/resources}/protocols.txt              |   0
 .../urlnormalizer-querystring/build.xml            |   0
 .../urlnormalizer-querystring}/ivy.xml             |   0
 .../urlnormalizer-querystring/plugin.xml           |   0
 nutch-plugins/urlnormalizer-querystring/pom.xml    |  38 ++
 .../querystring/QuerystringURLNormalizer.java      |   0
 .../urlnormalizer/querystring/package-info.java    |   0
 .../querystring/TestQuerystringURLNormalizer.java  |   0
 .../urlnormalizer-regex/build.xml                  |   0
 .../urlnormalizer-regex}/ivy.xml                   |   0
 .../urlnormalizer-regex/plugin.xml                 |   0
 nutch-plugins/urlnormalizer-regex/pom.xml          |  38 ++
 .../urlnormalizer/regex/RegexURLNormalizer.java    |   0
 .../net/urlnormalizer/regex/package-info.java      |   0
 .../regex/TestRegexURLNormalizer.java              |   0
 .../test/resources}/regex-normalize-default.test   |   0
 .../test/resources}/regex-normalize-default.xml    |   0
 .../test/resources}/regex-normalize-scope1.test    |   0
 .../src/test/resources}/regex-normalize-scope1.xml |   0
 .../urlnormalizer-slash/build.xml                  |   0
 .../urlnormalizer-slash}/ivy.xml                   |   0
 .../urlnormalizer-slash/plugin.xml                 |   0
 nutch-plugins/urlnormalizer-slash/pom.xml          |  38 ++
 .../urlnormalizer/slash/SlashURLNormalizer.java    |   0
 .../slash/TestSlashURLNormalizer.java              |   0
 .../src/test/resources}/slashes.txt                |   0
 pom.xml                                            | 157 +++++++
 .../urlfilter-ignoreexempt/data/.donotdelete       |   0
 1023 files changed, 3734 insertions(+), 9 deletions(-)

diff --cc nutch-core/src/test/java/org/apache/nutch/crawl/TestCrawlDbMerger.java
index 7c4b2eb,599c353..bfb1581
--- a/nutch-core/src/test/java/org/apache/nutch/crawl/TestCrawlDbMerger.java
+++ b/nutch-core/src/test/java/org/apache/nutch/crawl/TestCrawlDbMerger.java
@@@ -37,10 -36,11 +38,11 @@@ import org.junit.After
  import org.junit.Assert;
  import org.junit.Before;
  import org.junit.Test;
+ import org.junit.experimental.categories.Category;
  
  public class TestCrawlDbMerger {
 -  private static final Logger LOG = Logger.getLogger(CrawlDbMerger.class
 -      .getName());
 +  private static final Logger LOG = LoggerFactory
 +      .getLogger(MethodHandles.lookup().lookupClass());
  
    String url10 = "http://example.com/";
    String url11 = "http://example.com/foo";

-- 
To stop receiving notification emails like this one, please contact
"commits@nutch.apache.org" <co...@nutch.apache.org>.