You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2017/03/08 06:52:42 UTC
[nutch] 02/03: Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292
This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch NUTCH-2292
in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 9a0ce9e3e8a7d1fc6093b31e40ff26e503dc7beb
Merge: ecc60d7 62491d5
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Tue Mar 7 14:20:23 2017 -0800
Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292
.gitignore | 6 +++-
conf/nutch-default.xml | 8 +++++
default.properties | 4 +--
ivy/mvn.template | 4 +--
.../java/org/apache/nutch/crawl/CrawlDatum.java | 8 ++---
.../main/java/org/apache/nutch/crawl/CrawlDb.java | 21 +++++------
.../java/org/apache/nutch/crawl/CrawlDbMerger.java | 2 +-
.../java/org/apache/nutch/crawl/CrawlDbReader.java | 14 ++++----
.../org/apache/nutch/crawl/CrawlDbReducer.java | 2 +-
.../org/apache/nutch/crawl/DeduplicationJob.java | 2 +-
.../java/org/apache/nutch/crawl/Generator.java | 6 ++--
.../main/java/org/apache/nutch/crawl/Injector.java | 2 +-
.../main/java/org/apache/nutch/crawl/Inlinks.java | 8 ++---
.../main/java/org/apache/nutch/crawl/LinkDb.java | 21 +++++------
.../java/org/apache/nutch/crawl/LinkDbMerger.java | 2 +-
.../java/org/apache/nutch/crawl/LinkDbReader.java | 2 +-
.../nutch/crawl/MimeAdaptiveFetchSchedule.java | 2 +-
.../apache/nutch/crawl/TextProfileSignature.java | 6 ++--
.../org/apache/nutch/fetcher/FetchItemQueues.java | 2 +-
.../java/org/apache/nutch/fetcher/FetchNodeDb.java | 2 +-
.../java/org/apache/nutch/fetcher/Fetcher.java | 17 ++++-----
.../org/apache/nutch/fetcher/FetcherThread.java | 6 ++--
.../apache/nutch/fetcher/FetcherThreadEvent.java | 2 +-
.../nutch/fetcher/FetcherThreadPublisher.java | 0
.../apache/nutch/hostdb/UpdateHostDbReducer.java | 14 ++++----
.../org/apache/nutch/indexer/IndexWriters.java | 2 +-
.../nutch/indexer/IndexingFiltersChecker.java | 2 +-
.../java/org/apache/nutch/indexer/IndexingJob.java | 21 +++++------
.../org/apache/nutch/indexer/NutchDocument.java | 2 +-
.../java/org/apache/nutch/indexer/NutchField.java | 4 +--
.../java/org/apache/nutch/metadata/Metadata.java | 2 +-
.../nutch/metadata/SpellCheckedMetadata.java | 2 +-
.../java/org/apache/nutch/net/URLNormalizers.java | 8 ++---
.../org/apache/nutch/parse/OutlinkExtractor.java | 2 +-
.../java/org/apache/nutch/parse/ParseData.java | 5 +--
.../org/apache/nutch/parse/ParseOutputFormat.java | 4 +--
.../org/apache/nutch/parse/ParsePluginList.java | 4 +--
.../org/apache/nutch/parse/ParsePluginsReader.java | 4 +--
.../java/org/apache/nutch/parse/ParseResult.java | 2 +-
.../java/org/apache/nutch/parse/ParseSegment.java | 15 ++++----
.../java/org/apache/nutch/parse/ParseText.java | 5 +--
.../java/org/apache/nutch/parse/ParserChecker.java | 2 +-
.../java/org/apache/nutch/parse/ParserFactory.java | 4 +--
.../java/org/apache/nutch/plugin/Extension.java | 2 +-
.../org/apache/nutch/plugin/ExtensionPoint.java | 2 +-
.../org/apache/nutch/plugin/PluginDescriptor.java | 16 ++++-----
.../apache/nutch/plugin/PluginManifestParser.java | 2 +-
.../org/apache/nutch/plugin/PluginRepository.java | 30 ++++++++--------
.../java/org/apache/nutch/protocol/Content.java | 5 +--
.../org/apache/nutch/protocol/ProtocolStatus.java | 2 +-
.../apache/nutch/protocol/RobotRulesParser.java | 6 ++--
.../org/apache/nutch/publisher/NutchPublisher.java | 0
.../apache/nutch/publisher/NutchPublishers.java | 0
.../apache/nutch/scoring/webgraph/LinkDumper.java | 6 ++--
.../apache/nutch/scoring/webgraph/LinkRank.java | 6 ++--
.../apache/nutch/scoring/webgraph/NodeReader.java | 2 +-
.../apache/nutch/scoring/webgraph/WebGraph.java | 8 ++---
.../nutch/segment/ContentAsTextInputFormat.java | 2 +-
.../org/apache/nutch/segment/SegmentMerger.java | 10 +++---
.../org/apache/nutch/segment/SegmentReader.java | 17 ++++-----
.../java/org/apache/nutch/service/NutchServer.java | 4 +--
.../java/org/apache/nutch/service/SeedManager.java | 0
.../org/apache/nutch/service/impl/LinkReader.java | 8 ++---
.../org/apache/nutch/service/impl/NodeReader.java | 8 ++---
.../apache/nutch/service/impl/SeedManagerImpl.java | 0
.../apache/nutch/service/impl/SequenceReader.java | 12 +++----
.../nutch/service/model/request/DbQuery.java | 2 +-
.../service/model/response/FetchNodeDbInfo.java | 2 +-
.../apache/nutch/service/resources/DbResource.java | 2 +-
.../java/org/apache/nutch/tools/Benchmark.java | 8 ++---
.../apache/nutch/tools/CommonCrawlDataDumper.java | 4 +--
.../nutch/tools/CommonCrawlFormatJettinson.java | 4 +--
.../java/org/apache/nutch/tools/DmozParser.java | 22 ++++--------
.../java/org/apache/nutch/tools/FileDumper.java | 35 ++++++------------
.../java/org/apache/nutch/tools/FreeGenerator.java | 2 +-
.../org/apache/nutch/tools/warc/WARCExporter.java | 2 +-
.../org/apache/nutch/util/EncodingDetector.java | 10 +++---
.../java/org/apache/nutch/util/HadoopFSUtil.java | 19 ++++------
.../main/java/org/apache/nutch/util/MimeUtil.java | 5 +--
.../java/org/apache/nutch/util/NodeWalker.java | 2 +-
.../main/java/org/apache/nutch/util/NutchTool.java | 2 +-
.../java/org/apache/nutch/util/ObjectCache.java | 4 +--
.../org/apache/nutch/util/TrieStringMatcher.java | 4 +--
.../apache/nutch/util/domain/DomainSuffixes.java | 2 +-
.../webui/pages/components/ColorEnumLabel.java | 2 +-
.../pages/components/ColorEnumLabelBuilder.java | 2 +-
.../webui/pages/components/CpmIteratorAdapter.java | 2 +-
.../nutch/webui/pages/crawls/CrawlPanel.java | 8 ++---
.../nutch/webui/pages/crawls/CrawlsPage.java | 4 +--
.../nutch/webui/pages/instances/InstancePanel.java | 2 +-
.../nutch/webui/pages/instances/InstancesPage.java | 4 +--
.../nutch/webui/pages/seed/SeedListsPage.java | 4 +--
.../apache/nutch/webui/pages/seed/SeedPage.java | 6 ++--
.../nutch/webui/pages/settings/SettingsPage.java | 4 +--
.../elastic/TestElasticIndexWriter.java | 0
.../src/test/resources}/nutch-site-test.xml | 0
.../apache/nutch/protocol/http/api/HttpBase.java | 15 ++++++--
nutch-plugins/parsefilter-regex/README.txt | 41 ++++++++++++++++++++++
.../nutch/parsefilter/regex/RegexParseFilter.java | 18 ++++++----
nutch-plugins/pom.xml | 1 +
.../apache/nutch/protocol/http/HttpResponse.java | 11 ++++--
nutch-plugins/protocol-httpclient/pom.xml | 20 +++++++++--
.../nutch/protocol/httpclient/HttpResponse.java | 7 ++++
.../publish-rabbitmq/build-ivy.xml | 0
.../publish-rabbitmq/build.xml | 0
.../publish-rabbitmq/ivy.xml | 0
.../publish-rabbitmq/plugin.xml | 0
.../pom.xml | 32 ++++++-----------
.../publisher/rabbitmq/RabbitMQPublisherImpl.java | 0
.../nutch/publisher/rabbitmq/package-info.java | 0
.../urlnormalizer/basic/BasicURLNormalizer.java | 5 ++-
.../basic/TestBasicURLNormalizer.java | 6 ++++
pom.xml | 20 +++++++++--
113 files changed, 398 insertions(+), 348 deletions(-)
--
To stop receiving notification emails like this one, please contact
"commits@nutch.apache.org" <co...@nutch.apache.org>.