You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2018/03/27 13:49:12 UTC
[nutch] 01/01: Merge pull request #295 from lewismc/NUTCH-2516
This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 7cb7abde6e3fae7c2ae70865e7ee2ee5ff95e2ba
Merge: 31819b7 303fd19
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Tue Mar 27 06:49:09 2018 -0700
Merge pull request #295 from lewismc/NUTCH-2516
NUTCH-2516 Hadoop imports use wildcards
.gitignore | 6 +
ivy/ivy-2.4.0.jar | Bin 1282424 -> 0 bytes
src/java/org/apache/nutch/crawl/CrawlDatum.java | 28 ++-
src/java/org/apache/nutch/crawl/CrawlDb.java | 30 ++-
src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 1 -
src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 15 +-
src/java/org/apache/nutch/crawl/CrawlDbReader.java | 4 -
.../org/apache/nutch/crawl/CrawlDbReducer.java | 8 +-
.../org/apache/nutch/crawl/DeduplicationJob.java | 4 -
src/java/org/apache/nutch/crawl/Generator.java | 31 ++-
src/java/org/apache/nutch/crawl/Inlink.java | 8 +-
src/java/org/apache/nutch/crawl/Inlinks.java | 19 +-
src/java/org/apache/nutch/crawl/LinkDbFilter.java | 2 -
src/java/org/apache/nutch/crawl/LinkDbMerger.java | 1 -
src/java/org/apache/nutch/crawl/LinkDbReader.java | 19 +-
.../org/apache/nutch/crawl/SignatureFactory.java | 1 -
.../org/apache/nutch/crawl/URLPartitioner.java | 3 +-
src/java/org/apache/nutch/fetcher/FetchNodeDb.java | 1 -
src/java/org/apache/nutch/fetcher/Fetcher.java | 28 ++-
.../apache/nutch/fetcher/FetcherOutputFormat.java | 5 -
.../org/apache/nutch/fetcher/FetcherThread.java | 2 -
.../apache/nutch/fetcher/FetcherThreadEvent.java | 1 -
src/java/org/apache/nutch/fetcher/QueueFeeder.java | 1 -
src/java/org/apache/nutch/hostdb/HostDatum.java | 2 -
src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 -
src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 6 -
.../apache/nutch/hostdb/UpdateHostDbMapper.java | 4 -
.../apache/nutch/hostdb/UpdateHostDbReducer.java | 3 -
src/java/org/apache/nutch/indexer/CleaningJob.java | 3 -
src/java/org/apache/nutch/indexer/IndexWriter.java | 1 -
.../org/apache/nutch/indexer/IndexWriters.java | 1 -
.../org/apache/nutch/indexer/IndexerMapReduce.java | 5 -
.../apache/nutch/indexer/IndexerOutputFormat.java | 1 -
.../org/apache/nutch/indexer/IndexingFilter.java | 2 -
.../org/apache/nutch/indexer/IndexingFilters.java | 1 -
.../nutch/indexer/IndexingFiltersChecker.java | 1 -
src/java/org/apache/nutch/indexer/IndexingJob.java | 1 -
src/java/org/apache/nutch/indexer/NutchField.java | 17 +-
.../org/apache/nutch/metadata/CreativeCommons.java | 6 +-
.../org/apache/nutch/metadata/HttpHeaders.java | 18 +-
.../org/apache/nutch/net/URLExemptionFilter.java | 3 +-
src/java/org/apache/nutch/net/URLFilter.java | 2 -
.../org/apache/nutch/net/URLFilterChecker.java | 7 -
.../org/apache/nutch/net/URLNormalizerChecker.java | 7 -
.../org/apache/nutch/net/protocols/Response.java | 2 -
.../org/apache/nutch/parse/HtmlParseFilter.java | 3 -
src/java/org/apache/nutch/parse/ParseData.java | 16 +-
src/java/org/apache/nutch/parse/ParseImpl.java | 7 +-
.../org/apache/nutch/parse/ParseOutputFormat.java | 19 +-
.../org/apache/nutch/parse/ParsePluginList.java | 1 -
.../org/apache/nutch/parse/ParsePluginsReader.java | 4 -
src/java/org/apache/nutch/parse/ParseSegment.java | 39 ++--
src/java/org/apache/nutch/parse/ParseText.java | 24 +-
src/java/org/apache/nutch/parse/ParseUtil.java | 2 -
src/java/org/apache/nutch/parse/Parser.java | 2 -
src/java/org/apache/nutch/parse/ParserFactory.java | 4 -
src/java/org/apache/nutch/protocol/Content.java | 3 -
src/java/org/apache/nutch/protocol/Protocol.java | 2 -
.../org/apache/nutch/protocol/ProtocolFactory.java | 6 +-
.../apache/nutch/protocol/RobotRulesParser.java | 3 -
.../apache/nutch/scoring/webgraph/LinkDumper.java | 4 -
.../apache/nutch/scoring/webgraph/LinkRank.java | 2 -
.../apache/nutch/scoring/webgraph/NodeDumper.java | 2 -
.../apache/nutch/scoring/webgraph/NodeReader.java | 1 -
.../nutch/scoring/webgraph/ScoreUpdater.java | 2 -
.../apache/nutch/scoring/webgraph/WebGraph.java | 2 -
.../org/apache/nutch/segment/SegmentReader.java | 9 +-
src/java/org/apache/nutch/service/NutchReader.java | 1 -
.../org/apache/nutch/service/impl/NodeReader.java | 1 -
.../service/impl/NutchServerPoolExecutor.java | 2 -
.../nutch/service/model/request/JobConfig.java | 1 -
.../nutch/service/resources/ConfigResource.java | 3 -
.../nutch/service/resources/SeedResource.java | 1 -
.../nutch/tools/AbstractCommonCrawlFormat.java | 2 -
.../apache/nutch/tools/CommonCrawlDataDumper.java | 5 -
.../apache/nutch/tools/CommonCrawlFormatWARC.java | 4 +-
src/java/org/apache/nutch/tools/DmozParser.java | 50 +++--
src/java/org/apache/nutch/tools/FileDumper.java | 5 -
src/java/org/apache/nutch/tools/FreeGenerator.java | 2 -
src/java/org/apache/nutch/tools/WARCUtils.java | 218 +++++++++---------
.../apache/nutch/tools/arc/ArcSegmentCreator.java | 1 -
.../org/apache/nutch/tools/warc/WARCExporter.java | 2 -
src/java/org/apache/nutch/util/DeflateUtils.java | 1 -
src/java/org/apache/nutch/util/DomUtil.java | 1 -
src/java/org/apache/nutch/util/GZIPUtils.java | 1 -
src/java/org/apache/nutch/util/MimeUtil.java | 5 -
.../nutch/util/ProtocolStatusStatistics.java | 1 -
src/java/org/apache/nutch/util/URLUtil.java | 4 +-
.../org/apache/nutch/webui/model/NutchConfig.java | 16 ++
.../creativecommons/nutch/CCIndexingFilter.java | 2 +-
.../org/creativecommons/nutch/CCParseFilter.java | 38 ++--
.../nutch/indexer/feed/FeedIndexingFilter.java | 2 -
.../org/apache/nutch/parse/feed/FeedParser.java | 4 -
.../nutch/parse/headings/HeadingsParseFilter.java | 9 +-
.../nutch/indexer/more/MoreIndexingFilter.java | 4 +-
.../nutch/indexer/replace/FieldReplacer.java | 6 +-
.../elasticrest/ElasticRestIndexWriter.java | 1 -
.../indexwriter/elastic/ElasticIndexWriter.java | 3 +-
.../indexwriter/rabbit/RabbitIndexWriter.java | 246 ++++++++++-----------
.../indexwriter/rabbit/RabbitMQConstants.java | 24 +-
.../nutch/indexwriter/rabbit/RabbitMessage.java | 72 +++---
.../nutch/indexwriter/solr/SolrIndexWriter.java | 7 -
.../apache/nutch/indexwriter/solr/SolrUtils.java | 1 -
.../nutch/analysis/lang/HTMLLanguageParser.java | 1 -
.../analysis/lang/LanguageIndexingFilter.java | 2 -
.../apache/nutch/protocol/http/api/HttpBase.java | 5 -
.../nutch/protocol/http/api/HttpException.java | 1 -
.../nutch/urlfilter/api/RegexURLFilterBase.java | 7 +-
.../microformats/reltag/RelTagIndexingFilter.java | 2 -
.../nutch/microformats/reltag/RelTagParser.java | 4 -
.../indexer/filter/MimeTypeIndexingFilter.java | 1 -
.../apache/nutch/parse/html/DOMContentUtils.java | 6 +-
.../apache/nutch/parse/html/HTMLMetaProcessor.java | 4 +-
.../org/apache/nutch/parse/html/HtmlParser.java | 40 +++-
.../java/org/apache/nutch/parse/swf/SWFParser.java | 88 ++++----
.../parse/tika/BoilerpipeExtractorRepository.java | 6 +-
.../apache/nutch/parse/tika/HTMLMetaProcessor.java | 4 +-
.../org/apache/nutch/parse/tika/TikaParser.java | 1 -
.../apache/nutch/parse/zip/ZipTextExtractor.java | 4 -
.../nutch/parsefilter/naivebayes/Classify.java | 1 -
.../nutch/parsefilter/regex/RegexParseFilter.java | 9 +-
.../apache/nutch/protocol/file/FileResponse.java | 4 -
.../java/org/apache/nutch/protocol/ftp/Client.java | 1 -
.../nutch/protocol/ftp/FtpRobotRulesParser.java | 1 -
.../java/org/apache/nutch/protocol/http/Http.java | 4 -
.../org/apache/nutch/protocol/httpclient/Http.java | 6 -
.../httpclient/HttpAuthenticationFactory.java | 4 -
.../httpclient/HttpBasicAuthentication.java | 4 -
.../nutch/protocol/httpclient/HttpResponse.java | 3 -
.../nutch/protocol/interactiveselenium/Http.java | 1 -
.../protocol/interactiveselenium/HttpResponse.java | 1 -
.../handlers/DefaultHandler.java | 15 +-
.../handlers/InteractiveSeleniumHandler.java | 2 +-
.../org/apache/nutch/protocol/selenium/Http.java | 1 -
.../nutch/protocol/selenium/HttpResponse.java | 1 -
.../nutch/scoring/opic/OPICScoringFilter.java | 1 -
.../similarity/SimilarityScoringFilter.java | 1 -
.../similarity/cosine/CosineSimilarity.java | 1 -
.../nutch/scoring/similarity/cosine/Model.java | 7 -
.../similarity/util/LuceneAnalyzerUtil.java | 1 -
.../urlfilter/automaton/AutomatonURLFilter.java | 4 -
.../urlfilter/ignoreexempt/ExemptionUrlFilter.java | 4 -
.../nutch/urlfilter/prefix/PrefixURLFilter.java | 6 +-
.../nutch/urlfilter/regex/RegexURLFilter.java | 1 -
.../nutch/urlfilter/suffix/SuffixURLFilter.java | 3 +-
.../net/urlnormalizer/ajax/AjaxURLNormalizer.java | 2 -
.../net/urlnormalizer/host/HostURLNormalizer.java | 1 -
.../protocol/ProtocolURLNormalizer.java | 1 -
.../querystring/QuerystringURLNormalizer.java | 4 -
.../urlnormalizer/slash/SlashURLNormalizer.java | 7 +-
150 files changed, 706 insertions(+), 791 deletions(-)
--
To stop receiving notification emails like this one, please contact
lewismc@apache.org.