You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2018/03/27 13:49:12 UTC

[nutch] 01/01: Merge pull request #295 from lewismc/NUTCH-2516

This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 7cb7abde6e3fae7c2ae70865e7ee2ee5ff95e2ba
Merge: 31819b7 303fd19
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Tue Mar 27 06:49:09 2018 -0700

    Merge pull request #295 from lewismc/NUTCH-2516
    
    NUTCH-2516 Hadoop imports use wildcards

 .gitignore                                         |   6 +
 ivy/ivy-2.4.0.jar                                  | Bin 1282424 -> 0 bytes
 src/java/org/apache/nutch/crawl/CrawlDatum.java    |  28 ++-
 src/java/org/apache/nutch/crawl/CrawlDb.java       |  30 ++-
 src/java/org/apache/nutch/crawl/CrawlDbFilter.java |   1 -
 src/java/org/apache/nutch/crawl/CrawlDbMerger.java |  15 +-
 src/java/org/apache/nutch/crawl/CrawlDbReader.java |   4 -
 .../org/apache/nutch/crawl/CrawlDbReducer.java     |   8 +-
 .../org/apache/nutch/crawl/DeduplicationJob.java   |   4 -
 src/java/org/apache/nutch/crawl/Generator.java     |  31 ++-
 src/java/org/apache/nutch/crawl/Inlink.java        |   8 +-
 src/java/org/apache/nutch/crawl/Inlinks.java       |  19 +-
 src/java/org/apache/nutch/crawl/LinkDbFilter.java  |   2 -
 src/java/org/apache/nutch/crawl/LinkDbMerger.java  |   1 -
 src/java/org/apache/nutch/crawl/LinkDbReader.java  |  19 +-
 .../org/apache/nutch/crawl/SignatureFactory.java   |   1 -
 .../org/apache/nutch/crawl/URLPartitioner.java     |   3 +-
 src/java/org/apache/nutch/fetcher/FetchNodeDb.java |   1 -
 src/java/org/apache/nutch/fetcher/Fetcher.java     |  28 ++-
 .../apache/nutch/fetcher/FetcherOutputFormat.java  |   5 -
 .../org/apache/nutch/fetcher/FetcherThread.java    |   2 -
 .../apache/nutch/fetcher/FetcherThreadEvent.java   |   1 -
 src/java/org/apache/nutch/fetcher/QueueFeeder.java |   1 -
 src/java/org/apache/nutch/hostdb/HostDatum.java    |   2 -
 src/java/org/apache/nutch/hostdb/ReadHostDb.java   |   5 -
 src/java/org/apache/nutch/hostdb/UpdateHostDb.java |   6 -
 .../apache/nutch/hostdb/UpdateHostDbMapper.java    |   4 -
 .../apache/nutch/hostdb/UpdateHostDbReducer.java   |   3 -
 src/java/org/apache/nutch/indexer/CleaningJob.java |   3 -
 src/java/org/apache/nutch/indexer/IndexWriter.java |   1 -
 .../org/apache/nutch/indexer/IndexWriters.java     |   1 -
 .../org/apache/nutch/indexer/IndexerMapReduce.java |   5 -
 .../apache/nutch/indexer/IndexerOutputFormat.java  |   1 -
 .../org/apache/nutch/indexer/IndexingFilter.java   |   2 -
 .../org/apache/nutch/indexer/IndexingFilters.java  |   1 -
 .../nutch/indexer/IndexingFiltersChecker.java      |   1 -
 src/java/org/apache/nutch/indexer/IndexingJob.java |   1 -
 src/java/org/apache/nutch/indexer/NutchField.java  |  17 +-
 .../org/apache/nutch/metadata/CreativeCommons.java |   6 +-
 .../org/apache/nutch/metadata/HttpHeaders.java     |  18 +-
 .../org/apache/nutch/net/URLExemptionFilter.java   |   3 +-
 src/java/org/apache/nutch/net/URLFilter.java       |   2 -
 .../org/apache/nutch/net/URLFilterChecker.java     |   7 -
 .../org/apache/nutch/net/URLNormalizerChecker.java |   7 -
 .../org/apache/nutch/net/protocols/Response.java   |   2 -
 .../org/apache/nutch/parse/HtmlParseFilter.java    |   3 -
 src/java/org/apache/nutch/parse/ParseData.java     |  16 +-
 src/java/org/apache/nutch/parse/ParseImpl.java     |   7 +-
 .../org/apache/nutch/parse/ParseOutputFormat.java  |  19 +-
 .../org/apache/nutch/parse/ParsePluginList.java    |   1 -
 .../org/apache/nutch/parse/ParsePluginsReader.java |   4 -
 src/java/org/apache/nutch/parse/ParseSegment.java  |  39 ++--
 src/java/org/apache/nutch/parse/ParseText.java     |  24 +-
 src/java/org/apache/nutch/parse/ParseUtil.java     |   2 -
 src/java/org/apache/nutch/parse/Parser.java        |   2 -
 src/java/org/apache/nutch/parse/ParserFactory.java |   4 -
 src/java/org/apache/nutch/protocol/Content.java    |   3 -
 src/java/org/apache/nutch/protocol/Protocol.java   |   2 -
 .../org/apache/nutch/protocol/ProtocolFactory.java |   6 +-
 .../apache/nutch/protocol/RobotRulesParser.java    |   3 -
 .../apache/nutch/scoring/webgraph/LinkDumper.java  |   4 -
 .../apache/nutch/scoring/webgraph/LinkRank.java    |   2 -
 .../apache/nutch/scoring/webgraph/NodeDumper.java  |   2 -
 .../apache/nutch/scoring/webgraph/NodeReader.java  |   1 -
 .../nutch/scoring/webgraph/ScoreUpdater.java       |   2 -
 .../apache/nutch/scoring/webgraph/WebGraph.java    |   2 -
 .../org/apache/nutch/segment/SegmentReader.java    |   9 +-
 src/java/org/apache/nutch/service/NutchReader.java |   1 -
 .../org/apache/nutch/service/impl/NodeReader.java  |   1 -
 .../service/impl/NutchServerPoolExecutor.java      |   2 -
 .../nutch/service/model/request/JobConfig.java     |   1 -
 .../nutch/service/resources/ConfigResource.java    |   3 -
 .../nutch/service/resources/SeedResource.java      |   1 -
 .../nutch/tools/AbstractCommonCrawlFormat.java     |   2 -
 .../apache/nutch/tools/CommonCrawlDataDumper.java  |   5 -
 .../apache/nutch/tools/CommonCrawlFormatWARC.java  |   4 +-
 src/java/org/apache/nutch/tools/DmozParser.java    |  50 +++--
 src/java/org/apache/nutch/tools/FileDumper.java    |   5 -
 src/java/org/apache/nutch/tools/FreeGenerator.java |   2 -
 src/java/org/apache/nutch/tools/WARCUtils.java     | 218 +++++++++---------
 .../apache/nutch/tools/arc/ArcSegmentCreator.java  |   1 -
 .../org/apache/nutch/tools/warc/WARCExporter.java  |   2 -
 src/java/org/apache/nutch/util/DeflateUtils.java   |   1 -
 src/java/org/apache/nutch/util/DomUtil.java        |   1 -
 src/java/org/apache/nutch/util/GZIPUtils.java      |   1 -
 src/java/org/apache/nutch/util/MimeUtil.java       |   5 -
 .../nutch/util/ProtocolStatusStatistics.java       |   1 -
 src/java/org/apache/nutch/util/URLUtil.java        |   4 +-
 .../org/apache/nutch/webui/model/NutchConfig.java  |  16 ++
 .../creativecommons/nutch/CCIndexingFilter.java    |   2 +-
 .../org/creativecommons/nutch/CCParseFilter.java   |  38 ++--
 .../nutch/indexer/feed/FeedIndexingFilter.java     |   2 -
 .../org/apache/nutch/parse/feed/FeedParser.java    |   4 -
 .../nutch/parse/headings/HeadingsParseFilter.java  |   9 +-
 .../nutch/indexer/more/MoreIndexingFilter.java     |   4 +-
 .../nutch/indexer/replace/FieldReplacer.java       |   6 +-
 .../elasticrest/ElasticRestIndexWriter.java        |   1 -
 .../indexwriter/elastic/ElasticIndexWriter.java    |   3 +-
 .../indexwriter/rabbit/RabbitIndexWriter.java      | 246 ++++++++++-----------
 .../indexwriter/rabbit/RabbitMQConstants.java      |  24 +-
 .../nutch/indexwriter/rabbit/RabbitMessage.java    |  72 +++---
 .../nutch/indexwriter/solr/SolrIndexWriter.java    |   7 -
 .../apache/nutch/indexwriter/solr/SolrUtils.java   |   1 -
 .../nutch/analysis/lang/HTMLLanguageParser.java    |   1 -
 .../analysis/lang/LanguageIndexingFilter.java      |   2 -
 .../apache/nutch/protocol/http/api/HttpBase.java   |   5 -
 .../nutch/protocol/http/api/HttpException.java     |   1 -
 .../nutch/urlfilter/api/RegexURLFilterBase.java    |   7 +-
 .../microformats/reltag/RelTagIndexingFilter.java  |   2 -
 .../nutch/microformats/reltag/RelTagParser.java    |   4 -
 .../indexer/filter/MimeTypeIndexingFilter.java     |   1 -
 .../apache/nutch/parse/html/DOMContentUtils.java   |   6 +-
 .../apache/nutch/parse/html/HTMLMetaProcessor.java |   4 +-
 .../org/apache/nutch/parse/html/HtmlParser.java    |  40 +++-
 .../java/org/apache/nutch/parse/swf/SWFParser.java |  88 ++++----
 .../parse/tika/BoilerpipeExtractorRepository.java  |   6 +-
 .../apache/nutch/parse/tika/HTMLMetaProcessor.java |   4 +-
 .../org/apache/nutch/parse/tika/TikaParser.java    |   1 -
 .../apache/nutch/parse/zip/ZipTextExtractor.java   |   4 -
 .../nutch/parsefilter/naivebayes/Classify.java     |   1 -
 .../nutch/parsefilter/regex/RegexParseFilter.java  |   9 +-
 .../apache/nutch/protocol/file/FileResponse.java   |   4 -
 .../java/org/apache/nutch/protocol/ftp/Client.java |   1 -
 .../nutch/protocol/ftp/FtpRobotRulesParser.java    |   1 -
 .../java/org/apache/nutch/protocol/http/Http.java  |   4 -
 .../org/apache/nutch/protocol/httpclient/Http.java |   6 -
 .../httpclient/HttpAuthenticationFactory.java      |   4 -
 .../httpclient/HttpBasicAuthentication.java        |   4 -
 .../nutch/protocol/httpclient/HttpResponse.java    |   3 -
 .../nutch/protocol/interactiveselenium/Http.java   |   1 -
 .../protocol/interactiveselenium/HttpResponse.java |   1 -
 .../handlers/DefaultHandler.java                   |  15 +-
 .../handlers/InteractiveSeleniumHandler.java       |   2 +-
 .../org/apache/nutch/protocol/selenium/Http.java   |   1 -
 .../nutch/protocol/selenium/HttpResponse.java      |   1 -
 .../nutch/scoring/opic/OPICScoringFilter.java      |   1 -
 .../similarity/SimilarityScoringFilter.java        |   1 -
 .../similarity/cosine/CosineSimilarity.java        |   1 -
 .../nutch/scoring/similarity/cosine/Model.java     |   7 -
 .../similarity/util/LuceneAnalyzerUtil.java        |   1 -
 .../urlfilter/automaton/AutomatonURLFilter.java    |   4 -
 .../urlfilter/ignoreexempt/ExemptionUrlFilter.java |   4 -
 .../nutch/urlfilter/prefix/PrefixURLFilter.java    |   6 +-
 .../nutch/urlfilter/regex/RegexURLFilter.java      |   1 -
 .../nutch/urlfilter/suffix/SuffixURLFilter.java    |   3 +-
 .../net/urlnormalizer/ajax/AjaxURLNormalizer.java  |   2 -
 .../net/urlnormalizer/host/HostURLNormalizer.java  |   1 -
 .../protocol/ProtocolURLNormalizer.java            |   1 -
 .../querystring/QuerystringURLNormalizer.java      |   4 -
 .../urlnormalizer/slash/SlashURLNormalizer.java    |   7 +-
 150 files changed, 706 insertions(+), 791 deletions(-)


-- 
To stop receiving notification emails like this one, please contact
lewismc@apache.org.