You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ar...@apache.org on 2020/08/29 14:52:18 UTC
[lucene-solr] branch master updated: SOLR-14783: Remove DIH from
9.0 (#1794)
This is an automated email from the ASF dual-hosted git repository.
arafalov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new a57ba25 SOLR-14783: Remove DIH from 9.0 (#1794)
a57ba25 is described below
commit a57ba254001b59ab8f357162632bb28c0cabc7ba
Author: Alexandre Rafalovitch <ar...@gmail.com>
AuthorDate: Sat Aug 29 10:52:04 2020 -0400
SOLR-14783: Remove DIH from 9.0 (#1794)
* Remove DIH example directory
* Remove contrib code directories
* Remove contrib package related configurations for build tools
* Remove mention of DIH example
* Remove DIH as a build dependency and no-longer-needed version pins
* Remove README references to DIH
* Remove DIH mention from the script that probably does not need to exist at all
* More build artifact references
* More removed dependencies leftovers (licenses/versions)
* No need to smoke exclude DIH anymore
* Remove Admin UI's DIH integration
* Remove DIH from shortname package list
* Remove unused DIH (related? not?) dataset
Unclear what is happening here, but there is no reference to that directory anywhere else
The other parallel directories ARE referenced in a TestConfigSetsAPI.java
* Hidden Idea files references
* No DIH to ignore anymore
* Remove last Derby DB references
* Remove DIH from documentation
Add the information to the Major Changes document with a link to the external repo
* Added/updated a mention to CHANGES
* Fix leftover library mentions
* Fix Spellings
---
dev-tools/idea/.idea/libraries/Derby.xml | 9 -
dev-tools/idea/.idea/libraries/HSQLDB.xml | 9 -
.../idea/.idea/libraries/Solr_DIH_core_library.xml | 10 -
.../.idea/libraries/Solr_DIH_extras_library.xml | 10 -
.../idea/.idea/libraries/Solr_DIH_test_library.xml | 10 -
dev-tools/idea/.idea/modules.xml | 2 -
dev-tools/idea/.idea/workspace.xml | 30 +-
.../dataimporthandler-extras.iml | 29 -
.../dataimporthandler/dataimporthandler.iml | 31 -
dev-tools/maven/solr/contrib/pom.xml.template | 55 +
dev-tools/scripts/SOLR-2452.patch.hack.pl | 27 -
dev-tools/scripts/smokeTestRelease.py | 3 +-
gradle/ant-compat/resolve.gradle | 6 +-
gradle/ant-compat/test-classes-cross-deps.gradle | 12 +-
gradle/maven/defaults-maven.gradle | 2 -
gradle/testing/policies/solr-tests.policy | 4 +-
.../owasp-dependency-check/exclusions.xml | 24 -
lucene/ivy-versions.properties | 327 ++
settings.gradle | 2 -
solr/.gitignore | 5 -
solr/CHANGES.txt | 3 +
solr/README.md | 8 +-
solr/bin/solr | 1 -
solr/bin/solr.cmd | 1 -
solr/common-build.xml | 547 +++
solr/contrib/dataimporthandler-extras/build.gradle | 33 -
.../handler/dataimport/MailEntityProcessor.java | 901 -----
.../handler/dataimport/TikaEntityProcessor.java | 253 --
.../apache/solr/handler/dataimport/package.html | 23 -
.../src/java/overview.html | 21 -
.../src/resources/solr-default-tika-config.xml | 20 -
.../src/test-files/dihextras/bad.doc | Bin 116222 -> 0 bytes
.../src/test-files/dihextras/solr-word.pdf | Bin 21052 -> 0 bytes
.../conf/dataimport-schema-no-unique-key.xml | 205 --
.../collection1/conf/dataimport-solrconfig.xml | 277 --
.../src/test-files/dihextras/structured.html | 29 -
.../src/test-files/dihextras/test_jpeg.jpg | Bin 12924 -> 0 bytes
.../dihextras/test_recursive_embedded.docx | Bin 27082 -> 0 bytes
.../src/test-files/dihextras/test_vsdx.vsdx | Bin 44426 -> 0 bytes
.../dataimport/TestMailEntityProcessor.java | 199 -
.../dataimport/TestTikaEntityProcessor.java | 221 --
solr/contrib/dataimporthandler/README.md | 26 -
solr/contrib/dataimporthandler/build.gradle | 34 -
.../dataimport/BinContentStreamDataSource.java | 70 -
.../solr/handler/dataimport/BinFileDataSource.java | 64 -
.../solr/handler/dataimport/BinURLDataSource.java | 104 -
.../solr/handler/dataimport/CachePropertyUtil.java | 48 -
.../solr/handler/dataimport/ClobTransformer.java | 85 -
.../solr/handler/dataimport/ConfigParseUtil.java | 73 -
.../dataimport/ContentStreamDataSource.java | 69 -
.../apache/solr/handler/dataimport/Context.java | 221 --
.../solr/handler/dataimport/ContextImpl.java | 264 --
.../apache/solr/handler/dataimport/DIHCache.java | 103 -
.../solr/handler/dataimport/DIHCacheSupport.java | 279 --
.../solr/handler/dataimport/DIHLogLevels.java | 21 -
.../solr/handler/dataimport/DIHProperties.java | 45 -
.../apache/solr/handler/dataimport/DIHWriter.java | 99 -
.../solr/handler/dataimport/DIHWriterBase.java | 44 -
.../solr/handler/dataimport/DataImportHandler.java | 318 --
.../dataimport/DataImportHandlerException.java | 75 -
.../solr/handler/dataimport/DataImporter.java | 628 ----
.../apache/solr/handler/dataimport/DataSource.java | 66 -
.../handler/dataimport/DateFormatEvaluator.java | 180 -
.../handler/dataimport/DateFormatTransformer.java | 106 -
.../apache/solr/handler/dataimport/DebugInfo.java | 66 -
.../solr/handler/dataimport/DebugLogger.java | 295 --
.../apache/solr/handler/dataimport/DocBuilder.java | 1020 ------
.../solr/handler/dataimport/EntityProcessor.java | 114 -
.../handler/dataimport/EntityProcessorBase.java | 174 -
.../handler/dataimport/EntityProcessorWrapper.java | 357 --
.../apache/solr/handler/dataimport/Evaluator.java | 140 -
.../solr/handler/dataimport/EventListener.java | 35 -
.../handler/dataimport/FieldReaderDataSource.java | 122 -
.../handler/dataimport/FieldStreamDataSource.java | 85 -
.../solr/handler/dataimport/FileDataSource.java | 155 -
.../dataimport/FileListEntityProcessor.java | 305 --
.../handler/dataimport/HTMLStripTransformer.java | 96 -
.../solr/handler/dataimport/JdbcDataSource.java | 583 ---
.../handler/dataimport/LineEntityProcessor.java | 164 -
.../solr/handler/dataimport/LogTransformer.java | 67 -
.../solr/handler/dataimport/MockDataSource.java | 61 -
.../dataimport/NumberFormatTransformer.java | 134 -
.../dataimport/PlainTextEntityProcessor.java | 78 -
.../solr/handler/dataimport/RegexTransformer.java | 200 -
.../solr/handler/dataimport/RequestInfo.java | 177 -
.../solr/handler/dataimport/ScriptTransformer.java | 131 -
.../handler/dataimport/SimplePropertiesWriter.java | 247 --
.../handler/dataimport/SolrEntityProcessor.java | 321 --
.../dataimport/SolrQueryEscapingEvaluator.java | 35 -
.../apache/solr/handler/dataimport/SolrWriter.java | 175 -
.../handler/dataimport/SortedMapBackedCache.java | 238 --
.../handler/dataimport/SqlEntityProcessor.java | 173 -
.../handler/dataimport/SqlEscapingEvaluator.java | 41 -
.../handler/dataimport/TemplateTransformer.java | 115 -
.../solr/handler/dataimport/Transformer.java | 50 -
.../solr/handler/dataimport/URLDataSource.java | 154 -
.../solr/handler/dataimport/UrlEvaluator.java | 46 -
.../solr/handler/dataimport/VariableResolver.java | 211 --
.../handler/dataimport/XPathEntityProcessor.java | 555 ---
.../solr/handler/dataimport/XPathRecordReader.java | 670 ----
.../handler/dataimport/ZKPropertiesWriter.java | 95 -
.../org/apache/solr/handler/dataimport/Zipper.java | 115 -
.../dataimport/config/ConfigNameConstants.java | 59 -
.../handler/dataimport/config/ConfigParseUtil.java | 72 -
.../dataimport/config/DIHConfiguration.java | 199 -
.../solr/handler/dataimport/config/Entity.java | 228 --
.../handler/dataimport/config/EntityField.java | 102 -
.../solr/handler/dataimport/config/Field.java | 108 -
.../handler/dataimport/config/PropertyWriter.java | 38 -
.../solr/handler/dataimport/config/Script.java | 41 -
.../handler/dataimport/config/package-info.java | 24 -
.../solr/handler/dataimport/package-info.java | 25 -
.../dataimporthandler/src/java/overview.html | 21 -
.../collection1/conf/contentstream-solrconfig.xml | 287 --
.../collection1/conf/data-config-end-to-end.xml | 41 -
.../conf/data-config-with-datasource.xml | 9 -
.../conf/data-config-with-transformer.xml | 10 -
.../collection1/conf/dataconfig-contentstream.xml | 10 -
.../conf/dataimport-nodatasource-solrconfig.xml | 279 --
.../solr/collection1/conf/dataimport-schema.xml | 70 -
.../collection1/conf/dataimport-solr_id-schema.xml | 313 --
.../collection1/conf/dataimport-solrconfig.xml | 287 --
.../dih/solr/collection1/conf/protwords.txt | 20 -
.../collection1/conf/single-entity-data-config.xml | 9 -
.../dih/solr/collection1/conf/stopwords.txt | 16 -
.../dih/solr/collection1/conf/synonyms.txt | 22 -
.../src/test-files/dih/solr/solr.xml | 27 -
.../dataimporthandler/src/test-files/log4j2.xml | 42 -
.../src/test-files/solr/collection1/README | 1 -
.../solr/configsets/dihconfigset/conf/README | 2 -
.../solr/configsets/dihconfigset/conf/schema.xml | 70 -
.../configsets/dihconfigset/conf/solrconfig.xml | 287 --
.../dataimporthandler/src/test-files/solr/solr.xml | 27 -
.../dataimport/AbstractDIHCacheTestCase.java | 235 --
.../dataimport/AbstractDIHJdbcTestCase.java | 198 -
.../AbstractDataImportHandlerTestCase.java | 379 --
.../AbstractSqlEntityProcessorTestCase.java | 848 -----
.../handler/dataimport/AddAColumnTransformer.java | 31 -
.../solr/handler/dataimport/DestroyCountCache.java | 37 -
.../dataimport/MockInitialContextFactory.java | 52 -
.../dataimport/MockSolrEntityProcessor.java | 76 -
.../handler/dataimport/MockStringDataSource.java | 54 -
.../handler/dataimport/TestBuiltInEvaluators.java | 188 -
.../handler/dataimport/TestClobTransformer.java | 64 -
.../dataimport/TestContentStreamDataSource.java | 196 -
.../solr/handler/dataimport/TestContextImpl.java | 69 -
.../solr/handler/dataimport/TestDataConfig.java | 77 -
.../dataimport/TestDateFormatTransformer.java | 89 -
.../solr/handler/dataimport/TestDocBuilder.java | 341 --
.../solr/handler/dataimport/TestDocBuilder2.java | 445 ---
.../dataimport/TestEntityProcessorBase.java | 84 -
.../handler/dataimport/TestEphemeralCache.java | 143 -
.../solr/handler/dataimport/TestErrorHandling.java | 210 --
.../solr/handler/dataimport/TestFieldReader.java | 66 -
.../dataimport/TestFileListEntityProcessor.java | 194 -
.../TestFileListWithLineEntityProcessor.java | 64 -
.../dataimport/TestHierarchicalDocBuilder.java | 483 ---
.../handler/dataimport/TestJdbcDataSource.java | 663 ----
.../dataimport/TestJdbcDataSourceConvertType.java | 76 -
.../dataimport/TestLineEntityProcessor.java | 259 --
.../handler/dataimport/TestNestedChildren.java | 65 -
.../dataimport/TestNonWritablePersistFile.java | 102 -
.../dataimport/TestNumberFormatTransformer.java | 160 -
.../dataimport/TestPlainTextEntityProcessor.java | 182 -
.../handler/dataimport/TestRegexTransformer.java | 213 --
.../handler/dataimport/TestScriptTransformer.java | 173 -
.../dataimport/TestSimplePropertiesWriter.java | 135 -
.../TestSolrEntityProcessorEndToEnd.java | 374 --
.../dataimport/TestSolrEntityProcessorUnit.java | 188 -
.../dataimport/TestSortedMapBackedCache.java | 192 -
.../handler/dataimport/TestSqlEntityProcessor.java | 115 -
.../dataimport/TestSqlEntityProcessorDelta.java | 209 --
.../dataimport/TestTemplateTransformer.java | 115 -
.../solr/handler/dataimport/TestURLDataSource.java | 45 -
.../handler/dataimport/TestVariableResolver.java | 173 -
.../dataimport/TestVariableResolverEndToEnd.java | 141 -
.../solr/handler/dataimport/TestWriterImpl.java | 83 -
.../dataimport/TestXPathEntityProcessor.java | 506 ---
.../handler/dataimport/TestXPathRecordReader.java | 591 ---
.../handler/dataimport/TestZKPropertiesWriter.java | 279 --
.../dataimport/TripleThreatTransformer.java | 75 -
.../org/apache/solr/core/SolrResourceLoader.java | 2 +-
.../src/java/org/apache/solr/util/SolrCLI.java | 24 +-
.../upload/dih-script-transformer/managed-schema | 25 -
.../upload/dih-script-transformer/solrconfig.xml | 61 -
solr/example/README.md | 9 +-
solr/example/build.gradle | 8 -
solr/example/example-DIH/.gitignore | 1 -
solr/example/example-DIH/README.md | 55 -
solr/example/example-DIH/hsqldb/.gitignore | 5 -
solr/example/example-DIH/hsqldb/ex.script | 165 -
.../solr/atom/conf/atom-data-config.xml | 35 -
.../solr/atom/conf/lang/stopwords_en.txt | 54 -
.../example-DIH/solr/atom/conf/managed-schema | 106 -
.../example-DIH/solr/atom/conf/protwords.txt | 17 -
.../example-DIH/solr/atom/conf/solrconfig.xml | 64 -
.../example-DIH/solr/atom/conf/synonyms.txt | 29 -
.../example-DIH/solr/atom/conf/url_types.txt | 1 -
solr/example/example-DIH/solr/atom/core.properties | 0
.../conf/clustering/carrot2/kmeans-attributes.xml | 19 -
.../conf/clustering/carrot2/lingo-attributes.xml | 24 -
.../db/conf/clustering/carrot2/stc-attributes.xml | 19 -
solr/example/example-DIH/solr/db/conf/currency.xml | 67 -
.../example-DIH/solr/db/conf/db-data-config.xml | 29 -
solr/example/example-DIH/solr/db/conf/elevate.xml | 42 -
.../solr/db/conf/lang/contractions_ca.txt | 8 -
.../solr/db/conf/lang/contractions_fr.txt | 15 -
.../solr/db/conf/lang/contractions_ga.txt | 5 -
.../solr/db/conf/lang/contractions_it.txt | 23 -
.../solr/db/conf/lang/hyphenations_ga.txt | 5 -
.../example-DIH/solr/db/conf/lang/stemdict_nl.txt | 6 -
.../example-DIH/solr/db/conf/lang/stoptags_ja.txt | 420 ---
.../example-DIH/solr/db/conf/lang/stopwords_ar.txt | 125 -
.../example-DIH/solr/db/conf/lang/stopwords_bg.txt | 193 -
.../example-DIH/solr/db/conf/lang/stopwords_ca.txt | 220 --
.../solr/db/conf/lang/stopwords_ckb.txt | 136 -
.../example-DIH/solr/db/conf/lang/stopwords_cz.txt | 172 -
.../example-DIH/solr/db/conf/lang/stopwords_da.txt | 110 -
.../example-DIH/solr/db/conf/lang/stopwords_de.txt | 294 --
.../example-DIH/solr/db/conf/lang/stopwords_el.txt | 78 -
.../example-DIH/solr/db/conf/lang/stopwords_en.txt | 54 -
.../example-DIH/solr/db/conf/lang/stopwords_es.txt | 356 --
.../example-DIH/solr/db/conf/lang/stopwords_eu.txt | 99 -
.../example-DIH/solr/db/conf/lang/stopwords_fa.txt | 313 --
.../example-DIH/solr/db/conf/lang/stopwords_fi.txt | 97 -
.../example-DIH/solr/db/conf/lang/stopwords_fr.txt | 186 -
.../example-DIH/solr/db/conf/lang/stopwords_ga.txt | 110 -
.../example-DIH/solr/db/conf/lang/stopwords_gl.txt | 161 -
.../example-DIH/solr/db/conf/lang/stopwords_hi.txt | 235 --
.../example-DIH/solr/db/conf/lang/stopwords_hu.txt | 211 --
.../example-DIH/solr/db/conf/lang/stopwords_hy.txt | 46 -
.../example-DIH/solr/db/conf/lang/stopwords_id.txt | 359 --
.../example-DIH/solr/db/conf/lang/stopwords_it.txt | 303 --
.../example-DIH/solr/db/conf/lang/stopwords_ja.txt | 127 -
.../example-DIH/solr/db/conf/lang/stopwords_lv.txt | 172 -
.../example-DIH/solr/db/conf/lang/stopwords_nl.txt | 119 -
.../example-DIH/solr/db/conf/lang/stopwords_no.txt | 194 -
.../example-DIH/solr/db/conf/lang/stopwords_pt.txt | 253 --
.../example-DIH/solr/db/conf/lang/stopwords_ro.txt | 233 --
.../example-DIH/solr/db/conf/lang/stopwords_ru.txt | 243 --
.../example-DIH/solr/db/conf/lang/stopwords_sv.txt | 133 -
.../example-DIH/solr/db/conf/lang/stopwords_th.txt | 119 -
.../example-DIH/solr/db/conf/lang/stopwords_tr.txt | 212 --
.../example-DIH/solr/db/conf/lang/userdict_ja.txt | 29 -
.../example-DIH/solr/db/conf/managed-schema | 1143 ------
.../solr/db/conf/mapping-FoldToASCII.txt | 3813 --------------------
.../solr/db/conf/mapping-ISOLatin1Accent.txt | 246 --
.../example/example-DIH/solr/db/conf/protwords.txt | 21 -
.../example-DIH/solr/db/conf/solrconfig.xml | 1342 -------
.../example/example-DIH/solr/db/conf/spellings.txt | 2 -
.../example/example-DIH/solr/db/conf/stopwords.txt | 14 -
solr/example/example-DIH/solr/db/conf/synonyms.txt | 29 -
.../example-DIH/solr/db/conf/update-script.js | 53 -
.../example-DIH/solr/db/conf/xslt/example.xsl | 132 -
.../example-DIH/solr/db/conf/xslt/example_atom.xsl | 67 -
.../example-DIH/solr/db/conf/xslt/example_rss.xsl | 66 -
.../example/example-DIH/solr/db/conf/xslt/luke.xsl | 337 --
.../example-DIH/solr/db/conf/xslt/updateXml.xsl | 70 -
solr/example/example-DIH/solr/db/core.properties | 0
.../conf/clustering/carrot2/kmeans-attributes.xml | 19 -
.../conf/clustering/carrot2/lingo-attributes.xml | 24 -
.../conf/clustering/carrot2/stc-attributes.xml | 19 -
.../example-DIH/solr/mail/conf/currency.xml | 67 -
.../example/example-DIH/solr/mail/conf/elevate.xml | 42 -
.../solr/mail/conf/lang/contractions_ca.txt | 8 -
.../solr/mail/conf/lang/contractions_fr.txt | 15 -
.../solr/mail/conf/lang/contractions_ga.txt | 5 -
.../solr/mail/conf/lang/contractions_it.txt | 23 -
.../solr/mail/conf/lang/hyphenations_ga.txt | 5 -
.../solr/mail/conf/lang/stemdict_nl.txt | 6 -
.../solr/mail/conf/lang/stoptags_ja.txt | 420 ---
.../solr/mail/conf/lang/stopwords_ar.txt | 125 -
.../solr/mail/conf/lang/stopwords_bg.txt | 193 -
.../solr/mail/conf/lang/stopwords_ca.txt | 220 --
.../solr/mail/conf/lang/stopwords_ckb.txt | 136 -
.../solr/mail/conf/lang/stopwords_cz.txt | 172 -
.../solr/mail/conf/lang/stopwords_da.txt | 110 -
.../solr/mail/conf/lang/stopwords_de.txt | 294 --
.../solr/mail/conf/lang/stopwords_el.txt | 78 -
.../solr/mail/conf/lang/stopwords_en.txt | 54 -
.../solr/mail/conf/lang/stopwords_es.txt | 356 --
.../solr/mail/conf/lang/stopwords_eu.txt | 99 -
.../solr/mail/conf/lang/stopwords_fa.txt | 313 --
.../solr/mail/conf/lang/stopwords_fi.txt | 97 -
.../solr/mail/conf/lang/stopwords_fr.txt | 186 -
.../solr/mail/conf/lang/stopwords_ga.txt | 110 -
.../solr/mail/conf/lang/stopwords_gl.txt | 161 -
.../solr/mail/conf/lang/stopwords_hi.txt | 235 --
.../solr/mail/conf/lang/stopwords_hu.txt | 211 --
.../solr/mail/conf/lang/stopwords_hy.txt | 46 -
.../solr/mail/conf/lang/stopwords_id.txt | 359 --
.../solr/mail/conf/lang/stopwords_it.txt | 303 --
.../solr/mail/conf/lang/stopwords_ja.txt | 127 -
.../solr/mail/conf/lang/stopwords_lv.txt | 172 -
.../solr/mail/conf/lang/stopwords_nl.txt | 119 -
.../solr/mail/conf/lang/stopwords_no.txt | 194 -
.../solr/mail/conf/lang/stopwords_pt.txt | 253 --
.../solr/mail/conf/lang/stopwords_ro.txt | 233 --
.../solr/mail/conf/lang/stopwords_ru.txt | 243 --
.../solr/mail/conf/lang/stopwords_sv.txt | 133 -
.../solr/mail/conf/lang/stopwords_th.txt | 119 -
.../solr/mail/conf/lang/stopwords_tr.txt | 212 --
.../solr/mail/conf/lang/userdict_ja.txt | 29 -
.../solr/mail/conf/mail-data-config.xml | 12 -
.../example-DIH/solr/mail/conf/managed-schema | 1062 ------
.../solr/mail/conf/mapping-FoldToASCII.txt | 3813 --------------------
.../solr/mail/conf/mapping-ISOLatin1Accent.txt | 246 --
.../example-DIH/solr/mail/conf/protwords.txt | 21 -
.../example-DIH/solr/mail/conf/solrconfig.xml | 1345 -------
.../example-DIH/solr/mail/conf/spellings.txt | 2 -
.../example-DIH/solr/mail/conf/stopwords.txt | 14 -
.../example-DIH/solr/mail/conf/synonyms.txt | 29 -
.../example-DIH/solr/mail/conf/update-script.js | 53 -
.../example-DIH/solr/mail/conf/xslt/example.xsl | 132 -
.../solr/mail/conf/xslt/example_atom.xsl | 67 -
.../solr/mail/conf/xslt/example_rss.xsl | 66 -
.../example-DIH/solr/mail/conf/xslt/luke.xsl | 337 --
.../example-DIH/solr/mail/conf/xslt/updateXml.xsl | 70 -
solr/example/example-DIH/solr/mail/core.properties | 0
solr/example/example-DIH/solr/solr.xml | 2 -
.../conf/clustering/carrot2/kmeans-attributes.xml | 19 -
.../conf/clustering/carrot2/lingo-attributes.xml | 24 -
.../conf/clustering/carrot2/stc-attributes.xml | 19 -
.../example-DIH/solr/solr/conf/currency.xml | 67 -
.../example/example-DIH/solr/solr/conf/elevate.xml | 42 -
.../solr/solr/conf/lang/contractions_ca.txt | 8 -
.../solr/solr/conf/lang/contractions_fr.txt | 15 -
.../solr/solr/conf/lang/contractions_ga.txt | 5 -
.../solr/solr/conf/lang/contractions_it.txt | 23 -
.../solr/solr/conf/lang/hyphenations_ga.txt | 5 -
.../solr/solr/conf/lang/stemdict_nl.txt | 6 -
.../solr/solr/conf/lang/stoptags_ja.txt | 420 ---
.../solr/solr/conf/lang/stopwords_ar.txt | 125 -
.../solr/solr/conf/lang/stopwords_bg.txt | 193 -
.../solr/solr/conf/lang/stopwords_ca.txt | 220 --
.../solr/solr/conf/lang/stopwords_ckb.txt | 136 -
.../solr/solr/conf/lang/stopwords_cz.txt | 172 -
.../solr/solr/conf/lang/stopwords_da.txt | 110 -
.../solr/solr/conf/lang/stopwords_de.txt | 294 --
.../solr/solr/conf/lang/stopwords_el.txt | 78 -
.../solr/solr/conf/lang/stopwords_en.txt | 54 -
.../solr/solr/conf/lang/stopwords_es.txt | 356 --
.../solr/solr/conf/lang/stopwords_eu.txt | 99 -
.../solr/solr/conf/lang/stopwords_fa.txt | 313 --
.../solr/solr/conf/lang/stopwords_fi.txt | 97 -
.../solr/solr/conf/lang/stopwords_fr.txt | 186 -
.../solr/solr/conf/lang/stopwords_ga.txt | 110 -
.../solr/solr/conf/lang/stopwords_gl.txt | 161 -
.../solr/solr/conf/lang/stopwords_hi.txt | 235 --
.../solr/solr/conf/lang/stopwords_hu.txt | 211 --
.../solr/solr/conf/lang/stopwords_hy.txt | 46 -
.../solr/solr/conf/lang/stopwords_id.txt | 359 --
.../solr/solr/conf/lang/stopwords_it.txt | 303 --
.../solr/solr/conf/lang/stopwords_ja.txt | 127 -
.../solr/solr/conf/lang/stopwords_lv.txt | 172 -
.../solr/solr/conf/lang/stopwords_nl.txt | 119 -
.../solr/solr/conf/lang/stopwords_no.txt | 194 -
.../solr/solr/conf/lang/stopwords_pt.txt | 253 --
.../solr/solr/conf/lang/stopwords_ro.txt | 233 --
.../solr/solr/conf/lang/stopwords_ru.txt | 243 --
.../solr/solr/conf/lang/stopwords_sv.txt | 133 -
.../solr/solr/conf/lang/stopwords_th.txt | 119 -
.../solr/solr/conf/lang/stopwords_tr.txt | 212 --
.../solr/solr/conf/lang/userdict_ja.txt | 29 -
.../example-DIH/solr/solr/conf/managed-schema | 1143 ------
.../solr/solr/conf/mapping-FoldToASCII.txt | 3813 --------------------
.../solr/solr/conf/mapping-ISOLatin1Accent.txt | 246 --
.../example-DIH/solr/solr/conf/protwords.txt | 21 -
.../solr/solr/conf/solr-data-config.xml | 25 -
.../example-DIH/solr/solr/conf/solrconfig.xml | 1340 -------
.../example-DIH/solr/solr/conf/spellings.txt | 2 -
.../example-DIH/solr/solr/conf/stopwords.txt | 14 -
.../example-DIH/solr/solr/conf/synonyms.txt | 29 -
.../example-DIH/solr/solr/conf/update-script.js | 53 -
.../example-DIH/solr/solr/conf/xslt/example.xsl | 132 -
.../solr/solr/conf/xslt/example_atom.xsl | 67 -
.../solr/solr/conf/xslt/example_rss.xsl | 66 -
.../example-DIH/solr/solr/conf/xslt/luke.xsl | 337 --
.../example-DIH/solr/solr/conf/xslt/updateXml.xsl | 70 -
solr/example/example-DIH/solr/solr/core.properties | 0
.../example-DIH/solr/tika/conf/managed-schema | 54 -
.../example-DIH/solr/tika/conf/solrconfig.xml | 61 -
.../solr/tika/conf/tika-data-config.xml | 26 -
solr/example/example-DIH/solr/tika/core.properties | 0
solr/licenses/activation-1.1.1.jar.sha1 | 1 -
solr/licenses/activation-LICENSE-CDDL.txt | 119 -
solr/licenses/derby-10.9.1.0.jar.sha1 | 1 -
solr/licenses/derby-LICENSE-ASL.txt | 202 --
solr/licenses/derby-NOTICE.txt | 182 -
solr/licenses/gimap-1.5.1.jar.sha1 | 1 -
solr/licenses/gimap-LICENSE-CDDL.txt | 135 -
solr/licenses/javax.mail-1.5.1.jar.sha1 | 1 -
solr/licenses/javax.mail-LICENSE-CDDL.txt | 135 -
solr/packaging/build.gradle | 2 -
solr/server/README.md | 4 +-
solr/server/etc/security.policy | 2 +-
.../src/collection-specific-tools.adoc | 3 +-
solr/solr-ref-guide/src/config-sets.adoc | 2 +-
solr/solr-ref-guide/src/configsets-api.adoc | 3 +-
.../src/configuring-solrconfig-xml.adoc | 13 +-
solr/solr-ref-guide/src/core-specific-tools.adoc | 1 -
solr/solr-ref-guide/src/dataimport-screen.adoc | 28 -
.../src/images/dataimport-screen/dataimport.png | Bin 126440 -> 0 bytes
.../src/indexing-and-basic-data-operations.adoc | 3 -
solr/solr-ref-guide/src/installing-solr.adoc | 5 +-
.../src/major-changes-in-solr-9.adoc | 2 +
.../src/solr-control-script-reference.adoc | 6 -
solr/solr-ref-guide/src/solr-tutorial.adoc | 5 -
.../src/uploading-data-with-index-handlers.adoc | 2 +-
...ta-store-data-with-the-data-import-handler.adoc | 1077 ------
...ing-the-solr-administration-user-interface.adoc | 1 -
solr/webapp/web/css/angular/dataimport.css | 371 --
solr/webapp/web/css/angular/menu.css | 1 -
solr/webapp/web/index.html | 4 -
solr/webapp/web/js/angular/app.js | 16 -
.../web/js/angular/controllers/dataimport.js | 302 --
solr/webapp/web/js/angular/services.js | 15 -
solr/webapp/web/partials/dataimport.html | 210 --
versions.lock | 4 -
versions.props | 3 -
420 files changed, 972 insertions(+), 70374 deletions(-)
diff --git a/dev-tools/idea/.idea/libraries/Derby.xml b/dev-tools/idea/.idea/libraries/Derby.xml
deleted file mode 100644
index a23a28e..0000000
--- a/dev-tools/idea/.idea/libraries/Derby.xml
+++ /dev/null
@@ -1,9 +0,0 @@
-<component name="libraryTable">
- <library name="Derby">
- <CLASSES>
- <root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/derby-10.9.1.0.jar!/" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- </library>
-</component>
\ No newline at end of file
diff --git a/dev-tools/idea/.idea/libraries/HSQLDB.xml b/dev-tools/idea/.idea/libraries/HSQLDB.xml
deleted file mode 100644
index 39efcbf..0000000
--- a/dev-tools/idea/.idea/libraries/HSQLDB.xml
+++ /dev/null
@@ -1,9 +0,0 @@
-<component name="libraryTable">
- <library name="HSQLDB">
- <CLASSES>
- <root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-2.4.0.jar!/" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- </library>
-</component>
\ No newline at end of file
diff --git a/dev-tools/idea/.idea/libraries/Solr_DIH_core_library.xml b/dev-tools/idea/.idea/libraries/Solr_DIH_core_library.xml
deleted file mode 100644
index d363b92..0000000
--- a/dev-tools/idea/.idea/libraries/Solr_DIH_core_library.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-<component name="libraryTable">
- <library name="Solr DIH core library">
- <CLASSES>
- <root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/lib" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- <jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/lib" recursive="false" />
- </library>
-</component>
diff --git a/dev-tools/idea/.idea/libraries/Solr_DIH_extras_library.xml b/dev-tools/idea/.idea/libraries/Solr_DIH_extras_library.xml
deleted file mode 100644
index 1bfc63b..0000000
--- a/dev-tools/idea/.idea/libraries/Solr_DIH_extras_library.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-<component name="libraryTable">
- <library name="Solr DIH extras library">
- <CLASSES>
- <root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/lib" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- <jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/lib" recursive="false" />
- </library>
-</component>
\ No newline at end of file
diff --git a/dev-tools/idea/.idea/libraries/Solr_DIH_test_library.xml b/dev-tools/idea/.idea/libraries/Solr_DIH_test_library.xml
deleted file mode 100644
index 304589c..0000000
--- a/dev-tools/idea/.idea/libraries/Solr_DIH_test_library.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-<component name="libraryTable">
- <library name="Solr DIH test library">
- <CLASSES>
- <root url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/test-lib" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- <jarDirectory url="file://$PROJECT_DIR$/solr/contrib/dataimporthandler/test-lib" recursive="false" />
- </library>
-</component>
\ No newline at end of file
diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml
index e87ff94..53f2bda 100644
--- a/dev-tools/idea/.idea/modules.xml
+++ b/dev-tools/idea/.idea/modules.xml
@@ -53,8 +53,6 @@
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/analytics/analytics.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />
- <module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml" />
- <module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/dataimporthandler/dataimporthandler.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/extraction/extraction.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/langid/langid.iml" />
<module group="Solr/Contrib" filepath="$PROJECT_DIR$/solr/contrib/ltr/ltr.iml" />
diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml
index 8503297..49ddb4f 100644
--- a/dev-tools/idea/.idea/workspace.xml
+++ b/dev-tools/idea/.idea/workspace.xml
@@ -284,22 +284,6 @@
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
- <configuration default="false" name="Solr dataimporthandler contrib" type="JUnit" factoryName="JUnit">
- <module name="dataimporthandler" />
- <option name="TEST_OBJECT" value="pattern" />
- <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-dataimporthandler" />
- <option name="VM_PARAMETERS" value="-ea -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
- <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
- <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
- </configuration>
- <configuration default="false" name="Solr dataimporthandler-extras contrib" type="JUnit" factoryName="JUnit">
- <module name="dataimporthandler-extras" />
- <option name="TEST_OBJECT" value="pattern" />
- <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/solr/contrib/solr-dataimporthandler-extras" />
- <option name="VM_PARAMETERS" value="-ea -DtempDir=temp -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory" />
- <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
- <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
- </configuration>
<configuration default="false" name="Solr extraction contrib" type="JUnit" factoryName="JUnit">
<module name="extraction" />
<option name="TEST_OBJECT" value="pattern" />
@@ -341,7 +325,7 @@
<patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
</configuration>
- <list size="42">
+ <list size="39">
<item index="0" class="java.lang.String" itemvalue="JUnit.Lucene core" />
<item index="1" class="java.lang.String" itemvalue="JUnit.Module analyzers-common" />
<item index="2" class="java.lang.String" itemvalue="JUnit.Module analyzers-icu" />
@@ -376,13 +360,11 @@
<item index="32" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
<item index="33" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
<item index="34" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
- <item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
- <item index="36" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
- <item index="37" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
- <item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
- <item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
- <item index="40" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
- <item index="42" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
+ <item index="35" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
+ <item index="36" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
+ <item index="37" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
+ <item index="38" class="java.lang.String" itemvalue="JUnit.Solr prometheus-exporter contrib" />
+ <item index="39" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
</list>
</component>
</project>
diff --git a/dev-tools/idea/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml b/dev-tools/idea/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml
deleted file mode 100644
index 8bc21aa..0000000
--- a/dev-tools/idea/solr/contrib/dataimporthandler-extras/dataimporthandler-extras.iml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="JAVA_MODULE" version="4">
- <component name="NewModuleRootManager" inherit-compiler-output="false">
- <output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler-extras/classes/java" />
- <output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler-extras/classes/test" />
- <exclude-output />
- <content url="file://$MODULE_DIR$">
- <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
- <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
- <sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
- <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
- </content>
- <orderEntry type="inheritedJdk" />
- <orderEntry type="sourceFolder" forTests="false" />
- <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
- <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
- <orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
- <orderEntry type="module" scope="TEST" module-name="lucene-core" />
- <orderEntry type="library" name="Solr core library" level="project" />
- <orderEntry type="library" name="Solrj library" level="project" />
- <orderEntry type="library" name="Solr DIH extras library" level="project" />
- <orderEntry type="library" name="Solr extraction library" level="project" />
- <orderEntry type="module" module-name="solr-core" />
- <orderEntry type="module" module-name="solrj" />
- <orderEntry type="module" module-name="dataimporthandler" />
- <orderEntry type="module" module-name="analysis-common" />
- </component>
-</module>
-
diff --git a/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml b/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml
deleted file mode 100644
index 8240ff2..0000000
--- a/dev-tools/idea/solr/contrib/dataimporthandler/dataimporthandler.iml
+++ /dev/null
@@ -1,31 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="JAVA_MODULE" version="4">
- <component name="NewModuleRootManager" inherit-compiler-output="false">
- <output url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler/classes/java" />
- <output-test url="file://$MODULE_DIR$/../../../idea-build/solr/contrib/solr-dataimporthandler/classes/test" />
- <exclude-output />
- <content url="file://$MODULE_DIR$">
- <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
- <sourceFolder url="file://$MODULE_DIR$/src/webapp" isTestSource="false" />
- <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
- <sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
- </content>
- <orderEntry type="inheritedJdk" />
- <orderEntry type="sourceFolder" forTests="false" />
- <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
- <orderEntry type="library" scope="TEST" name="HSQLDB" level="project" />
- <orderEntry type="library" scope="TEST" name="Derby" level="project" />
- <orderEntry type="library" scope="TEST" name="Solr DIH test library" level="project" />
- <orderEntry type="library" name="Solr example library" level="project" />
- <orderEntry type="library" name="Solr core library" level="project" />
- <orderEntry type="library" name="Solrj library" level="project" />
- <orderEntry type="library" name="Solr DIH core library" level="project" />
- <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
- <orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
- <orderEntry type="module" module-name="solr-core" />
- <orderEntry type="module" module-name="solrj" />
- <orderEntry type="module" module-name="analysis-common" />
- <orderEntry type="module" module-name="lucene-core" />
- <orderEntry type="module" scope="TEST" module-name="join" />
- </component>
-</module>
diff --git a/dev-tools/maven/solr/contrib/pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template
new file mode 100644
index 0000000..0b1f83c
--- /dev/null
+++ b/dev-tools/maven/solr/contrib/pom.xml.template
@@ -0,0 +1,55 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-parent</artifactId>
+ <version>@version@</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-contrib-aggregator</artifactId>
+ <name>Apache Solr Contrib aggregator POM</name>
+ <packaging>pom</packaging>
+ <modules>
+ <module>analysis-extras</module>
+ <module>analytics</module>
+ <module>clustering</module>
+ <module>extraction</module>
+ <module>jaegertracer-configurator</module>
+ <module>langid</module>
+ <module>ltr</module>
+ <module>prometheus-exporter</module>
+ <module>velocity</module>
+ </modules>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-deploy-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/dev-tools/scripts/SOLR-2452.patch.hack.pl b/dev-tools/scripts/SOLR-2452.patch.hack.pl
index 2f6c7fc..244242c 100755
--- a/dev-tools/scripts/SOLR-2452.patch.hack.pl
+++ b/dev-tools/scripts/SOLR-2452.patch.hack.pl
@@ -48,33 +48,6 @@ my @moves = (
'solr/contrib/clustering/src/main/java'
=> 'solr/contrib/clustering/src/java',
- 'solr/contrib/dataimporthandler/src/test/java'
- => 'solr/contrib/dataimporthandler/src/test',
-
- 'solr/contrib/dataimporthandler/src/test/resources/solr-dih'
- => 'solr/contrib/dataimporthandler/src/test-files/dih/solr',
-
- 'solr/contrib/dataimporthandler/src/test/resources'
- => 'solr/contrib/dataimporthandler/src/test-files/dih',
-
- 'solr/contrib/dataimporthandler/src/main/java'
- => 'solr/contrib/dataimporthandler/src/java',
-
- 'solr/contrib/dataimporthandler/src/main/webapp'
- => 'solr/contrib/dataimporthandler/src/webapp',
-
- 'solr/contrib/dataimporthandler/src/extras/test/java'
- => 'solr/contrib/dataimporthandler-extras/src/test',
-
- 'solr/contrib/dataimporthandler/src/extras/test/resources/solr-dihextras'
- => 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr',
-
- 'solr/contrib/dataimporthandler/src/extras/test/resources'
- => 'solr/contrib/dataimporthandler-extras/src/test-files/dihextras',
-
- 'solr/contrib/dataimporthandler/src/extras/main/java'
- => 'solr/contrib/dataimporthandler-extras/src/java',
-
'solr/contrib/extraction/src/test/java'
=> 'solr/contrib/extraction/src/test',
diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py
index 768474b..e2d336d 100755
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@@ -225,8 +225,7 @@ def checkAllJARs(topDir, project, gitRevision, version, tmpDir, baseURL):
for file in files:
if file.lower().endswith('.jar'):
if project == 'solr':
- if ((normRoot.endswith('/contrib/dataimporthandler-extras/lib') and (file.startswith('javax.mail-') or file.startswith('activation-')))
- or (normRoot.endswith('/test-framework/lib') and file.startswith('jersey-'))
+ if ((normRoot.endswith('/test-framework/lib') and file.startswith('jersey-'))
or (normRoot.endswith('/contrib/extraction/lib') and file.startswith('xml-apis-'))):
print(' **WARNING**: skipping check of %s/%s: it has javax.* classes' % (root, file))
continue
diff --git a/gradle/ant-compat/resolve.gradle b/gradle/ant-compat/resolve.gradle
index ee18aa8..53e4dc5 100644
--- a/gradle/ant-compat/resolve.gradle
+++ b/gradle/ant-compat/resolve.gradle
@@ -164,10 +164,6 @@ configure(project(":solr:example")) {
into "exampledocs/"
})
- from(configurations.dih, {
- into "example-DIH/solr/db/lib"
- })
-
into projectDir
}
}
@@ -224,4 +220,4 @@ configure(project(":solr:solrj")) {
into "lib"
}
-}
\ No newline at end of file
+}
diff --git a/gradle/ant-compat/test-classes-cross-deps.gradle b/gradle/ant-compat/test-classes-cross-deps.gradle
index 1c32dba..d0985eb 100644
--- a/gradle/ant-compat/test-classes-cross-deps.gradle
+++ b/gradle/ant-compat/test-classes-cross-deps.gradle
@@ -20,8 +20,7 @@
configure([project(":lucene:spatial3d"),
project(":lucene:analysis:common"),
project(":lucene:backward-codecs"),
- project(":lucene:queryparser"),
- project(":solr:contrib:dataimporthandler")]) {
+ project(":lucene:queryparser")]) {
plugins.withType(JavaPlugin) {
configurations {
testClassesExported
@@ -56,15 +55,6 @@ configure(project(":solr:contrib:analysis-extras")) {
plugins.withType(JavaPlugin) {
dependencies {
testImplementation project(path: ':lucene:analysis:common', configuration: 'testClassesExported')
- testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported')
- }
- }
-}
-
-configure(project(":solr:contrib:dataimporthandler-extras")) {
- plugins.withType(JavaPlugin) {
- dependencies {
- testImplementation project(path: ':solr:contrib:dataimporthandler', configuration: 'testClassesExported')
}
}
}
diff --git a/gradle/maven/defaults-maven.gradle b/gradle/maven/defaults-maven.gradle
index 6c4b458..570d011 100644
--- a/gradle/maven/defaults-maven.gradle
+++ b/gradle/maven/defaults-maven.gradle
@@ -60,8 +60,6 @@ configure(rootProject) {
":solr:core",
":solr:solrj",
":solr:contrib:analysis-extras",
- ":solr:contrib:dataimporthandler",
- ":solr:contrib:dataimporthandler-extras",
":solr:contrib:analytics",
":solr:contrib:clustering",
":solr:contrib:extraction",
diff --git a/gradle/testing/policies/solr-tests.policy b/gradle/testing/policies/solr-tests.policy
index 1290a38..35b3e84 100644
--- a/gradle/testing/policies/solr-tests.policy
+++ b/gradle/testing/policies/solr-tests.policy
@@ -108,7 +108,7 @@ grant {
// needed by hadoop htrace
permission java.net.NetPermission "getNetworkInformation";
- // needed by DIH
+ // needed by DIH - possibly even after DIH is a package
permission java.sql.SQLPermission "deregisterDriver";
permission java.util.logging.LoggingPermission "control";
@@ -214,4 +214,4 @@ grant {
permission java.io.FilePermission "${gradle.worker.jar}", "read";
// Allow reading from classpath JARs (resources).
permission java.io.FilePermission "${gradle.user.home}${/}-", "read";
-};
\ No newline at end of file
+};
diff --git a/gradle/validation/owasp-dependency-check/exclusions.xml b/gradle/validation/owasp-dependency-check/exclusions.xml
index d6de0e4..0a77b99 100644
--- a/gradle/validation/owasp-dependency-check/exclusions.xml
+++ b/gradle/validation/owasp-dependency-check/exclusions.xml
@@ -48,30 +48,6 @@
</suppress>
<suppress>
<notes><![CDATA[
- file name: derby-10.9.1.0.jar
- Only used in tests and dih-example
- ]]></notes>
- <packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
- <cpe>cpe:/a:apache:derby</cpe>
- </suppress>
- <suppress>
- <notes><![CDATA[
- file name: derby-10.9.1.0.jar
- Only used in tests and dih-example
- ]]></notes>
- <packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
- <vulnerabilityName>CVE-2015-1832</vulnerabilityName>
- </suppress>
- <suppress>
- <notes><![CDATA[
- file name: derby-10.9.1.0.jar
- Only used in tests and dih-example
- ]]></notes>
- <packageUrl regex="true">^pkg:maven/org\.apache\.derby/derby@.*$</packageUrl>
- <vulnerabilityName>CVE-2018-1313</vulnerabilityName>
- </suppress>
- <suppress>
- <notes><![CDATA[
file name: carrot2-guava-18.0.jar
Only used with clustering engine, and the risk is DOS attack
]]></notes>
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
new file mode 100644
index 0000000..9057499
--- /dev/null
+++ b/lucene/ivy-versions.properties
@@ -0,0 +1,327 @@
+# The /org/name keys in this file must be kept lexically sorted.
+# Blank lines, comment lines, and keys that aren't in /org/name format are ignored
+# when the lexical sort check is performed by the ant check-lib-versions target.
+
+/com.adobe.xmp/xmpcore = 5.1.3
+
+com.carrotsearch.randomizedtesting.version = 2.7.6
+/com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version}
+/com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version}
+
+/com.carrotsearch.thirdparty/simple-xml-safe = 2.7.1
+
+/com.carrotsearch/hppc = 0.8.2
+
+/com.cybozu.labs/langdetect = 1.1-20120112
+/com.drewnoakes/metadata-extractor = 2.11.0
+
+/com.epam/parso = 2.0.11
+
+com.fasterxml.jackson.core.version = 2.10.1
+/com.fasterxml.jackson.core/jackson-annotations = ${com.fasterxml.jackson.core.version}
+/com.fasterxml.jackson.core/jackson-core = ${com.fasterxml.jackson.core.version}
+/com.fasterxml.jackson.core/jackson-databind = ${com.fasterxml.jackson.core.version}
+/com.fasterxml.jackson.dataformat/jackson-dataformat-smile = ${com.fasterxml.jackson.core.version}
+
+/com.github.ben-manes.caffeine/caffeine = 2.8.4
+/com.github.virtuald/curvesapi = 1.06
+
+/com.github.zafarkhaja/java-semver = 0.9.0
+
+/com.google.guava/guava = 25.1-jre
+/com.google.protobuf/protobuf-java = 3.11.0
+/com.google.re2j/re2j = 1.2
+/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
+/com.googlecode.mp4parser/isoparser = 1.1.22
+/com.healthmarketscience.jackcess/jackcess = 3.0.1
+/com.healthmarketscience.jackcess/jackcess-encrypt = 3.0.0
+/com.ibm.icu/icu4j = 62.2
+/com.jayway.jsonpath/json-path = 2.4.0
+/com.lmax/disruptor = 3.4.2
+/com.pff/java-libpst = 0.8.1
+
+com.rometools.version = 1.12.2
+/com.rometools/rome = ${com.rometools.version}
+/com.rometools/rome-utils = ${com.rometools.version}
+
+com.sun.jersey.version = 1.19
+/com.sun.jersey/jersey-servlet = ${com.sun.jersey.version}
+
+/com.tdunning/t-digest = 3.1
+/com.vaadin.external.google/android-json = 0.0.20131108.vaadin1
+/commons-cli/commons-cli = 1.4
+/commons-codec/commons-codec = 1.13
+/commons-collections/commons-collections = 3.2.2
+/commons-io/commons-io = 2.6
+# necessary to run test or embedded Zookeeper as of 3.6.1
+commons.lang.version = 2.6
+/commons-lang/commons-lang = ${commons.lang.version}
+/commons-logging/commons-logging = 1.1.3
+/de.l3s.boilerpipe/boilerpipe = 1.1.0
+
+io.dropwizard.metrics.version = 4.1.5
+/io.dropwizard.metrics/metrics-core = ${io.dropwizard.metrics.version}
+/io.dropwizard.metrics/metrics-graphite = ${io.dropwizard.metrics.version}
+/io.dropwizard.metrics/metrics-jetty9 = ${io.dropwizard.metrics.version}
+/io.dropwizard.metrics/metrics-jmx = ${io.dropwizard.metrics.version}
+/io.dropwizard.metrics/metrics-jvm = ${io.dropwizard.metrics.version}
+
+io.jaegertracing.version = 1.1.0
+/io.jaegertracing/jaeger-core = ${io.jaegertracing.version}
+/io.jaegertracing/jaeger-thrift = ${io.jaegertracing.version}
+
+io.netty.netty.version = 4.1.50.Final
+/io.netty/netty-buffer = ${io.netty.netty.version}
+/io.netty/netty-codec = ${io.netty.netty.version}
+/io.netty/netty-common = ${io.netty.netty.version}
+/io.netty/netty-handler = ${io.netty.netty.version}
+/io.netty/netty-resolver = ${io.netty.netty.version}
+/io.netty/netty-transport = ${io.netty.netty.version}
+/io.netty/netty-transport-native-epoll = ${io.netty.netty.version}
+/io.netty/netty-transport-native-unix-common = ${io.netty.netty.version}
+
+io.opentracing.version = 0.33.0
+/io.opentracing/opentracing-api = ${io.opentracing.version}
+/io.opentracing/opentracing-mock = ${io.opentracing.version}
+/io.opentracing/opentracing-noop = ${io.opentracing.version}
+/io.opentracing/opentracing-util = ${io.opentracing.version}
+
+io.prometheus.version = 0.2.0
+/io.prometheus/simpleclient = ${io.prometheus.version}
+/io.prometheus/simpleclient_common = ${io.prometheus.version}
+/io.prometheus/simpleclient_httpserver = ${io.prometheus.version}
+
+/io.sgr/s2-geometry-library-java = 1.0.0
+
+/javax.servlet/javax.servlet-api = 3.1.0
+/junit/junit = 4.12
+
+/mecab/mecab-ipadic = 2.7.0-20070801
+/mecab/mecab-ko-dic = 2.0.3-20170922
+/mecab/mecab-naist-jdic = 0.6.3b-20111013
+/net.arnx/jsonic = 1.2.7
+/net.bytebuddy/byte-buddy = 1.9.3
+/net.hydromatic/eigenbase-properties = 1.1.5
+
+net.sourceforge.argparse4j.version = 0.8.1
+/net.sourceforge.argparse4j/argparse4j = ${net.sourceforge.argparse4j.version}
+
+/net.sourceforge.nekohtml/nekohtml = 1.9.17
+
+net.thisptr.version = 0.0.8
+/net.thisptr/jackson-jq = ${net.thisptr.version}
+
+/org.antlr/antlr4-runtime = 4.5.1-1
+
+/org.apache.ant/ant = 1.8.2
+
+org.apache.calcite.avatica.version = 1.13.0
+/org.apache.calcite.avatica/avatica-core = ${org.apache.calcite.avatica.version}
+
+org.apache.calcite.version = 1.18.0
+/org.apache.calcite/calcite-core = ${org.apache.calcite.version}
+/org.apache.calcite/calcite-linq4j = ${org.apache.calcite.version}
+
+org.apache.commons.commons-collections4-rev = 4.4
+/org.apache.commons/commons-collections4 = ${org.apache.commons.commons-collections4-rev}
+/org.apache.commons/commons-compress = 1.19
+/org.apache.commons/commons-configuration2 = 2.1.1
+/org.apache.commons/commons-csv = 1.7
+/org.apache.commons/commons-exec = 1.3
+/org.apache.commons/commons-lang3 = 3.9
+/org.apache.commons/commons-math3 = 3.6.1
+/org.apache.commons/commons-text = 1.6
+
+org.apache.curator.version = 2.13.0
+/org.apache.curator/curator-client = ${org.apache.curator.version}
+/org.apache.curator/curator-framework = ${org.apache.curator.version}
+/org.apache.curator/curator-recipes = ${org.apache.curator.version}
+
+org.apache.hadoop.version = 3.2.0
+/org.apache.hadoop/hadoop-annotations = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-auth = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-common = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-hdfs = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-hdfs-client = ${org.apache.hadoop.version}
+/org.apache.hadoop/hadoop-minikdc = ${org.apache.hadoop.version}
+
+/org.apache.htrace/htrace-core4 = 4.1.0-incubating
+
+# The httpcore version is often different from the httpclient and httpmime versions,
+# so the httpcore version value should not share the same symbolic name with them.
+/org.apache.httpcomponents/httpclient = 4.5.10
+/org.apache.httpcomponents/httpcore = 4.4.12
+/org.apache.httpcomponents/httpmime = 4.5.10
+
+/org.apache.ivy/ivy = 2.4.0
+
+org.apache.james.apache.mime4j.version = 0.8.3
+/org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version}
+/org.apache.james/apache-mime4j-dom = ${org.apache.james.apache.mime4j.version}
+
+org.apache.kerby.version = 1.0.1
+/org.apache.kerby/kerb-admin = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-client = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-common = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-core = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-crypto = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-identity= ${org.apache.kerby.version}
+/org.apache.kerby/kerb-server = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-simplekdc = ${org.apache.kerby.version}
+/org.apache.kerby/kerb-util = ${org.apache.kerby.version}
+
+/org.apache.kerby/kerby-asn1 = ${org.apache.kerby.version}
+/org.apache.kerby/kerby-config = ${org.apache.kerby.version}
+/org.apache.kerby/kerby-kdc = ${org.apache.kerby.version}
+/org.apache.kerby/kerby-pkix = ${org.apache.kerby.version}
+/org.apache.kerby/kerby-util = ${org.apache.kerby.version}
+
+org.apache.logging.log4j.version = 2.13.2
+/org.apache.logging.log4j/log4j-1.2-api = ${org.apache.logging.log4j.version}
+/org.apache.logging.log4j/log4j-api = ${org.apache.logging.log4j.version}
+/org.apache.logging.log4j/log4j-core = ${org.apache.logging.log4j.version}
+/org.apache.logging.log4j/log4j-slf4j-impl = ${org.apache.logging.log4j.version}
+/org.apache.logging.log4j/log4j-web = ${org.apache.logging.log4j.version}
+
+/org.apache.opennlp/opennlp-tools = 1.9.1
+
+org.apache.pdfbox.version = 2.0.17
+/org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
+/org.apache.pdfbox/jempbox = 1.8.16
+/org.apache.pdfbox/pdfbox = ${org.apache.pdfbox.version}
+/org.apache.pdfbox/pdfbox-tools = ${org.apache.pdfbox.version}
+
+org.apache.poi.version = 4.1.1
+/org.apache.poi/poi = ${org.apache.poi.version}
+/org.apache.poi/poi-ooxml = ${org.apache.poi.version}
+/org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version}
+/org.apache.poi/poi-scratchpad = ${org.apache.poi.version}
+
+org.apache.thrift.version = 0.13.0
+/org.apache.thrift/libthrift = ${org.apache.thrift.version}
+
+org.apache.tika.version = 1.24
+/org.apache.tika/tika-core = ${org.apache.tika.version}
+/org.apache.tika/tika-java7 = ${org.apache.tika.version}
+/org.apache.tika/tika-parsers = ${org.apache.tika.version}
+/org.apache.tika/tika-xmp = ${org.apache.tika.version}
+
+org.apache.velocity.tools.version = 3.0
+/org.apache.velocity.tools/velocity-tools-generic = ${org.apache.velocity.tools.version}
+/org.apache.velocity.tools/velocity-tools-view = ${org.apache.velocity.tools.version}
+/org.apache.velocity.tools/velocity-tools-view-jsp = ${org.apache.velocity.tools.version}
+
+/org.apache.velocity/velocity-engine-core = 2.0
+
+/org.apache.xmlbeans/xmlbeans = 3.1.0
+
+org.apache.zookeeper.version = 3.6.1
+/org.apache.zookeeper/zookeeper = ${org.apache.zookeeper.version}
+/org.apache.zookeeper/zookeeper-jute = ${org.apache.zookeeper.version}
+
+# v1.6.2 of asciidoctor-ant includes asciidoctorj 1.6.2, which uses
+# asciidoctor 1.5.8, and asciidoctorj-pdf 1.5.0-alpha.16, which is the same
+# as asciidoctor-pdf 1.5.0-alpha.16
+/org.asciidoctor/asciidoctor-ant = 1.6.2
+
+/org.aspectj/aspectjrt = 1.8.0
+
+/org.bitbucket.b_c/jose4j = 0.6.5
+
+org.bouncycastle.version = 1.64
+/org.bouncycastle/bcmail-jdk15on = ${org.bouncycastle.version}
+/org.bouncycastle/bcpkix-jdk15on = ${org.bouncycastle.version}
+/org.bouncycastle/bcprov-jdk15on = ${org.bouncycastle.version}
+
+/org.brotli/dec = 0.1.2
+
+/org.carrot2.attributes/attributes-binder = 1.3.3
+/org.carrot2.shaded/carrot2-guava = 18.0
+
+/org.carrot2/carrot2-mini = 3.16.2
+
+org.carrot2.morfologik.version = 2.1.5
+/org.carrot2/morfologik-fsa = ${org.carrot2.morfologik.version}
+/org.carrot2/morfologik-polish = ${org.carrot2.morfologik.version}
+/org.carrot2/morfologik-stemming = ${org.carrot2.morfologik.version}
+
+/org.ccil.cowan.tagsoup/tagsoup = 1.2.1
+
+org.codehaus.janino.version = 3.0.9
+/org.codehaus.janino/commons-compiler = ${org.codehaus.janino.version}
+/org.codehaus.janino/janino = ${org.codehaus.janino.version}
+
+/org.codehaus.woodstox/stax2-api = 3.1.4
+/org.codehaus.woodstox/woodstox-core-asl = 4.4.1
+
+org.eclipse.jetty.version = 9.4.27.v20200227
+/org.eclipse.jetty.http2/http2-client = ${org.eclipse.jetty.version}
+/org.eclipse.jetty.http2/http2-common = ${org.eclipse.jetty.version}
+/org.eclipse.jetty.http2/http2-hpack = ${org.eclipse.jetty.version}
+/org.eclipse.jetty.http2/http2-http-client-transport = ${org.eclipse.jetty.version}
+/org.eclipse.jetty.http2/http2-server = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-alpn-client = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-alpn-java-client = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-alpn-java-server = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-alpn-server = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-client = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-continuation = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-deploy = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-http = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-io = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-jmx = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-rewrite = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-security = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-server = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-servlet = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-servlets = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-start = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-util = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-webapp = ${org.eclipse.jetty.version}
+/org.eclipse.jetty/jetty-xml = ${org.eclipse.jetty.version}
+
+org.gagravarr.vorbis.java.version = 0.8
+/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
+/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
+
+/org.hamcrest/hamcrest = 2.2
+
+/org.jdom/jdom2 = 2.0.6
+
+/org.jsoup/jsoup = 1.12.1
+
+/org.locationtech.jts/jts-core = 1.15.0
+/org.locationtech.spatial4j/spatial4j = 0.7
+
+/org.mockito/mockito-core = 2.23.4
+
+/org.objenesis/objenesis = 2.6
+
+org.ow2.asm.version = 7.2
+/org.ow2.asm/asm = ${org.ow2.asm.version}
+/org.ow2.asm/asm-commons = ${org.ow2.asm.version}
+
+org.restlet.jee.version = 2.4.3
+/org.restlet.jee/org.restlet = ${org.restlet.jee.version}
+/org.restlet.jee/org.restlet.ext.servlet = ${org.restlet.jee.version}
+
+/org.rrd4j/rrd4j = 3.5
+
+org.slf4j.version = 1.7.24
+/org.slf4j/jcl-over-slf4j = ${org.slf4j.version}
+/org.slf4j/jul-to-slf4j = ${org.slf4j.version}
+/org.slf4j/slf4j-api = ${org.slf4j.version}
+/org.slf4j/slf4j-simple = ${org.slf4j.version}
+
+/org.tallison/jmatio = 1.5
+/org.tukaani/xz = 1.8
+
+# required for instantiating a Zookeeper server in tests or embedded
+org.xerial.snappy.version = 1.1.7.6
+/org.xerial.snappy/snappy-java = ${org.xerial.snappy.version}
+
+
+ua.net.nlp.morfologik-ukrainian-search.version = 4.9.1
+/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
+
+/xerces/xercesImpl = 2.12.0
diff --git a/settings.gradle b/settings.gradle
index fb85047..fdf46af 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -53,8 +53,6 @@ include "solr:solrj"
include "solr:core"
include "solr:server"
include "solr:contrib:analysis-extras"
-include "solr:contrib:dataimporthandler"
-include "solr:contrib:dataimporthandler-extras"
include "solr:contrib:analytics"
include "solr:contrib:clustering"
include "solr:contrib:extraction"
diff --git a/solr/.gitignore b/solr/.gitignore
index 421dbcb..a0d8aa8 100644
--- a/solr/.gitignore
+++ b/solr/.gitignore
@@ -2,8 +2,6 @@
/bin/*.pid
-/contrib/dataimporthandler/test-lib/
-
/core/test-lib/
/example/start.jar
@@ -15,9 +13,6 @@
/example/solr/zoo_data
/example/work/*
/example/exampledocs/post.jar
-/example/example-DIH/**/data
-/example/example-DIH/**/dataimport.properties
-/example/example-DIH/solr/mail/lib/*.jar
/package
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2983a23..d2bc274 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -118,6 +118,9 @@ Other Changes
* LUCENE-9433: Remove Ant support from trunk (Erick Erickson, Uwe Schindler et.al.)
+* SOLR-14783: Remove Data Import Handler (DIH), previously deprecated (Alexandre Rafalovitch)
+
+
Bug Fixes
---------------------
* SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could lead to out of order execution
diff --git a/solr/README.md b/solr/README.md
index 176e007..fd775e1 100644
--- a/solr/README.md
+++ b/solr/README.md
@@ -90,15 +90,14 @@ Solr includes a few examples to help you get started. To run a specific example,
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
cloud : SolrCloud example
- dih : Data Import Handler (rdbms, mail, atom, tika)
schemaless : Schema-less example (schema is inferred from data during indexing)
techproducts : Kitchen sink example providing comprehensive examples of Solr features
```
-For instance, if you want to run the Solr Data Import Handler example, do:
+For instance, if you want to run the SolrCloud example, do:
```
- bin/solr -e dih
+ bin/solr -e cloud
```
Indexing Documents
@@ -142,8 +141,7 @@ server/
example/
Contains example documents and an alternative Solr home
- directory containing examples of how to use the Data Import Handler,
- see example/example-DIH/README.md for more information.
+ directory containing various examples.
dist/solr-<component>-XX.jar
The Apache Solr libraries. To compile Apache Solr Plugins,
diff --git a/solr/bin/solr b/solr/bin/solr
index f6062a5..6ef2a29 100755
--- a/solr/bin/solr
+++ b/solr/bin/solr
@@ -386,7 +386,6 @@ function print_usage() {
echo " -e <example> Name of the example to run; available examples:"
echo " cloud: SolrCloud example"
echo " techproducts: Comprehensive example illustrating many of Solr's core capabilities"
- echo " dih: Data Import Handler"
echo " schemaless: Schema-less example"
echo ""
echo " -a Additional parameters to pass to the JVM when starting Solr, such as to setup"
diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd
index 53fafe2..b4e1740 100755
--- a/solr/bin/solr.cmd
+++ b/solr/bin/solr.cmd
@@ -360,7 +360,6 @@ goto done
@echo -e example Name of the example to run; available examples:
@echo cloud: SolrCloud example
@echo techproducts: Comprehensive example illustrating many of Solr's core capabilities
-@echo dih: Data Import Handler
@echo schemaless: Schema-less example
@echo.
@echo -a opts Additional parameters to pass to the JVM when starting Solr, such as to setup
diff --git a/solr/common-build.xml b/solr/common-build.xml
new file mode 100644
index 0000000..eb1fc52
--- /dev/null
+++ b/solr/common-build.xml
@@ -0,0 +1,547 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="common-solr" default="default" xmlns:rsel="antlib:org.apache.tools.ant.types.resources.selectors">
+ <description>
+ This file is designed for importing into a main build file, and not intended
+ for standalone use.
+ </description>
+
+ <dirname file="${ant.file.common-solr}" property="common-solr.dir"/>
+
+ <property name="Name" value="Solr" />
+
+ <!-- solr uses Java 11 -->
+ <property name="javac.release" value="11"/>
+ <property name="javac.args" value="-Xlint:-deprecation"/>
+ <property name="javac.profile.args" value=""/>
+
+ <property name="dest" location="${common-solr.dir}/build" />
+ <property name="build.dir" location="${dest}/${ant.project.name}"/>
+ <property name="jacoco.report.dir" location="${dest}/jacoco"/>
+ <property name="dist" location="${common-solr.dir}/dist"/>
+ <property name="package.dir" location="${common-solr.dir}/package"/>
+ <property name="maven.dist.dir" location="${package.dir}/maven"/>
+ <property name="lucene-libs" location="${dest}/lucene-libs" />
+ <property name="tests.userdir" location="src/test-files"/>
+ <property name="tests.policy" location="${common-solr.dir}/server/etc/security.policy"/>
+ <property name="server.dir" location="${common-solr.dir}/server" />
+ <property name="example" location="${common-solr.dir}/example" />
+ <property name="javadoc.dir" location="${dest}/docs"/>
+ <property name="javadoc-online.dir" location="${dest}/docs-online"/>
+ <property name="tests.cleanthreads.sysprop" value="perClass"/>
+
+ <property name="changes.target.dir" location="${dest}/docs/changes"/>
+ <property name="license.dir" location="${common-solr.dir}/licenses"/>
+
+ <property name="solr.tgz.unpack.dir" location="${common-solr.dir}/build/solr.tgz.unpacked"/>
+ <property name="dist.jar.dir.prefix" value="${solr.tgz.unpack.dir}/solr"/>
+ <property name="dist.jar.dir.suffix" value="dist"/>
+
+ <import file="${common-solr.dir}/../lucene/module-build.xml"/>
+
+ <property name="solr.tgz.file" location="${common-solr.dir}/package/solr-${version}.tgz"/>
+ <available file="${solr.tgz.file}" property="solr.tgz.exists"/>
+ <available type="dir" file="${solr.tgz.unpack.dir}" property="solr.tgz.unpack.dir.exists"/>
+ <target name="-ensure-solr-tgz-exists" unless="solr.tgz.exists">
+ <ant dir="${common-solr.dir}" target="create-package" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+ <target name="-unpack-solr-tgz" unless="${solr.tgz.unpack.dir.exists}">
+ <antcall target="-ensure-solr-tgz-exists">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </antcall>
+ <mkdir dir="${solr.tgz.unpack.dir}"/>
+ <untar compression="gzip" src="${solr.tgz.file}" dest="${solr.tgz.unpack.dir}">
+ <patternset refid="patternset.lucene.solr.jars"/>
+ </untar>
+ </target>
+
+ <!-- backwards compatibility with existing targets/tasks; TODO: remove this! -->
+ <property name="fullnamever" value="${final.name}"/>
+
+ <path id="additional.dependencies">
+ <fileset dir="${common-solr.dir}/core/lib" excludes="${common.classpath.excludes}"/>
+ <fileset dir="${common-solr.dir}/solrj/lib" excludes="${common.classpath.excludes}"/>
+ <fileset dir="${common-solr.dir}/server/lib" excludes="${common.classpath.excludes}"/>
+ <fileset dir="lib" excludes="${common.classpath.excludes}" erroronmissingdir="false"/>
+ </path>
+
+ <path id="solr.lucene.libs">
+ <!-- List of jars that will be used as the foundation for both
+ the base classpath, as well as copied into the lucene-libs dir
+ in the release.
+ -->
+ <!-- NOTE: lucene-core is explicitly not included because of the
+ base.classpath (compilation & tests are done directly against
+ the class files w/o needing to build the jar)
+ -->
+ <pathelement location="${analyzers-common.jar}"/>
+ <pathelement location="${analyzers-kuromoji.jar}"/>
+ <pathelement location="${analyzers-nori.jar}"/>
+ <pathelement location="${analyzers-phonetic.jar}"/>
+ <pathelement location="${codecs.jar}"/>
+ <pathelement location="${backward-codecs.jar}"/>
+ <pathelement location="${highlighter.jar}"/>
+ <pathelement location="${memory.jar}"/>
+ <pathelement location="${misc.jar}"/>
+ <pathelement location="${spatial-extras.jar}"/>
+ <pathelement location="${spatial3d.jar}"/>
+ <pathelement location="${expressions.jar}"/>
+ <pathelement location="${suggest.jar}"/>
+ <pathelement location="${grouping.jar}"/>
+ <pathelement location="${queries.jar}"/>
+ <pathelement location="${queryparser.jar}"/>
+ <pathelement location="${join.jar}"/>
+ <pathelement location="${sandbox.jar}"/>
+ <pathelement location="${classification.jar}"/>
+ </path>
+
+ <path id="solr.base.classpath">
+ <pathelement location="${common-solr.dir}/build/solr-solrj/classes/java"/>
+ <pathelement location="${common-solr.dir}/build/solr-core/classes/java"/>
+ <path refid="solr.lucene.libs" />
+ <path refid="additional.dependencies"/>
+ <path refid="base.classpath"/>
+ </path>
+
+ <path id="classpath" refid="solr.base.classpath"/>
+
+ <path id="solr.test.base.classpath">
+ <pathelement path="${common-solr.dir}/build/solr-test-framework/classes/java"/>
+ <fileset dir="${common-solr.dir}/test-framework/lib">
+ <include name="*.jar"/>
+ <exclude name="junit-*.jar" />
+ <exclude name="randomizedtesting-runner-*.jar" />
+ <exclude name="ant*.jar" />
+ </fileset>
+ <pathelement path="src/test-files"/>
+ <path refid="test.base.classpath"/>
+ </path>
+
+ <path id="test.classpath" refid="solr.test.base.classpath"/>
+
+ <macrodef name="solr-contrib-uptodate">
+ <attribute name="name"/>
+ <attribute name="property" default="@{name}.uptodate"/>
+ <attribute name="classpath.property" default="@{name}.jar"/>
+ <!-- set jarfile only if the target jar file has no generic name -->
+ <attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
+ <sequential>
+ <!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
+ <property name="@{classpath.property}" location="@{jarfile}"/>
+ <uptodate property="@{property}" targetfile="@{jarfile}">
+ <srcfiles dir="${common-solr.dir}/contrib/@{name}/src/java" includes="**/*.java"/>
+ </uptodate>
+ </sequential>
+ </macrodef>
+
+ <target name="validate" depends="compile-tools">
+ </target>
+
+ <target name="init-dist" depends="resolve-groovy">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${package.dir}"/>
+ <mkdir dir="${dist}"/>
+ <mkdir dir="${maven.dist.dir}"/>
+ </target>
+
+ <target name="prep-lucene-jars"
+ depends="resolve-groovy,
+ jar-lucene-core, jar-backward-codecs, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-nori, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
+ jar-misc, jar-spatial-extras, jar-spatial3d, jar-grouping, jar-queries, jar-queryparser, jar-join, jar-sandbox, jar-classification">
+ <property name="solr.deps.compiled" value="true"/>
+ </target>
+
+ <target name="lucene-jars-to-solr"
+ depends="-lucene-jars-to-solr-not-for-package,-lucene-jars-to-solr-package"/>
+
+ <target name="-lucene-jars-to-solr-not-for-package" unless="called.from.create-package">
+ <sequential>
+ <antcall target="prep-lucene-jars" inheritall="true"/>
+ <property name="solr.deps.compiled" value="true"/>
+ <copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
+ <path refid="solr.lucene.libs" />
+ <!-- NOTE: lucene-core is not already included in "solr.lucene.libs" because of its use in classpaths. -->
+ <fileset file="${lucene-core.jar}" />
+ </copy>
+ </sequential>
+ </target>
+
+ <target name="-lucene-jars-to-solr-package" if="called.from.create-package">
+ <sequential>
+ <antcall target="-unpack-lucene-tgz" inheritall="true"/>
+ <pathconvert property="relative.solr.lucene.libs" pathsep=",">
+ <path refid="solr.lucene.libs"/>
+ <fileset file="${lucene-core.jar}"/>
+ <globmapper from="${common.build.dir}/*" to="*" handledirsep="true"/>
+ </pathconvert>
+ <mkdir dir="${lucene-libs}"/>
+ <copy todir="${lucene-libs}" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
+ <fileset dir="${lucene.tgz.unpack.dir}/lucene-${version}" includes="${relative.solr.lucene.libs}"/>
+ </copy>
+ </sequential>
+ </target>
+
+ <!-- Shared core/solrj/test-framework/contrib targets -->
+
+ <macrodef name="solr-jarify" description="Builds a Solr JAR file">
+ <attribute name="basedir" default="${build.dir}/classes/java"/>
+ <attribute name="destfile" default="${build.dir}/${final.name}.jar"/>
+ <attribute name="title" default="Apache Solr Search Server: ${ant.project.name}"/>
+ <attribute name="excludes" default="**/pom.xml,**/*.iml"/>
+ <attribute name="metainf.source.dir" default="${common-solr.dir}"/>
+ <attribute name="implementation.title" default="org.apache.solr"/>
+ <attribute name="manifest.file" default="${manifest.file}"/>
+ <element name="solr-jarify-filesets" optional="true"/>
+ <element name="solr-jarify-additional-manifest-attributes" optional="true"/>
+ <sequential>
+ <jarify basedir="@{basedir}" destfile="@{destfile}"
+ title="@{title}" excludes="@{excludes}"
+ metainf.source.dir="@{metainf.source.dir}"
+ implementation.title="@{implementation.title}"
+ manifest.file="@{manifest.file}">
+ <filesets>
+ <solr-jarify-filesets />
+ </filesets>
+ <jarify-additional-manifest-attributes>
+ <solr-jarify-additional-manifest-attributes />
+ </jarify-additional-manifest-attributes>
+ </jarify>
+ </sequential>
+ </macrodef>
+
+ <target name="jar-core" depends="compile-core">
+ <solr-jarify/>
+ </target>
+
+ <target name="compile-core" depends="prep-lucene-jars,resolve-example,resolve-server,common.compile-core"/>
+ <target name="compile-test" depends="compile-solr-test-framework,common.compile-test"/>
+
+ <target name="dist" depends="jar-core">
+ <copy file="${build.dir}/${fullnamever}.jar" todir="${dist}"/>
+ </target>
+
+ <property name="lucenedocs" location="${common.dir}/build/docs"/>
+
+ <!-- dependency to ensure all lucene javadocs are present -->
+ <target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-nori,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javado [...]
+
+ <!-- create javadocs for the current module -->
+ <target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
+ <sequential>
+ <mkdir dir="${javadoc.dir}/${name}"/>
+ <solr-invoke-javadoc>
+ <solrsources>
+ <packageset dir="${src.dir}"/>
+ </solrsources>
+ <links>
+ <link href="../solr-solrj"/>
+ <link href="../solr-core"/>
+ </links>
+ </solr-invoke-javadoc>
+ <solr-jarify basedir="${javadoc.dir}/${name}" destfile="${build.dir}/${final.name}-javadoc.jar"/>
+ </sequential>
+ </target>
+
+ <target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
+ <uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
+ <srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
+ </uptodate>
+ </target>
+
+ <target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
+ <uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
+ <srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
+ </uptodate>
+ </target>
+
+ <target name="javadocs-solr-core" depends="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
+ <ant dir="${common-solr.dir}/core" target="javadocs" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solr-core-javadocs.uptodate" value="true"/>
+ </target>
+
+ <target name="javadocs-solrj" depends="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
+ <ant dir="${common-solr.dir}/solrj" target="javadocs" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solrj-javadocs.uptodate" value="true"/>
+ </target>
+
+ <!-- macro to create solr javadocs with links to lucene. make sure calling task depends on lucene-javadocs -->
+ <macrodef name="solr-invoke-javadoc">
+ <element name="solrsources" optional="yes"/>
+ <element name="links" optional="yes"/>
+ <attribute name="destdir" default="${javadoc.dir}/${name}"/>
+ <attribute name="title" default="${Name} ${version} ${name} API"/>
+ <attribute name="overview" default="${src.dir}/overview.html"/>
+ <sequential>
+ <mkdir dir="@{destdir}"/>
+ <invoke-javadoc destdir="@{destdir}" title="@{title}" overview="@{overview}">
+ <sources>
+ <solrsources/>
+ <link offline="true" href="${lucene.javadoc.url}core" packagelistloc="${lucenedocs}/core"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-common" packagelistloc="${lucenedocs}/analyzers-common"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-icu" packagelistloc="${lucenedocs}/analyzers-icu"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-kuromoji" packagelistloc="${lucenedocs}/analyzers-kuromoji"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-nori" packagelistloc="${lucenedocs}/analyzers-nori"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-morfologik" packagelistloc="${lucenedocs}/analyzers-morfologik"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-phonetic" packagelistloc="${lucenedocs}/analyzers-phonetic"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-smartcn" packagelistloc="${lucenedocs}/analyzers-smartcn"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-stempel" packagelistloc="${lucenedocs}/analyzers-stempel"/>
+ <link offline="true" href="${lucene.javadoc.url}backward-codecs" packagelistloc="${lucenedocs}/backward-codecs"/>
+ <link offline="true" href="${lucene.javadoc.url}codecs" packagelistloc="${lucenedocs}/codecs"/>
+ <link offline="true" href="${lucene.javadoc.url}expressions" packagelistloc="${lucenedocs}/expressions"/>
+ <link offline="true" href="${lucene.javadoc.url}suggest" packagelistloc="${lucenedocs}/suggest"/>
+ <link offline="true" href="${lucene.javadoc.url}grouping" packagelistloc="${lucenedocs}/grouping"/>
+ <link offline="true" href="${lucene.javadoc.url}join" packagelistloc="${lucenedocs}/join"/>
+ <link offline="true" href="${lucene.javadoc.url}queries" packagelistloc="${lucenedocs}/queries"/>
+ <link offline="true" href="${lucene.javadoc.url}queryparser" packagelistloc="${lucenedocs}/queryparser"/>
+ <link offline="true" href="${lucene.javadoc.url}highlighter" packagelistloc="${lucenedocs}/highlighter"/>
+ <link offline="true" href="${lucene.javadoc.url}memory" packagelistloc="${lucenedocs}/memory"/>
+ <link offline="true" href="${lucene.javadoc.url}misc" packagelistloc="${lucenedocs}/misc"/>
+ <link offline="true" href="${lucene.javadoc.url}classification" packagelistloc="${lucenedocs}/classification"/>
+ <link offline="true" href="${lucene.javadoc.url}spatial-extras" packagelistloc="${lucenedocs}/spatial-extras"/>
+ <links/>
+ <link href=""/>
+ </sources>
+ </invoke-javadoc>
+ </sequential>
+ </macrodef>
+
+ <target name="define-lucene-javadoc-url" depends="resolve-groovy" unless="lucene.javadoc.url">
+ <property name="useLocalJavadocUrl" value=""/>
+ <groovy><![CDATA[
+ String url, version = properties['version'];
+ String useLocalJavadocUrl = properties['useLocalJavadocUrl'];
+ if (version != properties['version.base'] || Boolean.parseBoolean(useLocalJavadocUrl)) {
+ url = new File(properties['common.dir'], 'build' + File.separator + 'docs').toURI().toASCIIString();
+ if (!(url =~ /\/$/)) url += '/';
+ } else {
+ version = version.replace('.', '_');
+ url = 'https://lucene.apache.org/core/' + version + '/';
+ }
+ task.log('Using the following URL to refer to Lucene Javadocs: ' + url);
+ properties['lucene.javadoc.url'] = url;
+ ]]></groovy>
+ </target>
+
+ <target name="define-solr-javadoc-url" depends="resolve-groovy" unless="solr.javadoc.url">
+ <groovy><![CDATA[
+ String url, version = properties['version'];
+ if (version != properties['version.base']) {
+ url = '';
+ task.log('Disabled Solr Javadocs online URL for packaging (custom build / SNAPSHOT version).');
+ } else {
+ version = version.replace('.', '_');
+ url = 'https://lucene.apache.org/solr/' + version + '/';
+ task.log('Using the following URL to refer to Solr Javadocs: ' + url);
+ }
+ properties['solr.javadoc.url'] = url;
+ ]]></groovy>
+ </target>
+
+ <target name="jar-src">
+ <sequential>
+ <mkdir dir="${build.dir}"/>
+ <solr-jarify basedir="${src.dir}" destfile="${build.dir}/${final.name}-src.jar">
+ <solr-jarify-filesets>
+ <fileset dir="${resources.dir}" erroronmissingdir="no"/>
+ </solr-jarify-filesets>
+ </solr-jarify>
+ </sequential>
+ </target>
+
+ <target name="-validate-maven-dependencies" depends="-validate-maven-dependencies.init">
+ <m2-validate-dependencies pom.xml="${maven.pom.xml}" licenseDirectory="${license.dir}">
+ <additional-filters>
+ <replaceregex pattern="jetty([^/]+)$" replace="jetty" flags="gi" />
+ <replaceregex pattern="slf4j-([^/]+)$" replace="slf4j" flags="gi" />
+ <replaceregex pattern="(bcmail|bcprov)-([^/]+)$" replace="\1" flags="gi" />
+ </additional-filters>
+ <excludes>
+ <rsel:or>
+ <rsel:name name="**/lucene-*-${maven.version.glob}.jar" handledirsep="true"/>
+ <rsel:name name="**/solr-*-${maven.version.glob}.jar" handledirsep="true"/>
+ <!-- TODO: figure out what is going on here with servlet-apis -->
+ <rsel:name name="**/*servlet*.jar" handledirsep="true"/>
+ </rsel:or>
+ </excludes>
+ </m2-validate-dependencies>
+ </target>
+
+ <!-- Solr core targets -->
+ <target name="compile-solr-core" description="Compile Solr core." unless="solr.core.compiled">
+ <ant dir="${common-solr.dir}/core" target="compile-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solr.core.compiled" value="true"/>
+ </target>
+ <target name="compile-test-solr-core" description="Compile solr core tests">
+ <ant dir="${common-solr.dir}/core" target="compile-test" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solr.core.compiled" value="true"/>
+ </target>
+ <target name="dist-core" depends="init-dist"
+ description="Creates the Solr JAR Distribution file.">
+ <ant dir="${common-solr.dir}/core" target="dist" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <!-- Solrj targets -->
+ <target name="compile-solrj" description="Compile the java client." unless="solrj.compiled">
+ <ant dir="${common-solr.dir}/solrj" target="compile-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solrj.compiled" value="true"/>
+ </target>
+ <target name="compile-test-solrj" description="Compile java client tests">
+ <ant dir="${common-solr.dir}/solrj" target="compile-test" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solrj.compiled" value="true"/>
+ </target>
+ <target name="dist-solrj" depends="init-dist"
+ description="Creates the Solr-J JAR Distribution file.">
+ <ant dir="${common-solr.dir}/solrj" target="dist" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+ <target name="jar-solrj" description="Jar Solr-J">
+ <ant dir="${common-solr.dir}/solrj" target="jar-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <!-- Solr test-framework targets -->
+ <target name="compile-solr-test-framework" description="Compile the Solr test-framework" unless="solr.test.framework.compiled">
+ <ant dir="${common-solr.dir}/test-framework" target="compile-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="solr.core.compiled" value="true"/>
+ <property name="solr.test.framework.compiled" value="true"/>
+ </target>
+
+ <target name="jar-solr-test-framework" depends="compile-solr-test-framework">
+ <ant dir="${common-solr.dir}/test-framework" target="jar-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <!-- resolve dependencies in the example (relied upon by compile/tests) -->
+ <target name="resolve-example" unless="example.libs.uptodate">
+ <property name="example.libs.uptodate" value="true"/>
+ </target>
+
+ <!-- resolve dependencies in the server directory (relied upon by compile/tests) -->
+ <target name="resolve-server" unless="server.libs.uptodate">
+ <ant dir="${common-solr.dir}/server" target="resolve" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="server.libs.uptodate" value="true"/>
+ </target>
+
+ <macrodef name="contrib-crawl">
+ <attribute name="target" default=""/>
+ <attribute name="failonerror" default="true"/>
+ <sequential>
+ <subant target="@{target}" failonerror="@{failonerror}" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ <fileset dir="." includes="contrib/*/build.xml"/>
+ </subant>
+ </sequential>
+ </macrodef>
+
+ <target name="-compile-test-lucene-analysis">
+ <ant dir="${common.dir}/analysis" target="compile-test" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <target name="-compile-test-lucene-queryparser">
+ <ant dir="${common.dir}/queryparser" target="compile-test" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <target name="-compile-test-lucene-backward-codecs">
+ <ant dir="${common.dir}/backward-codecs" target="compile-test" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <!-- Solr contrib targets -->
+ <target name="-compile-analysis-extras">
+ <ant dir="${common-solr.dir}/contrib/analysis-extras" target="compile" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ </target>
+
+ <target name="compile-contrib" description="Compile contrib modules">
+ <contrib-crawl target="compile-core"/>
+ </target>
+
+ <target name="compile-test-contrib" description="Compile contrib modules' tests">
+ <contrib-crawl target="compile-test"/>
+ </target>
+
+ <target name="javadocs-contrib" description="Compile contrib modules">
+ <contrib-crawl target="javadocs"/>
+ </target>
+
+ <target name="jar-contrib" description="Jar contrib modules">
+ <contrib-crawl target="jar-core"/>
+ </target>
+
+ <target name="contribs-add-to-webapp">
+ <mkdir dir="${dest}/web"/>
+ <delete dir="${dest}/web" includes="**/*" failonerror="false"/>
+ <contrib-crawl target="add-to-webapp"/>
+ </target>
+
+ <!-- Forbidden API Task, customizations for Solr -->
+ <target name="-check-forbidden-all" depends="-init-forbidden-apis,compile-core,compile-test">
+ <property prefix="ivyversions" file="${common.dir}/ivy-versions.properties"/><!-- for commons-io version -->
+ <forbidden-apis suppressAnnotation="**.SuppressForbidden" classpathref="forbidden-apis.allclasses.classpath" targetVersion="${javac.release}">
+ <signatures>
+ <bundled name="jdk-unsafe"/>
+ <bundled name="jdk-deprecated"/>
+ <bundled name="jdk-non-portable"/>
+ <bundled name="jdk-reflection"/>
+ <bundled name="commons-io-unsafe-${ivyversions./commons-io/commons-io}"/>
+ <fileset dir="${common.dir}/tools/forbiddenApis">
+ <include name="base.txt" />
+ <include name="servlet-api.txt" />
+ <include name="solr.txt" />
+ </fileset>
+ </signatures>
+ <fileset dir="${build.dir}/classes/java" excludes="${forbidden-base-excludes}"/>
+ <fileset dir="${build.dir}/classes/test" excludes="${forbidden-tests-excludes}" erroronmissingdir="false"/>
+ </forbidden-apis>
+ </target>
+
+
+ <!-- hack for now to disable *all* Solr tests on Jenkins when "tests.disable-solr" property is set -->
+ <target name="test" unless="tests.disable-solr">
+ <antcall target="common.test" inheritrefs="true" inheritall="true"/>
+ </target>
+</project>
diff --git a/solr/contrib/dataimporthandler-extras/build.gradle b/solr/contrib/dataimporthandler-extras/build.gradle
deleted file mode 100644
index fde00c3..0000000
--- a/solr/contrib/dataimporthandler-extras/build.gradle
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-apply plugin: 'java-library'
-
-description = 'Data Import Handler Extras'
-
-dependencies {
- implementation project(':solr:core')
-
- implementation project(':solr:contrib:dataimporthandler')
- implementation project(':solr:contrib:extraction')
-
- implementation ('javax.activation:activation')
- implementation ('com.sun.mail:javax.mail')
- implementation ('com.sun.mail:gimap')
-
- testImplementation project(':solr:test-framework')
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
deleted file mode 100644
index 6861ae3..0000000
--- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
+++ /dev/null
@@ -1,901 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import com.sun.mail.imap.IMAPMessage;
-
-import org.apache.solr.common.util.SuppressForbidden;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.util.RTimer;
-import org.apache.tika.Tika;
-import org.apache.tika.metadata.Metadata;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.mail.*;
-import javax.mail.internet.AddressException;
-import javax.mail.internet.ContentType;
-import javax.mail.internet.InternetAddress;
-import javax.mail.internet.MimeMessage;
-import javax.mail.search.*;
-
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.function.Supplier;
-
-import com.sun.mail.gimap.GmailFolder;
-import com.sun.mail.gimap.GmailRawSearchTerm;
-
-/**
- * An EntityProcessor instance which can index emails along with their
- * attachments from POP3 or IMAP sources. Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler"
- * >http://wiki.apache.org/solr/DataImportHandler</a> for more details. <b>This
- * API is experimental and subject to change</b>
- *
- * @since solr 1.4
- */
-public class MailEntityProcessor extends EntityProcessorBase {
-
- private static final SimpleDateFormat sinceDateParser =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);
- private static final SimpleDateFormat afterFmt =
- new SimpleDateFormat("yyyy/MM/dd", Locale.ROOT);
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public static interface CustomFilter {
- public SearchTerm getCustomSearch(Folder folder);
- }
-
- public void init(Context context) {
- super.init(context);
- // set attributes using XXX getXXXFromContext(attribute, defaultValue);
- // applies variable resolver and return default if value is not found or null
- // REQUIRED : connection and folder info
- user = getStringFromContext("user", null);
- password = getStringFromContext("password", null);
- host = getStringFromContext("host", null);
- protocol = getStringFromContext("protocol", null);
- folderNames = getStringFromContext("folders", null);
- // validate
- if (host == null || protocol == null || user == null || password == null
- || folderNames == null) throw new DataImportHandlerException(
- DataImportHandlerException.SEVERE,
- "'user|password|protocol|host|folders' are required attributes");
-
- // OPTIONAL : have defaults and are optional
- recurse = getBoolFromContext("recurse", true);
-
- exclude.clear();
- String excludes = getStringFromContext("exclude", "");
- if (excludes != null && !excludes.trim().equals("")) {
- exclude = Arrays.asList(excludes.split(","));
- }
-
- include.clear();
- String includes = getStringFromContext("include", "");
- if (includes != null && !includes.trim().equals("")) {
- include = Arrays.asList(includes.split(","));
- }
- batchSize = getIntFromContext("batchSize", 20);
- customFilter = getStringFromContext("customFilter", "");
- if (filters != null) filters.clear();
- folderIter = null;
- msgIter = null;
-
- String lastIndexTime = null;
- String command =
- String.valueOf(context.getRequestParameters().get("command"));
- if (!DataImporter.FULL_IMPORT_CMD.equals(command))
- throw new IllegalArgumentException(this.getClass().getSimpleName()+
- " only supports "+DataImporter.FULL_IMPORT_CMD);
-
- // Read the last_index_time out of the dataimport.properties if available
- String cname = getStringFromContext("name", "mailimporter");
- String varName = ConfigNameConstants.IMPORTER_NS_SHORT + "." + cname + "."
- + DocBuilder.LAST_INDEX_TIME;
- Object varValue = context.getVariableResolver().resolve(varName);
- log.info("{}={}", varName, varValue);
-
- if (varValue != null && !"".equals(varValue) &&
- !"".equals(getStringFromContext("fetchMailsSince", ""))) {
-
- // need to check if varValue is the epoch, which we'll take to mean the
- // initial value, in which case means we should use fetchMailsSince instead
- Date tmp = null;
- try {
- tmp = sinceDateParser.parse((String)varValue);
- if (tmp.getTime() == 0) {
- log.info("Ignoring initial value {} for {} in favor of fetchMailsSince config parameter"
- , varValue, varName);
- tmp = null; // don't use this value
- }
- } catch (ParseException e) {
- // probably ok to ignore this since we have other options below
- // as we're just trying to figure out if the date is 0
- log.warn("Failed to parse {} from {} due to", varValue, varName, e);
- }
-
- if (tmp == null) {
- // favor fetchMailsSince in this case because the value from
- // dataimport.properties is the default/init value
- varValue = getStringFromContext("fetchMailsSince", "");
- log.info("fetchMailsSince={}", varValue);
- }
- }
-
- if (varValue == null || "".equals(varValue)) {
- varName = ConfigNameConstants.IMPORTER_NS_SHORT + "."
- + DocBuilder.LAST_INDEX_TIME;
- varValue = context.getVariableResolver().resolve(varName);
- log.info("{}={}", varName, varValue);
- }
-
- if (varValue != null && varValue instanceof String) {
- lastIndexTime = (String)varValue;
- if (lastIndexTime != null && lastIndexTime.length() == 0)
- lastIndexTime = null;
- }
-
- if (lastIndexTime == null)
- lastIndexTime = getStringFromContext("fetchMailsSince", "");
-
- log.info("Using lastIndexTime {} for mail import", lastIndexTime);
-
- this.fetchMailsSince = null;
- if (lastIndexTime != null && lastIndexTime.length() > 0) {
- try {
- fetchMailsSince = sinceDateParser.parse(lastIndexTime);
- log.info("Parsed fetchMailsSince={}", lastIndexTime);
- } catch (ParseException e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Invalid value for fetchMailSince: " + lastIndexTime, e);
- }
- }
-
- fetchSize = getIntFromContext("fetchSize", 32 * 1024);
- cTimeout = getIntFromContext("connectTimeout", 30 * 1000);
- rTimeout = getIntFromContext("readTimeout", 60 * 1000);
-
- String tmp = context.getEntityAttribute("includeOtherUserFolders");
- includeOtherUserFolders = (tmp != null && Boolean.valueOf(tmp.trim()));
- tmp = context.getEntityAttribute("includeSharedFolders");
- includeSharedFolders = (tmp != null && Boolean.valueOf(tmp.trim()));
-
- setProcessAttachmentConfig();
- includeContent = getBoolFromContext("includeContent", true);
-
- logConfig();
- }
-
- private void setProcessAttachmentConfig() {
- processAttachment = true;
- String tbval = context.getEntityAttribute("processAttachments");
- if (tbval == null) {
- tbval = context.getEntityAttribute("processAttachement");
- if (tbval != null) processAttachment = Boolean.valueOf(tbval);
- } else processAttachment = Boolean.valueOf(tbval);
- }
-
- @Override
- public Map<String,Object> nextRow() {
- Message mail = null;
- Map<String,Object> row = null;
- do {
- // try till there is a valid document or folders get exhausted.
- // when mail == NULL, it means end of processing
- mail = getNextMail();
-
- if (mail != null)
- row = getDocumentFromMail(mail);
-
- if (row != null && row.get("folder") == null)
- row.put("folder", mail.getFolder().getFullName());
-
- } while (row == null && mail != null);
- return row;
- }
-
- private Message getNextMail() {
- if (!connected) {
- // this is needed to load the activation mail stuff correctly
- // otherwise, the JavaMail multipart support doesn't get configured
- // correctly, which leads to a class cast exception when processing
- // multipart messages: IMAPInputStream cannot be cast to
- // javax.mail.Multipart
- if (false == withContextClassLoader(getClass().getClassLoader(), this::connectToMailBox)) {
- return null;
- }
- connected = true;
- }
- if (folderIter == null) {
- createFilters();
- folderIter = new FolderIterator(mailbox);
- }
- // get next message from the folder
- // if folder is exhausted get next folder
- // loop till a valid mail or all folders exhausted.
- while (msgIter == null || !msgIter.hasNext()) {
- Folder next = folderIter.hasNext() ? folderIter.next() : null;
- if (next == null) return null;
-
- msgIter = new MessageIterator(next, batchSize);
- }
- return msgIter.next();
- }
-
- private Map<String,Object> getDocumentFromMail(Message mail) {
- Map<String,Object> row = new HashMap<>();
- try {
- addPartToDocument(mail, row, true);
- return row;
- } catch (Exception e) {
- log.error("Failed to convert message [{}] to document due to: {}"
- , mail, e, e);
- return null;
- }
- }
-
- @SuppressWarnings({"unchecked"})
- public void addPartToDocument(Part part, Map<String,Object> row, boolean outerMost) throws Exception {
- if (part instanceof Message) {
- addEnvelopeToDocument(part, row);
- }
-
- String ct = part.getContentType().toLowerCase(Locale.ROOT);
- ContentType ctype = new ContentType(ct);
- if (part.isMimeType("multipart/*")) {
- Object content = part.getContent();
- if (content != null && content instanceof Multipart) {
- Multipart mp = (Multipart) part.getContent();
- int count = mp.getCount();
- if (part.isMimeType("multipart/alternative")) count = 1;
- for (int i = 0; i < count; i++)
- addPartToDocument(mp.getBodyPart(i), row, false);
- } else {
- log.warn("Multipart content is a not an instance of Multipart! Content is: {}"
- + ". Typically, this is due to the Java Activation JAR being loaded by the wrong classloader."
- , (content != null ? content.getClass().getName() : "null"));
- }
- } else if (part.isMimeType("message/rfc822")) {
- addPartToDocument((Part) part.getContent(), row, false);
- } else {
- String disp = part.getDisposition();
- if (includeContent
- && !(disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT))) {
- InputStream is = part.getInputStream();
- Metadata contentTypeHint = new Metadata();
- contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType()
- .toLowerCase(Locale.ENGLISH));
- String content = (new Tika()).parseToString(is, contentTypeHint);
- if (row.get(CONTENT) == null) row.put(CONTENT, new ArrayList<String>());
- List<String> contents = (List<String>) row.get(CONTENT);
- contents.add(content.trim());
- row.put(CONTENT, contents);
- }
- if (!processAttachment || disp == null
- || !disp.equalsIgnoreCase(Part.ATTACHMENT)) return;
- InputStream is = part.getInputStream();
- String fileName = part.getFileName();
- Metadata contentTypeHint = new Metadata();
- contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType()
- .toLowerCase(Locale.ENGLISH));
- String content = (new Tika()).parseToString(is, contentTypeHint);
- if (content == null || content.trim().length() == 0) return;
-
- if (row.get(ATTACHMENT) == null) row.put(ATTACHMENT,
- new ArrayList<String>());
- List<String> contents = (List<String>) row.get(ATTACHMENT);
- contents.add(content.trim());
- row.put(ATTACHMENT, contents);
- if (row.get(ATTACHMENT_NAMES) == null) row.put(ATTACHMENT_NAMES,
- new ArrayList<String>());
- List<String> names = (List<String>) row.get(ATTACHMENT_NAMES);
- names.add(fileName);
- row.put(ATTACHMENT_NAMES, names);
- }
- }
-
- private void addEnvelopeToDocument(Part part, Map<String,Object> row)
- throws MessagingException {
- MimeMessage mail = (MimeMessage) part;
- Address[] adresses;
- if ((adresses = mail.getFrom()) != null && adresses.length > 0) row.put(
- FROM, adresses[0].toString());
-
- List<String> to = new ArrayList<>();
- if ((adresses = mail.getRecipients(Message.RecipientType.TO)) != null) addAddressToList(
- adresses, to);
- if ((adresses = mail.getRecipients(Message.RecipientType.CC)) != null) addAddressToList(
- adresses, to);
- if ((adresses = mail.getRecipients(Message.RecipientType.BCC)) != null) addAddressToList(
- adresses, to);
- if (to.size() > 0) row.put(TO_CC_BCC, to);
-
- row.put(MESSAGE_ID, mail.getMessageID());
- row.put(SUBJECT, mail.getSubject());
-
- Date d = mail.getSentDate();
- if (d != null) {
- row.put(SENT_DATE, d);
- }
-
- List<String> flags = new ArrayList<>();
- for (Flags.Flag flag : mail.getFlags().getSystemFlags()) {
- if (flag == Flags.Flag.ANSWERED) flags.add(FLAG_ANSWERED);
- else if (flag == Flags.Flag.DELETED) flags.add(FLAG_DELETED);
- else if (flag == Flags.Flag.DRAFT) flags.add(FLAG_DRAFT);
- else if (flag == Flags.Flag.FLAGGED) flags.add(FLAG_FLAGGED);
- else if (flag == Flags.Flag.RECENT) flags.add(FLAG_RECENT);
- else if (flag == Flags.Flag.SEEN) flags.add(FLAG_SEEN);
- }
- flags.addAll(Arrays.asList(mail.getFlags().getUserFlags()));
- if (flags.size() == 0) flags.add(FLAG_NONE);
- row.put(FLAGS, flags);
-
- String[] hdrs = mail.getHeader("X-Mailer");
- if (hdrs != null) row.put(XMAILER, hdrs[0]);
- }
-
- private void addAddressToList(Address[] adresses, List<String> to)
- throws AddressException {
- for (Address address : adresses) {
- to.add(address.toString());
- InternetAddress ia = (InternetAddress) address;
- if (ia.isGroup()) {
- InternetAddress[] group = ia.getGroup(false);
- for (InternetAddress member : group)
- to.add(member.toString());
- }
- }
- }
-
- private boolean connectToMailBox() {
- try {
- Properties props = new Properties();
- if (System.getProperty("mail.debug") != null)
- props.setProperty("mail.debug", System.getProperty("mail.debug"));
-
- if (("imap".equals(protocol) || "imaps".equals(protocol))
- && "imap.gmail.com".equals(host)) {
- log.info("Consider using 'gimaps' protocol instead of '{}' for enabling GMail specific extensions for {}"
- , protocol, host);
- }
-
- props.setProperty("mail.store.protocol", protocol);
-
- String imapPropPrefix = protocol.startsWith("gimap") ? "gimap" : "imap";
- props.setProperty("mail." + imapPropPrefix + ".fetchsize", "" + fetchSize);
- props.setProperty("mail." + imapPropPrefix + ".timeout", "" + rTimeout);
- props.setProperty("mail." + imapPropPrefix + ".connectiontimeout", "" + cTimeout);
-
- int port = -1;
- int colonAt = host.indexOf(":");
- if (colonAt != -1) {
- port = Integer.parseInt(host.substring(colonAt + 1));
- host = host.substring(0, colonAt);
- }
-
- Session session = Session.getDefaultInstance(props, null);
- mailbox = session.getStore(protocol);
- if (port != -1) {
- mailbox.connect(host, port, user, password);
- } else {
- mailbox.connect(host, user, password);
- }
- log.info("Connected to {}'s mailbox on {}", user, host);
-
- return true;
- } catch (MessagingException e) {
- String errMsg = String.format(Locale.ENGLISH,
- "Failed to connect to %s server %s as user %s due to: %s", protocol,
- host, user, e.toString());
- log.error(errMsg, e);
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- errMsg, e);
- }
- }
-
- private void createFilters() {
- if (fetchMailsSince != null) {
- filters.add(new MailsSinceLastCheckFilter(fetchMailsSince));
- }
- if (customFilter != null && !customFilter.equals("")) {
- try {
- Class<?> cf = Class.forName(customFilter);
- Object obj = cf.getConstructor().newInstance();
- if (obj instanceof CustomFilter) {
- filters.add((CustomFilter) obj);
- }
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Custom filter could not be created", e);
- }
- }
- }
-
- private void logConfig() {
- if (!log.isInfoEnabled()) return;
-
- String lineSep = System.getProperty("line.separator");
-
- StringBuffer config = new StringBuffer();
- config.append("user : ").append(user).append(lineSep);
- config
- .append("pwd : ")
- .append(
- password != null && password.length() > 0 ? "<non-null>" : "<null>")
- .append(lineSep);
- config.append("protocol : ").append(protocol)
- .append(lineSep);
- config.append("host : ").append(host)
- .append(lineSep);
- config.append("folders : ").append(folderNames)
- .append(lineSep);
- config.append("recurse : ").append(recurse)
- .append(lineSep);
- config.append("exclude : ").append(exclude.toString())
- .append(lineSep);
- config.append("include : ").append(include.toString())
- .append(lineSep);
- config.append("batchSize : ").append(batchSize)
- .append(lineSep);
- config.append("fetchSize : ").append(fetchSize)
- .append(lineSep);
- config.append("read timeout : ").append(rTimeout)
- .append(lineSep);
- config.append("conection timeout : ").append(cTimeout)
- .append(lineSep);
- config.append("custom filter : ").append(customFilter)
- .append(lineSep);
- config.append("fetch mail since : ").append(fetchMailsSince)
- .append(lineSep);
- config.append("includeContent : ").append(includeContent)
- .append(lineSep);
- config.append("processAttachments : ").append(processAttachment)
- .append(lineSep);
- config.append("includeOtherUserFolders : ").append(includeOtherUserFolders)
- .append(lineSep);
- config.append("includeSharedFolders : ").append(includeSharedFolders)
- .append(lineSep);
- log.info("{}", config);
- }
-
- class FolderIterator implements Iterator<Folder> {
- private Store mailbox;
- private List<String> topLevelFolders;
- private List<Folder> folders = null;
- private Folder lastFolder = null;
-
- public FolderIterator(Store mailBox) {
- this.mailbox = mailBox;
- folders = new ArrayList<>();
- getTopLevelFolders(mailBox);
- if (includeOtherUserFolders) getOtherUserFolders();
- if (includeSharedFolders) getSharedFolders();
- }
-
- public boolean hasNext() {
- return !folders.isEmpty();
- }
-
- public Folder next() {
- try {
- boolean hasMessages = false;
- Folder next;
- do {
- if (lastFolder != null) {
- lastFolder.close(false);
- lastFolder = null;
- }
- if (folders.isEmpty()) {
- mailbox.close();
- return null;
- }
- next = folders.remove(0);
- if (next != null) {
- String fullName = next.getFullName();
- if (!excludeFolder(fullName)) {
- hasMessages = (next.getType() & Folder.HOLDS_MESSAGES) != 0;
- next.open(Folder.READ_ONLY);
- lastFolder = next;
- log.info("Opened folder : {}", fullName);
- }
- if (recurse && ((next.getType() & Folder.HOLDS_FOLDERS) != 0)) {
- Folder[] children = next.list();
- log.info("Added its children to list : ");
- for (int i = children.length - 1; i >= 0; i--) {
- folders.add(0, children[i]);
- if (log.isInfoEnabled()) {
- log.info("child name : {}", children[i].getFullName());
- }
- }
- if (children.length == 0) log.info("NO children : ");
- }
- }
- } while (!hasMessages);
- return next;
- } catch (Exception e) {
- log.warn("Failed to read folders due to: {}", e);
- // throw new
- // DataImportHandlerException(DataImportHandlerException.SEVERE,
- // "Folder open failed", e);
- }
- return null;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("It's read only mode...");
- }
-
- private void getTopLevelFolders(Store mailBox) {
- if (folderNames != null) topLevelFolders = Arrays.asList(folderNames
- .split(","));
- for (int i = 0; topLevelFolders != null && i < topLevelFolders.size(); i++) {
- try {
- folders.add(mailbox.getFolder(topLevelFolders.get(i)));
- } catch (MessagingException e) {
- // skip bad ones unless it's the last one and still no good folder
- if (folders.size() == 0 && i == topLevelFolders.size() - 1) throw new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "Folder retreival failed");
- }
- }
- if (topLevelFolders == null || topLevelFolders.size() == 0) {
- try {
- folders.add(mailBox.getDefaultFolder());
- } catch (MessagingException e) {
- throw new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "Folder retreival failed");
- }
- }
- }
-
- private void getOtherUserFolders() {
- try {
- Folder[] ufldrs = mailbox.getUserNamespaces(null);
- if (ufldrs != null) {
- log.info("Found {} user namespace folders", ufldrs.length);
- for (Folder ufldr : ufldrs)
- folders.add(ufldr);
- }
- } catch (MessagingException me) {
- log.warn("Messaging exception retrieving user namespaces: ", me);
- }
- }
-
- private void getSharedFolders() {
- try {
- Folder[] sfldrs = mailbox.getSharedNamespaces();
- if (sfldrs != null) {
- log.info("Found {} shared namespace folders", sfldrs.length);
- for (Folder sfldr : sfldrs)
- folders.add(sfldr);
- }
- } catch (MessagingException me) {
- log.warn("Messaging exception retrieving shared namespaces: ", me);
- }
- }
-
- private boolean excludeFolder(String name) {
- for (String s : exclude) {
- if (name.matches(s)) return true;
- }
- for (String s : include) {
- if (name.matches(s)) return false;
- }
- return include.size() > 0;
- }
- }
-
- class MessageIterator extends SearchTerm implements Iterator<Message> {
- private Folder folder;
- private Message[] messagesInCurBatch = null;
- private int current = 0;
- private int currentBatch = 0;
- private int batchSize = 0;
- private int totalInFolder = 0;
- private boolean doBatching = true;
-
- public MessageIterator(Folder folder, int batchSize) {
- super();
-
- try {
- this.folder = folder;
- this.batchSize = batchSize;
- SearchTerm st = getSearchTerm();
-
- log.info("SearchTerm={}", st);
-
- if (st != null || folder instanceof GmailFolder) {
- doBatching = false;
- // Searching can still take a while even though we're only pulling
- // envelopes; unless you're using gmail server-side filter, which is
- // fast
- if (log.isInfoEnabled()) {
- log.info("Searching folder {} for messages", folder.getName());
- }
- final RTimer searchTimer = new RTimer();
-
- // If using GMail, speed up the envelope processing by doing a
- // server-side
- // search for messages occurring on or after the fetch date (at
- // midnight),
- // which reduces the number of envelopes we need to pull from the
- // server
- // to apply the precise DateTerm filter; GMail server-side search has
- // date
- // granularity only but the local filters are also applied
-
- if (folder instanceof GmailFolder && fetchMailsSince != null) {
- String afterCrit = "after:" + afterFmt.format(fetchMailsSince);
- log.info("Added server-side gmail filter: {}", afterCrit);
- Message[] afterMessages = folder.search(new GmailRawSearchTerm(
- afterCrit));
-
- if (log.isInfoEnabled()) {
- log.info("GMail server-side filter found {} messages received {} in folder {}"
- , afterMessages.length, afterCrit, folder.getName());
- }
-
- // now pass in the server-side filtered messages to the local filter
- messagesInCurBatch = folder.search((st != null ? st : this), afterMessages);
- } else {
- messagesInCurBatch = folder.search(st);
- }
- totalInFolder = messagesInCurBatch.length;
- folder.fetch(messagesInCurBatch, fp);
- current = 0;
- if (log.isInfoEnabled()) {
- log.info("Total messages : {}", totalInFolder);
- log.info("Search criteria applied. Batching disabled. Took {} (ms)", searchTimer.getTime()); // logOk
- }
- } else {
- totalInFolder = folder.getMessageCount();
- log.info("Total messages : {}", totalInFolder);
- getNextBatch(batchSize, folder);
- }
- } catch (MessagingException e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Message retreival failed", e);
- }
- }
-
- private void getNextBatch(int batchSize, Folder folder)
- throws MessagingException {
- // after each batch invalidate cache
- if (messagesInCurBatch != null) {
- for (Message m : messagesInCurBatch) {
- if (m instanceof IMAPMessage) ((IMAPMessage) m).invalidateHeaders();
- }
- }
- int lastMsg = (currentBatch + 1) * batchSize;
- lastMsg = lastMsg > totalInFolder ? totalInFolder : lastMsg;
- messagesInCurBatch = folder.getMessages(currentBatch * batchSize + 1,
- lastMsg);
- folder.fetch(messagesInCurBatch, fp);
- current = 0;
- currentBatch++;
- log.info("Current Batch : {}", currentBatch);
- log.info("Messages in this batch : {}", messagesInCurBatch.length);
- }
-
- public boolean hasNext() {
- boolean hasMore = current < messagesInCurBatch.length;
- if (!hasMore && doBatching && currentBatch * batchSize < totalInFolder) {
- // try next batch
- try {
- getNextBatch(batchSize, folder);
- hasMore = current < messagesInCurBatch.length;
- } catch (MessagingException e) {
- throw new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "Message retreival failed", e);
- }
- }
- return hasMore;
- }
-
- public Message next() {
- return hasNext() ? messagesInCurBatch[current++] : null;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("It's read only mode...");
- }
-
- private SearchTerm getSearchTerm() {
- if (filters.size() == 0) return null;
- if (filters.size() == 1) return filters.get(0).getCustomSearch(folder);
- SearchTerm last = filters.get(0).getCustomSearch(folder);
- for (int i = 1; i < filters.size(); i++) {
- CustomFilter filter = filters.get(i);
- SearchTerm st = filter.getCustomSearch(folder);
- if (st != null) {
- last = new AndTerm(last, st);
- }
- }
- return last;
- }
-
- public boolean match(Message message) {
- return true;
- }
- }
-
- static class MailsSinceLastCheckFilter implements CustomFilter {
-
- private Date since;
-
- public MailsSinceLastCheckFilter(Date date) {
- since = date;
- }
-
- @SuppressWarnings("serial")
- public SearchTerm getCustomSearch(final Folder folder) {
- if (log.isInfoEnabled()) {
- log.info("Building mail filter for messages in {} that occur after {}"
- , folder.getName(), sinceDateParser.format(since));
- }
- return new DateTerm(ComparisonTerm.GE, since) {
- private int matched = 0;
- private int seen = 0;
-
- @Override
- public boolean match(Message msg) {
- boolean isMatch = false;
- ++seen;
- try {
- Date msgDate = msg.getReceivedDate();
- if (msgDate == null) msgDate = msg.getSentDate();
-
- if (msgDate != null && msgDate.getTime() >= since.getTime()) {
- ++matched;
- isMatch = true;
- } else {
- String msgDateStr = (msgDate != null) ? sinceDateParser.format(msgDate) : "null";
- String sinceDateStr = (since != null) ? sinceDateParser.format(since) : "null";
- if (log.isDebugEnabled()) {
- log.debug("Message {} was received at [{}], since filter is [{}]"
- , msg.getSubject(), msgDateStr, sinceDateStr);
- }
- }
- } catch (MessagingException e) {
- log.warn("Failed to process message due to: {}", e, e);
- }
-
- if (seen % 100 == 0) {
- if (log.isInfoEnabled()) {
- log.info("Matched {} of {} messages since: {}"
- , matched, seen, sinceDateParser.format(since));
- }
- }
-
- return isMatch;
- }
- };
- }
- }
-
- // user settings stored in member variables
- private String user;
- private String password;
- private String host;
- private String protocol;
-
- private String folderNames;
- private List<String> exclude = new ArrayList<>();
- private List<String> include = new ArrayList<>();
- private boolean recurse;
-
- private int batchSize;
- private int fetchSize;
- private int cTimeout;
- private int rTimeout;
-
- private Date fetchMailsSince;
- private String customFilter;
-
- private boolean processAttachment = true;
- private boolean includeContent = true;
- private boolean includeOtherUserFolders = false;
- private boolean includeSharedFolders = false;
-
- // holds the current state
- private Store mailbox;
- private boolean connected = false;
- private FolderIterator folderIter;
- private MessageIterator msgIter;
- private List<CustomFilter> filters = new ArrayList<>();
- private static FetchProfile fp = new FetchProfile();
-
- static {
- fp.add(FetchProfile.Item.ENVELOPE);
- fp.add(FetchProfile.Item.FLAGS);
- fp.add("X-Mailer");
- }
-
- // Fields To Index
- // single valued
- private static final String MESSAGE_ID = "messageId";
- private static final String SUBJECT = "subject";
- private static final String FROM = "from";
- private static final String SENT_DATE = "sentDate";
- private static final String XMAILER = "xMailer";
- // multi valued
- private static final String TO_CC_BCC = "allTo";
- private static final String FLAGS = "flags";
- private static final String CONTENT = "content";
- private static final String ATTACHMENT = "attachment";
- private static final String ATTACHMENT_NAMES = "attachmentNames";
- // flag values
- private static final String FLAG_NONE = "none";
- private static final String FLAG_ANSWERED = "answered";
- private static final String FLAG_DELETED = "deleted";
- private static final String FLAG_DRAFT = "draft";
- private static final String FLAG_FLAGGED = "flagged";
- private static final String FLAG_RECENT = "recent";
- private static final String FLAG_SEEN = "seen";
-
- private int getIntFromContext(String prop, int ifNull) {
- int v = ifNull;
- try {
- String val = context.getEntityAttribute(prop);
- if (val != null) {
- val = context.replaceTokens(val);
- v = Integer.parseInt(val);
- }
- } catch (NumberFormatException e) {
- // do nothing
- }
- return v;
- }
-
- private boolean getBoolFromContext(String prop, boolean ifNull) {
- boolean v = ifNull;
- String val = context.getEntityAttribute(prop);
- if (val != null) {
- val = context.replaceTokens(val);
- v = Boolean.valueOf(val);
- }
- return v;
- }
-
- private String getStringFromContext(String prop, String ifNull) {
- String v = ifNull;
- String val = context.getEntityAttribute(prop);
- if (val != null) {
- val = context.replaceTokens(val);
- v = val;
- }
- return v;
- }
-
- @SuppressForbidden(reason = "Uses context class loader as a workaround to inject correct classloader to 3rd party libs")
- private static <T> T withContextClassLoader(ClassLoader loader, Supplier<T> action) {
- Thread ct = Thread.currentThread();
- ClassLoader prev = ct.getContextClassLoader();
- try {
- ct.setContextClassLoader(loader);
- return action.get();
- } finally {
- ct.setContextClassLoader(prev);
- }
- }
-
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
deleted file mode 100644
index 78a53fa..0000000
--- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.EmptyParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.html.HtmlMapper;
-import org.apache.tika.parser.html.IdentityHtmlMapper;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.ContentHandlerDecorator;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.TransformerConfigurationException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.sax.SAXTransformerFactory;
-import javax.xml.transform.sax.TransformerHandler;
-import javax.xml.transform.stream.StreamResult;
-import java.io.File;
-import java.io.InputStream;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImporter.COLUMN;
-import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
-/**
- * <p>An implementation of {@link EntityProcessor} which reads data from rich docs
- * using <a href="http://tika.apache.org/">Apache Tika</a>
- *
- * <p>To index latitude/longitude data that might
- * be extracted from a file's metadata, identify
- * the geo field for this information with this attribute:
- * <code>spatialMetadataField</code>
- *
- * @since solr 3.1
- */
-public class TikaEntityProcessor extends EntityProcessorBase {
- private static Parser EMPTY_PARSER = new EmptyParser();
- private TikaConfig tikaConfig;
- private String format = "text";
- private boolean done = false;
- private boolean extractEmbedded = false;
- private String parser;
- static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";
- private String htmlMapper;
- private String spatialMetadataField;
-
- @Override
- public void init(Context context) {
- super.init(context);
- done = false;
- }
-
- @Override
- protected void firstInit(Context context) {
- super.firstInit(context);
- // See similar code in ExtractingRequestHandler.inform
- try {
- String tikaConfigLoc = context.getResolvedEntityAttribute("tikaConfig");
- if (tikaConfigLoc == null) {
- ClassLoader classLoader = context.getSolrCore().getResourceLoader().getClassLoader();
- try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
- tikaConfig = new TikaConfig(is);
- }
- } else {
- File configFile = new File(tikaConfigLoc);
- if (configFile.isAbsolute()) {
- tikaConfig = new TikaConfig(configFile);
- } else { // in conf/
- try (InputStream is = context.getSolrCore().getResourceLoader().openResource(tikaConfigLoc)) {
- tikaConfig = new TikaConfig(is);
- }
- }
- }
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e,"Unable to load Tika Config");
- }
-
- String extractEmbeddedString = context.getResolvedEntityAttribute("extractEmbedded");
- if ("true".equals(extractEmbeddedString)) {
- extractEmbedded = true;
- }
- format = context.getResolvedEntityAttribute("format");
- if(format == null)
- format = "text";
- if (!"html".equals(format) && !"xml".equals(format) && !"text".equals(format)&& !"none".equals(format) )
- throw new DataImportHandlerException(SEVERE, "'format' can be one of text|html|xml|none");
-
- htmlMapper = context.getResolvedEntityAttribute("htmlMapper");
- if (htmlMapper == null)
- htmlMapper = "default";
- if (!"default".equals(htmlMapper) && !"identity".equals(htmlMapper))
- throw new DataImportHandlerException(SEVERE, "'htmlMapper', if present, must be 'default' or 'identity'");
-
- parser = context.getResolvedEntityAttribute("parser");
- if(parser == null) {
- parser = AUTO_PARSER;
- }
-
- spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
- }
-
- @Override
- public Map<String, Object> nextRow() {
- if(done) return null;
- Map<String, Object> row = new HashMap<>();
- @SuppressWarnings({"unchecked"})
- DataSource<InputStream> dataSource = context.getDataSource();
- InputStream is = dataSource.getData(context.getResolvedEntityAttribute(URL));
- ContentHandler contentHandler = null;
- Metadata metadata = new Metadata();
- StringWriter sw = new StringWriter();
- try {
- if ("html".equals(format)) {
- contentHandler = getHtmlHandler(sw);
- } else if ("xml".equals(format)) {
- contentHandler = getXmlContentHandler(sw);
- } else if ("text".equals(format)) {
- contentHandler = getTextContentHandler(sw);
- } else if("none".equals(format)){
- contentHandler = new DefaultHandler();
- }
- } catch (TransformerConfigurationException e) {
- wrapAndThrow(SEVERE, e, "Unable to create content handler");
- }
- Parser tikaParser = null;
- if(parser.equals(AUTO_PARSER)){
- tikaParser = new AutoDetectParser(tikaConfig);
- } else {
- tikaParser = context.getSolrCore().getResourceLoader().newInstance(parser, Parser.class);
- }
- try {
- ParseContext context = new ParseContext();
- if ("identity".equals(htmlMapper)){
- context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
- }
- if (extractEmbedded) {
- context.set(Parser.class, tikaParser);
- } else {
- context.set(Parser.class, EMPTY_PARSER);
- }
- tikaParser.parse(is, contentHandler, metadata , context);
- } catch (Exception e) {
- if(SKIP.equals(onError)) {
- throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
- "Document skipped :" + e.getMessage());
- }
- wrapAndThrow(SEVERE, e, "Unable to read content");
- }
- IOUtils.closeQuietly(is);
- for (Map<String, String> field : context.getAllEntityFields()) {
- if (!"true".equals(field.get("meta"))) continue;
- String col = field.get(COLUMN);
- String s = metadata.get(col);
- if (s != null) row.put(col, s);
- }
- if(!"none".equals(format) ) row.put("text", sw.toString());
- tryToAddLatLon(metadata, row);
- done = true;
- return row;
- }
-
- private void tryToAddLatLon(Metadata metadata, Map<String, Object> row) {
- if (spatialMetadataField == null) return;
- String latString = metadata.get(Metadata.LATITUDE);
- String lonString = metadata.get(Metadata.LONGITUDE);
- if (latString != null && lonString != null) {
- row.put(spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString));
- }
- }
-
- private static ContentHandler getHtmlHandler(Writer writer)
- throws TransformerConfigurationException {
- SAXTransformerFactory factory = (SAXTransformerFactory)
- TransformerFactory.newInstance();
- TransformerHandler handler = factory.newTransformerHandler();
- handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
- handler.setResult(new StreamResult(writer));
- return new ContentHandlerDecorator(handler) {
- @Override
- public void startElement(
- String uri, String localName, String name, Attributes atts)
- throws SAXException {
- if (XHTMLContentHandler.XHTML.equals(uri)) {
- uri = null;
- }
- if (!"head".equals(localName)) {
- super.startElement(uri, localName, name, atts);
- }
- }
-
- @Override
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- if (XHTMLContentHandler.XHTML.equals(uri)) {
- uri = null;
- }
- if (!"head".equals(localName)) {
- super.endElement(uri, localName, name);
- }
- }
-
- @Override
- public void startPrefixMapping(String prefix, String uri) {/*no op*/ }
-
- @Override
- public void endPrefixMapping(String prefix) {/*no op*/ }
- };
- }
-
- private static ContentHandler getTextContentHandler(Writer writer) {
- return new BodyContentHandler(writer);
- }
-
- private static ContentHandler getXmlContentHandler(Writer writer)
- throws TransformerConfigurationException {
- SAXTransformerFactory factory = (SAXTransformerFactory)
- TransformerFactory.newInstance();
- TransformerHandler handler = factory.newTransformerHandler();
- handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
- handler.setResult(new StreamResult(writer));
- return handler;
- }
-
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html
deleted file mode 100644
index 9a7f6f2..0000000
--- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Plugins for <code>DataImportHandler</code> that have additional dependencies.
-</body>
-</html>
diff --git a/solr/contrib/dataimporthandler-extras/src/java/overview.html b/solr/contrib/dataimporthandler-extras/src/java/overview.html
deleted file mode 100644
index 5a55432..0000000
--- a/solr/contrib/dataimporthandler-extras/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<body>
-Apache Solr Search Server: DataImportHandler Extras contrib. <b>This contrib module is deprecated as of 8.6</b>
-</body>
-</html>
diff --git a/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml b/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml
deleted file mode 100644
index b598d9e..0000000
--- a/solr/contrib/dataimporthandler-extras/src/resources/solr-default-tika-config.xml
+++ /dev/null
@@ -1,20 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<properties>
- <service-loader initializableProblemHandler="ignore"/>
-</properties>
\ No newline at end of file
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
deleted file mode 100644
index 5944c24..0000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf
deleted file mode 100644
index bd8b865..0000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr-word.pdf and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
deleted file mode 100644
index 793482a..0000000
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-schema-no-unique-key.xml
+++ /dev/null
@@ -1,205 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is the Solr schema file. This file should be named "schema.xml" and
- should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default)
- or located where the classloader for the Solr webapp can find it.
-
- This example schema is the recommended starting point for users.
- It should be kept correct and concise, usable out-of-the-box.
-
- For more information, on how to customize this file, please see
- http://wiki.apache.org/solr/SchemaXml
--->
-
-<schema name="test" version="1.2">
- <!-- attribute "name" is the name of this schema and is only used for display purposes.
- Applications should change this to reflect the nature of the search collection.
- version="1.1" is Solr's version number for the schema syntax and semantics. It should
- not normally be changed by applications.
- 1.0: multiValued attribute did not exist, all fields are multiValued by nature
- 1.1: multiValued attribute introduced, false by default -->
-
-
- <!-- field type definitions. The "name" attribute is
- just a label to be used by field definitions. The "class"
- attribute and any other attributes determine the real
- behavior of the fieldType.
- Class names starting with "solr" refer to java classes in the
- org.apache.solr.analysis package.
- -->
-
- <!-- The StrField type is not analyzed, but indexed/stored verbatim.
- - StrField and TextField support an optional compressThreshold which
- limits compression (if enabled in the derived fields) to values which
- exceed a certain size (in characters).
- -->
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
-
- <!-- boolean type: "true" or "false" -->
- <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
-
- <!-- The optional sortMissingLast and sortMissingFirst attributes are
- currently supported on types that are sorted internally as strings.
- - If sortMissingLast="true", then a sort on this field will cause documents
- without the field to come after documents with the field,
- regardless of the requested sort order (asc or desc).
- - If sortMissingFirst="true", then a sort on this field will cause documents
- without the field to come before documents with the field,
- regardless of the requested sort order.
- - If sortMissingLast="false" and sortMissingFirst="false" (the default),
- then default lucene sorting will be used which places docs without the
- field first in an ascending sort and last in a descending sort.
- -->
-
-
- <!--
- Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
- -->
- <fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="latLon" class="solr.LatLonType" subFieldType="double"/>
-
-
- <!--
- Numeric field types that index each value at various levels of precision
- to accelerate range queries when the number of values between the range
- endpoints is large. See the javadoc for NumericRangeQuery for internal
- implementation details.
-
- Smaller precisionStep values (specified in bits) will lead to more tokens
- indexed per value, slightly larger index size, and faster range queries.
- A precisionStep of 0 disables indexing at different precision levels.
- -->
- <fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
- <fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
- <fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
- <fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
-
-
- <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
- is a more restricted form of the canonical representation of dateTime
- http://www.w3.org/TR/xmlschema-2/#dateTime
- The trailing "Z" designates UTC time and is mandatory.
- Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
- All other components are mandatory.
-
- Expressions can also be used to denote calculations that should be
- performed relative to "NOW" to determine the value, ie...
-
- NOW/HOUR
- ... Round to the start of the current hour
- NOW-1DAY
- ... Exactly 1 day prior to now
- NOW/DAY+6MONTHS+3DAYS
- ... 6 months and 3 days in the future from the start of
- the current day
-
- Consult the TrieDateField javadocs for more information.
- -->
- <fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" sortMissingLast="true" omitNorms="true"/>
-
-
- <!-- The "RandomSortField" is not used to store or search any
- data. You can declare fields of this type it in your schema
- to generate psuedo-random orderings of your docs for sorting
- purposes. The ordering is generated based on the field name
- and the version of the index, As long as the index version
- remains unchanged, and the same field name is reused,
- the ordering of the docs will be consistent.
- If you want differend psuedo-random orderings of documents,
- for the same version of the index, use a dynamicField and
- change the name
- -->
- <fieldType name="random" class="solr.RandomSortField" indexed="true"/>
-
- <!-- solr.TextField allows the specification of custom text analyzers
- specified as a tokenizer and a list of token filters. Different
- analyzers may be specified for indexing and querying.
-
- The optional positionIncrementGap puts space between multiple fields of
- this type on the same document, with the purpose of preventing false phrase
- matching across fields.
-
- For more info on customizing your analyzer chain, please see
- http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
- -->
-
- <!-- One can also specify an existing Analyzer class that has a
- default constructor via the class attribute on the analyzer element
- <fieldType name="text_greek" class="solr.TextField">
- <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
- </fieldType>
- -->
-
- <!-- A text field that only splits on whitespace for exact matching of words -->
- <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <tokenizer class="solr.MockTokenizerFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
- words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
- so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
- Synonyms and stopwords are customized by external files, and stemming is enabled.
- Duplicate tokens at the same position (which may result from Stemmed Synonyms or
- WordDelim parts) are removed.
- -->
- <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.MockTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
- <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
- catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.PorterStemFilterFactory"/>-->
- <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
- <filter class="solr.FlattenGraphFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.MockTokenizerFactory"/>
- <!--<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
- <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
- <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
- catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.PorterStemFilterFactory"/>-->
- <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
- </analyzer>
- </fieldType>
- <!-- since fields of this type are by default not stored or indexed, any data added to
- them will be ignored outright
- -->
- <fieldType name="ignored" stored="false" indexed="false" class="solr.StrField"/>
-
- <field name="title" type="string" indexed="true" stored="true"/>
- <field name="author" type="string" indexed="true" stored="true"/>
- <field name="text" type="text" indexed="true" stored="true"/>
- <field name="foo_i" type="int" indexed="true" stored="false"/>
- <field name="home" type="latLon" indexed="true" stored="true"/>
-</schema>
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml
deleted file mode 100644
index 344589e..0000000
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/collection1/conf/dataimport-solrconfig.xml
+++ /dev/null
@@ -1,277 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<config>
- <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
- <indexConfig>
- <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
- </indexConfig>
-
- <!-- Used to specify an alternate directory to hold all index data
- other than the default ./data under the Solr home.
- If replication is in use, this should match the replication configuration. -->
- <dataDir>${solr.data.dir:}</dataDir>
-
- <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
- <schemaFactory class="ClassicIndexSchemaFactory"/>
-
- <!-- the default high-performance update handler -->
- <updateHandler class="solr.DirectUpdateHandler2">
-
- <!-- A prefix of "solr." for class names is an alias that
- causes solr to search appropriate packages, including
- org.apache.solr.(search|update|request|core|analysis)
- -->
-
- <!-- Limit the number of deletions Solr will buffer during doc updating.
-
- Setting this lower can help bound memory use during indexing.
- -->
- <maxPendingDeletes>100000</maxPendingDeletes>
-
- </updateHandler>
-
-
- <query>
- <!-- Maximum number of clauses in a boolean query... can affect
- range or prefix queries that expand to big boolean
- queries. An exception is thrown if exceeded. -->
- <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>
-
-
- <!-- Cache used by SolrIndexSearcher for filters (DocSets),
- unordered sets of *all* documents that match a query.
- When a new searcher is opened, its caches may be prepopulated
- or "autowarmed" using data from caches in the old searcher.
- autowarmCount is the number of items to prepopulate. For CaffeineCache,
- the autowarmed items will be the most recently accessed items.
- Parameters:
- class - the SolrCache implementation (currently only CaffeineCache)
- size - the maximum number of entries in the cache
- initialSize - the initial capacity (number of entries) of
- the cache. (seel java.util.HashMap)
- autowarmCount - the number of entries to prepopulate from
- and old cache.
- -->
- <filterCache
- class="solr.CaffeineCache"
- size="512"
- initialSize="512"
- autowarmCount="256"/>
-
- <!-- queryResultCache caches results of searches - ordered lists of
- document ids (DocList) based on a query, a sort, and the range
- of documents requested. -->
- <queryResultCache
- class="solr.CaffeineCache"
- size="512"
- initialSize="512"
- autowarmCount="256"/>
-
- <!-- documentCache caches Lucene Document objects (the stored fields for each document).
- Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
- <documentCache
- class="solr.CaffeineCache"
- size="512"
- initialSize="512"
- autowarmCount="0"/>
-
- <!-- If true, stored fields that are not requested will be loaded lazily.
-
- This can result in a significant speed improvement if the usual case is to
- not load all stored fields, especially if the skipped fields are large compressed
- text fields.
- -->
- <enableLazyFieldLoading>true</enableLazyFieldLoading>
-
- <!-- Example of a generic cache. These caches may be accessed by name
- through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
- The purpose is to enable easy caching of user/application level data.
- The regenerator argument should be specified as an implementation
- of solr.search.CacheRegenerator if autowarming is desired. -->
- <!--
- <cache name="myUserCache"
- class="solr.CaffeineCache"
- size="4096"
- initialSize="1024"
- autowarmCount="1024"
- regenerator="org.mycompany.mypackage.MyRegenerator"
- />
- -->
-
- <!-- An optimization that attempts to use a filter to satisfy a search.
- If the requested sort does not include score, then the filterCache
- will be checked for a filter matching the query. If found, the filter
- will be used as the source of document ids, and then the sort will be
- applied to that.
- <useFilterForSortedQuery>true</useFilterForSortedQuery>
- -->
-
- <!-- An optimization for use with the queryResultCache. When a search
- is requested, a superset of the requested number of document ids
- are collected. For example, if a search for a particular query
- requests matching documents 10 through 19, and queryWindowSize is 50,
- then documents 0 through 49 will be collected and cached. Any further
- requests in that range can be satisfied via the cache. -->
- <queryResultWindowSize>50</queryResultWindowSize>
-
- <!-- Maximum number of documents to cache for any entry in the
- queryResultCache. -->
- <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
-
- <!-- a newSearcher event is fired whenever a new searcher is being prepared
- and there is a current searcher handling requests (aka registered). -->
- <!-- QuerySenderListener takes an array of NamedList and executes a
- local query request for each NamedList in sequence. -->
- <!--<listener event="newSearcher" class="solr.QuerySenderListener">-->
- <!--<arr name="queries">-->
- <!--<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>-->
- <!--<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>-->
- <!--<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>-->
- <!--</arr>-->
- <!--</listener>-->
-
- <!-- a firstSearcher event is fired whenever a new searcher is being
- prepared but there is no current registered searcher to handle
- requests or to gain autowarming data from. -->
- <!--<listener event="firstSearcher" class="solr.QuerySenderListener">-->
- <!--<arr name="queries">-->
- <!--</arr>-->
- <!--</listener>-->
-
- <!-- If a search request comes in and there is no current registered searcher,
- then immediately register the still warming searcher and use it. If
- "false" then all requests will block until the first searcher is done
- warming. -->
- <useColdSearcher>false</useColdSearcher>
-
- <!-- Maximum number of searchers that may be warming in the background
- concurrently. An error is returned if this limit is exceeded. Recommend
- 1-2 for read-only followers, higher for leaders w/o cache warming. -->
- <maxWarmingSearchers>4</maxWarmingSearchers>
-
- </query>
-
- <requestDispatcher>
- <!--Make sure your system has some authentication before enabling remote streaming!
- <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="-1" />
- -->
-
- <!-- Set HTTP caching related parameters (for proxy caches and clients).
-
- To get the behaviour of Solr 1.2 (ie: no caching related headers)
- use the never304="true" option and do not specify a value for
- <cacheControl>
- -->
- <httpCaching never304="true">
- <!--httpCaching lastModifiedFrom="openTime"
- etagSeed="Solr"-->
- <!-- lastModFrom="openTime" is the default, the Last-Modified value
- (and validation against If-Modified-Since requests) will all be
- relative to when the current Searcher was opened.
- You can change it to lastModFrom="dirLastMod" if you want the
- value to exactly corrispond to when the physical index was last
- modified.
-
- etagSeed="..." is an option you can change to force the ETag
- header (and validation against If-None-Match requests) to be
- differnet even if the index has not changed (ie: when making
- significant changes to your config file)
-
- lastModifiedFrom and etagSeed are both ignored if you use the
- never304="true" option.
- -->
- <!-- If you include a <cacheControl> directive, it will be used to
- generate a Cache-Control header, as well as an Expires header
- if the value contains "max-age="
-
- By default, no Cache-Control header is generated.
-
- You can use the <cacheControl> option even if you have set
- never304="true"
- -->
- <!-- <cacheControl>max-age=30, public</cacheControl> -->
- </httpCaching>
- </requestDispatcher>
-
- <requestHandler name="/select" class="solr.SearchHandler">
- <!-- default values for query parameters -->
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <!--
- <int name="rows">10</int>
- <str name="fl">*</str>
- <str name="version">2.1</str>
- -->
- </lst>
- </requestHandler>
-
- <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
- </requestHandler>
-
- <!--
-
- Search components are registered to SolrCore and used by Search Handlers
-
- By default, the following components are avaliable:
-
- <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
- <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
- <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
- <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
- <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
-
- If you register a searchComponent to one of the standard names, that will be used instead.
-
- -->
-
- <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- </lst>
- <!--
- By default, this will register the following components:
-
- <arr name="components">
- <str>query</str>
- <str>facet</str>
- <str>mlt</str>
- <str>highlight</str>
- <str>debug</str>
- </arr>
-
- To insert handlers before or after the 'standard' components, use:
-
- <arr name="first-components">
- <str>first</str>
- </arr>
-
- <arr name="last-components">
- <str>last</str>
- </arr>
-
- -->
- </requestHandler>
-
- <!-- config for the admin interface -->
- <admin>
- <defaultQuery>*:*</defaultQuery>
- </admin>
-
-</config>
-
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html
deleted file mode 100644
index 1037481..0000000
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/structured.html
+++ /dev/null
@@ -1,29 +0,0 @@
-<!DOCTYPE html>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<html>
-<head>
- <title>Title in the header</title>
-</head>
-<body>
-<h1>H1 Header</h1>
-<div>Basic div</div>
-<div class="classAttribute">Div with attribute</div>
-</body>
-</html>
-
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg
deleted file mode 100644
index 10d1ebb..0000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_jpeg.jpg and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx
deleted file mode 100644
index cd562cb..0000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_recursive_embedded.docx and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx
deleted file mode 100644
index 659ecdd..0000000
Binary files a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/test_vsdx.vsdx and /dev/null differ
diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
deleted file mode 100644
index 027a8d7..0000000
--- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrInputDocument;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-// Test mailbox is like this: foldername(mailcount)
-// top1(2) -> child11(6)
-// -> child12(0)
-// top2(2) -> child21(1)
-// -> grandchild211(2)
-// -> grandchild212(1)
-// -> child22(2)
-
-/**
- * Test for MailEntityProcessor. The tests are marked as ignored because we'd need a mail server (real or mocked) for
- * these to work.
- *
- * TODO: Find a way to make the tests actually test code
- *
- *
- * @see org.apache.solr.handler.dataimport.MailEntityProcessor
- * @since solr 1.4
- */
-@Ignore("Needs a Mock Mail Server to work")
-public class TestMailEntityProcessor extends AbstractDataImportHandlerTestCase {
-
- // Credentials
- private static final String user = "user";
- private static final String password = "password";
- private static final String host = "host";
- private static final String protocol = "imaps";
-
- private static Map<String, String> paramMap = new HashMap<>();
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testConnection() {
- // also tests recurse = false and default settings
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "false");
- paramMap.put("processAttachement", "false");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top1 did not return 2 messages", swi.docs.size(), 2);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testRecursion() {
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its children did not return 8 messages", swi.docs.size(), 8);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testExclude() {
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("exclude", ".*grandchild.*");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 5 messages", swi.docs.size(), 5);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testInclude() {
- paramMap.put("folders", "top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("include", ".*grandchild.*");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- public void testIncludeAndExclude() {
- paramMap.put("folders", "top1,top2");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("exclude", ".*top1.*");
- paramMap.put("include", ".*grandchild.*");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- @SuppressWarnings({"unchecked"})
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
- }
-
- @Test
- @Ignore("Needs a Mock Mail Server to work")
- @SuppressWarnings({"unchecked"})
- public void testFetchTimeSince() throws ParseException {
- paramMap.put("folders", "top1/child11");
- paramMap.put("recurse", "true");
- paramMap.put("processAttachement", "false");
- paramMap.put("fetchMailsSince", "2008-12-26 00:00:00");
- DataImporter di = new DataImporter();
- di.loadAndInit(getConfigFromMap(paramMap));
- RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
- SolrWriterImpl swi = new SolrWriterImpl();
- di.runCmd(rp, swi);
- assertEquals("top2 and its direct children did not return 3 messages", swi.docs.size(), 3);
- }
-
- private String getConfigFromMap(Map<String, String> params) {
- String conf =
- "<dataConfig>" +
- "<document>" +
- "<entity processor=\"org.apache.solr.handler.dataimport.MailEntityProcessor\" " +
- "someconfig" +
- "/>" +
- "</document>" +
- "</dataConfig>";
- params.put("user", user);
- params.put("password", password);
- params.put("host", host);
- params.put("protocol", protocol);
- StringBuilder attribs = new StringBuilder("");
- for (String key : params.keySet())
- attribs.append(" ").append(key).append("=" + "\"").append(params.get(key)).append("\"");
- attribs.append(" ");
- return conf.replace("someconfig", attribs.toString());
- }
-
- static class SolrWriterImpl extends SolrWriter {
- List<SolrInputDocument> docs = new ArrayList<>();
- Boolean deleteAllCalled;
- Boolean commitCalled;
-
- public SolrWriterImpl() {
- super(null, null);
- }
-
- @Override
- public boolean upload(SolrInputDocument doc) {
- return docs.add(doc);
- }
-
-
- @Override
- public void doDeleteAll() {
- deleteAllCalled = Boolean.TRUE;
- }
-
- @Override
- public void commit(boolean b) {
- commitCalled = Boolean.TRUE;
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
deleted file mode 100644
index 05acfca..0000000
--- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import java.util.Locale;
-
-/**Testcase for TikaEntityProcessor
- *
- * @since solr 3.1
- */
-public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
- private String conf =
- "<dataConfig>" +
- " <dataSource type=\"BinFileDataSource\"/>" +
- " <document>" +
- " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
- " <field column=\"Author\" meta=\"true\" name=\"author\"/>" +
- " <field column=\"title\" meta=\"true\" name=\"title\"/>" +
- " <field column=\"text\"/>" +
- " </entity>" +
- " </document>" +
- "</dataConfig>";
-
- private String skipOnErrConf =
- "<dataConfig>" +
- " <dataSource type=\"BinFileDataSource\"/>" +
- " <document>" +
- " <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
- "<field column=\"content\" name=\"text\"/>" +
- " </entity>" +
- " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
- " <field column=\"text\"/>" +
- "</entity>" +
- " </document>" +
- "</dataConfig>";
-
- private String spatialConf =
- "<dataConfig>" +
- " <dataSource type=\"BinFileDataSource\"/>" +
- " <document>" +
- " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" +
- getFile("dihextras/test_jpeg.jpg").getAbsolutePath() + "\" spatialMetadataField=\"home\">" +
- " <field column=\"text\"/>" +
- " </entity>" +
- " </document>" +
- "</dataConfig>";
-
- private String vsdxConf =
- "<dataConfig>" +
- " <dataSource type=\"BinFileDataSource\"/>" +
- " <document>" +
- " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/test_vsdx.vsdx").getAbsolutePath() + "\" >" +
- " <field column=\"text\"/>" +
- " </entity>" +
- " </document>" +
- "</dataConfig>";
-
- private String[] tests = {
- "//*[@numFound='1']"
- ,"//str[@name='author'][.='Grant Ingersoll']"
- ,"//str[@name='title'][.='solr-word']"
- ,"//str[@name='text']"
- };
-
- private String[] testsHTMLDefault = {
- "//*[@numFound='1']"
- , "//str[@name='text'][contains(.,'Basic div')]"
- , "//str[@name='text'][contains(.,'<h1>')]"
- , "//str[@name='text'][not(contains(.,'<div>'))]" //default mapper lower-cases elements as it maps
- , "//str[@name='text'][not(contains(.,'<DIV>'))]"
- };
-
- private String[] testsHTMLIdentity = {
- "//*[@numFound='1']"
- , "//str[@name='text'][contains(.,'Basic div')]"
- , "//str[@name='text'][contains(.,'<h1>')]"
- , "//str[@name='text'][contains(.,'<div>')]"
- , "//str[@name='text'][contains(.,'class=\"classAttribute\"')]" //attributes are lower-cased
- };
-
- private String[] testsSpatial = {
- "//*[@numFound='1']"
- };
-
- private String[] testsEmbedded = {
- "//*[@numFound='1']",
- "//str[@name='text'][contains(.,'When in the Course')]"
- };
-
- private String[] testsIgnoreEmbedded = {
- "//*[@numFound='1']",
- "//str[@name='text'][not(contains(.,'When in the Course'))]"
- };
-
- private String[] testsVSDX = {
- "//*[@numFound='1']",
- "//str[@name='text'][contains(.,'Arrears')]"
- };
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
- new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
- initCore("dataimport-solrconfig.xml", "dataimport-schema-no-unique-key.xml", getFile("dihextras/solr").getAbsolutePath());
- }
-
- @Test
- public void testIndexingWithTikaEntityProcessor() throws Exception {
- runFullImport(conf);
- assertQ(req("*:*"), tests );
- }
-
- @Test
- public void testSkip() throws Exception {
- runFullImport(skipOnErrConf);
- assertQ(req("*:*"), "//*[@numFound='1']");
- }
-
- @Test
- public void testVSDX() throws Exception {
- //this ensures that we've included the curvesapi dependency
- //and that the ConnectsType class is bundled with poi-ooxml-schemas.
- runFullImport(vsdxConf);
- assertQ(req("*:*"), testsVSDX);
- }
-
- @Test
- public void testTikaHTMLMapperEmpty() throws Exception {
- runFullImport(getConfigHTML(null));
- assertQ(req("*:*"), testsHTMLDefault);
- }
-
- @Test
- public void testTikaHTMLMapperDefault() throws Exception {
- runFullImport(getConfigHTML("default"));
- assertQ(req("*:*"), testsHTMLDefault);
- }
-
- @Test
- public void testTikaHTMLMapperIdentity() throws Exception {
- runFullImport(getConfigHTML("identity"));
- assertQ(req("*:*"), testsHTMLIdentity);
- }
-
- @Test
- public void testTikaGeoMetadata() throws Exception {
- runFullImport(spatialConf);
- String pt = "38.97,-77.018";
- Double distance = 5.0d;
- assertQ(req("q", "*:* OR foo_i:" + random().nextInt(100), "fq",
- "{!geofilt sfield=\"home\"}\"",
- "pt", pt, "d", String.valueOf(distance)), testsSpatial);
- }
-
- private String getConfigHTML(String htmlMapper) {
- return
- "<dataConfig>" +
- " <dataSource type='BinFileDataSource'/>" +
- " <document>" +
- " <entity name='Tika' format='xml' processor='TikaEntityProcessor' " +
- " url='" + getFile("dihextras/structured.html").getAbsolutePath() + "' " +
- ((htmlMapper == null) ? "" : (" htmlMapper='" + htmlMapper + "'")) + ">" +
- " <field column='text'/>" +
- " </entity>" +
- " </document>" +
- "</dataConfig>";
-
- }
-
- @Test
- public void testEmbeddedDocsLegacy() throws Exception {
- //test legacy behavior: ignore embedded docs
- runFullImport(conf);
- assertQ(req("*:*"), testsIgnoreEmbedded);
- }
-
- @Test
- public void testEmbeddedDocsTrue() throws Exception {
- runFullImport(getConfigEmbedded(true));
- assertQ(req("*:*"), testsEmbedded);
- }
-
- @Test
- public void testEmbeddedDocsFalse() throws Exception {
- runFullImport(getConfigEmbedded(false));
- assertQ(req("*:*"), testsIgnoreEmbedded);
- }
-
- private String getConfigEmbedded(boolean extractEmbedded) {
- return
- "<dataConfig>" +
- " <dataSource type=\"BinFileDataSource\"/>" +
- " <document>" +
- " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" +
- getFile("dihextras/test_recursive_embedded.docx").getAbsolutePath() + "\" " +
- " extractEmbedded=\""+extractEmbedded+"\">" +
- " <field column=\"Author\" meta=\"true\" name=\"author\"/>" +
- " <field column=\"title\" meta=\"true\" name=\"title\"/>" +
- " <field column=\"text\"/>" +
- " </entity>" +
- " </document>" +
- "</dataConfig>";
- }
-}
diff --git a/solr/contrib/dataimporthandler/README.md b/solr/contrib/dataimporthandler/README.md
deleted file mode 100644
index 8dc9391..0000000
--- a/solr/contrib/dataimporthandler/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-Apache Solr - DataImportHandler
-================================
-
-Introduction
-------------
-DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
-HTTP data sources quick and easy.
-
-Important Note
---------------
-Although Solr strives to be agnostic of the Locale where the server is
-running, some code paths in DataImportHandler are known to depend on the
-System default Locale, Timezone, or Charset. It is recommended that when
-running Solr you set the following system properties:
- -Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
-
-where xx, YY, and ZZZ are consistent with any database server's configuration.
-
-Deprecation notice
-------------------
-This contrib module is deprecated as of v8.6, scheduled for removal in Solr 9.0.
-The reason is that DIH is no longer being maintained in a manner we feel is necessary in order to keep it
-healthy and secure. Also it was not designed to work with SolrCloud and does not meet current performance requirements.
-
-The project hopes that the community will take over maintenance of DIH as a 3rd party package (See SOLR-14066 for more details). Please reach out to us at the dev@ mailing list if you want to help.
-
diff --git a/solr/contrib/dataimporthandler/build.gradle b/solr/contrib/dataimporthandler/build.gradle
deleted file mode 100644
index 9286d43..0000000
--- a/solr/contrib/dataimporthandler/build.gradle
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-apply plugin: 'java-library'
-
-description = 'Data Import Handler'
-
-dependencies {
- implementation project(':solr:core')
-
- testImplementation project(':solr:test-framework')
-
- testImplementation('org.mockito:mockito-core', {
- exclude group: "net.bytebuddy", module: "byte-buddy-agent"
- })
- testImplementation ('org.hsqldb:hsqldb')
- testImplementation ('org.apache.derby:derby')
- testImplementation ('org.objenesis:objenesis')
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java
deleted file mode 100644
index f4b1d7a..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.util.ContentStream;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.InputStream;
-import java.io.IOException;
-import java.util.Properties;
-/**
- * <p> A data source implementation which can be used to read binary stream from content streams. </p> <p> Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
- * details. </p>
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 3.1
- */
-
-public class BinContentStreamDataSource extends DataSource<InputStream> {
- private ContextImpl context;
- private ContentStream contentStream;
- private InputStream in;
-
-
- @Override
- public void init(Context context, Properties initProps) {
- this.context = (ContextImpl) context;
- }
-
- @Override
- public InputStream getData(String query) {
- contentStream = context.getDocBuilder().getReqParams().getContentStream();
- if (contentStream == null)
- throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
- try {
- return in = contentStream.getStream();
- } catch (IOException e) {
- DataImportHandlerException.wrapAndThrow(SEVERE, e);
- return null;
- }
- }
-
- @Override
- public void close() {
- if (contentStream != null) {
- try {
- if (in == null) in = contentStream.getStream();
- in.close();
- } catch (IOException e) {
- /*no op*/
- }
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java
deleted file mode 100644
index dc7a0f5..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinFileDataSource.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.InputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.util.Properties;
-/**
- * <p>
- * A DataSource which reads from local files
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 3.1
- */
-
-public class BinFileDataSource extends DataSource<InputStream>{
- protected String basePath;
- @Override
- public void init(Context context, Properties initProps) {
- basePath = initProps.getProperty(FileDataSource.BASE_PATH);
- }
-
- @Override
- public InputStream getData(String query) {
- File f = FileDataSource.getFile(basePath,query);
- try {
- return new FileInputStream(f);
- } catch (FileNotFoundException e) {
- wrapAndThrow(SEVERE,e,"Unable to open file "+f.getAbsolutePath());
- return null;
- }
- }
-
- @Override
- public void close() {
-
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java
deleted file mode 100644
index 03a30ab..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/BinURLDataSource.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
-import static org.apache.solr.handler.dataimport.URLDataSource.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.net.URL;
-import java.net.URLConnection;
-import java.util.Properties;
-/**
- * <p> A data source implementation which can be used to read binary streams using HTTP. </p> <p> Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
- * details. </p>
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 3.1
- */
-public class BinURLDataSource extends DataSource<InputStream>{
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private String baseUrl;
- private int connectionTimeout = CONNECTION_TIMEOUT;
-
- private int readTimeout = READ_TIMEOUT;
-
- private Context context;
-
- private Properties initProps;
-
- public BinURLDataSource() { }
-
- @Override
- public void init(Context context, Properties initProps) {
- this.context = context;
- this.initProps = initProps;
-
- baseUrl = getInitPropWithReplacements(BASE_URL);
- String cTimeout = getInitPropWithReplacements(CONNECTION_TIMEOUT_FIELD_NAME);
- String rTimeout = getInitPropWithReplacements(READ_TIMEOUT_FIELD_NAME);
- if (cTimeout != null) {
- try {
- connectionTimeout = Integer.parseInt(cTimeout);
- } catch (NumberFormatException e) {
- log.warn("Invalid connection timeout: {}", cTimeout);
- }
- }
- if (rTimeout != null) {
- try {
- readTimeout = Integer.parseInt(rTimeout);
- } catch (NumberFormatException e) {
- log.warn("Invalid read timeout: {}", rTimeout);
- }
- }
- }
-
- @Override
- public InputStream getData(String query) {
- URL url = null;
- try {
- if (URIMETHOD.matcher(query).find()) url = new URL(query);
- else url = new URL(baseUrl + query);
- log.debug("Accessing URL: {}", url);
- URLConnection conn = url.openConnection();
- conn.setConnectTimeout(connectionTimeout);
- conn.setReadTimeout(readTimeout);
- return conn.getInputStream();
- } catch (Exception e) {
- log.error("Exception thrown while getting data", e);
- wrapAndThrow (SEVERE, e, "Exception in invoking url " + url);
- return null;//unreachable
- }
- }
-
- @Override
- public void close() { }
-
- private String getInitPropWithReplacements(String propertyName) {
- final String expr = initProps.getProperty(propertyName);
- if (expr == null) {
- return null;
- }
- return context.replaceTokens(expr);
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java
deleted file mode 100644
index 544761f..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/CachePropertyUtil.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-public class CachePropertyUtil {
- public static String getAttributeValueAsString(Context context, String attr) {
- Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
- if (o == null) {
- o = context.getResolvedEntityAttribute(attr);
- }
- if (o == null && context.getRequestParameters() != null) {
- o = context.getRequestParameters().get(attr);
- }
- if (o == null) {
- return null;
- }
- return o.toString();
- }
-
- public static Object getAttributeValue(Context context, String attr) {
- Object o = context.getSessionAttribute(attr, Context.SCOPE_ENTITY);
- if (o == null) {
- o = context.getResolvedEntityAttribute(attr);
- }
- if (o == null && context.getRequestParameters() != null) {
- o = context.getRequestParameters().get(attr);
- }
- if (o == null) {
- return null;
- }
- return o;
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java
deleted file mode 100644
index 2e9d93a0..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ClobTransformer.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.HTMLStripTransformer.TRUE;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.sql.Clob;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-/**
- * {@link Transformer} instance which converts a {@link Clob} to a {@link String}.
- * <p>
- * Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.4
- */
-public class ClobTransformer extends Transformer {
- @Override
- public Object transformRow(Map<String, Object> aRow, Context context) {
- for (Map<String, String> map : context.getAllEntityFields()) {
- if (!TRUE.equals(map.get(CLOB))) continue;
- String column = map.get(DataImporter.COLUMN);
- String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
- if (srcCol == null)
- srcCol = column;
- Object o = aRow.get(srcCol);
- if (o instanceof List) {
- @SuppressWarnings({"unchecked"})
- List<Clob> inputs = (List<Clob>) o;
- List<String> results = new ArrayList<>();
- for (Object input : inputs) {
- if (input instanceof Clob) {
- Clob clob = (Clob) input;
- results.add(readFromClob(clob));
- }
- }
- aRow.put(column, results);
- } else {
- if (o instanceof Clob) {
- Clob clob = (Clob) o;
- aRow.put(column, readFromClob(clob));
- }
- }
- }
- return aRow;
- }
-
- private String readFromClob(Clob clob) {
- Reader reader = FieldReaderDataSource.readCharStream(clob);
- StringBuilder sb = new StringBuilder();
- char[] buf = new char[1024];
- int len;
- try {
- while ((len = reader.read(buf)) != -1) {
- sb.append(buf, 0, len);
- }
- } catch (IOException e) {
- DataImportHandlerException.wrapAndThrow(DataImportHandlerException.SEVERE, e);
- }
- return sb.toString();
- }
-
- public static final String CLOB = "clob";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java
deleted file mode 100644
index 179df23..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ConfigParseUtil.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.w3c.dom.Element;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-public class ConfigParseUtil {
- public static String getStringAttribute(Element e, String name, String def) {
- String r = e.getAttribute(name);
- if (r == null || "".equals(r.trim()))
- r = def;
- return r;
- }
-
- public static HashMap<String, String> getAllAttributes(Element e) {
- HashMap<String, String> m = new HashMap<>();
- NamedNodeMap nnm = e.getAttributes();
- for (int i = 0; i < nnm.getLength(); i++) {
- m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue());
- }
- return m;
- }
-
- public static String getText(Node elem, StringBuilder buffer) {
- if (elem.getNodeType() != Node.CDATA_SECTION_NODE) {
- NodeList childs = elem.getChildNodes();
- for (int i = 0; i < childs.getLength(); i++) {
- Node child = childs.item(i);
- short childType = child.getNodeType();
- if (childType != Node.COMMENT_NODE
- && childType != Node.PROCESSING_INSTRUCTION_NODE) {
- getText(child, buffer);
- }
- }
- } else {
- buffer.append(elem.getNodeValue());
- }
-
- return buffer.toString();
- }
-
- public static List<Element> getChildNodes(Element e, String byName) {
- List<Element> result = new ArrayList<>();
- NodeList l = e.getChildNodes();
- for (int i = 0; i < l.getLength(); i++) {
- if (e.equals(l.item(i).getParentNode())
- && byName.equals(l.item(i).getNodeName()))
- result.add((Element) l.item(i));
- }
- return result;
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
deleted file mode 100644
index 4482160..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.util.ContentStream;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Properties;
-
-/**
- * A DataSource implementation which reads from the ContentStream of a POST request
- * <p>
- * Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 1.4
- */
-public class ContentStreamDataSource extends DataSource<Reader> {
- private ContextImpl context;
- private ContentStream contentStream;
- private Reader reader;
-
- @Override
- public void init(Context context, Properties initProps) {
- this.context = (ContextImpl) context;
- }
-
- @Override
- public Reader getData(String query) {
- contentStream = context.getDocBuilder().getReqParams().getContentStream();
- if (contentStream == null)
- throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
- try {
- return reader = contentStream.getReader();
- } catch (IOException e) {
- DataImportHandlerException.wrapAndThrow(SEVERE, e);
- return null;
- }
- }
-
- @Override
- public void close() {
- if (contentStream != null) {
- try {
- if (reader == null) reader = contentStream.getReader();
- reader.close();
- } catch (IOException e) {
- }
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java
deleted file mode 100644
index 70dbbcb..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.core.SolrCore;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * <p>
- * This abstract class gives access to all available objects. So any
- * component implemented by a user can have the full power of DataImportHandler
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public abstract class Context {
- public static final String FULL_DUMP = "FULL_DUMP", DELTA_DUMP = "DELTA_DUMP", FIND_DELTA = "FIND_DELTA";
-
- /**
- * An object stored in entity scope is valid only for the current entity for the current document only.
- */
- public static final String SCOPE_ENTITY = "entity";
-
- /**
- * An object stored in global scope is available for the current import only but across entities and documents.
- */
- public static final String SCOPE_GLOBAL = "global";
-
- /**
- * An object stored in document scope is available for the current document only but across entities.
- */
- public static final String SCOPE_DOC = "document";
-
- /**
- * An object stored in 'solrcore' scope is available across imports, entities and documents throughout the life of
- * a solr core. A solr core unload or reload will destroy this data.
- */
- public static final String SCOPE_SOLR_CORE = "solrcore";
-
- /**
- * Get the value of any attribute put into this entity
- *
- * @param name name of the attribute eg: 'name'
- * @return value of named attribute in entity
- */
- public abstract String getEntityAttribute(String name);
-
- /**
- * Get the value of any attribute put into this entity after resolving all variables found in the attribute value
- * @param name name of the attribute
- * @return value of the named attribute after resolving all variables
- */
- public abstract String getResolvedEntityAttribute(String name);
-
- /**
- * Returns all the fields put into an entity. each item (which is a map ) in
- * the list corresponds to one field. each if the map contains the attribute
- * names and values in a field
- *
- * @return all fields in an entity
- */
- public abstract List<Map<String, String>> getAllEntityFields();
-
- /**
- * Returns the VariableResolver used in this entity which can be used to
- * resolve the tokens in ${<namespce.name>}
- *
- * @return a VariableResolver instance
- * @see org.apache.solr.handler.dataimport.VariableResolver
- */
-
- public abstract VariableResolver getVariableResolver();
-
- /**
- * Gets the datasource instance defined for this entity. Do not close() this instance.
- * Transformers should use the getDataSource(String name) method.
- *
- * @return a new DataSource instance as configured for the current entity
- * @see org.apache.solr.handler.dataimport.DataSource
- * @see #getDataSource(String)
- */
- @SuppressWarnings({"rawtypes"})
- public abstract DataSource getDataSource();
-
- /**
- * Gets a new DataSource instance with a name. Ensure that you close() this after use
- * because this is created just for this method call.
- *
- * @param name Name of the dataSource as defined in the dataSource tag
- * @return a new DataSource instance
- * @see org.apache.solr.handler.dataimport.DataSource
- */
- @SuppressWarnings({"rawtypes"})
- public abstract DataSource getDataSource(String name);
-
- /**
- * Returns the instance of EntityProcessor used for this entity
- *
- * @return instance of EntityProcessor used for the current entity
- * @see org.apache.solr.handler.dataimport.EntityProcessor
- */
- public abstract EntityProcessor getEntityProcessor();
-
- /**
- * Store values in a certain name and scope (entity, document,global)
- *
- * @param name the key
- * @param val the value
- * @param scope the scope in which the given key, value pair is to be stored
- */
- public abstract void setSessionAttribute(String name, Object val, String scope);
-
- /**
- * get a value by name in the given scope (entity, document,global)
- *
- * @param name the key
- * @param scope the scope from which the value is to be retrieved
- * @return the object stored in the given scope with the given key
- */
- public abstract Object getSessionAttribute(String name, String scope);
-
- /**
- * Get the context instance for the parent entity. works only in the full dump
- * If the current entity is rootmost a null is returned
- *
- * @return parent entity's Context
- */
- public abstract Context getParentContext();
-
- /**
- * The request parameters passed over HTTP for this command the values in the
- * map are either String(for single valued parameters) or List<String> (for
- * multi-valued parameters)
- *
- * @return the request parameters passed in the URL to initiate this process
- */
- public abstract Map<String, Object> getRequestParameters();
-
- /**
- * Returns if the current entity is the root entity
- *
- * @return true if current entity is the root entity, false otherwise
- */
- public abstract boolean isRootEntity();
-
- /**
- * Returns the current process FULL_DUMP, DELTA_DUMP, FIND_DELTA
- *
- * @return the type of the current running process
- */
- public abstract String currentProcess();
-
- /**
- * Exposing the actual SolrCore to the components
- *
- * @return the core
- */
- public abstract SolrCore getSolrCore();
-
- /**
- * Makes available some basic running statistics such as "docCount",
- * "deletedDocCount", "rowCount", "queryCount" and "skipDocCount"
- *
- * @return a Map containing running statistics of the current import
- */
- public abstract Map<String, Object> getStats();
-
- /**
- * Returns the text specified in the script tag in the data-config.xml
- */
- public abstract String getScript();
-
- /**
- * Returns the language of the script as specified in the script tag in data-config.xml
- */
- public abstract String getScriptLanguage();
-
- /**delete a document by id
- */
- public abstract void deleteDoc(String id);
-
- /**delete documents by query
- */
- public abstract void deleteDocByQuery(String query);
-
- /**Use this directly to resolve variable
- * @param var the variable name
- * @return the resolved value
- */
- public abstract Object resolve(String var);
-
- /** Resolve variables in a template
- *
- * @return The string w/ variables resolved
- */
- public abstract String replaceTokens(String template);
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java
deleted file mode 100644
index 3d9f386..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.dataimport.config.Script;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * <p>
- * An implementation for the Context
- * </p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public class ContextImpl extends Context {
- protected EntityProcessorWrapper epw;
-
- private ContextImpl parent;
-
- private VariableResolver resolver;
-
- @SuppressWarnings({"rawtypes"})
- private DataSource ds;
-
- private String currProcess;
-
- private Map<String, Object> requestParams;
-
- private DataImporter dataImporter;
-
- private Map<String, Object> entitySession, globalSession;
-
- private Exception lastException = null;
-
- DocBuilder.DocWrapper doc;
-
- DocBuilder docBuilder;
-
-
-
- public ContextImpl(EntityProcessorWrapper epw, VariableResolver resolver,
- @SuppressWarnings({"rawtypes"})DataSource ds, String currProcess,
- Map<String, Object> global, ContextImpl parentContext, DocBuilder docBuilder) {
- this.epw = epw;
- this.docBuilder = docBuilder;
- this.resolver = resolver;
- this.ds = ds;
- this.currProcess = currProcess;
- if (docBuilder != null) {
- this.requestParams = docBuilder.getReqParams().getRawParams();
- dataImporter = docBuilder.dataImporter;
- }
- globalSession = global;
- parent = parentContext;
- }
-
- @Override
- public String getEntityAttribute(String name) {
- return epw==null || epw.getEntity() == null ? null : epw.getEntity().getAllAttributes().get(name);
- }
-
- @Override
- public String getResolvedEntityAttribute(String name) {
- return epw==null || epw.getEntity() == null ? null : resolver.replaceTokens(epw.getEntity().getAllAttributes().get(name));
- }
-
- @Override
- public List<Map<String, String>> getAllEntityFields() {
- return epw==null || epw.getEntity() == null ? Collections.emptyList() : epw.getEntity().getAllFieldsList();
- }
-
- @Override
- public VariableResolver getVariableResolver() {
- return resolver;
- }
-
- @Override
- @SuppressWarnings({"rawtypes"})
- public DataSource getDataSource() {
- if (ds != null) return ds;
- if(epw==null) { return null; }
- if (epw!=null && epw.getDatasource() == null) {
- epw.setDatasource(dataImporter.getDataSourceInstance(epw.getEntity(), epw.getEntity().getDataSourceName(), this));
- }
- if (epw!=null && epw.getDatasource() != null && docBuilder != null && docBuilder.verboseDebug &&
- Context.FULL_DUMP.equals(currentProcess())) {
- //debug is not yet implemented properly for deltas
- epw.setDatasource(docBuilder.getDebugLogger().wrapDs(epw.getDatasource()));
- }
- return epw.getDatasource();
- }
-
- @Override
- @SuppressWarnings({"rawtypes"})
- public DataSource getDataSource(String name) {
- return dataImporter.getDataSourceInstance(epw==null ? null : epw.getEntity(), name, this);
- }
-
- @Override
- public boolean isRootEntity() {
- return epw==null ? false : epw.getEntity().isDocRoot();
- }
-
- @Override
- public String currentProcess() {
- return currProcess;
- }
-
- @Override
- public Map<String, Object> getRequestParameters() {
- return requestParams;
- }
-
- @Override
- public EntityProcessor getEntityProcessor() {
- return epw;
- }
-
- @Override
- public void setSessionAttribute(String name, Object val, String scope) {
- if(name == null) {
- return;
- }
- if (Context.SCOPE_ENTITY.equals(scope)) {
- if (entitySession == null) {
- entitySession = new HashMap<>();
- }
- entitySession.put(name, val);
- } else if (Context.SCOPE_GLOBAL.equals(scope)) {
- if (globalSession != null) {
- globalSession.put(name, val);
- }
- } else if (Context.SCOPE_DOC.equals(scope)) {
- DocBuilder.DocWrapper doc = getDocument();
- if (doc != null) {
- doc.setSessionAttribute(name, val);
- }
- } else if (SCOPE_SOLR_CORE.equals(scope)){
- if(dataImporter != null) {
- dataImporter.putToCoreScopeSession(name, val);
- }
- }
- }
-
- @Override
- public Object getSessionAttribute(String name, String scope) {
- if (Context.SCOPE_ENTITY.equals(scope)) {
- if (entitySession == null)
- return null;
- return entitySession.get(name);
- } else if (Context.SCOPE_GLOBAL.equals(scope)) {
- if (globalSession != null) {
- return globalSession.get(name);
- }
- } else if (Context.SCOPE_DOC.equals(scope)) {
- DocBuilder.DocWrapper doc = getDocument();
- return doc == null ? null: doc.getSessionAttribute(name);
- } else if (SCOPE_SOLR_CORE.equals(scope)){
- return dataImporter == null ? null : dataImporter.getFromCoreScopeSession(name);
- }
- return null;
- }
-
- @Override
- public Context getParentContext() {
- return parent;
- }
-
- private DocBuilder.DocWrapper getDocument() {
- ContextImpl c = this;
- while (true) {
- if (c.doc != null)
- return c.doc;
- if (c.parent != null)
- c = c.parent;
- else
- return null;
- }
- }
-
- void setDoc(DocBuilder.DocWrapper docWrapper) {
- this.doc = docWrapper;
- }
-
-
- @Override
- public SolrCore getSolrCore() {
- return dataImporter == null ? null : dataImporter.getCore();
- }
-
-
- @Override
- public Map<String, Object> getStats() {
- return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.<String, Object>emptyMap();
- }
-
- @Override
- public String getScript() {
- if (dataImporter != null) {
- Script script = dataImporter.getConfig().getScript();
- return script == null ? null : script.getText();
- }
- return null;
- }
-
- @Override
- public String getScriptLanguage() {
- if (dataImporter != null) {
- Script script = dataImporter.getConfig().getScript();
- return script == null ? null : script.getLanguage();
- }
- return null;
- }
-
- @Override
- public void deleteDoc(String id) {
- if(docBuilder != null){
- docBuilder.writer.deleteDoc(id);
- }
- }
-
- @Override
- public void deleteDocByQuery(String query) {
- if(docBuilder != null){
- docBuilder.writer.deleteByQuery(query);
- }
- }
-
- DocBuilder getDocBuilder(){
- return docBuilder;
- }
- @Override
- public Object resolve(String var) {
- return resolver.resolve(var);
- }
-
- @Override
- public String replaceTokens(String template) {
- return resolver.replaceTokens(template);
- }
-
- public Exception getLastException() { return lastException; }
-
- public void setLastException(Exception lastException) {this.lastException = lastException; }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java
deleted file mode 100644
index a67b3e4..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCache.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.Iterator;
-import java.util.Map;
-
-/**
- * <p>
- * A cache that allows a DIH entity's data to persist locally prior being joined
- * to other data and/or indexed.
- * </p>
- *
- * @lucene.experimental
- */
-public interface DIHCache extends Iterable<Map<String,Object>> {
-
- /**
- * <p>
- * Opens the cache using the specified properties. The {@link Context}
- * includes any parameters needed by the cache impl. This must be called
- * before any read/write operations are permitted.
- */
- void open(Context context);
-
- /**
- * <p>
- * Releases resources used by this cache, if possible. The cache is flushed
- * but not destroyed.
- * </p>
- */
- void close();
-
- /**
- * <p>
- * Persists any pending data to the cache
- * </p>
- */
- void flush();
-
- /**
- * <p>
- * Closes the cache, if open. Then removes all data, possibly removing the
- * cache entirely from persistent storage.
- * </p>
- */
- public void destroy();
-
- /**
- * <p>
- * Adds a document. If a document already exists with the same key, both
- * documents will exist in the cache, as the cache allows duplicate keys. To
- * update a key's documents, first call delete(Object key).
- * </p>
- */
- void add(Map<String, Object> rec);
-
- /**
- * <p>
- * Returns an iterator, allowing callers to iterate through the entire cache
- * in key, then insertion, order.
- * </p>
- */
- @Override
- Iterator<Map<String,Object>> iterator();
-
- /**
- * <p>
- * Returns an iterator, allowing callers to iterate through all documents that
- * match the given key in insertion order.
- * </p>
- */
- Iterator<Map<String,Object>> iterator(Object key);
-
- /**
- * <p>
- * Delete all documents associated with the given key
- * </p>
- */
- void delete(Object key);
-
- /**
- * <p>
- * Delete all data from the cache,leaving the empty cache intact.
- * </p>
- */
- void deleteAll();
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java
deleted file mode 100644
index 2f3d957..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Constructor;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-import org.apache.solr.common.SolrException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DIHCacheSupport {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private String cacheForeignKey;
- private String cacheImplName;
- private Map<String,DIHCache> queryVsCache = new HashMap<>();
- private Map<String,Iterator<Map<String,Object>>> queryVsCacheIterator;
- private Iterator<Map<String,Object>> dataSourceRowCache;
- private boolean cacheDoKeyLookup;
-
- public DIHCacheSupport(Context context, String cacheImplName) {
- this.cacheImplName = cacheImplName;
-
- Relation r = new Relation(context);
- cacheDoKeyLookup = r.doKeyLookup;
- String cacheKey = r.primaryKey;
- cacheForeignKey = r.foreignKey;
-
- context.setSessionAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY, cacheKey,
- Context.SCOPE_ENTITY);
- context.setSessionAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY, cacheForeignKey,
- Context.SCOPE_ENTITY);
- context.setSessionAttribute(DIHCacheSupport.CACHE_DELETE_PRIOR_DATA,
- "true", Context.SCOPE_ENTITY);
- context.setSessionAttribute(DIHCacheSupport.CACHE_READ_ONLY, "false",
- Context.SCOPE_ENTITY);
- }
-
- static class Relation{
- protected final boolean doKeyLookup;
- protected final String foreignKey;
- protected final String primaryKey;
-
- public Relation(Context context) {
- String where = context.getEntityAttribute("where");
- String cacheKey = context.getEntityAttribute(DIHCacheSupport.CACHE_PRIMARY_KEY);
- String lookupKey = context.getEntityAttribute(DIHCacheSupport.CACHE_FOREIGN_KEY);
- if (cacheKey != null && lookupKey == null) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "'cacheKey' is specified for the entity "
- + context.getEntityAttribute("name")
- + " but 'cacheLookup' is missing");
-
- }
- if (where == null && cacheKey == null) {
- doKeyLookup = false;
- primaryKey = null;
- foreignKey = null;
- } else {
- if (where != null) {
- String[] splits = where.split("=");
- primaryKey = splits[0];
- foreignKey = splits[1].trim();
- } else {
- primaryKey = cacheKey;
- foreignKey = lookupKey;
- }
- doKeyLookup = true;
- }
- }
-
- @Override
- public String toString() {
- return "Relation "
- + primaryKey + "="+foreignKey ;
- }
-
-
- }
-
- private DIHCache instantiateCache(Context context) {
- DIHCache cache = null;
- try {
- @SuppressWarnings("unchecked")
- Class<DIHCache> cacheClass = DocBuilder.loadClass(cacheImplName, context
- .getSolrCore());
- Constructor<DIHCache> constr = cacheClass.getConstructor();
- cache = constr.newInstance();
- cache.open(context);
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Unable to load Cache implementation:" + cacheImplName, e);
- }
- return cache;
- }
-
- public void initNewParent(Context context) {
- dataSourceRowCache = null;
- queryVsCacheIterator = new HashMap<>();
- for (Map.Entry<String,DIHCache> entry : queryVsCache.entrySet()) {
- queryVsCacheIterator.put(entry.getKey(), entry.getValue().iterator());
- }
- }
-
- public void destroyAll() {
- if (queryVsCache != null) {
- for (DIHCache cache : queryVsCache.values()) {
- cache.destroy();
- }
- }
- queryVsCache = null;
- dataSourceRowCache = null;
- cacheForeignKey = null;
- }
-
- /**
- * <p>
- * Get all the rows from the datasource for the given query and cache them
- * </p>
- */
- public void populateCache(String query,
- Iterator<Map<String,Object>> rowIterator) {
- Map<String,Object> aRow = null;
- DIHCache cache = queryVsCache.get(query);
- while ((aRow = getNextFromCache(query, rowIterator)) != null) {
- cache.add(aRow);
- }
- }
-
- private Map<String,Object> getNextFromCache(String query,
- Iterator<Map<String,Object>> rowIterator) {
- try {
- if (rowIterator == null) return null;
- if (rowIterator.hasNext()) return rowIterator.next();
- return null;
- } catch (Exception e) {
- SolrException.log(log, "getNextFromCache() failed for query '" + query
- + "'", e);
- wrapAndThrow(DataImportHandlerException.WARN, e);
- return null;
- }
- }
-
- public Map<String,Object> getCacheData(Context context, String query,
- Iterator<Map<String,Object>> rowIterator) {
- if (cacheDoKeyLookup) {
- return getIdCacheData(context, query, rowIterator);
- } else {
- return getSimpleCacheData(context, query, rowIterator);
- }
- }
-
- /**
- * If the where clause is present the cache is sql Vs Map of key Vs List of
- * Rows.
- *
- * @param query
- * the query string for which cached data is to be returned
- *
- * @return the cached row corresponding to the given query after all variables
- * have been resolved
- */
- protected Map<String,Object> getIdCacheData(Context context, String query,
- Iterator<Map<String,Object>> rowIterator) {
- Object key = context.resolve(cacheForeignKey);
- if (key == null) {
- throw new DataImportHandlerException(DataImportHandlerException.WARN,
- "The cache lookup value : " + cacheForeignKey
- + " is resolved to be null in the entity :"
- + context.getEntityAttribute("name"));
-
- }
- if (dataSourceRowCache == null) {
- DIHCache cache = queryVsCache.get(query);
-
- if (cache == null) {
- cache = instantiateCache(context);
- queryVsCache.put(query, cache);
- populateCache(query, rowIterator);
- }
- dataSourceRowCache = cache.iterator(key);
- }
- return getFromRowCacheTransformed();
- }
-
- /**
- * If where clause is not present the cache is a Map of query vs List of Rows.
- *
- * @param query
- * string for which cached row is to be returned
- *
- * @return the cached row corresponding to the given query
- */
- protected Map<String,Object> getSimpleCacheData(Context context,
- String query, Iterator<Map<String,Object>> rowIterator) {
- if (dataSourceRowCache == null) {
- DIHCache cache = queryVsCache.get(query);
- if (cache == null) {
- cache = instantiateCache(context);
- queryVsCache.put(query, cache);
- populateCache(query, rowIterator);
- queryVsCacheIterator.put(query, cache.iterator());
- }
- Iterator<Map<String,Object>> cacheIter = queryVsCacheIterator.get(query);
- dataSourceRowCache = cacheIter;
- }
-
- return getFromRowCacheTransformed();
- }
-
- protected Map<String,Object> getFromRowCacheTransformed() {
- if (dataSourceRowCache == null || !dataSourceRowCache.hasNext()) {
- dataSourceRowCache = null;
- return null;
- }
- Map<String,Object> r = dataSourceRowCache.next();
- return r;
- }
-
- /**
- * <p>
- * Specify the class for the cache implementation
- * </p>
- */
- public static final String CACHE_IMPL = "cacheImpl";
-
- /**
- * <p>
- * If the cache supports persistent data, set to "true" to delete any prior
- * persisted data before running the entity.
- * </p>
- */
-
- public static final String CACHE_DELETE_PRIOR_DATA = "cacheDeletePriorData";
- /**
- * <p>
- * Specify the Foreign Key from the parent entity to join on. Use if the cache
- * is on a child entity.
- * </p>
- */
- public static final String CACHE_FOREIGN_KEY = "cacheLookup";
-
- /**
- * <p>
- * Specify the Primary Key field from this Entity to map the input records
- * with
- * </p>
- */
- public static final String CACHE_PRIMARY_KEY = "cacheKey";
- /**
- * <p>
- * If true, a pre-existing cache is re-opened for read-only access.
- * </p>
- */
- public static final String CACHE_READ_ONLY = "cacheReadOnly";
-
-
-
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java
deleted file mode 100644
index 24732d1..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHLogLevels.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-public enum DIHLogLevels {
- START_ENTITY, END_ENTITY, TRANSFORMED_ROW, ENTITY_META, PRE_TRANSFORMER_ROW, START_DOC, END_DOC, ENTITY_OUT, ROW_END, TRANSFORMER_EXCEPTION, ENTITY_EXCEPTION, DISABLE_LOGGING, ENABLE_LOGGING, NONE
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java
deleted file mode 100644
index f51ef07..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHProperties.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.Date;
-import java.util.Map;
-
-/**
- * Implementations write out properties about the last data import
- * for use by the next import. ex: to persist the last import timestamp
- * so that future delta imports can know what needs to be updated.
- *
- * @lucene.experimental
- */
-public abstract class DIHProperties {
-
- public abstract void init(DataImporter dataImporter, Map<String, String> initParams);
-
- public abstract boolean isWritable();
-
- public abstract void persist(Map<String, Object> props);
-
- public abstract Map<String, Object> readIndexerProperties();
-
- public abstract String convertDateToString(Date d);
-
- public Date getCurrentTimestamp() {
- return new Date();
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java
deleted file mode 100644
index bdb988d..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriter.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.solr.common.SolrInputDocument;
-
-/**
- * @lucene.experimental
- *
- */
-public interface DIHWriter {
-
- /**
- * <p>
- * If this writer supports transactions or commit points, then commit any changes,
- * optionally optimizing the data for read/write performance
- * </p>
- */
- public void commit(boolean optimize);
-
- /**
- * <p>
- * Release resources used by this writer. After calling close, reads & updates will throw exceptions.
- * </p>
- */
- public void close();
-
- /**
- * <p>
- * If this writer supports transactions or commit points, then roll back any uncommitted changes.
- * </p>
- */
- public void rollback();
-
- /**
- * <p>
- * Delete from the writer's underlying data store based the passed-in writer-specific query. (Optional Operation)
- * </p>
- */
- public void deleteByQuery(String q);
-
- /**
- * <p>
- * Delete everything from the writer's underlying data store
- * </p>
- */
- public void doDeleteAll();
-
- /**
- * <p>
- * Delete from the writer's underlying data store based on the passed-in Primary Key
- * </p>
- */
- public void deleteDoc(Object key);
-
-
-
- /**
- * <p>
- * Add a document to this writer's underlying data store.
- * </p>
- * @return true on success, false on failure
- */
- public boolean upload(SolrInputDocument doc);
-
-
-
- /**
- * <p>
- * Provide context information for this writer. init() should be called before using the writer.
- * </p>
- */
- public void init(Context context) ;
-
-
- /**
- * <p>
- * Specify the keys to be modified by a delta update (required by writers that can store duplicate keys)
- * </p>
- */
- public void setDeltaKeys(Set<Map<String, Object>> deltaKeys) ;
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java
deleted file mode 100644
index 43e92c3..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHWriterBase.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-public abstract class DIHWriterBase implements DIHWriter {
- protected String keyFieldName;
- protected Set<Object> deltaKeys = null;
-
- @Override
- public void setDeltaKeys(Set<Map<String,Object>> passedInDeltaKeys) {
- deltaKeys = new HashSet<>();
- for (Map<String,Object> aMap : passedInDeltaKeys) {
- if (aMap.size() > 0) {
- Object key = null;
- if (keyFieldName != null) {
- key = aMap.get(keyFieldName);
- } else {
- key = aMap.entrySet().iterator().next();
- }
- if (key != null) {
- deltaKeys.add(key);
- }
- }
- }
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
deleted file mode 100644
index 278de7d..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Constructor;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.ContentStream;
-import org.apache.solr.common.util.ContentStreamBase;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.handler.RequestHandlerBase;
-import org.apache.solr.metrics.MetricsMap;
-import org.apache.solr.metrics.SolrMetricsContext;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.RawResponseWriter;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.update.processor.UpdateRequestProcessor;
-import org.apache.solr.update.processor.UpdateRequestProcessorChain;
-import org.apache.solr.util.plugin.SolrCoreAware;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD;
-
-/**
- * <p>
- * Solr Request Handler for data import from databases and REST data sources.
- * </p>
- * <p>
- * It is configured in solrconfig.xml
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @deprecated since 8.6
- * @since solr 1.3
- */
-@Deprecated(since = "8.6")
-public class DataImportHandler extends RequestHandlerBase implements
- SolrCoreAware {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private DataImporter importer;
-
- private boolean debugEnabled = true;
-
- private String myName = "dataimport";
-
- private MetricsMap metrics;
-
- private static final String PARAM_WRITER_IMPL = "writerImpl";
- private static final String DEFAULT_WRITER_NAME = "SolrWriter";
- static final String ENABLE_DIH_DATA_CONFIG_PARAM = "enable.dih.dataConfigParam";
-
- final boolean dataConfigParam_enabled = Boolean.getBoolean(ENABLE_DIH_DATA_CONFIG_PARAM);
-
- public DataImporter getImporter() {
- return this.importer;
- }
-
- @Override
-
- public void init(@SuppressWarnings({"rawtypes"})NamedList args) {
- super.init(args);
- Map<String,String> macro = new HashMap<>();
- macro.put("expandMacros", "false");
- defaults = SolrParams.wrapDefaults(defaults, new MapSolrParams(macro));
- log.warn("Data Import Handler is deprecated as of Solr 8.6. See SOLR-14066 for more details.");
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void inform(SolrCore core) {
- try {
- String name = getPluginInfo().name;
- if (name.startsWith("/")) {
- myName = name.substring(1);
- }
- // some users may have '/' in the handler name. replace with '_'
- myName = myName.replaceAll("/", "_");
- debugEnabled = StrUtils.parseBool((String)initArgs.get(ENABLE_DEBUG), true);
- importer = new DataImporter(core, myName);
- } catch (Exception e) {
- log.error( DataImporter.MSG.LOAD_EXP, e);
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, DataImporter.MSG.LOAD_EXP, e);
- }
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
- throws Exception {
- rsp.setHttpCaching(false);
-
- //TODO: figure out why just the first one is OK...
- ContentStream contentStream = null;
- Iterable<ContentStream> streams = req.getContentStreams();
- if(streams != null){
- for (ContentStream stream : streams) {
- contentStream = stream;
- break;
- }
- }
- SolrParams params = req.getParams();
- @SuppressWarnings({"rawtypes"})
- NamedList defaultParams = (NamedList) initArgs.get("defaults");
- RequestInfo requestParams = new RequestInfo(req, getParamsMap(params), contentStream);
- String command = requestParams.getCommand();
-
- if (DataImporter.SHOW_CONF_CMD.equals(command)) {
- String dataConfigFile = params.get("config");
- String dataConfig = params.get("dataConfig"); // needn't check dataConfigParam_enabled; we don't execute it
- if(dataConfigFile != null) {
- dataConfig = SolrWriter.getResourceAsString(req.getCore().getResourceLoader().openResource(dataConfigFile));
- }
- if(dataConfig==null) {
- rsp.add("status", DataImporter.MSG.NO_CONFIG_FOUND);
- } else {
- // Modify incoming request params to add wt=raw
- ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
- rawParams.set(CommonParams.WT, "raw");
- req.setParams(rawParams);
- ContentStreamBase content = new ContentStreamBase.StringStream(dataConfig);
- rsp.add(RawResponseWriter.CONTENT, content);
- }
- return;
- }
-
- if (params.get("dataConfig") != null && dataConfigParam_enabled == false) {
- throw new SolrException(SolrException.ErrorCode.FORBIDDEN,
- "Use of the dataConfig param (DIH debug mode) requires the system property " +
- ENABLE_DIH_DATA_CONFIG_PARAM + " because it's a security risk.");
- }
-
- rsp.add("initArgs", initArgs);
- String message = "";
-
- if (command != null) {
- rsp.add("command", command);
- }
- // If importer is still null
- if (importer == null) {
- rsp.add("status", DataImporter.MSG.NO_INIT);
- return;
- }
-
- if (command != null && DataImporter.ABORT_CMD.equals(command)) {
- importer.runCmd(requestParams, null);
- } else if (importer.isBusy()) {
- message = DataImporter.MSG.CMD_RUNNING;
- } else if (command != null) {
- if (DataImporter.FULL_IMPORT_CMD.equals(command)
- || DataImporter.DELTA_IMPORT_CMD.equals(command) ||
- IMPORT_CMD.equals(command)) {
- importer.maybeReloadConfiguration(requestParams, defaultParams);
- UpdateRequestProcessorChain processorChain =
- req.getCore().getUpdateProcessorChain(params);
- UpdateRequestProcessor processor = processorChain.createProcessor(req, rsp);
- SolrResourceLoader loader = req.getCore().getResourceLoader();
- DIHWriter sw = getSolrWriter(processor, loader, requestParams, req);
-
- if (requestParams.isDebug()) {
- if (debugEnabled) {
- // Synchronous request for the debug mode
- importer.runCmd(requestParams, sw);
- rsp.add("mode", "debug");
- rsp.add("documents", requestParams.getDebugInfo().debugDocuments);
- if (requestParams.getDebugInfo().debugVerboseOutput != null) {
- rsp.add("verbose-output", requestParams.getDebugInfo().debugVerboseOutput);
- }
- } else {
- message = DataImporter.MSG.DEBUG_NOT_ENABLED;
- }
- } else {
- // Asynchronous request for normal mode
- if(requestParams.getContentStream() == null && !requestParams.isSyncMode()){
- importer.runAsync(requestParams, sw);
- } else {
- importer.runCmd(requestParams, sw);
- }
- }
- } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
- if(importer.maybeReloadConfiguration(requestParams, defaultParams)) {
- message = DataImporter.MSG.CONFIG_RELOADED;
- } else {
- message = DataImporter.MSG.CONFIG_NOT_RELOADED;
- }
- }
- }
- rsp.add("status", importer.isBusy() ? "busy" : "idle");
- rsp.add("importResponse", message);
- rsp.add("statusMessages", importer.getStatusMessages());
- }
-
- /** The value is converted to a String or {@code List<String>} if multi-valued. */
- private Map<String, Object> getParamsMap(SolrParams params) {
- Map<String, Object> result = new HashMap<>();
- for (Map.Entry<String, String[]> pair : params){
- String s = pair.getKey();
- String[] val = pair.getValue();
- if (val == null || val.length < 1)
- continue;
- if (val.length == 1)
- result.put(s, val[0]);
- else
- result.put(s, Arrays.asList(val));
- }
- return result;
- }
-
- private DIHWriter getSolrWriter(final UpdateRequestProcessor processor,
- final SolrResourceLoader loader, final RequestInfo requestParams,
- SolrQueryRequest req) {
- SolrParams reqParams = req.getParams();
- String writerClassStr = null;
- if (reqParams != null && reqParams.get(PARAM_WRITER_IMPL) != null) {
- writerClassStr = reqParams.get(PARAM_WRITER_IMPL);
- }
- DIHWriter writer;
- if (writerClassStr != null
- && !writerClassStr.equals(DEFAULT_WRITER_NAME)
- && !writerClassStr.equals(DocBuilder.class.getPackage().getName() + "."
- + DEFAULT_WRITER_NAME)) {
- try {
- @SuppressWarnings("unchecked")
- Class<DIHWriter> writerClass = DocBuilder.loadClass(writerClassStr, req.getCore());
- @SuppressWarnings({"rawtypes"})
- Constructor<DIHWriter> cnstr = writerClass.getConstructor(new Class[] {
- UpdateRequestProcessor.class, SolrQueryRequest.class});
- return cnstr.newInstance((Object) processor, (Object) req);
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
- "Unable to load Writer implementation:" + writerClassStr, e);
- }
- } else {
- return new SolrWriter(processor, req) {
- @Override
- public boolean upload(SolrInputDocument document) {
- try {
- return super.upload(document);
- } catch (RuntimeException e) {
- log.error("Exception while adding: {}", document, e);
- return false;
- }
- }
- };
- }
- }
-
- @Override
- public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
- super.initializeMetrics(parentContext, scope);
- metrics = new MetricsMap((detailed, map) -> {
- if (importer != null) {
- DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
-
- map.put("Status", importer.getStatus().toString());
-
- if (importer.docBuilder != null) {
- DocBuilder.Statistics running = importer.docBuilder.importStatistics;
- map.put("Documents Processed", running.docCount);
- map.put("Requests made to DataSource", running.queryCount);
- map.put("Rows Fetched", running.rowsCount);
- map.put("Documents Deleted", running.deletedDocCount);
- map.put("Documents Skipped", running.skipDocCount);
- }
-
- map.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount);
- map.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount);
- map.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount);
- map.put(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount);
- map.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount);
- }
- });
- solrMetricsContext.gauge(metrics, true, "importer", getCategory().toString(), scope);
- }
-
- // //////////////////////SolrInfoMBeans methods //////////////////////
-
- @Override
- public String getDescription() {
- return DataImporter.MSG.JMX_DESC;
- }
-
- public static final String ENABLE_DEBUG = "enableDebug";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
deleted file mode 100644
index e69b3fd..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandlerException.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-/**
- * <p> Exception class for all DataImportHandler exceptions </p>
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public class DataImportHandlerException extends RuntimeException {
- private int errCode;
-
- public boolean debugged = false;
-
- public static final int SEVERE = 500, WARN = 400, SKIP = 300, SKIP_ROW =301;
-
- public DataImportHandlerException(int err) {
- super();
- errCode = err;
- }
-
- public DataImportHandlerException(int err, String message) {
- super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()));
- errCode = err;
- }
-
- public DataImportHandlerException(int err, String message, Throwable cause) {
- super(message + (SolrWriter.getDocCount() == null ? "" : MSG + SolrWriter.getDocCount()), cause);
- errCode = err;
- }
-
- public DataImportHandlerException(int err, Throwable cause) {
- super(cause);
- errCode = err;
- }
-
- public int getErrCode() {
- return errCode;
- }
-
- public static DataImportHandlerException wrapAndThrow(int err, Exception e) {
- if (e instanceof DataImportHandlerException) {
- throw (DataImportHandlerException) e;
- } else {
- throw new DataImportHandlerException(err, e);
- }
- }
-
- public static DataImportHandlerException wrapAndThrow(int err, Exception e, String msg) {
- if (e instanceof DataImportHandlerException) {
- throw (DataImportHandlerException) e;
- } else {
- throw new DataImportHandlerException(err, msg, e);
- }
- }
-
-
- public static final String MSG = " Processing Document # ";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java
deleted file mode 100644
index c5b2f70..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.EmptyEntityResolver;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.util.SystemIdResolver;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.XMLErrorLogger;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
-import org.apache.solr.handler.dataimport.config.DIHConfiguration;
-import org.apache.solr.handler.dataimport.config.Entity;
-import org.apache.solr.handler.dataimport.config.PropertyWriter;
-import org.apache.solr.handler.dataimport.config.Script;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DocBuilder.loadClass;
-import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
-import org.xml.sax.InputSource;
-import org.apache.commons.io.IOUtils;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- * <p> Stores all configuration information for pulling and indexing data. </p>
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public class DataImporter {
-
- public enum Status {
- IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
- }
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final XMLErrorLogger XMLLOG = new XMLErrorLogger(log);
-
- private Status status = Status.IDLE;
- private DIHConfiguration config;
- private Date indexStartTime;
- private Properties store = new Properties();
- private Map<String, Map<String,String>> requestLevelDataSourceProps = new HashMap<>();
- private IndexSchema schema;
- public DocBuilder docBuilder;
- public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
- private SolrCore core;
- private Map<String, Object> coreScopeSession = new ConcurrentHashMap<>();
- private ReentrantLock importLock = new ReentrantLock();
- private boolean isDeltaImportSupported = false;
- private final String handlerName;
-
- /**
- * Only for testing purposes
- */
- DataImporter() {
- this.handlerName = "dataimport" ;
- }
-
- DataImporter(SolrCore core, String handlerName) {
- this.handlerName = handlerName;
- this.core = core;
- this.schema = core.getLatestSchema();
- }
-
-
-
-
- boolean maybeReloadConfiguration(RequestInfo params,
- NamedList<?> defaultParams) throws IOException {
- if (importLock.tryLock()) {
- boolean success = false;
- try {
- if (null != params.getRequest()) {
- if (schema != params.getRequest().getSchema()) {
- schema = params.getRequest().getSchema();
- }
- }
- String dataConfigText = params.getDataConfig();
- String dataconfigFile = params.getConfigFile();
- InputSource is = null;
- if(dataConfigText!=null && dataConfigText.length()>0) {
- is = new InputSource(new StringReader(dataConfigText));
- } else if(dataconfigFile!=null) {
- is = new InputSource(core.getResourceLoader().openResource(dataconfigFile));
- is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(dataconfigFile));
- log.info("Loading DIH Configuration: {}", dataconfigFile);
- }
- if(is!=null) {
- config = loadDataConfig(is);
- success = true;
- }
-
- Map<String,Map<String,String>> dsProps = new HashMap<>();
- if(defaultParams!=null) {
- int position = 0;
- while (position < defaultParams.size()) {
- if (defaultParams.getName(position) == null) {
- break;
- }
- String name = defaultParams.getName(position);
- if (name.equals("datasource")) {
- success = true;
- @SuppressWarnings({"rawtypes"})
- NamedList dsConfig = (NamedList) defaultParams.getVal(position);
- log.info("Getting configuration for Global Datasource...");
- Map<String,String> props = new HashMap<>();
- for (int i = 0; i < dsConfig.size(); i++) {
- props.put(dsConfig.getName(i), dsConfig.getVal(i).toString());
- }
- log.info("Adding properties to datasource: {}", props);
- dsProps.put((String) dsConfig.get("name"), props);
- }
- position++;
- }
- }
- requestLevelDataSourceProps = Collections.unmodifiableMap(dsProps);
- } catch(IOException ioe) {
- throw ioe;
- } finally {
- importLock.unlock();
- }
- return success;
- } else {
- return false;
- }
- }
-
-
-
- public String getHandlerName() {
- return handlerName;
- }
-
- public IndexSchema getSchema() {
- return schema;
- }
-
- /**
- * Used by tests
- */
- void loadAndInit(String configStr) {
- config = loadDataConfig(new InputSource(new StringReader(configStr)));
- }
-
- void loadAndInit(InputSource configFile) {
- config = loadDataConfig(configFile);
- }
-
- public DIHConfiguration loadDataConfig(InputSource configFile) {
-
- DIHConfiguration dihcfg = null;
- try {
- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
- dbf.setValidating(false);
-
- // only enable xinclude, if XML is coming from safe source (local file)
- // and a a SolrCore and SystemId is present (makes no sense otherwise):
- if (core != null && configFile.getSystemId() != null) {
- try {
- dbf.setXIncludeAware(true);
- dbf.setNamespaceAware(true);
- } catch( UnsupportedOperationException e ) {
- log.warn( "XML parser doesn't support XInclude option" );
- }
- }
-
- DocumentBuilder builder = dbf.newDocumentBuilder();
- // only enable xinclude / external entities, if XML is coming from
- // safe source (local file) and a a SolrCore and SystemId is present:
- if (core != null && configFile.getSystemId() != null) {
- builder.setEntityResolver(new SystemIdResolver(core.getResourceLoader()));
- } else {
- // Don't allow external entities without having a system ID:
- builder.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
- }
- builder.setErrorHandler(XMLLOG);
- Document document;
- try {
- document = builder.parse(configFile);
- } finally {
- // some XML parsers are broken and don't close the byte stream (but they should according to spec)
- IOUtils.closeQuietly(configFile.getByteStream());
- }
-
- dihcfg = readFromXml(document);
- log.info("Data Configuration loaded successfully");
- } catch (Exception e) {
- throw new DataImportHandlerException(SEVERE,
- "Data Config problem: " + e.getMessage(), e);
- }
- for (Entity e : dihcfg.getEntities()) {
- if (e.getAllAttributes().containsKey(SqlEntityProcessor.DELTA_QUERY)) {
- isDeltaImportSupported = true;
- break;
- }
- }
- return dihcfg;
- }
-
- public DIHConfiguration readFromXml(Document xmlDocument) {
- DIHConfiguration config;
- List<Map<String, String >> functions = new ArrayList<>();
- Script script = null;
- Map<String, Map<String,String>> dataSources = new HashMap<>();
-
- NodeList dataConfigTags = xmlDocument.getElementsByTagName("dataConfig");
- if(dataConfigTags == null || dataConfigTags.getLength() == 0) {
- throw new DataImportHandlerException(SEVERE, "the root node '<dataConfig>' is missing");
- }
- Element e = (Element) dataConfigTags.item(0);
- List<Element> documentTags = ConfigParseUtil.getChildNodes(e, "document");
- if (documentTags.isEmpty()) {
- throw new DataImportHandlerException(SEVERE, "DataImportHandler " +
- "configuration file must have one <document> node.");
- }
-
- List<Element> scriptTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.SCRIPT);
- if (!scriptTags.isEmpty()) {
- script = new Script(scriptTags.get(0));
- }
-
- // Add the provided evaluators
- List<Element> functionTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.FUNCTION);
- if (!functionTags.isEmpty()) {
- for (Element element : functionTags) {
- String func = ConfigParseUtil.getStringAttribute(element, NAME, null);
- String clz = ConfigParseUtil.getStringAttribute(element, ConfigNameConstants.CLASS, null);
- if (func == null || clz == null){
- throw new DataImportHandlerException(
- SEVERE,
- "<function> must have a 'name' and 'class' attributes");
- } else {
- functions.add(ConfigParseUtil.getAllAttributes(element));
- }
- }
- }
- List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC);
- if (!dataSourceTags.isEmpty()) {
- for (Element element : dataSourceTags) {
- Map<String,String> p = new HashMap<>();
- HashMap<String, String> attrs = ConfigParseUtil.getAllAttributes(element);
- for (Map.Entry<String, String> entry : attrs.entrySet()) {
- p.put(entry.getKey(), entry.getValue());
- }
- dataSources.put(p.get("name"), p);
- }
- }
- if(dataSources.get(null) == null){
- for (Map<String,String> properties : dataSources.values()) {
- dataSources.put(null,properties);
- break;
- }
- }
- PropertyWriter pw = null;
- List<Element> propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER);
- if (propertyWriterTags.isEmpty()) {
- boolean zookeeper = false;
- if (this.core != null
- && this.core.getCoreContainer().isZooKeeperAware()) {
- zookeeper = true;
- }
- pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter"
- : "SimplePropertiesWriter", Collections.<String,String> emptyMap());
- } else if (propertyWriterTags.size() > 1) {
- throw new DataImportHandlerException(SEVERE, "Only one "
- + ConfigNameConstants.PROPERTY_WRITER + " can be configured.");
- } else {
- Element pwElement = propertyWriterTags.get(0);
- String type = null;
- Map<String,String> params = new HashMap<>();
- for (Map.Entry<String,String> entry : ConfigParseUtil.getAllAttributes(
- pwElement).entrySet()) {
- if (TYPE.equals(entry.getKey())) {
- type = entry.getValue();
- } else {
- params.put(entry.getKey(), entry.getValue());
- }
- }
- if (type == null) {
- throw new DataImportHandlerException(SEVERE, "The "
- + ConfigNameConstants.PROPERTY_WRITER + " element must specify "
- + TYPE);
- }
- pw = new PropertyWriter(type, params);
- }
- return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw);
- }
-
- @SuppressWarnings("unchecked")
- private DIHProperties createPropertyWriter() {
- DIHProperties propWriter = null;
- PropertyWriter configPw = config.getPropertyWriter();
- try {
- Class<DIHProperties> writerClass = DocBuilder.loadClass(configPw.getType(), this.core);
- propWriter = writerClass.getConstructor().newInstance();
- propWriter.init(this, configPw.getParameters());
- } catch (Exception e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Unable to PropertyWriter implementation:" + configPw.getType(), e);
- }
- return propWriter;
- }
-
- public DIHConfiguration getConfig() {
- return config;
- }
-
- Date getIndexStartTime() {
- return indexStartTime;
- }
-
- void setIndexStartTime(Date indextStartTime) {
- this.indexStartTime = indextStartTime;
- }
-
- void store(Object key, Object value) {
- store.put(key, value);
- }
-
- Object retrieve(Object key) {
- return store.get(key);
- }
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- public DataSource getDataSourceInstance(Entity key, String name, Context ctx) {
- Map<String,String> p = requestLevelDataSourceProps.get(name);
- if (p == null)
- p = config.getDataSources().get(name);
- if (p == null)
- p = requestLevelDataSourceProps.get(null);// for default data source
- if (p == null)
- p = config.getDataSources().get(null);
- if (p == null)
- throw new DataImportHandlerException(SEVERE,
- "No dataSource :" + name + " available for entity :" + key.getName());
- String type = p.get(TYPE);
- @SuppressWarnings({"rawtypes"})
- DataSource dataSrc = null;
- if (type == null) {
- dataSrc = new JdbcDataSource();
- } else {
- try {
- dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).getConstructor().newInstance();
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type);
- }
- }
- try {
- Properties copyProps = new Properties();
- copyProps.putAll(p);
- Map<String, Object> map = ctx.getRequestParameters();
- if (map.containsKey("rows")) {
- int rows = Integer.parseInt((String) map.get("rows"));
- if (map.containsKey("start")) {
- rows += Integer.parseInt((String) map.get("start"));
- }
- copyProps.setProperty("maxRows", String.valueOf(rows));
- }
- dataSrc.init(ctx, copyProps);
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.getDataSourceName());
- }
- return dataSrc;
- }
-
- public Status getStatus() {
- return status;
- }
-
- public void setStatus(Status status) {
- this.status = status;
- }
-
- public boolean isBusy() {
- return importLock.isLocked();
- }
-
- public void doFullImport(DIHWriter writer, RequestInfo requestParams) {
- log.info("Starting Full Import");
- setStatus(Status.RUNNING_FULL_DUMP);
- try {
- DIHProperties dihPropWriter = createPropertyWriter();
- setIndexStartTime(dihPropWriter.getCurrentTimestamp());
- docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
- checkWritablePersistFile(writer, dihPropWriter);
- docBuilder.execute();
- if (!requestParams.isDebug())
- cumulativeStatistics.add(docBuilder.importStatistics);
- } catch (Exception e) {
- SolrException.log(log, "Full Import failed", e);
- docBuilder.handleError("Full Import failed", e);
- } finally {
- setStatus(Status.IDLE);
- DocBuilder.INSTANCE.set(null);
- }
-
- }
-
- private void checkWritablePersistFile(DIHWriter writer, DIHProperties dihPropWriter) {
- if (isDeltaImportSupported && !dihPropWriter.isWritable()) {
- throw new DataImportHandlerException(SEVERE,
- "Properties is not writable. Delta imports are supported by data config but will not work.");
- }
- }
-
- public void doDeltaImport(DIHWriter writer, RequestInfo requestParams) {
- log.info("Starting Delta Import");
- setStatus(Status.RUNNING_DELTA_DUMP);
- try {
- DIHProperties dihPropWriter = createPropertyWriter();
- setIndexStartTime(dihPropWriter.getCurrentTimestamp());
- docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
- checkWritablePersistFile(writer, dihPropWriter);
- docBuilder.execute();
- if (!requestParams.isDebug())
- cumulativeStatistics.add(docBuilder.importStatistics);
- } catch (Exception e) {
- log.error("Delta Import Failed", e);
- docBuilder.handleError("Delta Import Failed", e);
- } finally {
- setStatus(Status.IDLE);
- DocBuilder.INSTANCE.set(null);
- }
-
- }
-
- public void runAsync(final RequestInfo reqParams, final DIHWriter sw) {
- new Thread(() -> runCmd(reqParams, sw)).start();
- }
-
- void runCmd(RequestInfo reqParams, DIHWriter sw) {
- String command = reqParams.getCommand();
- if (command.equals(ABORT_CMD)) {
- if (docBuilder != null) {
- docBuilder.abort();
- }
- return;
- }
- if (!importLock.tryLock()){
- log.warn("Import command failed . another import is running");
- return;
- }
- try {
- if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
- doFullImport(sw, reqParams);
- } else if (command.equals(DELTA_IMPORT_CMD)) {
- doDeltaImport(sw, reqParams);
- }
- } finally {
- importLock.unlock();
- }
- }
-
- @SuppressWarnings("unchecked")
- Map<String, String> getStatusMessages() {
- //this map object is a Collections.synchronizedMap(new LinkedHashMap()). if we
- // synchronize on the object it must be safe to iterate through the map
- @SuppressWarnings({"rawtypes"})
- Map statusMessages = (Map) retrieve(STATUS_MSGS);
- Map<String, String> result = new LinkedHashMap<>();
- if (statusMessages != null) {
- synchronized (statusMessages) {
- for (Object o : statusMessages.entrySet()) {
- @SuppressWarnings({"rawtypes"})
- Map.Entry e = (Map.Entry) o;
- //the toString is taken because some of the Objects create the data lazily when toString() is called
- result.put((String) e.getKey(), e.getValue().toString());
- }
- }
- }
- return result;
-
- }
-
- public DocBuilder getDocBuilder() {
- return docBuilder;
- }
-
- public DocBuilder getDocBuilder(DIHWriter writer, RequestInfo requestParams) {
- DIHProperties dihPropWriter = createPropertyWriter();
- return new DocBuilder(this, writer, dihPropWriter, requestParams);
- }
-
- Map<String, Evaluator> getEvaluators() {
- return getEvaluators(config.getFunctions());
- }
-
- /**
- * used by tests.
- */
- @SuppressWarnings({"unchecked"})
- Map<String, Evaluator> getEvaluators(List<Map<String,String>> fn) {
- Map<String, Evaluator> evaluators = new HashMap<>();
- evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator());
- evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator());
- evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator());
- evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator());
- SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore();
- for (Map<String, String> map : fn) {
- try {
- evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).getConstructor().newInstance());
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS));
- }
- }
- return evaluators;
- }
-
- static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
- @Override
- protected AtomicLong initialValue() {
- return new AtomicLong();
- }
- };
-
-
-
- static final class MSG {
- public static final String NO_CONFIG_FOUND = "Configuration not found";
-
- public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";
-
- public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";
-
- public static final String LOAD_EXP = "Exception while loading DataImporter";
-
- public static final String JMX_DESC = "Manage data import from databases to Solr";
-
- public static final String CMD_RUNNING = "A command is still running...";
-
- public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";
-
- public static final String CONFIG_RELOADED = "Configuration Re-loaded sucessfully";
-
- public static final String CONFIG_NOT_RELOADED = "Configuration NOT Re-loaded...Data Importer is busy.";
-
- public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
-
- public static final String TOTAL_FAILED_DOCS = "Total Documents Failed";
-
- public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";
-
- public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";
-
- public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";
-
- public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
- }
-
- public SolrCore getCore() {
- return core;
- }
-
- void putToCoreScopeSession(String key, Object val) {
- coreScopeSession.put(key, val);
- }
- Object getFromCoreScopeSession(String key) {
- return coreScopeSession.get(key);
- }
-
- public static final String COLUMN = "column";
-
- public static final String TYPE = "type";
-
- public static final String DATA_SRC = "dataSource";
-
- public static final String MULTI_VALUED = "multiValued";
-
- public static final String NAME = "name";
-
- public static final String STATUS_MSGS = "status-messages";
-
- public static final String FULL_IMPORT_CMD = "full-import";
-
- public static final String IMPORT_CMD = "import";
-
- public static final String DELTA_IMPORT_CMD = "delta-import";
-
- public static final String ABORT_CMD = "abort";
-
- public static final String DEBUG_MODE = "debug";
-
- public static final String RELOAD_CONF_CMD = "reload-config";
-
- public static final String SHOW_CONF_CMD = "show-config";
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java
deleted file mode 100644
index aeded27..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataSource.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.io.Closeable;
-import java.util.Properties;
-
-/**
- * <p>
- * Provides data from a source with a given query.
- * </p>
- * <p>
- * Implementation of this abstract class must provide a default no-arg constructor
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 1.3
- */
-public abstract class DataSource<T> implements Closeable {
-
- /**
- * Initializes the DataSource with the <code>Context</code> and
- * initialization properties.
- * <p>
- * This is invoked by the <code>DataImporter</code> after creating an
- * instance of this class.
- */
- public abstract void init(Context context, Properties initProps);
-
- /**
- * Get records for the given query.The return type depends on the
- * implementation .
- *
- * @param query The query string. It can be a SQL for JdbcDataSource or a URL
- * for HttpDataSource or a file location for FileDataSource or a custom
- * format for your own custom DataSource.
- * @return Depends on the implementation. For instance JdbcDataSource returns
- * an Iterator<Map <String,Object>>
- */
- public abstract T getData(String query);
-
- /**
- * Cleans up resources of this DataSource after use.
- */
- public abstract void close();
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
deleted file mode 100644
index f4df820..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatEvaluator.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.IllformedLocaleException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
-import java.util.TimeZone;
-
-import org.apache.solr.common.util.SuppressForbidden;
-import org.apache.solr.handler.dataimport.config.EntityField;
-import org.apache.solr.util.DateMathParser;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-/**
- * <p>Formats values using a given date format. </p>
- * <p>Pass three parameters:
- * <ul>
- * <li>An {@link EntityField} or a date expression to be parsed with
- * the {@link DateMathParser} class If the value is in a String,
- * then it is assumed to be a datemath expression, otherwise it
- * resolved using a {@link VariableResolver} instance</li>
- * <li>A date format see {@link SimpleDateFormat} for the syntax.</li>
- * <li>The {@link Locale} to parse.
- * (optional. Defaults to the Root Locale) </li>
- * </ul>
- */
-public class DateFormatEvaluator extends Evaluator {
-
- public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
- protected Map<String, Locale> availableLocales = new HashMap<>();
- protected Set<String> availableTimezones = new HashSet<>();
-
- @SuppressForbidden(reason = "Usage of outdated locale parsing with Locale#toString() because of backwards compatibility")
- public DateFormatEvaluator() {
- for (Locale locale : Locale.getAvailableLocales()) {
- availableLocales.put(locale.toString(), locale);
- }
- for (String tz : TimeZone.getAvailableIDs()) {
- availableTimezones.add(tz);
- }
- }
-
- private SimpleDateFormat getDateFormat(String pattern, TimeZone timezone, Locale locale) {
- final SimpleDateFormat sdf = new SimpleDateFormat(pattern, locale);
- sdf.setTimeZone(timezone);
- return sdf;
- }
-
- @Override
- public String evaluate(String expression, Context context) {
- List<Object> l = parseParams(expression, context.getVariableResolver());
- if (l.size() < 2 || l.size() > 4) {
- throw new DataImportHandlerException(SEVERE, "'formatDate()' must have two, three or four parameters ");
- }
- Object o = l.get(0);
- Object format = l.get(1);
- if (format instanceof VariableWrapper) {
- VariableWrapper wrapper = (VariableWrapper) format;
- o = wrapper.resolve();
- format = o.toString();
- }
- Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
- if(l.size()>2) {
- Object localeObj = l.get(2);
- String localeStr = null;
- if (localeObj instanceof VariableWrapper) {
- localeStr = ((VariableWrapper) localeObj).resolve().toString();
- } else {
- localeStr = localeObj.toString();
- }
- locale = availableLocales.get(localeStr);
- if (locale == null) try {
- locale = new Locale.Builder().setLanguageTag(localeStr).build();
- } catch (IllformedLocaleException ex) {
- throw new DataImportHandlerException(SEVERE, "Malformed / non-existent locale: " + localeStr, ex);
- }
- }
- TimeZone tz = TimeZone.getDefault(); // DWS TODO: is this the right default for us? Deserves explanation if so.
- if(l.size()==4) {
- Object tzObj = l.get(3);
- String tzStr = null;
- if (tzObj instanceof VariableWrapper) {
- tzStr = ((VariableWrapper) tzObj).resolve().toString();
- } else {
- tzStr = tzObj.toString();
- }
- if(availableTimezones.contains(tzStr)) {
- tz = TimeZone.getTimeZone(tzStr);
- } else {
- throw new DataImportHandlerException(SEVERE, "Unsupported Timezone: " + tzStr);
- }
- }
- String dateFmt = format.toString();
- SimpleDateFormat fmt = getDateFormat(dateFmt, tz, locale);
- Date date = null;
- if (o instanceof VariableWrapper) {
- date = evaluateWrapper((VariableWrapper) o, locale, tz);
- } else {
- date = evaluateString(o.toString(), locale, tz);
- }
- return fmt.format(date);
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- *
- * @lucene.experimental this API is experimental and subject to change
- * @return the result of evaluating a string
- */
- protected Date evaluateWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
- Date date = null;
- Object variableval = resolveWrapper(variableWrapper,locale,tz);
- if (variableval instanceof Date) {
- date = (Date) variableval;
- } else {
- String s = variableval.toString();
- try {
- date = getDateFormat(DEFAULT_DATE_FORMAT, tz, locale).parse(s);
- } catch (ParseException exp) {
- wrapAndThrow(SEVERE, exp, "Invalid expression for date");
- }
- }
- return date;
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- * @lucene.experimental
- * @return the result of evaluating a string
- */
- protected Date evaluateString(String datemathfmt, Locale locale, TimeZone tz) {
- // note: DMP does not use the locale but perhaps a subclass might use it, for e.g. parsing a date in a custom
- // string that doesn't necessarily have date math?
- //TODO refactor DateMathParser.parseMath a bit to have a static method for this logic.
- if (datemathfmt.startsWith("NOW")) {
- datemathfmt = datemathfmt.substring("NOW".length());
- }
- try {
- DateMathParser parser = new DateMathParser(tz);
- parser.setNow(new Date());// thus do *not* use SolrRequestInfo
- return parser.parseMath(datemathfmt);
- } catch (ParseException e) {
- throw wrapAndThrow(SEVERE, e, "Invalid expression for date");
- }
- }
-
- /**
- * NOTE: declared as a method to allow for extensibility
- * @lucene.experimental
- * @return the result of resolving the variable wrapper
- */
- protected Object resolveWrapper(VariableWrapper variableWrapper, Locale locale, TimeZone tz) {
- return variableWrapper.resolve();
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
deleted file mode 100644
index 61edbe6..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.lang.invoke.MethodHandles;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.*;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * <p>
- * {@link Transformer} instance which creates {@link Date} instances out of {@link String}s.
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public class DateFormatTransformer extends Transformer {
- private Map<String, SimpleDateFormat> fmtCache = new HashMap<>();
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- @SuppressWarnings("unchecked")
- public Object transformRow(Map<String, Object> aRow, Context context) {
-
- for (Map<String, String> map : context.getAllEntityFields()) {
- Locale locale = Locale.ENGLISH; // we default to ENGLISH for dates for full Java 9 compatibility
- String customLocale = map.get(LOCALE);
- if (customLocale != null) {
- try {
- locale = new Locale.Builder().setLanguageTag(customLocale).build();
- } catch (IllformedLocaleException e) {
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified: " + customLocale, e);
- }
- }
-
- String fmt = map.get(DATE_TIME_FMT);
- if (fmt == null)
- continue;
- VariableResolver resolver = context.getVariableResolver();
- fmt = resolver.replaceTokens(fmt);
- String column = map.get(DataImporter.COLUMN);
- String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
- if (srcCol == null)
- srcCol = column;
- try {
- Object o = aRow.get(srcCol);
- if (o instanceof List) {
- @SuppressWarnings({"rawtypes"})
- List inputs = (List) o;
- List<Date> results = new ArrayList<>();
- for (Object input : inputs) {
- results.add(process(input, fmt, locale));
- }
- aRow.put(column, results);
- } else {
- if (o != null) {
- aRow.put(column, process(o, fmt, locale));
- }
- }
- } catch (ParseException e) {
- log.warn("Could not parse a Date field ", e);
- }
- }
- return aRow;
- }
-
- private Date process(Object value, String format, Locale locale) throws ParseException {
- if (value == null) return null;
- String strVal = value.toString().trim();
- if (strVal.length() == 0)
- return null;
- SimpleDateFormat fmt = fmtCache.get(format);
- if (fmt == null) {
- fmt = new SimpleDateFormat(format, locale);
- fmtCache.put(format, fmt);
- }
- return fmt.parse(strVal);
- }
-
- public static final String DATE_TIME_FMT = "dateTimeFormat";
-
- public static final String LOCALE = "locale";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java
deleted file mode 100644
index 623832f..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugInfo.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.util.AbstractList;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.StrUtils;
-
-public class DebugInfo {
-
- private static final class ChildRollupDocs extends AbstractList<SolrInputDocument> {
-
- private List<SolrInputDocument> delegate = new ArrayList<>();
-
- @Override
- public SolrInputDocument get(int index) {
- return delegate.get(index);
- }
-
- @Override
- public int size() {
- return delegate.size();
- }
-
- public boolean add(SolrInputDocument e) {
- SolrInputDocument transformed = e.deepCopy();
- if (transformed.hasChildDocuments()) {
- ChildRollupDocs childList = new ChildRollupDocs();
- childList.addAll(transformed.getChildDocuments());
- transformed.addField("_childDocuments_", childList);
- transformed.getChildDocuments().clear();
- }
- return delegate.add(transformed);
- }
- }
-
- public List<SolrInputDocument> debugDocuments = new ChildRollupDocs();
-
- public NamedList<String> debugVerboseOutput = null;
- public boolean verbose;
-
- public DebugInfo(Map<String,Object> requestParams) {
- verbose = StrUtils.parseBool((String) requestParams.get("verbose"), false);
- debugVerboseOutput = new NamedList<>();
- }
-}
-
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java
deleted file mode 100644
index 9de42fc..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-import org.apache.solr.common.util.NamedList;
-
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.text.MessageFormat;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Stack;
-
-/**
- * <p>
- * Implements most of the interactive development functionality
- * </p>
- * <p/>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p/>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-class DebugLogger {
- private Stack<DebugInfo> debugStack;
-
- @SuppressWarnings({"rawtypes"})
- NamedList output;
-// private final SolrWriter writer1;
-
- private static final String LINE = "---------------------------------------------";
-
- private MessageFormat fmt = new MessageFormat(
- "----------- row #{0}-------------", Locale.ROOT);
-
- boolean enabled = true;
-
- @SuppressWarnings({"rawtypes"})
- public DebugLogger() {
-// writer = solrWriter;
- output = new NamedList();
- debugStack = new Stack<DebugInfo>() {
-
- @Override
- public DebugInfo pop() {
- if (size() == 1)
- throw new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "Stack is becoming empty");
- return super.pop();
- }
- };
- debugStack.push(new DebugInfo(null, DIHLogLevels.NONE, null));
- output = debugStack.peek().lst;
- }
-
- private DebugInfo peekStack() {
- return debugStack.isEmpty() ? null : debugStack.peek();
- }
-
- @SuppressWarnings({"unchecked"})
- public void log(DIHLogLevels event, String name, Object row) {
- if (event == DIHLogLevels.DISABLE_LOGGING) {
- enabled = false;
- return;
- } else if (event == DIHLogLevels.ENABLE_LOGGING) {
- enabled = true;
- return;
- }
-
- if (!enabled && event != DIHLogLevels.START_ENTITY
- && event != DIHLogLevels.END_ENTITY) {
- return;
- }
-
- if (event == DIHLogLevels.START_DOC) {
- debugStack.push(new DebugInfo(null, DIHLogLevels.START_DOC, peekStack()));
- } else if (DIHLogLevels.START_ENTITY == event) {
- debugStack
- .push(new DebugInfo(name, DIHLogLevels.START_ENTITY, peekStack()));
- } else if (DIHLogLevels.ENTITY_OUT == event
- || DIHLogLevels.PRE_TRANSFORMER_ROW == event) {
- if (debugStack.peek().type == DIHLogLevels.START_ENTITY
- || debugStack.peek().type == DIHLogLevels.START_DOC) {
- debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack
- .peek().rowCount}));
- addToNamedList(debugStack.peek().lst, row);
- debugStack.peek().lst.add(null, LINE);
- }
- } else if (event == DIHLogLevels.ROW_END) {
- popAllTransformers();
- } else if (DIHLogLevels.END_ENTITY == event) {
- while (debugStack.pop().type != DIHLogLevels.START_ENTITY)
- ;
- } else if (DIHLogLevels.END_DOC == event) {
- while (debugStack.pop().type != DIHLogLevels.START_DOC)
- ;
- } else if (event == DIHLogLevels.TRANSFORMER_EXCEPTION) {
- debugStack.push(new DebugInfo(name, event, peekStack()));
- debugStack.peek().lst.add("EXCEPTION",
- getStacktraceString((Exception) row));
- } else if (DIHLogLevels.TRANSFORMED_ROW == event) {
- debugStack.push(new DebugInfo(name, event, peekStack()));
- debugStack.peek().lst.add(null, LINE);
- addToNamedList(debugStack.peek().lst, row);
- debugStack.peek().lst.add(null, LINE);
- if (row instanceof DataImportHandlerException) {
- DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row;
- dataImportHandlerException.debugged = true;
- }
- } else if (DIHLogLevels.ENTITY_META == event) {
- popAllTransformers();
- debugStack.peek().lst.add(name, row);
- } else if (DIHLogLevels.ENTITY_EXCEPTION == event) {
- if (row instanceof DataImportHandlerException) {
- DataImportHandlerException dihe = (DataImportHandlerException) row;
- if (dihe.debugged)
- return;
- dihe.debugged = true;
- }
-
- popAllTransformers();
- debugStack.peek().lst.add("EXCEPTION",
- getStacktraceString((Exception) row));
- }
- }
-
- private void popAllTransformers() {
- while (true) {
- DIHLogLevels type = debugStack.peek().type;
- if (type == DIHLogLevels.START_DOC || type == DIHLogLevels.START_ENTITY)
- break;
- debugStack.pop();
- }
- }
-
- @SuppressWarnings({"unchecked"})
- private void addToNamedList(@SuppressWarnings({"rawtypes"})NamedList nl, Object row) {
- if (row instanceof List) {
- @SuppressWarnings({"rawtypes"})
- List list = (List) row;
- @SuppressWarnings({"rawtypes"})
- NamedList l = new NamedList();
- nl.add(null, l);
- for (Object o : list) {
- Map<String, Object> map = (Map<String, Object>) o;
- for (Map.Entry<String, Object> entry : map.entrySet())
- nl.add(entry.getKey(), entry.getValue());
- }
- } else if (row instanceof Map) {
- Map<String, Object> map = (Map<String, Object>) row;
- for (Map.Entry<String, Object> entry : map.entrySet())
- nl.add(entry.getKey(), entry.getValue());
- }
- }
-
- @SuppressWarnings({"rawtypes"})
- DataSource wrapDs(final DataSource ds) {
- return new DataSource() {
- @Override
- public void init(Context context, Properties initProps) {
- ds.init(context, initProps);
- }
-
- @Override
- public void close() {
- ds.close();
- }
-
- @Override
- public Object getData(String query) {
- log(DIHLogLevels.ENTITY_META, "query", query);
- long start = System.nanoTime();
- try {
- return ds.getData(query);
- } catch (DataImportHandlerException de) {
- log(DIHLogLevels.ENTITY_EXCEPTION,
- null, de);
- throw de;
- } catch (Exception e) {
- log(DIHLogLevels.ENTITY_EXCEPTION,
- null, e);
- DataImportHandlerException de = new DataImportHandlerException(
- DataImportHandlerException.SEVERE, "", e);
- de.debugged = true;
- throw de;
- } finally {
- log(DIHLogLevels.ENTITY_META, "time-taken", DocBuilder
- .getTimeElapsedSince(start));
- }
- }
- };
- }
-
- Transformer wrapTransformer(final Transformer t) {
- return new Transformer() {
- @Override
- public Object transformRow(Map<String, Object> row, Context context) {
- log(DIHLogLevels.PRE_TRANSFORMER_ROW, null, row);
- String tName = getTransformerName(t);
- Object result = null;
- try {
- result = t.transformRow(row, context);
- log(DIHLogLevels.TRANSFORMED_ROW, tName, result);
- } catch (DataImportHandlerException de) {
- log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, de);
- de.debugged = true;
- throw de;
- } catch (Exception e) {
- log(DIHLogLevels.TRANSFORMER_EXCEPTION, tName, e);
- DataImportHandlerException de = new DataImportHandlerException(DataImportHandlerException.SEVERE, "", e);
- de.debugged = true;
- throw de;
- }
- return result;
- }
- };
- }
-
- public static String getStacktraceString(Exception e) {
- StringWriter sw = new StringWriter();
- e.printStackTrace(new PrintWriter(sw));
- return sw.toString();
- }
-
- static String getTransformerName(Transformer t) {
- @SuppressWarnings({"rawtypes"})
- Class transClass = t.getClass();
- if (t instanceof EntityProcessorWrapper.ReflectionTransformer) {
- return ((EntityProcessorWrapper.ReflectionTransformer) t).trans;
- }
- if (t instanceof ScriptTransformer) {
- ScriptTransformer scriptTransformer = (ScriptTransformer) t;
- return "script:" + scriptTransformer.getFunctionName();
- }
- if (transClass.getPackage().equals(DebugLogger.class.getPackage())) {
- return transClass.getSimpleName();
- } else {
- return transClass.getName();
- }
- }
-
- private static class DebugInfo {
- String name;
-
- int tCount, rowCount;
-
- @SuppressWarnings({"rawtypes"})
- NamedList lst;
-
- DIHLogLevels type;
-
- DebugInfo parent;
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- public DebugInfo(String name, DIHLogLevels type, DebugInfo parent) {
- this.name = name;
- this.type = type;
- this.parent = parent;
- lst = new NamedList();
- if (parent != null) {
- String displayName = null;
- if (type == DIHLogLevels.START_ENTITY) {
- displayName = "entity:" + name;
- } else if (type == DIHLogLevels.TRANSFORMED_ROW
- || type == DIHLogLevels.TRANSFORMER_EXCEPTION) {
- displayName = "transformer:" + name;
- } else if (type == DIHLogLevels.START_DOC) {
- this.name = displayName = "document#" + SolrWriter.getDocCount();
- }
- parent.lst.add(displayName, lst);
- }
- }
- }
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
deleted file mode 100644
index 8115695..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
+++ /dev/null
@@ -1,1020 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.handler.dataimport.config.DIHConfiguration;
-import org.apache.solr.handler.dataimport.config.Entity;
-import org.apache.solr.handler.dataimport.config.EntityField;
-
-import static org.apache.solr.handler.dataimport.SolrWriter.LAST_INDEX_KEY;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.invoke.MethodHandles;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
-
-/**
- * <p> {@link DocBuilder} is responsible for creating Solr documents out of the given configuration. It also maintains
- * statistics information. It depends on the {@link EntityProcessor} implementations to fetch data. </p>
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public class DocBuilder {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new AtomicBoolean();
-
- private static final Date EPOCH = new Date(0);
- public static final String DELETE_DOC_BY_ID = "$deleteDocById";
- public static final String DELETE_DOC_BY_QUERY = "$deleteDocByQuery";
- public static final String DOC_BOOST = "$docBoost";
- public static final String SKIP_DOC = "$skipDoc";
- public static final String SKIP_ROW = "$skipRow";
-
- DataImporter dataImporter;
-
- private DIHConfiguration config;
-
- private EntityProcessorWrapper currentEntityProcessorWrapper;
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- private Map statusMessages = Collections.synchronizedMap(new LinkedHashMap());
-
- public Statistics importStatistics = new Statistics();
-
- DIHWriter writer;
-
- boolean verboseDebug = false;
-
- Map<String, Object> session = new HashMap<>();
-
- static final ThreadLocal<DocBuilder> INSTANCE = new ThreadLocal<>();
- private Map<String, Object> persistedProperties;
-
- private DIHProperties propWriter;
- private DebugLogger debugLogger;
- private final RequestInfo reqParams;
-
- public DocBuilder(DataImporter dataImporter, DIHWriter solrWriter, DIHProperties propWriter, RequestInfo reqParams) {
- INSTANCE.set(this);
- this.dataImporter = dataImporter;
- this.reqParams = reqParams;
- this.propWriter = propWriter;
- DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
- verboseDebug = reqParams.isDebug() && reqParams.getDebugInfo().verbose;
- persistedProperties = propWriter.readIndexerProperties();
-
- writer = solrWriter;
- ContextImpl ctx = new ContextImpl(null, null, null, null, reqParams.getRawParams(), null, this);
- if (writer != null) {
- writer.init(ctx);
- }
- }
-
-
- DebugLogger getDebugLogger(){
- if (debugLogger == null) {
- debugLogger = new DebugLogger();
- }
- return debugLogger;
- }
-
- private VariableResolver getVariableResolver() {
- try {
- VariableResolver resolver = null;
- String epoch = propWriter.convertDateToString(EPOCH);
- if(dataImporter != null && dataImporter.getCore() != null
- && dataImporter.getCore().getCoreDescriptor().getSubstitutableProperties() != null){
- resolver = new VariableResolver(dataImporter.getCore().getCoreDescriptor().getSubstitutableProperties());
- } else {
- resolver = new VariableResolver();
- }
- resolver.setEvaluators(dataImporter.getEvaluators());
- Map<String, Object> indexerNamespace = new HashMap<>();
- if (persistedProperties.get(LAST_INDEX_TIME) != null) {
- indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.get(LAST_INDEX_TIME));
- } else {
- // set epoch
- indexerNamespace.put(LAST_INDEX_TIME, epoch);
- }
- indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
- indexerNamespace.put("request", new HashMap<>(reqParams.getRawParams()));
- indexerNamespace.put("handlerName", dataImporter.getHandlerName());
- for (Entity entity : dataImporter.getConfig().getEntities()) {
- Map<String, Object> entityNamespace = new HashMap<>();
- String key = SolrWriter.LAST_INDEX_KEY;
- Object lastIndex = persistedProperties.get(entity.getName() + "." + key);
- if (lastIndex != null) {
- entityNamespace.put(SolrWriter.LAST_INDEX_KEY, lastIndex);
- } else {
- entityNamespace.put(SolrWriter.LAST_INDEX_KEY, epoch);
- }
- indexerNamespace.put(entity.getName(), entityNamespace);
- }
- resolver.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT, indexerNamespace);
- resolver.addNamespace(ConfigNameConstants.IMPORTER_NS, indexerNamespace);
- return resolver;
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e);
- // unreachable statement
- return null;
- }
- }
-
- private void invokeEventListener(String className) {
- invokeEventListener(className, null);
- }
-
-
- private void invokeEventListener(String className, Exception lastException) {
- try {
- @SuppressWarnings({"unchecked"})
- EventListener listener = (EventListener) loadClass(className, dataImporter.getCore()).getConstructor().newInstance();
- notifyListener(listener, lastException);
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e, "Unable to load class : " + className);
- }
- }
-
- private void notifyListener(EventListener listener, Exception lastException) {
- String currentProcess;
- if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) {
- currentProcess = Context.DELTA_DUMP;
- } else {
- currentProcess = Context.FULL_DUMP;
- }
- ContextImpl ctx = new ContextImpl(null, getVariableResolver(), null, currentProcess, session, null, this);
- ctx.setLastException(lastException);
- listener.onEvent(ctx);
- }
-
- @SuppressWarnings("unchecked")
- public void execute() {
- List<EntityProcessorWrapper> epwList = null;
- try {
- dataImporter.store(DataImporter.STATUS_MSGS, statusMessages);
- config = dataImporter.getConfig();
- final AtomicLong startTime = new AtomicLong(System.nanoTime());
- statusMessages.put(TIME_ELAPSED, new Object() {
- @Override
- public String toString() {
- return getTimeElapsedSince(startTime.get());
- }
- });
-
- statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED,
- importStatistics.queryCount);
- statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED,
- importStatistics.rowsCount);
- statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED,
- importStatistics.docCount);
- statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED,
- importStatistics.skipDocCount);
-
- List<String> entities = reqParams.getEntitiesToRun();
-
- // Trigger onImportStart
- if (config.getOnImportStart() != null) {
- invokeEventListener(config.getOnImportStart());
- }
- AtomicBoolean fullCleanDone = new AtomicBoolean(false);
- //we must not do a delete of *:* multiple times if there are multiple root entities to be run
- Map<String,Object> lastIndexTimeProps = new HashMap<>();
- lastIndexTimeProps.put(LAST_INDEX_KEY, dataImporter.getIndexStartTime());
-
- epwList = new ArrayList<>(config.getEntities().size());
- for (Entity e : config.getEntities()) {
- epwList.add(getEntityProcessorWrapper(e));
- }
- for (EntityProcessorWrapper epw : epwList) {
- if (entities != null && !entities.contains(epw.getEntity().getName()))
- continue;
- lastIndexTimeProps.put(epw.getEntity().getName() + "." + LAST_INDEX_KEY, propWriter.getCurrentTimestamp());
- currentEntityProcessorWrapper = epw;
- String delQuery = epw.getEntity().getAllAttributes().get("preImportDeleteQuery");
- if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) {
- cleanByQuery(delQuery, fullCleanDone);
- doDelta();
- delQuery = epw.getEntity().getAllAttributes().get("postImportDeleteQuery");
- if (delQuery != null) {
- fullCleanDone.set(false);
- cleanByQuery(delQuery, fullCleanDone);
- }
- } else {
- cleanByQuery(delQuery, fullCleanDone);
- doFullDump();
- delQuery = epw.getEntity().getAllAttributes().get("postImportDeleteQuery");
- if (delQuery != null) {
- fullCleanDone.set(false);
- cleanByQuery(delQuery, fullCleanDone);
- }
- }
- }
-
- if (stop.get()) {
- // Dont commit if aborted using command=abort
- statusMessages.put("Aborted", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date()));
- handleError("Aborted", null);
- } else {
- // Do not commit unnecessarily if this is a delta-import and no documents were created or deleted
- if (!reqParams.isClean()) {
- if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) {
- finish(lastIndexTimeProps);
- }
- } else {
- // Finished operation normally, commit now
- finish(lastIndexTimeProps);
- }
-
- if (config.getOnImportEnd() != null) {
- invokeEventListener(config.getOnImportEnd());
- }
- }
-
- statusMessages.remove(TIME_ELAPSED);
- statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, ""+ importStatistics.docCount.get());
- if(importStatistics.failedDocCount.get() > 0)
- statusMessages.put(DataImporter.MSG.TOTAL_FAILED_DOCS, ""+ importStatistics.failedDocCount.get());
-
- statusMessages.put("Time taken", getTimeElapsedSince(startTime.get()));
- if (log.isInfoEnabled()) {
- log.info("Time taken = {}", getTimeElapsedSince(startTime.get()));
- }
- } catch(Exception e)
- {
- throw new RuntimeException(e);
- } finally {
- // Cannot use IOUtils.closeQuietly since DIH relies on exceptions bubbling out of writer.close() to indicate
- // success/failure of the run.
- RuntimeException raisedDuringClose = null;
- try {
- if (writer != null) {
- writer.close();
- }
- } catch (RuntimeException e) {
- if (log.isWarnEnabled()) {
- log.warn("Exception encountered while closing DIHWriter " + writer + "; temporarily suppressing to ensure other DocBuilder elements are closed", e); // logOk
- }
- raisedDuringClose = e;
- }
-
- if (epwList != null) {
- closeEntityProcessorWrappers(epwList);
- }
- if(reqParams.isDebug()) {
- reqParams.getDebugInfo().debugVerboseOutput = getDebugLogger().output;
- }
-
- if (raisedDuringClose != null) {
- throw raisedDuringClose;
- }
- }
- }
- private void closeEntityProcessorWrappers(List<EntityProcessorWrapper> epwList) {
- for(EntityProcessorWrapper epw : epwList) {
- IOUtils.closeQuietly(epw);
-
- if(epw.getDatasource() != null) {
- IOUtils.closeQuietly(epw.getDatasource());
- }
- closeEntityProcessorWrappers(epw.getChildren());
- }
- }
-
- @SuppressWarnings("unchecked")
- private void finish(Map<String,Object> lastIndexTimeProps) {
- log.info("Import completed successfully");
- statusMessages.put("", "Indexing completed. Added/Updated: "
- + importStatistics.docCount + " documents. Deleted "
- + importStatistics.deletedDocCount + " documents.");
- if(reqParams.isCommit()) {
- writer.commit(reqParams.isOptimize());
- addStatusMessage("Committed");
- if (reqParams.isOptimize())
- addStatusMessage("Optimized");
- }
- try {
- propWriter.persist(lastIndexTimeProps);
- } catch (Exception e) {
- log.error("Could not write property file", e);
- statusMessages.put("error", "Could not write property file. Delta imports will not work. " +
- "Make sure your conf directory is writable");
- }
- }
-
- @SuppressWarnings({"unchecked"})
- void handleError(String message, Exception e) {
- if (!dataImporter.getCore().getCoreContainer().isZooKeeperAware()) {
- writer.rollback();
- }
-
- statusMessages.put(message, "Indexing error");
- addStatusMessage(message);
- if ((config != null) && (config.getOnError() != null)) {
- invokeEventListener(config.getOnError(), e);
- }
- }
-
- private void doFullDump() {
- addStatusMessage("Full Dump Started");
- buildDocument(getVariableResolver(), null, null, currentEntityProcessorWrapper, true, null);
- }
-
- @SuppressWarnings("unchecked")
- private void doDelta() {
- addStatusMessage("Delta Dump started");
- VariableResolver resolver = getVariableResolver();
-
- if (config.getDeleteQuery() != null) {
- writer.deleteByQuery(config.getDeleteQuery());
- }
-
- addStatusMessage("Identifying Delta");
- log.info("Starting delta collection.");
- Set<Map<String, Object>> deletedKeys = new HashSet<>();
- Set<Map<String, Object>> allPks = collectDelta(currentEntityProcessorWrapper, resolver, deletedKeys);
- if (stop.get())
- return;
- addStatusMessage("Deltas Obtained");
- addStatusMessage("Building documents");
- if (!deletedKeys.isEmpty()) {
- allPks.removeAll(deletedKeys);
- deleteAll(deletedKeys);
- // Make sure that documents are not re-created
- }
- deletedKeys = null;
- writer.setDeltaKeys(allPks);
-
- statusMessages.put("Total Changed Documents", allPks.size());
- VariableResolver vri = getVariableResolver();
- Iterator<Map<String, Object>> pkIter = allPks.iterator();
- while (pkIter.hasNext()) {
- Map<String, Object> map = pkIter.next();
- vri.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT + ".delta", map);
- buildDocument(vri, null, map, currentEntityProcessorWrapper, true, null);
- pkIter.remove();
- // check for abort
- if (stop.get())
- break;
- }
-
- if (!stop.get()) {
- log.info("Delta Import completed successfully");
- }
- }
-
- private void deleteAll(Set<Map<String, Object>> deletedKeys) {
- log.info("Deleting stale documents ");
- Iterator<Map<String, Object>> iter = deletedKeys.iterator();
- while (iter.hasNext()) {
- Map<String, Object> map = iter.next();
- String keyName = currentEntityProcessorWrapper.getEntity().isDocRoot() ? currentEntityProcessorWrapper.getEntity().getPk() : currentEntityProcessorWrapper.getEntity().getSchemaPk();
- Object key = map.get(keyName);
- if(key == null) {
- keyName = findMatchingPkColumn(keyName, map);
- key = map.get(keyName);
- }
- if(key == null) {
- log.warn("no key was available for deleted pk query. keyName = {}", keyName);
- continue;
- }
- writer.deleteDoc(key);
- importStatistics.deletedDocCount.incrementAndGet();
- iter.remove();
- }
- }
-
- @SuppressWarnings("unchecked")
- public void addStatusMessage(String msg) {
- statusMessages.put(msg, new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date()));
- }
-
- private void resetEntity(EntityProcessorWrapper epw) {
- epw.setInitialized(false);
- for (EntityProcessorWrapper child : epw.getChildren()) {
- resetEntity(child);
- }
-
- }
-
- private void buildDocument(VariableResolver vr, DocWrapper doc,
- Map<String,Object> pk, EntityProcessorWrapper epw, boolean isRoot,
- ContextImpl parentCtx) {
- List<EntityProcessorWrapper> entitiesToDestroy = new ArrayList<>();
- try {
- buildDocument(vr, doc, pk, epw, isRoot, parentCtx, entitiesToDestroy);
- } catch (Exception e) {
- throw new RuntimeException(e);
- } finally {
- for (EntityProcessorWrapper entityWrapper : entitiesToDestroy) {
- entityWrapper.destroy();
- }
- resetEntity(epw);
- }
- }
-
- @SuppressWarnings("unchecked")
- private void buildDocument(VariableResolver vr, DocWrapper doc,
- Map<String, Object> pk, EntityProcessorWrapper epw, boolean isRoot,
- ContextImpl parentCtx, List<EntityProcessorWrapper> entitiesToDestroy) {
-
- ContextImpl ctx = new ContextImpl(epw, vr, null,
- pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP,
- session, parentCtx, this);
- epw.init(ctx);
- if (!epw.isInitialized()) {
- entitiesToDestroy.add(epw);
- epw.setInitialized(true);
- }
-
- if (reqParams.getStart() > 0) {
- getDebugLogger().log(DIHLogLevels.DISABLE_LOGGING, null, null);
- }
-
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.START_ENTITY, epw.getEntity().getName(), null);
- }
-
- int seenDocCount = 0;
-
- try {
- while (true) {
- if (stop.get())
- return;
- if(importStatistics.docCount.get() > (reqParams.getStart() + reqParams.getRows())) break;
- try {
- seenDocCount++;
-
- if (seenDocCount > reqParams.getStart()) {
- getDebugLogger().log(DIHLogLevels.ENABLE_LOGGING, null, null);
- }
-
- if (verboseDebug && epw.getEntity().isDocRoot()) {
- getDebugLogger().log(DIHLogLevels.START_DOC, epw.getEntity().getName(), null);
- }
- if (doc == null && epw.getEntity().isDocRoot()) {
- doc = new DocWrapper();
- ctx.setDoc(doc);
- Entity e = epw.getEntity();
- while (e.getParentEntity() != null) {
- addFields(e.getParentEntity(), doc, (Map<String, Object>) vr
- .resolve(e.getParentEntity().getName()), vr);
- e = e.getParentEntity();
- }
- }
-
- Map<String, Object> arow = epw.nextRow();
- if (arow == null) {
- break;
- }
-
- // Support for start parameter in debug mode
- if (epw.getEntity().isDocRoot()) {
- if (seenDocCount <= reqParams.getStart())
- continue;
- if (seenDocCount > reqParams.getStart() + reqParams.getRows()) {
- log.info("Indexing stopped at docCount = {}", importStatistics.docCount);
- break;
- }
- }
-
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ENTITY_OUT, epw.getEntity().getName(), arow);
- }
- importStatistics.rowsCount.incrementAndGet();
-
- DocWrapper childDoc = null;
- if (doc != null) {
- if (epw.getEntity().isChild()) {
- childDoc = new DocWrapper();
- handleSpecialCommands(arow, childDoc);
- addFields(epw.getEntity(), childDoc, arow, vr);
- doc.addChildDocument(childDoc);
- } else {
- handleSpecialCommands(arow, doc);
- vr.addNamespace(epw.getEntity().getName(), arow);
- addFields(epw.getEntity(), doc, arow, vr);
- vr.removeNamespace(epw.getEntity().getName());
- }
- }
- if (epw.getEntity().getChildren() != null) {
- vr.addNamespace(epw.getEntity().getName(), arow);
- for (EntityProcessorWrapper child : epw.getChildren()) {
- if (childDoc != null) {
- buildDocument(vr, childDoc,
- child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
- } else {
- buildDocument(vr, doc,
- child.getEntity().isDocRoot() ? pk : null, child, false, ctx, entitiesToDestroy);
- }
- }
- vr.removeNamespace(epw.getEntity().getName());
- }
- if (epw.getEntity().isDocRoot()) {
- if (stop.get())
- return;
- if (!doc.isEmpty()) {
- boolean result = writer.upload(doc);
- if(reqParams.isDebug()) {
- reqParams.getDebugInfo().debugDocuments.add(doc);
- }
- doc = null;
- if (result){
- importStatistics.docCount.incrementAndGet();
- } else {
- importStatistics.failedDocCount.incrementAndGet();
- }
- }
- }
- } catch (DataImportHandlerException e) {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), e);
- }
- if(e.getErrCode() == DataImportHandlerException.SKIP_ROW){
- continue;
- }
- if (isRoot) {
- if (e.getErrCode() == DataImportHandlerException.SKIP) {
- importStatistics.skipDocCount.getAndIncrement();
- doc = null;
- } else {
- SolrException.log(log, "Exception while processing: "
- + epw.getEntity().getName() + " document : " + doc, e);
- }
- if (e.getErrCode() == DataImportHandlerException.SEVERE)
- throw e;
- } else
- throw e;
- } catch (Exception t) {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ENTITY_EXCEPTION, epw.getEntity().getName(), t);
- }
- throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t);
- } finally {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.ROW_END, epw.getEntity().getName(), null);
- if (epw.getEntity().isDocRoot())
- getDebugLogger().log(DIHLogLevels.END_DOC, null, null);
- }
- }
- }
- } finally {
- if (verboseDebug) {
- getDebugLogger().log(DIHLogLevels.END_ENTITY, null, null);
- }
- }
- }
-
- static class DocWrapper extends SolrInputDocument {
- //final SolrInputDocument solrDocument = new SolrInputDocument();
- Map<String ,Object> session;
-
- public void setSessionAttribute(String key, Object val){
- if(session == null) session = new HashMap<>();
- session.put(key, val);
- }
-
- public Object getSessionAttribute(String key) {
- return session == null ? null : session.get(key);
- }
- }
-
- private void handleSpecialCommands(Map<String, Object> arow, DocWrapper doc) {
- Object value = arow.get(DELETE_DOC_BY_ID);
- if (value != null) {
- if (value instanceof Collection) {
- @SuppressWarnings({"rawtypes"})
- Collection collection = (Collection) value;
- for (Object o : collection) {
- writer.deleteDoc(o.toString());
- importStatistics.deletedDocCount.incrementAndGet();
- }
- } else {
- writer.deleteDoc(value);
- importStatistics.deletedDocCount.incrementAndGet();
- }
- }
- value = arow.get(DELETE_DOC_BY_QUERY);
- if (value != null) {
- if (value instanceof Collection) {
- @SuppressWarnings({"rawtypes"})
- Collection collection = (Collection) value;
- for (Object o : collection) {
- writer.deleteByQuery(o.toString());
- importStatistics.deletedDocCount.incrementAndGet();
- }
- } else {
- writer.deleteByQuery(value.toString());
- importStatistics.deletedDocCount.incrementAndGet();
- }
- }
- value = arow.get(DOC_BOOST);
- if (value != null) {
- String message = "Ignoring document boost: " + value + " as index-time boosts are not supported anymore";
- if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
- log.warn(message);
- } else {
- log.debug(message);
- }
- }
-
- value = arow.get(SKIP_DOC);
- if (value != null) {
- if (Boolean.parseBoolean(value.toString())) {
- throw new DataImportHandlerException(DataImportHandlerException.SKIP,
- "Document skipped :" + arow);
- }
- }
-
- value = arow.get(SKIP_ROW);
- if (value != null) {
- if (Boolean.parseBoolean(value.toString())) {
- throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW);
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- private void addFields(Entity entity, DocWrapper doc,
- Map<String, Object> arow, VariableResolver vr) {
- for (Map.Entry<String, Object> entry : arow.entrySet()) {
- String key = entry.getKey();
- Object value = entry.getValue();
- if (value == null) continue;
- if (key.startsWith("$")) continue;
- Set<EntityField> field = entity.getColNameVsField().get(key);
- IndexSchema schema = null == reqParams.getRequest() ? null : reqParams.getRequest().getSchema();
- if (field == null && schema != null) {
- // This can be a dynamic field or a field which does not have an entry in data-config ( an implicit field)
- SchemaField sf = schema.getFieldOrNull(key);
- if (sf == null) {
- sf = config.getSchemaField(key);
- }
- if (sf != null) {
- addFieldToDoc(entry.getValue(), sf.getName(), sf.multiValued(), doc);
- }
- //else do nothing. if we add it it may fail
- } else {
- if (field != null) {
- for (EntityField f : field) {
- String name = f.getName();
- boolean multiValued = f.isMultiValued();
- boolean toWrite = f.isToWrite();
- if(f.isDynamicName()){
- name = vr.replaceTokens(name);
- SchemaField schemaField = config.getSchemaField(name);
- if(schemaField == null) {
- toWrite = false;
- } else {
- multiValued = schemaField.multiValued();
- toWrite = true;
- }
- }
- if (toWrite) {
- addFieldToDoc(entry.getValue(), name, multiValued, doc);
- }
- }
- }
- }
- }
- }
-
- private void addFieldToDoc(Object value, String name, boolean multiValued, DocWrapper doc) {
- if (value instanceof Collection) {
- @SuppressWarnings({"rawtypes"})
- Collection collection = (Collection) value;
- if (multiValued) {
- for (Object o : collection) {
- if (o != null)
- doc.addField(name, o);
- }
- } else {
- if (doc.getField(name) == null)
- for (Object o : collection) {
- if (o != null) {
- doc.addField(name, o);
- break;
- }
- }
- }
- } else if (multiValued) {
- if (value != null) {
- doc.addField(name, value);
- }
- } else {
- if (doc.getField(name) == null && value != null)
- doc.addField(name, value);
- }
- }
-
- @SuppressWarnings({"unchecked"})
- public EntityProcessorWrapper getEntityProcessorWrapper(Entity entity) {
- EntityProcessor entityProcessor = null;
- if (entity.getProcessorName() == null) {
- entityProcessor = new SqlEntityProcessor();
- } else {
- try {
- entityProcessor = (EntityProcessor) loadClass(entity.getProcessorName(), dataImporter.getCore())
- .getConstructor().newInstance();
- } catch (Exception e) {
- wrapAndThrow (SEVERE,e,
- "Unable to load EntityProcessor implementation for entity:" + entity.getName());
- }
- }
- EntityProcessorWrapper epw = new EntityProcessorWrapper(entityProcessor, entity, this);
- for(Entity e1 : entity.getChildren()) {
- epw.getChildren().add(getEntityProcessorWrapper(e1));
- }
-
- return epw;
- }
-
- private String findMatchingPkColumn(String pk, Map<String, Object> row) {
- if (row.containsKey(pk)) {
- throw new IllegalArgumentException(String.format(Locale.ROOT,
- "deltaQuery returned a row with null for primary key %s", pk));
- }
- String resolvedPk = null;
- for (String columnName : row.keySet()) {
- if (columnName.endsWith("." + pk) || pk.endsWith("." + columnName)) {
- if (resolvedPk != null)
- throw new IllegalArgumentException(
- String.format(Locale.ROOT,
- "deltaQuery has more than one column (%s and %s) that might resolve to declared primary key pk='%s'",
- resolvedPk, columnName, pk));
- resolvedPk = columnName;
- }
- }
- if (resolvedPk == null) {
- throw new IllegalArgumentException(
- String
- .format(
- Locale.ROOT,
- "deltaQuery has no column to resolve to declared primary key pk='%s'",
- pk));
- }
- if (log.isInfoEnabled()) {
- log.info(String.format(Locale.ROOT,
- "Resolving deltaQuery column '%s' to match entity's declared pk '%s'",
- resolvedPk, pk));
- }
- return resolvedPk;
- }
-
- /**
- * <p> Collects unique keys of all Solr documents for whom one or more source tables have been changed since the last
- * indexed time. </p> <p> Note: In our definition, unique key of Solr document is the primary key of the top level
- * entity (unless skipped using docRoot=false) in the Solr document in data-config.xml </p>
- *
- * @return an iterator to the list of keys for which Solr documents should be updated.
- */
- @SuppressWarnings({"unchecked", "rawtypes"})
- public Set<Map<String, Object>> collectDelta(EntityProcessorWrapper epw, VariableResolver resolver,
- Set<Map<String, Object>> deletedRows) {
- //someone called abort
- if (stop.get())
- return new HashSet();
-
- ContextImpl context1 = new ContextImpl(epw, resolver, null, Context.FIND_DELTA, session, null, this);
- epw.init(context1);
-
- Set<Map<String, Object>> myModifiedPks = new HashSet<>();
-
-
-
- for (EntityProcessorWrapper childEpw : epw.getChildren()) {
- //this ensures that we start from the leaf nodes
- myModifiedPks.addAll(collectDelta(childEpw, resolver, deletedRows));
- //someone called abort
- if (stop.get())
- return new HashSet();
- }
-
- // identifying the modified rows for this entity
- Map<String, Map<String, Object>> deltaSet = new HashMap<>();
- if (log.isInfoEnabled()) {
- log.info("Running ModifiedRowKey() for Entity: {}", epw.getEntity().getName());
- }
- //get the modified rows in this entity
- String pk = epw.getEntity().getPk();
- while (true) {
- Map<String, Object> row = epw.nextModifiedRowKey();
-
- if (row == null)
- break;
-
- Object pkValue = row.get(pk);
- if (pkValue == null) {
- pk = findMatchingPkColumn(pk, row);
- pkValue = row.get(pk);
- }
-
- deltaSet.put(pkValue.toString(), row);
- importStatistics.rowsCount.incrementAndGet();
- // check for abort
- if (stop.get())
- return new HashSet();
- }
- //get the deleted rows for this entity
- Set<Map<String, Object>> deletedSet = new HashSet<>();
- while (true) {
- Map<String, Object> row = epw.nextDeletedRowKey();
- if (row == null)
- break;
-
- deletedSet.add(row);
-
- Object pkValue = row.get(pk);
- if (pkValue == null) {
- pk = findMatchingPkColumn(pk, row);
- pkValue = row.get(pk);
- }
-
- // Remove deleted rows from the delta rows
- String deletedRowPk = pkValue.toString();
- if (deltaSet.containsKey(deletedRowPk)) {
- deltaSet.remove(deletedRowPk);
- }
-
- importStatistics.rowsCount.incrementAndGet();
- // check for abort
- if (stop.get())
- return new HashSet();
- }
-
- if (log.isInfoEnabled()) {
- log.info("Completed ModifiedRowKey for Entity: {} rows obtained: {}", epw.getEntity().getName(), deltaSet.size());
- log.info("Completed DeletedRowKey for Entity: {} rows obtained : {}", epw.getEntity().getName(), deletedSet.size()); // logOk
- }
-
- myModifiedPks.addAll(deltaSet.values());
- Set<Map<String, Object>> parentKeyList = new HashSet<>();
- //all that we have captured is useless (in a sub-entity) if no rows in the parent is modified because of these
- //propogate up the changes in the chain
- if (epw.getEntity().getParentEntity() != null) {
- // identifying deleted rows with deltas
-
- for (Map<String, Object> row : myModifiedPks) {
- resolver.addNamespace(epw.getEntity().getName(), row);
- getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList);
- // check for abort
- if (stop.get())
- return new HashSet();
- }
- // running the same for deletedrows
- for (Map<String, Object> row : deletedSet) {
- resolver.addNamespace(epw.getEntity().getName(), row);
- getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList);
- // check for abort
- if (stop.get())
- return new HashSet();
- }
- }
- if (log.isInfoEnabled()) {
- log.info("Completed parentDeltaQuery for Entity: {}", epw.getEntity().getName());
- }
- if (epw.getEntity().isDocRoot())
- deletedRows.addAll(deletedSet);
-
- // Do not use entity.isDocRoot here because one of descendant entities may set rootEntity="true"
- return epw.getEntity().getParentEntity() == null ?
- myModifiedPks : new HashSet<>(parentKeyList);
- }
-
- private void getModifiedParentRows(VariableResolver resolver,
- String entity, EntityProcessor entityProcessor,
- Set<Map<String, Object>> parentKeyList) {
- try {
- while (true) {
- Map<String, Object> parentRow = entityProcessor
- .nextModifiedParentRowKey();
- if (parentRow == null)
- break;
-
- parentKeyList.add(parentRow);
- importStatistics.rowsCount.incrementAndGet();
- // check for abort
- if (stop.get())
- return;
- }
-
- } finally {
- resolver.removeNamespace(entity);
- }
- }
-
- public void abort() {
- stop.set(true);
- }
-
- private AtomicBoolean stop = new AtomicBoolean(false);
-
- public static final String TIME_ELAPSED = "Time Elapsed";
-
- static String getTimeElapsedSince(long l) {
- l = TimeUnit.MILLISECONDS.convert(System.nanoTime() - l, TimeUnit.NANOSECONDS);
- return (l / (60000 * 60)) + ":" + (l / 60000) % 60 + ":" + (l / 1000)
- % 60 + "." + l % 1000;
- }
-
- public RequestInfo getReqParams() {
- return reqParams;
- }
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- static Class loadClass(String name, SolrCore core) throws ClassNotFoundException {
- try {
- return core != null ?
- core.getResourceLoader().findClass(name, Object.class) :
- Class.forName(name);
- } catch (Exception e) {
- try {
- String n = DocBuilder.class.getPackage().getName() + "." + name;
- return core != null ?
- core.getResourceLoader().findClass(n, Object.class) :
- Class.forName(n);
- } catch (Exception e1) {
- throw new ClassNotFoundException("Unable to load " + name + " or " + DocBuilder.class.getPackage().getName() + "." + name, e);
- }
- }
- }
-
- public static class Statistics {
- public AtomicLong docCount = new AtomicLong();
-
- public AtomicLong deletedDocCount = new AtomicLong();
-
- public AtomicLong failedDocCount = new AtomicLong();
-
- public AtomicLong rowsCount = new AtomicLong();
-
- public AtomicLong queryCount = new AtomicLong();
-
- public AtomicLong skipDocCount = new AtomicLong();
-
- public Statistics add(Statistics stats) {
- this.docCount.addAndGet(stats.docCount.get());
- this.deletedDocCount.addAndGet(stats.deletedDocCount.get());
- this.rowsCount.addAndGet(stats.rowsCount.get());
- this.queryCount.addAndGet(stats.queryCount.get());
-
- return this;
- }
-
- public Map<String, Object> getStatsSnapshot() {
- Map<String, Object> result = new HashMap<>();
- result.put("docCount", docCount.get());
- result.put("deletedDocCount", deletedDocCount.get());
- result.put("rowCount", rowsCount.get());
- result.put("queryCount", rowsCount.get());
- result.put("skipDocCount", skipDocCount.get());
- return result;
- }
-
- }
-
- private void cleanByQuery(String delQuery, AtomicBoolean completeCleanDone) {
- delQuery = getVariableResolver().replaceTokens(delQuery);
- if (reqParams.isClean()) {
- if (delQuery == null && !completeCleanDone.get()) {
- writer.doDeleteAll();
- completeCleanDone.set(true);
- } else if (delQuery != null) {
- writer.deleteByQuery(delQuery);
- }
- }
- }
-
- public static final String LAST_INDEX_TIME = "last_index_time";
- public static final String INDEX_START_TIME = "index_start_time";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java
deleted file mode 100644
index 7ded623..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessor.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.io.Closeable;
-import java.util.Map;
-
-/**
- * <p>
- * An instance of entity processor serves an entity. It is reused throughout the
- * import process.
- * </p>
- * <p>
- * Implementations of this abstract class must provide a public no-args constructor.
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 1.3
- */
-public abstract class EntityProcessor implements Closeable {
-
- /**
- * This method is called when it starts processing an entity. When it comes
- * back to the entity it is called again. So it can reset anything at that point.
- * For a rootmost entity this is called only once for an ingestion. For sub-entities , this
- * is called multiple once for each row from its parent entity
- *
- * @param context The current context
- */
- public abstract void init(Context context);
-
- /**
- * This method helps streaming the data for each row . The implementation
- * would fetch as many rows as needed and gives one 'row' at a time. Only this
- * method is used during a full import
- *
- * @return A 'row'. The 'key' for the map is the column name and the 'value'
- * is the value of that column. If there are no more rows to be
- * returned, return 'null'
- */
- public abstract Map<String, Object> nextRow();
-
- /**
- * This is used for delta-import. It gives the pks of the changed rows in this
- * entity
- *
- * @return the pk vs value of all changed rows
- */
- public abstract Map<String, Object> nextModifiedRowKey();
-
- /**
- * This is used during delta-import. It gives the primary keys of the rows
- * that are deleted from this entity. If this entity is the root entity, solr
- * document is deleted. If this is a sub-entity, the Solr document is
- * considered as 'changed' and will be recreated
- *
- * @return the pk vs value of all changed rows
- */
- public abstract Map<String, Object> nextDeletedRowKey();
-
- /**
- * This is used during delta-import. This gives the primary keys and their
- * values of all the rows changed in a parent entity due to changes in this
- * entity.
- *
- * @return the pk vs value of all changed rows in the parent entity
- */
- public abstract Map<String, Object> nextModifiedParentRowKey();
-
- /**
- * Invoked for each entity at the very end of the import to do any needed cleanup tasks.
- *
- */
- public abstract void destroy();
-
- /**
- * Invoked after the transformers are invoked. EntityProcessors can add, remove or modify values
- * added by Transformers in this method.
- *
- * @param r The transformed row
- * @since solr 1.4
- */
- public void postTransform(Map<String, Object> r) {
- }
-
- /**
- * Invoked when the Entity processor is destroyed towards the end of import.
- *
- * @since solr 1.4
- */
- public void close() {
- //no-op
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
deleted file mode 100644
index 8311f36..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrException;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.invoke.MethodHandles;
-import java.util.*;
-
-/**
- * <p> Base class for all implementations of {@link EntityProcessor} </p> <p> Most implementations of {@link EntityProcessor}
- * extend this base class which provides common functionality. </p>
- * <p>
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.3
- */
-public class EntityProcessorBase extends EntityProcessor {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- protected boolean isFirstInit = true;
-
- protected String entityName;
-
- protected Context context;
-
- protected Iterator<Map<String, Object>> rowIterator;
-
- protected String query;
-
- protected String onError = ABORT;
-
- protected DIHCacheSupport cacheSupport = null;
-
- private Zipper zipper;
-
-
- @Override
- public void init(Context context) {
- this.context = context;
- if (isFirstInit) {
- firstInit(context);
- }
- if(zipper!=null){
- zipper.onNewParent(context);
- }else{
- if(cacheSupport!=null) {
- cacheSupport.initNewParent(context);
- }
- }
- }
-
- /**
- * first time init call. do one-time operations here
- * it's necessary to call it from the overridden method,
- * otherwise it throws NPE on accessing zipper from nextRow()
- */
- protected void firstInit(Context context) {
- entityName = context.getEntityAttribute("name");
- String s = context.getEntityAttribute(ON_ERROR);
- if (s != null) onError = s;
-
- zipper = Zipper.createOrNull(context);
-
- if(zipper==null){
- initCache(context);
- }
- isFirstInit = false;
- }
-
- protected void initCache(Context context) {
- String cacheImplName = context
- .getResolvedEntityAttribute(DIHCacheSupport.CACHE_IMPL);
-
- if (cacheImplName != null ) {
- cacheSupport = new DIHCacheSupport(context, cacheImplName);
- }
- }
-
- @Override
- public Map<String, Object> nextModifiedRowKey() {
- return null;
- }
-
- @Override
- public Map<String, Object> nextDeletedRowKey() {
- return null;
- }
-
- @Override
- public Map<String, Object> nextModifiedParentRowKey() {
- return null;
- }
-
- /**
- * For a simple implementation, this is the only method that the sub-class should implement. This is intended to
- * stream rows one-by-one. Return null to signal end of rows
- *
- * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
- * null to signal end of rows
- */
- @Override
- public Map<String, Object> nextRow() {
- return null;// do not do anything
- }
-
- protected Map<String, Object> getNext() {
- if(zipper!=null){
- return zipper.supplyNextChild(rowIterator);
- }else{
- if(cacheSupport==null) {
- try {
- if (rowIterator == null)
- return null;
- if (rowIterator.hasNext())
- return rowIterator.next();
- query = null;
- rowIterator = null;
- return null;
- } catch (Exception e) {
- SolrException.log(log, "getNext() failed for query '" + query + "'", e);
- query = null;
- rowIterator = null;
- wrapAndThrow(DataImportHandlerException.WARN, e);
- return null;
- }
- } else {
- return cacheSupport.getCacheData(context, query, rowIterator);
- }
- }
- }
-
-
- @Override
- public void destroy() {
- query = null;
- if(cacheSupport!=null){
- cacheSupport.destroyAll();
- }
- cacheSupport = null;
- }
-
-
-
- public static final String TRANSFORMER = "transformer";
-
- public static final String TRANSFORM_ROW = "transformRow";
-
- public static final String ON_ERROR = "onError";
-
- public static final String ABORT = "abort";
-
- public static final String CONTINUE = "continue";
-
- public static final String SKIP = "skip";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
deleted file mode 100644
index 6c106bd..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.dataimport.config.ConfigNameConstants;
-import org.apache.solr.handler.dataimport.config.Entity;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
-import static org.apache.solr.handler.dataimport.EntityProcessorBase.*;
-import static org.apache.solr.handler.dataimport.EntityProcessorBase.SKIP;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-/**
- * A Wrapper over {@link EntityProcessor} instance which performs transforms and handles multi-row outputs correctly.
- *
- * @since solr 1.4
- */
-public class EntityProcessorWrapper extends EntityProcessor {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private EntityProcessor delegate;
- private Entity entity;
- @SuppressWarnings({"rawtypes"})
- private DataSource datasource;
- private List<EntityProcessorWrapper> children = new ArrayList<>();
- private DocBuilder docBuilder;
- private boolean initialized;
- private String onError;
- private Context context;
- private VariableResolver resolver;
- private String entityName;
-
- protected List<Transformer> transformers;
-
- protected List<Map<String, Object>> rowcache;
-
- public EntityProcessorWrapper(EntityProcessor delegate, Entity entity, DocBuilder docBuilder) {
- this.delegate = delegate;
- this.entity = entity;
- this.docBuilder = docBuilder;
- }
-
- @Override
- public void init(Context context) {
- rowcache = null;
- this.context = context;
- resolver = context.getVariableResolver();
- if (entityName == null) {
- onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR));
- if (onError == null) onError = ABORT;
- entityName = context.getEntityAttribute(ConfigNameConstants.NAME);
- }
- delegate.init(context);
-
- }
-
- @SuppressWarnings({"unchecked"})
- void loadTransformers() {
- String transClasses = context.getEntityAttribute(TRANSFORMER);
-
- if (transClasses == null) {
- transformers = Collections.emptyList();
- return;
- }
-
- String[] transArr = transClasses.split(",");
- transformers = new ArrayList<Transformer>() {
- @Override
- public boolean add(Transformer transformer) {
- if (docBuilder != null && docBuilder.verboseDebug) {
- transformer = docBuilder.getDebugLogger().wrapTransformer(transformer);
- }
- return super.add(transformer);
- }
- };
- for (String aTransArr : transArr) {
- String trans = aTransArr.trim();
- if (trans.startsWith("script:")) {
- // The script transformer is a potential vulnerability, esp. when the script is
- // provided from an untrusted source. Check and don't proceed if source is untrusted.
- checkIfTrusted(trans);
- String functionName = trans.substring("script:".length());
- ScriptTransformer scriptTransformer = new ScriptTransformer();
- scriptTransformer.setFunctionName(functionName);
- transformers.add(scriptTransformer);
- continue;
- }
- try {
- @SuppressWarnings({"rawtypes"})
- Class clazz = DocBuilder.loadClass(trans, context.getSolrCore());
- if (Transformer.class.isAssignableFrom(clazz)) {
- transformers.add((Transformer) clazz.getConstructor().newInstance());
- } else {
- Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class);
- transformers.add(new ReflectionTransformer(meth, clazz, trans));
- }
- } catch (NoSuchMethodException nsme){
- String msg = "Transformer :"
- + trans
- + "does not implement Transformer interface or does not have a transformRow(Map<String.Object> m)method";
- log.error(msg);
- wrapAndThrow(SEVERE, nsme,msg);
- } catch (Exception e) {
- log.error("Unable to load Transformer: {}", aTransArr, e);
- wrapAndThrow(SEVERE, e,"Unable to load Transformer: " + trans);
- }
- }
-
- }
-
- private void checkIfTrusted(String trans) {
- if (docBuilder != null) {
- SolrCore core = docBuilder.dataImporter.getCore();
- boolean trusted = (core != null)? core.getCoreDescriptor().isConfigSetTrusted(): true;
- if (!trusted) {
- Exception ex = new SolrException(ErrorCode.UNAUTHORIZED, "The configset for this collection was uploaded "
- + "without any authentication in place,"
- + " and this transformer is not available for collections with untrusted configsets. To use this transformer,"
- + " re-upload the configset after enabling authentication and authorization.");
- String msg = "Transformer: "
- + trans
- + ". " + ex.getMessage();
- log.error(msg);
- wrapAndThrow(SEVERE, ex, msg);
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- static class ReflectionTransformer extends Transformer {
- final Method meth;
-
- @SuppressWarnings({"rawtypes"})
- final Class clazz;
-
- final String trans;
-
- final Object o;
-
- public ReflectionTransformer(Method meth, @SuppressWarnings({"rawtypes"})Class clazz, String trans)
- throws Exception {
- this.meth = meth;
- this.clazz = clazz;
- this.trans = trans;
- o = clazz.getConstructor().newInstance();
- }
-
- @Override
- public Object transformRow(Map<String, Object> aRow, Context context) {
- try {
- return meth.invoke(o, aRow);
- } catch (Exception e) {
- log.warn("method invocation failed on transformer : {}", trans, e);
- throw new DataImportHandlerException(WARN, e);
- }
- }
- }
-
- protected Map<String, Object> getFromRowCache() {
- Map<String, Object> r = rowcache.remove(0);
- if (rowcache.isEmpty())
- rowcache = null;
- return r;
- }
-
- @SuppressWarnings("unchecked")
- protected Map<String, Object> applyTransformer(Map<String, Object> row) {
- if(row == null) return null;
- if (transformers == null)
- loadTransformers();
- if (transformers == Collections.EMPTY_LIST)
- return row;
- Map<String, Object> transformedRow = row;
- List<Map<String, Object>> rows = null;
- boolean stopTransform = checkStopTransform(row);
- VariableResolver resolver = context.getVariableResolver();
- for (Transformer t : transformers) {
- if (stopTransform) break;
- try {
- if (rows != null) {
- List<Map<String, Object>> tmpRows = new ArrayList<>();
- for (Map<String, Object> map : rows) {
- resolver.addNamespace(entityName, map);
- Object o = t.transformRow(map, context);
- if (o == null)
- continue;
- if (o instanceof Map) {
- @SuppressWarnings({"rawtypes"})
- Map oMap = (Map) o;
- stopTransform = checkStopTransform(oMap);
- tmpRows.add((Map) o);
- } else if (o instanceof List) {
- tmpRows.addAll((List) o);
- } else {
- log.error("Transformer must return Map<String, Object> or a List<Map<String, Object>>");
- }
- }
- rows = tmpRows;
- } else {
- resolver.addNamespace(entityName, transformedRow);
- Object o = t.transformRow(transformedRow, context);
- if (o == null)
- return null;
- if (o instanceof Map) {
- @SuppressWarnings({"rawtypes"})
- Map oMap = (Map) o;
- stopTransform = checkStopTransform(oMap);
- transformedRow = (Map) o;
- } else if (o instanceof List) {
- rows = (List) o;
- } else {
- log.error("Transformer must return Map<String, Object> or a List<Map<String, Object>>");
- }
- }
- } catch (Exception e) {
- log.warn("transformer threw error", e);
- if (ABORT.equals(onError)) {
- wrapAndThrow(SEVERE, e);
- } else if (SKIP.equals(onError)) {
- wrapAndThrow(DataImportHandlerException.SKIP, e);
- }
- // onError = continue
- }
- }
- if (rows == null) {
- return transformedRow;
- } else {
- rowcache = rows;
- return getFromRowCache();
- }
-
- }
-
- private boolean checkStopTransform(@SuppressWarnings({"rawtypes"})Map oMap) {
- return oMap.get("$stopTransform") != null
- && Boolean.parseBoolean(oMap.get("$stopTransform").toString());
- }
-
- @Override
- public Map<String, Object> nextRow() {
- if (rowcache != null) {
- return getFromRowCache();
- }
- while (true) {
- Map<String, Object> arow = null;
- try {
- arow = delegate.nextRow();
- } catch (Exception e) {
- if(ABORT.equals(onError)){
- wrapAndThrow(SEVERE, e);
- } else {
- //SKIP is not really possible. If this calls the nextRow() again the Entityprocessor would be in an inconisttent state
- SolrException.log(log, "Exception in entity : "+ entityName, e);
- return null;
- }
- }
- if (arow == null) {
- return null;
- } else {
- arow = applyTransformer(arow);
- if (arow != null) {
- delegate.postTransform(arow);
- return arow;
- }
- }
- }
- }
-
- @Override
- public Map<String, Object> nextModifiedRowKey() {
- Map<String, Object> row = delegate.nextModifiedRowKey();
- row = applyTransformer(row);
- rowcache = null;
- return row;
- }
-
- @Override
- public Map<String, Object> nextDeletedRowKey() {
- Map<String, Object> row = delegate.nextDeletedRowKey();
- row = applyTransformer(row);
- rowcache = null;
- return row;
- }
-
- @Override
- public Map<String, Object> nextModifiedParentRowKey() {
- return delegate.nextModifiedParentRowKey();
- }
-
- @Override
- public void destroy() {
- delegate.destroy();
- }
-
- public VariableResolver getVariableResolver() {
- return context.getVariableResolver();
- }
-
- public Context getContext() {
- return context;
- }
-
- @Override
- public void close() {
- delegate.close();
- }
-
- public Entity getEntity() {
- return entity;
- }
-
- public List<EntityProcessorWrapper> getChildren() {
- return children;
- }
-
- @SuppressWarnings({"rawtypes"})
- public DataSource getDatasource() {
- return datasource;
- }
-
- public void setDatasource(@SuppressWarnings({"rawtypes"})DataSource datasource) {
- this.datasource = datasource;
- }
-
- public boolean isInitialized() {
- return initialized;
- }
-
- public void setInitialized(boolean initialized) {
- this.initialized = initialized;
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java
deleted file mode 100644
index 22282b9..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
-
-/**
- * <p>
- * Pluggable functions for resolving variables
- * </p>
- * <p>
- * Implementations of this abstract class must provide a public no-arg constructor.
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 1.3
- */
-public abstract class Evaluator {
-
- /**
- * Return a String after processing an expression and a {@link VariableResolver}
- *
- * @see VariableResolver
- * @param expression string to be evaluated
- * @param context instance
- * @return the value of the given expression evaluated using the resolver
- */
- public abstract String evaluate(String expression, Context context);
-
- /**
- * Parses a string of expression into separate params. The values are separated by commas. each value will be
- * translated into one of the following:
- * <ol>
- * <li>If it is in single quotes the value will be translated to a String</li>
- * <li>If is is not in quotes and is a number a it will be translated into a Double</li>
- * <li>else it is a variable which can be resolved and it will be put in as an instance of VariableWrapper</li>
- * </ol>
- *
- * @param expression the expression to be parsed
- * @param vr the VariableResolver instance for resolving variables
- *
- * @return a List of objects which can either be a string, number or a variable wrapper
- */
- protected List<Object> parseParams(String expression, VariableResolver vr) {
- List<Object> result = new ArrayList<>();
- expression = expression.trim();
- String[] ss = expression.split(",");
- for (int i = 0; i < ss.length; i++) {
- ss[i] = ss[i].trim();
- if (ss[i].startsWith("'")) {//a string param has started
- StringBuilder sb = new StringBuilder();
- while (true) {
- sb.append(ss[i]);
- if (ss[i].endsWith("'")) break;
- i++;
- if (i >= ss.length)
- throw new DataImportHandlerException(SEVERE, "invalid string at " + ss[i - 1] + " in function params: " + expression);
- sb.append(",");
- }
- String s = sb.substring(1, sb.length() - 1);
- s = s.replaceAll("\\\\'", "'");
- result.add(s);
- } else {
- if (Character.isDigit(ss[i].charAt(0))) {
- try {
- Double doub = Double.parseDouble(ss[i]);
- result.add(doub);
- } catch (NumberFormatException e) {
- if (vr.resolve(ss[i]) == null) {
- wrapAndThrow(
- SEVERE, e, "Invalid number :" + ss[i] +
- "in parameters " + expression);
- }
- }
- } else {
- result.add(getVariableWrapper(ss[i], vr));
- }
- }
- }
- return result;
- }
-
- protected VariableWrapper getVariableWrapper(String s, VariableResolver vr) {
- return new VariableWrapper(s,vr);
- }
-
- static protected class VariableWrapper {
- public final String varName;
- public final VariableResolver vr;
-
- public VariableWrapper(String s, VariableResolver vr) {
- this.varName = s;
- this.vr = vr;
- }
-
- public Object resolve() {
- return vr.resolve(varName);
- }
-
- @Override
- public String toString() {
- Object o = vr.resolve(varName);
- return o == null ? null : o.toString();
- }
- }
-
- static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$");
-
- public static final String DATE_FORMAT_EVALUATOR = "formatDate";
-
- public static final String URL_ENCODE_EVALUATOR = "encodeUrl";
-
- public static final String ESCAPE_SOLR_QUERY_CHARS = "escapeQueryChars";
-
- public static final String SQL_ESCAPE_EVALUATOR = "escapeSql";
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EventListener.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EventListener.java
deleted file mode 100644
index 0c43a0b..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EventListener.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-/**
- * Event listener for DataImportHandler
- *
- * <b>This API is experimental and subject to change</b>
- *
- * @since solr 1.4
- */
-public interface EventListener {
-
- /**
- * Event callback
- *
- * @param ctx the Context in which this event was called
- */
- void onEvent(Context ctx);
-
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
deleted file mode 100644
index 571c280..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.*;
-import java.lang.invoke.MethodHandles;
-import java.nio.charset.StandardCharsets;
-import java.sql.Blob;
-import java.sql.Clob;
-import java.sql.SQLException;
-import java.util.Properties;
-
-/**
- * This can be useful for users who have a DB field containing xml and wish to use a nested {@link XPathEntityProcessor}
- * <p>
- * The datasouce may be configured as follows
- * <p>
- * <datasource name="f1" type="FieldReaderDataSource" />
- * <p>
- * The entity which uses this datasource must keep the url value as the variable name url="field-name"
- * <p>
- * The fieldname must be resolvable from {@link VariableResolver}
- * <p>
- * This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link Reader}> eg: {@link XPathEntityProcessor}
- * <p>
- * Supports String, BLOB, CLOB data types and there is an extra field (in the entity) 'encoding' for BLOB types
- *
- * @since 1.4
- */
-public class FieldReaderDataSource extends DataSource<Reader> {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- protected VariableResolver vr;
- protected String dataField;
- private String encoding;
- private EntityProcessorWrapper entityProcessor;
-
- @Override
- public void init(Context context, Properties initProps) {
- dataField = context.getEntityAttribute("dataField");
- encoding = context.getEntityAttribute("encoding");
- entityProcessor = (EntityProcessorWrapper) context.getEntityProcessor();
- /*no op*/
- }
-
- @Override
- public Reader getData(String query) {
- Object o = entityProcessor.getVariableResolver().resolve(dataField);
- if (o == null) {
- throw new DataImportHandlerException (SEVERE, "No field available for name : " +dataField);
- }
- if (o instanceof String) {
- return new StringReader((String) o);
- } else if (o instanceof Clob) {
- Clob clob = (Clob) o;
- try {
- //Most of the JDBC drivers have getCharacterStream defined as public
- // so let us just check it
- return readCharStream(clob);
- } catch (Exception e) {
- log.info("Unable to get data from CLOB");
- return null;
-
- }
-
- } else if (o instanceof Blob) {
- Blob blob = (Blob) o;
- try {
- return getReader(blob);
- } catch (Exception e) {
- log.info("Unable to get data from BLOB");
- return null;
-
- }
- } else {
- return new StringReader(o.toString());
- }
-
- }
-
- static Reader readCharStream(Clob clob) {
- try {
- return clob.getCharacterStream();
- } catch (Exception e) {
- wrapAndThrow(SEVERE, e,"Unable to get reader from clob");
- return null;//unreachable
- }
- }
-
- private Reader getReader(Blob blob)
- throws SQLException, UnsupportedEncodingException {
- if (encoding == null) {
- return (new InputStreamReader(blob.getBinaryStream(), StandardCharsets.UTF_8));
- } else {
- return (new InputStreamReader(blob.getBinaryStream(), encoding));
- }
- }
-
- @Override
- public void close() {
-
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java
deleted file mode 100644
index ba7ca5d..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.sql.Blob;
-import java.sql.SQLException;
-import java.util.Properties;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
- * This can be useful for users who have a DB field containing BLOBs which may be Rich documents
- * <p>
- * The datasource may be configured as follows
- * <p>
- * <dataSource name="f1" type="FieldStreamDataSource" />
- * <p>
- * The entity which uses this datasource must keep and attribute dataField
- * <p>
- * The fieldname must be resolvable from {@link VariableResolver}
- * <p>
- * This may be used with any {@link EntityProcessor} which uses a {@link DataSource}<{@link InputStream}> eg: TikaEntityProcessor
- *
- * @since 3.1
- */
-public class FieldStreamDataSource extends DataSource<InputStream> {
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- protected VariableResolver vr;
- protected String dataField;
- private EntityProcessorWrapper wrapper;
-
- @Override
- public void init(Context context, Properties initProps) {
- dataField = context.getEntityAttribute("dataField");
- wrapper = (EntityProcessorWrapper) context.getEntityProcessor();
- /*no op*/
- }
-
- @Override
- public InputStream getData(String query) {
- Object o = wrapper.getVariableResolver().resolve(dataField);
- if (o == null) {
- throw new DataImportHandlerException(SEVERE, "No field available for name : " + dataField);
- } else if (o instanceof Blob) {
- Blob blob = (Blob) o;
- try {
- return blob.getBinaryStream();
- } catch (SQLException sqle) {
- log.info("Unable to get data from BLOB");
- return null;
- }
- } else if (o instanceof byte[]) {
- byte[] bytes = (byte[]) o;
- return new ByteArrayInputStream(bytes);
- } else {
- throw new RuntimeException("unsupported type : " + o.getClass());
- }
-
- }
-
- @Override
- public void close() {
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java
deleted file mode 100644
index 34df1226..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.handler.dataimport;
-
-import java.io.*;
-import java.lang.invoke.MethodHandles;
-import java.nio.charset.StandardCharsets;
-import java.util.Properties;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
-import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
-
-/**
- * <p>
- * A {@link DataSource} which reads from local files
- * </p>
- * <p>
- * The file is read with the default platform encoding. It can be overriden by
- * specifying the encoding in solrconfig.xml
- * </p>
- * <p>
- * Refer to <a
- * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
- * for more details.
- * </p>
- * <p>
- * <b>This API is experimental and may change in the future.</b>
- *
- * @since solr 1.3
- */
-public class FileDataSource extends DataSource<Reader> {
- public static final String BASE_PATH = "basePath";
-
- /**
- * The basePath for this data source
- */
- protected String basePath;
-
- /**
- * The encoding using which the given file should be read
- */
- protected String encoding = null;
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- public void init(Context context, Properties initProps) {
- basePath = initProps.getProperty(BASE_PATH);
- if (initProps.get(URLDataSource.ENCODING) != null)
- encoding = initProps.getProperty(URLDataSource.ENCODING);
- }
-
- /**
- * <p>
- * Returns a reader for the given file.
- * </p>
- * <p>
- * If the given file is not absolute, we try to construct an absolute path
- * using basePath configuration. If that fails, then the relative path is
- * tried. If file is not found a RuntimeException is thrown.
- * </p>
- * <p>
- * <b>It is the responsibility of the calling method to properly close the
- * returned Reader</b>
- * </p>
- */
- @Override
- public Reader getData(String query) {
- File f = getFile(basePath,query);
- try {
- return openStream(f);
- } catch (Exception e) {
- wrapAndThrow(SEVERE,e,"Unable to open File : "+f.getAbsolutePath());
- return null;
- }
- }
-
- static File getFile(String basePath, String query) {
- try {
- File file = new File(query);
-
- // If it's not an absolute path, try relative from basePath.
- if (!file.isAbsolute()) {
- // Resolve and correct basePath.
- File basePathFile;
- if (basePath == null) {
- basePathFile = new File(".").getAbsoluteFile();
- log.warn("FileDataSource.basePath is empty. Resolving to: {}"
- , basePathFile.getAbsolutePath());
- } else {
- basePathFile = new File(basePath);
- if (!basePathFile.isAbsolute()) {
- basePathFile = basePathFile.getAbsoluteFile();
- log.warn("FileDataSource.basePath is not absolute. Resolving to: {}"
- , basePathFile.getAbsolutePath());
- }
- }
-
- file = new File(basePathFile, query).getAbsoluteFile();
- }
-
- if (file.isFile() && file.canRead()) {
- if (log.isDebugEnabled()) {
- log.debug("Accessing File: {}", file.getAbsolutePath());
- }
- return file;
- } else {
- throw new FileNotFoundException("Could not find file: " + query +
- " (resolved to: " + file.getAbsolutePath());
- }
- } catch (FileNotFoundException e) {
- throw new RuntimeException(e);
- }
- }
-
- /**
- * Open a {@link java.io.Reader} for the given file name
- *
- * @param file a {@link java.io.File} instance
- * @return a Reader on the given file
- * @throws FileNotFoundException if the File does not exist
- * @throws UnsupportedEncodingException if the encoding is unsupported
- * @since solr 1.4
- */
- protected Reader openStream(File file) throws FileNotFoundException,
- UnsupportedEncodingException {
- if (encoding == null) {
- return new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8);
- } else {
- return new InputStreamReader(new FileInputStream(file), encoding);
- }
- }
-
- @Override
- public void close() {
-
- }
-}
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
deleted file mode 100644
index a03354f..0000000
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
... 64721 lines suppressed ...