You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/11/02 15:43:15 UTC
[51/51] [partial] lucene-solr:jira/gradle: Add more contrib modules
Add more contrib modules
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4dd96a0e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4dd96a0e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4dd96a0e
Branch: refs/heads/jira/gradle
Commit: 4dd96a0e107b953f760d5217fd5e941419593359
Parents: 46fd24b
Author: Cao Manh Dat <da...@apache.org>
Authored: Fri Nov 2 15:42:17 2018 +0000
Committer: Cao Manh Dat <da...@apache.org>
Committed: Fri Nov 2 15:42:17 2018 +0000
----------------------------------------------------------------------
settings.gradle | 6 +-
solr/contrib/langid/build.gradle | 11 +
.../solr/update/processor/DetectedLanguage.java | 46 -
...DetectLanguageIdentifierUpdateProcessor.java | 89 -
...anguageIdentifierUpdateProcessorFactory.java | 137 --
.../solr/update/processor/LangIdParams.java | 57 -
.../LanguageIdentifierUpdateProcessor.java | 466 -----
.../OpenNLPLangDetectUpdateProcessor.java | 80 -
...OpenNLPLangDetectUpdateProcessorFactory.java | 130 --
.../TikaLanguageIdentifierUpdateProcessor.java | 68 -
...anguageIdentifierUpdateProcessorFactory.java | 94 -
.../apache/solr/update/processor/package.html | 23 -
solr/contrib/langid/src/java/overview.html | 21 -
.../solr/update/processor/DetectedLanguage.java | 46 +
...DetectLanguageIdentifierUpdateProcessor.java | 89 +
...anguageIdentifierUpdateProcessorFactory.java | 137 ++
.../solr/update/processor/LangIdParams.java | 57 +
.../LanguageIdentifierUpdateProcessor.java | 466 +++++
.../OpenNLPLangDetectUpdateProcessor.java | 80 +
...OpenNLPLangDetectUpdateProcessorFactory.java | 130 ++
.../TikaLanguageIdentifierUpdateProcessor.java | 68 +
...anguageIdentifierUpdateProcessorFactory.java | 94 +
.../apache/solr/update/processor/package.html | 23 +
solr/contrib/langid/src/main/java/overview.html | 21 +
.../update/processor/langdetect-profiles/af | 1 +
.../update/processor/langdetect-profiles/ar | 1 +
.../update/processor/langdetect-profiles/bg | 1 +
.../update/processor/langdetect-profiles/bn | 1 +
.../update/processor/langdetect-profiles/cs | 1 +
.../update/processor/langdetect-profiles/da | 1 +
.../update/processor/langdetect-profiles/de | 1 +
.../update/processor/langdetect-profiles/el | 1 +
.../update/processor/langdetect-profiles/en | 1 +
.../update/processor/langdetect-profiles/es | 1 +
.../update/processor/langdetect-profiles/et | 1 +
.../update/processor/langdetect-profiles/fa | 1 +
.../update/processor/langdetect-profiles/fi | 1 +
.../update/processor/langdetect-profiles/fr | 1 +
.../update/processor/langdetect-profiles/gu | 1 +
.../update/processor/langdetect-profiles/he | 1 +
.../update/processor/langdetect-profiles/hi | 1 +
.../update/processor/langdetect-profiles/hr | 1 +
.../update/processor/langdetect-profiles/hu | 1 +
.../update/processor/langdetect-profiles/id | 1 +
.../update/processor/langdetect-profiles/it | 1 +
.../update/processor/langdetect-profiles/ja | 1 +
.../update/processor/langdetect-profiles/kn | 1 +
.../update/processor/langdetect-profiles/ko | 1 +
.../update/processor/langdetect-profiles/lt | 1 +
.../update/processor/langdetect-profiles/lv | 1 +
.../update/processor/langdetect-profiles/mk | 1 +
.../update/processor/langdetect-profiles/ml | 1 +
.../update/processor/langdetect-profiles/mr | 1 +
.../update/processor/langdetect-profiles/ne | 1 +
.../update/processor/langdetect-profiles/nl | 1 +
.../update/processor/langdetect-profiles/no | 1 +
.../update/processor/langdetect-profiles/pa | 1 +
.../update/processor/langdetect-profiles/pl | 1 +
.../update/processor/langdetect-profiles/pt | 1 +
.../update/processor/langdetect-profiles/ro | 1 +
.../update/processor/langdetect-profiles/ru | 1 +
.../update/processor/langdetect-profiles/sk | 1 +
.../update/processor/langdetect-profiles/sl | 1 +
.../update/processor/langdetect-profiles/so | 1 +
.../update/processor/langdetect-profiles/sq | 1 +
.../update/processor/langdetect-profiles/sv | 1 +
.../update/processor/langdetect-profiles/sw | 1 +
.../update/processor/langdetect-profiles/ta | 1 +
.../update/processor/langdetect-profiles/te | 1 +
.../update/processor/langdetect-profiles/th | 1 +
.../update/processor/langdetect-profiles/tl | 1 +
.../update/processor/langdetect-profiles/tr | 1 +
.../update/processor/langdetect-profiles/uk | 1 +
.../update/processor/langdetect-profiles/ur | 1 +
.../update/processor/langdetect-profiles/vi | 1 +
.../update/processor/langdetect-profiles/zh-cn | 1 +
.../update/processor/langdetect-profiles/zh-tw | 1 +
.../update/processor/langdetect-profiles/af | 1 -
.../update/processor/langdetect-profiles/ar | 1 -
.../update/processor/langdetect-profiles/bg | 1 -
.../update/processor/langdetect-profiles/bn | 1 -
.../update/processor/langdetect-profiles/cs | 1 -
.../update/processor/langdetect-profiles/da | 1 -
.../update/processor/langdetect-profiles/de | 1 -
.../update/processor/langdetect-profiles/el | 1 -
.../update/processor/langdetect-profiles/en | 1 -
.../update/processor/langdetect-profiles/es | 1 -
.../update/processor/langdetect-profiles/et | 1 -
.../update/processor/langdetect-profiles/fa | 1 -
.../update/processor/langdetect-profiles/fi | 1 -
.../update/processor/langdetect-profiles/fr | 1 -
.../update/processor/langdetect-profiles/gu | 1 -
.../update/processor/langdetect-profiles/he | 1 -
.../update/processor/langdetect-profiles/hi | 1 -
.../update/processor/langdetect-profiles/hr | 1 -
.../update/processor/langdetect-profiles/hu | 1 -
.../update/processor/langdetect-profiles/id | 1 -
.../update/processor/langdetect-profiles/it | 1 -
.../update/processor/langdetect-profiles/ja | 1 -
.../update/processor/langdetect-profiles/kn | 1 -
.../update/processor/langdetect-profiles/ko | 1 -
.../update/processor/langdetect-profiles/lt | 1 -
.../update/processor/langdetect-profiles/lv | 1 -
.../update/processor/langdetect-profiles/mk | 1 -
.../update/processor/langdetect-profiles/ml | 1 -
.../update/processor/langdetect-profiles/mr | 1 -
.../update/processor/langdetect-profiles/ne | 1 -
.../update/processor/langdetect-profiles/nl | 1 -
.../update/processor/langdetect-profiles/no | 1 -
.../update/processor/langdetect-profiles/pa | 1 -
.../update/processor/langdetect-profiles/pl | 1 -
.../update/processor/langdetect-profiles/pt | 1 -
.../update/processor/langdetect-profiles/ro | 1 -
.../update/processor/langdetect-profiles/ru | 1 -
.../update/processor/langdetect-profiles/sk | 1 -
.../update/processor/langdetect-profiles/sl | 1 -
.../update/processor/langdetect-profiles/so | 1 -
.../update/processor/langdetect-profiles/sq | 1 -
.../update/processor/langdetect-profiles/sv | 1 -
.../update/processor/langdetect-profiles/sw | 1 -
.../update/processor/langdetect-profiles/ta | 1 -
.../update/processor/langdetect-profiles/te | 1 -
.../update/processor/langdetect-profiles/th | 1 -
.../update/processor/langdetect-profiles/tl | 1 -
.../update/processor/langdetect-profiles/tr | 1 -
.../update/processor/langdetect-profiles/uk | 1 -
.../update/processor/langdetect-profiles/ur | 1 -
.../update/processor/langdetect-profiles/vi | 1 -
.../update/processor/langdetect-profiles/zh-cn | 1 -
.../update/processor/langdetect-profiles/zh-tw | 1 -
.../opennlp-langdetect.eng-swe-spa-rus-deu.bin | Bin 17702 -> 0 bytes
.../langid/solr/collection1/conf/schema.xml | 73 -
.../conf/solrconfig-languageidentifier.xml | 115 --
.../opennlp.langdetect.trainer.params.txt | 17 -
...ageIdentifierUpdateProcessorFactoryTest.java | 61 +
...dentifierUpdateProcessorFactoryTestCase.java | 320 ++++
...NLPLangDetectUpdateProcessorFactoryTest.java | 66 +
...ageIdentifierUpdateProcessorFactoryTest.java | 181 ++
...ageIdentifierUpdateProcessorFactoryTest.java | 61 -
...dentifierUpdateProcessorFactoryTestCase.java | 320 ----
...NLPLangDetectUpdateProcessorFactoryTest.java | 66 -
...ageIdentifierUpdateProcessorFactoryTest.java | 181 --
.../opennlp-langdetect.eng-swe-spa-rus-deu.bin | Bin 0 -> 17702 bytes
.../langid/solr/collection1/conf/schema.xml | 73 +
.../conf/solrconfig-languageidentifier.xml | 115 ++
.../opennlp.langdetect.trainer.params.txt | 17 +
solr/contrib/ltr/build.gradle | 11 +
.../org/apache/solr/ltr/CSVFeatureLogger.java | 62 -
.../src/java/org/apache/solr/ltr/DocInfo.java | 42 -
.../java/org/apache/solr/ltr/FeatureLogger.java | 81 -
.../java/org/apache/solr/ltr/LTRRescorer.java | 251 ---
.../org/apache/solr/ltr/LTRScoringQuery.java | 739 -------
.../org/apache/solr/ltr/LTRThreadModule.java | 192 --
.../solr/ltr/SolrQueryRequestContextUtils.java | 83 -
.../org/apache/solr/ltr/feature/Feature.java | 338 ----
.../solr/ltr/feature/FeatureException.java | 31 -
.../solr/ltr/feature/FieldLengthFeature.java | 156 --
.../solr/ltr/feature/FieldValueFeature.java | 150 --
.../solr/ltr/feature/OriginalScoreFeature.java | 129 --
.../apache/solr/ltr/feature/SolrFeature.java | 340 ----
.../apache/solr/ltr/feature/ValueFeature.java | 145 --
.../apache/solr/ltr/feature/package-info.java | 21 -
.../org/apache/solr/ltr/model/AdapterModel.java | 45 -
.../solr/ltr/model/DefaultWrapperModel.java | 105 -
.../apache/solr/ltr/model/LTRScoringModel.java | 296 ---
.../org/apache/solr/ltr/model/LinearModel.java | 157 --
.../apache/solr/ltr/model/ModelException.java | 31 -
.../ltr/model/MultipleAdditiveTreesModel.java | 382 ----
.../solr/ltr/model/NeuralNetworkModel.java | 325 ----
.../org/apache/solr/ltr/model/WrapperModel.java | 163 --
.../org/apache/solr/ltr/model/package-info.java | 21 -
.../solr/ltr/norm/IdentityNormalizer.java | 53 -
.../apache/solr/ltr/norm/MinMaxNormalizer.java | 107 --
.../org/apache/solr/ltr/norm/Normalizer.java | 64 -
.../solr/ltr/norm/NormalizerException.java | 31 -
.../solr/ltr/norm/StandardNormalizer.java | 99 -
.../org/apache/solr/ltr/norm/package-info.java | 23 -
.../java/org/apache/solr/ltr/package-info.java | 45 -
.../LTRFeatureLoggerTransformerFactory.java | 311 ---
.../ltr/response/transform/package-info.java | 23 -
.../solr/ltr/search/LTRQParserPlugin.java | 236 ---
.../apache/solr/ltr/search/package-info.java | 23 -
.../org/apache/solr/ltr/store/FeatureStore.java | 67 -
.../org/apache/solr/ltr/store/ModelStore.java | 74 -
.../org/apache/solr/ltr/store/package-info.java | 21 -
.../ltr/store/rest/ManagedFeatureStore.java | 211 --
.../solr/ltr/store/rest/ManagedModelStore.java | 339 ----
.../solr/ltr/store/rest/package-info.java | 22 -
solr/contrib/ltr/src/java/overview.html | 91 -
.../org/apache/solr/ltr/CSVFeatureLogger.java | 62 +
.../main/java/org/apache/solr/ltr/DocInfo.java | 42 +
.../java/org/apache/solr/ltr/FeatureLogger.java | 81 +
.../java/org/apache/solr/ltr/LTRRescorer.java | 251 +++
.../org/apache/solr/ltr/LTRScoringQuery.java | 739 +++++++
.../org/apache/solr/ltr/LTRThreadModule.java | 192 ++
.../solr/ltr/SolrQueryRequestContextUtils.java | 83 +
.../org/apache/solr/ltr/feature/Feature.java | 338 ++++
.../solr/ltr/feature/FeatureException.java | 31 +
.../solr/ltr/feature/FieldLengthFeature.java | 156 ++
.../solr/ltr/feature/FieldValueFeature.java | 150 ++
.../solr/ltr/feature/OriginalScoreFeature.java | 129 ++
.../apache/solr/ltr/feature/SolrFeature.java | 340 ++++
.../apache/solr/ltr/feature/ValueFeature.java | 145 ++
.../apache/solr/ltr/feature/package-info.java | 21 +
.../org/apache/solr/ltr/model/AdapterModel.java | 45 +
.../solr/ltr/model/DefaultWrapperModel.java | 105 +
.../apache/solr/ltr/model/LTRScoringModel.java | 296 +++
.../org/apache/solr/ltr/model/LinearModel.java | 157 ++
.../apache/solr/ltr/model/ModelException.java | 31 +
.../ltr/model/MultipleAdditiveTreesModel.java | 382 ++++
.../solr/ltr/model/NeuralNetworkModel.java | 325 ++++
.../org/apache/solr/ltr/model/WrapperModel.java | 163 ++
.../org/apache/solr/ltr/model/package-info.java | 21 +
.../solr/ltr/norm/IdentityNormalizer.java | 53 +
.../apache/solr/ltr/norm/MinMaxNormalizer.java | 107 ++
.../org/apache/solr/ltr/norm/Normalizer.java | 64 +
.../solr/ltr/norm/NormalizerException.java | 31 +
.../solr/ltr/norm/StandardNormalizer.java | 99 +
.../org/apache/solr/ltr/norm/package-info.java | 23 +
.../java/org/apache/solr/ltr/package-info.java | 45 +
.../LTRFeatureLoggerTransformerFactory.java | 311 +++
.../ltr/response/transform/package-info.java | 23 +
.../solr/ltr/search/LTRQParserPlugin.java | 236 +++
.../apache/solr/ltr/search/package-info.java | 23 +
.../org/apache/solr/ltr/store/FeatureStore.java | 67 +
.../org/apache/solr/ltr/store/ModelStore.java | 74 +
.../org/apache/solr/ltr/store/package-info.java | 21 +
.../ltr/store/rest/ManagedFeatureStore.java | 211 ++
.../solr/ltr/store/rest/ManagedModelStore.java | 339 ++++
.../solr/ltr/store/rest/package-info.java | 22 +
solr/contrib/ltr/src/main/java/overview.html | 91 +
.../featureExamples/comp_features.json | 37 -
.../featureExamples/external_features.json | 57 -
...external_features_for_sparse_processing.json | 18 -
.../featureExamples/features-linear-efi.json | 17 -
.../featureExamples/features-linear.json | 51 -
.../features-store-test-model.json | 51 -
.../test-files/featureExamples/fq_features.json | 16 -
.../multipleadditivetreesmodel_features.json | 16 -
.../neuralnetworkmodel_features.json | 30 -
.../modelExamples/external_model.json | 12 -
.../modelExamples/external_model2.json | 13 -
.../src/test-files/modelExamples/fq-model.json | 25 -
.../modelExamples/linear-model-efi.json | 14 -
.../test-files/modelExamples/linear-model.json | 45 -
.../multipleadditivetreesmodel.json | 38 -
...tivetreesmodel_external_binary_features.json | 38 -
.../multipleadditivetreesmodel_no_feature.json | 24 -
.../multipleadditivetreesmodel_no_features.json | 14 -
.../multipleadditivetreesmodel_no_left.json | 22 -
.../multipleadditivetreesmodel_no_params.json | 8 -
.../multipleadditivetreesmodel_no_right.json | 22 -
...multipleadditivetreesmodel_no_threshold.json | 24 -
.../multipleadditivetreesmodel_no_tree.json | 15 -
.../multipleadditivetreesmodel_no_trees.json | 10 -
.../multipleadditivetreesmodel_no_weight.json | 24 -
.../neuralnetworkmodel_bad_activation.json | 26 -
.../neuralnetworkmodel_custom.json | 17 -
.../neuralnetworkmodel_explainable.json | 26 -
.../neuralnetworkmodel_mismatch_bias.json | 26 -
.../neuralnetworkmodel_mismatch_input.json | 26 -
.../neuralnetworkmodel_mismatch_layers.json | 25 -
.../neuralnetworkmodel_too_many_rows.json | 27 -
.../test-files/solr/collection1/conf/schema.xml | 90 -
.../solr/collection1/conf/solrconfig-ltr.xml | 73 -
.../collection1/conf/solrconfig-ltr_Th10_10.xml | 73 -
.../collection1/conf/solrconfig-multiseg.xml | 73 -
.../solr/collection1/conf/stopwords.txt | 16 -
.../solr/collection1/conf/synonyms.txt | 28 -
solr/contrib/ltr/src/test-files/solr/solr.xml | 42 -
.../apache/solr/ltr/FeatureLoggerTestUtils.java | 44 +
.../org/apache/solr/ltr/TestLTROnSolrCloud.java | 307 +++
.../apache/solr/ltr/TestLTRQParserExplain.java | 152 ++
.../apache/solr/ltr/TestLTRQParserPlugin.java | 126 ++
.../solr/ltr/TestLTRReRankingPipeline.java | 304 +++
.../apache/solr/ltr/TestLTRScoringQuery.java | 310 +++
.../org/apache/solr/ltr/TestLTRWithFacet.java | 103 +
.../org/apache/solr/ltr/TestLTRWithSort.java | 102 +
.../solr/ltr/TestParallelWeightCreation.java | 72 +
.../org/apache/solr/ltr/TestRerankBase.java | 480 +++++
.../solr/ltr/TestSelectiveWeightCreation.java | 256 +++
.../ltr/feature/TestEdisMaxSolrFeature.java | 76 +
.../solr/ltr/feature/TestExternalFeatures.java | 201 ++
.../ltr/feature/TestExternalValueFeatures.java | 101 +
...stFeatureExtractionFromMultipleSegments.java | 105 +
.../solr/ltr/feature/TestFeatureLogging.java | 237 +++
.../ltr/feature/TestFieldLengthFeature.java | 156 ++
.../solr/ltr/feature/TestFieldValueFeature.java | 208 ++
.../solr/ltr/feature/TestFilterSolrFeature.java | 109 ++
.../ltr/feature/TestNoMatchSolrFeature.java | 264 +++
.../ltr/feature/TestOriginalScoreFeature.java | 155 ++
.../ltr/feature/TestOriginalScoreScorer.java | 47 +
.../solr/ltr/feature/TestRankingFeature.java | 123 ++
.../ltr/feature/TestUserTermScoreWithQ.java | 74 +
.../ltr/feature/TestUserTermScorerQuery.java | 74 +
.../ltr/feature/TestUserTermScorereQDF.java | 75 +
.../solr/ltr/feature/TestValueFeature.java | 165 ++
.../apache/solr/ltr/model/TestAdapterModel.java | 143 ++
.../solr/ltr/model/TestDefaultWrapperModel.java | 145 ++
.../apache/solr/ltr/model/TestLinearModel.java | 217 +++
.../model/TestMultipleAdditiveTreesModel.java | 246 +++
.../solr/ltr/model/TestNeuralNetworkModel.java | 365 ++++
.../apache/solr/ltr/model/TestWrapperModel.java | 290 +++
.../solr/ltr/norm/TestMinMaxNormalizer.java | 136 ++
.../solr/ltr/norm/TestStandardNormalizer.java | 148 ++
.../ltr/store/rest/TestManagedFeatureStore.java | 149 ++
.../solr/ltr/store/rest/TestModelManager.java | 172 ++
.../store/rest/TestModelManagerPersistence.java | 330 ++++
.../apache/solr/ltr/FeatureLoggerTestUtils.java | 44 -
.../org/apache/solr/ltr/TestLTROnSolrCloud.java | 307 ---
.../apache/solr/ltr/TestLTRQParserExplain.java | 152 --
.../apache/solr/ltr/TestLTRQParserPlugin.java | 126 --
.../solr/ltr/TestLTRReRankingPipeline.java | 304 ---
.../apache/solr/ltr/TestLTRScoringQuery.java | 310 ---
.../org/apache/solr/ltr/TestLTRWithFacet.java | 103 -
.../org/apache/solr/ltr/TestLTRWithSort.java | 102 -
.../solr/ltr/TestParallelWeightCreation.java | 72 -
.../org/apache/solr/ltr/TestRerankBase.java | 480 -----
.../solr/ltr/TestSelectiveWeightCreation.java | 256 ---
.../ltr/feature/TestEdisMaxSolrFeature.java | 76 -
.../solr/ltr/feature/TestExternalFeatures.java | 201 --
.../ltr/feature/TestExternalValueFeatures.java | 101 -
...stFeatureExtractionFromMultipleSegments.java | 105 -
.../solr/ltr/feature/TestFeatureLogging.java | 237 ---
.../ltr/feature/TestFieldLengthFeature.java | 156 --
.../solr/ltr/feature/TestFieldValueFeature.java | 208 --
.../solr/ltr/feature/TestFilterSolrFeature.java | 109 --
.../ltr/feature/TestNoMatchSolrFeature.java | 264 ---
.../ltr/feature/TestOriginalScoreFeature.java | 155 --
.../ltr/feature/TestOriginalScoreScorer.java | 47 -
.../solr/ltr/feature/TestRankingFeature.java | 123 --
.../ltr/feature/TestUserTermScoreWithQ.java | 74 -
.../ltr/feature/TestUserTermScorerQuery.java | 74 -
.../ltr/feature/TestUserTermScorereQDF.java | 75 -
.../solr/ltr/feature/TestValueFeature.java | 165 --
.../apache/solr/ltr/model/TestAdapterModel.java | 143 --
.../solr/ltr/model/TestDefaultWrapperModel.java | 145 --
.../apache/solr/ltr/model/TestLinearModel.java | 217 ---
.../model/TestMultipleAdditiveTreesModel.java | 246 ---
.../solr/ltr/model/TestNeuralNetworkModel.java | 365 ----
.../apache/solr/ltr/model/TestWrapperModel.java | 290 ---
.../solr/ltr/norm/TestMinMaxNormalizer.java | 136 --
.../solr/ltr/norm/TestStandardNormalizer.java | 148 --
.../ltr/store/rest/TestManagedFeatureStore.java | 149 --
.../solr/ltr/store/rest/TestModelManager.java | 172 --
.../store/rest/TestModelManagerPersistence.java | 330 ----
.../featureExamples/comp_features.json | 37 +
.../featureExamples/external_features.json | 57 +
...external_features_for_sparse_processing.json | 18 +
.../featureExamples/features-linear-efi.json | 17 +
.../featureExamples/features-linear.json | 51 +
.../features-store-test-model.json | 51 +
.../resources/featureExamples/fq_features.json | 16 +
.../multipleadditivetreesmodel_features.json | 16 +
.../neuralnetworkmodel_features.json | 30 +
.../resources/modelExamples/external_model.json | 12 +
.../modelExamples/external_model2.json | 13 +
.../test/resources/modelExamples/fq-model.json | 25 +
.../modelExamples/linear-model-efi.json | 14 +
.../resources/modelExamples/linear-model.json | 45 +
.../multipleadditivetreesmodel.json | 38 +
...tivetreesmodel_external_binary_features.json | 38 +
.../multipleadditivetreesmodel_no_feature.json | 24 +
.../multipleadditivetreesmodel_no_features.json | 14 +
.../multipleadditivetreesmodel_no_left.json | 22 +
.../multipleadditivetreesmodel_no_params.json | 8 +
.../multipleadditivetreesmodel_no_right.json | 22 +
...multipleadditivetreesmodel_no_threshold.json | 24 +
.../multipleadditivetreesmodel_no_tree.json | 15 +
.../multipleadditivetreesmodel_no_trees.json | 10 +
.../multipleadditivetreesmodel_no_weight.json | 24 +
.../neuralnetworkmodel_bad_activation.json | 26 +
.../neuralnetworkmodel_custom.json | 17 +
.../neuralnetworkmodel_explainable.json | 26 +
.../neuralnetworkmodel_mismatch_bias.json | 26 +
.../neuralnetworkmodel_mismatch_input.json | 26 +
.../neuralnetworkmodel_mismatch_layers.json | 25 +
.../neuralnetworkmodel_too_many_rows.json | 27 +
.../resources/solr/collection1/conf/schema.xml | 90 +
.../solr/collection1/conf/solrconfig-ltr.xml | 73 +
.../collection1/conf/solrconfig-ltr_Th10_10.xml | 73 +
.../collection1/conf/solrconfig-multiseg.xml | 73 +
.../solr/collection1/conf/stopwords.txt | 16 +
.../solr/collection1/conf/synonyms.txt | 28 +
.../ltr/src/test/resources/solr/solr.xml | 42 +
solr/contrib/prometheus-exporter/build.gradle | 20 +
.../prometheus/collector/SolrCollector.java | 463 -----
.../solr/prometheus/collector/package-info.java | 21 -
.../solr/prometheus/exporter/SolrExporter.java | 253 ---
.../solr/prometheus/exporter/package-info.java | 21 -
.../solr/prometheus/scraper/SolrScraper.java | 214 ---
.../solr/prometheus/scraper/package-info.java | 21 -
.../prometheus-exporter/src/java/overview.html | 26 -
.../prometheus/collector/SolrCollector.java | 463 +++++
.../solr/prometheus/collector/package-info.java | 21 +
.../solr/prometheus/exporter/SolrExporter.java | 253 +++
.../solr/prometheus/exporter/package-info.java | 21 +
.../solr/prometheus/scraper/SolrScraper.java | 214 +++
.../solr/prometheus/scraper/package-info.java | 21 +
.../src/main/java/overview.html | 26 +
.../test-files/conf/solr-exporter-config.xml | 1806 ------------------
.../configsets/collection1/conf/managed-schema | 412 ----
.../configsets/collection1/conf/solrconfig.xml | 220 ---
.../configsets/collection1/conf/stopwords.txt | 14 -
.../configsets/collection1/conf/synonyms.txt | 29 -
.../test-files/exampledocs/gb18030-example.xml | 32 -
.../src/test-files/exampledocs/hd.xml | 56 -
.../src/test-files/exampledocs/ipod_other.xml | 60 -
.../src/test-files/exampledocs/ipod_video.xml | 40 -
.../test-files/exampledocs/manufacturers.xml | 75 -
.../src/test-files/exampledocs/mem.xml | 77 -
.../src/test-files/exampledocs/money.xml | 65 -
.../src/test-files/exampledocs/monitor.xml | 34 -
.../src/test-files/exampledocs/monitor2.xml | 33 -
.../src/test-files/exampledocs/mp500.xml | 43 -
.../src/test-files/exampledocs/sample.html | 13 -
.../src/test-files/exampledocs/sd500.xml | 38 -
.../src/test-files/exampledocs/solr.xml | 38 -
.../src/test-files/exampledocs/utf8-example.xml | 42 -
.../src/test-files/exampledocs/vidcard.xml | 62 -
.../prometheus/collector/SolrCollectorTest.java | 94 +
.../prometheus/exporter/SolrExporterTest.java | 99 +
.../exporter/SolrExporterTestBase.java | 53 +
.../prometheus/collector/SolrCollectorTest.java | 94 -
.../prometheus/exporter/SolrExporterTest.java | 99 -
.../exporter/SolrExporterTestBase.java | 53 -
.../resources/conf/solr-exporter-config.xml | 1806 ++++++++++++++++++
.../configsets/collection1/conf/managed-schema | 412 ++++
.../configsets/collection1/conf/solrconfig.xml | 220 +++
.../configsets/collection1/conf/stopwords.txt | 14 +
.../configsets/collection1/conf/synonyms.txt | 29 +
.../resources/exampledocs/gb18030-example.xml | 32 +
.../src/test/resources/exampledocs/hd.xml | 56 +
.../test/resources/exampledocs/ipod_other.xml | 60 +
.../test/resources/exampledocs/ipod_video.xml | 40 +
.../resources/exampledocs/manufacturers.xml | 75 +
.../src/test/resources/exampledocs/mem.xml | 77 +
.../src/test/resources/exampledocs/money.xml | 65 +
.../src/test/resources/exampledocs/monitor.xml | 34 +
.../src/test/resources/exampledocs/monitor2.xml | 33 +
.../src/test/resources/exampledocs/mp500.xml | 43 +
.../src/test/resources/exampledocs/sample.html | 13 +
.../src/test/resources/exampledocs/sd500.xml | 38 +
.../src/test/resources/exampledocs/solr.xml | 38 +
.../test/resources/exampledocs/utf8-example.xml | 42 +
.../src/test/resources/exampledocs/vidcard.xml | 62 +
solr/contrib/velocity/build.gradle | 10 +
.../java/org/apache/solr/response/PageTool.java | 92 -
.../solr/response/SolrParamResourceLoader.java | 73 -
.../solr/response/SolrVelocityLogger.java | 114 --
.../response/SolrVelocityResourceLoader.java | 61 -
.../solr/response/VelocityResponseWriter.java | 414 ----
.../java/org/apache/solr/response/package.html | 23 -
solr/contrib/velocity/src/java/overview.html | 21 -
.../java/org/apache/solr/response/PageTool.java | 92 +
.../solr/response/SolrParamResourceLoader.java | 73 +
.../solr/response/SolrVelocityLogger.java | 114 ++
.../response/SolrVelocityResourceLoader.java | 61 +
.../solr/response/VelocityResponseWriter.java | 414 ++++
.../java/org/apache/solr/response/package.html | 23 +
.../velocity/src/main/java/overview.html | 21 +
.../src/main/resources/VM_global_library.vm | 4 +
.../velocity/src/main/resources/_macros.vm | 70 +
.../velocity/src/main/resources/macros.vm | 3 +
.../src/main/resources/velocity/_default.vm | 14 +
.../src/main/resources/velocity/browse.vm | 73 +
.../src/main/resources/velocity/error.vm | 4 +
.../src/main/resources/velocity/facets.vm | 23 +
.../src/main/resources/velocity/footer.vm | 19 +
.../src/main/resources/velocity/head.vm | 185 ++
.../velocity/src/main/resources/velocity/hit.vm | 27 +
.../src/main/resources/velocity/layout.vm | 19 +
.../resources/velocity/resources.properties | 6 +
.../src/main/resources/velocity/results_list.vm | 3 +
.../velocity/src/resources/VM_global_library.vm | 4 -
solr/contrib/velocity/src/resources/_macros.vm | 70 -
solr/contrib/velocity/src/resources/macros.vm | 3 -
.../velocity/src/resources/velocity/_default.vm | 14 -
.../velocity/src/resources/velocity/browse.vm | 73 -
.../velocity/src/resources/velocity/error.vm | 4 -
.../velocity/src/resources/velocity/facets.vm | 23 -
.../velocity/src/resources/velocity/footer.vm | 19 -
.../velocity/src/resources/velocity/head.vm | 185 --
.../velocity/src/resources/velocity/hit.vm | 27 -
.../velocity/src/resources/velocity/layout.vm | 19 -
.../src/resources/velocity/resources.properties | 6 -
.../src/resources/velocity/results_list.vm | 3 -
.../velocity/src/test-files/velocity/file.vm | 1 -
.../velocity/solr/collection1/conf/schema.xml | 26 -
.../solr/collection1/conf/solrconfig.xml | 55 -
.../collection1/conf/velocity-init.properties | 18 -
.../conf/velocity/VM_global_library.vm | 3 -
.../collection1/conf/velocity/custom_tool.vm | 1 -
.../solr/collection1/conf/velocity/encoding.vm | 1 -
.../solr/collection1/conf/velocity/foreach.vm | 1 -
.../solr/collection1/conf/velocity/layout.vm | 1 -
.../solr/collection1/conf/velocity/locale.vm | 1 -
.../solr/collection1/conf/velocity/macros.vm | 3 -
.../solr/collection1/conf/velocity/numFound.vm | 1 -
.../collection1/conf/velocity/resource_get.vm | 1 -
.../conf/velocity/test_macro_legacy_support.vm | 1 -
.../conf/velocity/test_macro_overridden.vm | 1 -
.../conf/velocity/test_macro_visible.vm | 1 -
.../java/org/apache/solr/velocity/MockTool.java | 34 +
.../velocity/VelocityResponseWriterTest.java | 224 +++
.../test/org/apache/solr/velocity/MockTool.java | 34 -
.../velocity/VelocityResponseWriterTest.java | 224 ---
.../src/test/resources/velocity/file.vm | 1 +
.../resources/velocity/resources.properties | 18 +
.../velocity/resources_en_UK.properties | 18 +
.../velocity/solr/collection1/conf/schema.xml | 26 +
.../solr/collection1/conf/solrconfig.xml | 55 +
.../collection1/conf/velocity-init.properties | 18 +
.../conf/velocity/VM_global_library.vm | 3 +
.../collection1/conf/velocity/custom_tool.vm | 1 +
.../solr/collection1/conf/velocity/encoding.vm | 1 +
.../solr/collection1/conf/velocity/foreach.vm | 1 +
.../solr/collection1/conf/velocity/layout.vm | 1 +
.../solr/collection1/conf/velocity/locale.vm | 1 +
.../solr/collection1/conf/velocity/macros.vm | 3 +
.../solr/collection1/conf/velocity/numFound.vm | 1 +
.../collection1/conf/velocity/resource_get.vm | 1 +
.../conf/velocity/test_macro_legacy_support.vm | 1 +
.../conf/velocity/test_macro_overridden.vm | 1 +
.../conf/velocity/test_macro_visible.vm | 1 +
.../src/test/velocity/resources.properties | 18 -
.../test/velocity/resources_en_UK.properties | 18 -
527 files changed, 22367 insertions(+), 22311 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/settings.gradle
----------------------------------------------------------------------
diff --git a/settings.gradle b/settings.gradle
index 9f0488c..8d6e0d2 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -51,4 +51,8 @@ include 'solr:contrib:analytics'
include 'solr:contrib:clustering'
include 'solr:contrib:dataimporthandler'
include 'solr:contrib:dataimporthandler-extras'
-include 'solr:contrib:extraction'
\ No newline at end of file
+include 'solr:contrib:extraction'
+include 'solr:contrib:langid'
+include 'solr:contrib:ltr'
+include 'solr:contrib:prometheus-exporter'
+include 'solr:contrib:velocity'
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/build.gradle
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/build.gradle b/solr/contrib/langid/build.gradle
new file mode 100644
index 0000000..8013711
--- /dev/null
+++ b/solr/contrib/langid/build.gradle
@@ -0,0 +1,11 @@
+apply plugin: 'java'
+
+dependencies {
+ compile project(':solr:core')
+ compile library.langdetect
+ compile library.jsonic
+ compile library.opennlp_tools
+ compile library.tika_core
+
+ testCompile project(':solr:test-framework')
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
deleted file mode 100644
index e8e6fbe..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-/**
- * Bean holding a language and a detection certainty
- */
-public class DetectedLanguage {
- private final String langCode;
- private final Double certainty;
-
- DetectedLanguage(String lang, Double certainty) {
- this.langCode = lang;
- this.certainty = certainty;
- }
-
- /**
- * Returns the detected language code
- * @return language code as a string
- */
- public String getLangCode() {
- return langCode;
- }
-
- /**
- * Returns the detected certainty for this language
- * @return certainty as a value between 0.0 and 1.0 where 1.0 is 100% certain
- */
- public Double getCertainty() {
- return certainty;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
deleted file mode 100644
index 8af05b3..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-
-import com.cybozu.labs.langdetect.Detector;
-import com.cybozu.labs.langdetect.DetectorFactory;
-import com.cybozu.labs.langdetect.LangDetectException;
-import com.cybozu.labs.langdetect.Language;
-import org.apache.solr.common.SolrInputDocument;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Identifies the language of a set of input fields using http://code.google.com/p/language-detection
- * <p>
- * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
- * @since 3.5
- */
-public class LangDetectLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpdateProcessor {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public LangDetectLanguageIdentifierUpdateProcessor(SolrQueryRequest req,
- SolrQueryResponse rsp, UpdateRequestProcessor next) {
- super(req, rsp, next);
- }
-
- @Override
- protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
- try {
- Detector detector = DetectorFactory.create();
- detector.setMaxTextLength(maxTotalChars);
-
- for (String fieldName : inputFields) {
- log.debug("Appending field " + fieldName);
- if (doc.containsKey(fieldName)) {
- Collection<Object> fieldValues = doc.getFieldValues(fieldName);
- if (fieldValues != null) {
- for (Object content : fieldValues) {
- if (content instanceof String) {
- String stringContent = (String) content;
- if (stringContent.length() > maxFieldValueChars) {
- detector.append(stringContent.substring(0, maxFieldValueChars));
- } else {
- detector.append(stringContent);
- }
- detector.append(" ");
- } else {
- log.warn("Field " + fieldName + " not a String value, not including in detection");
- }
- }
- }
- }
- }
- ArrayList<Language> langlist = detector.getProbabilities();
- ArrayList<DetectedLanguage> solrLangList = new ArrayList<>();
- for (Language l: langlist) {
- solrLangList.add(new DetectedLanguage(l.lang, l.prob));
- }
- return solrLangList;
- } catch (LangDetectException e) {
- log.debug("Could not determine language, returning empty list: ", e);
- return Collections.emptyList();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
deleted file mode 100644
index a140807..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.util.SolrPluginUtils;
-import org.apache.solr.util.plugin.SolrCoreAware;
-
-import com.cybozu.labs.langdetect.DetectorFactory;
-import com.cybozu.labs.langdetect.LangDetectException;
-
-/**
- * Identifies the language of a set of input fields using
- * http://code.google.com/p/language-detection
- * <p>
- * The UpdateProcessorChain config entry can take a number of parameters
- * which may also be passed as HTTP parameters on the update request
- * and override the defaults. Here is the simplest processor config possible:
- *
- * <pre class="prettyprint" >
- * <processor class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory">
- * <str name="langid.fl">title,text</str>
- * <str name="langid.langField">language_s</str>
- * </processor>
- * </pre>
- * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
- * @since 3.5
- */
-public class LangDetectLanguageIdentifierUpdateProcessorFactory extends
- UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams {
-
- protected SolrParams defaults;
- protected SolrParams appends;
- protected SolrParams invariants;
-
- @Override
- public void inform(SolrCore core) {
- }
-
- /**
- * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly
- * to a RequestHandler, with defaults, appends and invariants.
- * @param args a NamedList with the configuration parameters
- */
- @Override
- @SuppressWarnings("rawtypes")
- public void init( NamedList args )
- {
- try {
- loadData();
- } catch (Exception e) {
- throw new RuntimeException("Couldn't load profile data, will return empty languages always!", e);
- }
- if (args != null) {
- Object o;
- o = args.get("defaults");
- if (o != null && o instanceof NamedList) {
- defaults = ((NamedList) o).toSolrParams();
- } else {
- defaults = args.toSolrParams();
- }
- o = args.get("appends");
- if (o != null && o instanceof NamedList) {
- appends = ((NamedList) o).toSolrParams();
- }
- o = args.get("invariants");
- if (o != null && o instanceof NamedList) {
- invariants = ((NamedList) o).toSolrParams();
- }
- }
- }
-
- @Override
- public UpdateRequestProcessor getInstance(SolrQueryRequest req,
- SolrQueryResponse rsp, UpdateRequestProcessor next) {
- // Process defaults, appends and invariants if we got a request
- if(req != null) {
- SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
- }
- return new LangDetectLanguageIdentifierUpdateProcessor(req, rsp, next);
- }
-
-
- // DetectorFactory is totally global, so we only want to do this once... ever!!!
- static boolean loaded;
-
- // profiles we will load from classpath
- static final String languages[] = {
- "af", "ar", "bg", "bn", "cs", "da", "de", "el", "en", "es", "et", "fa", "fi", "fr", "gu",
- "he", "hi", "hr", "hu", "id", "it", "ja", "kn", "ko", "lt", "lv", "mk", "ml", "mr", "ne",
- "nl", "no", "pa", "pl", "pt", "ro", "ru", "sk", "sl", "so", "sq", "sv", "sw", "ta", "te",
- "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw"
- };
-
- public static synchronized void loadData() throws IOException, LangDetectException {
- if (loaded) {
- return;
- }
- loaded = true;
- List<String> profileData = new ArrayList<>();
- for (String language : languages) {
- InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream("langdetect-profiles/" + language);
- BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
- profileData.add(new String(IOUtils.toCharArray(reader)));
- reader.close();
- }
- DetectorFactory.loadProfile(profileData);
- DetectorFactory.setSeed(0);
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
deleted file mode 100644
index 4e19eab..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-public interface LangIdParams {
-
- String LANGUAGE_ID = "langid";
- String DOCID_PARAM = LANGUAGE_ID + ".idField";
-
- String FIELDS_PARAM = LANGUAGE_ID + ".fl"; // Field list to detect from
- String LANG_FIELD = LANGUAGE_ID + ".langField"; // Main language detected
- String LANGS_FIELD = LANGUAGE_ID + ".langsField"; // All languages detected (multiValued)
- String FALLBACK = LANGUAGE_ID + ".fallback"; // Fallback lang code
- String FALLBACK_FIELDS = LANGUAGE_ID + ".fallbackFields"; // Comma-sep list of fallback fields
- String OVERWRITE = LANGUAGE_ID + ".overwrite"; // Overwrite if existing language value in LANG_FIELD
- String THRESHOLD = LANGUAGE_ID + ".threshold"; // Detection threshold
- String ENFORCE_SCHEMA = LANGUAGE_ID + ".enforceSchema"; // Enforces that output fields exist in schema
- String LANG_WHITELIST = LANGUAGE_ID + ".whitelist"; // Allowed languages
- String LCMAP = LANGUAGE_ID + ".lcmap"; // Maps detected langcode to other value
- String MAP_ENABLE = LANGUAGE_ID + ".map"; // Turns on or off the field mapping
- String MAP_FL = LANGUAGE_ID + ".map.fl"; // Field list for mapping
- String MAP_OVERWRITE = LANGUAGE_ID + ".map.overwrite"; // Whether to overwrite existing fields
- String MAP_KEEP_ORIG = LANGUAGE_ID + ".map.keepOrig"; // Keep original field after mapping
- String MAP_INDIVIDUAL = LANGUAGE_ID + ".map.individual"; // Detect language per individual field
- String MAP_INDIVIDUAL_FL = LANGUAGE_ID + ".map.individual.fl";// Field list of fields to redetect language for
- String MAP_LCMAP = LANGUAGE_ID + ".map.lcmap"; // Enables mapping multiple langs to same output field
- String MAP_PATTERN = LANGUAGE_ID + ".map.pattern"; // RegEx pattern to match field name
- String MAP_REPLACE = LANGUAGE_ID + ".map.replace"; // Replace pattern
- String MAX_FIELD_VALUE_CHARS = LANGUAGE_ID + ".maxFieldValueChars"; // Maximum number of characters to use per field for language detection
- String MAX_TOTAL_CHARS = LANGUAGE_ID + ".maxTotalChars"; // Maximum number of characters to use per all concatenated fields for language detection
-
- String DOCID_FIELD_DEFAULT = "id";
- String DOCID_LANGFIELD_DEFAULT = null;
- String DOCID_LANGSFIELD_DEFAULT = null;
- String MAP_PATTERN_DEFAULT = "(.*)";
- String MAP_REPLACE_DEFAULT = "$1_{lang}";
- int MAX_FIELD_VALUE_CHARS_DEFAULT = 10000;
- int MAX_TOTAL_CHARS_DEFAULT = 20000;
-
- // TODO: This default threshold accepts even "uncertain" detections.
- // Increase &langid.threshold above 0.5 to return only certain detections
- Double DOCID_THRESHOLD_DEFAULT = 0.5;
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
deleted file mode 100644
index 3679905..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.update.AddUpdateCommand;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.regex.Pattern;
-
-
-/**
- * Identifies the language of a set of input fields.
- * Also supports mapping of field names based
- * on detected language.
- * <p>
- * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
- * @since 3.5
- * @lucene.experimental
- */
-public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- protected boolean enabled;
-
- protected String[] inputFields = {};
- protected String[] mapFields = {};
- protected Pattern mapPattern;
- protected String mapReplaceStr;
- protected String langField;
- protected String langsField; // MultiValued, contains all languages detected
- protected String docIdField;
- protected String fallbackValue;
- protected String[] fallbackFields = {};
- protected boolean enableMapping;
- protected boolean mapKeepOrig;
- protected boolean overwrite;
- protected boolean mapOverwrite;
- protected boolean mapIndividual;
- protected boolean enforceSchema;
- protected double threshold;
- protected HashSet<String> langWhitelist;
- protected HashSet<String> mapIndividualFieldsSet;
- protected HashSet<String> allMapFieldsSet;
- protected HashMap<String,String> lcMap;
- protected HashMap<String,String> mapLcMap;
- protected IndexSchema schema;
- protected int maxFieldValueChars;
- protected int maxTotalChars;
-
- // Regex patterns
- protected final Pattern tikaSimilarityPattern = Pattern.compile(".*\\((.*?)\\)");
- protected final Pattern langPattern = Pattern.compile("\\{lang\\}");
-
- public LanguageIdentifierUpdateProcessor(SolrQueryRequest req,
- SolrQueryResponse rsp, UpdateRequestProcessor next) {
- super(next);
- schema = req.getSchema();
-
- initParams(req.getParams());
- }
-
- private void initParams(SolrParams params) {
- if (params != null) {
- // Document-centric langId params
- setEnabled(params.getBool(LANGUAGE_ID, true));
- if(params.get(FIELDS_PARAM, "").length() > 0) {
- inputFields = params.get(FIELDS_PARAM, "").split(",");
- }
- langField = params.get(LANG_FIELD, DOCID_LANGFIELD_DEFAULT);
- langsField = params.get(LANGS_FIELD, DOCID_LANGSFIELD_DEFAULT);
- SchemaField uniqueKeyField = schema.getUniqueKeyField();
- docIdField = params.get(DOCID_PARAM, uniqueKeyField == null ? DOCID_FIELD_DEFAULT : uniqueKeyField.getName());
- fallbackValue = params.get(FALLBACK);
- if(params.get(FALLBACK_FIELDS, "").length() > 0) {
- fallbackFields = params.get(FALLBACK_FIELDS).split(",");
- }
- overwrite = params.getBool(OVERWRITE, false);
- langWhitelist = new HashSet<>();
- threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT);
- if(params.get(LANG_WHITELIST, "").length() > 0) {
- for(String lang : params.get(LANG_WHITELIST, "").split(",")) {
- langWhitelist.add(lang);
- }
- }
-
- // Mapping params (field centric)
- enableMapping = params.getBool(MAP_ENABLE, false);
- if(params.get(MAP_FL, "").length() > 0) {
- mapFields = params.get(MAP_FL, "").split(",");
- } else {
- mapFields = inputFields;
- }
- mapKeepOrig = params.getBool(MAP_KEEP_ORIG, false);
- mapOverwrite = params.getBool(MAP_OVERWRITE, false);
- mapIndividual = params.getBool(MAP_INDIVIDUAL, false);
-
- // Process individual fields
- String[] mapIndividualFields = {};
- if(params.get(MAP_INDIVIDUAL_FL, "").length() > 0) {
- mapIndividualFields = params.get(MAP_INDIVIDUAL_FL, "").split(",");
- } else {
- mapIndividualFields = mapFields;
- }
- mapIndividualFieldsSet = new HashSet<>(Arrays.asList(mapIndividualFields));
- // Compile a union of the lists of fields to map
- allMapFieldsSet = new HashSet<>(Arrays.asList(mapFields));
- if(Arrays.equals(mapFields, mapIndividualFields)) {
- allMapFieldsSet.addAll(mapIndividualFieldsSet);
- }
-
- // Normalize detected langcode onto normalized langcode
- lcMap = new HashMap<>();
- if(params.get(LCMAP) != null) {
- for(String mapping : params.get(LCMAP).split("[, ]")) {
- String[] keyVal = mapping.split(":");
- if(keyVal.length == 2) {
- lcMap.put(keyVal[0], keyVal[1]);
- } else {
- log.error("Unsupported format for langid.lcmap: "+mapping+". Skipping this mapping.");
- }
- }
- }
-
- // Language Code mapping
- mapLcMap = new HashMap<>();
- if(params.get(MAP_LCMAP) != null) {
- for(String mapping : params.get(MAP_LCMAP).split("[, ]")) {
- String[] keyVal = mapping.split(":");
- if(keyVal.length == 2) {
- mapLcMap.put(keyVal[0], keyVal[1]);
- } else {
- log.error("Unsupported format for langid.map.lcmap: "+mapping+". Skipping this mapping.");
- }
- }
- }
- enforceSchema = params.getBool(ENFORCE_SCHEMA, true);
-
- mapPattern = Pattern.compile(params.get(MAP_PATTERN, MAP_PATTERN_DEFAULT));
- mapReplaceStr = params.get(MAP_REPLACE, MAP_REPLACE_DEFAULT);
- maxFieldValueChars = params.getInt(MAX_FIELD_VALUE_CHARS, MAX_FIELD_VALUE_CHARS_DEFAULT);
- maxTotalChars = params.getInt(MAX_TOTAL_CHARS, MAX_TOTAL_CHARS_DEFAULT);
- if (maxFieldValueChars > maxTotalChars) {
- if (maxTotalChars == MAX_TOTAL_CHARS_DEFAULT) {
- // If the user specified only maxFieldValueChars, make maxTotalChars the same as it
- log.warn(MAX_FIELD_VALUE_CHARS + " (" + maxFieldValueChars + ") is less than " + MAX_TOTAL_CHARS + " ("
- + maxTotalChars + "). Setting " + MAX_TOTAL_CHARS + " to " + maxFieldValueChars + ".");
- maxTotalChars = maxFieldValueChars;
- } else {
- // If the user specified maxTotalChars, make maxFieldValueChars the same as it
- log.warn(MAX_FIELD_VALUE_CHARS + " (" + maxFieldValueChars + ") is less than " + MAX_TOTAL_CHARS + " ("
- + maxTotalChars + "). Setting " + MAX_FIELD_VALUE_CHARS + " to " + maxTotalChars + ".");
- maxFieldValueChars = maxTotalChars;
- }
- }
- }
- log.debug("LangId configured");
-
-
- if (inputFields.length == 0) {
- throw new SolrException(ErrorCode.BAD_REQUEST,
- "Missing or faulty configuration of LanguageIdentifierUpdateProcessor. Input fields must be specified as a comma separated list");
- }
-
- }
-
- @Override
- public void processAdd(AddUpdateCommand cmd) throws IOException {
- if (isEnabled()) {
- process(cmd.getSolrInputDocument());
- } else {
- log.debug("Processor not enabled, not running");
- }
- super.processAdd(cmd);
- }
-
- /**
- * This is the main, testable process method called from processAdd()
- * @param doc the SolrInputDocument to work on
- * @return the modified SolrInputDocument
- */
- protected SolrInputDocument process(SolrInputDocument doc) {
- String docLang = null;
- HashSet<String> docLangs = new HashSet<>();
- String fallbackLang = getFallbackLang(doc, fallbackFields, fallbackValue);
-
- if(langField == null || !doc.containsKey(langField) || (doc.containsKey(langField) && overwrite)) {
- List<DetectedLanguage> languagelist = detectLanguage(doc);
- docLang = resolveLanguage(languagelist, fallbackLang);
- docLangs.add(docLang);
- log.debug("Detected main document language from fields "+ Arrays.toString(inputFields) +": "+docLang);
-
- if(doc.containsKey(langField) && overwrite) {
- log.debug("Overwritten old value "+doc.getFieldValue(langField));
- }
- if(langField != null && langField.length() != 0) {
- doc.setField(langField, docLang);
- }
- } else {
- // langField is set, we sanity check it against whitelist and fallback
- docLang = resolveLanguage((String) doc.getFieldValue(langField), fallbackLang);
- docLangs.add(docLang);
- log.debug("Field "+langField+" already contained value "+docLang+", not overwriting.");
- }
-
- if(enableMapping) {
- for (String fieldName : allMapFieldsSet) {
- if(doc.containsKey(fieldName)) {
- String fieldLang;
- if(mapIndividual && mapIndividualFieldsSet.contains(fieldName)) {
- List<DetectedLanguage> languagelist = detectLanguage(doc);
- fieldLang = resolveLanguage(languagelist, docLang);
- docLangs.add(fieldLang);
- log.debug("Mapping field "+fieldName+" using individually detected language "+fieldLang);
- } else {
- fieldLang = docLang;
- log.debug("Mapping field "+fieldName+" using document global language "+fieldLang);
- }
- String mappedOutputField = getMappedField(fieldName, fieldLang);
-
- if (mappedOutputField != null) {
- log.debug("Mapping field {} to {}", doc.getFieldValue(docIdField), fieldLang);
- SolrInputField inField = doc.getField(fieldName);
- doc.setField(mappedOutputField, inField.getValue());
- if(!mapKeepOrig) {
- log.debug("Removing old field {}", fieldName);
- doc.removeField(fieldName);
- }
- } else {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid output field mapping for "
- + fieldName + " field and language: " + fieldLang);
- }
- }
- }
- }
-
- // Set the languages field to an array of all detected languages
- if(langsField != null && langsField.length() != 0) {
- doc.setField(langsField, docLangs.toArray());
- }
-
- return doc;
- }
-
- /**
- * Decides the fallback language, either from content of fallback field or fallback value
- * @param doc the Solr document
- * @param fallbackFields an array of strings with field names containing fallback language codes
- * @param fallbackValue a language code to use in case no fallbackFields are found
- */
- private String getFallbackLang(SolrInputDocument doc, String[] fallbackFields, String fallbackValue) {
- String lang = null;
- for(String field : fallbackFields) {
- if(doc.containsKey(field)) {
- lang = (String) doc.getFieldValue(field);
- log.debug("Language fallback to field "+field);
- break;
- }
- }
- if(lang == null) {
- log.debug("Language fallback to value "+fallbackValue);
- lang = fallbackValue;
- }
- return lang;
- }
-
- /**
- * Detects language(s) from a string.
- * Classes wishing to implement their own language detection module should override this method.
- * @param content The content to identify
- * @return List of detected language(s) according to RFC-3066
- */
- protected abstract List<DetectedLanguage> detectLanguage(SolrInputDocument content);
-
- /**
- * Chooses a language based on the list of candidates detected
- * @param language language code as a string
- * @param fallbackLang the language code to use as a fallback
- * @return a string of the chosen language
- */
- protected String resolveLanguage(String language, String fallbackLang) {
- List<DetectedLanguage> l = new ArrayList<>();
- l.add(new DetectedLanguage(language, 1.0));
- return resolveLanguage(l, fallbackLang);
- }
-
- /**
- * Chooses a language based on the list of candidates detected
- * @param languages a List of DetectedLanguages with certainty score
- * @param fallbackLang the language code to use as a fallback
- * @return a string of the chosen language
- */
- protected String resolveLanguage(List<DetectedLanguage> languages, String fallbackLang) {
- String langStr;
- if(languages.size() == 0) {
- log.debug("No language detected, using fallback {}", fallbackLang);
- langStr = fallbackLang;
- } else {
- DetectedLanguage lang = languages.get(0);
- String normalizedLang = normalizeLangCode(lang.getLangCode());
- if(langWhitelist.isEmpty() || langWhitelist.contains(normalizedLang)) {
- log.debug("Language detected {} with certainty {}", normalizedLang, lang.getCertainty());
- if(lang.getCertainty() >= threshold) {
- langStr = normalizedLang;
- } else {
- log.debug("Detected language below threshold {}, using fallback {}", threshold, fallbackLang);
- langStr = fallbackLang;
- }
- } else {
- log.debug("Detected a language not in whitelist ({}), using fallback {}", lang.getLangCode(), fallbackLang);
- langStr = fallbackLang;
- }
- }
-
- if(langStr == null || langStr.length() == 0) {
- log.warn("Language resolved to null or empty string. Fallback not configured?");
- langStr = "";
- }
-
- return langStr;
- }
-
- /**
- * Looks up language code in map (langid.lcmap) and returns mapped value
- * @param langCode the language code string returned from detector
- * @return the normalized/mapped language code
- */
- protected String normalizeLangCode(String langCode) {
- if (lcMap.containsKey(langCode)) {
- String lc = lcMap.get(langCode);
- log.debug("Doing langcode normalization mapping from "+langCode+" to "+lc);
- return lc;
- }
- return langCode;
- }
-
- /**
- * Returns the name of the field to map the current contents into, so that they are properly analyzed. For instance
- * if the currentField is "text" and the code is "en", the new field would by default be "text_en".
- * This method also performs custom regex pattern replace if configured. If enforceSchema=true
- * and the resulting field name doesn't exist, then null is returned.
- *
- * @param currentField The current field name
- * @param language the language code
- * @return The new schema field name, based on pattern and replace, or null if illegal
- */
- protected String getMappedField(String currentField, String language) {
- String lc = mapLcMap.containsKey(language) ? mapLcMap.get(language) : language;
- String newFieldName = langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
- if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
- log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);
- return null;
- } else {
- log.debug("Doing mapping from "+currentField+" with language "+language+" to field "+newFieldName);
- }
- return newFieldName;
- }
-
- /**
- * Tells if this processor is enabled or not
- * @return true if enabled, else false
- */
- public boolean isEnabled() {
- return enabled;
- }
-
- public void setEnabled(boolean enabled) {
- this.enabled = enabled;
- }
-
-
-
- /**
- * Concatenates content from multiple fields
- */
- protected String concatFields(SolrInputDocument doc) {
- StringBuilder sb = new StringBuilder(getExpectedSize(doc, inputFields));
- for (String fieldName : inputFields) {
- log.debug("Appending field " + fieldName);
- if (doc.containsKey(fieldName)) {
- Collection<Object> fieldValues = doc.getFieldValues(fieldName);
- if (fieldValues != null) {
- for (Object content : fieldValues) {
- if (content instanceof String) {
- String stringContent = (String) content;
- if (stringContent.length() > maxFieldValueChars) {
- sb.append(stringContent.substring(0, maxFieldValueChars));
- } else {
- sb.append(stringContent);
- }
- sb.append(" ");
- if (sb.length() > maxTotalChars) {
- sb.setLength(maxTotalChars);
- break;
- }
- } else {
- log.warn("Field " + fieldName + " not a String value, not including in detection");
- }
- }
- }
- }
- }
- return sb.toString();
- }
-
- /**
- * Calculate expected string size.
- *
- * @param doc solr input document
- * @param fields fields to select
- * @return expected size of string value
- */
- private int getExpectedSize(SolrInputDocument doc, String[] fields) {
- int docSize = 0;
- for (String field : fields) {
- if (doc.containsKey(field)) {
- Collection<Object> contents = doc.getFieldValues(field);
- if (contents != null) {
- for (Object content : contents) {
- if (content instanceof String) {
- docSize += Math.min(((String) content).length(), maxFieldValueChars);
- }
- }
-
- if (docSize > maxTotalChars) {
- docSize = maxTotalChars;
- break;
- }
- }
- }
- }
- return docSize;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
deleted file mode 100644
index 83f4fe4..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import opennlp.tools.langdetect.Language;
-import opennlp.tools.langdetect.LanguageDetectorME;
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
-/**
- * Identifies the language of a set of input fields using <a href="https://opennlp.apache.org/">Apache OpenNLP</a>.
- * <p>
- * See "Language Detector" section of
- * <a href="https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html">https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html</a>
- */
-public class OpenNLPLangDetectUpdateProcessor extends LanguageIdentifierUpdateProcessor {
-
- private final LanguageDetectorModel model;
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- /** Maps ISO 639-3 (3-letter language code) to ISO 639-1 (2-letter language code) */
- private static final Map<String,String> ISO639_MAP = make_ISO639_map();
-
- public OpenNLPLangDetectUpdateProcessor(SolrQueryRequest req, SolrQueryResponse rsp,
- UpdateRequestProcessor next, LanguageDetectorModel model) {
- super(req, rsp, next);
- this.model = model;
- }
-
- @Override
- protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
- List<DetectedLanguage> languages = new ArrayList<>();
- String content = concatFields(doc);
- if (content.length() != 0) {
- LanguageDetectorME ldme = new LanguageDetectorME(model);
- Language[] langs = ldme.predictLanguages(content);
- for(Language language: langs){
- languages.add(new DetectedLanguage(ISO639_MAP.get(language.getLang()), language.getConfidence()));
- }
- } else {
- log.debug("No input text to detect language from, returning empty list");
- }
- return languages;
- }
-
- private static Map<String,String> make_ISO639_map() {
- Map<String,String> map = new HashMap<>();
- for (String lang : Locale.getISOLanguages()) {
- Locale locale = new Locale(lang);
- map.put(locale.getISO3Language(), locale.getLanguage());
- }
- return map;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
deleted file mode 100644
index ffe11aa..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.util.SolrPluginUtils;
-import org.apache.solr.util.plugin.SolrCoreAware;
-
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
-/**
- * Identifies the language of a set of input fields using <a href="https://opennlp.apache.org/">Apache OpenNLP</a>.
- * <p>
- * The UpdateProcessorChain config entry can take a number of parameters
- * which may also be passed as HTTP parameters on the update request
- * and override the defaults. Here is the simplest processor config possible:
- *
- * <pre class="prettyprint" >
- * <processor class="org.apache.solr.update.processor.OpenNLPLangDetectUpdateProcessorFactory">
- * <str name="langid.fl">title,text</str>
- * <str name="langid.langField">language_s</str>
- * <str name="langid.model">langdetect-183.bin</str>
- * </processor>
- * </pre>
- * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
- */
-public class OpenNLPLangDetectUpdateProcessorFactory extends UpdateRequestProcessorFactory
- implements SolrCoreAware {
-
- private static final String MODEL_PARAM = "langid.model";
- private String modelFile;
- private LanguageDetectorModel model;
- protected SolrParams defaults;
- protected SolrParams appends;
- protected SolrParams invariants;
- private SolrResourceLoader solrResourceLoader;
-
- @Override
- public void init( NamedList args )
- {
- if (args != null) {
- Object o;
- o = args.get("defaults");
- if (o != null && o instanceof NamedList) {
- defaults = ((NamedList) o).toSolrParams();
- } else {
- defaults = args.toSolrParams();
- }
- o = args.get("appends");
- if (o != null && o instanceof NamedList) {
- appends = ((NamedList) o).toSolrParams();
- }
- o = args.get("invariants");
- if (o != null && o instanceof NamedList) {
- invariants = ((NamedList) o).toSolrParams();
- }
-
- // Look for model filename in invariants, then in args, then defaults
- if (invariants != null) {
- modelFile = invariants.get(MODEL_PARAM);
- }
- if (modelFile == null) {
- o = args.get(MODEL_PARAM);
- if (o != null && o instanceof String) {
- modelFile = (String)o;
- } else {
- modelFile = defaults.get(MODEL_PARAM);
- if (modelFile == null) {
- throw new RuntimeException("Couldn't load language model, will return empty languages always!");
- }
- }
- }
- }
- }
-
- @Override
- public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
- // Process defaults, appends and invariants if we got a request
- if (req != null) {
- SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
- }
- return new OpenNLPLangDetectUpdateProcessor(req, rsp, next, model);
- }
-
- private void loadModel() throws IOException {
- InputStream is = null;
- try{
- if (modelFile != null) {
- is = solrResourceLoader.openResource(modelFile);
- model = new LanguageDetectorModel(is);
- }
- }
- finally{
- IOUtils.closeQuietly(is);
- }
- }
-
- @Override
- public void inform(SolrCore core){
- solrResourceLoader = core.getResourceLoader();
- try {
- loadModel();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
deleted file mode 100644
index 5c8146d..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.tika.language.LanguageIdentifier;
-
-import org.apache.solr.common.SolrInputDocument;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier.
- * The tika-core-x.y.jar must be on the classpath
- * <p>
- * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
- * @since 3.5
- */
-public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpdateProcessor {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public TikaLanguageIdentifierUpdateProcessor(SolrQueryRequest req,
- SolrQueryResponse rsp, UpdateRequestProcessor next) {
- super(req, rsp, next);
- }
-
- @Override
- protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
- List<DetectedLanguage> languages = new ArrayList<>();
- String content = concatFields(doc);
- if (content.length() != 0) {
- LanguageIdentifier identifier = new LanguageIdentifier(content);
- // FIXME: Hack - we get the distance from toString and calculate our own certainty score
- Double distance = Double.parseDouble(tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
- // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better sweetspot than isReasonablyCertain()
- Double certainty = 1 - (5 * distance);
- if (certainty < 0)
- certainty = 0d;
- DetectedLanguage language = new DetectedLanguage(identifier.getLanguage(), certainty);
- languages.add(language);
- log.debug("Language detected as "+language+" with a certainty of "+language.getCertainty()+" (Tika distance="+identifier.toString()+")");
- } else {
- log.debug("No input text to detect language from, returning empty list");
- }
- return languages;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
deleted file mode 100644
index 838311b..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.util.SolrPluginUtils;
-import org.apache.solr.util.plugin.SolrCoreAware;
-
-/**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier. The tika-core-x.y.jar must be on the classpath
- * <p>
- * The UpdateProcessorChain config entry can take a number of parameters
- * which may also be passed as HTTP parameters on the update request
- * and override the defaults. Here is the simplest processor config possible:
- *
- * <pre class="prettyprint" >
- * <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
- * <str name="langid.fl">title,text</str>
- * <str name="langid.langField">language_s</str>
- * </processor>
- * </pre>
- * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
- * @since 3.5
- */
-public class TikaLanguageIdentifierUpdateProcessorFactory extends
- UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams {
-
- protected SolrParams defaults;
- protected SolrParams appends;
- protected SolrParams invariants;
-
- @Override
- public void inform(SolrCore core) {
- }
-
- /**
- * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly
- * to a RequestHandler, with defaults, appends and invariants.
- * @param args a NamedList with the configuration parameters
- */
- @Override
- @SuppressWarnings("rawtypes")
- public void init( NamedList args )
- {
- if (args != null) {
- Object o;
- o = args.get("defaults");
- if (o != null && o instanceof NamedList) {
- defaults = ((NamedList) o).toSolrParams();
- } else {
- defaults = args.toSolrParams();
- }
- o = args.get("appends");
- if (o != null && o instanceof NamedList) {
- appends = ((NamedList) o).toSolrParams();
- }
- o = args.get("invariants");
- if (o != null && o instanceof NamedList) {
- invariants = ((NamedList) o).toSolrParams();
- }
- }
- }
-
- @Override
- public UpdateRequestProcessor getInstance(SolrQueryRequest req,
- SolrQueryResponse rsp, UpdateRequestProcessor next) {
- // Process defaults, appends and invariants if we got a request
- if(req != null) {
- SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
- }
- return new TikaLanguageIdentifierUpdateProcessor(req, rsp, next);
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/org/apache/solr/update/processor/package.html
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/package.html b/solr/contrib/langid/src/java/org/apache/solr/update/processor/package.html
deleted file mode 100644
index 9bf453d..0000000
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- not a package-info.java, because we already defined this package in core/ -->
-<html>
-<body>
-Various implementations of {@link org.apache.solr.update.processor.LanguageIdentifierUpdateProcessor} and their factories.
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4dd96a0e/solr/contrib/langid/src/java/overview.html
----------------------------------------------------------------------
diff --git a/solr/contrib/langid/src/java/overview.html b/solr/contrib/langid/src/java/overview.html
deleted file mode 100644
index 9bc5c9a..0000000
--- a/solr/contrib/langid/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<body>
-Apache Solr Search Server: Solr Language Identifier contrib
-</body>
-</html>