You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by is...@apache.org on 2017/07/29 22:00:05 UTC
[28/28] lucene-solr:jira/solr-6630: Merging master
Merging master
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8d00e53b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8d00e53b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8d00e53b
Branch: refs/heads/jira/solr-6630
Commit: 8d00e53bd19d7b6a5fd422afa8d071139e8a0824
Parents: b6ee770
Author: Ishan Chattopadhyaya <is...@apache.org>
Authored: Sun Jul 30 03:29:15 2017 +0530
Committer: Ishan Chattopadhyaya <is...@apache.org>
Committed: Sun Jul 30 03:29:15 2017 +0530
----------------------------------------------------------------------
dev-tools/idea/.idea/workspace.xml | 2 +-
dev-tools/maven/pom.xml.template | 1 +
dev-tools/scripts/addVersion.py | 2 +-
dev-tools/scripts/jenkins.build.ref.guide.sh | 67 +
dev-tools/scripts/smokeTestRelease.py | 4 +-
lucene/CHANGES.txt | 9 +
lucene/common-build.xml | 3 +
.../apache/lucene/codecs/DocValuesConsumer.java | 2 +-
.../lucene/document/SortedDocValuesField.java | 6 +-
.../document/SortedSetDocValuesField.java | 6 +-
.../org/apache/lucene/index/MultiDocValues.java | 288 --
.../org/apache/lucene/index/MultiSorter.java | 2 +-
.../org/apache/lucene/index/MultiTermsEnum.java | 2 +
.../org/apache/lucene/index/OrdinalMap.java | 368 +++
.../org/apache/lucene/util/OfflineSorter.java | 1 -
.../org/apache/lucene/index/TestOrdinalMap.java | 2 +-
.../lucene/search/TestDocValuesQueries.java | 28 +-
...ConcurrentSortedSetDocValuesFacetCounts.java | 9 +-
.../DefaultSortedSetDocValuesReaderState.java | 2 +-
.../SortedSetDocValuesFacetCounts.java | 7 +-
.../lucene/search/highlight/Highlighter.java | 86 +-
.../search/join/GlobalOrdinalsCollector.java | 6 +-
.../lucene/search/join/GlobalOrdinalsQuery.java | 6 +-
.../join/GlobalOrdinalsWithScoreCollector.java | 16 +-
.../join/GlobalOrdinalsWithScoreQuery.java | 6 +-
.../org/apache/lucene/search/join/JoinUtil.java | 8 +-
.../apache/lucene/search/join/TestJoinUtil.java | 19 +-
.../serialized/SerializedDVStrategy.java | 5 +-
.../spatial3d/geom/GeoComplexPolygon.java | 2 +-
solr/CHANGES.txt | 127 +-
solr/bin/solr | 8 +-
solr/bin/solr.cmd | 2 +-
.../apache/solr/schema/ICUCollationField.java | 2 +-
.../dataimport/TestHierarchicalDocBuilder.java | 6 +-
solr/contrib/ltr/README.md | 3 +-
solr/contrib/ltr/example/README.md | 10 +-
.../solr/ltr/TestLTRReRankingPipeline.java | 19 +-
.../java/org/apache/solr/api/V2HttpCall.java | 2 +-
.../org/apache/solr/cloud/AddReplicaCmd.java | 10 +-
.../src/java/org/apache/solr/cloud/Assign.java | 98 +-
.../java/org/apache/solr/cloud/CloudUtil.java | 4 +-
.../apache/solr/cloud/CreateCollectionCmd.java | 3 +-
.../org/apache/solr/cloud/CreateShardCmd.java | 6 +-
.../java/org/apache/solr/cloud/MigrateCmd.java | 5 +-
.../org/apache/solr/cloud/MoveReplicaCmd.java | 76 +-
.../java/org/apache/solr/cloud/Overseer.java | 2 +-
.../org/apache/solr/cloud/ReplaceNodeCmd.java | 9 +-
.../org/apache/solr/cloud/SplitShardCmd.java | 2 +-
.../org/apache/solr/cloud/ZkController.java | 6 +-
.../solr/cloud/overseer/ReplicaMutator.java | 2 +-
.../solr/cloud/overseer/SliceMutator.java | 7 +-
.../solr/core/CachingDirectoryFactory.java | 9 +-
.../org/apache/solr/core/CoreContainer.java | 28 +-
.../org/apache/solr/core/DirectoryFactory.java | 5 +-
.../java/org/apache/solr/core/NodeConfig.java | 25 +-
.../src/java/org/apache/solr/core/SolrCore.java | 2 +-
.../org/apache/solr/core/SolrXmlConfig.java | 4 +
.../handler/AnalysisRequestHandlerBase.java | 11 +-
.../org/apache/solr/handler/ExportWriter.java | 13 +-
.../apache/solr/handler/ReplicationHandler.java | 6 +-
.../org/apache/solr/handler/StreamHandler.java | 3 +-
.../solr/handler/admin/CollectionsHandler.java | 20 +-
.../handler/admin/SolrInfoMBeanHandler.java | 14 +-
.../solr/handler/component/ExpandComponent.java | 24 +-
.../solr/index/SlowCompositeReaderWrapper.java | 2 +-
.../apache/solr/request/DocValuesFacets.java | 4 +-
.../org/apache/solr/request/DocValuesStats.java | 4 +-
.../org/apache/solr/request/IntervalFacets.java | 39 +-
.../solr/response/TextResponseWriter.java | 2 +-
.../transform/ChildDocTransformerFactory.java | 11 +
.../org/apache/solr/schema/CollationField.java | 2 +-
.../org/apache/solr/schema/DatePointField.java | 3 +
.../apache/solr/schema/DoublePointField.java | 4 +-
.../java/org/apache/solr/schema/EnumField.java | 7 +-
.../apache/solr/schema/ExternalFileField.java | 6 +
.../java/org/apache/solr/schema/FieldType.java | 13 +-
.../org/apache/solr/schema/FloatPointField.java | 4 +-
.../org/apache/solr/schema/IntPointField.java | 3 +
.../org/apache/solr/schema/LongPointField.java | 3 +
.../apache/solr/schema/NumericFieldType.java | 149 +-
.../solr/search/CollapsingQParserPlugin.java | 20 +-
.../facet/FacetFieldProcessorByArrayDV.java | 3 +-
.../org/apache/solr/search/facet/MinMaxAgg.java | 3 +-
.../org/apache/solr/search/facet/UniqueAgg.java | 82 +-
.../solr/search/facet/UniqueMultiDvSlotAcc.java | 3 +-
.../search/facet/UniqueSinglevaluedSlotAcc.java | 3 +-
.../join/BlockJoinFieldFacetAccumulator.java | 4 +-
.../org/apache/solr/search/join/GraphQuery.java | 89 +-
.../solr/search/join/GraphQueryParser.java | 1 +
.../solr/search/join/GraphTermsCollector.java | 260 +-
.../solr/servlet/DirectSolrConnection.java | 1 +
.../processor/AtomicUpdateProcessorFactory.java | 4 +-
.../processor/UUIDUpdateProcessorFactory.java | 37 +-
.../processor/UpdateRequestProcessorChain.java | 1 +
.../java/org/apache/solr/util/LongIterator.java | 34 +
.../src/java/org/apache/solr/util/LongSet.java | 137 +
.../java/org/apache/solr/util/NumberUtils.java | 20 +
.../org/apache/solr/util/hll/BitVector.java | 2 +
.../src/java/org/apache/solr/util/hll/HLL.java | 1 +
.../org/apache/solr/util/hll/LongIterator.java | 34 -
.../src/resources/SystemCollectionSchema.xml | 4 +-
.../solr/analysisconfs/analysis-err-schema.xml | 2 +-
.../solr/collection1/conf/bad-schema-eff.xml | 44 +
.../conf/schema-HighlighterMaxOffsetTest.xml | 2 +-
...chema-add-schema-fields-update-processor.xml | 12 +-
.../solr/collection1/conf/schema-behavior.xml | 6 +-
.../conf/schema-blockjoinfacetcomponent.xml | 6 +-
.../collection1/conf/schema-custom-field.xml | 4 +-
.../collection1/conf/schema-customfield.xml | 4 +-
.../solr/collection1/conf/schema-enums.xml | 2 +-
.../conf/schema-field-sort-values.xml | 6 +-
.../solr/collection1/conf/schema-folding.xml | 10 +-
.../solr/collection1/conf/schema-graph.xml | 71 -
.../solr/collection1/conf/schema-hash.xml | 24 +-
.../conf/schema-id-and-version-fields-only.xml | 2 +-
.../conf/schema-luceneMatchVersion.xml | 2 +-
.../conf/schema-minimal-atomic-stress.xml | 2 +-
.../conf/schema-multiword-synonyms.xml | 2 +-
.../solr/collection1/conf/schema-numeric.xml | 30 +-
.../solr/collection1/conf/schema-point.xml | 12 +-
.../collection1/conf/schema-preanalyzed.xml | 2 +-
.../collection1/conf/schema-psuedo-fields.xml | 4 +-
.../solr/collection1/conf/schema-rest.xml | 20 +-
.../solr/collection1/conf/schema-spatial.xml | 13 +-
.../solr/collection1/conf/schema-sql.xml | 20 +-
.../solr/collection1/conf/schema-trie.xml | 22 +-
.../solr/collection1/conf/schema11.xml | 14 +-
.../solr/collection1/conf/schema_latest.xml | 17 +
.../solr/collection1/conf/schemasurround.xml | 20 +-
.../configsets/_default/conf/managed-schema | 103 +-
.../configsets/_default/conf/solrconfig.xml | 195 +-
.../configsets/cloud-dynamic/conf/schema.xml | 20 +-
.../solr/configsets/cloud-hdfs/conf/schema.xml | 4 +-
.../conf/managed-schema | 4 +-
.../cloud-managed-upgrade/conf/schema.xml | 4 +-
.../cloud-managed/conf/managed-schema | 4 +-
.../conf/schema.xml | 4 +-
.../cloud-minimal-jmx/conf/schema.xml | 4 +-
.../configsets/cloud-minimal/conf/schema.xml | 4 +-
.../configsets/cloud-subdirs/conf/schema.xml | 4 +-
.../solr/configsets/configset-2/conf/schema.xml | 2 +-
.../solr/configsets/doc-expiry/conf/schema.xml | 20 +-
.../exitable-directory/conf/schema.xml | 4 +-
.../src/test-files/solr/solr-solrDataHome.xml | 24 +
.../test/SecureRandomAlgorithmTesterApp.java | 41 +
.../test/org/apache/solr/TestCrossCoreJoin.java | 9 +-
.../core/src/test/org/apache/solr/TestTrie.java | 8 +-
.../cloud/AssignBackwardCompatibilityTest.java | 101 +
.../test/org/apache/solr/cloud/AssignTest.java | 136 +-
.../solr/cloud/BasicDistributedZk2Test.java | 1 +
.../solr/cloud/ClusterStateUpdateTest.java | 2 +-
.../cloud/CollectionsAPIDistributedZkTest.java | 33 +-
.../solr/cloud/CollectionsAPISolrJTest.java | 63 +-
.../solr/cloud/DocValuesNotIndexedTest.java | 7 +-
.../apache/solr/cloud/MoveReplicaHDFSTest.java | 1 -
.../solr/cloud/MoveReplicaHDFSUlogDirTest.java | 142 +
.../org/apache/solr/cloud/MoveReplicaTest.java | 50 +-
...verseerCollectionConfigSetProcessorTest.java | 12 +-
.../org/apache/solr/cloud/ReplaceNodeTest.java | 26 +-
.../cloud/SegmentTerminateEarlyTestState.java | 82 +-
.../apache/solr/cloud/SolrCloudExampleTest.java | 2 +-
.../solr/cloud/TestAuthenticationFramework.java | 188 +-
.../TestCollectionsAPIViaSolrCloudCluster.java | 295 ++
.../apache/solr/cloud/TestConfigSetsAPI.java | 6 +-
.../org/apache/solr/cloud/TestCryptoKeys.java | 28 +-
.../solr/cloud/TestMiniSolrCloudCluster.java | 388 ---
.../cloud/TestMiniSolrCloudClusterKerberos.java | 141 -
.../cloud/TestPullReplicaErrorHandling.java | 3 +-
.../cloud/TestRandomRequestDistribution.java | 64 +-
.../cloud/TestSolrCloudWithKerberosAlt.java | 150 +-
.../TestStressCloudBlindAtomicUpdates.java | 4 +-
.../cloud/TestTolerantUpdateProcessorCloud.java | 2 +-
.../apache/solr/core/DirectoryFactoryTest.java | 78 +-
.../solr/core/TestConfigSetImmutable.java | 4 +-
.../org/apache/solr/core/TestCoreDiscovery.java | 188 +-
.../org/apache/solr/core/TestCustomStream.java | 4 +-
.../apache/solr/core/TestDynamicLoading.java | 38 +-
.../apache/solr/core/TestJmxIntegration.java | 3 +-
.../apache/solr/core/TestSolrConfigHandler.java | 160 +-
.../apache/solr/handler/CheckBackupStatus.java | 2 +-
.../DocumentAnalysisRequestHandlerTest.java | 37 +-
.../FieldAnalysisRequestHandlerTest.java | 28 +-
.../apache/solr/handler/TestConfigReload.java | 2 +-
.../handler/TestReplicationHandlerBackup.java | 4 +-
.../apache/solr/handler/TestReqParamsAPI.java | 44 +-
.../apache/solr/handler/TestRestoreCore.java | 2 +-
.../org/apache/solr/handler/TestSQLHandler.java | 5 +-
.../handler/TestSolrConfigHandlerCloud.java | 40 +-
.../TestSolrConfigHandlerConcurrent.java | 4 +-
.../solr/handler/admin/MBeansHandlerTest.java | 24 +
.../solr/handler/admin/MetricsHandlerTest.java | 2 +
.../solr/metrics/SolrMetricManagerTest.java | 1 -
.../reporters/solr/SolrCloudReportersTest.java | 1 -
.../apache/solr/request/SimpleFacetsTest.java | 36 +-
.../solr/request/TestIntervalFaceting.java | 78 +-
.../apache/solr/response/JSONWriterTest.java | 4 +-
.../apache/solr/response/TestExportWriter.java | 12 +-
.../solr/response/TestRawResponseWriter.java | 11 +-
.../transform/TestChildDocTransformer.java | 33 +
.../solr/rest/schema/TestBulkSchemaAPI.java | 58 +-
.../rest/schema/TestClassNameShortening.java | 76 -
.../schema/TestCopyFieldCollectionResource.java | 12 +-
.../rest/schema/TestDynamicFieldResource.java | 6 +-
.../schema/TestFieldCollectionResource.java | 24 +-
.../solr/rest/schema/TestFieldTypeResource.java | 26 +-
.../rest/schema/TestSchemaNameResource.java | 2 +-
.../solr/rest/schema/TestSchemaResource.java | 2 +-
.../schema/TestSchemaSimilarityResource.java | 2 +-
.../solr/schema/ChangedSchemaMergeTest.java | 8 +-
.../org/apache/solr/schema/DateFieldTest.java | 3 +-
.../org/apache/solr/schema/DocValuesTest.java | 4 +-
.../org/apache/solr/schema/EnumFieldTest.java | 6 +-
.../solr/schema/ExternalFileFieldSortTest.java | 21 +-
...IntPointPrefixActsAsRangeQueryFieldType.java | 34 +
.../solr/schema/PrimitiveFieldTypeTest.java | 151 +-
.../solr/schema/TestBulkSchemaConcurrent.java | 6 +-
.../TestCloudManagedSchemaConcurrent.java | 717 -----
.../org/apache/solr/schema/TestPointFields.java | 2658 +++++++++++-------
.../schema/TestSchemalessBufferedUpdates.java | 1 -
.../solr/schema/TestUseDocValuesAsStored2.java | 8 +-
.../TrieIntPrefixActsAsRangeQueryFieldType.java | 2 +
.../org/apache/solr/schema/WrappedIntField.java | 45 -
.../solr/schema/WrappedIntPointField.java | 46 +
.../apache/solr/schema/WrappedTrieIntField.java | 29 +
.../apache/solr/search/TestFieldSortValues.java | 3 +
.../solr/search/TestHashQParserPlugin.java | 7 +
...OverriddenPrefixQueryForCustomFieldType.java | 4 +
.../org/apache/solr/search/TestRangeQuery.java | 305 +-
.../solr/search/function/TestFunctionQuery.java | 5 +-
.../apache/solr/search/join/GraphQueryTest.java | 140 +-
.../solr/security/BasicAuthIntegrationTest.java | 2 +-
.../solr/servlet/HttpSolrCallGetCoreTest.java | 2 +-
.../apache/solr/update/SoftAutoCommitTest.java | 122 +-
.../AtomicUpdateProcessorFactoryTest.java | 13 +-
.../update/processor/AtomicUpdatesTest.java | 2 -
.../processor/TestNamedUpdateProcessors.java | 8 +-
.../UUIDUpdateProcessorFallbackTest.java | 101 +-
.../test/org/apache/solr/util/TestUtils.java | 14 +-
.../org/apache/solr/util/hll/BitVectorTest.java | 1 +
.../org/apache/solr/util/hll/FullHLLTest.java | 1 +
solr/example/example-DIH/README.txt | 2 +-
.../example-DIH/solr/atom/conf/solrconfig.xml | 11 +-
.../example-DIH/solr/db/conf/managed-schema | 116 +-
.../example-DIH/solr/db/conf/solrconfig.xml | 265 +-
.../example-DIH/solr/mail/conf/managed-schema | 107 +-
.../example-DIH/solr/mail/conf/solrconfig.xml | 263 +-
.../example-DIH/solr/solr/conf/managed-schema | 128 +-
.../example-DIH/solr/solr/conf/solrconfig.xml | 263 +-
.../example-DIH/solr/tika/conf/solrconfig.xml | 7 +-
solr/example/exampledocs/test_utf8.sh | 4 +-
solr/example/files/conf/managed-schema | 58 +-
solr/example/files/conf/solrconfig.xml | 201 +-
solr/server/solr/README.txt | 22 +-
.../configsets/_default/conf/managed-schema | 103 +-
.../configsets/_default/conf/solrconfig.xml | 195 +-
.../conf/managed-schema | 107 +-
.../conf/solrconfig.xml | 294 +-
solr/site/quickstart.mdtext | 51 +-
.../meta-docs/asciidoc-syntax.adoc | 239 --
.../solr-ref-guide/meta-docs/editing-tools.adoc | 39 -
solr/solr-ref-guide/meta-docs/jekyll.adoc | 89 -
solr/solr-ref-guide/meta-docs/pdf.adoc | 145 -
solr/solr-ref-guide/meta-docs/publish.adoc | 219 --
solr/solr-ref-guide/src/_data/strings.yml | 2 +-
solr/solr-ref-guide/src/about-this-guide.adoc | 59 +-
solr/solr-ref-guide/src/about-tokenizers.adoc | 1 -
...adding-custom-plugins-in-solrcloud-mode.adoc | 9 -
solr/solr-ref-guide/src/analyzers.adoc | 2 -
...uthentication-and-authorization-plugins.adoc | 18 +-
.../src/basic-authentication-plugin.adoc | 11 +-
solr/solr-ref-guide/src/blob-store-api.adoc | 3 -
solr/solr-ref-guide/src/blockjoin-faceting.adoc | 4 +-
.../solr-ref-guide/src/charfilterfactories.adoc | 4 -
.../src/collapse-and-expand-results.adoc | 6 +-
solr/solr-ref-guide/src/collections-api.adoc | 154 +-
.../src/collections-core-admin.adoc | 2 +-
.../src/command-line-utilities.adoc | 26 +-
.../src/common-query-parameters.adoc | 145 +-
solr/solr-ref-guide/src/config-api.adoc | 52 +-
solr/solr-ref-guide/src/configsets-api.adoc | 94 +-
.../solr-ref-guide/src/configuring-logging.adoc | 9 +-
.../src/configuring-solrconfig-xml.adoc | 8 +-
solr/solr-ref-guide/src/content-streams.adoc | 7 +-
solr/solr-ref-guide/src/coreadmin-api.adoc | 29 +-
.../src/cross-data-center-replication-cdcr.adoc | 142 +-
...adir-and-directoryfactory-in-solrconfig.adoc | 6 +-
solr/solr-ref-guide/src/dataimport-screen.adoc | 1 -
solr/solr-ref-guide/src/de-duplication.adoc | 5 -
.../src/defining-core-properties.adoc | 6 +-
solr/solr-ref-guide/src/defining-fields.adoc | 7 +-
.../detecting-languages-during-indexing.adoc | 4 -
.../src/distributed-requests.adoc | 9 +-
.../distributed-search-with-index-sharding.adoc | 7 +-
solr/solr-ref-guide/src/documents-screen.adoc | 16 +-
solr/solr-ref-guide/src/docvalues.adoc | 8 +-
solr/solr-ref-guide/src/enabling-ssl.adoc | 26 +-
solr/solr-ref-guide/src/errata.adoc | 2 -
.../src/exporting-result-sets.adoc | 6 -
solr/solr-ref-guide/src/faceting.adoc | 395 +--
.../field-type-definitions-and-properties.adoc | 12 +-
.../src/field-types-included-with-solr.adoc | 6 +-
.../solr-ref-guide/src/filter-descriptions.adoc | 70 +-
solr/solr-ref-guide/src/function-queries.adoc | 16 +-
.../src/getting-started-with-solrcloud.adoc | 5 -
solr/solr-ref-guide/src/graph-traversal.adoc | 32 +-
.../src/hadoop-authentication-plugin.adoc | 7 +-
solr/solr-ref-guide/src/highlighting.adoc | 15 +-
.../solr-ref-guide/src/how-solrcloud-works.adoc | 7 +-
solr/solr-ref-guide/src/how-to-contribute.adoc | 51 +
.../src/implicit-requesthandlers.adoc | 13 +-
solr/solr-ref-guide/src/index-replication.adoc | 15 +-
solr/solr-ref-guide/src/index.adoc | 2 +-
.../src/indexconfig-in-solrconfig.adoc | 22 +-
.../src/indexing-and-basic-data-operations.adoc | 1 -
.../src/initparams-in-solrconfig.adoc | 3 +-
.../src/introduction-to-solr-indexing.adoc | 2 -
solr/solr-ref-guide/src/jvm-settings.adoc | 3 -
.../src/kerberos-authentication-plugin.adoc | 19 +-
solr/solr-ref-guide/src/language-analysis.adoc | 125 +-
solr/solr-ref-guide/src/learning-to-rank.adoc | 74 +-
.../src/local-parameters-in-queries.adoc | 3 -
solr/solr-ref-guide/src/logging.adoc | 1 -
.../major-changes-from-solr-5-to-solr-6.adoc | 4 +-
.../src/making-and-restoring-backups.adoc | 8 +-
solr/solr-ref-guide/src/managed-resources.adoc | 17 +-
.../src/mbean-request-handler.adoc | 9 +-
solr/solr-ref-guide/src/merging-indexes.adoc | 6 +-
.../src/meta-docs/asciidoc-syntax.adoc | 344 +++
.../src/meta-docs/editing-tools.adoc | 39 +
solr/solr-ref-guide/src/meta-docs/jekyll.adoc | 88 +
solr/solr-ref-guide/src/meta-docs/pdf.adoc | 145 +
solr/solr-ref-guide/src/meta-docs/publish.adoc | 215 ++
solr/solr-ref-guide/src/metrics-reporting.adoc | 6 +-
solr/solr-ref-guide/src/morelikethis.adoc | 9 +-
.../src/near-real-time-searching.adoc | 10 +-
solr/solr-ref-guide/src/other-parsers.adoc | 99 +-
.../src/other-schema-elements.adoc | 2 -
.../src/overview-of-searching-in-solr.adoc | 2 +-
.../src/pagination-of-results.adoc | 4 +-
.../src/performance-statistics-reference.adoc | 4 +-
solr/solr-ref-guide/src/phonetic-matching.adoc | 20 +-
solr/solr-ref-guide/src/ping.adoc | 2 +-
solr/solr-ref-guide/src/post-tool.adoc | 11 +-
solr/solr-ref-guide/src/query-re-ranking.adoc | 2 +-
solr/solr-ref-guide/src/query-screen.adoc | 4 +-
.../src/query-settings-in-solrconfig.adoc | 16 +-
.../read-and-write-side-fault-tolerance.adoc | 6 -
solr/solr-ref-guide/src/realtime-get.adoc | 4 +-
.../src/request-parameters-api.adoc | 17 +-
.../src/requestdispatcher-in-solrconfig.adoc | 6 +-
...lers-and-searchcomponents-in-solrconfig.adoc | 16 +-
solr/solr-ref-guide/src/response-writers.adoc | 187 +-
solr/solr-ref-guide/src/result-clustering.adoc | 36 +-
solr/solr-ref-guide/src/result-grouping.adoc | 27 +-
.../src/rule-based-authorization-plugin.adoc | 16 +-
.../src/rule-based-replica-placement.adoc | 28 +-
.../src/running-solr-on-hdfs.adoc | 20 +-
solr/solr-ref-guide/src/running-solr.adoc | 4 +-
solr/solr-ref-guide/src/schema-api.adoc | 22 +-
...schema-factory-definition-in-solrconfig.adoc | 6 +-
solr/solr-ref-guide/src/schemaless-mode.adoc | 17 +-
solr/solr-ref-guide/src/segments-info.adoc | 2 +-
...tting-up-an-external-zookeeper-ensemble.adoc | 21 +-
.../shards-and-indexing-data-in-solrcloud.adoc | 7 +-
.../src/solr-control-script-reference.adoc | 30 +-
solr/solr-ref-guide/src/solr-glossary.adoc | 4 +-
.../src/solr-jdbc-apache-zeppelin.adoc | 3 -
.../src/solr-jdbc-dbvisualizer.adoc | 15 +-
solr/solr-ref-guide/src/spatial-search.adoc | 23 +-
solr/solr-ref-guide/src/spell-checking.adoc | 190 +-
solr/solr-ref-guide/src/stream-decorators.adoc | 16 +-
.../src/streaming-expressions.adoc | 3 -
solr/solr-ref-guide/src/suggester.adoc | 338 ++-
.../src/taking-solr-to-production.adoc | 23 +-
.../src/the-dismax-query-parser.adoc | 58 +-
.../src/the-extended-dismax-query-parser.adoc | 88 +-
.../src/the-query-elevation-component.adoc | 11 +-
.../src/the-standard-query-parser.adoc | 96 +-
.../solr-ref-guide/src/the-stats-component.adoc | 18 +-
.../src/the-term-vector-component.adoc | 8 +-
.../solr-ref-guide/src/the-terms-component.adoc | 15 +-
.../src/the-well-configured-solr-instance.adoc | 2 -
solr/solr-ref-guide/src/tokenizers.adoc | 14 -
.../transforming-and-indexing-custom-json.adoc | 52 +-
.../src/transforming-result-documents.adoc | 29 +-
solr/solr-ref-guide/src/uima-integration.adoc | 2 -
...anding-analyzers-tokenizers-and-filters.adoc | 4 -
.../src/update-request-processors.adoc | 35 +-
.../src/updatehandlers-in-solrconfig.adoc | 8 +-
.../src/updating-parts-of-documents.adoc | 27 +-
.../src/upgrading-a-solr-cluster.adoc | 8 -
solr/solr-ref-guide/src/upgrading-solr.adoc | 8 +-
.../src/uploading-data-with-index-handlers.adoc | 37 +-
...g-data-with-solr-cell-using-apache-tika.adoc | 48 +-
solr/solr-ref-guide/src/using-javascript.adoc | 2 +-
.../solr-ref-guide/src/using-jmx-with-solr.adoc | 10 +-
solr/solr-ref-guide/src/using-python.adoc | 6 +-
.../src/using-solr-from-ruby.adoc | 2 +-
solr/solr-ref-guide/src/using-solrj.adoc | 8 -
...zookeeper-to-manage-configuration-files.adoc | 25 +-
solr/solr-ref-guide/src/v2-api.adoc | 24 +-
.../src/velocity-response-writer.adoc | 3 -
solr/solr-ref-guide/src/velocity-search-ui.adoc | 4 +-
...king-with-currencies-and-exchange-rates.adoc | 23 +-
solr/solr-ref-guide/src/working-with-dates.adoc | 10 -
.../src/working-with-enum-fields.adoc | 10 +-
...rking-with-external-files-and-processes.adoc | 27 +-
.../src/zookeeper-access-control.adoc | 8 -
.../apache/solr/client/solrj/SolrRequest.java | 4 +
.../cloud/autoscaling/MoveReplicaSuggester.java | 8 +-
.../solr/client/solrj/impl/CloudSolrClient.java | 8 +-
.../client/solrj/impl/XMLResponseParser.java | 17 +-
.../solrj/io/eval/ResidualsEvaluator.java | 82 +
.../client/solrj/io/stream/CloudSolrStream.java | 22 +-
.../client/solrj/io/stream/FacetStream.java | 12 +-
.../solr/client/solrj/io/stream/GetStream.java | 8 +-
.../solr/client/solrj/io/stream/PlotStream.java | 221 ++
.../solr/client/solrj/io/stream/SolrStream.java | 2 +
.../solrj/request/CollectionAdminRequest.java | 40 +-
.../solr/client/solrj/request/V2Request.java | 18 +-
.../org/apache/solr/common/cloud/Replica.java | 7 +-
.../apache/solr/common/cloud/ZkStateReader.java | 7 +-
.../solr/common/params/CollectionParams.java | 9 +
.../solr/common/util/CommandOperation.java | 6 +-
.../java/org/apache/solr/common/util/Utils.java | 8 +-
.../src/resources/apispec/cluster.Commands.json | 8 +-
.../src/resources/apispec/cluster.aliases.json | 4 +-
.../apispec/cluster.configs.Commands.json | 4 +-
.../apispec/cluster.configs.delete.json | 2 +-
.../src/resources/apispec/cluster.configs.json | 2 +-
solr/solrj/src/resources/apispec/cluster.json | 2 +-
.../src/resources/apispec/cluster.nodes.json | 2 +-
.../cluster.security.BasicAuth.Commands.json | 2 +-
...cluster.security.RuleBasedAuthorization.json | 2 +-
...luster.security.authentication.Commands.json | 2 +-
.../cluster.security.authentication.json | 2 +-
...cluster.security.authorization.Commands.json | 2 +-
.../apispec/cluster.security.authorization.json | 2 +-
.../resources/apispec/collections.Commands.json | 22 +-
.../collections.collection.Commands.json | 12 +-
.../collections.collection.Commands.modify.json | 8 +-
.../collections.collection.Commands.reload.json | 2 +-
.../apispec/collections.collection.delete.json | 2 +-
.../apispec/collections.collection.json | 2 +-
.../collections.collection.shards.Commands.json | 12 +-
...ctions.collection.shards.shard.Commands.json | 4 +-
...lections.collection.shards.shard.delete.json | 2 +-
....collection.shards.shard.replica.delete.json | 2 +-
.../src/resources/apispec/collections.json | 2 +-
.../src/resources/apispec/core.RealtimeGet.json | 2 +-
.../apispec/core.SchemaEdit.addCopyField.json | 2 +-
.../apispec/core.SchemaEdit.addField.json | 4 +-
.../apispec/core.SchemaEdit.addFieldType.json | 2 +-
.../core.SchemaEdit.deleteCopyField.json | 2 +-
.../core.SchemaEdit.deleteDynamicField.json | 2 +-
.../apispec/core.SchemaEdit.deleteField.json | 2 +-
.../core.SchemaEdit.deleteFieldType.json | 2 +-
.../src/resources/apispec/core.SchemaEdit.json | 4 +-
.../apispec/core.SchemaRead.copyFields.json | 2 +-
...ore.SchemaRead.dynamicFields_fieldTypes.json | 2 +-
.../apispec/core.SchemaRead.fields.json | 2 +-
.../src/resources/apispec/core.SchemaRead.json | 2 +-
.../src/resources/apispec/core.Update.json | 2 +-
.../resources/apispec/core.config.Commands.json | 6 +-
.../core.config.Commands.runtimeLib.json | 2 +-
.../apispec/core.config.Params.Commands.json | 2 +-
.../resources/apispec/core.config.Params.json | 2 +-
.../src/resources/apispec/core.config.json | 2 +-
.../src/resources/apispec/core.system.blob.json | 2 +-
.../apispec/core.system.blob.upload.json | 2 +-
.../src/resources/apispec/cores.Commands.json | 8 +-
.../src/resources/apispec/cores.Status.json | 2 +-
.../resources/apispec/cores.core.Commands.json | 14 +-
.../apispec/cores.core.Commands.split.json | 2 +-
.../solr/client/solrj/SolrExampleTests.java | 75 +-
.../solr/client/solrj/TestLBHttpSolrClient.java | 2 +-
.../solrj/io/stream/StreamExpressionTest.java | 83 +-
.../stream/StreamExpressionToExpessionTest.java | 5 +-
.../client/solrj/request/TestV2Request.java | 9 +
.../java/org/apache/solr/SolrTestCaseJ4.java | 63 +-
.../java/org/apache/solr/util/RestTestBase.java | 2 +-
.../org/apache/solr/util/RestTestHarness.java | 6 +-
solr/webapp/web/index.html | 2 +-
solr/webapp/web/js/angular/controllers/cloud.js | 1 +
solr/webapp/web/js/angular/controllers/query.js | 5 +-
solr/webapp/web/partials/query.html | 7 +-
486 files changed, 9995 insertions(+), 9345 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/dev-tools/idea/.idea/workspace.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml
index 0ca7f0c..e22108f 100644
--- a/dev-tools/idea/.idea/workspace.xml
+++ b/dev-tools/idea/.idea/workspace.xml
@@ -2,7 +2,7 @@
<project version="4">
<component name="RunManager" selected="JUnit.Lucene core">
<configuration default="true" type="JUnit" factoryName="JUnit">
- <option name="VM_PARAMETERS" value="-ea" />
+ <option name="VM_PARAMETERS" value="-ea -Djava.security.egd=file:/dev/./urandom" />
</configuration>
<configuration default="false" name="Lucene core" type="JUnit" factoryName="JUnit">
<module name="lucene-core-tests" />
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/dev-tools/maven/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template
index cd8d6b8..6b7f915 100644
--- a/dev-tools/maven/pom.xml.template
+++ b/dev-tools/maven/pom.xml.template
@@ -277,6 +277,7 @@
<tests.postingsformat>${tests.postingsformat}</tests.postingsformat>
<tests.timezone>${tests.timezone}</tests.timezone>
<tests.verbose>${tests.verbose}</tests.verbose>
+ <java.security.egd>file:/dev/./urandom</java.security.egd>
</systemPropertyVariables>
</configuration>
</plugin>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/dev-tools/scripts/addVersion.py
----------------------------------------------------------------------
diff --git a/dev-tools/scripts/addVersion.py b/dev-tools/scripts/addVersion.py
index 5ce6bde..745cfd3 100644
--- a/dev-tools/scripts/addVersion.py
+++ b/dev-tools/scripts/addVersion.py
@@ -134,7 +134,7 @@ def update_example_solrconfigs(new_version):
print(' updating example solrconfig.xml files')
matcher = re.compile('<luceneMatchVersion>')
- paths = ['solr/server/solr/configsets', 'solr/example']
+ paths = ['solr/server/solr/configsets', 'solr/example', 'solr/core/src/test-files/solr/configsets/_default']
for path in paths:
if not os.path.isdir(path):
raise RuntimeError("Can't locate configset dir (layout change?) : " + path)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/dev-tools/scripts/jenkins.build.ref.guide.sh
----------------------------------------------------------------------
diff --git a/dev-tools/scripts/jenkins.build.ref.guide.sh b/dev-tools/scripts/jenkins.build.ref.guide.sh
new file mode 100755
index 0000000..0263ef3
--- /dev/null
+++ b/dev-tools/scripts/jenkins.build.ref.guide.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+# This shell script will download the software required to build the ref
+# guide using RVM (Ruby Version Manager), and then run the following
+# under solr/solr-ref-guide: "ant clean build-site build-pdf".
+#
+# The following will be downloaded and installed into $HOME/.rvm/:
+# RVM, Ruby, and Ruby gems jekyll, jekyll-asciidoc, and pygments.rb.
+#
+# The script expects to be run in the top-level project directory.
+#
+# RVM will attempt to verify the signature on downloaded RVM software if
+# you have gpg or gpg2 installed. If you do, as a one-time operation you
+# must import two keys (substitute gpg2 below if you have it installed):
+#
+# gpg --keyserver hkp://keys.gnupg.net --recv-keys \
+# 409B6B1796C275462A1703113804BB82D39DC0E3 \
+# 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x # Echo commands to the console
+set -e # Fail the script if any command fails
+
+RVM_PATH=$HOME/.rvm
+RUBY_VERSION=ruby-2.3.3
+GEMSET=solr-refguide-gemset
+
+# Install the "stable" RVM release to ~/.rvm/, and don't mess with .bash_profile etc.
+\curl -sSL https://get.rvm.io | bash -s -- --ignore-dotfiles stable
+
+set +x # Temporarily disable command echoing to reduce clutter
+
+function echoRun() {
+ local cmd="$1"
+ echo "Running '$cmd'"
+ $cmd
+}
+
+echoRun "source $RVM_PATH/scripts/rvm" # Load RVM into a shell session *as a Bash function*
+echoRun "rvm autolibs disable" # Enable single-user mode
+echoRun "rvm install $RUBY_VERSION" # Install Ruby
+echoRun "rvm gemset create $GEMSET" # Create this project's gemset
+echoRun "rvm $RUBY_VERSION@$GEMSET" # Activate this project's gemset
+
+# Install gems in the gemset. Param --force disables dependency conflict detection.
+echoRun "gem install --force --version 3.5.0 jekyll"
+echoRun "gem install --force --version 2.1.0 jekyll-asciidoc"
+echoRun "gem install --force --version 1.1.2 pygments.rb"
+
+cd solr/solr-ref-guide
+
+set -x # Re-enable command echoing
+ant clean build-site build-pdf
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/dev-tools/scripts/smokeTestRelease.py
----------------------------------------------------------------------
diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py
index 1083a09..01706a7 100644
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@@ -858,11 +858,11 @@ def testSolrExample(unpackPath, javaPath, isSrc):
run('sh ./exampledocs/test_utf8.sh http://localhost:8983/solr/techproducts', 'utf8.log')
print(' run query...')
s = load('http://localhost:8983/solr/techproducts/select/?q=video')
- if s.find('<result name="response" numFound="3" start="0">') == -1:
+ if s.find('"numFound":3,"start":0') == -1:
print('FAILED: response is:\n%s' % s)
raise RuntimeError('query on solr example instance failed')
s = load('http://localhost:8983/v2/cores')
- if s.find('"responseHeader":{"status":0') == -1:
+ if s.find('"status":0,') == -1:
print('FAILED: response is:\n%s' % s)
raise RuntimeError('query api v2 on solr example instance failed')
finally:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8926dd9..d5cc9e8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -16,6 +16,12 @@ Changes in Runtime Behavior
======================= Lucene 7.1.0 =======================
(No Changes)
+Optimizations
+
+* LUCENE-7905: Optimize how OrdinalMap (used by
+ SortedSetDocValuesFacetCounts and others) builds its map (Robert
+ Muir, Adrien Grand, Mike McCandless)
+
======================= Lucene 7.0.0 =======================
New Features
@@ -140,6 +146,9 @@ Improvements
* LUCENE-7730: More accurate encoding of the length normalization factor
thanks to the removal of index-time boosts. (Adrien Grand)
+* LUCENE-7901: Original Highlighter now eagerly throws an exception if you
+ provide components that are null. (Jason Gerlowski, David Smiley)
+
Optimizations
* LUCENE-7416: BooleanQuery optimizes queries that have queries that occur both
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index dba69db..50a758b 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -1062,6 +1062,9 @@
<sysproperty key="tests.src.home" value="${user.dir}" />
+ <!-- replaces the default random source with the nonblocking variant -->
+ <sysproperty key="java.security.egd" value="file:/dev/./urandom"/>
+
<!-- Only pass these to the test JVMs if defined in ANT. -->
<syspropertyset>
<propertyref prefix="tests.maxfailures" />
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 88e34f6..8526be6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -30,8 +30,8 @@ import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
index feb7725..80b4085 100644
--- a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
@@ -79,7 +79,7 @@ public class SortedDocValuesField extends Field {
* alongside a range query that executes on points, such as
* {@link BinaryPoint#newRangeQuery}.
*/
- public static Query newRangeQuery(String field,
+ public static Query newSlowRangeQuery(String field,
BytesRef lowerValue, BytesRef upperValue,
boolean lowerInclusive, boolean upperInclusive) {
return new SortedSetDocValuesRangeQuery(field, lowerValue, upperValue, lowerInclusive, upperInclusive) {
@@ -98,7 +98,7 @@ public class SortedDocValuesField extends Field {
* alongside a range query that executes on points, such as
* {@link BinaryPoint#newExactQuery}.
*/
- public static Query newExactQuery(String field, BytesRef value) {
- return newRangeQuery(field, value, value, true, true);
+ public static Query newSlowExactQuery(String field, BytesRef value) {
+ return newSlowRangeQuery(field, value, value, true, true);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java
index 26b1907..13bb961 100644
--- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java
@@ -79,7 +79,7 @@ public class SortedSetDocValuesField extends Field {
* alongside a range query that executes on points, such as
* {@link BinaryPoint#newRangeQuery}.
*/
- public static Query newRangeQuery(String field,
+ public static Query newSlowRangeQuery(String field,
BytesRef lowerValue, BytesRef upperValue,
boolean lowerInclusive, boolean upperInclusive) {
return new SortedSetDocValuesRangeQuery(field, lowerValue, upperValue, lowerInclusive, upperInclusive) {
@@ -100,7 +100,7 @@ public class SortedSetDocValuesField extends Field {
* alongside a range query that executes on points, such as
* {@link BinaryPoint#newExactQuery}.
*/
- public static Query newExactQuery(String field, BytesRef value) {
- return newRangeQuery(field, value, value, true, true);
+ public static Query newSlowExactQuery(String field, BytesRef value) {
+ return newSlowRangeQuery(field, value, value, true, true);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index 3cd796b..f5f5934 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -18,21 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
import java.util.List;
-import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
-import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InPlaceMergeSorter;
-import org.apache.lucene.util.LongValues;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
-import org.apache.lucene.util.packed.PackedLongValues;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
@@ -649,283 +638,6 @@ public class MultiDocValues {
}
}
- /** maps per-segment ordinals to/from global ordinal space */
- // TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
- // TODO: use more efficient packed ints structures?
- // TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
- public static class OrdinalMap implements Accountable {
-
- private static class SegmentMap implements Accountable {
- private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
-
- /** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
- private static int[] map(final long[] weights) {
- final int[] newToOld = new int[weights.length];
- for (int i = 0; i < weights.length; ++i) {
- newToOld[i] = i;
- }
- new InPlaceMergeSorter() {
- @Override
- protected void swap(int i, int j) {
- final int tmp = newToOld[i];
- newToOld[i] = newToOld[j];
- newToOld[j] = tmp;
- }
- @Override
- protected int compare(int i, int j) {
- // j first since we actually want higher weights first
- return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
- }
- }.sort(0, weights.length);
- return newToOld;
- }
-
- /** Inverse the map. */
- private static int[] inverse(int[] map) {
- final int[] inverse = new int[map.length];
- for (int i = 0; i < map.length; ++i) {
- inverse[map[i]] = i;
- }
- return inverse;
- }
-
- private final int[] newToOld, oldToNew;
-
- SegmentMap(long[] weights) {
- newToOld = map(weights);
- oldToNew = inverse(newToOld);
- assert Arrays.equals(newToOld, inverse(oldToNew));
- }
-
- int newToOld(int segment) {
- return newToOld[segment];
- }
-
- int oldToNew(int segment) {
- return oldToNew[segment];
- }
-
- @Override
- public long ramBytesUsed() {
- return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
- }
- }
-
- /**
- * Create an ordinal map that uses the number of unique values of each
- * {@link SortedDocValues} instance as a weight.
- * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
- */
- public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
- final TermsEnum[] subs = new TermsEnum[values.length];
- final long[] weights = new long[values.length];
- for (int i = 0; i < values.length; ++i) {
- subs[i] = values[i].termsEnum();
- weights[i] = values[i].getValueCount();
- }
- return build(owner, subs, weights, acceptableOverheadRatio);
- }
-
- /**
- * Create an ordinal map that uses the number of unique values of each
- * {@link SortedSetDocValues} instance as a weight.
- * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
- */
- public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
- final TermsEnum[] subs = new TermsEnum[values.length];
- final long[] weights = new long[values.length];
- for (int i = 0; i < values.length; ++i) {
- subs[i] = values[i].termsEnum();
- weights[i] = values[i].getValueCount();
- }
- return build(owner, subs, weights, acceptableOverheadRatio);
- }
-
- /**
- * Creates an ordinal map that allows mapping ords to/from a merged
- * space from <code>subs</code>.
- * @param owner a cache key
- * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
- * not be dense (e.g. can be FilteredTermsEnums}.
- * @param weights a weight for each sub. This is ideally correlated with
- * the number of unique terms that each sub introduces compared
- * to the other subs
- * @throws IOException if an I/O error occurred.
- */
- public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
- if (subs.length != weights.length) {
- throw new IllegalArgumentException("subs and weights must have the same length");
- }
-
- // enums are not sorted, so let's sort to save memory
- final SegmentMap segmentMap = new SegmentMap(weights);
- return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
- }
-
- private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
-
- /** Cache key of whoever asked for this awful thing */
- public final IndexReader.CacheKey owner;
- // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term
- final PackedLongValues globalOrdDeltas;
- // globalOrd -> first segment container
- final PackedLongValues firstSegments;
- // for every segment, segmentOrd -> globalOrd
- final LongValues segmentToGlobalOrds[];
- // the map from/to segment ids
- final SegmentMap segmentMap;
- // ram usage
- final long ramBytesUsed;
-
- OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
- // create the ordinal mappings by pulling a termsenum over each sub's
- // unique terms, and walking a multitermsenum over those
- this.owner = owner;
- this.segmentMap = segmentMap;
- // even though we accept an overhead ratio, we keep these ones with COMPACT
- // since they are only used to resolve values given a global ord, which is
- // slow anyway
- PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
- PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
- final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
- for (int i = 0; i < ordDeltas.length; i++) {
- ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
- }
- long[] ordDeltaBits = new long[subs.length];
- long segmentOrds[] = new long[subs.length];
- ReaderSlice slices[] = new ReaderSlice[subs.length];
- TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
- for (int i = 0; i < slices.length; i++) {
- slices[i] = new ReaderSlice(0, 0, i);
- indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
- }
- MultiTermsEnum mte = new MultiTermsEnum(slices);
- mte.reset(indexes);
- long globalOrd = 0;
- while (mte.next() != null) {
- TermsEnumWithSlice matches[] = mte.getMatchArray();
- int firstSegmentIndex = Integer.MAX_VALUE;
- long globalOrdDelta = Long.MAX_VALUE;
- for (int i = 0; i < mte.getMatchCount(); i++) {
- int segmentIndex = matches[i].index;
- long segmentOrd = matches[i].terms.ord();
- long delta = globalOrd - segmentOrd;
- // We compute the least segment where the term occurs. In case the
- // first segment contains most (or better all) values, this will
- // help save significant memory
- if (segmentIndex < firstSegmentIndex) {
- firstSegmentIndex = segmentIndex;
- globalOrdDelta = delta;
- }
- // for each per-segment ord, map it back to the global term.
- while (segmentOrds[segmentIndex] <= segmentOrd) {
- ordDeltaBits[segmentIndex] |= delta;
- ordDeltas[segmentIndex].add(delta);
- segmentOrds[segmentIndex]++;
- }
- }
- // for each unique term, just mark the first segment index/delta where it occurs
- assert firstSegmentIndex < segmentOrds.length;
- firstSegments.add(firstSegmentIndex);
- globalOrdDeltas.add(globalOrdDelta);
- globalOrd++;
- }
- this.firstSegments = firstSegments.build();
- this.globalOrdDeltas = globalOrdDeltas.build();
- // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
- segmentToGlobalOrds = new LongValues[subs.length];
- long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
- + this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
- + segmentMap.ramBytesUsed();
- for (int i = 0; i < ordDeltas.length; ++i) {
- final PackedLongValues deltas = ordDeltas[i].build();
- if (ordDeltaBits[i] == 0L) {
- // segment ords perfectly match global ordinals
- // likely in case of low cardinalities and large segments
- segmentToGlobalOrds[i] = LongValues.IDENTITY;
- } else {
- final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
- final long monotonicBits = deltas.ramBytesUsed() * 8;
- final long packedBits = bitsRequired * deltas.size();
- if (deltas.size() <= Integer.MAX_VALUE
- && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
- // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
- final int size = (int) deltas.size();
- final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
- final PackedLongValues.Iterator it = deltas.iterator();
- for (int ord = 0; ord < size; ++ord) {
- newDeltas.set(ord, it.next());
- }
- assert !it.hasNext();
- segmentToGlobalOrds[i] = new LongValues() {
- @Override
- public long get(long ord) {
- return ord + newDeltas.get((int) ord);
- }
- };
- ramBytesUsed += newDeltas.ramBytesUsed();
- } else {
- segmentToGlobalOrds[i] = new LongValues() {
- @Override
- public long get(long ord) {
- return ord + deltas.get(ord);
- }
- };
- ramBytesUsed += deltas.ramBytesUsed();
- }
- ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
- }
- }
- this.ramBytesUsed = ramBytesUsed;
- }
-
- /**
- * Given a segment number, return a {@link LongValues} instance that maps
- * segment ordinals to global ordinals.
- */
- public LongValues getGlobalOrds(int segmentIndex) {
- return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
- }
-
- /**
- * Given global ordinal, returns the ordinal of the first segment which contains
- * this ordinal (the corresponding to the segment return {@link #getFirstSegmentNumber}).
- */
- public long getFirstSegmentOrd(long globalOrd) {
- return globalOrd - globalOrdDeltas.get(globalOrd);
- }
-
- /**
- * Given a global ordinal, returns the index of the first
- * segment that contains this term.
- */
- public int getFirstSegmentNumber(long globalOrd) {
- return segmentMap.newToOld((int) firstSegments.get(globalOrd));
- }
-
- /**
- * Returns the total number of unique terms in global ord space.
- */
- public long getValueCount() {
- return globalOrdDeltas.size();
- }
-
- @Override
- public long ramBytesUsed() {
- return ramBytesUsed;
- }
-
- @Override
- public Collection<Accountable> getChildResources() {
- List<Accountable> resources = new ArrayList<>();
- resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
- resources.add(Accountables.namedAccountable("first segments", firstSegments));
- resources.add(Accountables.namedAccountable("segment map", segmentMap));
- // TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
- return resources;
- }
- }
-
/**
* Implements SortedDocValues over n subs, using an OrdinalMap
* @lucene.internal
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index 630b65c..b484228 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -166,7 +166,7 @@ final class MultiSorter {
final SortedDocValues sorted = Sorter.getOrWrapSorted(readers.get(i), sortField);
values[i] = sorted;
}
- MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
+ OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
index ac6887f..51f4958 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
@@ -302,6 +302,8 @@ public final class MultiTermsEnum extends TermsEnum {
// gather equal top fields
if (queue.size() > 0) {
+ // TODO: we could maybe defer this somewhat costly operation until one of the APIs that
+ // needs to see the top is invoked (docFreq, postings, etc.)
pullTop();
} else {
current = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java
new file mode 100644
index 0000000..bbb643f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.LongValues;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
+
+/** Maps per-segment ordinals to/from global ordinal space, using a compact packed-ints representation.
+ *
+ * <p><b>NOTE</b>: this is a costly operation, as it must merge sort all terms, and may require non-trivial RAM once done. It's better to operate in
+ * segment-private ordinal space instead when possible.
+ *
+ * @lucene.internal */
+public class OrdinalMap implements Accountable {
+ // TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
+ // TODO: use more efficient packed ints structures?
+
+ private static class TermsEnumIndex {
+ public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
+ final int subIndex;
+ final TermsEnum termsEnum;
+ BytesRef currentTerm;
+
+ public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
+ this.termsEnum = termsEnum;
+ this.subIndex = subIndex;
+ }
+
+ public BytesRef next() throws IOException {
+ currentTerm = termsEnum.next();
+ return currentTerm;
+ }
+ }
+
+ private static class SegmentMap implements Accountable {
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
+
+ /** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
+ private static int[] map(final long[] weights) {
+ final int[] newToOld = new int[weights.length];
+ for (int i = 0; i < weights.length; ++i) {
+ newToOld[i] = i;
+ }
+ new InPlaceMergeSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ final int tmp = newToOld[i];
+ newToOld[i] = newToOld[j];
+ newToOld[j] = tmp;
+ }
+ @Override
+ protected int compare(int i, int j) {
+ // j first since we actually want higher weights first
+ return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
+ }
+ }.sort(0, weights.length);
+ return newToOld;
+ }
+
+ /** Invert the map. */
+ private static int[] inverse(int[] map) {
+ final int[] inverse = new int[map.length];
+ for (int i = 0; i < map.length; ++i) {
+ inverse[map[i]] = i;
+ }
+ return inverse;
+ }
+
+ private final int[] newToOld, oldToNew;
+
+ SegmentMap(long[] weights) {
+ newToOld = map(weights);
+ oldToNew = inverse(newToOld);
+ assert Arrays.equals(newToOld, inverse(oldToNew));
+ }
+
+ int newToOld(int segment) {
+ return newToOld[segment];
+ }
+
+ int oldToNew(int segment) {
+ return oldToNew[segment];
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
+ }
+ }
+
+ /**
+ * Create an ordinal map that uses the number of unique values of each
+ * {@link SortedDocValues} instance as a weight.
+ * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
+ */
+ public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
+ final TermsEnum[] subs = new TermsEnum[values.length];
+ final long[] weights = new long[values.length];
+ for (int i = 0; i < values.length; ++i) {
+ subs[i] = values[i].termsEnum();
+ weights[i] = values[i].getValueCount();
+ }
+ return build(owner, subs, weights, acceptableOverheadRatio);
+ }
+
+ /**
+ * Create an ordinal map that uses the number of unique values of each
+ * {@link SortedSetDocValues} instance as a weight.
+ * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
+ */
+ public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
+ final TermsEnum[] subs = new TermsEnum[values.length];
+ final long[] weights = new long[values.length];
+ for (int i = 0; i < values.length; ++i) {
+ subs[i] = values[i].termsEnum();
+ weights[i] = values[i].getValueCount();
+ }
+ return build(owner, subs, weights, acceptableOverheadRatio);
+ }
+
+ /**
+ * Creates an ordinal map that allows mapping ords to/from a merged
+ * space from <code>subs</code>.
+ * @param owner a cache key
+ * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
+ * not be dense (e.g. can be FilteredTermsEnums).
+ * @param weights a weight for each sub. This is ideally correlated with
+ * the number of unique terms that each sub introduces compared
+ * to the other subs
+ * @throws IOException if an I/O error occurred.
+ */
+ public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
+ if (subs.length != weights.length) {
+ throw new IllegalArgumentException("subs and weights must have the same length");
+ }
+
+ // enums are not sorted, so let's sort to save memory
+ final SegmentMap segmentMap = new SegmentMap(weights);
+ return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
+ }
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
+
+ /** Cache key of whoever asked for this awful thing */
+ public final IndexReader.CacheKey owner;
+ // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
+ final PackedLongValues globalOrdDeltas;
+ // globalOrd -> first segment container
+ final PackedLongValues firstSegments;
+ // for every segment, segmentOrd -> globalOrd
+ final LongValues segmentToGlobalOrds[];
+ // the map from/to segment ids
+ final SegmentMap segmentMap;
+ // ram usage
+ final long ramBytesUsed;
+
+ OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
+ // create the ordinal mappings by pulling a termsenum over each sub's
+ // unique terms, and merge-sorting those terms with a priority queue
+ this.owner = owner;
+ this.segmentMap = segmentMap;
+ // even though we accept an overhead ratio, we keep these ones with COMPACT
+ // since they are only used to resolve values given a global ord, which is
+ // slow anyway
+ PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
+ PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
+ final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
+ for (int i = 0; i < ordDeltas.length; i++) {
+ ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
+ }
+ long[] ordDeltaBits = new long[subs.length];
+ long[] segmentOrds = new long[subs.length];
+
+ // Just merge-sorts by term:
+ PriorityQueue<TermsEnumIndex> queue = new PriorityQueue<TermsEnumIndex>(subs.length) {
+ @Override
+ protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
+ return a.currentTerm.compareTo(b.currentTerm) < 0;
+ }
+ };
+
+ for (int i = 0; i < subs.length; i++) {
+ TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
+ if (sub.next() != null) {
+ queue.add(sub);
+ }
+ }
+
+ BytesRefBuilder scratch = new BytesRefBuilder();
+
+ long globalOrd = 0;
+ while (queue.size() != 0) {
+ TermsEnumIndex top = queue.top();
+ scratch.copyBytes(top.currentTerm);
+
+ int firstSegmentIndex = Integer.MAX_VALUE;
+ long globalOrdDelta = Long.MAX_VALUE;
+
+ // Advance past this term, recording the per-segment ord deltas:
+ while (true) {
+ top = queue.top();
+ long segmentOrd = top.termsEnum.ord();
+ long delta = globalOrd - segmentOrd;
+ int segmentIndex = top.subIndex;
+ // We compute the least segment where the term occurs. In case the
+ // first segment contains most (or better all) values, this will
+ // help save significant memory
+ if (segmentIndex < firstSegmentIndex) {
+ firstSegmentIndex = segmentIndex;
+ globalOrdDelta = delta;
+ }
+ ordDeltaBits[segmentIndex] |= delta;
+
+ // for each per-segment ord, map it back to the global term; the while loop is needed
+ // in case the incoming TermsEnums don't have compact ordinals (some ordinal values
+ // are skipped), which can happen e.g. with a FilteredTermsEnum:
+ assert segmentOrds[segmentIndex] <= segmentOrd;
+
+ // TODO: we could specialize this case (the while loop is not needed when the ords
+ // are compact)
+ do {
+ ordDeltas[segmentIndex].add(delta);
+ segmentOrds[segmentIndex]++;
+ } while (segmentOrds[segmentIndex] <= segmentOrd);
+
+ if (top.next() == null) {
+ queue.pop();
+ if (queue.size() == 0) {
+ break;
+ }
+ } else {
+ queue.updateTop();
+ }
+ if (queue.top().currentTerm.equals(scratch.get()) == false) {
+ break;
+ }
+ }
+
+ // for each unique term, just mark the first segment index/delta where it occurs
+ firstSegments.add(firstSegmentIndex);
+ globalOrdDeltas.add(globalOrdDelta);
+ globalOrd++;
+ }
+
+ this.firstSegments = firstSegments.build();
+ this.globalOrdDeltas = globalOrdDeltas.build();
+ // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
+ segmentToGlobalOrds = new LongValues[subs.length];
+ long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ + this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ + segmentMap.ramBytesUsed();
+ for (int i = 0; i < ordDeltas.length; ++i) {
+ final PackedLongValues deltas = ordDeltas[i].build();
+ if (ordDeltaBits[i] == 0L) {
+ // segment ords perfectly match global ordinals
+ // likely in case of low cardinalities and large segments
+ segmentToGlobalOrds[i] = LongValues.IDENTITY;
+ } else {
+ final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
+ final long monotonicBits = deltas.ramBytesUsed() * 8;
+ final long packedBits = bitsRequired * deltas.size();
+ if (deltas.size() <= Integer.MAX_VALUE
+ && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
+ // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
+ final int size = (int) deltas.size();
+ final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
+ final PackedLongValues.Iterator it = deltas.iterator();
+ for (int ord = 0; ord < size; ++ord) {
+ newDeltas.set(ord, it.next());
+ }
+ assert it.hasNext() == false;
+ segmentToGlobalOrds[i] = new LongValues() {
+ @Override
+ public long get(long ord) {
+ return ord + newDeltas.get((int) ord);
+ }
+ };
+ ramBytesUsed += newDeltas.ramBytesUsed();
+ } else {
+ segmentToGlobalOrds[i] = new LongValues() {
+ @Override
+ public long get(long ord) {
+ return ord + deltas.get(ord);
+ }
+ };
+ ramBytesUsed += deltas.ramBytesUsed();
+ }
+ ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
+ }
+ }
+ this.ramBytesUsed = ramBytesUsed;
+ }
+
+ /**
+ * Given a segment number, return a {@link LongValues} instance that maps
+ * segment ordinals to global ordinals.
+ */
+ public LongValues getGlobalOrds(int segmentIndex) {
+ return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
+ }
+
+ /**
+ * Given a global ordinal, returns the ordinal in the first segment that contains
+ * it (the segment whose number is returned by {@link #getFirstSegmentNumber}).
+ */
+ public long getFirstSegmentOrd(long globalOrd) {
+ return globalOrd - globalOrdDeltas.get(globalOrd);
+ }
+
+ /**
+ * Given a global ordinal, returns the index of the first
+ * segment that contains this term.
+ */
+ public int getFirstSegmentNumber(long globalOrd) {
+ return segmentMap.newToOld((int) firstSegments.get(globalOrd));
+ }
+
+ /**
+ * Returns the total number of unique terms in global ord space.
+ */
+ public long getValueCount() {
+ return globalOrdDeltas.size();
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed;
+ }
+
+ @Override
+ public Collection<Accountable> getChildResources() {
+ List<Accountable> resources = new ArrayList<>();
+ resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
+ resources.add(Accountables.namedAccountable("first segments", firstSegments));
+ resources.add(Accountables.namedAccountable("segment map", segmentMap));
+ // TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
+ return resources;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
index 7c6ed8d..86bcdef 100644
--- a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java
@@ -608,7 +608,6 @@ public class OfflineSorter {
int count = 0;
while ((spare = iter.next()) != null) {
- assert spare.length <= Short.MAX_VALUE;
out.write(spare);
count++;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java
index 921102d6..6120985 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
-import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesQueries.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesQueries.java
index 0591278..43fae3e 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesQueries.java
@@ -141,12 +141,12 @@ public class TestDocValuesQueries extends LuceneTestCase {
final Query q1 = LongPoint.newRangeQuery("idx", min, max);
final Query q2;
if (sortedSet) {
- q2 = SortedSetDocValuesField.newRangeQuery("dv",
+ q2 = SortedSetDocValuesField.newSlowRangeQuery("dv",
min == Long.MIN_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMin),
max == Long.MAX_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMax),
includeMin, includeMax);
} else {
- q2 = SortedDocValuesField.newRangeQuery("dv",
+ q2 = SortedDocValuesField.newSlowRangeQuery("dv",
min == Long.MIN_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMin),
max == Long.MAX_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMax),
includeMin, includeMax);
@@ -191,11 +191,11 @@ public class TestDocValuesQueries extends LuceneTestCase {
QueryUtils.checkUnequal(q1, SortedNumericDocValuesField.newSlowRangeQuery("foo", 4, 5));
QueryUtils.checkUnequal(q1, SortedNumericDocValuesField.newSlowRangeQuery("bar", 3, 5));
- Query q2 = SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), true, true);
- QueryUtils.checkEqual(q2, SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), true, true));
- QueryUtils.checkUnequal(q2, SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("baz"), new BytesRef("baz"), true, true));
- QueryUtils.checkUnequal(q2, SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), new BytesRef("bar"), true, true));
- QueryUtils.checkUnequal(q2, SortedSetDocValuesField.newRangeQuery("quux", new BytesRef("bar"), new BytesRef("baz"), true, true));
+ Query q2 = SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), true, true);
+ QueryUtils.checkEqual(q2, SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), true, true));
+ QueryUtils.checkUnequal(q2, SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("baz"), new BytesRef("baz"), true, true));
+ QueryUtils.checkUnequal(q2, SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), new BytesRef("bar"), true, true));
+ QueryUtils.checkUnequal(q2, SortedSetDocValuesField.newSlowRangeQuery("quux", new BytesRef("bar"), new BytesRef("baz"), true, true));
}
public void testToString() {
@@ -204,15 +204,15 @@ public class TestDocValuesQueries extends LuceneTestCase {
assertEquals("[3 TO 5]", q1.toString("foo"));
assertEquals("foo:[3 TO 5]", q1.toString("bar"));
- Query q2 = SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), true, true);
+ Query q2 = SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), true, true);
assertEquals("foo:[[62 61 72] TO [62 61 7a]]", q2.toString());
- q2 = SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), false, true);
+ q2 = SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), false, true);
assertEquals("foo:{[62 61 72] TO [62 61 7a]]", q2.toString());
- q2 = SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), false, false);
+ q2 = SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), new BytesRef("baz"), false, false);
assertEquals("foo:{[62 61 72] TO [62 61 7a]}", q2.toString());
- q2 = SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("bar"), null, true, true);
+ q2 = SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("bar"), null, true, true);
assertEquals("foo:[[62 61 72] TO *}", q2.toString());
- q2 = SortedSetDocValuesField.newRangeQuery("foo", null, new BytesRef("baz"), true, true);
+ q2 = SortedSetDocValuesField.newSlowRangeQuery("foo", null, new BytesRef("baz"), true, true);
assertEquals("foo:{* TO [62 61 7a]]", q2.toString());
assertEquals("{* TO [62 61 7a]]", q2.toString("foo"));
assertEquals("foo:{* TO [62 61 7a]]", q2.toString("bar"));
@@ -228,8 +228,8 @@ public class TestDocValuesQueries extends LuceneTestCase {
for (Query query : Arrays.asList(
NumericDocValuesField.newSlowRangeQuery("foo", 2, 4),
SortedNumericDocValuesField.newSlowRangeQuery("foo", 2, 4),
- SortedDocValuesField.newRangeQuery("foo", new BytesRef("abc"), new BytesRef("bcd"), random().nextBoolean(), random().nextBoolean()),
- SortedSetDocValuesField.newRangeQuery("foo", new BytesRef("abc"), new BytesRef("bcd"), random().nextBoolean(), random().nextBoolean()))) {
+ SortedDocValuesField.newSlowRangeQuery("foo", new BytesRef("abc"), new BytesRef("bcd"), random().nextBoolean(), random().nextBoolean()),
+ SortedSetDocValuesField.newSlowRangeQuery("foo", new BytesRef("abc"), new BytesRef("bcd"), random().nextBoolean(), random().nextBoolean()))) {
Weight w = searcher.createNormalizedWeight(query, random().nextBoolean());
assertNull(w.scorer(searcher.getIndexReader().leaves().get(0)));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
index bcb6acf..4e4a01c 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java
@@ -42,6 +42,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConjunctionDISI;
@@ -152,10 +153,10 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
private class CountOneSegment implements Callable<Void> {
final LeafReader leafReader;
final MatchingDocs hits;
- final MultiDocValues.OrdinalMap ordinalMap;
+ final OrdinalMap ordinalMap;
final int segOrd;
- public CountOneSegment(LeafReader leafReader, MatchingDocs hits, MultiDocValues.OrdinalMap ordinalMap, int segOrd) {
+ public CountOneSegment(LeafReader leafReader, MatchingDocs hits, OrdinalMap ordinalMap, int segOrd) {
this.leafReader = leafReader;
this.hits = hits;
this.ordinalMap = ordinalMap;
@@ -240,7 +241,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
/** Does all the "real work" of tallying up the counts. */
private final void count(List<MatchingDocs> matchingDocs) throws IOException, InterruptedException {
- MultiDocValues.OrdinalMap ordinalMap;
+ OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
@@ -281,7 +282,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
private final void countAll() throws IOException, InterruptedException {
//System.out.println("ssdv count");
- MultiDocValues.OrdinalMap ordinalMap;
+ OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
index 832ff3b..a3098ba 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java
@@ -31,8 +31,8 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
-import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d00e53b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
index 2198fc0..6df4334 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java
@@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConjunctionDISI;
@@ -155,7 +156,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
}
- private void countOneSegment(MultiDocValues.OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
+ private void countOneSegment(OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
if (segValues == null) {
// nothing to count
@@ -236,7 +237,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
//System.out.println("ssdv count");
- MultiDocValues.OrdinalMap ordinalMap;
+ OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
@@ -267,7 +268,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
private final void countAll() throws IOException {
//System.out.println("ssdv count");
- MultiDocValues.OrdinalMap ordinalMap;
+ OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in