Posted to commits@lucene.apache.org by jp...@apache.org on 2022/12/27 10:17:53 UTC

[lucene] branch branch_9x updated (5f1bd97d15e -> 08782710435)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a change to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


    from 5f1bd97d15e Aggressive `count` in BooleanWeight (#12017)
     new 97b108a32f5 Avoid sorting values of multi-valued writers if there is a single value. (#12039)
     new 33a5b31ce4c Replace JIRA release instructions with GitHub. (#11968)
     new 5075126f76b Tune the amount of memory that is allocated to sorting postings upon flushing. (#12011)
     new 08782710435 Optimize flush of doc-value fields that are effectively single-valued when an index sort is configured. (#12037)

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 dev-tools/scripts/releaseWizard.yaml               | 75 +++++++-------------
 lucene/CHANGES.txt                                 |  4 ++
 .../apache/lucene/index/FreqProxTermsWriter.java   | 28 +++++---
 .../lucene/index/NumericDocValuesWriter.java       | 45 +++++++-----
 .../apache/lucene/index/SortedDocValuesWriter.java | 80 ++++++++++++----------
 .../lucene/index/SortedNumericDocValuesWriter.java | 19 ++++-
 .../lucene/index/SortedSetDocValuesWriter.java     | 57 ++++++++-------
 7 files changed, 170 insertions(+), 138 deletions(-)


[lucene] 01/04: Avoid sorting values of multi-valued writers if there is a single value. (#12039)

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 97b108a32f53efbe27c6618880e6ed0ef8c7bf47
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Dec 27 11:03:06 2022 +0100

    Avoid sorting values of multi-valued writers if there is a single value. (#12039)
    
    They currently call `Arrays#sort` unconditionally, which incurs a small amount
    of overhead due to range checks and the logic that selects the optimal sorting
    algorithm based on the number of values. We can skip this overhead when there
    is a single value.
---
 .../java/org/apache/lucene/index/SortedNumericDocValuesWriter.java    | 4 +++-
 .../src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java    | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
index 66a28374c3d..07dd60b5ec5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
@@ -72,7 +72,9 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
     if (currentDoc == -1) {
       return;
     }
-    Arrays.sort(currentValues, 0, currentUpto);
+    if (currentUpto > 1) {
+      Arrays.sort(currentValues, 0, currentUpto);
+    }
     for (int i = 0; i < currentUpto; i++) {
       pending.add(currentValues[i]);
     }
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index 7b4ab53dfa2..b1676f84dfd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -102,7 +102,9 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
     if (currentDoc == -1) {
       return;
     }
-    Arrays.sort(currentValues, 0, currentUpto);
+    if (currentUpto > 1) {
+      Arrays.sort(currentValues, 0, currentUpto);
+    }
     int lastValue = -1;
     int count = 0;
     for (int i = 0; i < currentUpto; i++) {
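
The change above boils down to guarding `Arrays.sort` behind a length check. Below
is a minimal standalone sketch of the pattern; the class and variable names are
illustrative and not the actual writer fields:

import java.util.Arrays;

public class SingleValueSortSketch {
  public static void main(String[] args) {
    long[] currentValues = {42L, 0L, 0L, 0L}; // backing buffer, mostly unused
    int currentUpto = 1; // number of values buffered for the current document

    // Sorting a single element is a no-op, so only pay for Arrays.sort
    // (range checks, algorithm selection) when there is more than one value.
    if (currentUpto > 1) {
      Arrays.sort(currentValues, 0, currentUpto);
    }

    for (int i = 0; i < currentUpto; i++) {
      System.out.println(currentValues[i]);
    }
  }
}

Behavior is unchanged for zero or one buffered values, since sorting an empty or
single-element range is already a no-op.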


[lucene] 04/04: Optimize flush of doc-value fields that are effectively single-valued when an index sort is configured. (#12037)

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 08782710435618f15825f777ae2a5bee9b6f681a
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Dec 27 11:12:56 2022 +0100

    Optimize flush of doc-value fields that are effectively single-valued when an index sort is configured. (#12037)
    
    This iterates on #399 to also optimize the case where an index sort is
    configured. When cutting the NYC taxis benchmark over to the new numeric fields,
    [flush times](http://people.apache.org/~mikemccand/lucenebench/sparseResults.html#flush_times)
    stayed mostly the same with index sorting disabled but increased by 7-8% with
    index sorting enabled. I expect this change to address that slowdown.
---
 .../lucene/index/NumericDocValuesWriter.java       | 45 +++++++-----
 .../apache/lucene/index/SortedDocValuesWriter.java | 80 ++++++++++++----------
 .../lucene/index/SortedNumericDocValuesWriter.java | 15 ++++
 .../lucene/index/SortedSetDocValuesWriter.java     | 53 +++++++-------
 4 files changed, 114 insertions(+), 79 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index c760e7f3b0b..f4501108643 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -20,6 +20,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 
 import java.io.IOException;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Counter;
@@ -99,30 +100,38 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
     if (finalValues == null) {
       finalValues = pending.build();
     }
+
+    dvConsumer.addNumericField(
+        fieldInfo, getDocValuesProducer(fieldInfo, finalValues, docsWithField, sortMap));
+  }
+
+  static DocValuesProducer getDocValuesProducer(
+      FieldInfo writerFieldInfo,
+      PackedLongValues values,
+      DocsWithFieldSet docsWithField,
+      Sorter.DocMap sortMap)
+      throws IOException {
     final NumericDVs sorted;
     if (sortMap != null) {
-      NumericDocValues oldValues =
-          new BufferedNumericDocValues(finalValues, docsWithField.iterator());
-      sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap, oldValues);
+      NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator());
+      sorted = sortDocValues(sortMap.size(), sortMap, oldValues);
     } else {
       sorted = null;
     }
 
-    dvConsumer.addNumericField(
-        fieldInfo,
-        new EmptyDocValuesProducer() {
-          @Override
-          public NumericDocValues getNumeric(FieldInfo fieldInfo) {
-            if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
-              throw new IllegalArgumentException("wrong fieldInfo");
-            }
-            if (sorted == null) {
-              return new BufferedNumericDocValues(finalValues, docsWithField.iterator());
-            } else {
-              return new SortingNumericDocValues(sorted);
-            }
-          }
-        });
+    return new EmptyDocValuesProducer() {
+      @Override
+      public NumericDocValues getNumeric(FieldInfo fieldInfo) {
+        if (fieldInfo != writerFieldInfo) {
+          throw new IllegalArgumentException("wrong fieldInfo");
+        }
+        if (sorted == null) {
+          return new BufferedNumericDocValues(values, docsWithField.iterator());
+        } else {
+          return new SortingNumericDocValues(sorted);
+        }
+      }
+    };
   }
 
   // iterates over the values we have in ram
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
index ce7b8dccf20..14805858eef 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
@@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
@@ -109,24 +110,28 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
     bytesUsed = newBytesUsed;
   }
 
-  @Override
-  SortedDocValues getDocValues() {
-    int valueCount = hash.size();
+  private void finish() {
     if (finalSortedValues == null) {
+      int valueCount = hash.size();
       updateBytesUsed();
       assert finalOrdMap == null && finalOrds == null;
       finalSortedValues = hash.sort();
       finalOrds = pending.build();
       finalOrdMap = new int[valueCount];
+      for (int ord = 0; ord < valueCount; ord++) {
+        finalOrdMap[finalSortedValues[ord]] = ord;
+      }
     }
-    for (int ord = 0; ord < valueCount; ord++) {
-      finalOrdMap[finalSortedValues[ord]] = ord;
-    }
+  }
+
+  @Override
+  SortedDocValues getDocValues() {
+    finish();
     return new BufferedSortedDocValues(
         hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
   }
 
-  private int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
+  private static int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
       throws IOException {
     int[] ords = new int[maxDoc];
     Arrays.fill(ords, -1);
@@ -141,45 +146,48 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
   @Override
   public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
       throws IOException {
-    final int valueCount = hash.size();
-    if (finalOrds == null) {
-      updateBytesUsed();
-      finalSortedValues = hash.sort();
-      finalOrds = pending.build();
-      finalOrdMap = new int[valueCount];
-      for (int ord = 0; ord < valueCount; ord++) {
-        finalOrdMap[finalSortedValues[ord]] = ord;
-      }
-    }
+    finish();
+
+    dvConsumer.addSortedField(
+        fieldInfo,
+        getDocValuesProducer(
+            fieldInfo, hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField, sortMap));
+  }
 
+  static DocValuesProducer getDocValuesProducer(
+      FieldInfo writerFieldInfo,
+      BytesRefHash hash,
+      PackedLongValues ords,
+      int[] sortedValues,
+      int[] ordMap,
+      DocsWithFieldSet docsWithField,
+      Sorter.DocMap sortMap)
+      throws IOException {
     final int[] sorted;
     if (sortMap != null) {
       sorted =
           sortDocValues(
-              state.segmentInfo.maxDoc(),
+              sortMap.size(),
               sortMap,
               new BufferedSortedDocValues(
-                  hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator()));
+                  hash, ords, sortedValues, ordMap, docsWithField.iterator()));
     } else {
       sorted = null;
     }
-    dvConsumer.addSortedField(
-        fieldInfo,
-        new EmptyDocValuesProducer() {
-          @Override
-          public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
-            if (fieldInfoIn != fieldInfo) {
-              throw new IllegalArgumentException("wrong fieldInfo");
-            }
-            final SortedDocValues buf =
-                new BufferedSortedDocValues(
-                    hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
-            if (sorted == null) {
-              return buf;
-            }
-            return new SortingSortedDocValues(buf, sorted);
-          }
-        });
+    return new EmptyDocValuesProducer() {
+      @Override
+      public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
+        if (fieldInfoIn != writerFieldInfo) {
+          throw new IllegalArgumentException("wrong fieldInfo");
+        }
+        final SortedDocValues buf =
+            new BufferedSortedDocValues(hash, ords, sortedValues, ordMap, docsWithField.iterator());
+        if (sorted == null) {
+          return buf;
+        }
+        return new SortingSortedDocValues(buf, sorted);
+      }
+    };
   }
 
   static class BufferedSortedDocValues extends SortedDocValues {
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
index 07dd60b5ec5..6bc1d97bc97 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
@@ -21,6 +21,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
@@ -175,6 +176,20 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
       valueCounts = finalValuesCount;
     }
 
+    if (valueCounts == null) {
+      DocValuesProducer singleValueProducer =
+          NumericDocValuesWriter.getDocValuesProducer(fieldInfo, values, docsWithField, sortMap);
+      dvConsumer.addSortedNumericField(
+          fieldInfo,
+          new EmptyDocValuesProducer() {
+            @Override
+            public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException {
+              return DocValues.singleton(singleValueProducer.getNumeric(fieldInfo));
+            }
+          });
+      return;
+    }
+
     final LongValues sorted;
     if (sortMap != null) {
       sorted =
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index b1676f84dfd..de1611ece32 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.SortedDocValuesWriter.BufferedSortedDocValues;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
@@ -162,8 +163,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
     bytesUsed = newBytesUsed;
   }
 
-  @Override
-  SortedSetDocValues getDocValues() {
+  private void finish() {
     if (finalOrds == null) {
       assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
       finishCurrentDoc();
@@ -172,10 +172,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
       finalOrdCounts = pendingCounts == null ? null : pendingCounts.build();
       finalSortedValues = hash.sort();
       finalOrdMap = new int[valueCount];
+      for (int ord = 0; ord < finalOrdMap.length; ord++) {
+        finalOrdMap[finalSortedValues[ord]] = ord;
+      }
     }
-    for (int ord = 0; ord < finalOrdMap.length; ord++) {
-      finalOrdMap[finalSortedValues[ord]] = ord;
-    }
+  }
+
+  @Override
+  SortedSetDocValues getDocValues() {
+    finish();
     return getValues(
         finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount, docsWithField);
   }
@@ -200,27 +205,25 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
   @Override
   public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
       throws IOException {
-    final int valueCount = hash.size();
-    final PackedLongValues ords;
-    final PackedLongValues ordCounts;
-    final int[] sortedValues;
-    final int[] ordMap;
+    finish();
+    final PackedLongValues ords = finalOrds;
+    final PackedLongValues ordCounts = finalOrdCounts;
+    final int[] sortedValues = finalSortedValues;
+    final int[] ordMap = finalOrdMap;
 
-    if (finalOrds == null) {
-      assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
-      finishCurrentDoc();
-      ords = pending.build();
-      ordCounts = pendingCounts == null ? null : pendingCounts.build();
-      sortedValues = hash.sort();
-      ordMap = new int[valueCount];
-      for (int ord = 0; ord < valueCount; ord++) {
-        ordMap[sortedValues[ord]] = ord;
-      }
-    } else {
-      ords = finalOrds;
-      ordCounts = finalOrdCounts;
-      sortedValues = finalSortedValues;
-      ordMap = finalOrdMap;
+    if (ordCounts == null) {
+      DocValuesProducer singleValueProducer =
+          SortedDocValuesWriter.getDocValuesProducer(
+              fieldInfo, hash, ords, sortedValues, ordMap, docsWithField, sortMap);
+      dvConsumer.addSortedSetField(
+          fieldInfo,
+          new EmptyDocValuesProducer() {
+            @Override
+            public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
+              return DocValues.singleton(singleValueProducer.getSorted(fieldInfo));
+            }
+          });
+      return;
     }
 
     final DocOrds docOrds;
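
The core trick in this commit, visible in both hunks above, is to flush an
effectively single-valued field through the cheaper single-valued producer and then
expose it through the multi-valued API via `DocValues.singleton`. A minimal
standalone sketch of that wrapping; it uses the empty doc-values instance as a
stand-in for the real buffered values so that it stays self-contained:

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

public class SingletonViewSketch {
  public static void main(String[] args) throws Exception {
    // Stand-in for the buffered single-valued numbers; empty keeps the sketch runnable.
    NumericDocValues single = DocValues.emptyNumeric();

    // Expose the single-valued field through the SortedNumericDocValues API
    // without materializing per-document value counts.
    SortedNumericDocValues multi = DocValues.singleton(single);

    for (int doc = multi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = multi.nextDoc()) {
      System.out.println(doc + " -> " + multi.nextValue());
    }
  }
}

The loop prints nothing here because the backing values are empty; in the writers
above, the same wrapping is applied to the real producer only when `valueCounts` or
`ordCounts` is null, i.e. when no document ever had more than one value.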


[lucene] 02/04: Replace JIRA release instructions with GitHub. (#11968)

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 33a5b31ce4cc03281ceee3d3512fa4dedcbc7a9d
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Dec 27 11:08:46 2022 +0100

    Replace JIRA release instructions with GitHub. (#11968)
---
 dev-tools/scripts/releaseWizard.yaml | 75 +++++++++++++-----------------------
 1 file changed, 26 insertions(+), 49 deletions(-)

diff --git a/dev-tools/scripts/releaseWizard.yaml b/dev-tools/scripts/releaseWizard.yaml
index 7f793d30d2a..737c8298696 100644
--- a/dev-tools/scripts/releaseWizard.yaml
+++ b/dev-tools/scripts/releaseWizard.yaml
@@ -551,12 +551,12 @@ groups:
       * Before committing to the branch, reply to this thread and argue
         why the fix needs backporting and how long it will take.
       * All issues accepted for backporting should be marked with {{ release_version }}
-        in JIRA, and issues that should delay the release must be marked as Blocker
+        in GitHub, and issues that should delay the release must be marked as Blocker
       * All patches that are intended for the branch should first be committed
         to the unstable branch, merged into the stable branch, and then into
         the current release branch.
-      * Only Jira issues with Fix version {{ release_version }} and priority "Blocker" will delay
-        a release candidate build.
+      * Only GitHub issues or pull requests with milestone {{ release_version }} and
+        priority "Blocker" will delay a release candidate build.
       ----
     types:
     - bugfix
@@ -571,14 +571,14 @@ groups:
       Ask on dev@ for input. Ideally the timing of this request mostly coincides with the
       release branch creation. It's a good idea to remind the devs of this later in the release too.
 
-      NOTE: Do not add every single JIRA issue, but distill the Release note into important changes!
+      NOTE: Do not add every single GitHub PR, but distill the Release note into important changes!
     links:
     - https://cwiki.apache.org/confluence/display/LUCENE/Release+Notes
   - !Todo
-    id: new_jira_versions
-    title: Add a new version in JIRA for the next release
+    id: new_github_versions
+    title: Add a new version in GitHub for the next release
     description: |-
-      Go to the JIRA "Manage Versions" Administration pages and add the new version:
+      Go to the GitHub milestones and add the new version:
 
       {% if release_type == 'major' -%}
       . Change name of version `main ({{ release_version_major }}.0)` into `{{ release_version_major }}.0`
@@ -588,13 +588,13 @@ groups:
     - major
     - minor
     links:
-    - https://issues.apache.org/jira/plugins/servlet/project-config/LUCENE/versions
+    - https://github.com/apache/lucene/milestones
 - !TodoGroup
   id: artifacts
   title: Build the release artifacts
   description: |-
     If after the last day of the feature freeze phase no blocking issues are
-    in JIRA with "Fix Version" {{ release_version }}, then it's time to build the
+    in GitHub with milestone {{ release_version }}, then it's time to build the
     release artifacts, run the smoke tester and stage the RC in svn
   depends:
   - test
@@ -1344,8 +1344,8 @@ groups:
       CHANGES.txt files, removing any duplicate entries, but only from sections for as-yet 
       unreleased versions; leave intact duplicate entries for already-released versions.
 
-      There is a script to generate a regex that will match JIRAs fixed in a release: 
-      `releasedJirasRegex.py`. The following examples will print regexes matching all JIRAs 
+      There is a script to generate a regex that will match issues fixed in a release: 
+      `releasedJirasRegex.py`. The following examples will print regexes matching all issues 
       fixed in {{ release_version }}, which can then be used to find duplicates in unreleased 
       version sections of the corresponding CHANGES.txt files.
     commands: !Commands
@@ -1517,55 +1517,32 @@ groups:
       because there was no released Lucene version to test against.
       {{ set_java_home(release_version) }}
   - !Todo
-    id: jira_release
-    title: Mark version as released in JIRA
+    id: github_release
+    title: Mark version as released in GitHub
     description: |-
-      Go to the JIRA "Manage Versions" Administration pages.
-
-      . Next to version {{ release_version }}, click the gear pop-up menu icon and choose "Release"
-      . Fill in the release date ({{ release_date | formatdate }})
-      . It will give the option of transitioning issues marked fix-for the released version to the 
-        next version, but do not do this as it will send an email for each issue :)
+      Go to https://github.com/apache/lucene/milestones.
+      . Click "Close" on the version you just released.
     links:
-    - https://issues.apache.org/jira/plugins/servlet/project-config/LUCENE/versions
+    - https://github.com/apache/lucene/milestones
   - !Todo
-    id: jira_close_resolved
-    title: Close all issues resolved in the release
+    id: github_change_unresolved
+    title: Remove milestone for unresolved
     description: |-
-      Go to JIRA search to find all issues that were fixed in the release
-      you just made, whose Status is Resolved.
+      Go to GitHub to update all open issues assigned to milestone _{{ release_version }}_.
 
-      . Go to https://issues.apache.org/jira/issues/?jql=project+in+(LUCENE)+AND+status=Resolved+AND+fixVersion={{ release_version }}
-      . Do a bulk change (Under Tools... menu) to close all of these issues. This is a workflow transition task
-      . In the 'Comment' box type `Closing after the {{ release_version }} release`
-      . *Uncheck* the box that says `Send mail for this update`
-    links:
-    - https://issues.apache.org/jira/issues/?jql=project+in+(LUCENE)+AND+status=Resolved+AND+fixVersion={{ release_version }}
-  - !Todo
-    id: jira_change_unresolved
-    title: Remove fixVersion for unresolved
-    description: |-
-      Do another JIRA search to find all issues with Resolution=_Unresolved_ and fixVersion=_{{ release_version }}_.
-
-      . Open https://issues.apache.org/jira/issues/?jql=project+=+LUCENE+AND+resolution=Unresolved+AND+fixVersion={{ release_version }}
-      . In the `Tools` menu, start a bulk change - operation="Edit issues"
-      . Identify issues that *are included* in the release, but are unresolved e.g. due to being REOPENED. These shall *not* be bulk changed!
-      . Check the box next to `Change Fix Version/s` and in the dropdown `Find and remove these`, selecting v {{ release_version }}
-      . On the bottom of the form, uncheck the box that says `Send mail for this update`
-      . Click `Next`, review the changes and click `Confirm`
+      . Open https://github.com/apache/lucene/milestones/{{ release_version }}
+      . Remove the milestone from all issues and pull requests that are still open.
     links:
-    - https://issues.apache.org/jira/issues/?jql=project+=+LUCENE+AND+resolution=Unresolved+AND+fixVersion={{ release_version }}
+    - https://github.com/apache/lucene/milestones/{{ release_version }}
   - !Todo
-    id: new_jira_versions_bugfix
-    title: Add a new version in JIRA for the next release
+    id: new_github_versions_bugfix
+    title: Add a new milestone in GitHub for the next release
     description: |-
-      Go to the JIRA "Manage Versions" Administration pages and add the new version:
-
-      . Create a new (unreleased) version `{{ get_next_version }}`
+      Go to GitHub milestones and add the new version {{ get_next_version }}`.
     types:
     - bugfix
     links:
-    - https://issues.apache.org/jira/plugins/servlet/project-config/LUCENE/versions
+    - https://github.com/apache/lucene/milestones
   - !Todo
     id: stop_promoting_old
     title: Stop promoting old releases


[lucene] 03/04: Tune the amount of memory that is allocated to sorting postings upon flushing. (#12011)

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 5075126f76b86bbe768cf3bfe141ea262cb3e3c6
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Dec 27 11:11:18 2022 +0100

    Tune the amount of memory that is allocated to sorting postings upon flushing. (#12011)
    
    When flushing segments that have an index sort configured, postings lists get
    loaded into arrays and get reordered according to the index sort.
    
    This reordering is implemented with `TimSorter`, a variant of merge sort. Like
    merge sort, an important part of `TimSorter` consists of merging two contiguous
    sorted slices of the array into a combined sorted slice. This merging can be
    done either with external memory, which is the classical approach, or in-place,
    which still runs in linear time but with a much higher constant factor. Until
    now, we allocated a fixed budget of `maxDoc/64` temporary slots for doing these
    merges with external memory. If that budget was not enough, sorted slices were
    merged in place.
    
    I've been looking at some profiles recently for an index where a non-negligible
    chunk of the time was spent on in-place merges. So I would like to propose the
    following change:
     - Increase the maximum RAM budget to `maxDoc / 8`. This should help avoid
       in-place merges for all postings up to `docFreq = maxDoc / 4`.
     - Make this RAM budget lazily allocated, rather than eagerly like today. This
       would help avoid allocating O(maxDoc) memory for fields like primary keys
       that only have a couple of postings per term.
    
    So overall memory usage would never be more than 50% higher than it is today,
    because `TimSorter` never needs more than X temporary slots unless the postings
    list has at least 2*X entries, and those 2*X entries already get loaded into
    memory today. For fields that have short postings lists, memory usage should
    actually be lower.
---
 lucene/CHANGES.txt                                 |  4 ++++
 .../apache/lucene/index/FreqProxTermsWriter.java   | 28 +++++++++++++++-------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fb5604ecb1e..8d076db0df9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -154,8 +154,12 @@ Optimizations
 
 * GITHUB#12006: Do ints compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries. (Guo Feng)
 
+* GITHUB#12011: Minor speedup to flushing long postings lists when an index
+  sort is configured. (Adrien Grand)
+
 * GITHUB#12017: Aggressive count in BooleanWeight. (Lu Xugang)
 
+
 Other
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index 1ca589c961d..ce5de0f3974 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -34,6 +34,8 @@ import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IntBlockPool;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.TimSorter;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 
@@ -228,12 +230,12 @@ final class FreqProxTermsWriter extends TermsHash {
 
       private int[] docs;
       private int[] freqs;
-      private final int[] tmpDocs;
+      private int[] tmpDocs;
       private int[] tmpFreqs;
 
       DocFreqSorter(int maxDoc) {
-        super(maxDoc / 64);
-        this.tmpDocs = new int[maxDoc / 64];
+        super(maxDoc / 8);
+        this.tmpDocs = IntsRef.EMPTY_INTS;
       }
 
       public void reset(int[] docs, int[] freqs) {
@@ -272,6 +274,12 @@ final class FreqProxTermsWriter extends TermsHash {
 
       @Override
       protected void save(int i, int len) {
+        if (tmpDocs.length < len) {
+          tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
+          if (freqs != null) {
+            tmpFreqs = new int[tmpDocs.length];
+          }
+        }
         System.arraycopy(docs, i, tmpDocs, 0, len);
         if (freqs != null) {
           System.arraycopy(freqs, i, tmpFreqs, 0, len);
@@ -423,13 +431,13 @@ final class FreqProxTermsWriter extends TermsHash {
 
       private int[] docs;
       private long[] offsets;
-      private final int[] tmpDocs;
-      private final long[] tmpOffsets;
+      private int[] tmpDocs;
+      private long[] tmpOffsets;
 
       public DocOffsetSorter(int maxDoc) {
-        super(maxDoc / 64);
-        this.tmpDocs = new int[maxDoc / 64];
-        this.tmpOffsets = new long[maxDoc / 64];
+        super(maxDoc / 8);
+        this.tmpDocs = IntsRef.EMPTY_INTS;
+        this.tmpOffsets = LongsRef.EMPTY_LONGS;
       }
 
       public void reset(int[] docs, long[] offsets) {
@@ -461,6 +469,10 @@ final class FreqProxTermsWriter extends TermsHash {
 
       @Override
       protected void save(int i, int len) {
+        if (tmpDocs.length < len) {
+          tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
+          tmpOffsets = new long[tmpDocs.length];
+        }
         System.arraycopy(docs, i, tmpDocs, 0, len);
         System.arraycopy(offsets, i, tmpOffsets, 0, len);
       }