You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2018/02/08 17:31:37 UTC
[4/4] lucene-solr:branch_7x: LUCENE-7966: Fix backwards-codecs of 7.x branch to not use StringHelper methods (as they are more picky and may break indexing)

LUCENE-7966: Fix backwards-codecs of 7.x branch to not use StringHelper methods (as they are more picky and may break indexing)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/30c4d6ea
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/30c4d6ea
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/30c4d6ea

Branch: refs/heads/branch_7x
Commit: 30c4d6ea35721cd2fc5cc6bd5cdff77766d1f1fc
Parents: 621f5ec
Author: Uwe Schindler <us...@apache.org>
Authored: Thu Feb 8 18:31:08 2018 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Thu Feb 8 18:31:08 2018 +0100

----------------------------------------------------------------------
 .../codecs/lucene54/LegacyStringHelper.java     | 72 ++++++++++++++++++++
 .../lucene54/Lucene54DocValuesConsumer.java     |  7 +-
 2 files changed, 75 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/30c4d6ea/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/LegacyStringHelper.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/LegacyStringHelper.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/LegacyStringHelper.java
new file mode 100644
index 0000000..fc55d15
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/LegacyStringHelper.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene54;
+
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Legacy methods for manipulating strings.
+ *
+ * @lucene.internal
+ * @deprecated This is only used for backwards compatibility codecs (they
+ * don't work with the Java9-based replacement methods).
+ */
+@Deprecated
+abstract class LegacyStringHelper {
+
+  /**
+   * Compares two {@link BytesRef}, element by element, and returns the
+   * number of elements common to both arrays (from the start of each).
+   *
+   * @param left The first {@link BytesRef} to compare
+   * @param right The second {@link BytesRef} to compare
+   * @return The number of common elements (from the start of each).
+   */
+  public static int bytesDifference(BytesRef left, BytesRef right) {
+    int len = left.length < right.length ? left.length : right.length;
+    final byte[] bytesLeft = left.bytes;
+    final int offLeft = left.offset;
+    byte[] bytesRight = right.bytes;
+    final int offRight = right.offset;
+    for (int i = 0; i < len; i++)
+      if (bytesLeft[i+offLeft] != bytesRight[i+offRight])
+        return i;
+    return len;
+  }
+  
+  /** 
+   * Returns the length of {@code currentTerm} needed for use as a sort key,
+   * so that {@link BytesRef#compareTo(BytesRef)} still returns the same result.
+   * This method assumes currentTerm comes after priorTerm.
+   */
+  public static int sortKeyLength(final BytesRef priorTerm, final BytesRef currentTerm) {
+    final int currentTermOffset = currentTerm.offset;
+    final int priorTermOffset = priorTerm.offset;
+    final int limit = Math.min(priorTerm.length, currentTerm.length);
+    for (int i = 0; i < limit; i++) {
+      if (priorTerm.bytes[priorTermOffset+i] != currentTerm.bytes[currentTermOffset+i]) {
+        return i+1;
+      }
+    }
+    return Math.min(1+priorTerm.length, currentTerm.length);
+  }
+
+  private LegacyStringHelper() {
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/30c4d6ea/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesConsumer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesConsumer.java
index 217cd43..dbf3f14 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesConsumer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene54/Lucene54DocValuesConsumer.java
@@ -46,7 +46,6 @@ import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
 import org.apache.lucene.util.PagedBytes;
-import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.packed.DirectMonotonicWriter;
 import org.apache.lucene.util.packed.DirectWriter;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
@@ -433,7 +432,7 @@ final class Lucene54DocValuesConsumer extends DocValuesConsumer implements Close
           previousValue.copyBytes(v);
         } else if (termPosition == INTERVAL_COUNT - 1) {
           // last term in block, accumulate shared prefix against first term
-          prefixSum += StringHelper.bytesDifference(previousValue.get(), v);
+          prefixSum += LegacyStringHelper.bytesDifference(previousValue.get(), v);
         }
       }
       numValues++;
@@ -479,7 +478,7 @@ final class Lucene54DocValuesConsumer extends DocValuesConsumer implements Close
         } else {
           // prefix-code: we only share at most 255 characters, to encode the length as a single
           // byte and have random access. Larger terms just get less compression.
-          int sharedPrefix = Math.min(255, StringHelper.bytesDifference(lastTerm.get(), v));
+          int sharedPrefix = Math.min(255, LegacyStringHelper.bytesDifference(lastTerm.get(), v));
           bytesBuffer.writeByte((byte) sharedPrefix);
           bytesBuffer.writeBytes(v.bytes, v.offset + sharedPrefix, v.length - sharedPrefix);
           // we can encode one smaller, because terms are unique.
@@ -557,7 +556,7 @@ final class Lucene54DocValuesConsumer extends DocValuesConsumer implements Close
     for (BytesRef b : values) {
       int termPosition = (int) (count & REVERSE_INTERVAL_MASK);
       if (termPosition == 0) {
-        int len = StringHelper.sortKeyLength(priorTerm.get(), b);
+        int len = LegacyStringHelper.sortKeyLength(priorTerm.get(), b);
         indexTerm.bytes = b.bytes;
         indexTerm.offset = b.offset;
         indexTerm.length = len;