You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2017/01/16 21:08:02 UTC

[01/23] lucene-solr:jira/solr-8593: add test that EdgeNGram filter keeps payloads

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-8593 483bfa642 -> 292e51887


add test that EdgeNGram filter keeps payloads


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/61e45283
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/61e45283
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/61e45283

Branch: refs/heads/jira/solr-8593
Commit: 61e45283061ae486acc5882c5a770025c8291222
Parents: 987e265
Author: Nathan Gass <ga...@search.ch>
Authored: Mon Jan 9 14:59:31 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 12:14:26 2017 +0100

----------------------------------------------------------------------
 .../lucene/analysis/ngram/TestNGramFilters.java | 22 ++++++++++++++++++++
 1 file changed, 22 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/61e45283/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
index 1243352..b6f4405 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
@@ -22,7 +22,10 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.payloads.PayloadHelper;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Simple tests to ensure the NGram filter factories are working.
@@ -123,6 +126,25 @@ public class TestNGramFilters extends BaseTokenStreamFactoryTestCase {
     assertTokenStreamContents(stream, 
         new String[] { "t", "te" });
   }
+
+  public void testEdgeNGramFilterPayload() throws Exception {
+    Reader reader = new StringReader("test|0.1");
+    TokenStream stream = whitespaceMockTokenizer(reader);
+    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
+    stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);
+
+    stream.reset();
+    while (stream.incrementToken()) {
+      PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
+      assertNotNull(payAttr);
+      BytesRef payData = payAttr.getPayload();
+      assertNotNull(payData);
+      float payFloat = PayloadHelper.decodeFloat(payData.bytes);
+      assertEquals(0.1f, payFloat, 0.0f);
+    }
+    stream.end();
+    stream.close();
+  }
   
   /** Test that bogus arguments result in exception */
   public void testBogusArguments() throws Exception {


[18/23] lucene-solr:jira/solr-8593: Fix precommit

Posted by kr...@apache.org.
Fix precommit


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/478de2a5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/478de2a5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/478de2a5

Branch: refs/heads/jira/solr-8593
Commit: 478de2a59a12335bfecf16f9f39f99403d853848
Parents: fc2e0fd
Author: Alan Woodward <ro...@apache.org>
Authored: Mon Jan 16 11:41:59 2017 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Mon Jan 16 11:42:36 2017 +0000

----------------------------------------------------------------------
 .../java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java | 1 -
 .../src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java | 1 -
 2 files changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/478de2a5/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 47b80ff..56efd89 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.AttributeSource.State;
 
 /**
  * Tokenizes the given token into n-grams of given size(s).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/478de2a5/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index cb5d447..a2e0aa7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.AttributeSource.State;
 
 /**
  * Tokenizes the input into n-grams of the given size(s).


[14/23] lucene-solr:jira/solr-8593: Revert "Add getMatchingChildren() method to Scorer"

Posted by kr...@apache.org.
Revert "Add getMatchingChildren() method to Scorer"

This reverts commit 9403372fbc36aced848bf8aa498bc71b7b94567b.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9aa78dcc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9aa78dcc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9aa78dcc

Branch: refs/heads/jira/solr-8593
Commit: 9aa78dcca350b11c752dddbc3cfa78b84ecfcf68
Parents: 27ec40d
Author: Alan Woodward <ro...@apache.org>
Authored: Sun Jan 15 10:18:20 2017 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Sun Jan 15 10:18:20 2017 +0000

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ---
 .../apache/lucene/search/DisjunctionScorer.java |  8 -----
 .../lucene/search/MinShouldMatchSumScorer.java  |  9 ------
 .../java/org/apache/lucene/search/Scorer.java   |  8 -----
 .../search/TestBooleanQueryVisitSubscorers.java | 33 --------------------
 5 files changed, 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa78dcc/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 540188e..58201d6 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -130,10 +130,6 @@ New features
   SortedSetDocValues to allow filtering their TermsEnums with a
   CompiledAutomaton (Alan Woodward, Mike McCandless)
 
-* LUCENE-7628: Scorer now has a getMatchingChildren() method that will 
-  return all child scorers positioned on the current document.  (Alan
-  Woodward)
-
 Bug Fixes
 
 * LUCENE-7547: JapaneseTokenizerFactory was failing to close the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa78dcc/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
index a76999e..c53942a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@@ -202,12 +202,4 @@ abstract class DisjunctionScorer extends Scorer {
     return children;
   }
 
-  @Override
-  public Collection<ChildScorer> getMatchingChildren() throws IOException {
-    List<ChildScorer> children = new ArrayList<>();
-    for (DisiWrapper w = getSubMatches(); w != null; w = w.next) {
-      children.add(new ChildScorer(w.scorer, "SHOULD"));
-    }
-    return children;
-  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa78dcc/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
index b977400..032b5fe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
@@ -133,15 +133,6 @@ final class MinShouldMatchSumScorer extends Scorer {
   }
 
   @Override
-  public Collection<ChildScorer> getMatchingChildren() throws IOException {
-    List<ChildScorer> children = new ArrayList<>();
-    for (DisiWrapper s = lead; s != null; s = s.next) {
-      children.add(new ChildScorer(s.scorer, "SHOULD"));
-    }
-    return children;
-  }
-
-  @Override
   public DocIdSetIterator iterator() {
     return new DocIdSetIterator() {
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa78dcc/lucene/core/src/java/org/apache/lucene/search/Scorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
index 2e35e91..f434327 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@@ -82,14 +82,6 @@ public abstract class Scorer {
   public Collection<ChildScorer> getChildren() {
     return Collections.emptyList();
   }
-
-  /**
-   * Returns child sub-scorers that match the current document
-   * @lucene.experimental
-   */
-  public Collection<ChildScorer> getMatchingChildren() throws IOException {
-    return getChildren();
-  }
   
   /** A child Scorer and its relationship to its parent.
    * the meaning of the relationship depends upon the parent query. 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa78dcc/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
index 0909d5d..60ba528 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
@@ -108,39 +108,6 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
     assertEquals(2, tfs.get(1).intValue()); // f2:search + f2:lucene
     assertEquals(2, tfs.get(2).intValue()); // f2:search + f2:lucene
   }
-
-  public void testDisjunctionMatches() throws IOException {
-    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
-    bq1.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
-    bq1.add(new PhraseQuery(F2, "search", "engine"), Occur.SHOULD);
-
-    Weight w1 = scorerSearcher.createNormalizedWeight(bq1.build(), true);
-    Scorer s1 = w1.scorer(reader.leaves().get(0));
-    assertEquals(0, s1.iterator().nextDoc());
-    assertEquals(2, s1.getMatchingChildren().size());
-
-    BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
-    bq2.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
-    bq2.add(new PhraseQuery(F2, "search", "library"), Occur.SHOULD);
-
-    Weight w2 = scorerSearcher.createNormalizedWeight(bq2.build(), true);
-    Scorer s2 = w2.scorer(reader.leaves().get(0));
-    assertEquals(0, s2.iterator().nextDoc());
-    assertEquals(1, s2.getMatchingChildren().size());
-  }
-
-  public void testMinShouldMatchMatches() throws IOException {
-    BooleanQuery.Builder bq = new BooleanQuery.Builder();
-    bq.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
-    bq.add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
-    bq.add(new PhraseQuery(F2, "search", "library"), Occur.SHOULD);
-    bq.setMinimumNumberShouldMatch(2);
-
-    Weight w = scorerSearcher.createNormalizedWeight(bq.build(), true);
-    Scorer s = w.scorer(reader.leaves().get(0));
-    assertEquals(0, s.iterator().nextDoc());
-    assertEquals(2, s.getMatchingChildren().size());
-  }
   
   public void testConjunctions() throws IOException {
     BooleanQuery.Builder bq = new BooleanQuery.Builder();


[04/23] lucene-solr:jira/solr-8593: add comment and test for ngram token filter

Posted by kr...@apache.org.
add comment and test for ngram token filter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/80e28542
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/80e28542
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/80e28542

Branch: refs/heads/jira/solr-8593
Commit: 80e2854247cce485920b45acdeffa3e68bcea385
Parents: 01f2a87
Author: Nathan Gass <ga...@search.ch>
Authored: Fri Jan 13 16:42:41 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 16:42:41 2017 +0100

----------------------------------------------------------------------
 .../lucene/analysis/ngram/TestNGramFilters.java | 25 ++++++++++++++++++++
 1 file changed, 25 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/80e28542/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
index b6f4405..5de532f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/TestNGramFilters.java
@@ -80,6 +80,28 @@ public class TestNGramFilters extends BaseTokenStreamFactoryTestCase {
   }
 
   /**
+   * Test NGramFilterFactory on tokens with payloads
+   */
+  public void testNGramFilterPayload() throws Exception {
+    Reader reader = new StringReader("test|0.1");
+    TokenStream stream = whitespaceMockTokenizer(reader);
+    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
+    stream = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(stream);
+
+    stream.reset();
+    while (stream.incrementToken()) {
+      PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
+      assertNotNull(payAttr);
+      BytesRef payData = payAttr.getPayload();
+      assertNotNull(payData);
+      float payFloat = PayloadHelper.decodeFloat(payData.bytes);
+      assertEquals(0.1f, payFloat, 0.0f);
+    }
+    stream.end();
+    stream.close();
+  }
+
+  /**
    * Test EdgeNGramTokenizerFactory
    */
   public void testEdgeNGramTokenizer() throws Exception {
@@ -127,6 +149,9 @@ public class TestNGramFilters extends BaseTokenStreamFactoryTestCase {
         new String[] { "t", "te" });
   }
 
+  /**
+   * Test EdgeNGramFilterFactory on tokens with payloads
+   */
   public void testEdgeNGramFilterPayload() throws Exception {
     Reader reader = new StringReader("test|0.1");
     TokenStream stream = whitespaceMockTokenizer(reader);


[12/23] lucene-solr:jira/solr-8593: Fix compile warning in Lucene Core; make Eclipse happy by moving Java-1-like-side-by-side class to separate file

Posted by kr...@apache.org.
Fix compile warning in Lucene Core; make Eclipse happy by moving Java-1-like-side-by-side class to separate file


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/60d4a554
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/60d4a554
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/60d4a554

Branch: refs/heads/jira/solr-8593
Commit: 60d4a554ecd0ac00bf1cd84041f19dc3f8926cf3
Parents: 4eafdb3
Author: Uwe Schindler <us...@apache.org>
Authored: Sat Jan 14 19:46:59 2017 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Sat Jan 14 19:46:59 2017 +0100

----------------------------------------------------------------------
 .../lucene/search/MultiCollectorManager.java    |   6 +-
 .../solr/highlight/LuceneRegexFragmenter.java   | 217 +++++++++++++++++++
 .../apache/solr/highlight/RegexFragmenter.java  | 196 -----------------
 3 files changed, 221 insertions(+), 198 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/60d4a554/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java
index 9549cde..a8c6d1c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollectorManager.java
@@ -31,8 +31,10 @@ public class MultiCollectorManager implements CollectorManager<MultiCollectorMan
 
   final private CollectorManager<Collector, ?>[] collectorManagers;
 
-  public MultiCollectorManager(final CollectorManager... collectorManagers) {
-    this.collectorManagers = collectorManagers;
+  @SafeVarargs
+  @SuppressWarnings({"varargs", "unchecked"})
+  public MultiCollectorManager(final CollectorManager<? extends Collector, ?>... collectorManagers) {
+    this.collectorManagers = (CollectorManager[]) collectorManagers;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/60d4a554/solr/core/src/java/org/apache/solr/highlight/LuceneRegexFragmenter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/LuceneRegexFragmenter.java b/solr/core/src/java/org/apache/solr/highlight/LuceneRegexFragmenter.java
new file mode 100644
index 0000000..0dc3340
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/highlight/LuceneRegexFragmenter.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.highlight;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.search.highlight.Fragmenter;
+
+/**
+ * Fragmenter that tries to produce snippets that "look" like a regular 
+ * expression.
+ *
+ * NOTE: the default for <code>maxAnalyzedChars</code> is much lower for this 
+ * fragmenter.  After this limit is exhausted, fragments are produced in the
+ * same way as <code>GapFragmenter</code>
+ */
+class LuceneRegexFragmenter implements Fragmenter
+{
+  // ** defaults
+  public static final int DEFAULT_FRAGMENT_SIZE = 70;
+  public static final int DEFAULT_INCREMENT_GAP = 50;
+  public static final float DEFAULT_SLOP = 0.6f;
+  public static final int DEFAULT_MAX_ANALYZED_CHARS = 10000;
+
+  // ** settings
+
+  // desired length of fragments, in characters
+  protected int targetFragChars;
+  // increment gap which indicates a new fragment should occur 
+  // (often due to multi-valued fields)
+  protected int incrementGapThreshold;
+  // factor by which we are allowed to bend the frag size (larger or smaller)
+  protected float slop;
+  // analysis limit (ensures we don't waste too much time on long fields)
+  protected int maxAnalyzedChars;
+  // default desirable pattern for text fragments.
+  protected Pattern textRE;
+  
+
+  // ** state
+  protected int currentNumFrags;
+  protected int currentOffset;
+  protected int targetOffset;
+  protected int[] hotspots;
+
+  private PositionIncrementAttribute posIncAtt;
+  private OffsetAttribute offsetAtt;
+
+  // ** other
+  // note: could dynamically change size of sentences extracted to match
+  // target frag size
+  public static final String 
+    DEFAULT_PATTERN_RAW = "[-\\w ,\\n\"']{20,200}";
+  public static final Pattern 
+    DEFAULT_PATTERN = Pattern.compile(DEFAULT_PATTERN_RAW);
+
+
+  public LuceneRegexFragmenter() {
+    this(DEFAULT_FRAGMENT_SIZE, 
+         DEFAULT_INCREMENT_GAP,
+         DEFAULT_SLOP,
+         DEFAULT_MAX_ANALYZED_CHARS);
+  }
+  public LuceneRegexFragmenter(int targetFragChars) {
+    this(targetFragChars, 
+         DEFAULT_INCREMENT_GAP,
+         DEFAULT_SLOP,
+         DEFAULT_MAX_ANALYZED_CHARS);
+  }
+
+  public LuceneRegexFragmenter(int targetFragChars, 
+                               int incrementGapThreshold,
+                               float slop,
+                               int maxAnalyzedChars ) {
+    this(targetFragChars, incrementGapThreshold, slop, maxAnalyzedChars,
+         DEFAULT_PATTERN);
+         
+  }
+
+  public LuceneRegexFragmenter(int targetFragChars, 
+                               int incrementGapThreshold,
+                               float slop,
+                               int maxAnalyzedChars,
+                               Pattern targetPattern) {
+    this.targetFragChars = targetFragChars;
+    this.incrementGapThreshold = incrementGapThreshold;    
+    this.slop = slop;
+    this.maxAnalyzedChars = maxAnalyzedChars;
+    this.textRE = targetPattern;
+  }
+  
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
+   */
+  @Override
+  public void start(String originalText, TokenStream tokenStream) {
+    currentNumFrags = 1;
+    currentOffset = 0;
+    addHotSpots(originalText);
+    posIncAtt = tokenStream.getAttribute(PositionIncrementAttribute.class);
+    offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
+  }
+
+  ////////////////////////////////////
+  // pre-analysis
+  ////////////////////////////////////
+
+  protected void addHotSpots(String text) {
+    //System.out.println("hot spotting");
+    ArrayList<Integer> temphs = new ArrayList<>(
+                              text.length() / targetFragChars);
+    Matcher match = textRE.matcher(text);
+    int cur = 0;
+    while(match.find() && cur < maxAnalyzedChars) {
+      int start=match.start(), end=match.end();
+      temphs.add(start);
+      temphs.add(end);
+      cur = end;
+      //System.out.println("Matched " + match.group());
+    }    
+    hotspots = new int[temphs.size()];
+    for(int i = 0; i < temphs.size(); i++) {
+      hotspots[i] = temphs.get(i);
+    }
+    // perhaps not necessary--I don't know if regex matches are non-overlapping
+    Arrays.sort(hotspots);
+  }
+
+  ////////////////////////////////////
+  // fragmenting
+  ////////////////////////////////////
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
+   */
+  @Override
+  public boolean isNewFragment()
+  {
+    boolean isNewFrag = false;
+    int minFragLen = (int)((1.0f - slop)*targetFragChars);
+    int endOffset = offsetAtt.endOffset();
+    
+    // ** determine isNewFrag
+    if(posIncAtt.getPositionIncrement() > incrementGapThreshold) {
+      // large position gaps always imply new fragments
+      isNewFrag = true;
+
+    } else if(endOffset - currentOffset < minFragLen) {
+      // we're not in our range of flexibility
+      isNewFrag = false;
+
+    } else if(targetOffset > 0) {
+      // we've already decided on a target
+      isNewFrag = endOffset > targetOffset;
+
+    } else {
+      // we might be able to do something
+      int minOffset = currentOffset + minFragLen;
+      int maxOffset = (int)(currentOffset + (1.0f + slop)*targetFragChars);
+      int hotIndex;
+
+      // look for a close hotspot
+      hotIndex = Arrays.binarySearch(hotspots, endOffset);
+      if(hotIndex < 0) hotIndex = -hotIndex;
+      if(hotIndex >= hotspots.length) {
+        // no more hotspots in this input stream
+        targetOffset = currentOffset + targetFragChars;
+
+      } else if(hotspots[hotIndex] > maxOffset) {
+        // no hotspots within slop
+        targetOffset = currentOffset + targetFragChars;
+
+      } else {
+        // try to find hotspot in slop
+        int goal = hotspots[hotIndex];
+        while(goal < minOffset && hotIndex < hotspots.length) {
+          hotIndex++;
+          goal = hotspots[hotIndex];
+        }        
+        targetOffset = goal <= maxOffset ? goal : currentOffset + targetFragChars;
+      }
+
+      isNewFrag = endOffset > targetOffset;
+    }      
+      
+    // ** operate on isNewFrag
+    if(isNewFrag) {
+        currentNumFrags++;
+        currentOffset = endOffset;
+        targetOffset = -1;
+    }
+    return isNewFrag;
+  }
+  
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/60d4a554/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java b/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java
index a80141e..b755b2d 100644
--- a/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/RegexFragmenter.java
@@ -16,14 +16,8 @@
  */
 package org.apache.solr.highlight;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.search.highlight.Fragmenter;
 import org.apache.lucene.search.highlight.NullFragmenter;
 import org.apache.solr.common.params.HighlightParams;
@@ -94,193 +88,3 @@ public class RegexFragmenter extends HighlightingPluginBase implements SolrFragm
     return "RegexFragmenter (" + defaultPatternRaw + ")";
   }
 }
-
-
-/**
- * Fragmenter that tries to produce snippets that "look" like a regular 
- * expression.
- *
- * NOTE: the default for <code>maxAnalyzedChars</code> is much lower for this 
- * fragmenter.  After this limit is exhausted, fragments are produced in the
- * same way as <code>GapFragmenter</code>
- */
-class LuceneRegexFragmenter implements Fragmenter
-{
-  // ** defaults
-  public static final int DEFAULT_FRAGMENT_SIZE = 70;
-  public static final int DEFAULT_INCREMENT_GAP = 50;
-  public static final float DEFAULT_SLOP = 0.6f;
-  public static final int DEFAULT_MAX_ANALYZED_CHARS = 10000;
-
-  // ** settings
-
-  // desired length of fragments, in characters
-  protected int targetFragChars;
-  // increment gap which indicates a new fragment should occur 
-  // (often due to multi-valued fields)
-  protected int incrementGapThreshold;
-  // factor by which we are allowed to bend the frag size (larger or smaller)
-  protected float slop;
-  // analysis limit (ensures we don't waste too much time on long fields)
-  protected int maxAnalyzedChars;
-  // default desirable pattern for text fragments.
-  protected Pattern textRE;
-  
-
-  // ** state
-  protected int currentNumFrags;
-  protected int currentOffset;
-  protected int targetOffset;
-  protected int[] hotspots;
-
-  private PositionIncrementAttribute posIncAtt;
-  private OffsetAttribute offsetAtt;
-
-  // ** other
-  // note: could dynamically change size of sentences extracted to match
-  // target frag size
-  public static final String 
-    DEFAULT_PATTERN_RAW = "[-\\w ,\\n\"']{20,200}";
-  public static final Pattern 
-    DEFAULT_PATTERN = Pattern.compile(DEFAULT_PATTERN_RAW);
-
-
-  public LuceneRegexFragmenter() {
-    this(DEFAULT_FRAGMENT_SIZE, 
-         DEFAULT_INCREMENT_GAP,
-         DEFAULT_SLOP,
-         DEFAULT_MAX_ANALYZED_CHARS);
-  }
-  public LuceneRegexFragmenter(int targetFragChars) {
-    this(targetFragChars, 
-         DEFAULT_INCREMENT_GAP,
-         DEFAULT_SLOP,
-         DEFAULT_MAX_ANALYZED_CHARS);
-  }
-
-  public LuceneRegexFragmenter(int targetFragChars, 
-                               int incrementGapThreshold,
-                               float slop,
-                               int maxAnalyzedChars ) {
-    this(targetFragChars, incrementGapThreshold, slop, maxAnalyzedChars,
-         DEFAULT_PATTERN);
-         
-  }
-
-  public LuceneRegexFragmenter(int targetFragChars, 
-                               int incrementGapThreshold,
-                               float slop,
-                               int maxAnalyzedChars,
-                               Pattern targetPattern) {
-    this.targetFragChars = targetFragChars;
-    this.incrementGapThreshold = incrementGapThreshold;    
-    this.slop = slop;
-    this.maxAnalyzedChars = maxAnalyzedChars;
-    this.textRE = targetPattern;
-  }
-  
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
-   */
-  @Override
-  public void start(String originalText, TokenStream tokenStream) {
-    currentNumFrags = 1;
-    currentOffset = 0;
-    addHotSpots(originalText);
-    posIncAtt = tokenStream.getAttribute(PositionIncrementAttribute.class);
-    offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
-  }
-
-  ////////////////////////////////////
-  // pre-analysis
-  ////////////////////////////////////
-
-  protected void addHotSpots(String text) {
-    //System.out.println("hot spotting");
-    ArrayList<Integer> temphs = new ArrayList<>(
-                              text.length() / targetFragChars);
-    Matcher match = textRE.matcher(text);
-    int cur = 0;
-    while(match.find() && cur < maxAnalyzedChars) {
-      int start=match.start(), end=match.end();
-      temphs.add(start);
-      temphs.add(end);
-      cur = end;
-      //System.out.println("Matched " + match.group());
-    }    
-    hotspots = new int[temphs.size()];
-    for(int i = 0; i < temphs.size(); i++) {
-      hotspots[i] = temphs.get(i);
-    }
-    // perhaps not necessary--I don't know if re matches are non-overlapping
-    Arrays.sort(hotspots);
-  }
-
-  ////////////////////////////////////
-  // fragmenting
-  ////////////////////////////////////
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
-   */
-  @Override
-  public boolean isNewFragment()
-  {
-    boolean isNewFrag = false;
-    int minFragLen = (int)((1.0f - slop)*targetFragChars);
-    int endOffset = offsetAtt.endOffset();
-    
-    // ** determin isNewFrag
-    if(posIncAtt.getPositionIncrement() > incrementGapThreshold) {
-      // large position gaps always imply new fragments
-      isNewFrag = true;
-
-    } else if(endOffset - currentOffset < minFragLen) {
-      // we're not in our range of flexibility
-      isNewFrag = false;
-
-    } else if(targetOffset > 0) {
-      // we've already decided on a target
-      isNewFrag = endOffset > targetOffset;
-
-    } else {
-      // we might be able to do something
-      int minOffset = currentOffset + minFragLen;
-      int maxOffset = (int)(currentOffset + (1.0f + slop)*targetFragChars);
-      int hotIndex;
-
-      // look for a close hotspot
-      hotIndex = Arrays.binarySearch(hotspots, endOffset);
-      if(hotIndex < 0) hotIndex = -hotIndex;
-      if(hotIndex >= hotspots.length) {
-        // no more hotspots in this input stream
-        targetOffset = currentOffset + targetFragChars;
-
-      } else if(hotspots[hotIndex] > maxOffset) {
-        // no hotspots within slop
-        targetOffset = currentOffset + targetFragChars;
-
-      } else {
-        // try to find hotspot in slop
-        int goal = hotspots[hotIndex];
-        while(goal < minOffset && hotIndex < hotspots.length) {
-          hotIndex++;
-          goal = hotspots[hotIndex];
-        }        
-        targetOffset = goal <= maxOffset ? goal : currentOffset + targetFragChars;
-      }
-
-      isNewFrag = endOffset > targetOffset;
-    }      
-      
-    // ** operate on isNewFrag
-    if(isNewFrag) {
-        currentNumFrags++;
-        currentOffset = endOffset;
-        targetOffset = -1;
-    }
-    return isNewFrag;
-  }
-  
-}


[15/23] lucene-solr:jira/solr-8593: LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads and preserve all attributes [merge branch 'edgepayloads' from Nathan Gass https://github.com/xabbu42/lucene-solr]

Posted by kr...@apache.org.
LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads and preserve all attributes
[merge branch 'edgepayloads' from Nathan Gass https://github.com/xabbu42/lucene-solr]

Signed-off-by: Uwe Schindler <us...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c64a0115
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c64a0115
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c64a0115

Branch: refs/heads/jira/solr-8593
Commit: c64a01158e972176256e257d6c1d4629b05783a2
Parents: 9aa78dc ea049b9
Author: Uwe Schindler <us...@apache.org>
Authored: Mon Jan 16 11:16:43 2017 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Mon Jan 16 11:16:43 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  7 +++
 .../analysis/ngram/EdgeNGramTokenFilter.java    | 17 ++-----
 .../lucene/analysis/ngram/NGramTokenFilter.java | 19 +++-----
 .../lucene/analysis/ngram/TestNGramFilters.java | 47 ++++++++++++++++++++
 4 files changed, 63 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c64a0115/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --cc lucene/CHANGES.txt
index 58201d6,4912920..530b0d4
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@@ -61,6 -58,6 +61,13 @@@ Othe
  
  * LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)
  
++======================= Lucene 6.5.0 =======================
++
++Bug Fixes
++
++* LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
++  and preserve all attributes. (Nathan Gass via Uwe Schindler)
++
  ======================= Lucene 6.4.0 =======================
  
  API Changes

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c64a0115/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --cc lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 827e26f,df12fda..47b80ff
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@@ -95,8 -88,7 +88,7 @@@ public final class EdgeNGramTokenFilte
        if (curGramSize <= maxGram) {         // if we have hit the end of our n-gram size range, quit
          if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
            // grab gramSize chars from front or back
-           clearAttributes();
-           offsetAtt.setOffset(tokStart, tokEnd);
 -	      restoreState(state);
++          restoreState(state);
            // first ngram gets increment, others don't
            if (curGramSize == minGram) {
              posIncrAtt.setPositionIncrement(savePosIncr);


[08/23] lucene-solr:jira/solr-8593: LUCENE-7627: Add #intersect(CompiledAutomaton) to Sorted*DocValues

Posted by kr...@apache.org.
LUCENE-7627: Add #intersect(CompiledAutomaton) to Sorted*DocValues


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8fa0a8dd
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8fa0a8dd
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8fa0a8dd

Branch: refs/heads/jira/solr-8593
Commit: 8fa0a8dd1e5eb3a5e2553c346372d203d00e575b
Parents: 53d5af1
Author: Alan Woodward <ro...@apache.org>
Authored: Wed Jan 11 12:07:11 2017 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Sat Jan 14 09:40:19 2017 +0000

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +++
 .../apache/lucene/index/SortedDocValues.java    | 22 ++++++++++++++
 .../apache/lucene/index/SortedSetDocValues.java | 22 ++++++++++++++
 .../index/BaseDocValuesFormatTestCase.java      | 32 ++++++++++++++++++++
 4 files changed, 80 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fa0a8dd/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 30943d2..58201d6 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -126,6 +126,10 @@ New features
   concurrently across all segments in the index (Emmanuel Keller via
   Mike McCandless)
 
+* LUCENE-7627: Added .intersect methods to SortedDocValues and 
+  SortedSetDocValues to allow filtering their TermsEnums with a
+  CompiledAutomaton (Alan Woodward, Mike McCandless)
+
 Bug Fixes
 
 * LUCENE-7547: JapaneseTokenizerFactory was failing to close the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fa0a8dd/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
index e2d7dfd..087e487 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
 
 /**
  * A per-document byte[] with presorted values.  This is fundamentally an
@@ -110,4 +111,25 @@ public abstract class SortedDocValues extends BinaryDocValues {
     return new SortedDocValuesTermsEnum(this);
   }
 
+  /**
+   * Returns a {@link TermsEnum} over the values, filtered by a {@link CompiledAutomaton}
+   * The enum supports {@link TermsEnum#ord()}.
+   */
+  public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
+    TermsEnum in = termsEnum();
+    switch (automaton.type) {
+      case NONE:
+        return TermsEnum.EMPTY;
+      case ALL:
+        return in;
+      case SINGLE:
+        return new SingleTermsEnum(in, automaton.term);
+      case NORMAL:
+        return new AutomatonTermsEnum(in, automaton);
+      default:
+        // unreachable
+        throw new RuntimeException("unhandled case");
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fa0a8dd/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index 6d02c25..9e1c6a3 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
 
 /**
  * A multi-valued version of {@link SortedDocValues}.
@@ -102,4 +103,25 @@ public abstract class SortedSetDocValues extends DocValuesIterator {
   public TermsEnum termsEnum() throws IOException {
     return new SortedSetDocValuesTermsEnum(this);
   }
+
+  /**
+   * Returns a {@link TermsEnum} over the values, filtered by a {@link CompiledAutomaton}
+   * The enum supports {@link TermsEnum#ord()}.
+   */
+  public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
+    TermsEnum in = termsEnum();
+    switch (automaton.type) {
+      case NONE:
+        return TermsEnum.EMPTY;
+      case ALL:
+        return in;
+      case SINGLE:
+        return new SingleTermsEnum(in, automaton.term);
+      case NORMAL:
+        return new AutomatonTermsEnum(in, automaton);
+      default:
+        // unreachable
+        throw new RuntimeException("unhandled case");
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8fa0a8dd/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
index d55f212..8cb6665 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
@@ -67,6 +67,8 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.TestUtil;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -906,6 +908,21 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
     termsEnum.seekExact(2);
     assertEquals("world", termsEnum.term().utf8ToString());
     assertEquals(2, termsEnum.ord());
+
+    // NORMAL automaton
+    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertNull(termsEnum.next());
+
+    // SINGLE automaton
+    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertNull(termsEnum.next());
+
     ireader.close();
     directory.close();
   }
@@ -2057,6 +2074,21 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
     termsEnum.seekExact(2);
     assertEquals("world", termsEnum.term().utf8ToString());
     assertEquals(2, termsEnum.ord());
+
+    // NORMAL automaton
+    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertNull(termsEnum.next());
+
+    // SINGLE automaton
+    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertNull(termsEnum.next());
+
     ireader.close();
     directory.close();
   }


[17/23] lucene-solr:jira/solr-8593: LUCENE-7623: Add FunctionMatchQuery and FunctionScoreQuery

Posted by kr...@apache.org.
LUCENE-7623: Add FunctionMatchQuery and FunctionScoreQuery


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fc2e0fd1
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fc2e0fd1
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fc2e0fd1

Branch: refs/heads/jira/solr-8593
Commit: fc2e0fd13324699fe1ddb15bb09960a8501f52f5
Parents: ceaeb42
Author: Alan Woodward <ro...@apache.org>
Authored: Sun Jan 15 18:37:41 2017 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Mon Jan 16 11:15:33 2017 +0000

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   5 +
 .../lucene/search/DoubleValuesSource.java       |  92 +++++++++--
 .../queries/function/FunctionMatchQuery.java    |  99 ++++++++++++
 .../queries/function/FunctionScoreQuery.java    | 151 +++++++++++++++++++
 .../function/TestFunctionMatchQuery.java        |  61 ++++++++
 .../function/TestFunctionScoreExplanations.java | 105 +++++++++++++
 .../function/TestFunctionScoreQuery.java        | 114 ++++++++++++++
 7 files changed, 611 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4963454..851ed72 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,11 @@ Other
 
 ======================= Lucene 6.5.0 =======================
 
+New Features
+
+* LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,
+  Adrien Grand, David Smiley)
+
 Bug Fixes
 
 * LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java
index d4be4e9..af24e1a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java
@@ -20,7 +20,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Objects;
 import java.util.function.DoubleToLongFunction;
+import java.util.function.DoubleUnaryOperator;
 import java.util.function.LongToDoubleFunction;
+import java.util.function.ToDoubleBiFunction;
 
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
@@ -173,6 +175,69 @@ public abstract class DoubleValuesSource {
       public boolean needsScores() {
         return false;
       }
+
+      @Override
+      public String toString() {
+        return "constant(" + value + ")";
+      }
+    };
+  }
+
+  /**
+   * Creates a DoubleValuesSource that is a function of another DoubleValuesSource
+   */
+  public static DoubleValuesSource function(DoubleValuesSource in, DoubleUnaryOperator function) {
+    return new DoubleValuesSource() {
+      @Override
+      public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
+        DoubleValues inputs = in.getValues(ctx, scores);
+        return new DoubleValues() {
+          @Override
+          public double doubleValue() throws IOException {
+            return function.applyAsDouble(inputs.doubleValue());
+          }
+
+          @Override
+          public boolean advanceExact(int doc) throws IOException {
+            return inputs.advanceExact(doc);
+          }
+        };
+      }
+
+      @Override
+      public boolean needsScores() {
+        return in.needsScores();
+      }
+    };
+  }
+
+  /**
+   * Creates a DoubleValuesSource that is a function of another DoubleValuesSource and a score
+   * @param in        the DoubleValuesSource to use as an input
+   * @param function  a function of the form (source, score) == result
+   */
+  public static DoubleValuesSource scoringFunction(DoubleValuesSource in, ToDoubleBiFunction<Double, Double> function) {
+    return new DoubleValuesSource() {
+      @Override
+      public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
+        DoubleValues inputs = in.getValues(ctx, scores);
+        return new DoubleValues() {
+          @Override
+          public double doubleValue() throws IOException {
+            return function.applyAsDouble(inputs.doubleValue(), scores.doubleValue());
+          }
+
+          @Override
+          public boolean advanceExact(int doc) throws IOException {
+            return inputs.advanceExact(doc);
+          }
+        };
+      }
+
+      @Override
+      public boolean needsScores() {
+        return true;
+      }
     };
   }
 
@@ -221,7 +286,17 @@ public abstract class DoubleValuesSource {
     @Override
     public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
       final NumericDocValues values = DocValues.getNumeric(ctx.reader(), field);
-      return toDoubleValues(values, decoder::applyAsDouble);
+      return new DoubleValues() {
+        @Override
+        public double doubleValue() throws IOException {
+          return decoder.applyAsDouble(values.longValue());
+        }
+
+        @Override
+        public boolean advanceExact(int target) throws IOException {
+          return values.advanceExact(target);
+        }
+      };
     }
 
     @Override
@@ -288,21 +363,6 @@ public abstract class DoubleValuesSource {
     }
   }
 
-  private static DoubleValues toDoubleValues(NumericDocValues in, LongToDoubleFunction map) {
-    return new DoubleValues() {
-      @Override
-      public double doubleValue() throws IOException {
-        return map.applyAsDouble(in.longValue());
-      }
-
-      @Override
-      public boolean advanceExact(int target) throws IOException {
-        return in.advanceExact(target);
-      }
-
-    };
-  }
-
   private static NumericDocValues asNumericDocValues(DoubleValuesHolder in, DoubleToLongFunction converter) {
     return new NumericDocValues() {
       @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java
new file mode 100644
index 0000000..4a9c709
--- /dev/null
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.queries.function;
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.function.DoublePredicate;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DoubleValues;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+
+/**
+ * A query that retrieves all documents with a {@link DoubleValues} value matching a predicate
+ *
+ * This query works by a linear scan of the index, and is best used in
+ * conjunction with other queries that can restrict the number of
+ * documents visited
+ */
+public final class FunctionMatchQuery extends Query {
+
+  private final DoubleValuesSource source;
+  private final DoublePredicate filter;
+
+  /**
+   * Create a FunctionMatchQuery
+   * @param source  a {@link DoubleValuesSource} to use for values
+   * @param filter  the predicate to match against
+   */
+  public FunctionMatchQuery(DoubleValuesSource source, DoublePredicate filter) {
+    this.source = source;
+    this.filter = filter;
+  }
+
+  @Override
+  public String toString(String field) {
+    return "FunctionMatchQuery(" + source.toString() + ")";
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+    return new ConstantScoreWeight(this, boost) {
+      @Override
+      public Scorer scorer(LeafReaderContext context) throws IOException {
+        DoubleValues values = source.getValues(context, null);
+        DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
+        TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
+          @Override
+          public boolean matches() throws IOException {
+            return values.advanceExact(approximation.docID()) && filter.test(values.doubleValue());
+          }
+
+          @Override
+          public float matchCost() {
+            return 100; // TODO maybe DoubleValuesSource should have a matchCost?
+          }
+        };
+        return new ConstantScoreScorer(this, score(), twoPhase);
+      }
+    };
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+    FunctionMatchQuery that = (FunctionMatchQuery) o;
+    return Objects.equals(source, that.source) && Objects.equals(filter, that.filter);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(source, filter);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
new file mode 100644
index 0000000..29ef41f
--- /dev/null
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.queries.function;
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DoubleValues;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterScorer;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+
+/**
+ * A query that wraps another query, and uses a DoubleValuesSource to
+ * replace or modify the wrapped query's score
+ *
+ * If the DoubleValuesSource doesn't return a value for a particular document,
+ * then that document will be given a score of 0.
+ */
+public final class FunctionScoreQuery extends Query {
+
+  private final Query in;
+  private final DoubleValuesSource source;
+
+  /**
+   * Create a new FunctionScoreQuery
+   * @param in      the query to wrap
+   * @param source  a source of scores
+   */
+  public FunctionScoreQuery(Query in, DoubleValuesSource source) {
+    this.in = in;
+    this.source = source;
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+    Weight inner = in.createWeight(searcher, needsScores && source.needsScores(), 1f);
+    if (needsScores == false)
+      return inner;
+    return new FunctionScoreWeight(this, inner, source, boost);
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    Query rewritten = in.rewrite(reader);
+    if (rewritten == in)
+      return this;
+    return new FunctionScoreQuery(rewritten, source);
+  }
+
+  @Override
+  public String toString(String field) {
+    return "FunctionScoreQuery(" + in.toString(field) + ", scored by " + source.toString() + ")";
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+    FunctionScoreQuery that = (FunctionScoreQuery) o;
+    return Objects.equals(in, that.in) &&
+        Objects.equals(source, that.source);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(in, source);
+  }
+
+  private static class FunctionScoreWeight extends Weight {
+
+    final Weight inner;
+    final DoubleValuesSource valueSource;
+    final float boost;
+
+    FunctionScoreWeight(Query query, Weight inner, DoubleValuesSource valueSource, float boost) {
+      super(query);
+      this.inner = inner;
+      this.valueSource = valueSource;
+      this.boost = boost;
+    }
+
+    @Override
+    public void extractTerms(Set<Term> terms) {
+      this.inner.extractTerms(terms);
+    }
+
+    @Override
+    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+      Scorer scorer = inner.scorer(context);
+      if (scorer.iterator().advance(doc) != doc)
+        return Explanation.noMatch("No match");
+      DoubleValues scores = valueSource.getValues(context, DoubleValuesSource.fromScorer(scorer));
+      scores.advanceExact(doc);
+      Explanation scoreExpl = scoreExplanation(context, doc, scores);
+      if (boost == 1f)
+        return scoreExpl;
+      return Explanation.match(scoreExpl.getValue() * boost, "product of:",
+          Explanation.match(boost, "boost"), scoreExpl);
+    }
+
+    private Explanation scoreExplanation(LeafReaderContext context, int doc, DoubleValues scores) throws IOException {
+      if (valueSource.needsScores() == false)
+        return Explanation.match((float) scores.doubleValue(), valueSource.toString());
+      float score = (float) scores.doubleValue();
+      return Explanation.match(score, "computed from:",
+          Explanation.match(score, valueSource.toString()),
+          inner.explain(context, doc));
+    }
+
+    @Override
+    public Scorer scorer(LeafReaderContext context) throws IOException {
+      Scorer in = inner.scorer(context);
+      if (in == null)
+        return null;
+      DoubleValues scores = valueSource.getValues(context, DoubleValuesSource.fromScorer(in));
+      return new FilterScorer(in) {
+        @Override
+        public float score() throws IOException {
+          if (scores.advanceExact(docID()))
+            return (float) (scores.doubleValue() * boost);
+          else
+            return 0;
+        }
+      };
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionMatchQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionMatchQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionMatchQuery.java
new file mode 100644
index 0000000..61faa15
--- /dev/null
+++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionMatchQuery.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.queries.function;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.QueryUtils;
+import org.apache.lucene.search.TopDocs;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestFunctionMatchQuery extends FunctionTestSetup {
+
+  static IndexReader reader;
+  static IndexSearcher searcher;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    createIndex(true);
+    reader = DirectoryReader.open(dir);
+    searcher = new IndexSearcher(reader);
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    reader.close();
+  }
+
+  public void testRangeMatching() throws IOException {
+    DoubleValuesSource in = DoubleValuesSource.fromFloatField(FLOAT_FIELD);
+    FunctionMatchQuery fmq = new FunctionMatchQuery(in, d -> d >= 2 && d < 4);
+    TopDocs docs = searcher.search(fmq, 10);
+
+    assertEquals(2, docs.totalHits);
+    assertEquals(9, docs.scoreDocs[0].doc);
+    assertEquals(13, docs.scoreDocs[1].doc);
+
+    QueryUtils.check(random(), fmq, searcher, rarely());
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreExplanations.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreExplanations.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreExplanations.java
new file mode 100644
index 0000000..5c64396
--- /dev/null
+++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreExplanations.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.queries.function;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BaseExplanationTestCase;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.ClassicSimilarity;
+
+public class TestFunctionScoreExplanations extends BaseExplanationTestCase {
+
+  public void testOneTerm() throws Exception {
+    Query q = new TermQuery(new Term(FIELD, "w1"));
+    FunctionScoreQuery fsq = new FunctionScoreQuery(q, DoubleValuesSource.constant(5));
+    qtest(fsq, new int[] { 0,1,2,3 });
+  }
+
+  public void testBoost() throws Exception {
+    Query q = new TermQuery(new Term(FIELD, "w1"));
+    FunctionScoreQuery csq = new FunctionScoreQuery(q, DoubleValuesSource.constant(5));
+    qtest(new BoostQuery(csq, 4), new int[] { 0,1,2,3 });
+  }
+
+  public void testTopLevelBoost() throws Exception {
+    Query q = new TermQuery(new Term(FIELD, "w1"));
+    FunctionScoreQuery csq = new FunctionScoreQuery(q, DoubleValuesSource.constant(5));
+    BooleanQuery.Builder bqB = new BooleanQuery.Builder();
+    bqB.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
+    bqB.add(csq, BooleanClause.Occur.MUST);
+    BooleanQuery bq = bqB.build();
+    qtest(new BoostQuery(bq, 6), new int[] { 0,1,2,3 });
+  }
+
+  public void testExplanationsIncludingScore() throws Exception {
+
+    DoubleValuesSource scores = DoubleValuesSource.function(DoubleValuesSource.SCORES, v -> v * 2);
+
+    Query q = new TermQuery(new Term(FIELD, "w1"));
+    FunctionScoreQuery csq = new FunctionScoreQuery(q, scores);
+
+    qtest(csq, new int[] { 0, 1, 2, 3 });
+
+    Explanation e1 = searcher.explain(q, 0);
+    Explanation e = searcher.explain(csq, 0);
+
+    assertEquals(e.getDetails().length, 2);
+
+    assertEquals(e1.getValue() * 2, e.getValue(), 0.00001);
+  }
+
+  public void testSubExplanations() throws IOException {
+    Query query = new FunctionScoreQuery(new MatchAllDocsQuery(), DoubleValuesSource.constant(5));
+    IndexSearcher searcher = newSearcher(BaseExplanationTestCase.searcher.getIndexReader());
+    searcher.setSimilarity(new BM25Similarity());
+
+    Explanation expl = searcher.explain(query, 0);
+    assertEquals("constant(5.0)", expl.getDescription());
+    assertEquals(0, expl.getDetails().length);
+
+    query = new BoostQuery(query, 2);
+    expl = searcher.explain(query, 0);
+    assertEquals(2, expl.getDetails().length);
+    // function
+    assertEquals(5f, expl.getDetails()[1].getValue(), 0f);
+    // boost
+    assertEquals("boost", expl.getDetails()[0].getDescription());
+    assertEquals(2f, expl.getDetails()[0].getValue(), 0f);
+
+    searcher.setSimilarity(new ClassicSimilarity()); // in order to have a queryNorm != 1
+    expl = searcher.explain(query, 0);
+    assertEquals(2, expl.getDetails().length);
+    // function
+    assertEquals(5f, expl.getDetails()[1].getValue(), 0f);
+    // boost
+    assertEquals("boost", expl.getDetails()[0].getDescription());
+    assertEquals(2f, expl.getDetails()[0].getValue(), 0f);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fc2e0fd1/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java
new file mode 100644
index 0000000..8f6ef8e
--- /dev/null
+++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionScoreQuery.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.queries.function;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryUtils;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestFunctionScoreQuery extends FunctionTestSetup {
+
+  static IndexReader reader;
+  static IndexSearcher searcher;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    createIndex(true);
+    reader = DirectoryReader.open(dir);
+    searcher = new IndexSearcher(reader);
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    reader.close();
+  }
+
+  // FunctionQuery equivalent
+  public void testSimpleSourceScore() throws Exception {
+
+    FunctionScoreQuery q = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "first")),
+        DoubleValuesSource.fromIntField(INT_FIELD));
+
+    QueryUtils.check(random(), q, searcher, rarely());
+
+    int expectedDocs[] = new int[]{ 4, 7, 9 };
+    TopDocs docs = searcher.search(q, 4);
+    assertEquals(expectedDocs.length, docs.totalHits);
+    for (int i = 0; i < expectedDocs.length; i++) {
+      assertEquals(docs.scoreDocs[i].doc, expectedDocs[i]);
+    }
+
+  }
+
+  // CustomScoreQuery and BoostedQuery equivalent
+  public void testScoreModifyingSource() throws Exception {
+
+    DoubleValuesSource iii = DoubleValuesSource.fromIntField("iii");
+    DoubleValuesSource score = DoubleValuesSource.scoringFunction(iii, (v, s) -> v * s);
+
+    BooleanQuery bq = new BooleanQuery.Builder()
+        .add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD)
+        .add(new TermQuery(new Term(TEXT_FIELD, "text")), BooleanClause.Occur.SHOULD)
+        .build();
+    TopDocs plain = searcher.search(bq, 1);
+
+    FunctionScoreQuery fq = new FunctionScoreQuery(bq, score);
+
+    QueryUtils.check(random(), fq, searcher, rarely());
+
+    int[] expectedDocs = new int[]{ 4, 7, 9, 8, 12 };
+    TopDocs docs = searcher.search(fq, 5);
+    assertEquals(plain.totalHits, docs.totalHits);
+    for (int i = 0; i < expectedDocs.length; i++) {
+      assertEquals(expectedDocs[i], docs.scoreDocs[i].doc);
+
+    }
+
+  }
+
+  // check boosts with non-distributive score source
+  public void testBoostsAreAppliedLast() throws Exception {
+
+    DoubleValuesSource scores
+        = DoubleValuesSource.function(DoubleValuesSource.SCORES, v -> Math.log(v + 4));
+
+    Query q1 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "text")), scores);
+    TopDocs plain = searcher.search(q1, 5);
+
+    Query boosted = new BoostQuery(q1, 2);
+    TopDocs afterboost = searcher.search(boosted, 5);
+    assertEquals(plain.totalHits, afterboost.totalHits);
+    for (int i = 0; i < 5; i++) {
+      assertEquals(plain.scoreDocs[i].doc, afterboost.scoreDocs[i].doc);
+      assertEquals(plain.scoreDocs[i].score, afterboost.scoreDocs[i].score / 2, 0.0001);
+    }
+
+  }
+
+}


[03/23] lucene-solr:jira/solr-8593: use captureState and restoreState instead of cloneAttributes

Posted by kr...@apache.org.
use captureState and restoreState instead of cloneAttributes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/01f2a87c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/01f2a87c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/01f2a87c

Branch: refs/heads/jira/solr-8593
Commit: 01f2a87c67392a86b533d0c76ba7666845d1945f
Parents: 6570e6e
Author: Nathan Gass <ga...@search.ch>
Authored: Fri Jan 13 15:54:07 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 15:54:07 2017 +0100

----------------------------------------------------------------------
 .../apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java  | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/01f2a87c/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 303b7e320..df12fda 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeSource.State;
 
 /**
  * Tokenizes the given token into n-grams of given size(s).
@@ -43,7 +43,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
   private int curCodePointCount;
   private int curGramSize;
   private int savePosIncr;
-  private AttributeSource attributes;
+  private State state;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -81,15 +81,14 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
           curTermLength = termAtt.length();
           curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
           curGramSize = minGram;
-          attributes = input.cloneAttributes();
+          state = captureState();
           savePosIncr += posIncrAtt.getPositionIncrement();
         }
       }
       if (curGramSize <= maxGram) {         // if we have hit the end of our n-gram size range, quit
         if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
           // grab gramSize chars from front or back
-          clearAttributes();
-          attributes.copyTo(this);
+	      restoreState(state);
           // first ngram gets increment, others don't
           if (curGramSize == minGram) {
             posIncrAtt.setPositionIncrement(savePosIncr);


[11/23] lucene-solr:jira/solr-8593: SOLR-9893: Update Mockito to version 2.6.2 for Java 9 support. Disable all legacy EasyMock tests when running on Java 9 until they are migrated to Mockito

Posted by kr...@apache.org.
SOLR-9893: Update Mockito to version 2.6.2 for Java 9 support. Disable all legacy EasyMock tests when running on Java 9 until they are migrated to Mockito


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4eafdb33
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4eafdb33
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4eafdb33

Branch: refs/heads/jira/solr-8593
Commit: 4eafdb337ab88553506e15bc186cd06464ea14f9
Parents: e2c41af
Author: Uwe Schindler <us...@apache.org>
Authored: Sat Jan 14 12:43:24 2017 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Sat Jan 14 12:43:24 2017 +0100

----------------------------------------------------------------------
 lucene/ivy-versions.properties                  |   5 +-
 solr/CHANGES.txt                                |   3 +
 .../handler/dataimport/TestJdbcDataSource.java  |   7 +
 solr/contrib/morphlines-core/ivy.xml            |   8 +-
 solr/core/ivy.xml                               |   4 +-
 .../org/apache/solr/cloud/ClusterStateTest.java |   8 +
 ...verseerCollectionConfigSetProcessorTest.java |   4 +-
 .../solr/core/BlobRepositoryMockingTest.java    |   9 +-
 .../org/apache/solr/core/CoreSorterTest.java    |   7 +
 .../security/TestPKIAuthenticationPlugin.java   |   8 +
 .../solr/servlet/SolrRequestParserTest.java     |   3 +
 solr/licenses/byte-buddy-1.6.2.jar.sha1         |   1 +
 solr/licenses/byte-buddy-LICENSE-ASL.txt        | 202 +++++++++++++++++++
 solr/licenses/byte-buddy-NOTICE.txt             |   4 +
 solr/licenses/mockito-core-1.9.5.jar.sha1       |   1 -
 solr/licenses/mockito-core-2.6.2.jar.sha1       |   1 +
 solr/licenses/objenesis-1.2.jar.sha1            |   1 -
 solr/licenses/objenesis-2.4.jar.sha1            |   1 +
 .../solrj/impl/CloudSolrClientCacheTest.java    |   7 +
 19 files changed, 275 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 297d46c..770649c 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -96,6 +96,7 @@ io.netty.netty-all.version = 4.0.36.Final
 /mecab/mecab-ipadic = 2.7.0-20070801
 /mecab/mecab-naist-jdic = 0.6.3b-20111013
 /net.arnx/jsonic = 1.2.7
+/net.bytebuddy/byte-buddy = 1.6.2
 /net.sf.ehcache/ehcache-core = 2.4.4
 /net.sf.saxon/Saxon-HE = 9.6.0-2
 /net.sourceforge.argparse4j/argparse4j = 0.4.3
@@ -287,14 +288,14 @@ org.kitesdk.kite-morphlines.version = 1.1.0
 
 /org.locationtech.spatial4j/spatial4j = 0.6
 
-/org.mockito/mockito-core = 1.9.5
+/org.mockito/mockito-core = 2.6.2
 
 org.mortbay.jetty.version = 6.1.26
 /org.mortbay.jetty/jetty = ${org.mortbay.jetty.version}
 /org.mortbay.jetty/jetty-util = ${org.mortbay.jetty.version}
 
 /org.noggit/noggit = 0.6
-/org.objenesis/objenesis = 1.2
+/org.objenesis/objenesis = 2.4
 
 org.ow2.asm.version = 5.1
 /org.ow2.asm/asm = ${org.ow2.asm.version}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ccc5d7c..f8ecf23 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -437,6 +437,9 @@ Other Changes
 * SOLR-9934: SolrTestCase.clearIndex has been improved to take advantage of low level test specific logic that
   clears the index metadata more completely then a normal *:* DBQ can due to update versioning.  (hossman)
 
+* SOLR-9893: Update Mockito to version 2.6.2 for Java 9 support. Disable all legacy EasyMock tests when running
+  on Java 9 until they were migrated to Mockito. (Uwe Schindler)
+
 ==================  6.3.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java
index 01340bc..12e34be 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java
@@ -35,11 +35,13 @@ import java.util.Properties;
 
 import javax.sql.DataSource;
 
+import org.apache.lucene.util.Constants;
 import org.apache.solr.handler.dataimport.JdbcDataSource.ResultSetIterator;
 import org.easymock.EasyMock;
 import org.easymock.IMocksControl;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -68,6 +70,11 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
   Properties props = new Properties();
 
   String sysProp = System.getProperty("java.naming.factory.initial");
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+  }
 
   @Override
   @Before

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/contrib/morphlines-core/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/ivy.xml b/solr/contrib/morphlines-core/ivy.xml
index f7fd005..ad47aec 100644
--- a/solr/contrib/morphlines-core/ivy.xml
+++ b/solr/contrib/morphlines-core/ivy.xml
@@ -69,13 +69,17 @@
 
     <dependency org="aopalliance" name="aopalliance" rev="${/aopalliance/aopalliance}" conf="test" />
     <dependency org="com.sun.xml.bind" name="jaxb-impl" rev="${/com.sun.xml.bind/jaxb-impl}" conf="test" />
-    <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test" />
     <dependency org="io.netty" name="netty-all" rev="${/io.netty/netty-all}" conf="test" />
     <dependency org="org.apache.mrunit" name="mrunit" rev="${/org.apache.mrunit/mrunit}" conf="test">
       <artifact name="mrunit" maven:classifier="hadoop2" />
       <exclude org="log4j" module="log4j" />
     </dependency>
-    <dependency org="org.mockito" name="mockito-core" rev="${/org.mockito/mockito-core}" conf="test" />
+    
+    <!-- Mocking -->
+    <dependency org="org.mockito" name="mockito-core" rev="${/org.mockito/mockito-core}" conf="test"/>
+    <dependency org="net.bytebuddy" name="byte-buddy" rev="${/net.bytebuddy/byte-buddy}" conf="test"/>
+    <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test"/>
+    
     <dependency org="commons-collections" name="commons-collections" rev="${/commons-collections/commons-collections}" conf="test" />
 
     <!-- FasterXml Jackson Dependencies -->

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/core/ivy.xml b/solr/core/ivy.xml
index 67e4379..074e35f 100644
--- a/solr/core/ivy.xml
+++ b/solr/core/ivy.xml
@@ -51,11 +51,13 @@
     <dependency org="org.slf4j" name="slf4j-log4j12" rev="${/org.slf4j/slf4j-log4j12}" conf="compile"/>
     <dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="compile"/>
 
+    <!-- TODO: Nuke those 2 deps, please!!!! -->
     <dependency org="org.easymock" name="easymock" rev="${/org.easymock/easymock}" conf="test"/>
     <dependency org="cglib" name="cglib-nodep" rev="${/cglib/cglib-nodep}" conf="test"/>
-    <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test"/>
 
     <dependency org="org.mockito" name="mockito-core" rev="${/org.mockito/mockito-core}" conf="test"/>
+    <dependency org="net.bytebuddy" name="byte-buddy" rev="${/net.bytebuddy/byte-buddy}" conf="test"/>
+    <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test"/>
 
     <dependency org="com.fasterxml.jackson.core" name="jackson-core" rev="${/com.fasterxml.jackson.core/jackson-core}" conf="compile"/>
     <dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="${/com.fasterxml.jackson.core/jackson-databind}" conf="test"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/src/test/org/apache/solr/cloud/ClusterStateTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ClusterStateTest.java b/solr/core/src/test/org/apache/solr/cloud/ClusterStateTest.java
index 704c877..5911cbe 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ClusterStateTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ClusterStateTest.java
@@ -21,6 +21,7 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
@@ -30,11 +31,18 @@ import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.Utils;
 import org.easymock.EasyMock;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import static org.easymock.EasyMock.createMock;
 
 public class ClusterStateTest extends SolrTestCaseJ4 {
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+  }
+
   @Test
   public void testStoreAndRead() throws Exception {
     Map<String,DocCollection> collectionStates = new HashMap<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
index 6a7906d..d9f53af 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
@@ -23,6 +23,7 @@ import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Predicate;
 
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
@@ -103,9 +104,10 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
     }
     
   }
-  
+
   @BeforeClass
   public static void setUpOnce() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
     workQueueMock = createMock(OverseerTaskQueue.class);
     runningMapMock = createMock(DistributedMap.class);
     completedMapMock = createMock(DistributedMap.class);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java b/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java
index 6da1367..355cb61 100644
--- a/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java
+++ b/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java
@@ -25,10 +25,13 @@ import java.nio.charset.Charset;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.common.SolrException;
 import org.easymock.EasyMock;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import static org.easymock.EasyMock.anyObject;
@@ -64,7 +67,11 @@ public class BlobRepositoryMockingTest {
   boolean blobFetched = false;
   String blobKey = "";
 
-
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    LuceneTestCase.assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+  }
+  
   @Before
   public void setUp() throws IllegalAccessException, NoSuchFieldException {
     blobFetched = false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java b/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
index dda437a..f0c4b8c 100644
--- a/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
+++ b/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
@@ -27,6 +27,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
@@ -35,6 +36,7 @@ import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.CoreSorter.CountsForEachShard;
 import org.apache.solr.util.MockCoreContainer;
+import org.junit.BeforeClass;
 
 import static java.util.stream.Collectors.toList;
 import static org.apache.solr.core.CoreSorter.getShardName;
@@ -47,6 +49,11 @@ public class CoreSorterTest extends SolrTestCaseJ4 {
   Map<String, Boolean> nodes = new LinkedHashMap<>();
   Set<String> liveNodes = new HashSet<>();
 
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+  }
+  
   public void testComparator() {
     List<CountsForEachShard> l = new ArrayList<>();
     //                           DOWN LIVE  MY

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java b/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java
index 1eb1d21..f602b1b 100644
--- a/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java
+++ b/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java
@@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicReference;
 import org.apache.http.Header;
 import org.apache.http.auth.BasicUserPrincipal;
 import org.apache.http.message.BasicHttpRequest;
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.core.CoreContainer;
@@ -36,6 +37,8 @@ import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.util.CryptoKeys;
 import org.easymock.EasyMock;
+import org.junit.BeforeClass;
+
 import static org.easymock.EasyMock.getCurrentArguments;
 
 public class TestPKIAuthenticationPlugin extends SolrTestCaseJ4 {
@@ -70,6 +73,11 @@ public class TestPKIAuthenticationPlugin extends SolrTestCaseJ4 {
     }
   }
 
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+  }
+  
   public void test() throws Exception {
     AtomicReference<Principal> principal = new AtomicReference<>();
     String nodeName = "node_x_233";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java b/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
index 53ef7a6..b9e1e4a 100644
--- a/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
+++ b/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
@@ -42,6 +42,7 @@ import javax.servlet.http.HttpServletRequest;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
@@ -66,6 +67,8 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
 
   @BeforeClass
   public static void beforeClass() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+    
     initCore("solrconfig.xml", "schema.xml");
     parser = new SolrRequestParsers( h.getCore().getSolrConfig() );
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/byte-buddy-1.6.2.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/byte-buddy-1.6.2.jar.sha1 b/solr/licenses/byte-buddy-1.6.2.jar.sha1
new file mode 100644
index 0000000..af3a381
--- /dev/null
+++ b/solr/licenses/byte-buddy-1.6.2.jar.sha1
@@ -0,0 +1 @@
+f58a01d36e24a94241d44d52c78e380396d5adb2

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/byte-buddy-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/byte-buddy-LICENSE-ASL.txt b/solr/licenses/byte-buddy-LICENSE-ASL.txt
new file mode 100644
index 0000000..e06d208
--- /dev/null
+++ b/solr/licenses/byte-buddy-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/byte-buddy-NOTICE.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/byte-buddy-NOTICE.txt b/solr/licenses/byte-buddy-NOTICE.txt
new file mode 100644
index 0000000..731a995
--- /dev/null
+++ b/solr/licenses/byte-buddy-NOTICE.txt
@@ -0,0 +1,4 @@
+Byte Buddy is a code generation and manipulation library for creating and modifying Java
+classes during the runtime of a Java application and without the help of a compiler.
+
+Copyright 2014 Rafael Winterhalter

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/mockito-core-1.9.5.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/mockito-core-1.9.5.jar.sha1 b/solr/licenses/mockito-core-1.9.5.jar.sha1
deleted file mode 100644
index 5de9041..0000000
--- a/solr/licenses/mockito-core-1.9.5.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-c3264abeea62c4d2f367e21484fbb40c7e256393

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/mockito-core-2.6.2.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/mockito-core-2.6.2.jar.sha1 b/solr/licenses/mockito-core-2.6.2.jar.sha1
new file mode 100644
index 0000000..f130b90
--- /dev/null
+++ b/solr/licenses/mockito-core-2.6.2.jar.sha1
@@ -0,0 +1 @@
+9eeaa7c2a971cd4738e1b9391a38ba4f21f05763

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/objenesis-1.2.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/objenesis-1.2.jar.sha1 b/solr/licenses/objenesis-1.2.jar.sha1
deleted file mode 100644
index 1252cc6..0000000
--- a/solr/licenses/objenesis-1.2.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-bfcb0539a071a4c5a30690388903ac48c0667f2a

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/licenses/objenesis-2.4.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/objenesis-2.4.jar.sha1 b/solr/licenses/objenesis-2.4.jar.sha1
new file mode 100644
index 0000000..278f7dd
--- /dev/null
+++ b/solr/licenses/objenesis-2.4.jar.sha1
@@ -0,0 +1 @@
+2916b6c96b50c5b3ec4452ed99401db745aabb27

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4eafdb33/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
index 415c658..c144890 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java
@@ -31,17 +31,24 @@ import java.util.function.Function;
 
 import com.google.common.collect.ImmutableSet;
 import org.apache.http.NoHttpResponseException;
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.util.NamedList;
 import org.easymock.EasyMock;
+import org.junit.BeforeClass;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 public class CloudSolrClientCacheTest extends SolrTestCaseJ4 {
 
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    assumeFalse("SOLR-9893: EasyMock does not work with Java 9", Constants.JRE_IS_MINIMUM_JAVA9);
+  }
+  
   public void testCaching() throws Exception {
     String collName = "gettingstarted";
     Set<String> livenodes = new HashSet<>();


[16/23] lucene-solr:jira/solr-8593: Add 6.5 version also in trunk; sync up CHANGES.txt

Posted by kr...@apache.org.
Add 6.5 version also in trunk; sync up CHANGES.txt


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ceaeb42a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ceaeb42a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ceaeb42a

Branch: refs/heads/jira/solr-8593
Commit: ceaeb42a1f31eff6e7f96c5d55a1c21449fbf07f
Parents: c64a011
Author: Uwe Schindler <us...@apache.org>
Authored: Mon Jan 16 11:50:28 2017 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Mon Jan 16 11:50:28 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                                 | 17 +++++++----------
 .../src/java/org/apache/lucene/util/Version.java   |  7 +++++++
 solr/CHANGES.txt                                   | 17 +++++++++++++++++
 3 files changed, 31 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ceaeb42a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 530b0d4..4963454 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -121,8 +121,7 @@ New features
 
 * LUCENE-5325: Added LongValuesSource and DoubleValuesSource, intended as
   type-safe replacements for ValueSource in the queries module.  These
-  expose per-segment LongValues or DoubleValues iterators, similar to the
-  existing DocValues iterator API. (Alan Woodward, Adrien Grand)
+  expose per-segment LongValues or DoubleValues iterators. (Alan Woodward, Adrien Grand)
 
 * LUCENE-7603: Graph token streams are now handled accurately by query
   parsers, by enumerating all paths and creating the corresponding
@@ -180,6 +179,9 @@ Bug Fixes
 
 Improvements
 
+* LUCENE-7532: Add back lost codec file format documentation
+  (Shinichiro Abe via Mike McCandless)
+
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
   PhraseQuery or MultiPhraseQuery when the word automaton is simple
   (Mike McCandless)
@@ -206,14 +208,14 @@ Improvements
   which can be overridden to return false to eek out more speed in some cases.
   (Timothy M. Rodriguez, David Smiley)
 
-* LUCENE-7537: Index time sorting now supports multi-valued sorts
-  using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
-
 * LUCENE-7560: QueryBuilder.createFieldQuery is no longer final,
   giving custom query parsers subclassing QueryBuilder more freedom to
   control how text is analyzed and converted into a query (Matt Weber
   via Mike McCandless)
 
+* LUCENE-7537: Index time sorting now supports multi-valued sorts
+  using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
+
 * LUCENE-7575: UnifiedHighlighter can now highlight fields with queries that don't
   necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default.
   See UH get/setFieldMatcher. (Jim Ferenczi via David Smiley)
@@ -254,11 +256,6 @@ Optimizations
 
 * LUCENE-7572: Doc values queries now cache their hash code. (Adrien Grand)
 
-* LUCENE-7579: Segments are now also sorted during flush, and merging
-  on a sorted index is substantially faster by using some of the same
-  bulk merge optimizations that non-sorted merging uses (Jim Ferenczi
-  via Mike McCandless)
-
 Other
 
 * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ceaeb42a/lucene/core/src/java/org/apache/lucene/util/Version.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java
index 2355a9a..00fb329 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Version.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Version.java
@@ -81,6 +81,13 @@ public final class Version {
   public static final Version LUCENE_6_4_0 = new Version(6, 4, 0);
 
   /**
+   * Match settings and bugs in Lucene's 6.5.0 release.
+   * @deprecated Use latest
+   */
+  @Deprecated
+  public static final Version LUCENE_6_5_0 = new Version(6, 5, 0);
+
+  /**
    * Match settings and bugs in Lucene's 7.0.0 release.
    *  <p>
    *  Use this to get the latest &amp; greatest settings, bug

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ceaeb42a/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f8ecf23..4874067 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -77,6 +77,23 @@ Optimizations
 * SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
   DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)
 
+==================  6.5.0 ==================
+
+Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
+
+Versions of Major Components
+---------------------
+Apache Tika 1.13
+Carrot2 3.15.0
+Velocity 1.7 and Velocity Tools 2.0
+Apache UIMA 2.3.1
+Apache ZooKeeper 3.4.6
+Jetty 9.3.14.v20161028
+
+
+(No Changes)
+
+
 ==================  6.4.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.


[06/23] lucene-solr:jira/solr-8593: LUCENE-7626: IndexWriter no longer accepts broken offsets

Posted by kr...@apache.org.
LUCENE-7626: IndexWriter no longer accepts broken offsets


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/64b86331
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/64b86331
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/64b86331

Branch: refs/heads/jira/solr-8593
Commit: 64b86331c29d074fa7b257d65d3fda3b662bf96a
Parents: 5b3565e
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Jan 13 17:46:02 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Jan 13 17:46:02 2017 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../miscellaneous/FixBrokenOffsetsFilter.java   |  78 ++++++++++++
 .../FixBrokenOffsetsFilterFactory.java          |  39 ++++++
 ...ache.lucene.analysis.util.TokenFilterFactory |   1 +
 .../TestFixBrokenOffsetsFilter.java             |  50 ++++++++
 .../apache/lucene/index/FixBrokenOffsets.java   | 125 +++++++++++++++++++
 .../java/org/apache/lucene/index/package.html   |  27 ++++
 .../lucene/index/TestFixBrokenOffsets.java      | 114 +++++++++++++++++
 .../lucene/index/index.630.brokenoffsets.zip    | Bin 0 -> 3203 bytes
 .../org/apache/lucene/index/CheckIndex.java     |  29 +++--
 .../lucene/index/DefaultIndexingChain.java      |  20 ++-
 .../org/apache/lucene/index/TestCheckIndex.java |   5 -
 .../search/highlight/TokenSourcesTest.java      |   2 +-
 .../lucene/search/TestTermAutomatonQuery.java   |   3 +
 .../index/BaseTermVectorsFormatTestCase.java    |  17 +--
 .../apache/lucene/index/BaseTestCheckIndex.java |  19 ---
 .../java/org/apache/lucene/util/TestUtil.java   |   4 +-
 .../apache/solr/schema/PreAnalyzedField.java    |  11 ++
 .../solr/index/hdfs/CheckHdfsIndexTest.java     |   5 -
 19 files changed, 480 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 040f4e0..30943d2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -29,6 +29,9 @@ API Changes
 
 Bug Fixes
 
+* LUCENE-7626: IndexWriter will no longer accept broken token offsets
+  (Mike McCandless)
+
 Improvements
 
 * LUCENE-7489: Better storage of sparse doc-values fields with the default

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilter.java
new file mode 100644
index 0000000..b0a6b1d
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilter.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+/** 
+ * A filter to correct offsets that illegally go backwards.
+ *
+ * @deprecated Fix the token filters that create broken offsets in the first place.
+ */
+@Deprecated
+public final class FixBrokenOffsetsFilter extends TokenFilter {
+
+  private int lastStartOffset;
+  private int lastEndOffset;
+
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  public FixBrokenOffsetsFilter(TokenStream in) {
+    super(in);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken() == false) {
+      return false;
+    }
+    fixOffsets();
+    return true;
+  }
+
+  @Override
+  public void end() throws IOException {
+    super.end();
+    fixOffsets();
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    lastStartOffset = 0;
+    lastEndOffset = 0;
+  }
+
+  private void fixOffsets() {
+    int startOffset = offsetAtt.startOffset();
+    int endOffset = offsetAtt.endOffset();
+    if (startOffset < lastStartOffset) {
+      startOffset = lastStartOffset;
+    }
+    if (endOffset < startOffset) {
+      endOffset = startOffset;
+    }
+    offsetAtt.setOffset(startOffset, endOffset);
+    lastStartOffset = startOffset;
+    lastEndOffset = endOffset;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilterFactory.java
new file mode 100644
index 0000000..8484b8c
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FixBrokenOffsetsFilterFactory.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link FixBrokenOffsetsFilter}.
+ */
+public class FixBrokenOffsetsFilterFactory extends TokenFilterFactory {
+
+  /** Sole constructor */
+  public FixBrokenOffsetsFilterFactory(Map<String,String> args) {
+    super(args);
+  }
+
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new FixBrokenOffsetsFilter(input);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
index 73986d7..5f8894c 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -64,6 +64,7 @@ org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
 org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
 org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
 org.apache.lucene.analysis.miscellaneous.FingerprintFilterFactory
+org.apache.lucene.analysis.miscellaneous.FixBrokenOffsetsFilterFactory
 org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
 org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory
 org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilterFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFixBrokenOffsetsFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFixBrokenOffsetsFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFixBrokenOffsetsFilter.java
new file mode 100644
index 0000000..ada5014
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestFixBrokenOffsetsFilter.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+
+public class TestFixBrokenOffsetsFilter extends BaseTokenStreamTestCase {
+
+  public void testBogusTermVectors() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
+    Document doc = new Document();
+    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+    ft.setStoreTermVectors(true);
+    ft.setStoreTermVectorOffsets(true);
+    Field field = new Field("foo", "", ft);
+    field.setTokenStream(new FixBrokenOffsetsFilter(new CannedTokenStream(
+        new Token("bar", 5, 10), new Token("bar", 1, 4)
+        )));
+    doc.add(field);
+    iw.addDocument(doc);
+    iw.close();
+    dir.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java b/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
new file mode 100644
index 0000000..d4d6f85
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/index/FixBrokenOffsets.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.SuppressForbidden;
+
+/**
+ * Command-line tool that reads from a source index and
+ * writes to a dest index, correcting any broken offsets
+ * in the process.
+ *
+ * @lucene.experimental
+ */
+public class FixBrokenOffsets {
+  public SegmentInfos infos;
+
+  FSDirectory fsDir;
+
+  Path dir;
+
+  @SuppressForbidden(reason = "System.out required: command line tool")
+  public static void main(String[] args) throws IOException {
+    if (args.length < 2) {
+      System.err.println("Usage: FixBrokenOffsetse <srcDir> <destDir>");
+      return;
+    }
+    Path srcPath = Paths.get(args[0]);
+    if (!Files.exists(srcPath)) {
+      throw new RuntimeException("srcPath " + srcPath.toAbsolutePath() + " doesn't exist");
+    }
+    Path destPath = Paths.get(args[1]);
+    if (Files.exists(destPath)) {
+      throw new RuntimeException("destPath " + destPath.toAbsolutePath() + " already exists; please remove it and re-run");
+    }
+    Directory srcDir = FSDirectory.open(srcPath);
+    DirectoryReader reader = DirectoryReader.open(srcDir);
+
+    List<LeafReaderContext> leaves = reader.leaves();
+    CodecReader[] filtered = new CodecReader[leaves.size()];
+    for(int i=0;i<leaves.size();i++) {
+      filtered[i] = SlowCodecReaderWrapper.wrap(new FilterLeafReader(leaves.get(i).reader()) {
+          @Override
+          public Fields getTermVectors(int docID) throws IOException {
+            Fields termVectors = in.getTermVectors(docID);
+            if (termVectors == null) {
+              return null;
+            }
+            return new FilterFields(termVectors) {
+              @Override
+              public Terms terms(String field) throws IOException {
+                return new FilterTerms(super.terms(field)) {
+                  @Override
+                  public TermsEnum iterator() throws IOException {
+                    return new FilterTermsEnum(super.iterator()) {
+                      @Override
+                      public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
+                        return new FilterPostingsEnum(super.postings(reuse, flags)) {
+                          int nextLastStartOffset = 0;
+                          int lastStartOffset = 0;
+
+                          @Override
+                          public int nextPosition() throws IOException {
+                            int pos = super.nextPosition();
+                            lastStartOffset = nextLastStartOffset;
+                            nextLastStartOffset = startOffset();
+                            return pos;
+                          }
+                          
+                          @Override
+                          public int startOffset() throws IOException {
+                            int offset = super.startOffset();
+                            if (offset < lastStartOffset) {
+                              offset = lastStartOffset;
+                            }
+                            return offset;
+                          }
+                          
+                          @Override
+                          public int endOffset() throws IOException {
+                            int offset = super.endOffset();
+                            if (offset < lastStartOffset) {
+                              offset = lastStartOffset;
+                            }
+                            return offset;
+                          }
+                        };
+                      }
+                    };
+                  }
+                };
+              }
+            };
+          }
+        });
+    }
+
+    Directory destDir = FSDirectory.open(destPath);
+    IndexWriter writer = new IndexWriter(destDir, new IndexWriterConfig());
+    writer.addIndexes(filtered);
+    IOUtils.close(writer, reader, srcDir, destDir);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/backward-codecs/src/java/org/apache/lucene/index/package.html
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/index/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/index/package.html
new file mode 100644
index 0000000..42ff91a
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/index/package.html
@@ -0,0 +1,27 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- not a package-info.java, because we already defined this package in core/ -->
+<html>
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+  <title>Tools for handling backwards compatibility issues with indices.</title>
+</head>
+<body>
+Tools for handling backwards compatibility issues with indices.
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
new file mode 100644
index 0000000..bcd5a65
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.List;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class TestFixBrokenOffsets extends LuceneTestCase {
+
+  // Run this in Lucene 6.x:
+  //
+  //     ant test -Dtestcase=TestFixBrokenOffsets -Dtestmethod=testCreateBrokenOffsetsIndex -Dtests.codec=default -Dtests.useSecurityManager=false
+  /*
+  public void testCreateBrokenOffsetsIndex() throws IOException {
+
+    Path indexDir = Paths.get("/tmp/brokenoffsets");
+    Files.deleteIfExists(indexDir);
+    Directory dir = newFSDirectory(indexDir);
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+
+    Document doc = new Document();
+    FieldType fieldType = new FieldType(TextField.TYPE_STORED);
+    fieldType.setStoreTermVectors(true);
+    fieldType.setStoreTermVectorPositions(true);
+    fieldType.setStoreTermVectorOffsets(true);
+    Field field = new Field("foo", "bar", fieldType);
+    field.setTokenStream(new CannedTokenStream(new Token("foo", 10, 13), new Token("foo", 7, 9)));
+    doc.add(field);
+    writer.addDocument(doc);
+    writer.commit();
+
+    // 2nd segment
+    doc = new Document();
+    field = new Field("foo", "bar", fieldType);
+    field.setTokenStream(new CannedTokenStream(new Token("bar", 15, 17), new Token("bar", 1, 5)));
+    doc.add(field);
+    writer.addDocument(doc);
+    
+    writer.close();
+
+    dir.close();
+  }
+  */
+
+  public void testFixBrokenOffsetsIndex() throws IOException {
+    InputStream resource = getClass().getResourceAsStream("index.630.brokenoffsets.zip");
+    assertNotNull("Broken offsets index not found", resource);
+    Path path = createTempDir("brokenoffsets");
+    TestUtil.unzip(resource, path);
+    Directory dir = FSDirectory.open(path);
+
+    // OK: index is 6.3.0 so offsets not checked:
+    TestUtil.checkIndex(dir);
+    
+    MockDirectoryWrapper tmpDir = newMockDirectory();
+    tmpDir.setCheckIndexOnClose(false);
+    IndexWriter w = new IndexWriter(tmpDir, new IndexWriterConfig());
+    w.addIndexes(dir);
+    w.close();
+    // OK: addIndexes(Directory...) also keeps version as 6.3.0, so offsets not checked:
+    TestUtil.checkIndex(tmpDir);
+    tmpDir.close();
+
+    final MockDirectoryWrapper tmpDir2 = newMockDirectory();
+    tmpDir2.setCheckIndexOnClose(false);
+    w = new IndexWriter(tmpDir2, new IndexWriterConfig());
+    DirectoryReader reader = DirectoryReader.open(dir);
+    List<LeafReaderContext> leaves = reader.leaves();
+    CodecReader[] codecReaders = new CodecReader[leaves.size()];
+    for(int i=0;i<leaves.size();i++) {
+      codecReaders[i] = (CodecReader) leaves.get(i).reader();
+    }
+    w.addIndexes(codecReaders);
+    w.close();
+
+    // NOT OK: broken offsets were copied into a 7.0 segment:
+    ByteArrayOutputStream output = new ByteArrayOutputStream(1024);    
+    RuntimeException re = expectThrows(RuntimeException.class, () -> {TestUtil.checkIndex(tmpDir2, false, true, output);});
+    assertEquals("term [66 6f 6f]: doc 0: pos 1: startOffset 7 < lastStartOffset 10; consider using the FixBrokenOffsets tool in Lucene's backward-codecs module to correct your index", re.getMessage());
+    tmpDir2.close();
+
+    // Now run the tool and confirm the broken offsets are fixed:
+    Path path2 = createTempDir("fixedbrokenoffsets").resolve("subdir");
+    FixBrokenOffsets.main(new String[] {path.toString(), path2.toString()});
+    Directory tmpDir3 = FSDirectory.open(path2);
+    TestUtil.checkIndex(tmpDir3);
+    tmpDir3.close();
+    
+    dir.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/backward-codecs/src/test/org/apache/lucene/index/index.630.brokenoffsets.zip
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/index.630.brokenoffsets.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.630.brokenoffsets.zip
new file mode 100644
index 0000000..3cf476a
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.630.brokenoffsets.zip differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index fd8011d..3bb10d3 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -740,13 +740,13 @@ public final class CheckIndex implements Closeable {
           segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
 
           // Test the Term Index
-          segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast);
+          segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast, version);
 
           // Test Stored Fields
           segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
 
           // Test Term Vectors
-          segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast);
+          segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast, version);
 
           // Test Docvalues
           segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
@@ -1205,7 +1205,7 @@ public final class CheckIndex implements Closeable {
    * checks Fields api is consistent with itself.
    * searcher is optional, to verify with queries. Can be null.
    */
-  private static Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint, boolean isVectors, PrintStream infoStream, boolean verbose) throws IOException {
+  private static Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint, boolean isVectors, PrintStream infoStream, boolean verbose, Version version) throws IOException {
     // TODO: we should probably return our own stats thing...?!
     long startNS;
     if (doPrint) {
@@ -1461,14 +1461,13 @@ public final class CheckIndex implements Closeable {
               if (hasOffsets) {
                 int startOffset = postings.startOffset();
                 int endOffset = postings.endOffset();
-                // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
-                // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
-                if (!isVectors) {
+                // In Lucene 7 we fixed IndexWriter to also enforce term vector offsets
+                if (isVectors == false || version.onOrAfter(Version.LUCENE_7_0_0)) {
                   if (startOffset < 0) {
                     throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
                   }
                   if (startOffset < lastOffset) {
-                    throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
+                    throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset + "; consider using the FixBrokenOffsets tool in Lucene's backward-codecs module to correct your index");
                   }
                   if (endOffset < 0) {
                     throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
@@ -1742,15 +1741,15 @@ public final class CheckIndex implements Closeable {
    * Test the term index.
    * @lucene.experimental
    */
-  public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream) throws IOException {
-    return testPostings(reader, infoStream, false, false);
+  public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, Version version) throws IOException {
+    return testPostings(reader, infoStream, false, false, version);
   }
   
   /**
    * Test the term index.
    * @lucene.experimental
    */
-  public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, boolean verbose, boolean failFast) throws IOException {
+  public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream, boolean verbose, boolean failFast, Version version) throws IOException {
 
     // TODO: we should go and verify term vectors match, if
     // crossCheckTermVectors is on...
@@ -1765,7 +1764,7 @@ public final class CheckIndex implements Closeable {
 
       final Fields fields = reader.getPostingsReader().getMergeInstance();
       final FieldInfos fieldInfos = reader.getFieldInfos();
-      status = checkFields(fields, reader.getLiveDocs(), maxDoc, fieldInfos, true, false, infoStream, verbose);
+      status = checkFields(fields, reader.getLiveDocs(), maxDoc, fieldInfos, true, false, infoStream, verbose, version);
     } catch (Throwable e) {
       if (failFast) {
         IOUtils.reThrow(e);
@@ -2339,15 +2338,15 @@ public final class CheckIndex implements Closeable {
    * Test term vectors.
    * @lucene.experimental
    */
-  public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream) throws IOException {
-    return testTermVectors(reader, infoStream, false, false, false);
+  public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, Version version) throws IOException {
+    return testTermVectors(reader, infoStream, false, false, false, version);
   }
 
   /**
    * Test term vectors.
    * @lucene.experimental
    */
-  public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast) throws IOException {
+  public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast, Version version) throws IOException {
     long startNS = System.nanoTime();
     final Status.TermVectorStatus status = new Status.TermVectorStatus();
     final FieldInfos fieldInfos = reader.getFieldInfos();
@@ -2387,7 +2386,7 @@ public final class CheckIndex implements Closeable {
           
           if (tfv != null) {
             // First run with no deletions:
-            checkFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose);
+            checkFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose, version);
             
             // Only agg stats if the doc is live:
             final boolean doStats = liveDocs == null || liveDocs.get(j);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
index 79c285b..197ab31 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
@@ -27,6 +27,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.NormsConsumer;
@@ -728,10 +729,6 @@ final class DefaultIndexingChain extends DocConsumer {
 
       final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
         
-      // only bother checking offsets if something will consume them.
-      // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
-      final boolean checkOffsets = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
-
       /*
        * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
        * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
@@ -743,6 +740,7 @@ final class DefaultIndexingChain extends DocConsumer {
         stream.reset();
         invertState.setAttributeSource(stream);
         termsHashPerField.start(field, first);
+        CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
 
         while (stream.incrementToken()) {
 
@@ -771,15 +769,13 @@ final class DefaultIndexingChain extends DocConsumer {
             invertState.numOverlap++;
           }
               
-          if (checkOffsets) {
-            int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
-            int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
-            if (startOffset < invertState.lastStartOffset || endOffset < startOffset) {
-              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
-                                                 + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + invertState.lastStartOffset + " for field '" + field.name() + "'");
-            }
-            invertState.lastStartOffset = startOffset;
+          int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
+          int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
+          if (startOffset < invertState.lastStartOffset || endOffset < startOffset) {
+            throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+                                               + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + invertState.lastStartOffset + " for field '" + field.name() + "'");
           }
+          invertState.lastStartOffset = startOffset;
 
           invertState.length++;
           if (invertState.length < 0) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
index 7b71d3c..2559ce4 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
@@ -43,11 +43,6 @@ public class TestCheckIndex extends BaseTestCheckIndex {
   }
   
   @Test
-  public void testBogusTermVectors() throws IOException {
-    testBogusTermVectors(directory);
-  }
-  
-  @Test
   public void testChecksumsOnly() throws IOException {
     testChecksumsOnly(directory);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
index 581ff2f..d49434a 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
@@ -377,7 +377,7 @@ public class TokenSourcesTest extends BaseTokenStreamTestCase {
     }
 
     final BaseTermVectorsFormatTestCase.RandomTokenStream rTokenStream =
-        new BaseTermVectorsFormatTestCase.RandomTokenStream(TestUtil.nextInt(random(), 1, 10), terms, termBytes, false);
+        new BaseTermVectorsFormatTestCase.RandomTokenStream(TestUtil.nextInt(random(), 1, 10), terms, termBytes);
     //check to see if the token streams might have non-deterministic testable result
     final boolean storeTermVectorPositions = random().nextBoolean();
     final int[] startOffsets = rTokenStream.getStartOffsets();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java
index 6055e00..6ef9baf 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java
@@ -45,6 +45,7 @@ import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
@@ -431,7 +432,9 @@ public class TestTermAutomatonQuery extends LuceneTestCase {
     @Override
     public boolean incrementToken() throws IOException {
       if (synNext) {
+        AttributeSource.State state = captureState();
         clearAttributes();
+        restoreState(state);
         posIncAtt.setPositionIncrement(0);
         termAtt.append(""+((char) 97 + random().nextInt(3)));
         synNext = false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
index 5e6809f..7acee87 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
@@ -200,10 +200,6 @@ public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatT
     int i = 0;
 
     public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) {
-      this(len, sampleTerms, sampleTermBytes, rarely());
-    }
-
-    public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes, boolean offsetsGoBackwards) {
       terms = new String[len];
       termBytes = new BytesRef[len];
       positionsIncrements = new int[len];
@@ -216,17 +212,12 @@ public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatT
         terms[i] = sampleTerms[o];
         termBytes[i] = sampleTermBytes[o];
         positionsIncrements[i] = TestUtil.nextInt(random(), i == 0 ? 1 : 0, 10);
-        if (offsetsGoBackwards) {
-          startOffsets[i] = random().nextInt();
-          endOffsets[i] = random().nextInt();
+        if (i == 0) {
+          startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16);
         } else {
-          if (i == 0) {
-            startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16);
-          } else {
-            startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20);
-          }
-          endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
+          startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20);
         }
+        endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
       }
 
       for (int i = 0; i < len; ++i) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java
index cdec720..21ccf3b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTestCheckIndex.java
@@ -22,11 +22,8 @@ import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
@@ -105,22 +102,6 @@ public class BaseTestCheckIndex extends LuceneTestCase {
     checker.close();
   }
   
-  // LUCENE-4221: we have to let these thru, for now
-  public void testBogusTermVectors(Directory dir) throws IOException {
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
-    Document doc = new Document();
-    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
-    ft.setStoreTermVectors(true);
-    ft.setStoreTermVectorOffsets(true);
-    Field field = new Field("foo", "", ft);
-    field.setTokenStream(new CannedTokenStream(
-        new Token("bar", 5, 10), new Token("bar", 1, 4)
-    ));
-    doc.add(field);
-    iw.addDocument(doc);
-    iw.close();
-  }
-  
   public void testChecksumsOnly(Directory dir) throws IOException {
     LineFileDocs lf = new LineFileDocs(random());
     MockAnalyzer analyzer = new MockAnalyzer(random());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
index d3351ab..0ea90fc 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
@@ -334,9 +334,9 @@ public final class TestUtil {
     CheckIndex.testLiveDocs(codecReader, infoStream, true);
     CheckIndex.testFieldInfos(codecReader, infoStream, true);
     CheckIndex.testFieldNorms(codecReader, infoStream, true);
-    CheckIndex.testPostings(codecReader, infoStream, false, true);
+    CheckIndex.testPostings(codecReader, infoStream, false, true, Version.LUCENE_7_0_0);
     CheckIndex.testStoredFields(codecReader, infoStream, true);
-    CheckIndex.testTermVectors(codecReader, infoStream, false, crossCheckTermVectors, true);
+    CheckIndex.testTermVectors(codecReader, infoStream, false, crossCheckTermVectors, true, Version.LUCENE_7_0_0);
     CheckIndex.testDocValues(codecReader, infoStream, true);
     CheckIndex.testPoints(codecReader, infoStream, true);
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
index 87d4094..5f125d9 100644
--- a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
+++ b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
@@ -27,6 +27,7 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
@@ -284,6 +285,7 @@ public class PreAnalyzedField extends TextField implements HasImplicitIndexAnaly
     private byte[] binaryValue = null;
     private PreAnalyzedParser parser;
     private IOException readerConsumptionException;
+    private int lastEndOffset;
 
     public PreAnalyzedTokenizer(PreAnalyzedParser parser) {
       // we don't pack attributes: since we are used for (de)serialization and dont want bloat.
@@ -311,6 +313,8 @@ public class PreAnalyzedField extends TextField implements HasImplicitIndexAnaly
       
       AttributeSource.State state = it.next();
       restoreState(state.clone());
+      // TODO: why can't I lookup the OffsetAttribute up in ctor instead?
+      lastEndOffset = addAttribute(OffsetAttribute.class).endOffset();
       return true;
     }
 
@@ -329,6 +333,13 @@ public class PreAnalyzedField extends TextField implements HasImplicitIndexAnaly
       it = cachedStates.iterator();
     }
 
+    @Override
+    public void end() throws IOException {
+      super.end();
+      // we must set the end offset correctly so multi-valued fields don't try to send offsets backwards:
+      addAttribute(OffsetAttribute.class).setOffset(lastEndOffset, lastEndOffset);
+    }
+
     private void setReaderConsumptionException(IOException e) {
       readerConsumptionException = e;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b86331/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java b/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java
index b4f6931..61b4305 100644
--- a/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java
+++ b/solr/core/src/test/org/apache/solr/index/hdfs/CheckHdfsIndexTest.java
@@ -121,11 +121,6 @@ public class CheckHdfsIndexTest extends AbstractFullDistribZkTestBase {
   }
 
   @Test
-  public void testBogusTermVectors() throws IOException {
-    testCheckIndex.testBogusTermVectors(directory);
-  }
-
-  @Test
   public void testChecksumsOnly() throws IOException {
     testCheckIndex.testChecksumsOnly(directory);
   }


[10/23] lucene-solr:jira/solr-8593: LUCENE-7626: I forgot to close the reader in this test

Posted by kr...@apache.org.
LUCENE-7626: I forgot to close the reader in this test


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e2c41af5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e2c41af5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e2c41af5

Branch: refs/heads/jira/solr-8593
Commit: e2c41af5017f67c279df239a1b99a00c4c4cf9b0
Parents: 9403372
Author: Mike McCandless <mi...@apache.org>
Authored: Sat Jan 14 06:21:01 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Jan 14 06:21:01 2017 -0500

----------------------------------------------------------------------
 .../src/test/org/apache/lucene/index/TestFixBrokenOffsets.java    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e2c41af5/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
index bcd5a65..4ecbd13 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestFixBrokenOffsets.java
@@ -70,7 +70,7 @@ public class TestFixBrokenOffsets extends LuceneTestCase {
     assertNotNull("Broken offsets index not found", resource);
     Path path = createTempDir("brokenoffsets");
     TestUtil.unzip(resource, path);
-    Directory dir = FSDirectory.open(path);
+    Directory dir = newFSDirectory(path);
 
     // OK: index is 6.3.0 so offsets not checked:
     TestUtil.checkIndex(dir);
@@ -94,6 +94,7 @@ public class TestFixBrokenOffsets extends LuceneTestCase {
       codecReaders[i] = (CodecReader) leaves.get(i).reader();
     }
     w.addIndexes(codecReaders);
+    reader.close();
     w.close();
 
     // NOT OK: broken offsets were copied into a 7.0 segment:


[23/23] lucene-solr:jira/solr-8593: Merge branch 'apache-https-master' into jira/solr-8593

Posted by kr...@apache.org.
Merge branch 'apache-https-master' into jira/solr-8593


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/292e5188
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/292e5188
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/292e5188

Branch: refs/heads/jira/solr-8593
Commit: 292e518878c29cb81d300cf48dcf657ef5438397
Parents: 483bfa6 205f9cc
Author: Kevin Risden <kr...@apache.org>
Authored: Mon Jan 16 16:07:54 2017 -0500
Committer: Kevin Risden <kr...@apache.org>
Committed: Mon Jan 16 16:07:54 2017 -0500

----------------------------------------------------------------------
 .../dot.settings/org.eclipse.jdt.core.prefs     |   8 +-
 lucene/CHANGES.txt                              |  36 ++-
 .../miscellaneous/FixBrokenOffsetsFilter.java   |  78 +++++++
 .../FixBrokenOffsetsFilterFactory.java          |  39 ++++
 .../analysis/ngram/EdgeNGramTokenFilter.java    |  16 +-
 .../lucene/analysis/ngram/NGramTokenFilter.java |  18 +-
 ...ache.lucene.analysis.util.TokenFilterFactory |   1 +
 .../TestFixBrokenOffsetsFilter.java             |  50 +++++
 .../lucene/analysis/ngram/TestNGramFilters.java |  47 ++++
 .../apache/lucene/index/FixBrokenOffsets.java   | 125 +++++++++++
 .../java/org/apache/lucene/index/package.html   |  27 +++
 .../lucene/index/TestFixBrokenOffsets.java      | 115 ++++++++++
 .../lucene/index/index.630.brokenoffsets.zip    | Bin 0 -> 3203 bytes
 .../org/apache/lucene/index/CheckIndex.java     |  29 ++-
 .../lucene/index/DefaultIndexingChain.java      |  20 +-
 .../apache/lucene/index/SortedDocValues.java    |  22 ++
 .../apache/lucene/index/SortedSetDocValues.java |  22 ++
 .../lucene/search/DoubleValuesSource.java       |  92 ++++++--
 .../lucene/search/MultiCollectorManager.java    |   6 +-
 .../java/org/apache/lucene/util/Version.java    |   7 +
 .../org/apache/lucene/index/TestCheckIndex.java |   5 -
 .../search/highlight/TokenSourcesTest.java      |   2 +-
 lucene/ivy-versions.properties                  |   5 +-
 .../queries/function/FunctionMatchQuery.java    |  99 +++++++++
 .../queries/function/FunctionScoreQuery.java    | 151 +++++++++++++
 .../function/TestFunctionMatchQuery.java        |  61 ++++++
 .../function/TestFunctionScoreExplanations.java | 105 +++++++++
 .../function/TestFunctionScoreQuery.java        | 114 ++++++++++
 .../lucene/queryparser/xml/CoreParser.java      |   2 -
 .../lucene/search/TestTermAutomatonQuery.java   |   3 +
 .../index/BaseDocValuesFormatTestCase.java      |  32 +++
 .../index/BaseTermVectorsFormatTestCase.java    |  17 +-
 .../apache/lucene/index/BaseTestCheckIndex.java |  19 --
 .../java/org/apache/lucene/util/TestUtil.java   |   4 +-
 solr/CHANGES.txt                                |  34 ++-
 .../handler/dataimport/TestJdbcDataSource.java  |   7 +
 solr/contrib/morphlines-core/ivy.xml            |   8 +-
 solr/core/ivy.xml                               |   4 +-
 .../solr/handler/admin/MetricsHandler.java      |  92 +++++---
 .../solr/highlight/LuceneRegexFragmenter.java   | 217 +++++++++++++++++++
 .../apache/solr/highlight/RegexFragmenter.java  | 196 -----------------
 .../apache/solr/metrics/SolrMetricManager.java  |  30 ++-
 .../apache/solr/schema/PreAnalyzedField.java    |  11 +
 .../org/apache/solr/cloud/ClusterStateTest.java |   8 +
 ...verseerCollectionConfigSetProcessorTest.java |   4 +-
 .../solr/core/BlobRepositoryMockingTest.java    |   9 +-
 .../org/apache/solr/core/CoreSorterTest.java    |   7 +
 .../solr/handler/admin/MetricsHandlerTest.java  |  26 ++-
 .../solr/index/hdfs/CheckHdfsIndexTest.java     |   5 -
 .../ApacheLuceneSolrNearQueryBuilder.java       |   1 -
 .../apache/solr/search/GoodbyeQueryBuilder.java |   1 -
 .../apache/solr/search/HandyQueryBuilder.java   |   1 -
 .../apache/solr/search/HelloQueryBuilder.java   |   1 -
 .../security/TestPKIAuthenticationPlugin.java   |   8 +
 .../solr/servlet/SolrRequestParserTest.java     |   3 +
 solr/licenses/byte-buddy-1.6.2.jar.sha1         |   1 +
 solr/licenses/byte-buddy-LICENSE-ASL.txt        | 202 +++++++++++++++++
 solr/licenses/byte-buddy-NOTICE.txt             |   4 +
 solr/licenses/mockito-core-1.9.5.jar.sha1       |   1 -
 solr/licenses/mockito-core-2.6.2.jar.sha1       |   1 +
 solr/licenses/objenesis-1.2.jar.sha1            |   1 -
 solr/licenses/objenesis-2.5.jar.sha1            |   1 +
 .../solrj/impl/CloudSolrClientCacheTest.java    |   7 +
 .../solr/cloud/AbstractDistribZkTestBase.java   |   5 +-
 64 files changed, 1875 insertions(+), 398 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/292e5188/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --cc lucene/ivy-versions.properties
index 2b53454,6cb7f26..66c4b41
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@@ -94,7 -96,7 +94,8 @@@ io.netty.netty-all.version = 4.0.36.Fin
  /mecab/mecab-ipadic = 2.7.0-20070801
  /mecab/mecab-naist-jdic = 0.6.3b-20111013
  /net.arnx/jsonic = 1.2.7
+ /net.bytebuddy/byte-buddy = 1.6.2
 +/net.hydromatic/eigenbase-properties = 1.1.5
  /net.sf.ehcache/ehcache-core = 2.4.4
  /net.sf.saxon/Saxon-HE = 9.6.0-2
  /net.sourceforge.argparse4j/argparse4j = 0.4.3

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/292e5188/solr/core/ivy.xml
----------------------------------------------------------------------
diff --cc solr/core/ivy.xml
index ecebd97,074e35f..dc58ad5
--- a/solr/core/ivy.xml
+++ b/solr/core/ivy.xml
@@@ -51,15 -51,17 +51,17 @@@
      <dependency org="org.slf4j" name="slf4j-log4j12" rev="${/org.slf4j/slf4j-log4j12}" conf="compile"/>
      <dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="compile"/>
  
+     <!-- TODO: Nuke those 2 deps, please!!!! -->
      <dependency org="org.easymock" name="easymock" rev="${/org.easymock/easymock}" conf="test"/>
      <dependency org="cglib" name="cglib-nodep" rev="${/cglib/cglib-nodep}" conf="test"/>
-     <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test"/>
  
      <dependency org="org.mockito" name="mockito-core" rev="${/org.mockito/mockito-core}" conf="test"/>
+     <dependency org="net.bytebuddy" name="byte-buddy" rev="${/net.bytebuddy/byte-buddy}" conf="test"/>
+     <dependency org="org.objenesis" name="objenesis" rev="${/org.objenesis/objenesis}" conf="test"/>
  
      <dependency org="com.fasterxml.jackson.core" name="jackson-core" rev="${/com.fasterxml.jackson.core/jackson-core}" conf="compile"/>
 -    <dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="${/com.fasterxml.jackson.core/jackson-databind}" conf="test"/>
 -    <dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="${/com.fasterxml.jackson.core/jackson-annotations}" conf="test"/>
 +    <dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="${/com.fasterxml.jackson.core/jackson-databind}" conf="compile"/>
 +    <dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="${/com.fasterxml.jackson.core/jackson-annotations}" conf="compile"/>
      <dependency org="com.fasterxml.jackson.dataformat" name="jackson-dataformat-smile" rev="${/com.fasterxml.jackson.dataformat/jackson-dataformat-smile}" conf="compile"/>
  
  


[19/23] lucene-solr:jira/solr-8593: SOLR-9906: Fix dodgy test check

Posted by kr...@apache.org.
SOLR-9906: Fix dodgy test check


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/efc7ee0f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/efc7ee0f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/efc7ee0f

Branch: refs/heads/jira/solr-8593
Commit: efc7ee0f0c9154fe58671601fdc053540c97ff62
Parents: 478de2a
Author: Alan Woodward <ro...@apache.org>
Authored: Mon Jan 16 15:24:02 2017 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Mon Jan 16 15:24:02 2017 +0000

----------------------------------------------------------------------
 .../java/org/apache/solr/cloud/AbstractDistribZkTestBase.java   | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/efc7ee0f/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
index 0669cbe..7141eed 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
@@ -29,7 +29,6 @@ import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
-import org.apache.solr.common.cloud.Slice.State;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.core.Diagnostics;
@@ -240,8 +239,8 @@ public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTes
       ClusterState clusterState = zkStateReader.getClusterState();
       DocCollection coll = clusterState.getCollection("collection1");
       Slice slice = coll.getSlice(shardName);
-      if (slice.getLeader() != null && !slice.getLeader().equals(oldLeader) && slice.getState() == State.ACTIVE) {
-        log.info("Old leader {}, new leader. New leader got elected in {} ms", oldLeader, slice.getLeader(),timeOut.timeElapsed(MILLISECONDS) );
+      if (slice.getLeader() != null && !slice.getLeader().equals(oldLeader) && slice.getLeader().getState() == Replica.State.ACTIVE) {
+        log.info("Old leader {}, new leader {}. New leader got elected in {} ms", oldLeader, slice.getLeader(),timeOut.timeElapsed(MILLISECONDS) );
         break;
       }
 


[07/23] lucene-solr:jira/solr-8593: SOLR-9960 MetricsHandler should support multiple prefixes.

Posted by kr...@apache.org.
SOLR-9960 MetricsHandler should support multiple prefixes.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/53d5af17
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/53d5af17
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/53d5af17

Branch: refs/heads/jira/solr-8593
Commit: 53d5af17dac5e2c27f1304b029cbd09461ea197b
Parents: 64b8633
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Fri Jan 13 19:24:40 2017 +0100
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Sat Jan 14 09:52:15 2017 +0100

----------------------------------------------------------------------
 solr/CHANGES.txt                                | 14 +--
 .../solr/handler/admin/MetricsHandler.java      | 92 +++++++++++++-------
 .../apache/solr/metrics/SolrMetricManager.java  | 30 ++++---
 .../solr/handler/admin/MetricsHandlerTest.java  | 26 +++++-
 4 files changed, 109 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53d5af17/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b6055fe..ccc5d7c 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -196,14 +196,16 @@ New Features
 
 * SOLR-9805: Use metrics-jvm library to instrument jvm internals such as GC, memory usage and others. (shalin)
 
-* SOLR-9812: SOLR-9911: Added a new /admin/metrics API to return all metrics collected by Solr via API.
-  API supports three optional parameters:
+* SOLR-9812: SOLR-9911, SOLR-9960: Added a new /admin/metrics API to return all metrics collected by Solr via API.
+  API supports four optional multi-valued parameters:
   * 'group' (all,jvm,jetty,node,core),
-  * 'type' (all,counter,timer,gauge,histogram) both of which are multi-valued
-  * 'prefix' that filters the returned metrics
+  * 'type' (all,counter,timer,gauge,histogram),
+  * 'prefix' that filters the returned metrics,
+  * 'registry' that selects one or more registries by prefix (eg. solr.jvm,solr.core.collection1)
   Example: http://localhost:8983/solr/admin/metrics?group=jvm,jetty&type=counter
-  Example: http://localhost:8983/solr/admin/metrics?group=jvm&prefix=buffers
-  (shalin)
+  Example: http://localhost:8983/solr/admin/metrics?group=jvm&prefix=buffers,os
+  Example: http://localhost:8983/solr/admin/metrics?registry=solr.node,solr.core&prefix=ADMIN
+  (shalin, ab)
 
 * SOLR-9884: Add version to segments handler output (Steven Bower via Erick Erickson)
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53d5af17/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
index 0c87875..385317b 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHandler.java
@@ -18,9 +18,10 @@
 package org.apache.solr.handler.admin;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 import com.codahale.metrics.Counter;
@@ -34,7 +35,6 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.CoreContainer;
-import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.request.SolrQueryRequest;
@@ -74,59 +74,85 @@ public class MetricsHandler extends RequestHandlerBase implements PermissionName
     MetricFilter mustMatchFilter = parseMustMatchFilter(req);
     List<MetricType> metricTypes = parseMetricTypes(req);
     List<MetricFilter> metricFilters = metricTypes.stream().map(MetricType::asMetricFilter).collect(Collectors.toList());
-    List<Group> requestedGroups = parseGroups(req);
+    Set<String> requestedRegistries = parseRegistries(req);
 
     NamedList response = new NamedList();
-    for (Group group : requestedGroups) {
-      String registryName = SolrMetricManager.getRegistryName(group);
-      if (group == Group.core) {
-        // this requires special handling because of the way we create registry name for a core (deeply nested)
-        container.getAllCoreNames().forEach(s -> {
-          String coreRegistryName;
-          try (SolrCore core = container.getCore(s)) {
-            coreRegistryName = core.getCoreMetricManager().getRegistryName();
-          }
-          MetricRegistry registry = metricManager.registry(coreRegistryName);
-          response.add(coreRegistryName, MetricUtils.toNamedList(registry, metricFilters, mustMatchFilter));
-        });
-      } else {
-        MetricRegistry registry = metricManager.registry(registryName);
-        response.add(registryName, MetricUtils.toNamedList(registry, metricFilters, mustMatchFilter));
-      }
+    for (String registryName : requestedRegistries) {
+      MetricRegistry registry = metricManager.registry(registryName);
+      response.add(registryName, MetricUtils.toNamedList(registry, metricFilters, mustMatchFilter));
     }
     rsp.getValues().add("metrics", response);
   }
 
   private MetricFilter parseMustMatchFilter(SolrQueryRequest req) {
-    String prefix = req.getParams().get("prefix");
+    String[] prefixes = req.getParams().getParams("prefix");
     MetricFilter mustMatchFilter;
-    if (prefix != null) {
-      mustMatchFilter = new SolrMetricManager.PrefixFilter(prefix.trim());
+    if (prefixes != null && prefixes.length > 0) {
+      Set<String> prefixSet = new HashSet<>();
+      for (String prefix : prefixes) {
+        prefixSet.addAll(StrUtils.splitSmart(prefix, ','));
+      }
+      mustMatchFilter = new SolrMetricManager.PrefixFilter((String[])prefixSet.toArray(new String[prefixSet.size()]));
     } else  {
       mustMatchFilter = MetricFilter.ALL;
     }
     return mustMatchFilter;
   }
 
-  private List<Group> parseGroups(SolrQueryRequest req) {
+  private Set<String> parseRegistries(SolrQueryRequest req) {
     String[] groupStr = req.getParams().getParams("group");
-    List<String> groups = Collections.emptyList();
+    String[] registryStr = req.getParams().getParams("registry");
+    if ((groupStr == null || groupStr.length == 0) && (registryStr == null || registryStr.length == 0)) {
+      // return all registries
+      return container.getMetricManager().registryNames();
+    }
+    boolean allRegistries = false;
+    Set<String> initialPrefixes = Collections.emptySet();
     if (groupStr != null && groupStr.length > 0) {
-      groups = new ArrayList<>();
+      initialPrefixes = new HashSet<>();
       for (String g : groupStr) {
-        groups.addAll(StrUtils.splitSmart(g, ','));
+        List<String> split = StrUtils.splitSmart(g, ',');
+        for (String s : split) {
+          if (s.trim().equals("all")) {
+            allRegistries = true;
+            break;
+          }
+          initialPrefixes.add(SolrMetricManager.overridableRegistryName(s.trim()));
+        }
+        if (allRegistries) {
+          return container.getMetricManager().registryNames();
+        }
       }
     }
 
-    List<Group> requestedGroups = Arrays.asList(Group.values()); // by default we return all groups
-    try {
-      if (groups.size() > 0 && !groups.contains("all")) {
-        requestedGroups = groups.stream().map(String::trim).map(Group::valueOf).collect(Collectors.toList());
+    if (registryStr != null && registryStr.length > 0) {
+      if (initialPrefixes.isEmpty()) {
+        initialPrefixes = new HashSet<>();
+      }
+      for (String r : registryStr) {
+        List<String> split = StrUtils.splitSmart(r, ',');
+        for (String s : split) {
+          if (s.trim().equals("all")) {
+            allRegistries = true;
+            break;
+          }
+          initialPrefixes.add(SolrMetricManager.overridableRegistryName(s.trim()));
+        }
+        if (allRegistries) {
+          return container.getMetricManager().registryNames();
+        }
+      }
+    }
+    Set<String> validRegistries = new HashSet<>();
+    for (String r : container.getMetricManager().registryNames()) {
+      for (String prefix : initialPrefixes) {
+        if (r.startsWith(prefix)) {
+          validRegistries.add(r);
+          break;
+        }
       }
-    } catch (IllegalArgumentException e) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid group in: " + groups + " specified. Must be one of (all, jvm, jetty, http, node, core)", e);
     }
-    return requestedGroups;
+    return validRegistries;
   }
 
   private List<MetricType> parseMetricTypes(SolrQueryRequest req) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53d5af17/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
index 9f126ed..caa8906 100644
--- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
+++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
@@ -93,31 +93,37 @@ public class SolrMetricManager {
    * with names that start with a prefix.
    */
   public static class PrefixFilter implements MetricFilter {
-    private final String prefix;
+    private final String[] prefixes;
     private final Set<String> matched = new HashSet<>();
+    private boolean allMatch = false;
 
     /**
      * Create a filter that uses the provided prefix.
-     * @param prefix prefix to use, must not be null. If empty then any
-     *               name will match.
+     * @param prefixes prefixes to use, must not be null. If empty then any
+     *               name will match, if not empty then match on any prefix will
+     *                 succeed (logical OR).
      */
-    public PrefixFilter(String prefix) {
-      Objects.requireNonNull(prefix);
-      this.prefix = prefix;
+    public PrefixFilter(String... prefixes) {
+      Objects.requireNonNull(prefixes);
+      this.prefixes = prefixes;
+      if (prefixes.length == 0) {
+        allMatch = true;
+      }
     }
 
     @Override
     public boolean matches(String name, Metric metric) {
-      if (prefix.isEmpty()) {
+      if (allMatch) {
         matched.add(name);
         return true;
       }
-      if (name.startsWith(prefix)) {
-        matched.add(name);
-        return true;
-      } else {
-        return false;
+      for (String prefix : prefixes) {
+        if (name.startsWith(prefix)) {
+          matched.add(name);
+          return true;
+        }
       }
+      return false;
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53d5af17/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
index edab3ce..a1b29db 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/MetricsHandlerTest.java
@@ -66,6 +66,26 @@ public class MetricsHandlerTest extends SolrTestCaseJ4 {
     assertNotNull(values.get("solr.jvm"));
 
     resp = new SolrQueryResponse();
+    // "collection" works too, because it's a prefix for "collection1"
+    handler.handleRequestBody(req(CommonParams.QT, "/admin/metrics", CommonParams.WT, "json", "registry", "solr.core.collection,solr.jvm"), resp);
+    values = resp.getValues();
+    assertNotNull(values.get("metrics"));
+    values = (NamedList) values.get("metrics");
+    assertEquals(2, values.size());
+    assertNotNull(values.get("solr.core.collection1"));
+    assertNotNull(values.get("solr.jvm"));
+
+    resp = new SolrQueryResponse();
+    // "collection" works too, because it's a prefix for "collection1"
+    handler.handleRequestBody(req(CommonParams.QT, "/admin/metrics", CommonParams.WT, "json", "registry", "solr.core.collection", "registry", "solr.jvm"), resp);
+    values = resp.getValues();
+    assertNotNull(values.get("metrics"));
+    values = (NamedList) values.get("metrics");
+    assertEquals(2, values.size());
+    assertNotNull(values.get("solr.core.collection1"));
+    assertNotNull(values.get("solr.jvm"));
+
+    resp = new SolrQueryResponse();
     handler.handleRequestBody(req(CommonParams.QT, "/admin/metrics", CommonParams.WT, "json", "group", "jvm,jetty"), resp);
     values = resp.getValues();
     assertNotNull(values.get("metrics"));
@@ -94,7 +114,7 @@ public class MetricsHandlerTest extends SolrTestCaseJ4 {
     assertNull(values.get("ADMIN./admin/authorization.errors")); // this is a timer node
 
     resp = new SolrQueryResponse();
-    handler.handleRequestBody(req(CommonParams.QT, "/admin/metrics", CommonParams.WT, "json", "prefix", "CONTAINER.cores"), resp);
+    handler.handleRequestBody(req(CommonParams.QT, "/admin/metrics", CommonParams.WT, "json", "prefix", "CONTAINER.cores,CONTAINER.threadPool"), resp);
     values = resp.getValues();
     assertNotNull(values.get("metrics"));
     values = (NamedList) values.get("metrics");
@@ -102,10 +122,12 @@ public class MetricsHandlerTest extends SolrTestCaseJ4 {
     assertEquals(0, ((NamedList)values.get("solr.jvm")).size());
     assertEquals(0, ((NamedList)values.get("solr.jetty")).size());
     assertEquals(0, ((NamedList)values.get("solr.core.collection1")).size());
-    assertEquals(3, ((NamedList)values.get("solr.node")).size());
+    assertEquals(11, ((NamedList)values.get("solr.node")).size());
     assertNotNull(values.get("solr.node"));
     values = (NamedList) values.get("solr.node");
     assertNotNull(values.get("CONTAINER.cores.lazy")); // this is a gauge node
+    assertNotNull(values.get("CONTAINER.threadPool.coreContainerWorkExecutor.completed"));
+    assertNotNull(values.get("CONTAINER.threadPool.coreLoadExecutor.completed"));
 
     resp = new SolrQueryResponse();
     handler.handleRequestBody(req(CommonParams.QT, "/admin/metrics", CommonParams.WT, "json", "group", "jvm", "prefix", "CONTAINER.cores"), resp);


[02/23] lucene-solr:jira/solr-8593: copy all attributes including payload to new tokens

Posted by kr...@apache.org.
copy all attributes including payload to new tokens


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6570e6ec
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6570e6ec
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6570e6ec

Branch: refs/heads/jira/solr-8593
Commit: 6570e6ecc2b14a28da9873948083791ba47145d0
Parents: 61e4528
Author: Nathan Gass <ga...@search.ch>
Authored: Mon Jan 9 15:00:21 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 12:14:27 2017 +0100

----------------------------------------------------------------------
 .../lucene/analysis/ngram/EdgeNGramTokenFilter.java | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6570e6ec/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index 827e26f..303b7e320 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -22,9 +22,8 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;
 
 /**
  * Tokenizes the given token into n-grams of given size(s).
@@ -43,15 +42,11 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
   private int curTermLength;
   private int curCodePointCount;
   private int curGramSize;
-  private int tokStart;
-  private int tokEnd; // only used if the length changed before this filter
   private int savePosIncr;
-  private int savePosLen;
+  private AttributeSource attributes;
   
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
 
   /**
    * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -86,17 +81,15 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
           curTermLength = termAtt.length();
           curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
           curGramSize = minGram;
-          tokStart = offsetAtt.startOffset();
-          tokEnd = offsetAtt.endOffset();
+          attributes = input.cloneAttributes();
           savePosIncr += posIncrAtt.getPositionIncrement();
-          savePosLen = posLenAtt.getPositionLength();
         }
       }
       if (curGramSize <= maxGram) {         // if we have hit the end of our n-gram size range, quit
         if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
           // grab gramSize chars from front or back
           clearAttributes();
-          offsetAtt.setOffset(tokStart, tokEnd);
+          attributes.copyTo(this);
           // first ngram gets increment, others don't
           if (curGramSize == minGram) {
             posIncrAtt.setPositionIncrement(savePosIncr);
@@ -104,7 +97,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
           } else {
             posIncrAtt.setPositionIncrement(0);
           }
-          posLenAtt.setPositionLength(savePosLen);
           final int charLength = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
           termAtt.copyBuffer(curTermBuffer, 0, charLength);
           curGramSize++;


[05/23] lucene-solr:jira/solr-8593: also copy all attributes for ngram token filters

Posted by kr...@apache.org.
also copy all attributes for ngram token filters


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ea049b96
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ea049b96
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ea049b96

Branch: refs/heads/jira/solr-8593
Commit: ea049b96a24d6afc582ecdf406e8bf256b9911d9
Parents: 80e2854
Author: Nathan Gass <ga...@search.ch>
Authored: Fri Jan 13 17:01:34 2017 +0100
Committer: Nathan Gass <ga...@search.ch>
Committed: Fri Jan 13 17:07:23 2017 +0100

----------------------------------------------------------------------
 .../lucene/analysis/ngram/NGramTokenFilter.java  | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea049b96/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index e275cfa..cb5d447 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -23,9 +23,8 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource.State;
 
 /**
  * Tokenizes the input into n-grams of the given size(s).
@@ -52,14 +51,11 @@ public final class NGramTokenFilter extends TokenFilter {
   private int curCodePointCount;
   private int curGramSize;
   private int curPos;
-  private int curPosInc, curPosLen;
-  private int tokStart;
-  private int tokEnd;
+  private int curPosInc;
+  private State state;
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt;
-  private final PositionLengthAttribute posLenAtt;
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   /**
    * Creates NGramTokenFilter with given min and max n-grams.
@@ -79,7 +75,6 @@ public final class NGramTokenFilter extends TokenFilter {
     this.maxGram = maxGram;
 
     posIncAtt = addAttribute(PositionIncrementAttribute.class);
-    posLenAtt = addAttribute(PositionLengthAttribute.class);
   }
 
   /**
@@ -104,9 +99,7 @@ public final class NGramTokenFilter extends TokenFilter {
           curGramSize = minGram;
           curPos = 0;
           curPosInc = posIncAtt.getPositionIncrement();
-          curPosLen = posLenAtt.getPositionLength();
-          tokStart = offsetAtt.startOffset();
-          tokEnd = offsetAtt.endOffset();
+          state = captureState();
         }
       }
 
@@ -115,14 +108,12 @@ public final class NGramTokenFilter extends TokenFilter {
         curGramSize = minGram;
       }
       if ((curPos + curGramSize) <= curCodePointCount) {
-        clearAttributes();
+        restoreState(state);
         final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
         final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
         termAtt.copyBuffer(curTermBuffer, start, end - start);
         posIncAtt.setPositionIncrement(curPosInc);
         curPosInc = 0;
-        posLenAtt.setPositionLength(curPosLen);
-        offsetAtt.setOffset(tokStart, tokEnd);
         curGramSize++;
         return true;
       }


[20/23] lucene-solr:jira/solr-8593: Remove unnecessary @Override annotation in CoreParser.java class.

Posted by kr...@apache.org.
Remove unnecessary @Override annotation in CoreParser.java class.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/649c58de
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/649c58de
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/649c58de

Branch: refs/heads/jira/solr-8593
Commit: 649c58de0252ba608963e0fe699f8332c870b294
Parents: efc7ee0
Author: Christine Poerschke <cp...@apache.org>
Authored: Mon Jan 16 15:10:51 2017 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Mon Jan 16 15:29:30 2017 +0000

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/queryparser/xml/CoreParser.java     | 2 --
 1 file changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/649c58de/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java
index 1bf82ac..d8aa8ef 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java
@@ -141,8 +141,6 @@ public class CoreParser implements QueryBuilder {
     return doc;
   }
 
-
-  @Override
   public Query getQuery(Element e) throws ParserException {
     return queryFactory.getQuery(e);
   }


[13/23] lucene-solr:jira/solr-8593: SOLR-9893: For full Java 9 compatibility also update to latest Objenesis 2.5 (this allows mocking frameworks to instantiate objects without a ctor)

Posted by kr...@apache.org.
SOLR-9893: For full Java 9 compatibility also update to latest Objenesis 2.5 (this allows mocking frameworks to instantiate objects without a ctor)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/27ec40d3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/27ec40d3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/27ec40d3

Branch: refs/heads/jira/solr-8593
Commit: 27ec40d3f5117d22a14e179506bec8e545906077
Parents: 60d4a55
Author: Uwe Schindler <us...@apache.org>
Authored: Sun Jan 15 10:07:22 2017 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Sun Jan 15 10:07:22 2017 +0100

----------------------------------------------------------------------
 lucene/ivy-versions.properties       | 2 +-
 solr/licenses/objenesis-2.4.jar.sha1 | 1 -
 solr/licenses/objenesis-2.5.jar.sha1 | 1 +
 3 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/27ec40d3/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 770649c..6cb7f26 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -295,7 +295,7 @@ org.mortbay.jetty.version = 6.1.26
 /org.mortbay.jetty/jetty-util = ${org.mortbay.jetty.version}
 
 /org.noggit/noggit = 0.6
-/org.objenesis/objenesis = 2.4
+/org.objenesis/objenesis = 2.5
 
 org.ow2.asm.version = 5.1
 /org.ow2.asm/asm = ${org.ow2.asm.version}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/27ec40d3/solr/licenses/objenesis-2.4.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/objenesis-2.4.jar.sha1 b/solr/licenses/objenesis-2.4.jar.sha1
deleted file mode 100644
index 278f7dd..0000000
--- a/solr/licenses/objenesis-2.4.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2916b6c96b50c5b3ec4452ed99401db745aabb27

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/27ec40d3/solr/licenses/objenesis-2.5.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/objenesis-2.5.jar.sha1 b/solr/licenses/objenesis-2.5.jar.sha1
new file mode 100644
index 0000000..f7f506d
--- /dev/null
+++ b/solr/licenses/objenesis-2.5.jar.sha1
@@ -0,0 +1 @@
+612ecb799912ccf77cba9b3ed8c813da086076e9


[21/23] lucene-solr:jira/solr-8593: Remove four unnecessary @Override annotations in SolrQueryBuilder (test) classes.

Posted by kr...@apache.org.
Remove four unnecessary @Override annotations in SolrQueryBuilder (test) classes.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/23019006
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/23019006
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/23019006

Branch: refs/heads/jira/solr-8593
Commit: 230190065ca96c6ecc45e581a56f856888c2e321
Parents: 649c58d
Author: Christine Poerschke <cp...@apache.org>
Authored: Mon Jan 16 18:14:36 2017 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Mon Jan 16 18:14:36 2017 +0000

----------------------------------------------------------------------
 .../org/apache/solr/search/ApacheLuceneSolrNearQueryBuilder.java    | 1 -
 solr/core/src/test/org/apache/solr/search/GoodbyeQueryBuilder.java  | 1 -
 solr/core/src/test/org/apache/solr/search/HandyQueryBuilder.java    | 1 -
 solr/core/src/test/org/apache/solr/search/HelloQueryBuilder.java    | 1 -
 4 files changed, 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23019006/solr/core/src/test/org/apache/solr/search/ApacheLuceneSolrNearQueryBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/ApacheLuceneSolrNearQueryBuilder.java b/solr/core/src/test/org/apache/solr/search/ApacheLuceneSolrNearQueryBuilder.java
index 135ec45..bbc081a 100644
--- a/solr/core/src/test/org/apache/solr/search/ApacheLuceneSolrNearQueryBuilder.java
+++ b/solr/core/src/test/org/apache/solr/search/ApacheLuceneSolrNearQueryBuilder.java
@@ -35,7 +35,6 @@ public class ApacheLuceneSolrNearQueryBuilder extends SolrQueryBuilder {
     super(defaultField, analyzer, req, queryFactory);
   }
 
-  @Override
   public Query getQuery(Element e) throws ParserException {
     final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
     final SpanQuery[] spanQueries = new SpanQuery[]{

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23019006/solr/core/src/test/org/apache/solr/search/GoodbyeQueryBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/GoodbyeQueryBuilder.java b/solr/core/src/test/org/apache/solr/search/GoodbyeQueryBuilder.java
index af258d4..93f4a1b 100644
--- a/solr/core/src/test/org/apache/solr/search/GoodbyeQueryBuilder.java
+++ b/solr/core/src/test/org/apache/solr/search/GoodbyeQueryBuilder.java
@@ -31,7 +31,6 @@ public class GoodbyeQueryBuilder extends SolrQueryBuilder {
     super(defaultField, analyzer, req, queryFactory);
   }
 
-  @Override
   public Query getQuery(Element e) throws ParserException {
     return new MatchNoDocsQuery();
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23019006/solr/core/src/test/org/apache/solr/search/HandyQueryBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/HandyQueryBuilder.java b/solr/core/src/test/org/apache/solr/search/HandyQueryBuilder.java
index 14a8aac..c38fb6b 100644
--- a/solr/core/src/test/org/apache/solr/search/HandyQueryBuilder.java
+++ b/solr/core/src/test/org/apache/solr/search/HandyQueryBuilder.java
@@ -35,7 +35,6 @@ public class HandyQueryBuilder extends SolrQueryBuilder {
     super(defaultField, analyzer, req, queryFactory);
   }
 
-  @Override
   public Query getQuery(Element e) throws ParserException {
     final BooleanQuery.Builder bq = new BooleanQuery.Builder();
     final Query lhsQ = getSubQuery(e, "Left");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/23019006/solr/core/src/test/org/apache/solr/search/HelloQueryBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/HelloQueryBuilder.java b/solr/core/src/test/org/apache/solr/search/HelloQueryBuilder.java
index 642047f..8ea98f1 100644
--- a/solr/core/src/test/org/apache/solr/search/HelloQueryBuilder.java
+++ b/solr/core/src/test/org/apache/solr/search/HelloQueryBuilder.java
@@ -31,7 +31,6 @@ public class HelloQueryBuilder extends SolrQueryBuilder {
     super(defaultField, analyzer, req, queryFactory);
   }
 
-  @Override
   public Query getQuery(Element e) throws ParserException {
     return new MatchAllDocsQuery();
   }


[09/23] lucene-solr:jira/solr-8593: Add getMatchingChildren() method to Scorer

Posted by kr...@apache.org.
Add getMatchingChildren() method to Scorer


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9403372f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9403372f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9403372f

Branch: refs/heads/jira/solr-8593
Commit: 9403372fbc36aced848bf8aa498bc71b7b94567b
Parents: 8fa0a8d
Author: Alan Woodward <ro...@apache.org>
Authored: Sat Jan 14 09:08:02 2017 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Sat Jan 14 10:28:51 2017 +0000

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +++
 .../apache/lucene/search/DisjunctionScorer.java |  8 +++++
 .../lucene/search/MinShouldMatchSumScorer.java  |  9 ++++++
 .../java/org/apache/lucene/search/Scorer.java   |  8 +++++
 .../search/TestBooleanQueryVisitSubscorers.java | 33 ++++++++++++++++++++
 5 files changed, 62 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9403372f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 58201d6..540188e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -130,6 +130,10 @@ New features
   SortedSetDocValues to allow filtering their TermsEnums with a
   CompiledAutomaton (Alan Woodward, Mike McCandless)
 
+* LUCENE-7628: Scorer now has a getMatchingChildren() method that will 
+  return all child scorers positioned on the current document.  (Alan
+  Woodward)
+
 Bug Fixes
 
 * LUCENE-7547: JapaneseTokenizerFactory was failing to close the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9403372f/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
index c53942a..a76999e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@@ -202,4 +202,12 @@ abstract class DisjunctionScorer extends Scorer {
     return children;
   }
 
+  @Override
+  public Collection<ChildScorer> getMatchingChildren() throws IOException {
+    List<ChildScorer> children = new ArrayList<>();
+    for (DisiWrapper w = getSubMatches(); w != null; w = w.next) {
+      children.add(new ChildScorer(w.scorer, "SHOULD"));
+    }
+    return children;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9403372f/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
index 032b5fe..b977400 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
@@ -133,6 +133,15 @@ final class MinShouldMatchSumScorer extends Scorer {
   }
 
   @Override
+  public Collection<ChildScorer> getMatchingChildren() throws IOException {
+    List<ChildScorer> children = new ArrayList<>();
+    for (DisiWrapper s = lead; s != null; s = s.next) {
+      children.add(new ChildScorer(s.scorer, "SHOULD"));
+    }
+    return children;
+  }
+
+  @Override
   public DocIdSetIterator iterator() {
     return new DocIdSetIterator() {
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9403372f/lucene/core/src/java/org/apache/lucene/search/Scorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
index f434327..2e35e91 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@@ -82,6 +82,14 @@ public abstract class Scorer {
   public Collection<ChildScorer> getChildren() {
     return Collections.emptyList();
   }
+
+  /**
+   * Returns child sub-scorers that match the current document
+   * @lucene.experimental
+   */
+  public Collection<ChildScorer> getMatchingChildren() throws IOException {
+    return getChildren();
+  }
   
   /** A child Scorer and its relationship to its parent.
    * the meaning of the relationship depends upon the parent query. 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9403372f/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
index 60ba528..0909d5d 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
@@ -108,6 +108,39 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
     assertEquals(2, tfs.get(1).intValue()); // f2:search + f2:lucene
     assertEquals(2, tfs.get(2).intValue()); // f2:search + f2:lucene
   }
+
+  public void testDisjunctionMatches() throws IOException {
+    BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
+    bq1.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
+    bq1.add(new PhraseQuery(F2, "search", "engine"), Occur.SHOULD);
+
+    Weight w1 = scorerSearcher.createNormalizedWeight(bq1.build(), true);
+    Scorer s1 = w1.scorer(reader.leaves().get(0));
+    assertEquals(0, s1.iterator().nextDoc());
+    assertEquals(2, s1.getMatchingChildren().size());
+
+    BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
+    bq2.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
+    bq2.add(new PhraseQuery(F2, "search", "library"), Occur.SHOULD);
+
+    Weight w2 = scorerSearcher.createNormalizedWeight(bq2.build(), true);
+    Scorer s2 = w2.scorer(reader.leaves().get(0));
+    assertEquals(0, s2.iterator().nextDoc());
+    assertEquals(1, s2.getMatchingChildren().size());
+  }
+
+  public void testMinShouldMatchMatches() throws IOException {
+    BooleanQuery.Builder bq = new BooleanQuery.Builder();
+    bq.add(new TermQuery(new Term(F1, "lucene")), Occur.SHOULD);
+    bq.add(new TermQuery(new Term(F2, "lucene")), Occur.SHOULD);
+    bq.add(new PhraseQuery(F2, "search", "library"), Occur.SHOULD);
+    bq.setMinimumNumberShouldMatch(2);
+
+    Weight w = scorerSearcher.createNormalizedWeight(bq.build(), true);
+    Scorer s = w.scorer(reader.leaves().get(0));
+    assertEquals(0, s.iterator().nextDoc());
+    assertEquals(2, s.getMatchingChildren().size());
+  }
   
   public void testConjunctions() throws IOException {
     BooleanQuery.Builder bq = new BooleanQuery.Builder();


[22/23] lucene-solr:jira/solr-8593: Merge the two problem sections in org.eclipse.jdt.core.prefs settings.

Posted by kr...@apache.org.
Merge the two problem sections in org.eclipse.jdt.core.prefs settings.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/205f9cc5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/205f9cc5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/205f9cc5

Branch: refs/heads/jira/solr-8593
Commit: 205f9cc59ed001e6d262930482b88e723e2ea3f8
Parents: 2301900
Author: Christine Poerschke <cp...@apache.org>
Authored: Mon Jan 16 18:23:12 2017 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Mon Jan 16 18:42:07 2017 +0000

----------------------------------------------------------------------
 dev-tools/eclipse/dot.settings/org.eclipse.jdt.core.prefs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/205f9cc5/dev-tools/eclipse/dot.settings/org.eclipse.jdt.core.prefs
----------------------------------------------------------------------
diff --git a/dev-tools/eclipse/dot.settings/org.eclipse.jdt.core.prefs b/dev-tools/eclipse/dot.settings/org.eclipse.jdt.core.prefs
index 6f6533a..0f0b112 100644
--- a/dev-tools/eclipse/dot.settings/org.eclipse.jdt.core.prefs
+++ b/dev-tools/eclipse/dot.settings/org.eclipse.jdt.core.prefs
@@ -4,6 +4,7 @@ org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
 org.eclipse.jdt.core.compiler.compliance=1.8
 org.eclipse.jdt.core.compiler.doc.comment.support=enabled
 org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=error
 org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
 org.eclipse.jdt.core.compiler.problem.invalidJavadoc=error
 org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled
@@ -18,6 +19,9 @@ org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore
 org.eclipse.jdt.core.compiler.problem.missingJavadocTagsMethodTypeParameters=disabled
 org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled
 org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error
+org.eclipse.jdt.core.compiler.problem.unusedImport=error
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=error
 org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
 org.eclipse.jdt.core.compiler.source=1.8
 org.eclipse.jdt.core.compiler.taskCaseSensitive=enabled
@@ -304,7 +308,3 @@ org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=true
 org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
 org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true
 org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true
-org.eclipse.jdt.core.compiler.problem.comparingIdentical=error
-org.eclipse.jdt.core.compiler.problem.noEffectAssignment=error
-org.eclipse.jdt.core.compiler.problem.unusedImport=error
-org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=error