Posted to commits@lucene.apache.org by da...@apache.org on 2018/11/27 09:56:43 UTC

[01/16] lucene-solr:jira/http2: Remove excess element in metrics-reporting.adoc file.

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/http2 8b0eecd18 -> 59615cb88


Remove excess <metrics> element in metrics-reporting.adoc file.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ea304a3a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ea304a3a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ea304a3a

Branch: refs/heads/jira/http2
Commit: ea304a3a3244965b7b141ea4d4d38543a1ad4230
Parents: 05167ed
Author: Christine Poerschke <cp...@apache.org>
Authored: Tue Nov 20 18:50:25 2018 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Tue Nov 20 18:50:25 2018 +0000

----------------------------------------------------------------------
 solr/solr-ref-guide/src/metrics-reporting.adoc | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea304a3a/solr/solr-ref-guide/src/metrics-reporting.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/metrics-reporting.adoc b/solr/solr-ref-guide/src/metrics-reporting.adoc
index 77adb0b..979f650 100644
--- a/solr/solr-ref-guide/src/metrics-reporting.adoc
+++ b/solr/solr-ref-guide/src/metrics-reporting.adoc
@@ -161,7 +161,6 @@ Reporter configurations are specified in `solr.xml` file in `<metrics><reporter>
     <int name="port">9999</int>
     <int name="period">60</int>
   </reporter>
-  <metrics>
     <reporter name="log_metrics" group="core" class="org.apache.solr.metrics.reporters.SolrSlf4jReporter">
       <int name="period">60</int>
       <str name="filter">QUERY./select.requestTimes</str>


[16/16] lucene-solr:jira/http2: Merge with master

Posted by da...@apache.org.
Merge with master


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/59615cb8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/59615cb8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/59615cb8

Branch: refs/heads/jira/http2
Commit: 59615cb88a9b9dad6a39b7540e0c850d2f52ea90
Parents: 8b0eecd 72ca448
Author: Cao Manh Dat <da...@apache.org>
Authored: Tue Nov 27 09:56:36 2018 +0000
Committer: Cao Manh Dat <da...@apache.org>
Committed: Tue Nov 27 09:56:36 2018 +0000

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   6 +
 .../PartOfSpeechAttributeImpl.java              |  11 +-
 .../lucene/index/BufferedUpdatesStream.java     |  11 +-
 .../lucene/index/FrozenBufferedUpdates.java     | 241 ++++++-----
 .../org/apache/lucene/index/IndexWriter.java    |  88 ++--
 .../org/apache/lucene/util/bkd/BKDWriter.java   |  74 +++-
 .../apache/lucene/index/TestIndexWriter.java    |   4 +-
 .../org/apache/lucene/util/bkd/TestBKD.java     |  29 ++
 .../org/apache/lucene/search/BM25FQuery.java    | 430 +++++++++++++++++++
 .../lucene/search/MultiNormsLeafSimScorer.java  | 155 +++++++
 .../apache/lucene/search/TestBM25FQuery.java    | 168 ++++++++
 solr/CHANGES.txt                                |  17 +-
 .../solr/analysis/LowerCaseTokenizer.java       | 156 +++++++
 .../analysis/LowerCaseTokenizerFactory.java     |  69 +++
 .../apache/solr/response/CSVResponseWriter.java |   9 +-
 .../apache/solr/update/AddUpdateCommand.java    |  31 +-
 .../solr/update/DirectUpdateHandler2.java       |  23 +-
 .../collection1/conf/schema-deprecations.xml    |  36 ++
 .../solr/collection1/conf/schema12.xml          |   4 +
 .../solr/analysis/TestDeprecatedFilters.java    |  36 ++
 .../solr/cloud/TestCloudPseudoReturnFields.java |   8 +-
 .../apache/solr/cloud/TestRandomFlRTGCloud.java |   4 +
 .../org/apache/solr/cloud/ZkNodePropsTest.java  |  17 +-
 .../apache/solr/handler/tagger/TaggerTest.java  |   6 +-
 .../solr/response/TestCSVResponseWriter.java    |  49 ++-
 .../solr/search/TestPseudoReturnFields.java     |   9 +-
 .../test/org/apache/solr/search/TestReload.java |   6 +-
 .../org/apache/solr/update/RootFieldTest.java   | 127 ++++++
 .../update/processor/AtomicUpdatesTest.java     |  18 +-
 solr/solr-ref-guide/src/metrics-reporting.adoc  |   5 +-
 .../src/migrate-to-policy-rule.adoc             | 140 +++---
 .../src/solrcloud-autoscaling.adoc              |   2 +-
 .../apache/solr/common/cloud/ZkNodeProps.java   |  13 +-
 .../solr/common/util/FastInputStream.java       |   6 +-
 .../apache/solr/common/util/JavaBinCodec.java   |  13 +
 .../solr/client/solrj/SolrExampleTests.java     |  91 ++++
 36 files changed, 1828 insertions(+), 284 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/59615cb8/solr/CHANGES.txt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/59615cb8/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
----------------------------------------------------------------------


[11/16] lucene-solr:jira/http2: SOLR-12546: Let the CSV response writer handle docValues fields by default.

Posted by da...@apache.org.
SOLR-12546: Let the CSV response writer handle docValues fields by default.
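
As context for the patch below: with fl=* (or pattern matching) the CSV writer now keeps a field if it is either stored or backed by docValues with useDocValuesAsStored=true, instead of stored fields only. A minimal sketch of that selection rule, with illustrative names rather than the actual CSVWriter members:

  // Hedged paraphrase of the new field-selection rule; this is not the
  // CSVWriter code itself, just the predicate it now applies under fl=*.
  class CsvFieldSelectionSketch {
    static boolean includeUnderWildcardFl(boolean stored,
                                          boolean hasDocValues,
                                          boolean useDocValuesAsStored) {
      // stored fields are returned as before; docValues-only fields are now
      // returned too, provided the field declares useDocValuesAsStored=true
      return stored || (hasDocValues && useDocValuesAsStored);
    }
  }

An explicit field list (for example fl=id,price_ff) still returns exactly what is requested, as the test changes below exercise.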


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d7b878e9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d7b878e9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d7b878e9

Branch: refs/heads/jira/http2
Commit: d7b878e90c6ce185d799f1fa554e8c3770793f80
Parents: 2da72ad
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Sun Nov 25 11:26:39 2018 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Sun Nov 25 11:26:39 2018 +0300

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +-
 .../apache/solr/response/CSVResponseWriter.java |  9 ++--
 .../solr/collection1/conf/schema12.xml          |  4 ++
 .../solr/response/TestCSVResponseWriter.java    | 49 ++++++++++++++++++--
 4 files changed, 56 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d7b878e9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 13009a5..9e63d66 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -130,8 +130,9 @@ Other Changes
 
 Bug Fixes
 ----------------------
+* SOLR-12546: CSVResponseWriter omits useDocValuesAsStored=true field when fl=*
+  (Munendra S N via Mikhail Khludnev)
 
-(No Changes)
 Improvements
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d7b878e9/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java b/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
index e894c77..5ebec77 100644
--- a/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
+++ b/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
@@ -247,7 +247,7 @@ class CSVWriter extends TextResponseWriter {
 
     Collection<String> fields = returnFields.getRequestedFieldNames();
     Object responseObj = rsp.getResponse();
-    boolean returnOnlyStored = false;
+    boolean returnStoredOrDocValStored = false;
     if (fields==null||returnFields.hasPatternMatching()) {
       if (responseObj instanceof SolrDocumentList) {
         // get the list of fields from the SolrDocumentList
@@ -271,7 +271,7 @@ class CSVWriter extends TextResponseWriter {
       } else {
         fields.remove("score");
       }
-      returnOnlyStored = true;
+      returnStoredOrDocValStored = true;
     }
 
     CSVSharedBufPrinter csvPrinterMV = new CSVSharedBufPrinter(mvWriter, mvStrategy);
@@ -293,8 +293,9 @@ class CSVWriter extends TextResponseWriter {
         sf = new SchemaField(field, ft);
       }
       
-      // Return only stored fields, unless an explicit field list is specified
-      if (returnOnlyStored && sf != null && !sf.stored()) {
+      // Return stored fields or useDocValuesAsStored=true fields,
+      // unless an explicit field list is specified
+      if (returnStoredOrDocValStored && !sf.stored() && !(sf.hasDocValues() && sf.useDocValuesAsStored())) {
         continue;
       }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d7b878e9/solr/core/src/test-files/solr/collection1/conf/schema12.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
index e4c3ad2..6f33b41 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
@@ -715,6 +715,10 @@
   <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
   <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
 
+  <dynamicField name="*_ii" type="pint" indexed="false" stored="false" useDocValuesAsStored="true"/>
+  <dynamicField name="*_iis" type="pint" indexed="false" stored="false" useDocValuesAsStored="true"/>
+  <dynamicField name="*_ff" type="pfloat" indexed="false" stored="false" useDocValuesAsStored="false"/>
+
   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
   <dynamicField name="attr_*" type="text" indexed="true" stored="true" multiValued="true"/>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d7b878e9/solr/core/src/test/org/apache/solr/response/TestCSVResponseWriter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/response/TestCSVResponseWriter.java b/solr/core/src/test/org/apache/solr/response/TestCSVResponseWriter.java
index d10ea71..979279c 100644
--- a/solr/core/src/test/org/apache/solr/response/TestCSVResponseWriter.java
+++ b/solr/core/src/test/org/apache/solr/response/TestCSVResponseWriter.java
@@ -20,6 +20,8 @@ import java.io.StringWriter;
 import java.time.Instant;
 import java.util.Arrays;
 import java.util.Date;
+import java.util.List;
+import java.util.stream.Collectors;
 
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrDocument;
@@ -43,6 +45,7 @@ public class TestCSVResponseWriter extends SolrTestCaseJ4 {
     assertU(adoc("id","3", "shouldbeunstored","foo"));
     assertU(adoc("id","4", "amount_c", "1.50,EUR"));
     assertU(adoc("id","5", "store", "12.434,-134.1"));
+    assertU(adoc("id","6", "pubyear_ii", "123", "store_iis", "12", "price_ff", "1.3"));
     assertU(commit());
   }
 
@@ -111,8 +114,9 @@ public class TestCSVResponseWriter extends SolrTestCaseJ4 {
     , h.query(req("q","id:[1 TO 2]", "wt","csv", "csv.header","false", "fl","id,v_ss,foo_s")));
 
     // test SOLR-2970 not returning non-stored fields by default. Compare sorted list
-    assertEquals(sortHeader("amount_c,store,v_ss,foo_b,v2_ss,foo_f,foo_i,foo_d,foo_s,foo_dt,id,foo_l\n")
-    , sortHeader(h.query(req("q","id:3", "wt","csv", "csv.header","true", "fl","*", "rows","0"))));
+    assertEquals(sortHeader("amount_c,store,v_ss,foo_b,v2_ss,foo_f,foo_i,foo_d,foo_s,foo_dt,id,foo_l," +
+            "pubyear_ii,store_iis\n"),
+        sortHeader(h.query(req("q","id:3", "wt","csv", "csv.header","true", "fl","*", "rows","0"))));
 
 
     // now test SolrDocumentList
@@ -229,7 +233,7 @@ public class TestCSVResponseWriter extends SolrTestCaseJ4 {
     //assertions specific to multiple pseudofields functions like abs, div, exists, etc.. (SOLR-5423)
     String funcText = h.query(req("q","*", "wt","csv", "csv.header","true", "fl","XXX:id,YYY:exists(foo_i),exists(shouldbeunstored)"));
     String[] funcLines = funcText.split("\n");
-    assertEquals(6, funcLines.length);
+    assertEquals(7, funcLines.length);
     assertEquals("XXX,YYY,exists(shouldbeunstored)", funcLines[0] );
     assertEquals("1,true,false", funcLines[1] );
     assertEquals("3,false,true", funcLines[3] );
@@ -238,11 +242,48 @@ public class TestCSVResponseWriter extends SolrTestCaseJ4 {
     //assertions specific to single function without alias (SOLR-5423)
     String singleFuncText = h.query(req("q","*", "wt","csv", "csv.header","true", "fl","exists(shouldbeunstored),XXX:id"));
     String[] singleFuncLines = singleFuncText.split("\n");
-    assertEquals(6, singleFuncLines.length);
+    assertEquals(7, singleFuncLines.length);
     assertEquals("exists(shouldbeunstored),XXX", singleFuncLines[0] );
     assertEquals("false,1", singleFuncLines[1] );
     assertEquals("true,3", singleFuncLines[3] );
   }
+
+  @Test
+  public void testForDVEnabledFields() throws Exception {
+    // for dv enabled and useDocValueAsStored=true
+    // returns pubyear_i, store_iis but not price_ff
+    String singleFuncText = h.query(req("q","id:6", "wt","csv", "csv.header","true"));
+    String sortedHeader = sortHeader("amount_c,store,v_ss,foo_b,v2_ss,foo_f,foo_i,foo_d,foo_s,foo_dt,id,foo_l," +
+        "pubyear_ii,store_iis");
+    String[] singleFuncLines = singleFuncText.split("\n");
+    assertEquals(2, singleFuncLines.length);
+    assertEquals(sortedHeader, sortHeader(singleFuncLines[0]));
+    List<String> actualVal = Arrays.stream(singleFuncLines[1].trim().split(","))
+        .filter(val -> !val.trim().isEmpty() && !val.trim().equals("\"\""))
+        .collect(Collectors.toList());
+    assertEquals(3, actualVal.size());
+    assertTrue(actualVal.containsAll(Arrays.asList("6", "123", "12")));
+
+    // explicit fl=*
+    singleFuncText = h.query(req("q","id:6", "wt","csv", "csv.header","true", "fl", "*"));
+    sortedHeader = sortHeader("amount_c,store,v_ss,foo_b,v2_ss,foo_f,foo_i,foo_d,foo_s,foo_dt,id,foo_l," +
+        "pubyear_ii,store_iis");
+    singleFuncLines = singleFuncText.split("\n");
+    assertEquals(2, singleFuncLines.length);
+    assertEquals(sortedHeader, sortHeader(singleFuncLines[0]));
+    actualVal = Arrays.stream(singleFuncLines[1].trim().split(","))
+        .filter(val -> !val.trim().isEmpty() && !val.trim().equals("\"\""))
+        .collect(Collectors.toList());
+    assertEquals(3, actualVal.size());
+    assertTrue(actualVal.containsAll(Arrays.asList("6", "123", "12")));
+
+    // explicit price_ff
+    singleFuncText = h.query(req("q","id:6", "wt","csv", "csv.header","true", "fl", "price_ff"));
+    singleFuncLines = singleFuncText.split("\n");
+    assertEquals(2, singleFuncLines.length);
+    assertEquals("price_ff", singleFuncLines[0]);
+    assertEquals("1.3", singleFuncLines[1]);
+  }
     
 
   /*


[07/16] lucene-solr:jira/http2: SOLR-9856: Update the Ref Guide to no longer list this issue as unresolved.

Posted by da...@apache.org.
SOLR-9856: Update the Ref Guide to no longer list this issue as unresolved.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/67cdd219
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/67cdd219
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/67cdd219

Branch: refs/heads/jira/http2
Commit: 67cdd21996f716ffb137bbcb8f826794a2632be7
Parents: 56cb42d
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Wed Nov 21 19:13:02 2018 +0100
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Wed Nov 21 19:13:02 2018 +0100

----------------------------------------------------------------------
 solr/solr-ref-guide/src/metrics-reporting.adoc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/67cdd219/solr/solr-ref-guide/src/metrics-reporting.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/metrics-reporting.adoc b/solr/solr-ref-guide/src/metrics-reporting.adoc
index 979f650..57356d8 100644
--- a/solr/solr-ref-guide/src/metrics-reporting.adoc
+++ b/solr/solr-ref-guide/src/metrics-reporting.adoc
@@ -68,8 +68,8 @@ The <<Core Level Metrics,Core (SolrCore) Registry>> includes `solr.core.<collect
   process distributed shard requests also report `shardRequests` sub-counters for each type of distributed
   request.
 * <<Index Merge Metrics,index-level events>>: meters for minor / major merges, number of merged docs, number of deleted docs, gauges for currently running merges and their size.
-* shard replication and transaction log replay on replicas (TBD, SOLR-9856)
-* open / available / pending connections for shard handler and update handler
+* shard replication and transaction log replay on replicas,
+* open / available / pending connections for shard handler and update handler.
 
 === Jetty Registry
 


[15/16] lucene-solr:jira/http2: LUCENE-8562: Speed up merging segments of points with data dimensions by only sorting on the indexed dimensions

Posted by da...@apache.org.
LUCENE-8562: Speed up merging segments of points with data dimensions by only sorting on the indexed dimensions
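
For readers unfamiliar with selective indexing: a point field may declare more data dimensions than indexed dimensions, and only the indexed dimensions drive BKD tree splits, so merge-time sorting per data-only dimension is wasted work. A small sketch of such a field, assuming the three-argument FieldType.setDimensions overload available on master at the time (the dimension counts and byte width here are purely illustrative):

  import org.apache.lucene.document.FieldType;

  class SelectiveIndexingSketch {
    static FieldType pointType() {
      // Hypothetical point field: 3 data dimensions, only 2 of them indexed.
      // After this change, merge-time sorting happens only for the 2 indexed
      // dimensions; the third is stored per point but never drives splits.
      FieldType ft = new FieldType();
      ft.setDimensions(/* dataDims */ 3, /* indexDims */ 2, /* bytesPerDim */ 4);
      ft.freeze();
      return ft;
    }
  }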


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/72ca4488
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/72ca4488
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/72ca4488

Branch: refs/heads/jira/http2
Commit: 72ca4488d1313ffd2b9b8cf43027f7677022e80f
Parents: 68c0774
Author: iverase <iv...@apache.org>
Authored: Tue Nov 27 10:26:49 2018 +0100
Committer: iverase <iv...@apache.org>
Committed: Tue Nov 27 10:26:49 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +
 .../org/apache/lucene/util/bkd/BKDWriter.java   | 74 +++++++++++++++-----
 .../org/apache/lucene/util/bkd/TestBKD.java     | 29 ++++++++
 3 files changed, 87 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/72ca4488/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5a347d8..86d06bc 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -225,6 +225,9 @@ Improvements
 * LUCENE-8463: TopFieldCollector can now early-terminates queries when sorting by SortField.DOC.
   (Christophe Bismuth via Jim Ferenczi)
 
+* LUCENE-8562: Speed up merging segments of points with data dimensions by only sorting on the indexed
+  dimensions. (Ignacio Vera)
+
 Optimizations
 
 * LUCENE-8552: FieldInfos.getMergedFieldInfos no longer does any merging if there is <= 1 segment.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/72ca4488/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index c4ac04e..1ffa275 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -767,6 +767,10 @@ public class BKDWriter implements Closeable {
   /** Sort the heap writer by the specified dim */
   private void sortHeapPointWriter(final HeapPointWriter writer, int dim) {
     final int pointCount = Math.toIntExact(this.pointCount);
+    sortHeapPointWriter(writer, pointCount, dim);
+  }
+  /** Sort the heap writer by the specified dim */
+  private void sortHeapPointWriter(final HeapPointWriter writer, int pointCount, int dim) {
     // Tie-break by docID:
 
     // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
@@ -959,7 +963,7 @@ public class BKDWriter implements Closeable {
     }
 
     LongBitSet ordBitSet;
-    if (numDataDims > 1) {
+    if (numIndexDims > 1) {
       if (singleValuePerDoc) {
         ordBitSet = new LongBitSet(maxDoc);
       } else {
@@ -994,7 +998,7 @@ public class BKDWriter implements Closeable {
     assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
 
     // Sort all docs once by each dimension:
-    PathSlice[] sortedPointWriters = new PathSlice[numDataDims];
+    PathSlice[] sortedPointWriters = new PathSlice[numIndexDims];
 
     // This is only used on exception; on normal code paths we close all files we opened:
     List<Closeable> toCloseHeroically = new ArrayList<>();
@@ -1002,9 +1006,7 @@ public class BKDWriter implements Closeable {
     boolean success = false;
     try {
       //long t0 = System.nanoTime();
-      // even with selective indexing we create the sortedPointWriters so we can compress
-      // the leaf node data by common prefix
-      for(int dim=0;dim<numDataDims;dim++) {
+      for(int dim=0;dim<numIndexDims;dim++) {
         sortedPointWriters[dim] = new PathSlice(sort(dim), 0, pointCount);
       }
       //long t1 = System.nanoTime();
@@ -1445,7 +1447,7 @@ public class BKDWriter implements Closeable {
       boolean result = reader.next();
       assert result: "rightCount=" + rightCount + " source.count=" + source.count + " source.writer=" + source.writer;
       System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim);
-      if (numDataDims > 1) {
+      if (numIndexDims > 1) {
         assert ordBitSet.get(reader.ord()) == false;
         ordBitSet.set(reader.ord());
         // Subtract 1 from rightCount because we already did the first value above (so we could record the split value):
@@ -1619,7 +1621,7 @@ public class BKDWriter implements Closeable {
       assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
           docIDs, 0);
       writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
-      
+
       out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
       scratchOut.reset();
 
@@ -1678,10 +1680,10 @@ public class BKDWriter implements Closeable {
                      long[] leafBlockFPs,
                      List<Closeable> toCloseHeroically) throws IOException {
 
-    for(PathSlice slice : slices) {
+    for (PathSlice slice : slices) {
       assert slice.count == slices[0].count;
     }
-    
+
     if (numDataDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
       // Special case for 1D, to cutover to heap once we recurse deeply enough:
       slices[0] = switchToHeap(slices[0], toCloseHeroically);
@@ -1695,7 +1697,7 @@ public class BKDWriter implements Closeable {
       int sortedDim = 0;
       int sortedDimCardinality = Integer.MAX_VALUE;
 
-      for (int dim=0;dim<numDataDims;dim++) {
+      for (int dim=0;dim<numIndexDims;dim++) {
         if (slices[dim].writer instanceof HeapPointWriter == false) {
           // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
           // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
@@ -1740,7 +1742,41 @@ public class BKDWriter implements Closeable {
         }
       }
 
-      PathSlice source = slices[sortedDim];
+      PathSlice dataDimPathSlice = null;
+
+      if (numDataDims != numIndexDims) {
+        HeapPointWriter heapSource = (HeapPointWriter) slices[0].writer;
+        int from = (int) slices[0].start;
+        int to = from + (int) slices[0].count;
+        Arrays.fill(commonPrefixLengths, numIndexDims, numDataDims, bytesPerDim);
+        heapSource.readPackedValue(from, scratch1);
+        for (int i = from + 1; i < to; ++i) {
+          heapSource.readPackedValue(i, scratch2);
+          for (int dim = numIndexDims; dim < numDataDims; dim++) {
+            final int offset = dim * bytesPerDim;
+            for (int j = 0; j < commonPrefixLengths[dim]; j++) {
+              if (scratch1[offset + j] != scratch2[offset + j]) {
+                commonPrefixLengths[dim] = j;
+                break;
+              }
+            }
+          }
+        }
+        //handle case when all index dimensions contain the same value but not the data dimensions
+        if (commonPrefixLengths[sortedDim] == bytesPerDim) {
+          for (int dim = numIndexDims; dim < numDataDims; ++dim) {
+            if (commonPrefixLengths[dim] != bytesPerDim) {
+              sortedDim = dim;
+              //create a new slice in memory
+              dataDimPathSlice = switchToHeap(slices[0], toCloseHeroically);
+              sortHeapPointWriter((HeapPointWriter) dataDimPathSlice.writer, (int) dataDimPathSlice.count, sortedDim);
+              break;
+            }
+          }
+        }
+      }
+
+      PathSlice source = (dataDimPathSlice != null) ? dataDimPathSlice : slices[sortedDim];
 
       // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
       HeapPointWriter heapSource = (HeapPointWriter) source.writer;
@@ -1804,8 +1840,8 @@ public class BKDWriter implements Closeable {
 
       // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse:
 
-      PathSlice[] leftSlices = new PathSlice[numDataDims];
-      PathSlice[] rightSlices = new PathSlice[numDataDims];
+      PathSlice[] leftSlices = new PathSlice[numIndexDims];
+      PathSlice[] rightSlices = new PathSlice[numIndexDims];
 
       byte[] minSplitPackedValue = new byte[packedIndexBytesLength];
       System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedIndexBytesLength);
@@ -1815,13 +1851,13 @@ public class BKDWriter implements Closeable {
 
       // When we are on this dim, below, we clear the ordBitSet:
       int dimToClear;
-      if (numDataDims - 1 == splitDim) {
-        dimToClear = numDataDims - 2;
+      if (numIndexDims - 1 == splitDim) {
+        dimToClear = numIndexDims - 2;
       } else {
-        dimToClear = numDataDims - 1;
+        dimToClear = numIndexDims - 1;
       }
 
-      for(int dim=0;dim<numDataDims;dim++) {
+      for(int dim=0;dim<numIndexDims;dim++) {
 
         if (dim == splitDim) {
           // No need to partition on this dim since it's a simple slice of the incoming already sorted slice, and we
@@ -1858,7 +1894,7 @@ public class BKDWriter implements Closeable {
             ordBitSet, out,
             minPackedValue, maxSplitPackedValue, parentSplits,
             splitPackedValues, leafBlockFPs, toCloseHeroically);
-      for(int dim=0;dim<numDataDims;dim++) {
+      for(int dim=0;dim<numIndexDims;dim++) {
         // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
         if (dim != splitDim) {
           leftSlices[dim].writer.destroy();
@@ -1871,7 +1907,7 @@ public class BKDWriter implements Closeable {
             ordBitSet, out,
             minSplitPackedValue, maxPackedValue, parentSplits,
             splitPackedValues, leafBlockFPs, toCloseHeroically);
-      for(int dim=0;dim<numDataDims;dim++) {
+      for(int dim=0;dim<numIndexDims;dim++) {
         // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
         if (dim != splitDim) {
           rightSlices[dim].writer.destroy();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/72ca4488/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index d75d785..a01c927 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -492,6 +492,35 @@ public class TestBKD extends LuceneTestCase {
     verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
   }
 
+  public void testIndexDimEqualDataDimDifferent() throws Exception {
+    int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
+    int numDataDims = TestUtil.nextInt(random(), 2, 5);
+    int numIndexDims = TestUtil.nextInt(random(), 1, numDataDims - 1);
+
+    int numDocs = atLeast(1000);
+    byte[][][] docValues = new byte[numDocs][][];
+
+    byte[][] indexDimensions = new byte[numDataDims][];
+    for(int dim=0;dim<numIndexDims;dim++) {
+      indexDimensions[dim] = new byte[numBytesPerDim];
+      random().nextBytes(indexDimensions[dim]);
+    }
+
+    for(int docID=0;docID<numDocs;docID++) {
+      byte[][] values = new byte[numDataDims][];
+      for(int dim=0;dim<numIndexDims;dim++) {
+        values[dim] = indexDimensions[dim];
+      }
+      for (int dim = numIndexDims; dim < numDataDims; dim++) {
+          values[dim] = new byte[numBytesPerDim];
+          random().nextBytes(values[dim]);
+      }
+      docValues[docID] = values;
+    }
+
+    verify(docValues, null, numDataDims, numIndexDims, numBytesPerDim);
+  }
+
   public void testOneDimEqual() throws Exception {
     int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
     int numDataDims = TestUtil.nextInt(random(), 1, 5);


[04/16] lucene-solr:jira/http2: LUCENE-8216: improve error message when field weight is invalid

Posted by da...@apache.org.
LUCENE-8216: improve error message when field weight is invalid
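
The weight being validated here is the per-field boost passed to BM25FQuery.Builder. A hedged usage sketch, assuming the Builder methods exercised by the sandbox tests (addField, addTerm, build); weights below 1 now fail with the clearer message introduced in this commit:

  import org.apache.lucene.search.BM25FQuery;
  import org.apache.lucene.util.BytesRef;

  class Bm25fSketch {
    static BM25FQuery example() {
      return new BM25FQuery.Builder()
          .addField("title", 2.0f)   // weight >= 1 is required
          .addField("body", 1.0f)
          .addTerm(new BytesRef("lucene"))
          .build();
    }
  }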


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/08dd681f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/08dd681f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/08dd681f

Branch: refs/heads/jira/http2
Commit: 08dd681f0febcf73af94b47ea742294bf4dd8701
Parents: fd96bc5
Author: Jim Ferenczi <ji...@apache.org>
Authored: Wed Nov 21 10:27:51 2018 +0100
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Wed Nov 21 10:27:51 2018 +0100

----------------------------------------------------------------------
 lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java   | 2 +-
 .../sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08dd681f/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
index 025d734..b02989d 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
@@ -94,7 +94,7 @@ public final class BM25FQuery extends Query {
      */
     public Builder addField(String field, float weight) {
       if (weight < 1) {
-        throw new IllegalArgumentException("weight must be greater than 1");
+        throw new IllegalArgumentException("weight must be greater or equal to 1");
       }
       fieldAndWeights.put(field, new FieldAndWeight(field, weight));
       return this;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08dd681f/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
index 8bc8cb1..1dce7da 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
@@ -37,7 +37,7 @@ public class TestBM25FQuery extends LuceneTestCase {
     BM25FQuery.Builder builder = new BM25FQuery.Builder();
     IllegalArgumentException exc =
         expectThrows(IllegalArgumentException.class, () -> builder.addField("foo", 0.5f));
-    assertEquals(exc.getMessage(), "weight must be greater than 1");
+    assertEquals(exc.getMessage(), "weight must be greater or equal to 1");
   }
 
   public void testRewrite() throws IOException {


[10/16] lucene-solr:jira/http2: LUCENE-8570: Fix possible NPE in the attribute reflection of the Nori's PartOfSpeechAttributeImpl

Posted by da...@apache.org.
LUCENE-8570: Fix possible NPE in the attribute reflection of the Nori's PartOfSpeechAttributeImpl


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2da72ad0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2da72ad0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2da72ad0

Branch: refs/heads/jira/http2
Commit: 2da72ad05c5cf05ca81e0fa64abf4b6fef4896a4
Parents: 2459072
Author: Jim Ferenczi <ji...@apache.org>
Authored: Fri Nov 23 10:31:28 2018 +0100
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Fri Nov 23 10:31:28 2018 +0100

----------------------------------------------------------------------
 .../ko/tokenattributes/PartOfSpeechAttributeImpl.java    | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2da72ad0/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
index 2e51689..fbd637f 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
@@ -62,11 +62,12 @@ public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSp
 
   @Override
   public void reflectWith(AttributeReflector reflector) {
-    reflector.reflect(PartOfSpeechAttribute.class, "posType", getPOSType().name());
-    Tag leftPOS = getLeftPOS();
-    reflector.reflect(PartOfSpeechAttribute.class, "leftPOS", leftPOS.name() + "(" + leftPOS.description() + ")");
-    Tag rightPOS = getRightPOS();
-    reflector.reflect(PartOfSpeechAttribute.class, "rightPOS", rightPOS.name() + "(" + rightPOS.description() + ")");
+    String posName = getPOSType() == null ? null : getPOSType().name();
+    String rightPOS = getRightPOS() == null ? null : getRightPOS().name() + "(" + getRightPOS().description() + ")";
+    String leftPOS = getLeftPOS() == null ? null : getLeftPOS().name() + "(" + getLeftPOS().description() + ")";
+    reflector.reflect(PartOfSpeechAttribute.class, "posType", posName);
+    reflector.reflect(PartOfSpeechAttribute.class, "leftPOS", leftPOS);
+    reflector.reflect(PartOfSpeechAttribute.class, "rightPOS", rightPOS);
     reflector.reflect(PartOfSpeechAttribute.class, "morphemes", displayMorphemes(getMorphemes()));
   }
 


[13/16] lucene-solr:jira/http2: SOLR-5211: temporarily ignore RootFieldTest pending Moshe's fix

Posted by da...@apache.org.
SOLR-5211: temporarily ignore RootFieldTest pending Moshe's fix


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dc134be4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dc134be4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dc134be4

Branch: refs/heads/jira/http2
Commit: dc134be4499dfc1edd42941d6c6b1ff1b96190d3
Parents: a346ba0
Author: David Smiley <ds...@apache.org>
Authored: Mon Nov 26 10:11:21 2018 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Mon Nov 26 10:11:21 2018 -0500

----------------------------------------------------------------------
 solr/core/src/test/org/apache/solr/update/RootFieldTest.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dc134be4/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/RootFieldTest.java b/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
index 7d4ed08..c9f6cfd 100644
--- a/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
+++ b/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
@@ -28,12 +28,14 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.CommonParams;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
 
 import static org.hamcrest.CoreMatchers.is;
 
+@Ignore("pending SOLR-5211 moshe fixing")
 public class RootFieldTest extends SolrJettyTestBase {
   private static boolean useRootSchema;
   private static final String MESSAGE = "Update handler should create and process _root_ field " +
@@ -49,8 +51,8 @@ public class RootFieldTest extends SolrJettyTestBase {
   @BeforeClass
   public static void beforeTest() throws Exception {
     useRootSchema = random().nextBoolean();
-    // schema.xml declares _root_ field while schema11.xml does not.
-    String schema = useRootSchema ? "schema.xml" : "schema11.xml";
+    // schema.xml declares _root_ field while schema15.xml does not.
+    String schema = useRootSchema ? "schema.xml" : "schema15.xml";
     initCore("solrconfig.xml", schema);
   }
 
@@ -78,7 +80,7 @@ public class RootFieldTest extends SolrJettyTestBase {
 
     // Check retrieved field values
     assertThat(foundDoc.getFieldValue( "id" ), is(docId));
-    assertThat( ((List)foundDoc.getFieldValue( "name" )).get(0), is("child free doc"));
+    assertThat(foundDoc.getFieldValue( "name" ), is("child free doc"));
 
     String expectedRootValue = expectRoot() ? docId : null;
     assertThat(MESSAGE, foundDoc.getFieldValue( "_root_" ), is(expectedRootValue));


[06/16] lucene-solr:jira/http2: LUCENE-8569: Never count soft-deletes if reader has no hard-deletes

Posted by da...@apache.org.
LUCENE-8569: Never count soft-deletes if reader has no hard-deletes

Today we count the actual soft-deletes during a merge, which is unnecessary if
there are no hard-deletes present. In this case, which is considered to be the
common case, we can get accurate counts by subtracting the number of deleted
docs in the wrapped reader from the number of soft-deletes in that reader.
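
In other words, when a reader carries no hard-deletes, every doc the wrapped (merge-time) reader reports as deleted is a soft-delete, so the carried-over count is a simple subtraction rather than a per-document scan of the soft-deletes iterator. A minimal sketch of that fast path (illustrative names, not the actual IndexWriter fields):

  class SoftDeleteCarryOverSketch {
    // With no hard-deletes, the soft-deletes surviving into the merged segment
    // are the segment's recorded soft-delete count minus the docs the wrapped
    // reader will drop during the merge.
    static int carryOverSoftDeletes(int segmentSoftDelCount, int wrappedNumDeletedDocs) {
      int carryOver = segmentSoftDelCount - wrappedNumDeletedDocs;
      assert carryOver >= 0 : "carry-over soft-deletes must not be negative";
      return carryOver;
    }
  }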


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/56cb42d2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/56cb42d2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/56cb42d2

Branch: refs/heads/jira/http2
Commit: 56cb42d200dd999342c4cd5c92fbfa271ba91153
Parents: 5f8855e
Author: Simon Willnauer <si...@apache.org>
Authored: Tue Nov 20 12:56:20 2018 +0100
Committer: Simon Willnauer <si...@apache.org>
Committed: Wed Nov 21 14:29:00 2018 +0100

----------------------------------------------------------------------
 .../org/apache/lucene/index/IndexWriter.java    | 84 +++++++++++++-------
 1 file changed, 56 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/56cb42d2/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 028554b..f841582 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -69,6 +69,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.StringHelper;
@@ -4352,6 +4353,36 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
     }
   }
 
+  private void countSoftDeletes(CodecReader reader, Bits wrappedLiveDocs, Bits hardLiveDocs, Counter softDeleteCounter,
+                                Counter hardDeleteCounter) throws IOException {
+    int hardDeleteCount = 0;
+    int softDeletesCount = 0;
+    DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), reader);
+    if (softDeletedDocs != null) {
+      int docId;
+      while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) {
+          if (hardLiveDocs == null || hardLiveDocs.get(docId)) {
+            softDeletesCount++;
+          } else {
+            hardDeleteCount++;
+          }
+        }
+      }
+    }
+    softDeleteCounter.addAndGet(softDeletesCount);
+    hardDeleteCounter.addAndGet(hardDeleteCount);
+  }
+
+  private boolean assertSoftDeletesCount(CodecReader reader, int expectedCount) throws IOException {
+    Counter count = Counter.newCounter(false);
+    Counter hardDeletes = Counter.newCounter(false);
+    countSoftDeletes(reader, reader.getLiveDocs(), null, count, hardDeletes);
+    assert count.get() == expectedCount : "soft-deletes count mismatch expected: "
+        + expectedCount  + " but actual: " + count.get() ;
+    return true;
+  }
+
   /** Does the actual (time-consuming) work of the merge,
    *  but without holding synchronized lock on IndexWriter
    *  instance */
@@ -4400,7 +4431,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
 
       // Let the merge wrap readers
       List<CodecReader> mergeReaders = new ArrayList<>();
-      int softDeleteCount = 0;
+      Counter softDeleteCount = Counter.newCounter(false);
       for (int r = 0; r < merge.readers.size(); r++) {
         SegmentReader reader = merge.readers.get(r);
         CodecReader wrappedReader = merge.wrapForMerge(reader);
@@ -4408,34 +4439,31 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
         if (softDeletesEnabled) {
           if (reader != wrappedReader) { // if we don't have a wrapped reader we won't preserve any soft-deletes
             Bits hardLiveDocs = merge.hardLiveDocs.get(r);
-            Bits wrappedLiveDocs = wrappedReader.getLiveDocs();
-            int hardDeleteCount = 0;
-            DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), wrappedReader);
-            if (softDeletedDocs != null) {
-              int docId;
-              while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-                if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) {
-                  if (hardLiveDocs == null || hardLiveDocs.get(docId)) {
-                    softDeleteCount++;
-                  } else {
-                    hardDeleteCount++;
+            if (hardLiveDocs != null) { // we only need to do this accounting if we have mixed deletes
+              Bits wrappedLiveDocs = wrappedReader.getLiveDocs();
+              Counter hardDeleteCounter = Counter.newCounter(false);
+              countSoftDeletes(wrappedReader, wrappedLiveDocs, hardLiveDocs, softDeleteCount, hardDeleteCounter);
+              int hardDeleteCount = Math.toIntExact(hardDeleteCounter.get());
+              // Wrap the wrapped reader again if we have excluded some hard-deleted docs
+              if (hardDeleteCount > 0) {
+                Bits liveDocs = wrappedLiveDocs == null ? hardLiveDocs : new Bits() {
+                  @Override
+                  public boolean get(int index) {
+                    return hardLiveDocs.get(index) && wrappedLiveDocs.get(index);
                   }
-                }
+
+                  @Override
+                  public int length() {
+                    return hardLiveDocs.length();
+                  }
+                };
+                wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount);
               }
-            }
-            // Wrap the wrapped reader again if we have excluded some hard-deleted docs
-            if (hardLiveDocs != null && hardDeleteCount > 0) {
-              Bits liveDocs = wrappedLiveDocs == null ? hardLiveDocs : new Bits() {
-                @Override
-                public boolean get(int index) {
-                  return hardLiveDocs.get(index) && wrappedLiveDocs.get(index);
-                }
-                @Override
-                public int length() {
-                  return hardLiveDocs.length();
-                }
-              };
-              wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount);
+            } else {
+              final int carryOverSoftDeletes = reader.getSegmentInfo().getSoftDelCount() - wrappedReader.numDeletedDocs();
+              assert carryOverSoftDeletes >= 0 : "carry-over soft-deletes must be positive";
+              assert assertSoftDeletesCount(wrappedReader, carryOverSoftDeletes);
+              softDeleteCount.addAndGet(carryOverSoftDeletes);
             }
           }
         }
@@ -4445,7 +4473,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
                                                      merge.info.info, infoStream, dirWrapper,
                                                      globalFieldNumberMap, 
                                                      context);
-      merge.info.setSoftDelCount(softDeleteCount);
+      merge.info.setSoftDelCount(Math.toIntExact(softDeleteCount.get()));
       merge.checkAborted();
 
       merge.mergeStartNS = System.nanoTime();


[02/16] lucene-solr:jira/http2: SOLR-13006: ZkNodeProps to be able to load from both javabin and JSON

Posted by da...@apache.org.
SOLR-13006: ZkNodeProps to be able to load from both javabin and JSON
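
The dispatch relies on the first byte of the payload: javabin streams written by JavaBinCodec begin with the codec version byte (2), whereas JSON produced by Utils.toJSON begins with '{'. A minimal sketch of that check (an illustrative helper mirroring the new ZkNodeProps.load() logic, not the actual method):

  class FormatSniffSketch {
    // A leading byte of 2 is taken as the javabin version marker;
    // anything else is handed to the JSON parser as before.
    static boolean looksLikeJavabin(byte[] bytes) {
      return bytes != null && bytes.length > 0 && bytes[0] == 2;
    }
  }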


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/492c3440
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/492c3440
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/492c3440

Branch: refs/heads/jira/http2
Commit: 492c3440def2112fe76c99b691cc073ca6511f62
Parents: ea304a3
Author: Noble Paul <no...@apache.org>
Authored: Wed Nov 21 18:20:03 2018 +1100
Committer: Noble Paul <no...@apache.org>
Committed: Wed Nov 21 18:20:03 2018 +1100

----------------------------------------------------------------------
 solr/CHANGES.txt                                   |  2 ++
 .../org/apache/solr/cloud/ZkNodePropsTest.java     | 17 ++++++++++-------
 .../org/apache/solr/common/cloud/ZkNodeProps.java  | 13 ++++++++++++-
 .../apache/solr/common/util/FastInputStream.java   |  6 +++++-
 .../org/apache/solr/common/util/JavaBinCodec.java  | 13 +++++++++++++
 5 files changed, 42 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/492c3440/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7994142..d323c9d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -270,6 +270,8 @@ Other Changes
 * SOLR-12497: Add documentation to use Hadoop credential provider-based keystore/trustsore.
 (Mano Kovacs, Cassandra Targett)
 
+* SOLR-13006: ZkNodeProps to be able to load from both javabin and JSON (noble)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/492c3440/solr/core/src/test/org/apache/solr/cloud/ZkNodePropsTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkNodePropsTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkNodePropsTest.java
index 11e93d6..88ce3c8 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkNodePropsTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkNodePropsTest.java
@@ -22,7 +22,9 @@ import java.util.Map;
 
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.common.util.Utils;
+import org.apache.solr.util.SimplePostTool;
 import org.junit.Test;
 
 public class ZkNodePropsTest extends SolrTestCaseJ4 {
@@ -39,13 +41,14 @@ public class ZkNodePropsTest extends SolrTestCaseJ4 {
     
     ZkNodeProps zkProps = new ZkNodeProps(props);
     byte[] bytes = Utils.toJSON(zkProps);
-    
     ZkNodeProps props2 = ZkNodeProps.load(bytes);
-    assertEquals("value1", props2.getStr("prop1"));
-    assertEquals("value2", props2.getStr("prop2"));
-    assertEquals("value3", props2.getStr("prop3"));
-    assertEquals("value4", props2.getStr("prop4"));
-    assertEquals("value5", props2.getStr("prop5"));
-    assertEquals("value6", props2.getStr("prop6"));
+
+    props.forEach((s, o) -> assertEquals(o, props2.get(s)));
+    SimplePostTool.BAOS baos = new SimplePostTool.BAOS();
+    new JavaBinCodec().marshal(zkProps.getProperties(), baos);
+    bytes = baos.toByteArray();
+    System.out.println("BIN size : " + bytes.length);
+    ZkNodeProps props3 = ZkNodeProps.load(bytes);
+    props.forEach((s, o) -> assertEquals(o, props3.get(s)));
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/492c3440/solr/solrj/src/java/org/apache/solr/common/cloud/ZkNodeProps.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkNodeProps.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkNodeProps.java
index 93fe59a..752ce45 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkNodeProps.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkNodeProps.java
@@ -16,11 +16,13 @@
  */
 package org.apache.solr.common.cloud;
 
+import java.io.IOException;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.solr.common.util.JavaBinCodec;
 import org.apache.solr.common.util.Utils;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
@@ -89,7 +91,16 @@ public class ZkNodeProps implements JSONWriter.Writable {
    * Create Replica from json string that is typically stored in zookeeper.
    */
   public static ZkNodeProps load(byte[] bytes) {
-    Map<String, Object> props = (Map<String, Object>) Utils.fromJSON(bytes);
+    Map<String, Object> props = null;
+    if (bytes[0] == 2) {
+      try {
+        props = (Map<String, Object>) new JavaBinCodec().unmarshal(bytes);
+      } catch (IOException e) {
+        throw new RuntimeException("Unable to parse javabin content");
+      }
+    } else {
+      props = (Map<String, Object>) Utils.fromJSON(bytes);
+    }
     return new ZkNodeProps(props);
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/492c3440/solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java b/solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
index 06d0738..bbcc129 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
@@ -16,7 +16,10 @@
  */
 package org.apache.solr.common.util;
 
-import java.io.*;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
 
 /** Single threaded buffered InputStream
  *  Internal Solr use only, subject to change.
@@ -76,6 +79,7 @@ public class FastInputStream extends DataInputInputStream {
   }
 
   public int readWrappedStream(byte[] target, int offset, int len) throws IOException {
+    if(in == null) return -1;
     return in.read(target, offset, len);
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/492c3440/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
index 2e49cf9..ca8c80f 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -179,6 +179,10 @@ public class JavaBinCodec implements PushWriter {
 
   byte version;
 
+  public Object unmarshal(byte[] buf) throws IOException {
+    FastInputStream dis = initRead(buf);
+    return readVal(dis);
+  }
   public Object unmarshal(InputStream is) throws IOException {
     FastInputStream dis = initRead(is);
     return readVal(dis);
@@ -187,6 +191,15 @@ public class JavaBinCodec implements PushWriter {
   protected FastInputStream initRead(InputStream is) throws IOException {
     assert !alreadyUnmarshalled;
     FastInputStream dis = FastInputStream.wrap(is);
+    return _init(dis);
+  }
+  protected FastInputStream initRead(byte[] buf) throws IOException {
+    assert !alreadyUnmarshalled;
+    FastInputStream dis = new FastInputStream(null, buf, 0, buf.length);
+    return _init(dis);
+  }
+
+  private FastInputStream _init(FastInputStream dis) throws IOException {
     version = dis.readByte();
     if (version != VERSION) {
       throw new RuntimeException("Invalid version (expected " + VERSION +


[14/16] lucene-solr:jira/http2: SOLR-12740: revise migration docs for clarity and typos

Posted by da...@apache.org.
SOLR-12740: revise migration docs for clarity and typos


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/68c07744
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/68c07744
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/68c07744

Branch: refs/heads/jira/http2
Commit: 68c0774458f9d0697bf7875e677474bae07dd266
Parents: dc134be
Author: Cassandra Targett <ct...@apache.org>
Authored: Mon Nov 26 12:15:15 2018 -0600
Committer: Cassandra Targett <ct...@apache.org>
Committed: Mon Nov 26 12:15:15 2018 -0600

----------------------------------------------------------------------
 .../src/migrate-to-policy-rule.adoc             | 140 +++++++++++--------
 .../src/solrcloud-autoscaling.adoc              |   2 +-
 2 files changed, 82 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/68c07744/solr/solr-ref-guide/src/migrate-to-policy-rule.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/migrate-to-policy-rule.adoc b/solr/solr-ref-guide/src/migrate-to-policy-rule.adoc
index 587147e..38b0cd6 100644
--- a/solr/solr-ref-guide/src/migrate-to-policy-rule.adoc
+++ b/solr/solr-ref-guide/src/migrate-to-policy-rule.adoc
@@ -1,4 +1,4 @@
-= Migrate to Policy Rules
+= Migrating Rule-Based Replica Rules to Autoscaling Policies
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
@@ -16,52 +16,83 @@
 // specific language governing permissions and limitations
 // under the License.
 
-Replica placement rules (legacy) are deprecated in favor of the new policy and preferences syntax (new). This document helps you to migrate your existing replica placement rules to the new syntax.
+Creating rules for replica placement in a Solr cluster is now done with the <<solrcloud-autoscaling.adoc#solrcloud-autoscaling,autoscaling framework>>.
 
+This document outlines how to migrate from the legacy <<rule-based-replica-placement.adoc#rule-based-replica-placement,rule-based replica placement>> to an <<solrcloud-autoscaling-policy-preferences.adoc#solrcloud-autoscaling-policy-preferences,autoscaling policy>>.
 
-Every rule in the legacy framework can be expressed in the new syntax. Please refer to <<solrcloud-autoscaling-policy-preferences.adoc#cluster-preferences-specification,Cluster Preferences Specification>> document for more details.
+The autoscaling framework is designed to fully automate your cluster management.
+However, if you do not want actions taken on your cluster in an automatic way, you can still use the framework to set rules and preferences.
+With a set of rules and preferences in place, instead of taking action directly, the system will suggest actions you can take manually.
 
+The section <<solrcloud-autoscaling-policy-preferences.adoc#cluster-preferences-specification,Cluster Preferences Specification>> describes the capabilities of an autoscaling policy in detail.
+Below we'll walk through a few examples to show how you would express your legacy rules in the autoscaling syntax.
+Every rule in the legacy rule-based replica framework can be expressed in the new syntax.
+
+== How Rules are Defined
+
+One key difference between the frameworks is the way rules are defined.
+
+With the rule-based replica placement framework, rules are defined with the Collections API at the time of collection creation.
+
+The autoscaling framework, however, has its own <<solrcloud-autoscaling-api.adoc#solrcloud-autoscaling-api,API>>.
+Policies can be configured for the entire cluster or for individual collections depending on your needs.
+
+The following is the legacy syntax for a rule that allows at most one replica of each shard on any Solr node:
 
-The following is the legacy syntax for a rule that limits maximum one replica for each shard in any Solr node
 [source,text]
 ----
 replica:<2,node:*,shard:**
 ----
-The rules are specified along with a collection creation operation as the `rules` parameter
 
-The equivalent new syntax is
+The equivalent rule in the autoscaling policy is:
+
 [source,json]
 ----
-{"replica":"<2","node":"#ANY","shard":"#EACH"}
+{"replica":"<2", "node":"#ANY", "shard":"#EACH"}
 ----
 
-The new policy rules have to be created separately using an API call  <<solrcloud-autoscaling-api.adoc#cluster-specific-policies, See examples>>
+== Differences in Rule Syntaxes
+
+Many elements of defining rules are similar in both frameworks, but some elements are different.
 
 [[rule-operators1]]
-== Rule Operators
+=== Rule Operators
 
-All the following operators can be directly used in the new policy syntax and they mean the same.
+All of the following operators can be directly used in the new policy syntax and they mean the same in both frameworks.
 
-* *equals (no operator required)*: `tag:x` means tag value must be equal to ‘x’
-* *greater than (>)*: `tag:>x` means tag value greater than ‘x’. x must be a number
-* *less than (<)*: `tag:<x` means tag value less than ‘x’. x must be a number
-* *not equal (!)*: `tag:!x` means tag value MUST NOT be equal to ‘x’. The equals check is performed on String value
+* *equals (no operator required)*: `tag:x` means the value for a tag must be equal to `'x'`.
+* *greater than (>)*: `tag:>x` means the tag value must be greater than `'x'`. In this case, `'x'` must be a number.
+* *less than (<)*: `tag:<x` means the tag value must be less than `'x'`. In this case also, `'x'` must be a number.
+* *not equal (!)*: `tag:!x` means the tag value MUST NOT be equal to `'x'`. The equals check is performed on a String value.
 
 [[fuzzy-operator1]]
-=== Fuzzy Operator (~)
-There is no `~` operator in the new syntax. Please use the attribute `"strict":false` instead
+==== Fuzzy Operator (~)
+
+There is no `~` operator in the autoscaling policy syntax.
+Instead, it uses the `strict` parameter, which can be `true` or `false`.
+To replace the `~` operator, use the attribute `"strict":false` instead.
 
-example:
+For example:
+
+.Rule-based replica placement framework:
+[source,text]
+----
+replica:<2~,node:*,shard:**
+----
+
+.Autoscaling framework:
 [source,json]
 ----
-{"replica":"<2","node":"#ANY","shard":"#EACH", "strict": false}
+{"replica":"<2", "node":"#ANY", "shard":"#EACH", "strict": false}
 ----
 
 [[tag-names1]]
-== Tag names
+=== Attributes
 
-Tag values are provided by the framework and these tags mean the same the new syntax as well
+Attributes were known as "tags" in the rule-based replica placement framework.
+In the autoscaling framework, they are attributes that are used for node selection or to set global cluster-wide rules.
 
+The available attributes in the autoscaling framework are similar to the tags that were available with rule-based replica placement. Attributes with the same name mean the same in both frameworks.
 
 * *cores*: Number of cores in the node
 * *freedisk*: Disk space available in the node
@@ -73,104 +104,95 @@ Tag values are provided by the framework and these tags mean the same the new sy
 * *sysprop.\{PROPERTY_NAME}*: These are values available from system properties. `sysprop.key` means a value that is passed to the node as `-Dkey=keyValue` during the node startup. It is possible to use rules like `sysprop.key:expectedVal,shard:*`
 
 [[snitches1]]
-== Snitches
-There is no equivalent for a snitch in the new policy framework
+=== Snitches
 
-== Porting existing Replica placement rules
-[[keep-less-than-2-replicas]]
-=== Keep less than 2 replicas (at most 1 replica) of this collection on any node
+There is no equivalent for a snitch in the autoscaling policy framework.
 
-For this rule, we define the `replica` condition with operators for "less than 2", and use a pre-defined tag named `node` to define nodes with any name.
+== Porting Existing Replica Placement Rules
+
+There is no automatic way to move from using rule-based replica placement rules to an autoscaling policy.
+Instead you will need to remove your replica rules from each collection and institute a policy using the <<solrcloud-autoscaling-api.adoc#solrcloud-autoscaling-api,autoscaling API>>.
 
-*legacy:*
+The following examples are intended to help you translate your existing rules into new rules that fit the autoscaling framework.
 
+*Keep less than 2 replicas (at most 1 replica) of this collection on any node*
+
+For this rule, we define the `replica` condition with operators for "less than 2", and use a pre-defined tag named `node` to define nodes with any name.
+
+.Rule-based replica placement framework:
 [source,text]
 ----
 replica:<2,node:*
 ----
 
-*new:*
-
+.Autoscaling framework:
 [source,json]
 ----
 {"replica":"<2","node":"#ANY"}
 ----
-[[keep-less-than-2-replicas--per-shard]]
-=== For a given shard, keep less than 2 replicas on any node
 
-For this rule, we use the `shard` condition to define any shard, the `replica` condition with operators for "less than 2", and finally a pre-defined tag named `node` to define nodes with any name.
+*For a given shard, keep less than 2 replicas on any node*
 
-*legacy:*
+For this rule, we use the `shard` condition to define any shard, the `replica` condition with operators for "less than 2", and finally a pre-defined tag named `node` to define nodes with any name.
 
+.Rule-based replica placement framework:
 [source,text]
 ----
 shard:*,replica:<2,node:*
 ----
 
-*new:*
-
+.Autoscaling framework:
 [source,json]
 ----
 {"replica":"<2","shard":"#EACH", "node":"#ANY"}
 ----
 
-[[all-replicas-on-shard-730]]
-=== Assign all replicas in shard1 to rack 730
-
-This rule limits the `shard` condition to 'shard1', but any number of replicas. We're also referencing a custom tag named `rack`. Before defining this rule, we will need to configure a custom Snitch which provides values for the tag `rack`.
+*Assign all replicas in shard1 to rack 730*
 
-*legacy:*
+This rule limits the `shard` condition to 'shard1', but any number of replicas. We're also referencing a custom tag named `rack`.
 
+.Rule-based replica placement framework:
 [source,text]
 ----
 shard:shard1,replica:*,rack:730
 ----
 
-*new:*
-
+.Autoscaling framework:
 [source,json]
 ----
 {"replica":"#ALL", "shard":"shard1", "sysprop.rack":"730"}
 ----
 
-Please note that all your nodes must be started with a system property `-Drack=<rack-number>`
+In the rule-based replica placement framework, we needed to configure a custom Snitch which provides values for the tag `rack`.
 
-[[less-than-5-cores]]
-=== Create replicas in nodes with less than 5 cores only
+With the autoscaling framework, however, we need to start all nodes with a system property to define the rack values. For example, `bin/solr start -c -Drack=<rack-number>`.
 
-This rule uses the `replica` condition to define any number of replicas, but adds a pre-defined tag named `core` and uses operators for "less than 5".
+*Create replicas in nodes with less than 5 cores only*
 
-*legacy:*
+This rule uses the `replica` condition to define any number of replicas, but adds a pre-defined tag named `core` and uses operators for "less than 5".
 
+.Rule-based replica placement framework:
 [source,text]
 ----
 cores:<5
 ----
 
-*new:*
-
+.Autoscaling framework:
 [source,json]
 ----
 {"cores":"<5", "node":"#ANY"}
 ----
-[[do-not-create-on-host]]
-=== Do not create any replicas in host 192.45.67.3
 
-*legacy:*
+*Do not create any replicas in host 192.45.67.3*
 
+.Rule-based replica placement framework:
 [source,text]
 ----
 host:!192.45.67.3
 ----
 
-*new:*
-
+.Autoscaling framework:
 [source,json]
 ----
 {"replica": 0, "host":"192.45.67.3"}
 ----
-
-[[defining-rules1]]
-== Defining Rules
-
-Rules are always defined in along with the collection in the legacy system. The new syntax allows you to specify rules globally as well as on a <<solrcloud-autoscaling-policy-preferences.adoc#collection-specific-policy, per collection basis>>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/68c07744/solr/solr-ref-guide/src/solrcloud-autoscaling.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling.adoc
index 61b8b6e..5f9f148 100644
--- a/solr/solr-ref-guide/src/solrcloud-autoscaling.adoc
+++ b/solr/solr-ref-guide/src/solrcloud-autoscaling.adoc
@@ -32,4 +32,4 @@ The following sections describe the autoscaling features of SolrCloud:
 * <<solrcloud-autoscaling-auto-add-replicas.adoc#solrcloud-autoscaling-auto-add-replicas,Automatically Adding Replicas>>
 * <<solrcloud-autoscaling-fault-tolerance.adoc#solrcloud-autoscaling-fault-tolerance,Autoscaling Fault Tolerance>>
 * <<solrcloud-autoscaling-api.adoc#solrcloud-autoscaling-api,Autoscaling API>>
-* <<migrate-to-policy-rule.adoc#migrate-to-policy-rule, Migrating from legacy rules>>
+* <<migrate-to-policy-rule.adoc#migrate-to-policy-rule, Migrating Rule-Based Replica Rules to Autoscaling Policies>>
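
Not part of the commit, but as a concrete companion to the migration examples above: once a legacy rule has been translated, it is installed through the Autoscaling write API rather than at collection-creation time. Assuming the endpoint described on the Autoscaling API page (for example, POSTing to /solr/admin/autoscaling on a running SolrCloud node), the command body for the "at most one replica per shard per node" rule would look roughly like:

{
  "set-cluster-policy": [
    {"replica": "<2", "shard": "#EACH", "node": "#ANY"}
  ]
}

A collection-specific policy would use the "set-policy" command with a policy name instead of "set-cluster-policy".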


[08/16] lucene-solr:jira/http2: [TEST] Prevent merges in simple tests that compare doc stats from IW

Posted by da...@apache.org.
[TEST] Prevent merges in simple tests that compare doc stats from IW


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f7fa2506
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f7fa2506
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f7fa2506

Branch: refs/heads/jira/http2
Commit: f7fa25069e16caeca1a8bed184dab7ed0c35545f
Parents: 67cdd21
Author: Simon Willnauer <si...@apache.org>
Authored: Thu Nov 22 14:20:29 2018 +0100
Committer: Simon Willnauer <si...@apache.org>
Committed: Thu Nov 22 14:20:29 2018 +0100

----------------------------------------------------------------------
 .../core/src/test/org/apache/lucene/index/TestIndexWriter.java   | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f7fa2506/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 20d3532..a5b45f7 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -3081,7 +3081,9 @@ public class TestIndexWriter extends LuceneTestCase {
 
   public void testSoftUpdateDocuments() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig().setSoftDeletesField("soft_delete"));
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()
+        .setMergePolicy(NoMergePolicy.INSTANCE)
+        .setSoftDeletesField("soft_delete"));
     expectThrows(IllegalArgumentException.class, () -> {
       writer.softUpdateDocument(null, new Document(), new NumericDocValuesField("soft_delete", 1));
     });


[03/16] lucene-solr:jira/http2: LUCENE-8216: Added a new BM25FQuery in sandbox to blend statistics across several fields using the BM25F formula

Posted by da...@apache.org.
LUCENE-8216: Added a new BM25FQuery in sandbox to blend statistics across several fields using the BM25F formula


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fd96bc5c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fd96bc5c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fd96bc5c

Branch: refs/heads/jira/http2
Commit: fd96bc5ca6b1cf0c24953fb7b35937e403846440
Parents: 492c344
Author: Jim Ferenczi <ji...@apache.org>
Authored: Wed Nov 21 10:24:30 2018 +0100
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Wed Nov 21 10:24:30 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../org/apache/lucene/search/BM25FQuery.java    | 430 +++++++++++++++++++
 .../lucene/search/MultiNormsLeafSimScorer.java  | 155 +++++++
 .../apache/lucene/search/TestBM25FQuery.java    | 168 ++++++++
 4 files changed, 756 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fd96bc5c/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 16e9107..5a347d8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -148,6 +148,9 @@ New Features
   based on the haversine distance of a LatLonPoint field to a provided point. This is
   typically useful to boost by distance. (Ignacio Vera)
 
+* LUCENE-8216: Added a new BM25FQuery in sandbox to blend statistics across several fields
+  using the BM25F formula. (Adrien Grand, Jim Ferenczi)
+
 Improvements
 
 * LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.
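
For readers skimming the CHANGES entry: the "simple" BM25F variant implemented here merges per-field statistics before scoring rather than scoring each field separately. Schematically (a sketch of the idea only, not the exact constants Lucene applies), the weighted term frequency and document length are

  \tilde{tf}(t,d) = \sum_{f} w_f \, tf(t,f,d), \qquad \tilde{dl}(d) = \sum_{f} w_f \, dl_f(d)

and that single pseudo-field frequency and length are then run through the usual BM25 saturation with k1 and b against merged collection statistics.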

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fd96bc5c/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
new file mode 100644
index 0000000..025d734
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java
@@ -0,0 +1,430 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.SimilarityBase;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link Query} that treats multiple fields as a single stream and scores
+ * terms as if you had indexed them as a single term in a single field.
+ *
+ * For scoring purposes this query implements the BM25F's simple formula
+ * described in:
+ *  http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf
+ *
+ * The per-field similarity is ignored but to be compatible each field must use
+ * a {@link Similarity} at index time that encodes norms the same way as
+ * {@link SimilarityBase#computeNorm}.
+ *
+ * @lucene.experimental
+ */
+public final class BM25FQuery extends Query {
+
+  /**
+   * A builder for {@link BM25FQuery}.
+   */
+  public static class Builder {
+    private final BM25Similarity similarity;
+    private final Map<String, FieldAndWeight> fieldAndWeights = new HashMap<>();
+    private final Set<BytesRef> termsSet = new HashSet<>();
+
+    /**
+     * Default builder.
+     */
+    public Builder() {
+      this.similarity = new BM25Similarity();
+    }
+
+    /**
+     * Builder with the supplied parameter values.
+     * @param k1 Controls non-linear term frequency normalization (saturation).
+     * @param b Controls to what degree document length normalizes tf values.
+     */
+    public Builder(float k1, float b) {
+      this.similarity = new BM25Similarity(k1, b);
+    }
+
+    /**
+     * Adds a field to this builder.
+     * @param field The field name.
+     */
+    public Builder addField(String field) {
+      return addField(field, 1f);
+    }
+
+    /**
+     * Adds a field to this builder.
+     * @param field The field name.
+     * @param weight The weight associated to this field.
+     */
+    public Builder addField(String field, float weight) {
+      if (weight < 1) {
+        throw new IllegalArgumentException("weight must be greater than 1");
+      }
+      fieldAndWeights.put(field, new FieldAndWeight(field, weight));
+      return this;
+    }
+
+    /**
+     * Adds a term to this builder.
+     */
+    public Builder addTerm(BytesRef term) {
+      if (termsSet.size() > BooleanQuery.getMaxClauseCount()) {
+        throw new BooleanQuery.TooManyClauses();
+      }
+      termsSet.add(term);
+      return this;
+    }
+
+    /**
+     * Builds the {@link BM25FQuery}.
+     */
+    public BM25FQuery build() {
+      int size = fieldAndWeights.size() * termsSet.size();
+      if (size > BooleanQuery.getMaxClauseCount()) {
+        throw new BooleanQuery.TooManyClauses();
+      }
+      BytesRef[] terms = termsSet.toArray(new BytesRef[0]);
+      return new BM25FQuery(similarity, new TreeMap<>(fieldAndWeights), terms);
+    }
+  }
+
+  static class FieldAndWeight {
+    final String field;
+    final float weight;
+
+    FieldAndWeight(String field, float weight) {
+      this.field = field;
+      this.weight = weight;
+    }
+  }
+
+  // the similarity to use for scoring.
+  private final BM25Similarity similarity;
+  // sorted map for fields.
+  private final TreeMap<String, FieldAndWeight> fieldAndWeights;
+  // array of terms, sorted.
+  private final BytesRef terms[];
+  // array of terms per field, sorted
+  private final Term fieldTerms[];
+
+  private BM25FQuery(BM25Similarity similarity, TreeMap<String, FieldAndWeight> fieldAndWeights, BytesRef[] terms) {
+    this.similarity = similarity;
+    this.fieldAndWeights = fieldAndWeights;
+    this.terms = terms;
+    int numFieldTerms = fieldAndWeights.size() * terms.length;
+    if (numFieldTerms > BooleanQuery.getMaxClauseCount()) {
+      throw new BooleanQuery.TooManyClauses();
+    }
+    this.fieldTerms = new Term[numFieldTerms];
+    Arrays.sort(terms);
+    int pos = 0;
+    for (String field : fieldAndWeights.keySet()) {
+      for (BytesRef term : terms) {
+        fieldTerms[pos++] = new Term(field, term);
+      }
+    }
+  }
+
+  public List<Term> getTerms() {
+    return Collections.unmodifiableList(Arrays.asList(fieldTerms));
+  }
+
+  @Override
+  public String toString(String field) {
+    StringBuilder builder = new StringBuilder("BM25F((");
+    int pos = 0;
+    for (FieldAndWeight fieldWeight : fieldAndWeights.values()) {
+      if (pos++ != 0) {
+        builder.append(" ");
+      }
+      builder.append(fieldWeight.field);
+      if (fieldWeight.weight != 1f) {
+        builder.append("^");
+        builder.append(fieldWeight.weight);
+      }
+    }
+    builder.append(")(");
+    pos = 0;
+    for (BytesRef term : terms) {
+      if (pos++ != 0) {
+        builder.append(" ");
+      }
+      builder.append(term.utf8ToString());
+    }
+    builder.append("))");
+    return builder.toString();
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * classHash() + Arrays.hashCode(terms);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return sameClassAs(other) &&
+        Arrays.equals(terms, ((BM25FQuery) other).terms);
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    // optimize zero and single field cases
+    if (terms.length == 0) {
+      return new BooleanQuery.Builder().build();
+    }
+    // single field and one term
+    if (fieldTerms.length == 1) {
+      return new TermQuery(fieldTerms[0]);
+    }
+    // single field and multiple terms
+    if (fieldAndWeights.size() == 1) {
+      return new SynonymQuery(fieldTerms);
+    }
+    return this;
+  }
+
+  private BooleanQuery rewriteToBoolean() {
+    // rewrite to a simple disjunction if the score is not needed.
+    BooleanQuery.Builder bq = new BooleanQuery.Builder();
+    for (Term term : fieldTerms) {
+      bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
+    }
+    return bq.build();
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+    if (scoreMode.needsScores()) {
+      return new BM25FWeight(this, searcher, scoreMode, boost);
+    } else {
+      // rewrite to a simple disjunction if the score is not needed.
+      Query bq = rewriteToBoolean();
+      return searcher.rewrite(bq).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
+    }
+  }
+
+  class BM25FWeight extends Weight {
+    private final IndexSearcher searcher;
+    private final TermStates termStates[];
+    private final Similarity.SimScorer simWeight;
+
+    BM25FWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+      super(query);
+      assert scoreMode.needsScores();
+      this.searcher = searcher;
+      long docFreq = 0;
+      long totalTermFreq = 0;
+      termStates = new TermStates[fieldTerms.length];
+      for (int i = 0; i < termStates.length; i++) {
+        FieldAndWeight field = fieldAndWeights.get(fieldTerms[i].field());
+        termStates[i] = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true);
+        TermStatistics termStats = searcher.termStatistics(fieldTerms[i], termStates[i]);
+        if (termStats != null) {
+          docFreq = Math.max(termStats.docFreq(), docFreq);
+          totalTermFreq += (double) field.weight * termStats.totalTermFreq();
+        }
+      }
+      if (docFreq > 0) {
+        CollectionStatistics pseudoCollectionStats = mergeCollectionStatistics(searcher);
+        TermStatistics pseudoTermStatistics = new TermStatistics(new BytesRef("pseudo_term"), docFreq, Math.max(1, totalTermFreq));
+        this.simWeight = similarity.scorer(boost, pseudoCollectionStats, pseudoTermStatistics);
+      } else {
+        this.simWeight = null;
+      }
+    }
+
+    private CollectionStatistics mergeCollectionStatistics(IndexSearcher searcher) throws IOException {
+      long maxDoc = searcher.getIndexReader().maxDoc();
+      long docCount = 0;
+      long sumTotalTermFreq = 0;
+      long sumDocFreq = 0;
+      for (FieldAndWeight fieldWeight : fieldAndWeights.values()) {
+        CollectionStatistics collectionStats = searcher.collectionStatistics(fieldWeight.field);
+        if (collectionStats != null) {
+          docCount = Math.max(collectionStats.docCount(), docCount);
+          sumDocFreq = Math.max(collectionStats.sumDocFreq(), sumDocFreq);
+          sumTotalTermFreq += (double) fieldWeight.weight * collectionStats.sumTotalTermFreq();
+        }
+      }
+
+      return new CollectionStatistics("pseudo_field", maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
+    }
+
+    @Override
+    public void extractTerms(Set<Term> termSet) {
+      termSet.addAll(Arrays.asList(fieldTerms));
+    }
+
+    @Override
+    public Matches matches(LeafReaderContext context, int doc) throws IOException {
+      Weight weight = searcher.rewrite(rewriteToBoolean()).createWeight(searcher, ScoreMode.COMPLETE, 1f);
+      return weight.matches(context, doc);
+    }
+
+    @Override
+    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+      Scorer scorer = scorer(context);
+      if (scorer != null) {
+        int newDoc = scorer.iterator().advance(doc);
+        if (newDoc == doc) {
+          final float freq;
+          if (scorer instanceof BM25FScorer) {
+            freq = ((BM25FScorer) scorer).freq();
+          } else {
+            assert scorer instanceof TermScorer;
+            freq = ((TermScorer) scorer).freq();
+          }
+          final MultiNormsLeafSimScorer docScorer =
+              new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
+          Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
+          Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
+          return Explanation.match(
+              scoreExplanation.getValue(),
+              "weight(" + getQuery() + " in " + doc + ") ["
+                  + similarity.getClass().getSimpleName() + "], result of:",
+              scoreExplanation);
+        }
+      }
+      return Explanation.noMatch("no matching term");
+    }
+
+    @Override
+    public Scorer scorer(LeafReaderContext context) throws IOException {
+      List<PostingsEnum> iterators = new ArrayList<>();
+      List<FieldAndWeight> fields = new ArrayList<>();
+      for (int i = 0; i < fieldTerms.length; i++) {
+        TermState state = termStates[i].get(context);
+        if (state != null) {
+          TermsEnum termsEnum = context.reader().terms(fieldTerms[i].field()).iterator();
+          termsEnum.seekExact(fieldTerms[i].bytes(), state);
+          PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
+          iterators.add(postingsEnum);
+          fields.add(fieldAndWeights.get(fieldTerms[i].field()));
+        }
+      }
+
+      if (iterators.isEmpty()) {
+        return null;
+      }
+
+      // we must optimize this case (term not in segment), disjunctions require >= 2 subs
+      if (iterators.size() == 1) {
+        final LeafSimScorer scoringSimScorer =
+            new LeafSimScorer(simWeight, context.reader(), fields.get(0).field, true);
+        return new TermScorer(this, iterators.get(0), scoringSimScorer);
+      }
+      final MultiNormsLeafSimScorer scoringSimScorer =
+          new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
+      LeafSimScorer nonScoringSimScorer = new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
+      // we use termscorers + disjunction as an impl detail
+      DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
+      for (int i = 0; i < iterators.size(); i++) {
+        float weight = fields.get(i).weight;
+        queue.add(new WeightedDisiWrapper(new TermScorer(this, iterators.get(i), nonScoringSimScorer), weight));
+      }
+      // Even though it is called approximation, it is accurate since none of
+      // the sub iterators are two-phase iterators.
+      DocIdSetIterator iterator = new DisjunctionDISIApproximation(queue);
+      return new BM25FScorer(this, queue, iterator, scoringSimScorer);
+    }
+
+    @Override
+    public boolean isCacheable(LeafReaderContext ctx) {
+      return false;
+    }
+  }
+
+  private static class WeightedDisiWrapper extends DisiWrapper {
+    final float weight;
+
+    WeightedDisiWrapper(Scorer scorer, float weight) {
+      super(scorer);
+      this.weight = weight;
+    }
+
+    float freq() throws IOException {
+      return weight * ((PostingsEnum) iterator).freq();
+    }
+  }
+
+  private static class BM25FScorer extends Scorer {
+    private final DisiPriorityQueue queue;
+    private final DocIdSetIterator iterator;
+    private final MultiNormsLeafSimScorer simScorer;
+
+    BM25FScorer(Weight weight, DisiPriorityQueue queue, DocIdSetIterator iterator, MultiNormsLeafSimScorer simScorer) {
+      super(weight);
+      this.queue = queue;
+      this.iterator = iterator;
+      this.simScorer = simScorer;
+    }
+
+    @Override
+    public int docID() {
+      return iterator.docID();
+    }
+
+    float freq() throws IOException {
+      DisiWrapper w = queue.topList();
+      float freq = ((WeightedDisiWrapper) w).freq();
+      for (w = w.next; w != null; w = w.next) {
+        freq += ((WeightedDisiWrapper) w).freq();
+        if (freq < 0) { // overflow
+          return Integer.MAX_VALUE;
+        }
+      }
+      return freq;
+    }
+
+    @Override
+    public float score() throws IOException {
+      return simScorer.score(iterator.docID(), freq());
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      return iterator;
+    }
+
+    @Override
+    public float getMaxScore(int upTo) throws IOException {
+      return Float.POSITIVE_INFINITY;
+    }
+  }
+}
\ No newline at end of file
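
To make the Builder API above concrete, here is a minimal, hedged usage sketch (field names, weights, and terms are invented; the searcher is assumed to be an IndexSearcher over an index whose fields encode norms the BM25 way, as the class javadoc requires):

import java.io.IOException;
import org.apache.lucene.search.BM25FQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;

public class BM25FQueryExample {
  static TopDocs searchTitleAndBody(IndexSearcher searcher) throws IOException {
    BM25FQuery query = new BM25FQuery.Builder()   // Builder(k1, b) is also available to tune BM25
        .addField("title", 3f)                    // "title" matches weighted 3x
        .addField("body")                         // default weight of 1
        .addTerm(new BytesRef("lucene"))
        .build();
    return searcher.search(query, 10);            // scored as if "title" and "body" were one field
  }
}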

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fd96bc5c/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
new file mode 100644
index 0000000..75c9801
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.util.SmallFloat;
+
+import static org.apache.lucene.search.BM25FQuery.FieldAndWeight;
+
+/**
+ * Copy of {@link LeafSimScorer} that sums document's norms from multiple fields.
+ */
+final class MultiNormsLeafSimScorer {
+  /**
+   * Cache of decoded norms.
+   */
+  private static final float[] LENGTH_TABLE = new float[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
+    }
+  }
+
+  private final SimScorer scorer;
+  private final NumericDocValues norms;
+
+  /**
+   * Sole constructor: Score documents of {@code reader} with {@code scorer}.
+   *
+   */
+  MultiNormsLeafSimScorer(SimScorer scorer, LeafReader reader, Collection<FieldAndWeight> normFields, boolean needsScores) throws IOException {
+    this.scorer = Objects.requireNonNull(scorer);
+    if (needsScores) {
+      final List<NumericDocValues> normsList = new ArrayList<>();
+      final List<Float> weightList = new ArrayList<>();
+      for (FieldAndWeight field : normFields) {
+        NumericDocValues norms = reader.getNormValues(field.field);
+        if (norms != null) {
+          normsList.add(norms);
+          weightList.add(field.weight);
+        }
+      }
+      if (normsList.isEmpty()) {
+        norms = null;
+      } else if (normsList.size() == 1) {
+        norms = normsList.get(0);
+      } else {
+        final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
+        final float[] weightArr = new float[normsList.size()];
+        for (int i = 0; i < weightList.size(); i++) {
+          weightArr[i] = weightList.get(i);
+        }
+        norms = new MultiFieldNormValues(normsArr, weightArr);
+      }
+    } else {
+      norms = null;
+    }
+  }
+
+  private long getNormValue(int doc) throws IOException {
+    if (norms != null) {
+      boolean found = norms.advanceExact(doc);
+      assert found;
+      return norms.longValue();
+    } else {
+      return 1L; // default norm
+    }
+  }
+
+  /** Score the provided document assuming the given term document frequency.
+   *  This method must be called on non-decreasing sequences of doc ids.
+   *  @see SimScorer#score(float, long) */
+  public float score(int doc, float freq) throws IOException {
+    return scorer.score(freq, getNormValue(doc));
+  }
+
+  /** Explain the score for the provided document assuming the given term document frequency.
+   *  This method must be called on non-decreasing sequences of doc ids.
+   *  @see SimScorer#explain(Explanation, long) */
+  public Explanation explain(int doc, Explanation freqExpl) throws IOException {
+    return scorer.explain(freqExpl, getNormValue(doc));
+  }
+
+  private static class MultiFieldNormValues extends NumericDocValues {
+    private final NumericDocValues[] normsArr;
+    private final float[] weightArr;
+    private long current;
+    private int docID = -1;
+
+    MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) {
+      this.normsArr = normsArr;
+      this.weightArr = weightArr;
+    }
+
+    @Override
+    public long longValue() {
+      return current;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      float normValue = 0;
+      for (int i = 0; i < normsArr.length; i++) {
+        boolean found = normsArr[i].advanceExact(target);
+        assert found;
+        normValue += weightArr[i] * LENGTH_TABLE[(byte) normsArr[i].longValue()];
+      }
+      current = SmallFloat.intToByte4(Math.round(normValue));
+      return true;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int nextDoc() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int advance(int target) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fd96bc5c/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
new file mode 100644
index 0000000..8bc8cb1
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestBM25FQuery.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestBM25FQuery extends LuceneTestCase {
+  public void testInvalid() {
+    BM25FQuery.Builder builder = new BM25FQuery.Builder();
+    IllegalArgumentException exc =
+        expectThrows(IllegalArgumentException.class, () -> builder.addField("foo", 0.5f));
+    assertEquals(exc.getMessage(), "weight must be greater than 1");
+  }
+
+  public void testRewrite() throws IOException {
+    BM25FQuery.Builder builder = new BM25FQuery.Builder();
+    IndexReader reader = new MultiReader();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    Query actual = searcher.rewrite(builder.build());
+    assertEquals(actual, new MatchNoDocsQuery());
+    builder.addField("field", 1f);
+    actual = searcher.rewrite(builder.build());
+    assertEquals(actual, new MatchNoDocsQuery());
+    builder.addTerm(new BytesRef("foo"));
+    actual = searcher.rewrite(builder.build());
+    assertEquals(actual, new TermQuery(new Term("field", "foo")));
+    builder.addTerm(new BytesRef("bar"));
+    actual = searcher.rewrite(builder.build());
+    assertEquals(actual, new SynonymQuery(new Term("field", "foo"),
+        new Term("field", "bar")));
+    builder.addField("another_field", 1f);
+    Query query = builder.build();
+    actual = searcher.rewrite(query);
+    assertEquals(actual, query);
+  }
+
+  public void testToString() {
+    assertEquals("BM25F(()())", new BM25FQuery.Builder().build().toString());
+    BM25FQuery.Builder builder = new BM25FQuery.Builder();
+    builder.addField("foo", 1f);
+    assertEquals("BM25F((foo)())", builder.build().toString());
+    builder.addTerm(new BytesRef("bar"));
+    assertEquals("BM25F((foo)(bar))", builder.build().toString());
+    builder.addField("title", 3f);
+    assertEquals("BM25F((foo title^3.0)(bar))", builder.build().toString());
+    builder.addTerm(new BytesRef("baz"));
+    assertEquals("BM25F((foo title^3.0)(bar baz))", builder.build().toString());
+  }
+
+  public void testSameScore() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    Document doc = new Document();
+    doc.add(new StringField("f", "a", Store.NO));
+    w.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new StringField("g", "a", Store.NO));
+    for (int i = 0; i < 10; ++i) {
+      w.addDocument(doc);
+    }
+
+    IndexReader reader = w.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    BM25FQuery query = new BM25FQuery.Builder()
+        .addField("f", 1f)
+        .addField("g", 1f)
+        .addTerm(new BytesRef("a"))
+        .build();
+    TopScoreDocCollector collector = TopScoreDocCollector.create(Math.min(reader.numDocs(), Integer.MAX_VALUE), null, Integer.MAX_VALUE);
+    searcher.search(query, collector);
+    TopDocs topDocs = collector.topDocs();
+    assertEquals(TotalHits.Relation.EQUAL_TO, topDocs.totalHits.relation);
+    assertEquals(11, topDocs.totalHits.value);
+    // All docs must have the same score
+    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
+      assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
+    }
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testAgainstCopyField() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, new MockAnalyzer(random()));
+    int numMatch = atLeast(10);
+    int boost1 = Math.max(1, random().nextInt(5));
+    int boost2 = Math.max(1, random().nextInt(5));
+    for (int i = 0; i < numMatch; i++) {
+      Document doc = new Document();
+      if (random().nextBoolean()) {
+        doc.add(new TextField("a", "baz", Store.NO));
+        doc.add(new TextField("b", "baz", Store.NO));
+        for (int k = 0; k < boost1+boost2; k++) {
+          doc.add(new TextField("ab", "baz", Store.NO));
+        }
+        w.addDocument(doc);
+        doc.clear();
+      }
+      int freqA = random().nextInt(5) + 1;
+      for (int j = 0; j < freqA; j++) {
+        doc.add(new TextField("a", "foo", Store.NO));
+      }
+      int freqB = random().nextInt(5) + 1;
+      for (int j = 0; j < freqB; j++) {
+        doc.add(new TextField("b", "foo", Store.NO));
+      }
+      int freqAB = freqA * boost1 + freqB * boost2;
+      for (int j = 0; j < freqAB; j++) {
+        doc.add(new TextField("ab", "foo", Store.NO));
+      }
+      w.addDocument(doc);
+    }
+    IndexReader reader = w.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    searcher.setSimilarity(new BM25Similarity());
+    BM25FQuery query = new BM25FQuery.Builder()
+        .addField("a", (float) boost1)
+        .addField("b", (float) boost2)
+        .addTerm(new BytesRef("foo"))
+        .addTerm(new BytesRef("foo"))
+        .build();
+
+    TopScoreDocCollector bm25FCollector = TopScoreDocCollector.create(numMatch, null, Integer.MAX_VALUE);
+    searcher.search(query, bm25FCollector);
+    TopDocs bm25FTopDocs = bm25FCollector.topDocs();
+    assertEquals(numMatch, bm25FTopDocs.totalHits.value);
+    TopScoreDocCollector collector = TopScoreDocCollector.create(reader.numDocs(), null, Integer.MAX_VALUE);
+    searcher.search(new TermQuery(new Term("ab", "foo")), collector);
+    TopDocs topDocs = collector.topDocs();
+    CheckHits.checkEqual(query, topDocs.scoreDocs, bm25FTopDocs.scoreDocs);
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+}


[09/16] lucene-solr:jira/http2: SOLR-12775: Add deprecated versions of LowerCaseTokenizer and LowerCaseTokenizerFactory

Posted by da...@apache.org.
SOLR-12775: Add deprecated versions of LowerCaseTokenizer and LowerCaseTokenizerFactory


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/24590722
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/24590722
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/24590722

Branch: refs/heads/jira/http2
Commit: 245907228642798f053542e02277d91baa211006
Parents: f7fa250
Author: Alan Woodward <ro...@apache.org>
Authored: Thu Nov 22 13:54:01 2018 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Thu Nov 22 13:54:39 2018 +0000

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../solr/analysis/LowerCaseTokenizer.java       | 156 +++++++++++++++++++
 .../analysis/LowerCaseTokenizerFactory.java     |  69 ++++++++
 .../collection1/conf/schema-deprecations.xml    |  36 +++++
 .../solr/analysis/TestDeprecatedFilters.java    |  36 +++++
 5 files changed, 300 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24590722/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d323c9d..13009a5 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -98,6 +98,9 @@ Other Changes
 
 * SOLR-12620: Remove the Admin UI Cloud -> Graph (Radial) view (janhoy)
 
+* SOLR-12775: LowerCaseTokenizer is deprecated, and should be replaced by LetterTokenizer and
+  LowerCaseFilter (Alan Woodward)
+
 ==================  7.7.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
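
For schemas moving off the deprecated tokenizer, a hedged sketch of the equivalent field type using the replacement pair named in the entry above (LetterTokenizer plus LowerCaseFilter); the field type name is arbitrary:

<fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.LetterTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>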

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24590722/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizer.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizer.java b/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizer.java
new file mode 100644
index 0000000..1361283
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizer.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.CharacterUtils;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * LowerCaseTokenizer performs the function of LetterTokenizer
+ * and LowerCaseFilter together.  It divides text at non-letters and converts
+ * them to lower case.  While it is functionally equivalent to the combination
+ * of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+ * to doing the two tasks at once, hence this (redundant) implementation.
+ * <P>
+ * Note: this does a decent job for most European languages, but does a terrible
+ * job for some Asian languages, where words are not separated by spaces.
+ * </p>
+ *
+ * @deprecated Use {@link LetterTokenizer} and {@link org.apache.lucene.analysis.LowerCaseFilter}
+ */
+@Deprecated
+public final class LowerCaseTokenizer extends Tokenizer {
+
+  /**
+   * Construct a new LowerCaseTokenizer.
+   */
+  public LowerCaseTokenizer() {
+    this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
+  }
+
+  /**
+   * Construct a new LowerCaseTokenizer using a given
+   * {@link org.apache.lucene.util.AttributeFactory}.
+   *
+   * @param factory
+   *          the attribute factory to use for this {@link Tokenizer}
+   */
+  public LowerCaseTokenizer(AttributeFactory factory) {
+    this(factory, DEFAULT_MAX_WORD_LEN);
+  }
+
+  /**
+   * Construct a new LowerCaseTokenizer using a given
+   * {@link org.apache.lucene.util.AttributeFactory}.
+   *
+   * @param factory the attribute factory to use for this {@link Tokenizer}
+   * @param maxTokenLen maximum token length the tokenizer will emit.
+   *        Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
+   * @throws IllegalArgumentException if maxTokenLen is invalid.
+   */
+  public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
+    super(factory);
+    this.maxTokenLen = maxTokenLen;
+  }
+
+  private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+  public static final int DEFAULT_MAX_WORD_LEN = 255;
+  private static final int IO_BUFFER_SIZE = 4096;
+  private final int maxTokenLen;
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  private final CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    clearAttributes();
+    int length = 0;
+    int start = -1; // this variable is always initialized
+    int end = -1;
+    char[] buffer = termAtt.buffer();
+    while (true) {
+      if (bufferIndex >= dataLen) {
+        offset += dataLen;
+        CharacterUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+        if (ioBuffer.getLength() == 0) {
+          dataLen = 0; // so next offset += dataLen won't decrement offset
+          if (length > 0) {
+            break;
+          } else {
+            finalOffset = correctOffset(offset);
+            return false;
+          }
+        }
+        dataLen = ioBuffer.getLength();
+        bufferIndex = 0;
+      }
+      // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
+      final int c = Character.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength());
+      final int charCount = Character.charCount(c);
+      bufferIndex += charCount;
+
+      if (Character.isLetter(c)) {               // if it's a token char
+        if (length == 0) {                // start of token
+          assert start == -1;
+          start = offset + bufferIndex - charCount;
+          end = start;
+        } else if (length >= buffer.length-1) { // check if a supplementary could run out of bounds
+          buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
+        }
+        end += charCount;
+        length += Character.toChars(Character.toLowerCase(c), buffer, length); // buffer it, normalized
+        if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
+          break;
+        }
+      } else if (length > 0) {           // at non-Letter w/ chars
+        break;                           // return 'em
+      }
+    }
+
+    termAtt.setLength(length);
+    assert start != -1;
+    offsetAtt.setOffset(correctOffset(start), finalOffset = correctOffset(end));
+    return true;
+
+  }
+
+  @Override
+  public final void end() throws IOException {
+    super.end();
+    // set final offset
+    offsetAtt.setOffset(finalOffset, finalOffset);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    bufferIndex = 0;
+    offset = 0;
+    dataLen = 0;
+    finalOffset = 0;
+    ioBuffer.reset(); // make sure to reset the IO buffer!!
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24590722/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java b/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
new file mode 100644
index 0000000..953d046
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.analysis;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
+import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeFactory;
+
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
+/**
+ * Factory for {@link LowerCaseTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
+ * &lt;analyzer&gt;
+ * &lt;tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/&gt;
+ * &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * <p>
+ * Options:
+ * <ul>
+ * <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
+ *     It is rare to need to change this
+ * else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN</li>
+ * </ul>
+ * @deprecated Use {@link org.apache.lucene.analysis.core.LetterTokenizerFactory} and {@link LowerCaseFilterFactory}
+ */
+@Deprecated
+public class LowerCaseTokenizerFactory extends TokenizerFactory {
+
+  private final int maxTokenLen;
+
+  /**
+   * Creates a new LowerCaseTokenizerFactory
+   */
+  public LowerCaseTokenizerFactory(Map<String, String> args) {
+    super(args);
+    maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
+    if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
+      throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
+    }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public LowerCaseTokenizer create(AttributeFactory factory) {
+    return new LowerCaseTokenizer(factory, maxTokenLen);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24590722/solr/core/src/test-files/solr/collection1/conf/schema-deprecations.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-deprecations.xml b/solr/core/src/test-files/solr/collection1/conf/schema-deprecations.xml
new file mode 100644
index 0000000..9574754
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-deprecations.xml
@@ -0,0 +1,36 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<schema name="schema-deprecations" version="1.6">
+
+  <types>
+    <fieldType name="lowertok" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+    <fieldType name="string" class="solr.StrField"/>
+    <fieldType name="long" class="${solr.tests.LongFieldType}"/>
+  </types>
+
+  <fields>
+    <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+    <field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
+    <field name="lowertext" type="lowertok" indexed="true"/>
+  </fields>
+
+</schema>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24590722/solr/core/src/test/org/apache/solr/analysis/TestDeprecatedFilters.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/analysis/TestDeprecatedFilters.java b/solr/core/src/test/org/apache/solr/analysis/TestDeprecatedFilters.java
new file mode 100644
index 0000000..120fda1
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/analysis/TestDeprecatedFilters.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+
+public class TestDeprecatedFilters extends SolrTestCaseJ4 {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-master.xml","schema-deprecations.xml");
+  }
+
+  public void testLowerCaseTokenizer() {
+    assertU(adoc("id", "1", "lowertext", "THIS IS A TEST"));
+    assertU(commit());
+    assertQ(req("lowertext:test"), "//result[@numFound=1]");
+  }
+
+}


[05/16] lucene-solr:jira/http2: LUCENE-8571: Don't block on FrozenBufferedUpdates#apply during IW#processEvents

Posted by da...@apache.org.
LUCENE-8571: Don't block on FrozenBufferedUpdates#apply during IW#processEvents

While indexing we try to apply frozen delete packages concurrently
on indexing threads if necessary. This is done in an opaque way via
IndexWriter#processEvents. Yet, when we commit or refresh we have to
ensure we apply all frozen update packages before we return.
Today we execute the apply method in a blocking fashion, which is unnecessary
when we are in an IndexWriter#processEvents context: we block indexing
threads even though they could just continue, since the packet is already being applied.
We also might block in BufferedUpdatesStream when we apply all necessary updates,
where we could continue with other work instead of waiting.
This change also tries to apply the packages that are not currently being applied
first, in order to avoid blocking unnecessarily.
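
Stripped of IndexWriter internals, the tryApply/forceApply split introduced here is a plain
try-lock pattern: indexing threads skip a packet that another thread is already applying, and
commit/refresh later blocks only on the leftover packets. The following is a minimal sketch of
that pattern under those assumptions; class and method names are illustrative, not the actual
Lucene code.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;

// Sketch of the tryApply/forceApply pattern: indexing threads skip packets that are
// already being applied by someone else; commit/refresh blocks only on those leftovers.
class Packet {
  private final ReentrantLock applyLock = new ReentrantLock();
  private boolean applied;

  /** Non-blocking: returns false if another thread currently holds the apply lock. */
  boolean tryApply() {
    if (applyLock.tryLock()) {
      try {
        forceApply();
        return true;
      } finally {
        applyLock.unlock();
      }
    }
    return false;
  }

  /** Blocking: waits for the lock and applies the packet exactly once. */
  void forceApply() {
    applyLock.lock();
    try {
      if (applied == false) {
        // ... resolve deletes/updates against the index here ...
        applied = true;
      }
    } finally {
      applyLock.unlock();
    }
  }

  /** What a commit/refresh does: a non-blocking first pass, then block on the leftovers. */
  static void waitApplyAll(List<Packet> packets) {
    List<Packet> pending = new ArrayList<>();
    for (Packet packet : packets) {
      if (packet.tryApply() == false) {
        pending.add(packet);  // somebody else is applying it right now
      }
    }
    for (Packet packet : pending) {
      packet.forceApply();    // now wait for the ones that were in flight
    }
  }
}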


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5f8855ee
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5f8855ee
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5f8855ee

Branch: refs/heads/jira/http2
Commit: 5f8855ee0bf57c8777775df8c10889eeee2e8d78
Parents: 08dd681
Author: Simon Willnauer <si...@apache.org>
Authored: Wed Nov 21 10:22:41 2018 +0100
Committer: Simon Willnauer <si...@apache.org>
Committed: Wed Nov 21 14:20:19 2018 +0100

----------------------------------------------------------------------
 .../lucene/index/BufferedUpdatesStream.java     |  11 +-
 .../lucene/index/FrozenBufferedUpdates.java     | 241 +++++++++++--------
 .../org/apache/lucene/index/IndexWriter.java    |   4 +-
 3 files changed, 147 insertions(+), 109 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5f8855ee/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
index 91e590c..9a669e0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
@@ -222,14 +223,22 @@ final class BufferedUpdatesStream implements Accountable {
       infoStream.message("BD", "waitApply: " + waitFor.size() + " packets: " + waitFor);
     }
 
+    ArrayList<FrozenBufferedUpdates> pendingPackets = new ArrayList<>();
     long totalDelCount = 0;
     for (FrozenBufferedUpdates packet : waitFor) {
       // Frozen packets are now resolved, concurrently, by the indexing threads that
       // create them, by adding a DocumentsWriter.ResolveUpdatesEvent to the events queue,
       // but if we get here and the packet is not yet resolved, we resolve it now ourselves:
-      packet.apply(writer);
+      if (packet.tryApply(writer) == false) {
+        // if somebody else is currently applying it - move on to the next one and force apply below
+        pendingPackets.add(packet);
+      }
       totalDelCount += packet.totalDelCount;
     }
+    for (FrozenBufferedUpdates packet : pendingPackets) {
+      // now block on all the packets that were concurrently applied to ensure they are due before we continue.
+      packet.forceApply(writer);
+    }
 
     if (infoStream.isEnabled("BD")) {
       infoStream.message("BD",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5f8855ee/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
index 36834a3..bb84a79f1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
@@ -30,6 +30,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.IntConsumer;
 
 import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
@@ -83,6 +84,7 @@ final class FrozenBufferedUpdates {
 
   /** Counts down once all deletes/updates have been applied */
   public final CountDownLatch applied = new CountDownLatch(1);
+  private final ReentrantLock applyLock = new ReentrantLock();
 
   /** How many total documents were deleted/updated. */
   public long totalDelCount;
@@ -214,149 +216,173 @@ final class FrozenBufferedUpdates {
 
   /** Translates a frozen packet of delete term/query, or doc values
    *  updates, into their actual docIDs in the index, and applies the change.  This is a heavy
-   *  operation and is done concurrently by incoming indexing threads. */
+   *  operation and is done concurrently by incoming indexing threads.
+   *  This method will return immediately without blocking if another thread is currently
+   *  applying the package. In order to ensure the packet has been applied, {@link #forceApply(IndexWriter)}
+   *  must be called.
+   *  */
   @SuppressWarnings("try")
-  public synchronized void apply(IndexWriter writer) throws IOException {
-    if (applied.getCount() == 0) {
-      // already done
-      return;
+  boolean tryApply(IndexWriter writer) throws IOException {
+    if (applyLock.tryLock()) {
+      try {
+        forceApply(writer);
+        return true;
+      } finally {
+        applyLock.unlock();
+      }
     }
+    return false;
+  }
 
-    long startNS = System.nanoTime();
+  /** Translates a frozen packet of delete term/query, or doc values
+   *  updates, into their actual docIDs in the index, and applies the change.  This is a heavy
+   *  operation and is done concurrently by incoming indexing threads.
+   *  */
+  void forceApply(IndexWriter writer) throws IOException {
+    applyLock.lock();
+    try {
+      if (applied.getCount() == 0) {
+        // already done
+        return;
+      }
+      long startNS = System.nanoTime();
 
-    assert any();
+      assert any();
 
-    Set<SegmentCommitInfo> seenSegments = new HashSet<>();
+      Set<SegmentCommitInfo> seenSegments = new HashSet<>();
 
-    int iter = 0;
-    int totalSegmentCount = 0;
-    long totalDelCount = 0;
+      int iter = 0;
+      int totalSegmentCount = 0;
+      long totalDelCount = 0;
 
-    boolean finished = false;
+      boolean finished = false;
 
-    // Optimistic concurrency: assume we are free to resolve the deletes against all current segments in the index, despite that
-    // concurrent merges are running.  Once we are done, we check to see if a merge completed while we were running.  If so, we must retry
-    // resolving against the newly merged segment(s).  Eventually no merge finishes while we were running and we are done.
-    while (true) {
-      String messagePrefix;
-      if (iter == 0) {
-        messagePrefix = "";
-      } else {
-        messagePrefix = "iter " + iter;
-      }
+      // Optimistic concurrency: assume we are free to resolve the deletes against all current segments in the index, despite that
+      // concurrent merges are running.  Once we are done, we check to see if a merge completed while we were running.  If so, we must retry
+      // resolving against the newly merged segment(s).  Eventually no merge finishes while we were running and we are done.
+      while (true) {
+        String messagePrefix;
+        if (iter == 0) {
+          messagePrefix = "";
+        } else {
+          messagePrefix = "iter " + iter;
+        }
 
-      long iterStartNS = System.nanoTime();
+        long iterStartNS = System.nanoTime();
 
-      long mergeGenStart = writer.mergeFinishedGen.get();
+        long mergeGenStart = writer.mergeFinishedGen.get();
 
-      Set<String> delFiles = new HashSet<>();
-      BufferedUpdatesStream.SegmentState[] segStates;
+        Set<String> delFiles = new HashSet<>();
+        BufferedUpdatesStream.SegmentState[] segStates;
 
-      synchronized (writer) {
-        List<SegmentCommitInfo> infos = getInfosToApply(writer);
-        if (infos == null) {
-          break;
-        }
+        synchronized (writer) {
+          List<SegmentCommitInfo> infos = getInfosToApply(writer);
+          if (infos == null) {
+            break;
+          }
 
-        for (SegmentCommitInfo info : infos) {
-          delFiles.addAll(info.files());
-        }
+          for (SegmentCommitInfo info : infos) {
+            delFiles.addAll(info.files());
+          }
 
-        // Must open while holding IW lock so that e.g. segments are not merged
-        // away, dropped from 100% deletions, etc., before we can open the readers
-        segStates = openSegmentStates(writer, infos, seenSegments, delGen());
+          // Must open while holding IW lock so that e.g. segments are not merged
+          // away, dropped from 100% deletions, etc., before we can open the readers
+          segStates = openSegmentStates(writer, infos, seenSegments, delGen());
 
-        if (segStates.length == 0) {
+          if (segStates.length == 0) {
+
+            if (infoStream.isEnabled("BD")) {
+              infoStream.message("BD", "packet matches no segments");
+            }
+            break;
+          }
 
           if (infoStream.isEnabled("BD")) {
-            infoStream.message("BD", "packet matches no segments");
+            infoStream.message("BD", String.format(Locale.ROOT,
+                messagePrefix + "now apply del packet (%s) to %d segments, mergeGen %d",
+                this, segStates.length, mergeGenStart));
           }
-          break;
+
+          totalSegmentCount += segStates.length;
+
+          // Important, else IFD may try to delete our files while we are still using them,
+          // if e.g. a merge finishes on some of the segments we are resolving on:
+          writer.deleter.incRef(delFiles);
+        }
+
+        AtomicBoolean success = new AtomicBoolean();
+        long delCount;
+        try (Closeable finalizer = () -> finishApply(writer, segStates, success.get(), delFiles)) {
+          // don't hold IW monitor lock here so threads are free concurrently resolve deletes/updates:
+          delCount = apply(segStates);
+          success.set(true);
         }
 
+        // Since we just resolved some more deletes/updates, now is a good time to write them:
+        writer.writeSomeDocValuesUpdates();
+
+        // It's OK to add this here, even if the while loop retries, because delCount only includes newly
+        // deleted documents, on the segments we didn't already do in previous iterations:
+        totalDelCount += delCount;
+
         if (infoStream.isEnabled("BD")) {
           infoStream.message("BD", String.format(Locale.ROOT,
-                                                 messagePrefix + "now apply del packet (%s) to %d segments, mergeGen %d",
-                                                 this, segStates.length, mergeGenStart));
+              messagePrefix + "done inner apply del packet (%s) to %d segments; %d new deletes/updates; took %.3f sec",
+              this, segStates.length, delCount, (System.nanoTime() - iterStartNS) / 1000000000.));
         }
+        if (privateSegment != null) {
+          // No need to retry for a segment-private packet: the merge that folds in our private segment already waits for all deletes to
+          // be applied before it kicks off, so this private segment must already not be in the set of merging segments
 
-        totalSegmentCount += segStates.length;
+          break;
+        }
 
-        // Important, else IFD may try to delete our files while we are still using them,
-        // if e.g. a merge finishes on some of the segments we are resolving on:
-        writer.deleter.incRef(delFiles);
-      }
+        // Must sync on writer here so that IW.mergeCommit is not running concurrently, so that if we exit, we know mergeCommit will succeed
+        // in pulling all our delGens into a merge:
+        synchronized (writer) {
+          long mergeGenCur = writer.mergeFinishedGen.get();
 
-      AtomicBoolean success = new AtomicBoolean();
-      long delCount;
-      try (Closeable finalizer = () -> finishApply(writer, segStates, success.get(), delFiles)) {
-        // don't hold IW monitor lock here so threads are free concurrently resolve deletes/updates:
-        delCount = apply(segStates);
-        success.set(true);
-      }
+          if (mergeGenCur == mergeGenStart) {
 
-      // Since we just resolved some more deletes/updates, now is a good time to write them:
-      writer.writeSomeDocValuesUpdates();
+            // Must do this while still holding IW lock else a merge could finish and skip carrying over our updates:
 
-      // It's OK to add this here, even if the while loop retries, because delCount only includes newly
-      // deleted documents, on the segments we didn't already do in previous iterations:
-      totalDelCount += delCount;
+            // Record that this packet is finished:
+            writer.finished(this);
 
-      if (infoStream.isEnabled("BD")) {
-        infoStream.message("BD", String.format(Locale.ROOT,
-                                               messagePrefix + "done inner apply del packet (%s) to %d segments; %d new deletes/updates; took %.3f sec",
-                                               this, segStates.length, delCount, (System.nanoTime() - iterStartNS) / 1000000000.));
-      }
-      if (privateSegment != null) {
-        // No need to retry for a segment-private packet: the merge that folds in our private segment already waits for all deletes to
-        // be applied before it kicks off, so this private segment must already not be in the set of merging segments
-
-        break;
-      }
+            finished = true;
 
-      // Must sync on writer here so that IW.mergeCommit is not running concurrently, so that if we exit, we know mergeCommit will succeed
-      // in pulling all our delGens into a merge:
-      synchronized (writer) {
-        long mergeGenCur = writer.mergeFinishedGen.get();
+            // No merge finished while we were applying, so we are done!
+            break;
+          }
+        }
 
-        if (mergeGenCur == mergeGenStart) {
+        if (infoStream.isEnabled("BD")) {
+          infoStream.message("BD", messagePrefix + "concurrent merges finished; move to next iter");
+        }
 
-          // Must do this while still holding IW lock else a merge could finish and skip carrying over our updates:
-          
-          // Record that this packet is finished:
-          writer.finished(this);
+        // A merge completed while we were running.  In this case, that merge may have picked up some of the updates we did, but not
+        // necessarily all of them, so we cycle again, re-applying all our updates to the newly merged segment.
 
-          finished = true;
+        iter++;
+      }
 
-          // No merge finished while we were applying, so we are done!
-          break;
-        }
+      if (finished == false) {
+        // Record that this packet is finished:
+        writer.finished(this);
       }
 
       if (infoStream.isEnabled("BD")) {
-        infoStream.message("BD", messagePrefix + "concurrent merges finished; move to next iter");
+        String message = String.format(Locale.ROOT,
+            "done apply del packet (%s) to %d segments; %d new deletes/updates; took %.3f sec",
+            this, totalSegmentCount, totalDelCount, (System.nanoTime() - startNS) / 1000000000.);
+        if (iter > 0) {
+          message += "; " + (iter + 1) + " iters due to concurrent merges";
+        }
+        message += "; " + writer.getPendingUpdatesCount() + " packets remain";
+        infoStream.message("BD", message);
       }
-        
-      // A merge completed while we were running.  In this case, that merge may have picked up some of the updates we did, but not
-      // necessarily all of them, so we cycle again, re-applying all our updates to the newly merged segment.
-
-      iter++;
-    }
-
-    if (finished == false) {
-      // Record that this packet is finished:
-      writer.finished(this);
-    }
-        
-    if (infoStream.isEnabled("BD")) {
-      String message = String.format(Locale.ROOT,
-                                     "done apply del packet (%s) to %d segments; %d new deletes/updates; took %.3f sec",
-                                     this, totalSegmentCount, totalDelCount, (System.nanoTime() - startNS) / 1000000000.);
-      if (iter > 0) {
-        message += "; " + (iter+1) + " iters due to concurrent merges";
-      }
-      message += "; " + writer.getPendingUpdatesCount() + " packets remain";
-      infoStream.message("BD", message);
+    } finally {
+      applyLock.unlock();
     }
   }
 
@@ -411,6 +437,7 @@ final class FrozenBufferedUpdates {
 
   private void finishApply(IndexWriter writer, BufferedUpdatesStream.SegmentState[] segStates,
                            boolean success, Set<String> delFiles) throws IOException {
+    assert applyLock.isHeldByCurrentThread();
     synchronized (writer) {
 
       BufferedUpdatesStream.ApplyDeletesResult result;
@@ -441,8 +468,8 @@ final class FrozenBufferedUpdates {
 
   /** Applies pending delete-by-term, delete-by-query and doc values updates to all segments in the index, returning
    *  the number of new deleted or updated documents. */
-  private synchronized long apply(BufferedUpdatesStream.SegmentState[] segStates) throws IOException {
-
+  private long apply(BufferedUpdatesStream.SegmentState[] segStates) throws IOException {
+    assert applyLock.isHeldByCurrentThread();
     if (delGen == -1) {
       // we were not yet pushed
       throw new IllegalArgumentException("gen is not yet set; call BufferedUpdatesStream.push first");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5f8855ee/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 80d11c1..028554b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -2607,7 +2607,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
     // Do this as an event so it applies higher in the stack when we are not holding DocumentsWriterFlushQueue.purgeLock:
     eventQueue.add(w -> {
       try {
-        packet.apply(w);
+        // we call tryApply here since we don't want to block if a refresh or a flush is already applying the
+        // packet. The flush will retry this packet anyway to ensure all of them are applied
+        packet.tryApply(w);
       } catch (Throwable t) {
         try {
           w.onTragicEvent(t, "applyUpdatesPacket");


[12/16] lucene-solr:jira/http2: SOLR-5211: Always populate _root_ (if defined). And, small refactor: Clarified how _version_ is transferred from root to children.

Posted by da...@apache.org.
SOLR-5211:  Always populate _root_ (if defined).
And, small refactor: Clarified how _version_ is transferred from root to children.
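
A short SolrJ sketch of what this means in practice, assuming a collection whose schema declares
both a uniqueKey and _root_; the collection URL and field names below are illustrative. Every
document gets _root_ populated, so a parent indexed with children can later be replaced by a
child-free document without leaving the old children behind or breaking block-join queries.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;

public class RootFieldExample {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/mycollection").build()) {
      // Index a parent with one child; _root_ is set to the parent's id on both docs.
      SolrInputDocument parent = new SolrInputDocument();
      parent.addField("id", "p1");
      parent.addField("title_s", "parent");
      SolrInputDocument child = new SolrInputDocument();
      child.addField("id", "c1");
      child.addField("title_s", "child");
      parent.addChildDocument(child);
      client.add(parent);

      // With SOLR-5211 a plain (child-free) doc also gets _root_ == its own id, and updates
      // are keyed on _root_, so re-adding "p1" without children replaces the whole block.
      SolrInputDocument replacement = new SolrInputDocument();
      replacement.addField("id", "p1");
      replacement.addField("title_s", "no children anymore");
      client.add(replacement);
      client.commit();

      SolrQuery q = new SolrQuery("*:*");
      q.setFields("id", "title_s", "_root_");
      for (SolrDocument doc : client.query(q).getResults()) {
        System.out.println(doc.getFieldValue("id") + " -> _root_=" + doc.getFieldValue("_root_"));
      }
    }
  }
}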


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a346ba0d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a346ba0d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a346ba0d

Branch: refs/heads/jira/http2
Commit: a346ba0d3c371ec9f314c21fe67afeca64846cf0
Parents: d7b878e
Author: Moshe <mo...@mail.com>
Authored: Sun Nov 25 21:47:49 2018 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Sun Nov 25 21:47:49 2018 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   8 ++
 .../apache/solr/update/AddUpdateCommand.java    |  31 +++--
 .../solr/update/DirectUpdateHandler2.java       |  23 ++--
 .../solr/cloud/TestCloudPseudoReturnFields.java |   8 +-
 .../apache/solr/cloud/TestRandomFlRTGCloud.java |   4 +
 .../apache/solr/handler/tagger/TaggerTest.java  |   6 +-
 .../solr/search/TestPseudoReturnFields.java     |   9 +-
 .../test/org/apache/solr/search/TestReload.java |   6 +-
 .../org/apache/solr/update/RootFieldTest.java   | 125 +++++++++++++++++++
 .../update/processor/AtomicUpdatesTest.java     |  18 +--
 .../solr/client/solrj/SolrExampleTests.java     |  91 ++++++++++++++
 11 files changed, 289 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9e63d66..1b5e1ad 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -72,6 +72,14 @@ New Features
 
 * SOLR-12593: The default configSet now includes an "ignored_*" dynamic field.  (David Smiley)
 
+Improvements
+----------------------
+
+* SOLR-5211: If _root_ is defined in the schema, it is now always populated automatically.  This allows documents with
+ children to be updated with a document that does not have children, whereas before it would break block-join queries.
+ If you don't use nested documents then _root_ can be removed from the schema.  (Dr Oleg Savrasov, Moshe Bla,
+ David Smiley, Mikhail Khludnev)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
index cfa937e..b644f73 100644
--- a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
+++ b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
@@ -95,11 +95,14 @@ public class AddUpdateCommand extends UpdateCommand {
    * Nested documents, if found, will cause an exception to be thrown.  Call {@link #getLuceneDocsIfNested()} for that.
    * Any changes made to the returned Document will not be reflected in the SolrInputDocument, or future calls to this
    * method.
-   * Note that the behavior of this is sensitive to {@link #isInPlaceUpdate()}.
-   */
+   * Note that the behavior of this is sensitive to {@link #isInPlaceUpdate()}.*/
    public Document getLuceneDocument() {
      final boolean ignoreNestedDocs = false; // throw an exception if found
-     return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema(), isInPlaceUpdate(), ignoreNestedDocs);
+     SolrInputDocument solrInputDocument = getSolrInputDocument();
+     if (!isInPlaceUpdate() && getReq().getSchema().isUsableForChildDocs()) {
+       addRootField(solrInputDocument, getHashableId());
+     }
+     return DocumentBuilder.toDocument(solrInputDocument, req.getSchema(), isInPlaceUpdate(), ignoreNestedDocs);
    }
 
   /** Returns the indexed ID for this document.  The returned BytesRef is retained across multiple calls, and should not be modified. */
@@ -194,13 +197,14 @@ public class AddUpdateCommand extends UpdateCommand {
       return null; // caller should call getLuceneDocument() instead
     }
 
-    String rootId = getHashableId();
-
-    boolean isVersion = version != 0;
+    final String rootId = getHashableId();
+    final SolrInputField versionSif = solrDoc.get(CommonParams.VERSION_FIELD);
 
     for (SolrInputDocument sdoc : all) {
-      sdoc.setField(IndexSchema.ROOT_FIELD_NAME, rootId);
-      if(isVersion) sdoc.setField(CommonParams.VERSION_FIELD, version);
+      addRootField(sdoc, rootId);
+      if (versionSif != null) {
+        addVersionField(sdoc, versionSif);
+      }
       // TODO: if possible concurrent modification exception (if SolrInputDocument not cloned and is being forwarded to replicas)
       // then we could add this field to the generated lucene document instead.
     }
@@ -208,6 +212,17 @@ public class AddUpdateCommand extends UpdateCommand {
     return () -> all.stream().map(sdoc -> DocumentBuilder.toDocument(sdoc, req.getSchema())).iterator();
   }
 
+  private void addRootField(SolrInputDocument sdoc, String rootId) {
+    sdoc.setField(IndexSchema.ROOT_FIELD_NAME, rootId);
+  }
+
+  private void addVersionField(SolrInputDocument sdoc, SolrInputField versionSif) {
+    // Reordered delete-by-query assumes all documents have a version, see SOLR-10114
+    // all docs in hierarchy should have the same version.
+    // Either fetch the version from the root doc or compute it and propagate it.
+    sdoc.put(CommonParams.VERSION_FIELD, versionSif);
+  }
+
   private List<SolrInputDocument> flatten(SolrInputDocument root) {
     List<SolrInputDocument> unwrappedDocs = new ArrayList<>();
     flattenAnonymous(unwrappedDocs, root, true);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
index e64ee8a..660df06 100644
--- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
+++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@@ -42,6 +42,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
@@ -319,9 +320,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
       IndexWriter writer = iw.get();
-      Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested();
-      if (blockDocs != null) {
-        writer.addDocuments(blockDocs);
+      Iterable<Document> nestedDocs = cmd.getLuceneDocsIfNested();
+      if (nestedDocs != null) {
+        writer.addDocuments(nestedDocs);
       } else {
         writer.addDocument(cmd.getLuceneDocument());
       }
@@ -425,7 +426,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
       return;
     }
 
-    Term deleteTerm = new Term(idField.getName(), cmd.getIndexedId());
+    Term deleteTerm = getIdTerm(cmd.getIndexedId(), false);
     // SolrCore.verbose("deleteDocuments",deleteTerm,writer);
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
@@ -951,13 +952,13 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
 
     } else { // more normal path
 
-      Iterable<Document> blockDocs = cmd.getLuceneDocsIfNested();
-      boolean isBlock = blockDocs != null; // AKA nested child docs
-      Term idTerm = new Term(isBlock ? IndexSchema.ROOT_FIELD_NAME : idField.getName(), cmd.getIndexedId());
+      Iterable<Document> nestedDocs = cmd.getLuceneDocsIfNested();
+      boolean isNested = nestedDocs != null; // AKA nested child docs
+      Term idTerm = getIdTerm(cmd.getIndexedId(), isNested);
       Term updateTerm = hasUpdateTerm ? cmd.updateTerm : idTerm;
-      if (isBlock) {
+      if (isNested) {
         log.debug("updateDocuments({})", cmd);
-        writer.updateDocuments(updateTerm, blockDocs);
+        writer.updateDocuments(updateTerm, nestedDocs);
       } else {
         Document luceneDocument = cmd.getLuceneDocument();
         log.debug("updateDocument({})", cmd);
@@ -975,6 +976,10 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     }
   }
 
+  private Term getIdTerm(BytesRef indexedId, boolean isNested) {
+    boolean useRootId = isNested || core.getLatestSchema().isUsableForChildDocs();
+    return new Term(useRootId ? IndexSchema.ROOT_FIELD_NAME : idField.getName(), indexedId);
+  }
 
   /////////////////////////////////////////////////////////////////////
   // SolrInfoBean stuff: Statistics and Module Info

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
index f7e6756..31d69cf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java
@@ -194,7 +194,7 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
       SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl",fl));
       // shouldn't matter what doc we pick...
       for (SolrDocument doc : docs) {
-        assertEquals(fl + " => " + doc, 4, doc.size());
+        assertEquals(fl + " => " + doc, 5, doc.size());
         assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String);
         assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer);
         assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String);
@@ -208,7 +208,7 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
     for (String fl : TestPseudoReturnFields.ALL_REAL_FIELDS) {
       for (int i : Arrays.asList(42, 43, 44, 45, 46, 99)) {
         SolrDocument doc = getRandClient(random()).getById(""+i, params("fl",fl));
-        assertEquals(fl + " => " + doc, 4, doc.size());
+        assertEquals(fl + " => " + doc, 5, doc.size());
         assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String);
         assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer);
         assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String);
@@ -238,7 +238,7 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
       SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl",fl));
       // shouldn't matter what doc we pick...
       for (SolrDocument doc : docs) {
-        assertEquals(fl + " => " + doc, 5, doc.size());
+        assertEquals(fl + " => " + doc, 6, doc.size());
         assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String);
         assertTrue(fl + " => " + doc, doc.getFieldValue("score") instanceof Float);
         assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer);
@@ -253,7 +253,7 @@ public class TestCloudPseudoReturnFields extends SolrCloudTestCase {
     for (String fl : TestPseudoReturnFields.SCORE_AND_REAL_FIELDS) {
       for (int i : Arrays.asList(42, 43, 44, 45, 46, 99)) {
         SolrDocument doc = getRandClient(random()).getById(""+i, params("fl",fl));
-        assertEquals(fl + " => " + doc, 4, doc.size());
+        assertEquals(fl + " => " + doc, 5, doc.size());
         assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String);
         assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer);
         assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
index 5a9db8f..6969883 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
@@ -70,6 +70,9 @@ public class TestRandomFlRTGCloud extends SolrCloudTestCase {
 
   /** Always included in fl so we can vet what doc we're looking at */
   private static final FlValidator ID_VALIDATOR = new SimpleFieldValueValidator("id");
+
+  /** Since nested documents are not tested, when _root_ is declared in schema, it is always the same as id */
+  private static final FlValidator ROOT_VALIDATOR = new RenameFieldValueValidator("id" , "_root_");
   
   /** 
    * Types of things we will randomly ask for in fl param, and validate in response docs.
@@ -352,6 +355,7 @@ public class TestRandomFlRTGCloud extends SolrCloudTestCase {
     
     final Set<FlValidator> validators = new LinkedHashSet<>();
     validators.add(ID_VALIDATOR); // always include id so we can be confident which doc we're looking at
+    validators.add(ROOT_VALIDATOR); // always added in a nested schema, with the same value as id
     addRandomFlValidators(random(), validators);
     FlValidator.addParams(validators, params);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/handler/tagger/TaggerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/tagger/TaggerTest.java b/solr/core/src/test/org/apache/solr/handler/tagger/TaggerTest.java
index 93b11b5..7a67b95 100644
--- a/solr/core/src/test/org/apache/solr/handler/tagger/TaggerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/tagger/TaggerTest.java
@@ -83,7 +83,8 @@ public class TaggerTest extends TaggerTestCase {
         "<result name=\"response\" numFound=\"1\" start=\"0\">\n" +
         "  <doc>\n" +
         "    <str name=\"id\">1</str>\n" +
-        "    <str name=\"name\">London Business School</str></doc>\n" +
+        "    <str name=\"name\">London Business School</str>\n" +
+        "    <str name=\"_root_\">1</str></doc>\n" +
         "</result>\n" +
         "</response>\n";
     assertEquals(expected, rspStr);
@@ -111,7 +112,8 @@ public class TaggerTest extends TaggerTestCase {
         "<result name=\"response\" numFound=\"1\" start=\"0\">\n" +
         "  <doc>\n" +
         "    <str name=\"id\">1</str>\n" +
-        "    <str name=\"name\">London Business School</str></doc>\n" +
+        "    <str name=\"name\">London Business School</str>\n" +
+        "    <str name=\"_root_\">1</str></doc>\n" +
         "</result>\n" +
         "</response>\n";
     assertEquals(expected, rspStr);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java
index 0a98734..1080354 100644
--- a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java
+++ b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java
@@ -126,7 +126,7 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 {
               ,"//result/doc/str[@name='ssto']"
               ,"//result/doc/str[@name='subject']"
               
-              ,"//result/doc[count(*)=4]"
+              ,"//result/doc[count(*)=5]"
               );
     }
   }
@@ -142,7 +142,7 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 {
                 ,"//doc/int[@name='val_i']"
                 ,"//doc/str[@name='ssto']"
                 ,"//doc/str[@name='subject']"
-                ,"//doc[count(*)=4]"
+                ,"//doc[count(*)=5]"
                 );
       }
     }
@@ -172,8 +172,7 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 {
               ,"//result/doc/str[@name='ssto']"
               ,"//result/doc/str[@name='subject']"
               ,"//result/doc/float[@name='score']"
-              
-              ,"//result/doc[count(*)=5]"
+              ,"//result/doc[count(*)=6]"
               );
     }
   }
@@ -190,7 +189,7 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 {
                 ,"//doc/int[@name='val_i']"
                 ,"//doc/str[@name='ssto']"
                 ,"//doc/str[@name='subject']"
-                ,"//doc[count(*)=4]"
+                ,"//doc[count(*)=5]"
                 );
       }
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/search/TestReload.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestReload.java b/solr/core/src/test/org/apache/solr/search/TestReload.java
index 872abec..13d78fc 100644
--- a/solr/core/src/test/org/apache/solr/search/TestReload.java
+++ b/solr/core/src/test/org/apache/solr/search/TestReload.java
@@ -36,13 +36,13 @@ public class TestReload extends TestRTGBase {
 
     assertU(commit("softCommit","true"));   // should cause a RTG searcher to be opened
 
-    assertJQ(req("qt","/get","id","1")
+    assertJQ(req("qt","/get","id","1", "fl", "id,_version_")
         ,"=={'doc':{'id':'1','_version_':" + version + "}}"
     );
 
     h.reload();
 
-    assertJQ(req("qt","/get","id","1")
+    assertJQ(req("qt","/get","id","1", "fl", "id,_version_")
         ,"=={'doc':{'id':'1','_version_':" + version + "}}"
     );
 
@@ -76,7 +76,7 @@ public class TestReload extends TestRTGBase {
       if (rand.nextBoolean()) {
         // RTG should always be able to see the last version
         // System.out.println("!!! rtg");
-        assertJQ(req("qt","/get","id","1")
+        assertJQ(req("qt","/get","id","1", "fl", "id,_version_")
             ,"=={'doc':{'id':'1','_version_':" + version + "}}"
         );
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/RootFieldTest.java b/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
new file mode 100644
index 0000000..7d4ed08
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/RootFieldTest.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update;
+
+import java.util.List;
+
+import org.apache.solr.SolrJettyTestBase;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.CommonParams;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import static org.hamcrest.CoreMatchers.is;
+
+public class RootFieldTest extends SolrJettyTestBase {
+  private static boolean useRootSchema;
+  private static final String MESSAGE = "Update handler should create and process _root_ field " +
+      "unless there is no such a field in schema";
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  private static boolean expectRoot() {
+    return useRootSchema;
+  }
+
+  @BeforeClass
+  public static void beforeTest() throws Exception {
+    useRootSchema = random().nextBoolean();
+    // schema.xml declares _root_ field while schema11.xml does not.
+    String schema = useRootSchema ? "schema.xml" : "schema11.xml";
+    initCore("solrconfig.xml", schema);
+  }
+
+  @Test
+  public void testLegacyBlockProcessing() throws Exception
+  {
+    SolrClient client = getSolrClient();
+    client.deleteByQuery("*:*");// delete everything!
+
+    // Add child free doc
+    SolrInputDocument docToUpdate = new SolrInputDocument();
+    String docId = "11";
+    docToUpdate.addField( "id", docId);
+    docToUpdate.addField( "name", "child free doc" );
+    client.add(docToUpdate);
+    client.commit();
+
+    SolrQuery query = new SolrQuery();
+    query.setQuery( "*:*" );
+    query.set( CommonParams.FL, "id,name,_root_" );
+
+    SolrDocumentList results = client.query(query).getResults();
+    assertThat(results.getNumFound(), is(1L));
+    SolrDocument foundDoc = results.get( 0 );
+
+    // Check retrieved field values
+    assertThat(foundDoc.getFieldValue( "id" ), is(docId));
+    assertThat( ((List)foundDoc.getFieldValue( "name" )).get(0), is("child free doc"));
+
+    String expectedRootValue = expectRoot() ? docId : null;
+    assertThat(MESSAGE, foundDoc.getFieldValue( "_root_" ), is(expectedRootValue));
+
+    // Update the doc
+    docToUpdate.setField( "name", "updated doc" );
+    client.add(docToUpdate);
+    client.commit();
+
+    results = client.query(query).getResults();
+    assertEquals( 1, results.getNumFound() );
+    foundDoc = results.get( 0 );
+
+    // Check updated field values
+    assertThat(foundDoc.getFieldValue( "id" ), is(docId));
+    assertThat( ((List)foundDoc.getFieldValue( "name" )).get(0), is("updated doc"));
+    assertThat(MESSAGE, foundDoc.getFieldValue( "_root_" ), is(expectedRootValue));
+  }
+
+  @Test
+  public void testUpdateWithChildDocs() throws Exception {
+    SolrClient client = getSolrClient();
+    client.deleteByQuery("*:*");// delete everything!
+
+    // Add child free doc
+    SolrInputDocument docToUpdate = new SolrInputDocument();
+    String docId = "11";
+    docToUpdate.addField( "id", docId);
+    docToUpdate.addField( "name", "parent doc with a child" );
+    SolrInputDocument child = new SolrInputDocument();
+    child.addField("id", "111");
+    child.addField("name", "child doc");
+    docToUpdate.addChildDocument(child);
+    if (!useRootSchema) {
+      thrown.expect(SolrException.class);
+      thrown.expectMessage("Unable to index docs with children:" +
+          " the schema must include definitions for both a uniqueKey field" +
+          " and the '_root_' field, using the exact same fieldType");
+    }
+    client.add(docToUpdate);
+    client.commit();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
index 43a84e6..f72fd67 100644
--- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
+++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java
@@ -1215,7 +1215,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=7"
+              , "count(//doc/*)=8"
               );
 
       // do atomic update
@@ -1230,7 +1230,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=7"
+              , "count(//doc/*)=8"
               );
 
       assertU(commit());
@@ -1244,7 +1244,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=7"
+              , "count(//doc/*)=8"
               );
     }
     
@@ -1267,7 +1267,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=6"
+              , "count(//doc/*)=7"
               );
       // do atomic update
       assertU(adoc(sdoc("id", "7", fieldToUpdate, ImmutableMap.of("inc", -555))));
@@ -1281,7 +1281,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=6"
+              , "count(//doc/*)=7"
               );
 
       // diff doc where we check that we can overwrite the default value
@@ -1296,7 +1296,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=6"
+              , "count(//doc/*)=7"
               );
       // do atomic update
       assertU(adoc(sdoc("id", "8", fieldToUpdate, ImmutableMap.of("inc", -555))));
@@ -1310,7 +1310,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=6"
+              , "count(//doc/*)=7"
               );
       
       assertU(commit());
@@ -1325,7 +1325,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=6"
+              , "count(//doc/*)=7"
               );
       assertQ(fieldToUpdate + ": doc8 post commit RTG"
               , req("qt", "/get", "id", "8")
@@ -1337,7 +1337,7 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
               , "//doc/long[@name='_version_']"
               , "//doc/date[@name='timestamp']"
               , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
-              , "count(//doc/*)=6"
+              , "count(//doc/*)=7"
               );
     }
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a346ba0d/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
index b83be83..807757c 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
@@ -69,6 +69,7 @@ import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Pair;
+import org.junit.Before;
 import org.junit.Test;
 import org.noggit.JSONParser;
 import org.slf4j.Logger;
@@ -76,6 +77,7 @@ import org.slf4j.LoggerFactory;
 
 import static org.apache.solr.common.params.UpdateParams.ASSUME_CONTENT_TYPE;
 import static org.junit.internal.matchers.StringContains.containsString;
+import static org.hamcrest.CoreMatchers.is;
 
 /**
  * This should include tests against the example solr config
@@ -93,6 +95,15 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
   static {
     ignoreException("uniqueKey");
   }
+
+  @Before
+  public void emptyCollection() throws Exception {
+    SolrClient client = getSolrClient();
+    // delete everything!
+    client.deleteByQuery("*:*");
+    client.commit();
+  }
+
   /**
    * query the example
    */
@@ -2179,4 +2190,84 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
     }
     return sdoc;
   }
+
+  @Test
+  public void testAddChildToChildFreeDoc() throws IOException, SolrServerException, IllegalArgumentException, IllegalAccessException, SecurityException, NoSuchFieldException {
+    SolrClient client = getSolrClient();
+    client.deleteByQuery("*:*");
+
+    SolrInputDocument docToUpdate = new SolrInputDocument();
+    docToUpdate.addField("id", "p0");
+    docToUpdate.addField("title_s", "i am a child free doc");
+    client.add(docToUpdate);
+    client.commit();
+
+    SolrQuery q = new SolrQuery("*:*");
+    q.set( CommonParams.FL, "id,title_s" );
+    q.addSort("id", SolrQuery.ORDER.desc);
+
+    SolrDocumentList results = client.query(q).getResults();
+    assertThat(results.getNumFound(), is(1L));
+    SolrDocument foundDoc = results.get(0);
+    assertThat(foundDoc.getFieldValue("title_s"), is("i am a child free doc"));
+
+    // Rewrite child free doc
+    docToUpdate.setField("title_s", "i am a parent");
+
+    SolrInputDocument child = new SolrInputDocument();
+    child.addField("id", "c0");
+    child.addField("title_s", "i am a child");
+
+    docToUpdate.addChildDocument(child);
+
+    client.add(docToUpdate);
+    client.commit();
+
+    results = client.query(q).getResults();
+
+    assertThat(results.getNumFound(), is(2L));
+    foundDoc = results.get(0);
+    assertThat(foundDoc.getFieldValue("title_s"), is("i am a parent"));
+    foundDoc = results.get(1);
+    assertThat(foundDoc.getFieldValue("title_s"), is("i am a child"));
+  }
+
+  @Test
+  public void testDeleteParentDoc() throws IOException, SolrServerException, IllegalArgumentException, IllegalAccessException, SecurityException, NoSuchFieldException {
+    SolrClient client = getSolrClient();
+    client.deleteByQuery("*:*");
+
+    SolrInputDocument docToDelete = new SolrInputDocument();
+    docToDelete.addField("id", "p0");
+    docToDelete.addField("title_s", "parent doc");
+
+    SolrInputDocument child = new SolrInputDocument();
+    child.addField("id", "c0");
+    child.addField("title_s", "i am a child 0");
+    docToDelete.addChildDocument(child);
+
+    child = new SolrInputDocument();
+    child.addField("id", "c1");
+    child.addField("title_s", "i am a child 1");
+    docToDelete.addChildDocument(child);
+
+    child = new SolrInputDocument();
+    child.addField("id", "c2");
+    child.addField("title_s", "i am a child 2");
+    docToDelete.addChildDocument(child);
+
+    client.add(docToDelete);
+    client.commit();
+
+    SolrQuery q = new SolrQuery("*:*");
+    SolrDocumentList results = client.query(q).getResults();
+    assertThat(results.getNumFound(), is(4L));
+
+    client.deleteById("p0");
+    client.commit();
+
+    results = client.query(q).getResults();
+    assertThat("All the children are expected to be deleted together with parent",
+        results.getNumFound(), is(0L));
+  }
 }