You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2016/12/14 11:20:05 UTC

lucene-solr:branch_6x: Revert "LUCENE-7590: add DocValuesStatsCollector"

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 43f4f7a27 -> e09ef681e


Revert "LUCENE-7590: add DocValuesStatsCollector"

This reverts commit 43f4f7a279553913aadfdd684d9cdcff0a5f4220.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e09ef681
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e09ef681
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e09ef681

Branch: refs/heads/branch_6x
Commit: e09ef681e4d36adb8987ca0cda6bcb3221830102
Parents: 43f4f7a
Author: Shai Erera <sh...@apache.org>
Authored: Wed Dec 14 13:18:31 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Wed Dec 14 13:18:31 2016 +0200

----------------------------------------------------------------------
 .../apache/lucene/search/DocValuesStats.java    | 165 ------------------
 .../lucene/search/DocValuesStatsCollector.java  |  64 -------
 .../search/TestDocValuesStatsCollector.java     | 166 -------------------
 3 files changed, 395 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e09ef681/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
deleted file mode 100644
index fad9f97..0000000
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
-
-/** Holds statistics (count, missing, min, max) for a DocValues field. */
-public abstract class DocValuesStats<T> {
-
-  private int missing = 0;
-  private int count = 0;
-
-  protected final String field;
-
-  protected T min;
-  protected T max;
-
-  protected DocValuesStats(String field, T initialMin, T initialMax) {
-    this.field = field;
-    this.min = initialMin;
-    this.max = initialMax;
-  }
-
-  /**
-   * Called after {@link DocValuesStats#accumulate(int)} was processed and verified that the document has a value for
-   * the field. Implementations should update the statistics based on the value of the current document.
-   *
-   * @param count
-   *          the updated number of documents with value for this field.
-   */
-  protected abstract void doAccumulate(int count) throws IOException;
-
-  /**
-   * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e.
-   * it does have the requested DocValues field).
-   */
-  protected abstract boolean init(LeafReaderContext contxt) throws IOException;
-
-  /** Returns whether the given document has a value for the requested DocValues field. */
-  protected abstract boolean hasValue(int doc) throws IOException;
-
-  final void accumulate(int doc) throws IOException {
-    if (hasValue(doc)) {
-      ++count; // bumped first so that doAccumulate() receives the updated total
-      doAccumulate(count);
-    } else {
-      ++missing; // the document has no value for the field
-    }
-  }
-
-  final void addMissing() {
-    ++missing; // records a miss without consulting hasValue()
-  }
-
-  /** The field for which these stats were computed. */
-  public final String field() {
-    return field;
-  }
-
-  /** The number of documents which have a value of the field. */
-  public final int count() {
-    return count;
-  }
-
-  /** The number of documents which do not have a value of the field. */
-  public final int missing() {
-    return missing;
-  }
-
-  /** The minimum value of the field. Undefined when {@link #count()} is zero. */
-  public final T min() {
-    return min;
-  }
-
-  /** The maximum value of the field. Undefined when {@link #count()} is zero. */
-  public final T max() {
-    return max;
-  }
-
-  /** Holds statistics for a numeric DocValues field. */
-  public static abstract class NumericDocValuesStats<T extends Number> extends DocValuesStats<T> {
-
-    protected double mean = 0.0;
-
-    protected NumericDocValues ndv;
-
-    protected NumericDocValuesStats(String field, T initialMin, T initialMax) {
-      super(field, initialMin, initialMax);
-    }
-
-    @Override
-    protected final boolean init(LeafReaderContext contxt) throws IOException {
-      ndv = contxt.reader().getNumericDocValues(field);
-      return ndv != null;
-    }
-
-    @Override
-    protected boolean hasValue(int doc) throws IOException {
-      return ndv.advanceExact(doc);
-    }
-
-    /** The mean of all values of the field. Undefined when {@link #count()} is zero. */
-    public final double mean() {
-      return mean;
-    }
-  }
-
-  /** Holds DocValues statistics for a numeric field storing {@code long} values. */
-  public static final class LongDocValuesStats extends NumericDocValuesStats<Long> {
-
-    public LongDocValuesStats(String description) {
-      super(description, Long.MAX_VALUE, Long.MIN_VALUE);
-    }
-
-    @Override
-    protected void doAccumulate(int count) throws IOException {
-      long val = ndv.longValue();
-      if (val > max) {
-        max = val;
-      }
-      if (val < min) {
-        min = val;
-      }
-      mean += (val - mean) / count; // incremental mean: no running sum is kept
-    }
-  }
-
-  /** Holds DocValues statistics for a numeric field storing {@code double} values. */
-  public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> {
-
-    public DoubleDocValuesStats(String description) {
-      super(description, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); // not MIN_VALUE: that is > 0
-    }
-
-    @Override
-    protected void doAccumulate(int count) throws IOException {
-      double val = Double.longBitsToDouble(ndv.longValue());
-      if (Double.compare(val, max) > 0) {
-        max = val;
-      }
-      if (Double.compare(val, min) < 0) {
-        min = val;
-      }
-      mean += (val - mean) / count; // incremental mean: no running sum is kept
-    }
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e09ef681/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
deleted file mode 100644
index 2b1fa4f..0000000
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.LeafReaderContext;
-
-/** A {@link Collector} that gathers statistics for a DocValues field into a {@link DocValuesStats}. */
-public class DocValuesStatsCollector implements Collector {
-
-  private final DocValuesStats<?> stats;
-
-  /** Builds a collector which updates the given {@code stats} with every collected document. */
-  public DocValuesStatsCollector(DocValuesStats<?> stats) {
-    this.stats = stats;
-  }
-
-  @Override
-  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
-    final boolean segmentHasField = stats.init(context);
-    if (segmentHasField) {
-      // Let the stats object decide, per document, whether it is a hit or a miss.
-      return new LeafCollector() {
-        @Override public void setScorer(Scorer scorer) throws IOException {}
-
-        @Override
-        public void collect(int doc) throws IOException {
-          stats.accumulate(doc);
-        }
-      };
-    }
-
-    // Stats cannot be computed for this segment: every matching document is recorded as missing a value.
-    return new LeafCollector() {
-      @Override public void setScorer(Scorer scorer) throws IOException {}
-
-      @Override
-      public void collect(int doc) throws IOException {
-        stats.addMissing();
-      }
-    };
-  }
-
-  @Override
-  public boolean needsScores() {
-    return false;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e09ef681/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
deleted file mode 100644
index 65f82e6..0000000
--- a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.stream.DoubleStream;
-import java.util.stream.LongStream;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoubleDocValuesField;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats;
-import org.apache.lucene.search.DocValuesStats.LongDocValuesStats;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-
-/** Unit tests for {@link DocValuesStatsCollector}. */
-public class TestDocValuesStatsCollector extends LuceneTestCase {
-
-  public void testNoDocsWithField() throws IOException {
-    try (Directory dir = newDirectory();
-        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
-      int numDocs = TestUtil.nextInt(random(), 1, 100);
-      for (int i = 0; i < numDocs; i++) {
-        indexWriter.addDocument(new Document());
-      }
-
-      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
-        IndexSearcher searcher = new IndexSearcher(reader);
-        LongDocValuesStats stats = new LongDocValuesStats("foo");
-        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
-
-        assertEquals(0, stats.count());
-        assertEquals(numDocs, stats.missing());
-      }
-    }
-  }
-
-  public void testRandomDocsWithLongValues() throws IOException {
-    try (Directory dir = newDirectory();
-        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
-      String field = "numeric";
-      int numDocs = TestUtil.nextInt(random(), 1, 100);
-      long[] docValues = new long[numDocs];
-      int nextVal = 1;
-      for (int i = 0; i < numDocs; i++) {
-        Document doc = new Document();
-        if (random().nextBoolean()) { // not all documents have a value
-          doc.add(new NumericDocValuesField(field, nextVal));
-          doc.add(new StringField("id", "doc" + i, Store.NO));
-          docValues[i] = nextVal;
-          ++nextVal;
-        }
-        indexWriter.addDocument(doc);
-      }
-
-      // 20% of cases delete some docs
-      if (random().nextDouble() < 0.2) {
-        for (int i = 0; i < numDocs; i++) {
-          if (random().nextBoolean()) {
-            indexWriter.deleteDocuments(new Term("id", "doc" + i));
-            docValues[i] = 0;
-          }
-        }
-      }
-
-      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
-        IndexSearcher searcher = new IndexSearcher(reader);
-        LongDocValuesStats stats = new LongDocValuesStats(field);
-        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
-
-        int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
-        assertEquals(expCount, stats.count());
-        assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing());
-        if (stats.count() > 0) {
-          assertEquals(getPositiveValues(docValues).max().getAsLong(), stats.max().longValue());
-          assertEquals(getPositiveValues(docValues).min().getAsLong(), stats.min().longValue());
-          assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001);
-        }
-      }
-    }
-  }
-
-  public void testRandomDocsWithDoubleValues() throws IOException {
-    try (Directory dir = newDirectory();
-        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
-      String field = "numeric";
-      int numDocs = TestUtil.nextInt(random(), 1, 100);
-      double[] docValues = new double[numDocs];
-      double nextVal = 1.0;
-      for (int i = 0; i < numDocs; i++) {
-        Document doc = new Document();
-        if (random().nextBoolean()) { // not all documents have a value
-          doc.add(new DoubleDocValuesField(field, nextVal));
-          doc.add(new StringField("id", "doc" + i, Store.NO));
-          docValues[i] = nextVal;
-          ++nextVal;
-        }
-        indexWriter.addDocument(doc);
-      }
-
-      // 20% of cases delete some docs
-      if (random().nextDouble() < 0.2) {
-        for (int i = 0; i < numDocs; i++) {
-          if (random().nextBoolean()) {
-            indexWriter.deleteDocuments(new Term("id", "doc" + i));
-            docValues[i] = 0;
-          }
-        }
-      }
-
-      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
-        IndexSearcher searcher = new IndexSearcher(reader);
-        DoubleDocValuesStats stats = new DoubleDocValuesStats(field);
-        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
-
-        int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
-        assertEquals(expCount, stats.count());
-        assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing());
-        if (stats.count() > 0) {
-          assertEquals(getPositiveValues(docValues).max().getAsDouble(), stats.max().doubleValue(), 0.00001);
-          assertEquals(getPositiveValues(docValues).min().getAsDouble(), stats.min().doubleValue(), 0.00001);
-          assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001);
-        }
-      }
-    }
-  }
-
-  private static LongStream getPositiveValues(long[] docValues) {
-    return Arrays.stream(docValues).filter(v -> v > 0);
-  }
-
-  private static DoubleStream getPositiveValues(double[] docValues) {
-    return Arrays.stream(docValues).filter(v -> v > 0);
-  }
-
-  private static LongStream getZeroValues(long[] docValues) {
-    return Arrays.stream(docValues).filter(v -> v == 0);
-  }
-
-  private static DoubleStream getZeroValues(double[] docValues) {
-    return Arrays.stream(docValues).filter(v -> v == 0);
-  }
-
-}