You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/08 12:32:23 UTC
svn commit: r1637544 [3/6] - in /lucene/dev/branches/lucene6005: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/perfield/ lucene/core/src/java/org/apache/lucene/document/ lucene/core/src/java/org/apache/lucene/index/ lucene/core/...

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java Sat Nov  8 11:32:18 2014
@@ -31,14 +31,14 @@ import org.apache.lucene.util.TestUtil;
 
 public class TestCompiledAutomaton extends LuceneTestCase {
 
-  private CompiledAutomaton build(String... strings) {
+  private CompiledAutomaton build(int maxDeterminizedStates, String... strings) {
     final List<BytesRef> terms = new ArrayList<>();
     for(String s : strings) {
       terms.add(new BytesRef(s));
     }
     Collections.sort(terms);
     final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
-    return new CompiledAutomaton(a, true, false);
+    return new CompiledAutomaton(a, true, false, maxDeterminizedStates);
   }
 
   private void testFloor(CompiledAutomaton c, String input, String expected) {
@@ -53,8 +53,8 @@ public class TestCompiledAutomaton exten
     }
   }
 
-  private void testTerms(String[] terms) throws Exception {
-    final CompiledAutomaton c = build(terms);
+  private void testTerms(int maxDeterminizedStates, String[] terms) throws Exception {
+    final CompiledAutomaton c = build(maxDeterminizedStates, terms);
     final BytesRef[] termBytes = new BytesRef[terms.length];
     for(int idx=0;idx<terms.length;idx++) {
       termBytes[idx] = new BytesRef(terms[idx]);
@@ -100,7 +100,7 @@ public class TestCompiledAutomaton exten
     while(terms.size() != numTerms) {
       terms.add(randomString());
     }
-    testTerms(terms.toArray(new String[terms.size()]));
+    testTerms(numTerms * 100, terms.toArray(new String[terms.size()]));
   }
 
   private String randomString() {
@@ -109,7 +109,8 @@ public class TestCompiledAutomaton exten
   }
 
   public void testBasic() throws Exception {
-    CompiledAutomaton c = build("fob", "foo", "goo");
+    CompiledAutomaton c = build(Operations.DEFAULT_MAX_DETERMINIZED_STATES,
+      "fob", "foo", "goo");
     testFloor(c, "goo", "goo");
     testFloor(c, "ga", "foo");
     testFloor(c, "g", "foo");

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java Sat Nov  8 11:32:18 2014
@@ -19,6 +19,8 @@ package org.apache.lucene.util.automaton
 
 import org.apache.lucene.util.LuceneTestCase;
 
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
 /**
  * Not completely thorough, but tries to test determinism correctness
  * somewhat randomly.
@@ -39,29 +41,32 @@ public class TestDeterminism extends Luc
     for (int i = 0; i < num; i++) {
       Automaton a = AutomatonTestUtil.randomAutomaton(random());
       a = AutomatonTestUtil.determinizeSimple(a);
-      Automaton b = Operations.determinize(a);
+      Automaton b = Operations.determinize(a, Integer.MAX_VALUE);
       // TODO: more verifications possible?
       assertTrue(Operations.sameLanguage(a, b));
     }
   }
   
   private static void assertAutomaton(Automaton a) {
-    a = Operations.determinize(Operations.removeDeadStates(a));
+    a = Operations.determinize(Operations.removeDeadStates(a), DEFAULT_MAX_DETERMINIZED_STATES);
 
     // complement(complement(a)) = a
-    Automaton equivalent = Operations.complement(Operations.complement(a));
+    Automaton equivalent = Operations.complement(Operations.complement(a,
+      DEFAULT_MAX_DETERMINIZED_STATES), DEFAULT_MAX_DETERMINIZED_STATES);
     assertTrue(Operations.sameLanguage(a, equivalent));
     
     // a union a = a
-    equivalent = Operations.determinize(Operations.removeDeadStates(Operations.union(a, a)));
+    equivalent = Operations.determinize(Operations.removeDeadStates(Operations.union(a, a)),
+      DEFAULT_MAX_DETERMINIZED_STATES);
     assertTrue(Operations.sameLanguage(a, equivalent));
     
     // a intersect a = a
-    equivalent = Operations.determinize(Operations.removeDeadStates(Operations.intersection(a, a)));
+    equivalent = Operations.determinize(Operations.removeDeadStates(Operations.intersection(a, a)),
+      DEFAULT_MAX_DETERMINIZED_STATES);
     assertTrue(Operations.sameLanguage(a, equivalent));
     
     // a minus a = empty
-    Automaton empty = Operations.minus(a, a);
+    Automaton empty = Operations.minus(a, a, DEFAULT_MAX_DETERMINIZED_STATES);
     assertTrue(Operations.isEmpty(empty));
     
     // as long as don't accept the empty string
@@ -70,7 +75,8 @@ public class TestDeterminism extends Luc
       //System.out.println("test " + a);
       Automaton optional = Operations.optional(a);
       //System.out.println("optional " + optional);
-      equivalent = Operations.minus(optional, Automata.makeEmptyString());
+      equivalent = Operations.minus(optional, Automata.makeEmptyString(),
+        DEFAULT_MAX_DETERMINIZED_STATES);
       //System.out.println("equiv " + equivalent);
       assertTrue(Operations.sameLanguage(a, equivalent));
     }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java Sat Nov  8 11:32:18 2014
@@ -50,12 +50,12 @@ public class TestDeterminizeLexicon exte
   public void assertLexicon() throws Exception {
     Collections.shuffle(automata, random());
     Automaton lex = Operations.union(automata);
-    lex = Operations.determinize(lex);
+    lex = Operations.determinize(lex, 1000000);
     assertTrue(Operations.isFinite(lex));
     for (String s : terms) {
       assertTrue(Operations.run(lex, s));
     }
-    final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex);
+    final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex, false, 1000000);
     for (String s : terms) {
       byte bytes[] = s.getBytes(StandardCharsets.UTF_8);
       assertTrue(lexByte.run(bytes, 0, bytes.length));

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java Sat Nov  8 11:32:18 2014
@@ -22,6 +22,8 @@ import java.util.List;
 
 import org.apache.lucene.util.LuceneTestCase;
 
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
 public class TestLevenshteinAutomata extends LuceneTestCase {
  
   public void testLev0() throws Exception {
@@ -121,11 +123,11 @@ public class TestLevenshteinAutomata ext
   private Automaton naiveLev1(String s) {
     Automaton a = Automata.makeString(s);
     a = Operations.union(a, insertionsOf(s));
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     a = Operations.union(a, deletionsOf(s));
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     a = Operations.union(a, substitutionsOf(s));
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     
     return a;
   }
@@ -137,7 +139,7 @@ public class TestLevenshteinAutomata ext
   private Automaton naiveLev1T(String s) {
     Automaton a = naiveLev1(s);
     a = Operations.union(a, transpositionsOf(s));
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     return a;
   }
   
@@ -156,7 +158,7 @@ public class TestLevenshteinAutomata ext
     }
     
     Automaton a = Operations.union(list);
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     return a;
   }
   
@@ -174,7 +176,7 @@ public class TestLevenshteinAutomata ext
     }
     
     Automaton a = Operations.union(list);
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     return a;
   }
   
@@ -193,7 +195,7 @@ public class TestLevenshteinAutomata ext
     }
     
     Automaton a = Operations.union(list);
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     return a;
   }
   
@@ -218,7 +220,7 @@ public class TestLevenshteinAutomata ext
       }
     }
     Automaton a = Operations.union(list);
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     return a;
   }
   

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java Sat Nov  8 11:32:18 2014
@@ -28,8 +28,10 @@ public class TestMinimize extends Lucene
     int num = atLeast(200);
     for (int i = 0; i < num; i++) {
       Automaton a = AutomatonTestUtil.randomAutomaton(random());
-      Automaton la = Operations.determinize(Operations.removeDeadStates(a));
-      Automaton lb = MinimizationOperations.minimize(a);
+      Automaton la = Operations.determinize(Operations.removeDeadStates(a),
+        Integer.MAX_VALUE);
+      Automaton lb = MinimizationOperations.minimize(a,
+        Integer.MAX_VALUE);
       assertTrue(Operations.sameLanguage(la, lb));
     }
   }
@@ -42,7 +44,8 @@ public class TestMinimize extends Lucene
     for (int i = 0; i < num; i++) {
       Automaton a = AutomatonTestUtil.randomAutomaton(random());
       a = AutomatonTestUtil.minimizeSimple(a);
-      Automaton b = MinimizationOperations.minimize(a);
+      Automaton b = MinimizationOperations.minimize(a,
+        Integer.MAX_VALUE);
       assertTrue(Operations.sameLanguage(a, b));
       assertEquals(a.getNumStates(), b.getNumStates());
       int numStates = a.getNumStates();
@@ -62,6 +65,6 @@ public class TestMinimize extends Lucene
   
   /** n^2 space usage in Hopcroft minimization? */
   public void testMinimizeHuge() {
-    new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
+    new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton(1000000);
   }
 }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/util/automaton/TestOperations.java Sat Nov  8 11:32:18 2014
@@ -24,6 +24,8 @@ import org.apache.lucene.util.fst.Util;
 
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
 
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
 public class TestOperations extends LuceneTestCase {
   /** Test string union. */
   public void testStringUnion() {
@@ -51,7 +53,8 @@ public class TestOperations extends Luce
     for (BytesRef bref : strings) {
       eachIndividual[i++] = Automata.makeString(bref.utf8ToString());
     }
-    return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)));
+    return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)),
+      DEFAULT_MAX_DETERMINIZED_STATES);
   }
 
   /** Test concatenation with empty language returns empty */
@@ -71,12 +74,12 @@ public class TestOperations extends Luce
     Automaton concat1 = Operations.concatenate(expandedSingleton, nfa);
     Automaton concat2 = Operations.concatenate(singleton, nfa);
     assertFalse(concat2.isDeterministic());
-    assertTrue(Operations.sameLanguage(Operations.determinize(concat1),
-                                       Operations.determinize(concat2)));
-    assertTrue(Operations.sameLanguage(Operations.determinize(nfa),
-                                       Operations.determinize(concat1)));
-    assertTrue(Operations.sameLanguage(Operations.determinize(nfa),
-                                       Operations.determinize(concat2)));
+    assertTrue(Operations.sameLanguage(Operations.determinize(concat1, 100),
+                                       Operations.determinize(concat2, 100)));
+    assertTrue(Operations.sameLanguage(Operations.determinize(nfa, 100),
+                                       Operations.determinize(concat1, 100)));
+    assertTrue(Operations.sameLanguage(Operations.determinize(nfa, 100),
+                                       Operations.determinize(concat2, 100)));
   }
 
   public void testGetRandomAcceptedString() throws Throwable {
@@ -86,7 +89,7 @@ public class TestOperations extends Luce
 
       final RegExp re = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
       //System.out.println("TEST i=" + i + " re=" + re);
-      final Automaton a = Operations.determinize(re.toAutomaton());
+      final Automaton a = Operations.determinize(re.toAutomaton(), DEFAULT_MAX_DETERMINIZED_STATES);
       assertFalse(Operations.isEmpty(a));
 
       final AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
@@ -137,7 +140,7 @@ public class TestOperations extends Luce
    */
   public void testFiniteStringsBasic() {
     Automaton a = Operations.union(Automata.makeString("dog"), Automata.makeString("duck"));
-    a = MinimizationOperations.minimize(a);
+    a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     Set<IntsRef> strings = getFiniteStrings(a, -1, true);
     assertEquals(2, strings.size());
     IntsRefBuilder dog = new IntsRefBuilder();
@@ -190,7 +193,7 @@ public class TestOperations extends Luce
     // TODO: what other random things can we do here...
     Automaton a = Operations.union(automata);
     if (random().nextBoolean()) {
-      a = MinimizationOperations.minimize(a);
+      a = MinimizationOperations.minimize(a, 1000000);
       if (VERBOSE) {
         System.out.println("TEST: a.minimize numStates=" + a.getNumStates());
       }
@@ -198,7 +201,7 @@ public class TestOperations extends Luce
       if (VERBOSE) {
         System.out.println("TEST: a.determinize");
       }
-      a = Operations.determinize(a);
+      a = Operations.determinize(a, 1000000);
     } else if (random().nextBoolean()) {
       if (VERBOSE) {
         System.out.println("TEST: a.removeDeadStates");

Modified: lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/TestRandomSamplingFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/TestRandomSamplingFacetsCollector.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/TestRandomSamplingFacetsCollector.java (original)
+++ lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/TestRandomSamplingFacetsCollector.java Sat Nov  8 11:32:18 2014
@@ -1,5 +1,6 @@
 package org.apache.lucene.facet;
 
+import java.util.List;
 import java.util.Random;
 
 import org.apache.lucene.document.Document;
@@ -12,6 +13,7 @@ import org.apache.lucene.facet.taxonomy.
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.TermQuery;
@@ -37,29 +39,40 @@ import org.apache.lucene.util.IOUtils;
 
 public class TestRandomSamplingFacetsCollector extends FacetTestCase {
   
+  // The first 50 chi-square values for p-value=0.05, taken from:
+  // http://en.wikibooks.org/wiki/Engineering_Tables/Chi-Squared_Distibution
+  private static final float[] CHI_SQUARE_VALUES = new float[] {0.0f, 3.841f,
+      5.991f, 7.815f, 9.488f, 11.07f, 12.592f, 14.067f, 15.507f, 16.919f,
+      18.307f, 19.675f, 21.026f, 22.362f, 23.685f, 24.996f, 26.296f, 27.587f,
+      28.869f, 30.144f, 31.41f, 32.671f, 33.924f, 35.172f, 36.415f, 37.652f,
+      38.885f, 40.113f, 41.337f, 42.557f, 43.773f, 44.985f, 46.194f, 47.4f,
+      48.602f, 49.802f, 50.998f, 52.192f, 53.384f, 54.572f, 55.758f, 56.942f,
+      58.124f, 59.304f, 60.481f, 61.656f, 62.83f, 64.001f, 65.171f, 66.339f,
+      67.505f};
+  
   public void testRandomSampling() throws Exception {
     Directory dir = newDirectory();
     Directory taxoDir = newDirectory();
     
+    Random random = random();
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
-    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
     
     FacetsConfig config = new FacetsConfig();
     
+    final int numCategories = 10;
     int numDocs = atLeast(10000);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       doc.add(new StringField("EvenOdd", (i % 2 == 0) ? "even" : "odd", Store.NO));
-      doc.add(new FacetField("iMod10", String.valueOf(i % 10)));
+      doc.add(new FacetField("iMod10", Integer.toString(i % numCategories)));
       writer.addDocument(config.build(taxoWriter, doc));
     }
-    Random random = random();
     
     // NRT open
     IndexSearcher searcher = newSearcher(writer.getReader());
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
-    writer.close();
-    IOUtils.close(taxoWriter);
+    IOUtils.close(writer, taxoWriter);
     
     // Test empty results
     RandomSamplingFacetsCollector collectRandomZeroResults = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong());
@@ -80,61 +93,55 @@ public class TestRandomSamplingFacetsCol
     // Use a query to select half of the documents.
     TermQuery query = new TermQuery(new Term("EvenOdd", "even"));
     
-    // there will be 5 facet values (0, 2, 4, 6 and 8), as only the even (i %
-    // 10) are hits.
-    // there is a REAL small chance that one of the 5 values will be missed when
-    // sampling.
-    // but is that 0.8 (chance not to take a value) ^ 2000 * 5 (any can be
-    // missing) ~ 10^-193
-    // so that is probably not going to happen.
-    int maxNumChildren = 5;
-    
-    RandomSamplingFacetsCollector random100Percent = new RandomSamplingFacetsCollector(numDocs, random.nextLong()); // no sampling
-    RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong()); // 10 % of total docs, 20% of the hits
+    RandomSamplingFacetsCollector random10Percent = new RandomSamplingFacetsCollector(numDocs / 10, random.nextLong()); // 10% of total docs, 20% of the hits
 
     FacetsCollector fc = new FacetsCollector();
     
-    searcher.search(query, MultiCollector.wrap(fc, random100Percent, random10Percent));
+    searcher.search(query, MultiCollector.wrap(fc, random10Percent));
     
-    FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
-    FastTaxonomyFacetCounts random100FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random100Percent);
-    FastTaxonomyFacetCounts exactFacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, fc);
-    
-    FacetResult random10Result = random10Percent.amortizeFacetCounts(random10FacetCounts.getTopChildren(10, "iMod10"), config, searcher);
-    FacetResult random100Result = random100FacetCounts.getTopChildren(10, "iMod10");
-    FacetResult exactResult = exactFacetCounts.getTopChildren(10, "iMod10");
-    
-    assertEquals(random100Result, exactResult);
-    
-    // we should have five children, but there is a small chance we have less.
-    // (see above).
-    assertTrue(random10Result.childCount <= maxNumChildren);
-    // there should be one child at least.
-    assertTrue(random10Result.childCount >= 1);
-    
-    // now calculate some statistics to determine if the sampled result is 'ok'.
-    // because random sampling is used, the results will vary each time.
-    int sum = 0;
-    for (LabelAndValue lav : random10Result.labelValues) {
-      sum += lav.value.intValue();
+    final List<MatchingDocs> matchingDocs = random10Percent.getMatchingDocs();
+
+    // count the total hits and sampled docs, also store the number of sampled
+    // docs per segment
+    int totalSampledDocs = 0, totalHits = 0;
+    int[] numSampledDocs = new int[matchingDocs.size()];
+//    System.out.println("numSegments=" + numSampledDocs.length);
+    for (int i = 0; i < numSampledDocs.length; i++) {
+      MatchingDocs md = matchingDocs.get(i);
+      final DocIdSetIterator iter = md.bits.iterator();
+      while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) ++numSampledDocs[i];
+      totalSampledDocs += numSampledDocs[i];
+      totalHits += md.totalHits;
     }
-    float mu = (float) sum / (float) maxNumChildren;
     
-    float variance = 0;
-    for (LabelAndValue lav : random10Result.labelValues) {
-      variance += Math.pow((mu - lav.value.intValue()), 2);
+    // compute the chi-square value for the sampled documents' distribution
+    float chi_square = 0;
+    for (int i = 0; i < numSampledDocs.length; i++) {
+      MatchingDocs md = matchingDocs.get(i);
+      float ei = (float) md.totalHits / totalHits;
+      if (ei > 0.0f) {
+        float oi = (float) numSampledDocs[i] / totalSampledDocs;
+        chi_square += (Math.pow(ei - oi, 2) / ei);
+      }
     }
-    variance = variance / maxNumChildren;
-    float sigma = (float) Math.sqrt(variance);
     
-    // we query only half the documents and have 5 categories. The average
-    // number of docs in a category will thus be the total divided by 5*2
-    float targetMu = numDocs / (5.0f * 2.0f);
-    
-    // the average should be in the range and the standard deviation should not
-    // be too great
-    assertTrue(sigma < 200);
-    assertTrue(targetMu - 3 * sigma < mu && mu < targetMu + 3 * sigma);
+    // Verify that the chi-square value isn't too big. According to
+    // http://en.wikipedia.org/wiki/Chi-squared_distribution#Table_of_.CF.872_value_vs_p-value,
+    // we basically verify that there is a really small chance of hitting a very
+    // bad sample (p-value < 0.05), for n-degrees of freedom. The number 'n' depends
+    // on the number of segments.
+    assertTrue("chisquare not statistically significant enough: " + chi_square, chi_square < CHI_SQUARE_VALUES[numSampledDocs.length]);
+    
+    // Test amortized counts - each should be 5X the sampled count, capped at numDocs/10
+    final FastTaxonomyFacetCounts random10FacetCounts = new FastTaxonomyFacetCounts(taxoReader, config, random10Percent);
+    final FacetResult random10Result = random10FacetCounts.getTopChildren(10, "iMod10");
+    final FacetResult amortized10Result = random10Percent.amortizeFacetCounts(random10Result, config, searcher);
+    for (int i = 0; i < amortized10Result.labelValues.length; i++) {
+      LabelAndValue amortized = amortized10Result.labelValues[i];
+      LabelAndValue sampled = random10Result.labelValues[i];
+      // since numDocs may not divide by 10 exactly, allow for some slack in the amortized count 
+      assertEquals(amortized.value.floatValue(), Math.min(5 * sampled.value.floatValue(), numDocs / 10.f), 1.0);
+    }
     
     IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
   }

Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Sat Nov  8 11:32:18 2014
@@ -25,6 +25,7 @@ import java.util.Set;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DocIdSet;
@@ -32,7 +33,6 @@ import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.RegexpQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
@@ -903,7 +903,7 @@ public class FieldQueryTest extends Abst
   public void testRegexpQuery() throws Exception {
     makeIndexStrMV();
     Term term = new Term(F, "d[a-z].g");
-    defgMultiTermQueryTest(new RegexpQuery (term));
+    defgMultiTermQueryTest(new RegexpQuery(term));
   }
 
   public void testRangeQuery() throws Exception {

Modified: lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java (original)
+++ lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java Sat Nov  8 11:32:18 2014
@@ -24,12 +24,13 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.lucene.codecs.PostingsFormat; // javadocs
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -282,7 +283,7 @@ public class DocTermOrds implements Acco
   /** Call this only once (if you subclass!) */
   protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
     final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
-    if (info != null && info.hasDocValues()) {
+    if (info != null && info.getDocValuesType() != DocValuesType.NONE) {
       throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
     }
     //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);

Modified: lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java Sat Nov  8 11:32:18 2014
@@ -26,11 +26,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.WeakHashMap;
 
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.SortedDocValues;
@@ -356,9 +358,9 @@ class FieldCacheImpl implements FieldCac
     if (fieldInfo == null) {
       // field does not exist or has no value
       return new Bits.MatchNoBits(reader.maxDoc());
-    } else if (fieldInfo.hasDocValues()) {
+    } else if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
       return reader.getDocsWithField(field);
-    } else if (!fieldInfo.isIndexed()) {
+    } else if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
       return new Bits.MatchNoBits(reader.maxDoc());
     }
     BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, null), false);
@@ -459,9 +461,9 @@ class FieldCacheImpl implements FieldCac
       final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
       if (info == null) {
         return DocValues.emptyNumeric();
-      } else if (info.hasDocValues()) {
+      } else if (info.getDocValuesType() != DocValuesType.NONE) {
         throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
-      } else if (!info.isIndexed()) {
+      } else if (info.getIndexOptions() == IndexOptions.NONE) {
         return DocValues.emptyNumeric();
       }
       return (NumericDocValues) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
@@ -634,11 +636,11 @@ class FieldCacheImpl implements FieldCac
       final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
       if (info == null) {
         return DocValues.emptySorted();
-      } else if (info.hasDocValues()) {
+      } else if (info.getDocValuesType() != DocValuesType.NONE) {
         // we don't try to build a sorted instance from numeric/binary doc
         // values because dedup can be very costly
         throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
-      } else if (!info.isIndexed()) {
+      } else if (info.getIndexOptions() == IndexOptions.NONE) {
         return DocValues.emptySorted();
       }
       SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
@@ -783,9 +785,9 @@ class FieldCacheImpl implements FieldCac
     final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
     if (info == null) {
       return DocValues.emptyBinary();
-    } else if (info.hasDocValues()) {
+    } else if (info.getDocValuesType() != DocValuesType.NONE) {
       throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
-    } else if (!info.isIndexed()) {
+    } else if (info.getIndexOptions() == IndexOptions.NONE) {
       return DocValues.emptyBinary();
     }
 
@@ -906,9 +908,9 @@ class FieldCacheImpl implements FieldCac
     final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
     if (info == null) {
       return DocValues.emptySortedSet();
-    } else if (info.hasDocValues()) {
+    } else if (info.getDocValuesType() != DocValuesType.NONE) {
       throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
-    } else if (!info.isIndexed()) {
+    } else if (info.getIndexOptions() == IndexOptions.NONE) {
       return DocValues.emptySortedSet();
     }
     

Modified: lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java Sat Nov  8 11:32:18 2014
@@ -37,6 +37,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FilterDirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
@@ -185,7 +186,7 @@ public class UninvertingReader extends F
     ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
     for (FieldInfo fi : in.getFieldInfos()) {
       DocValuesType type = fi.getDocValuesType();
-      if (fi.isIndexed() && !fi.hasDocValues()) {
+      if (fi.getIndexOptions() != IndexOptions.NONE && fi.getDocValuesType() == DocValuesType.NONE) {
         Type t = mapping.get(fi.name);
         if (t != null) {
           switch(t) {
@@ -291,7 +292,7 @@ public class UninvertingReader extends F
    */
   private Type getType(String field) {
     FieldInfo info = fieldInfos.fieldInfo(field);
-    if (info == null || info.hasDocValues() == false) {
+    if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
       return null;
     }
     return mapping.get(field);

Modified: lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java Sat Nov  8 11:32:18 2014
@@ -263,11 +263,11 @@ public class CustomScoreQuery extends Qu
         valSrcExpls[i] = valSrcWeights[i].explain(info, doc);
       }
       Explanation customExp = CustomScoreQuery.this.getCustomScoreProvider(info).customExplain(doc,subQueryExpl,valSrcExpls);
-      float sc = getBoost() * customExp.getValue();
+      float sc = queryWeight * customExp.getValue();
       Explanation res = new ComplexExplanation(
         true, sc, CustomScoreQuery.this.toString() + ", product of:");
       res.addDetail(customExp);
-      res.addDetail(new Explanation(getBoost(), "queryBoost")); // actually using the q boost as q weight (== weight value)
+      res.addDetail(new Explanation(queryWeight, "queryWeight"));
       return res;
     }
 

Modified: lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original)
+++ lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Sat Nov  8 11:32:18 2014
@@ -591,7 +591,7 @@ public final class MoreLikeThis {
    * @param filteredDocument Document with field values extracted for selected fields.
    * @return More Like This query for the passed document.
    */
-  public Query like(Map<String, ArrayList<String>> filteredDocument) throws IOException {
+  public Query like(Map<String, Collection<Object>> filteredDocument) throws IOException {
     if (fieldNames == null) {
       // gather list of valid fields from lucene
       Collection<String> fields = MultiFields.getIndexedFields(ir);
@@ -753,16 +753,16 @@ public final class MoreLikeThis {
   }
 
 
-  private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, ArrayList<String>> fields) throws 
+  private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, Collection<Object>> fields) throws 
       IOException {
     HashMap<String,Int> termFreqMap = new HashMap();
     for (String fieldName : fieldNames) {
 
       for (String field : fields.keySet()) {
-        ArrayList<String> fieldValues = fields.get(field);
-        for(String fieldValue:fieldValues) {
+        Collection<Object> fieldValues = fields.get(field);
+        for(Object fieldValue:fieldValues) {
           if (fieldValue != null) {
-            addTermFrequencies(new StringReader(fieldValue), termFreqMap,
+            addTermFrequencies(new StringReader(String.valueOf(fieldValue)), termFreqMap,
                 fieldName);
           }
         }

Modified: lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java Sat Nov  8 11:32:18 2014
@@ -33,6 +33,9 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanQuery.TooManyClauses;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
+import org.apache.lucene.util.automaton.RegExp;
+
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
 
 /** This class is overridden by QueryParser in QueryParser.jj
  * and acts to separate the majority of the Java code from the .jj grammar file. 
@@ -81,6 +84,7 @@ public abstract class QueryParserBase ex
   boolean analyzeRangeTerms = false;
 
   boolean autoGeneratePhraseQueries;
+  int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES;
 
   // So the generated QueryParser(CharStream) won't error out
   protected QueryParserBase() {
@@ -398,6 +402,24 @@ public abstract class QueryParserBase ex
     return analyzeRangeTerms;
   }
 
+  /**
+   * @param maxDeterminizedStates the maximum number of states that
+   *   determinizing a regexp query can result in.  If the query results in any
+   *   more states a TooComplexToDeterminizeException is thrown.
+   */
+  public void setMaxDeterminizedStates(int maxDeterminizedStates) {
+    this.maxDeterminizedStates = maxDeterminizedStates;
+  }
+
+  /**
+   * @return the maximum number of states that determinizing a regexp query
+   *   can result in.  If the query results in any more states a
+   *   TooComplexToDeterminizeException is thrown.
+   */
+  public int getMaxDeterminizedStates() {
+    return maxDeterminizedStates;
+  }
+
   protected void addClause(List<BooleanClause> clauses, int conj, int mods, Query q) {
     boolean required, prohibited;
 
@@ -553,7 +575,8 @@ public abstract class QueryParserBase ex
    * @return new RegexpQuery instance
    */
   protected Query newRegexpQuery(Term regexp) {
-    RegexpQuery query = new RegexpQuery(regexp);
+    RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL,
+      maxDeterminizedStates);
     query.setRewriteMethod(multiTermRewriteMethod);
     return query;
   }

Modified: lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java (original)
+++ lucene/dev/branches/lucene6005/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java Sat Nov  8 11:32:18 2014
@@ -38,6 +38,7 @@ public class RegexpQueryNodeBuilder impl
   public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
     RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode;
 
+    // TODO: make the maxStates configurable w/ a reasonable default (QueryParserBase uses 10000)
     RegexpQuery q = new RegexpQuery(new Term(regexpNode.getFieldAsString(),
         regexpNode.textToBytesRef()));
 

Modified: lucene/dev/branches/lucene6005/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java Sat Nov  8 11:32:18 2014
@@ -24,9 +24,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
@@ -40,6 +40,8 @@ import org.apache.lucene.util.automaton.
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.Transition;
 
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
 // TODO
 //    - compare perf to PhraseQuery exact and sloppy
 //    - optimize: find terms that are in fact MUST (because all paths
@@ -108,6 +110,16 @@ public class TermAutomatonQuery extends 
 
   /** Call this once you are done adding states/transitions. */
   public void finish() {
+    finish(DEFAULT_MAX_DETERMINIZED_STATES);
+  }
+
+  /**
+   * Call this once you are done adding states/transitions.
+   * @param maxDeterminizedStates Maximum number of states created when
+   *   determinizing the automaton.  Higher numbers allow this operation to
+   *   consume more memory but allow more complex automatons.
+   */
+  public void finish(int maxDeterminizedStates) {
     Automaton automaton = builder.finish();
 
     // System.out.println("before det:\n" + automaton.toDot());
@@ -171,7 +183,8 @@ public class TermAutomatonQuery extends 
       automaton = newAutomaton;
     }
 
-    det = Operations.removeDeadStates(Operations.determinize(automaton));
+    det = Operations.removeDeadStates(Operations.determinize(automaton,
+      maxDeterminizedStates));
   }
 
   @Override

Modified: lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java (original)
+++ lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java Sat Nov  8 11:32:18 2014
@@ -106,7 +106,7 @@ public class BBoxStrategy extends Spatia
 
     FieldType fieldType = new FieldType(DoubleField.TYPE_NOT_STORED);
     fieldType.setNumericPrecisionStep(8);//Solr's default
-    fieldType.setDocValueType(DocValuesType.NUMERIC);
+    fieldType.setDocValuesType(DocValuesType.NUMERIC);
     setFieldType(fieldType);
   }
 

Modified: lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java (original)
+++ lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java Sat Nov  8 11:32:18 2014
@@ -112,7 +112,7 @@ public class TestBBoxStrategy extends Ra
     if (random().nextBoolean()) {
       BBoxStrategy bboxStrategy = (BBoxStrategy) strategy;
       FieldType fieldType = new FieldType(bboxStrategy.getFieldType());
-      fieldType.setDocValueType(DocValuesType.NONE);
+      fieldType.setDocValuesType(DocValuesType.NONE);
       bboxStrategy.setFieldType(fieldType);
     }
     for (SpatialOperation operation : SpatialOperation.values()) {

Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sat Nov  8 11:32:18 2014
@@ -62,6 +62,8 @@ import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.fst.Util.Result;
 import org.apache.lucene.util.fst.Util.TopResults;
 
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
 /**
  * Suggester that first analyzes the surface form, adds the
  * analyzed form to a weighted FST, and then does the same
@@ -898,7 +900,7 @@ public class AnalyzingSuggester extends 
 
     // TODO: we can optimize this somewhat by determinizing
     // while we convert
-    automaton = Operations.determinize(automaton);
+    automaton = Operations.determinize(automaton, DEFAULT_MAX_DETERMINIZED_STATES);
     return automaton;
   }
 

Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java Sat Nov  8 11:32:18 2014
@@ -17,7 +17,6 @@ package org.apache.lucene.search.suggest
  */
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
@@ -30,13 +29,15 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.LevenshteinAutomata;
 import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.UTF32ToUTF8;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
 
+import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
+
 /**
  * Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is
  * based on the Damerau-Levenshtein (optimal string alignment) algorithm, though
@@ -205,7 +206,7 @@ public final class FuzzySuggester extend
   protected Automaton convertAutomaton(Automaton a) {
     if (unicodeAware) {
       Automaton utf8automaton = new UTF32ToUTF8().convert(a);
-      utf8automaton = Operations.determinize(utf8automaton);
+      utf8automaton = Operations.determinize(utf8automaton, DEFAULT_MAX_DETERMINIZED_STATES);
       return utf8automaton;
     } else {
       return a;
@@ -253,7 +254,7 @@ public final class FuzzySuggester extend
       Automaton a = Operations.union(Arrays.asList(subs));
       // TODO: we could call toLevenshteinAutomata() before det? 
       // this only happens if you have multiple paths anyway (e.g. synonyms)
-      return Operations.determinize(a);
+      return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
     }
   }
 }

Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java Sat Nov  8 11:32:18 2014
@@ -800,11 +800,11 @@ public class AssertingLeafReader extends
     FieldInfo fi = getFieldInfos().fieldInfo(field);
     if (docsWithField != null) {
       assert fi != null;
-      assert fi.hasDocValues();
+      assert fi.getDocValuesType() != DocValuesType.NONE;
       assert maxDoc() == docsWithField.length();
       docsWithField = new AssertingBits(docsWithField);
     } else {
-      assert fi == null || fi.hasDocValues() == false;
+      assert fi == null || fi.getDocValuesType() == DocValuesType.NONE;
     }
     return docsWithField;
   }

Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java Sat Nov  8 11:32:18 2014
@@ -49,15 +49,16 @@ public abstract class BaseFieldInfoForma
     Codec codec = getCodec();
     SegmentInfo segmentInfo = newSegmentInfo(dir, "_123");
     FieldInfos.Builder builder = new FieldInfos.Builder();
-    FieldInfo fi = builder.addOrUpdate("field", TextField.TYPE_STORED);
+    FieldInfo fi = builder.getOrAdd("field");
+    fi.setIndexOptions(TextField.TYPE_STORED.indexOptions());
     addAttributes(fi);
     FieldInfos infos = builder.finish();
     codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
     FieldInfos infos2 = codec.fieldInfosFormat().read(dir, segmentInfo, "", IOContext.DEFAULT);
     assertEquals(1, infos2.size());
     assertNotNull(infos2.fieldInfo("field"));
-    assertTrue(infos2.fieldInfo("field").isIndexed());
-    assertFalse(infos2.fieldInfo("field").hasDocValues());
+    assertTrue(infos2.fieldInfo("field").getIndexOptions() != IndexOptions.NONE);
+    assertFalse(infos2.fieldInfo("field").getDocValuesType() != DocValuesType.NONE);
     assertFalse(infos2.fieldInfo("field").omitsNorms());
     assertFalse(infos2.fieldInfo("field").hasPayloads());
     assertFalse(infos2.fieldInfo("field").hasVectors());
@@ -81,7 +82,15 @@ public abstract class BaseFieldInfoForma
     FieldInfos.Builder builder = new FieldInfos.Builder();
     for (String field : fieldNames) {
       IndexableFieldType fieldType = randomFieldType(random());
-      FieldInfo fi = builder.addOrUpdate(field, fieldType);
+      FieldInfo fi = builder.getOrAdd(field);
+      IndexOptions indexOptions = fieldType.indexOptions();
+      if (indexOptions != IndexOptions.NONE) {
+        fi.setIndexOptions(indexOptions);
+        if (fieldType.omitNorms()) {      
+          fi.setOmitsNorms();
+        }
+      }
+      fi.setDocValuesType(fieldType.docValuesType());
       if (fieldType.indexOptions() != IndexOptions.NONE && fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
         if (random().nextBoolean()) {
           fi.setStorePayloads();
@@ -118,7 +127,7 @@ public abstract class BaseFieldInfoForma
     
     if (r.nextBoolean()) {
       DocValuesType values[] = getDocValuesTypes();
-      type.setDocValueType(values[r.nextInt(values.length)]);
+      type.setDocValuesType(values[r.nextInt(values.length)]);
     }
         
     return type;
@@ -157,11 +166,9 @@ public abstract class BaseFieldInfoForma
     assertEquals(expected.name, actual.name);
     assertEquals(expected.getDocValuesType(), actual.getDocValuesType());
     assertEquals(expected.getIndexOptions(), actual.getIndexOptions());
-    assertEquals(expected.hasDocValues(), actual.hasDocValues());
     assertEquals(expected.hasNorms(), actual.hasNorms());
     assertEquals(expected.hasPayloads(), actual.hasPayloads());
     assertEquals(expected.hasVectors(), actual.hasVectors());
-    assertEquals(expected.isIndexed(), actual.isIndexed());
     assertEquals(expected.omitsNorms(), actual.omitsNorms());
     assertEquals(expected.getDocValuesGen(), actual.getDocValuesGen());
   }

Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java Sat Nov  8 11:32:18 2014
@@ -60,8 +60,8 @@ import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
+import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -1247,7 +1247,7 @@ public abstract class BasePostingsFormat
     for(String field : fields.keySet()) {
       while (true) {
         Automaton a = AutomatonTestUtil.randomAutomaton(random());
-        CompiledAutomaton ca = new CompiledAutomaton(a);
+        CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE);
         if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
           // Keep retrying until we get an A that will really "use" the PF's intersect code:
           continue;

Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Sat Nov  8 11:32:18 2014
@@ -58,8 +58,8 @@ import java.util.logging.Logger;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document2;
-import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
@@ -70,6 +70,7 @@ import org.apache.lucene.index.BinaryDoc
 import org.apache.lucene.index.CompositeReader;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldFilterLeafReader;
@@ -77,8 +78,8 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader.ReaderClosedListener;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.ReaderClosedListener;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
@@ -104,8 +105,8 @@ import org.apache.lucene.index.SortedDoc
 import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.search.AssertingIndexSearcher;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -115,12 +116,12 @@ import org.apache.lucene.store.BaseDirec
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.FlushInfo;
-import org.apache.lucene.store.IOContext.Context;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IOContext.Context;
 import org.apache.lucene.store.LockFactory;
 import org.apache.lucene.store.MergeInfo;
-import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
 import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.store.RateLimitedDirectoryWrapper;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
@@ -137,6 +138,7 @@ import org.junit.Test;
 import org.junit.rules.RuleChain;
 import org.junit.rules.TestRule;
 import org.junit.runner.RunWith;
+
 import com.carrotsearch.randomizedtesting.JUnit4MethodProvider;
 import com.carrotsearch.randomizedtesting.LifecycleScope;
 import com.carrotsearch.randomizedtesting.MixWithSuiteName;
@@ -147,16 +149,16 @@ import com.carrotsearch.randomizedtestin
 import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
 import com.carrotsearch.randomizedtesting.annotations.TestGroup;
 import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
 import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule;
@@ -2226,7 +2228,7 @@ public abstract class LuceneTestCase ext
   private static Set<String> getDVFields(IndexReader reader) {
     Set<String> fields = new HashSet<>();
     for(FieldInfo fi : MultiFields.getMergedFieldInfos(reader)) {
-      if (fi.hasDocValues()) {
+      if (fi.getDocValuesType() != DocValuesType.NONE) {
         fields.add(fi.name);
       }
     }

Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java Sat Nov  8 11:32:18 2014
@@ -895,7 +895,7 @@ public final class TestUtil {
     for(IndexableField f : doc1.getFields()) {
       final Field field1 = (Field) f;
       final Field field2;
-      final DocValuesType dvType = field1.fieldType().docValueType();
+      final DocValuesType dvType = field1.fieldType().docValuesType();
       final NumericType numType = field1.fieldType().numericType();
       if (dvType != DocValuesType.NONE) {
         switch(dvType) {

Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java Sat Nov  8 11:32:18 2014
@@ -40,6 +40,11 @@ import org.apache.lucene.util.UnicodeUti
  * basic unoptimized implementations (*slow) for testing.
  */
 public class AutomatonTestUtil {
+  /**
+   * Default maximum number of states that {@link Operations#determinize} should create.
+   */
+  public static final int DEFAULT_MAX_DETERMINIZED_STATES = 1000000;
+
   /** Returns random string, including full unicode range. */
   public static String randomRegexp(Random r) {
     while (true) {
@@ -257,12 +262,12 @@ public class AutomatonTestUtil {
     // get two random Automata from regexps
     Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
     if (random.nextBoolean()) {
-      a1 = Operations.complement(a1);
+      a1 = Operations.complement(a1, DEFAULT_MAX_DETERMINIZED_STATES);
     }
     
     Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
     if (random.nextBoolean()) {
-      a2 = Operations.complement(a2);
+      a2 = Operations.complement(a2, DEFAULT_MAX_DETERMINIZED_STATES);
     }
 
     // combine them in random ways
@@ -270,7 +275,7 @@ public class AutomatonTestUtil {
       case 0: return Operations.concatenate(a1, a2);
       case 1: return Operations.union(a1, a2);
       case 2: return Operations.intersection(a1, a2);
-      default: return Operations.minus(a1, a2);
+      default: return Operations.minus(a1, a2, DEFAULT_MAX_DETERMINIZED_STATES);
     }
   }
   

Modified: lucene/dev/branches/lucene6005/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/solr/CHANGES.txt?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene6005/solr/CHANGES.txt Sat Nov  8 11:32:18 2014
@@ -198,6 +198,15 @@ New Features
   have similar content. It works in standalone/cloud mode and shares logic with the
   Lucene MoreLikeThis class (Anshum Gupta).
 
+* SOLR-6670: change BALANCESLICEUNIQUE to BALANCESHARDUNIQUE. Also, the parameter
+  for ADDREPLICAPROP that used to be sliceUnique is now shardUnique. (Erick Erickson)
+
+* SOLR-6351: Stats can now be nested under pivot values by adding a 'stats' local param to 
+  facet.pivot which refers to a 'tag' local param in one or more stats.field params.
+  (hossman, Vitaliy Zhovtyuk, Steve Molloy)
+
+SOLR-6533: Support editing common solrconfig.xml values (Noble Paul)
+
 Bug Fixes
 ----------------------
 
@@ -228,9 +237,6 @@ Bug Fixes
 * SOLR-6457: LBHttpSolrServer: ArrayIndexOutOfBoundsException risk if counter overflows
   (longkey via Noble Paul)
 
-* SOLR-6452: StatsComponent's stat 'missing' will work on fields with docValues=true and
-  indexed=false (Xu Zhang via Tomás Fernández Löbbe)
-
 * SOLR-6499: Log warning about multiple update request handlers
   (Noble Paul, Andreas Hubold, hossman)
 
@@ -242,19 +248,6 @@ Bug Fixes
 * SOLR-6484: SolrCLI's healthcheck action needs to check live nodes as part of reporting
   the status of a replica (Timothy Potter)
 
-* SOLR-6509: Solr start scripts interactive mode doesn't honor -z argument (Timothy Potter)
-
-* SOLR-6511: Fencepost error in LeaderInitiatedRecoveryThread (Timothy Potter)
-
-* SOLR-6530: Commits under network partitions can put any node in down state.
-  (Ramkumar Aiyengar, Alan Woodward, Mark Miller, shalin)
-
-* SOLR-6587: Misleading exception when creating collections in SolrCloud with bad configuration.
-  (Tomás Fernández Löbbe)
-
-* SOLR-6545: Query field list with wild card on dynamic field fails.
-  (Burke Webster, Xu Zhang, shalin)
-
 * SOLR-6540 Fix NPE from strdist() func when doc value source does not exist in a doc (hossman)
 
 * SOLR-6624 Spelling mistakes in the Java source (Hrishikesh Gadre)
@@ -262,25 +255,22 @@ Bug Fixes
 * SOLR-6307: Atomic update remove does not work for int array or date array
   (Anurag Sharma , noble)
 
-* SOLR-6573: QueryElevationComponent now works with localParams in the query (janhoy)
-
-* SOLR-6524: Collections left in recovery state after node restart because recovery sleep time
-  increases exponentially between retries. (Mark Miller, shalin)
-
 * SOLR-6224: Post soft-commit callbacks are called before soft commit actually happens.
   (shalin)
 
-* SOLR-6646: bin/solr start script fails to detect solr on non-default port and then after
-  30s tails wrong log file (janhoy)
-
-* SOLR-6647: Bad error message when missing resource from ZK when parsing Schema (janhoy)
-
 * SOLR-6591: Overseer can use stale cluster state and lose updates for collections
   with stateFormat > 1. (shalin)
 
 * SOLR-6631: DistributedQueue spinning on calling zookeeper getChildren()
   (Jessica Cheng Mallet, Mark Miller, Timothy Potter)
 
+* SOLR-2927: Solr does not unregister all mbeans upon exception in constructor
+  causing memory leaks. (tom liu, Sharath Babu, Cyrille Roy, shalin)
+
+* SOLR-6685: ConcurrentModificationException in Overseer Status API. (shalin)
+
+* SOLR-6706: /update/json/docs throws RuntimeException if a nested structure
+  contains a non-leaf float field (Noble Paul, shalin)
 
 Optimizations
 ----------------------
@@ -344,9 +334,6 @@ Other Changes
 * SOLR-6115: Cleanup enum/string action types in Overseer, OverseerCollectionProcessor and
   CollectionHandler. (Erick Erickson, shalin)
 
-* SOLR-6486: solr start script can have a debug flag option; use -a to set arbitrary options
-  (Noble Paul, Timothy Potter)
-
 * SOLR-6453: Stop throwing an error message from Overseer when node exits (Ramkumar Aiyengar, Noble Paul)
 
 * SOLR-6249: Schema API changes return success before all cores are updated; client application
@@ -354,18 +341,9 @@ Other Changes
   managed schema update to block until all replicas of the same collection have processed the
   update or until the specified timeout is reached (Timothy Potter)
 
-* SOLR-6550: Provide simple mechanism for passing additional metadata / context about a server-side
-   SolrException back to the client-side (Timothy Potter)
-
 * SOLR-6597: SolrIndexConfig parameter in one of the SolrIndexSearcher constructor has been removed.
   It was just passed and never used via that constructor. (Anshum Gupta)
 
-* SOLR-6549: bin/solr script should support a -s option to set the -Dsolr.solr.home property.
-  (Timothy Potter)
-
-* SOLR-6529: Stop command in the start scripts should only stop the instance that it had started.
-  (Varun Thacker, Timothy Potter)
-
 * SOLR-5852: Add CloudSolrServer helper method to connect to a ZK ensemble. (Varun Thacker, Furkan KAMACI,
   Shawn Heisey, Mark Miller, Erick Erickson via shalin)
 
@@ -387,6 +365,68 @@ Other Changes
   relative paths for solrconfig.xml <lib> references with solr.install.dir 
   system property; bin/solr scripts will set it appropriately. (ehatcher)
 
+* SOLR-6698: Solr is not consistent wrt ZkCredentialsProvider / ZkCredentialProvider.
+  References to zkCredentialProvider in System properties or configurations should be
+  changed to zkCredentialsProvider.  (Gregory Chanan)
+
+* SOLR-6715: ZkSolrResourceLoader constructors accept a parameter called 'collection'
+  but it should be 'configName'. (shalin)
+
+==================  4.10.3 ==================
+
+Bug Fixes
+----------------------
+
+* SOLR-6696: bin/solr start script should not enable autoSoftCommit by default (janhoy)
+
+* SOLR-6704: TrieDateField type drops schema properties in branch 4.10 (Tomás Fernández Löbbe)
+
+==================  4.10.2 ==================
+
+Bug Fixes
+----------------------
+
+* SOLR-6509: Solr start scripts interactive mode doesn't honor -z argument (Timothy Potter)
+
+* SOLR-6511: Fencepost error in LeaderInitiatedRecoveryThread (Timothy Potter)
+
+* SOLR-6530: Commits under network partitions can put any node in down state.
+  (Ramkumar Aiyengar, Alan Woodward, Mark Miller, shalin)
+
+* SOLR-6573: QueryElevationComponent now works with localParams in the query (janhoy)
+
+* SOLR-6524: Collections left in recovery state after node restart because recovery sleep time
+  increases exponentially between retries. (Mark Miller, shalin)
+
+* SOLR-6587: Misleading exception when creating collections in SolrCloud with bad configuration.
+  (Tomás Fernández Löbbe)
+
+* SOLR-6452: StatsComponent's stat 'missing' will work on fields with docValues=true and
+  indexed=false (Xu Zhang via Tomás Fernández Löbbe)
+
+* SOLR-6646: bin/solr start script fails to detect solr on non-default port and then after
+  30s tails wrong log file (janhoy)
+
+* SOLR-6647: Bad error message when missing resource from ZK when parsing Schema (janhoy)
+
+* SOLR-6545: Query field list with wild card on dynamic field fails.
+  (Burke Webster, Xu Zhang, shalin)
+
+Other Changes
+----------------------
+
+* SOLR-6550: Provide simple mechanism for passing additional metadata / context about a server-side
+   SolrException back to the client-side (Timothy Potter)
+
+* SOLR-6486: solr start script can have a debug flag option; use -a to set arbitrary options
+  (Noble Paul, Timothy Potter)
+
+* SOLR-6549: bin/solr script should support a -s option to set the -Dsolr.solr.home property.
+  (Timothy Potter)
+
+* SOLR-6529: Stop command in the start scripts should only stop the instance that it had started.
+  (Varun Thacker, Timothy Potter)
+
 ==================  4.10.1 ==================
 
 Bug Fixes

Modified: lucene/dev/branches/lucene6005/solr/bin/solr
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/solr/bin/solr?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/solr/bin/solr (original)
+++ lucene/dev/branches/lucene6005/solr/bin/solr Sat Nov  8 11:32:18 2014
@@ -1023,7 +1023,7 @@ $SOLR_HOST_ARG -Djetty.port=$SOLR_PORT \
 -Dsolr.solr.home=$SOLR_HOME \
 -Dsolr.install.dir=$SOLR_TIP \
 -Duser.timezone=$SOLR_TIMEZONE \
--Djava.net.preferIPv4Stack=true -Dsolr.autoSoftCommit.maxTime=3000"
+-Djava.net.preferIPv4Stack=true"
   
   if [ "$SOLR_MODE" == "solrcloud" ]; then
     IN_CLOUD_MODE=" in SolrCloud mode"

Modified: lucene/dev/branches/lucene6005/solr/bin/solr.cmd
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/solr/bin/solr.cmd?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/solr/bin/solr.cmd (original)
+++ lucene/dev/branches/lucene6005/solr/bin/solr.cmd Sat Nov  8 11:32:18 2014
@@ -619,7 +619,7 @@ IF "%verbose%"=="1" (
     @echo     SOLR_TIMEZONE   = %SOLR_TIMEZONE%
 )
 
-set START_OPTS=-Duser.timezone=%SOLR_TIMEZONE% -Djava.net.preferIPv4Stack=true -Dsolr.autoSoftCommit.maxTime=3000
+set START_OPTS=-Duser.timezone=%SOLR_TIMEZONE% -Djava.net.preferIPv4Stack=true
 set START_OPTS=%START_OPTS% %GC_TUNE% %GC_LOG_OPTS%
 IF NOT "!CLOUD_MODE_OPTS!"=="" set START_OPTS=%START_OPTS% !CLOUD_MODE_OPTS!
 IF NOT "%REMOTE_JMX_OPTS%"=="" set START_OPTS=%START_OPTS% %REMOTE_JMX_OPTS%

Modified: lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java (original)
+++ lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/CloudUtil.java Sat Nov  8 11:32:18 2014
@@ -88,7 +88,7 @@ public class CloudUtil {
 
   /**
    * Returns a displayable unified path to the given resource. For non-solrCloud that will be the
-   * same as getConfigDir, but for Cloud it will be getCollectionZkPath ending in a /
+   * same as getConfigDir, but for Cloud it will be getConfigSetZkPath ending in a /
    * <p/>
    * <b>Note:</b> Do not use this to generate a valid file path, but for debug printing etc
    * @param loader Resource loader instance
@@ -96,7 +96,7 @@ public class CloudUtil {
    */
   public static String unifiedResourcePath(SolrResourceLoader loader) {
     return (loader instanceof ZkSolrResourceLoader) ?
-            ((ZkSolrResourceLoader) loader).getCollectionZkPath() + "/" :
+            ((ZkSolrResourceLoader) loader).getConfigSetZkPath() + "/" :
             loader.getConfigDir();
   }
 }

Modified: lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/Overseer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/Overseer.java?rev=1637544&r1=1637543&r2=1637544&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/Overseer.java (original)
+++ lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/cloud/Overseer.java Sat Nov  8 11:32:18 2014
@@ -18,11 +18,11 @@ package org.apache.solr.cloud;
  */
 
 import static java.util.Collections.singletonMap;
-import static org.apache.solr.cloud.OverseerCollectionProcessor.SLICE_UNIQUE;
+import static org.apache.solr.cloud.OverseerCollectionProcessor.SHARD_UNIQUE;
 import static org.apache.solr.common.cloud.ZkNodeProps.makeMap;
 import static org.apache.solr.cloud.OverseerCollectionProcessor.ONLY_ACTIVE_NODES;
 import static org.apache.solr.cloud.OverseerCollectionProcessor.COLL_PROP_PREFIX;
-import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESLICEUNIQUE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -40,6 +40,7 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Random;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -382,6 +383,20 @@ public class Overseer implements Closeab
                 zkClient.setData(e.getKey(), data, true);
               } else {
                 log.info("going to create_collection {}", e.getKey());
+                String parentPath = e.getKey().substring(0, e.getKey().lastIndexOf('/'));
+                if (!zkClient.exists(parentPath, true)) {
+                  // if the /collections/collection_name path doesn't exist then it means that
+                  // 1) the user invoked a DELETE collection API and the OverseerCollectionProcessor has deleted
+                  // this zk path.
+                  // 2) these are most likely old "state" messages which are only being processed now because
+                  // if they were new "state" messages then in legacy mode, a new collection would have been
+                  // created with stateFormat = 1 (which is the default state format)
+                  // 3) these can't be new "state" messages created for a new collection because
+                  // otherwise the OverseerCollectionProcessor would have already created this path
+                  // as part of the create collection API call -- which is the only way in which a collection
+                  // with stateFormat > 1 can possibly be created
+                  continue;
+                }
                 zkClient.create(e.getKey(), data, CreateMode.PERSISTENT, true);
               }
             }
@@ -475,7 +490,7 @@ public class Overseer implements Closeab
           case DELETEREPLICAPROP:
             clusterState = deleteReplicaProp(clusterState, message);
             break;
-          case BALANCESLICEUNIQUE:
+          case BALANCESHARDUNIQUE:
             ExclusiveSliceProperty dProp = new ExclusiveSliceProperty(this, clusterState, message);
             if (dProp.balanceProperty()) {
               String collName = message.getStr(ZkStateReader.COLLECTION_PROP);
@@ -571,19 +586,19 @@ public class Overseer implements Closeab
       }
       property = property.toLowerCase(Locale.ROOT);
       String propVal = message.getStr(ZkStateReader.PROPERTY_VALUE_PROP);
-      String sliceUnique = message.getStr(OverseerCollectionProcessor.SLICE_UNIQUE);
+      String shardUnique = message.getStr(OverseerCollectionProcessor.SHARD_UNIQUE);
 
       boolean isUnique = false;
 
       if (sliceUniqueBooleanProperties.contains(property)) {
-        if (StringUtils.isNotBlank(sliceUnique) && Boolean.parseBoolean(sliceUnique) == false) {
+        if (StringUtils.isNotBlank(shardUnique) && Boolean.parseBoolean(shardUnique) == false) {
           throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Overseer SETREPLICAPROPERTY for " +
-              property + " cannot have " + OverseerCollectionProcessor.SLICE_UNIQUE + " set to anything other than" +
+              property + " cannot have " + OverseerCollectionProcessor.SHARD_UNIQUE + " set to anything other than" +
               "'true'. No action taken");
         }
         isUnique = true;
       } else {
-        isUnique = Boolean.parseBoolean(sliceUnique);
+        isUnique = Boolean.parseBoolean(shardUnique);
       }
 
       Replica replica = clusterState.getReplica(collectionName, replicaName);
@@ -1456,12 +1471,12 @@ public class Overseer implements Closeab
                 ZkStateReader.PROPERTY_PROP + "' parameters. No action taken ");
       }
 
-      Boolean sliceUnique = Boolean.parseBoolean(message.getStr(SLICE_UNIQUE));
-      if (sliceUnique == false &&
+      Boolean shardUnique = Boolean.parseBoolean(message.getStr(SHARD_UNIQUE));
+      if (shardUnique == false &&
           Overseer.sliceUniqueBooleanProperties.contains(this.property) == false) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Balancing properties amongst replicas in a slice requires that"
-            + " the property be a pre-defined property (e.g. 'preferredLeader') or that 'sliceUnique' be set to 'true' " +
-            " Property: " + this.property + " sliceUnique: " + Boolean.toString(sliceUnique));
+            + " the property be a pre-defined property (e.g. 'preferredLeader') or that 'shardUnique' be set to 'true' " +
+            " Property: " + this.property + " shardUnique: " + Boolean.toString(shardUnique));
       }
 
       collection = clusterState.getCollection(collectionName);
@@ -1508,7 +1523,7 @@ public class Overseer implements Closeab
           if (StringUtils.isNotBlank(replica.getStr(property))) {
             if (sliceHasProp) {
               throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-                  "'" + BALANCESLICEUNIQUE + "' should only be called for properties that have at most one member " +
+                  "'" + BALANCESHARDUNIQUE + "' should only be called for properties that have at most one member " +
                       "in any slice with the property set. No action taken.");
             }
             if (nodesHostingProp.containsKey(nodeName) == false) {
@@ -1948,7 +1963,7 @@ public class Overseer implements Closeab
   public static class Stats {
     static final int MAX_STORED_FAILURES = 10;
 
-    final Map<String, Stat> stats = Collections.synchronizedMap(new HashMap<String, Stat>());
+    final Map<String, Stat> stats = new ConcurrentHashMap<>();
 
     public Map<String, Stat> getStats() {
       return stats;
@@ -1966,19 +1981,16 @@ public class Overseer implements Closeab
 
     public void success(String operation) {
       String op = operation.toLowerCase(Locale.ROOT);
-      synchronized (stats) {
-        Stat stat = stats.get(op);
-        if (stat == null) {
-          stat = new Stat();
-          stats.put(op, stat);
-        }
-        stat.success.incrementAndGet();
+      Stat stat = stats.get(op);
+      if (stat == null) {
+        stat = new Stat();
+        stats.put(op, stat);
       }
+      stat.success.incrementAndGet();
     }
 
     public void error(String operation) {
       String op = operation.toLowerCase(Locale.ROOT);
-      synchronized (stats) {
       Stat stat = stats.get(op);
       if (stat == null) {
         stat = new Stat();
@@ -1986,26 +1998,20 @@ public class Overseer implements Closeab
       }
       stat.errors.incrementAndGet();
     }
-    }
 
     public TimerContext time(String operation) {
       String op = operation.toLowerCase(Locale.ROOT);
-      Stat stat;
-      synchronized (stats) {
-        stat = stats.get(op);
+      Stat stat = stats.get(op);
       if (stat == null) {
         stat = new Stat();
         stats.put(op, stat);
       }
-      }
       return stat.requestTime.time();
     }
 
     public void storeFailureDetails(String operation, ZkNodeProps request, SolrResponse resp) {
       String op = operation.toLowerCase(Locale.ROOT);
-      Stat stat ;
-      synchronized (stats) {
-        stat = stats.get(op);
+      Stat stat = stats.get(op);
       if (stat == null) {
         stat = new Stat();
         stats.put(op, stat);
@@ -2018,7 +2024,6 @@ public class Overseer implements Closeab
         failedOps.addLast(new FailedOp(request, resp));
       }
     }
-    }
 
     public List<FailedOp> getFailureDetails(String operation) {
       Stat stat = stats.get(operation.toLowerCase(Locale.ROOT));