Posted to commits@lucene.apache.org by rm...@apache.org on 2013/12/24 14:39:24 UTC

svn commit: r1553272 [5/10] - in /lucene/dev/branches/lucene5376/lucene/server: ./ plugins/ plugins/BinaryDocument/ plugins/BinaryDocument/src/ plugins/BinaryDocument/src/java/ plugins/BinaryDocument/src/java/org/ plugins/BinaryDocument/src/java/org/ap...

Added: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveSettingsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveSettingsHandler.java?rev=1553272&view=auto
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveSettingsHandler.java (added)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveSettingsHandler.java Tue Dec 24 13:39:22 2013
@@ -0,0 +1,79 @@
+package org.apache.lucene.server.handlers;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.server.FinishRequest;
+import org.apache.lucene.server.GlobalState;
+import org.apache.lucene.server.IndexState;
+import org.apache.lucene.server.params.*;
+
+/** For changing index settings that can be changed while
+ *  the index is running. */
+public class LiveSettingsHandler extends Handler {
+
+  public final static StructType TYPE =
+    new StructType(
+        new Param("indexName", "The index", new StringType()),
+        new Param("maxRefreshSec", "Longest time to wait before reopening IndexSearcher (i.e., periodic background reopen).", new FloatType(), 1.0f),
+        new Param("minRefreshSec", "Shortest time to wait before reopening IndexSearcher (i.e., when a search is waiting for a specific indexGen).", new FloatType(), .05f),
+        new Param("maxSearcherAgeSec", "Non-current searchers older than this are pruned.", new FloatType(), 60.0f),
+        new Param("index.ramBufferSizeMB", "Size (in MB) of IndexWriter's RAM buffer.", new FloatType(), 16.0f));
+
+  @Override
+  public StructType getType() {
+    return TYPE;
+  }
+
+  @Override
+  public String getTopDoc() {
+    return "Change global offline or online settings for this index.";
+  }
+
+  public LiveSettingsHandler(GlobalState state) {
+    super(state);
+  }
+
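+  // A request to this handler might look like the following (hypothetical
+  // example, assuming the server's JSON command protocol; the index name
+  // "index1" is illustrative):
+  //
+  //   {"indexName": "index1", "maxRefreshSec": 2.0, "index.ramBufferSizeMB": 64.0}
+  //
+  // Settings omitted from the request keep their current values; the
+  // response is the resulting live settings as JSON.
+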
+  @Override
+  public FinishRequest handle(final IndexState state, Request r, Map<String,List<String>> params) throws Exception {
+
+    // TODO: should this be done inside finish?  Ie, so it's
+    // "all or no change"?
+    if (r.hasParam("maxRefreshSec")) {
+      state.setMaxRefreshSec(r.getFloat("maxRefreshSec"));
+    }
+    if (r.hasParam("minRefreshSec")) {
+      state.setMinRefreshSec(r.getFloat("minRefreshSec"));
+    }
+    if (r.hasParam("maxSearcherAgeSec")) {
+      state.setMaxSearcherAgeSec(r.getFloat("maxSearcherAgeSec"));
+    }
+    if (r.hasParam("index.ramBufferSizeMB")) {
+      state.setIndexRAMBufferSizeMB(r.getFloat("index.ramBufferSizeMB"));
+    }
+
+    return new FinishRequest() {
+      @Override
+      public String finish() {
+        return state.getLiveSettingsJSON();
+      }
+    };
+  }
+}

Added: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveValuesHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveValuesHandler.java?rev=1553272&view=auto
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveValuesHandler.java (added)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/LiveValuesHandler.java Tue Dec 24 13:39:22 2013
@@ -0,0 +1,83 @@
+package org.apache.lucene.server.handlers;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.server.FieldDef;
+import org.apache.lucene.server.FinishRequest;
+import org.apache.lucene.server.GlobalState;
+import org.apache.lucene.server.IndexState;
+import org.apache.lucene.server.StringLiveFieldValues;
+import org.apache.lucene.server.params.ListType;
+import org.apache.lucene.server.params.Param;
+import org.apache.lucene.server.params.Request;
+import org.apache.lucene.server.params.StringType;
+import org.apache.lucene.server.params.StructType;
+import org.apache.lucene.server.params.Type;
+import net.minidev.json.JSONArray;
+import net.minidev.json.JSONObject;
+
+public class LiveValuesHandler extends Handler {
+  private final static StructType TYPE = new StructType(
+                                             new Param("indexName", "Index name", new StringType()),
+                                             new Param("ids", "List of ids to retrieve", new ListType(new StringType())),
+                                             new Param("field", "Which field value to look up", new StringType()));
+  @Override
+  public String getTopDoc() {
+    return "Lookup live field values.";
+  }
+
+  @Override
+  public StructType getType() {
+    return TYPE;
+  }
+
+  public LiveValuesHandler(GlobalState state) {
+    super(state);
+  }
+
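+  // A lookup request might look like the following (hypothetical example,
+  // assuming the server's JSON command protocol and a field "title" that was
+  // registered with liveValues pointing at the id field):
+  //
+  //   {"indexName": "index1", "field": "title", "ids": ["0", "1", "2"]}
+  //
+  // The response holds a "values" array with the most recently indexed value
+  // for each id, in the same order as the ids list.
+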
+  @Override
+  public FinishRequest handle(final IndexState state, final Request r, Map<String,List<String>> params) throws Exception {
+    FieldDef fd = state.getField(r, "field");
+
+    if (fd.liveValuesIDField == null) {
+      r.fail("field", "field \"" + fd.liveValuesIDField + "\" was not registered with liveValues");
+    }
+
+    final List<Object> idValues = r.getList("ids");
+
+    final StringLiveFieldValues lv = state.liveFieldValues.get(fd.name);
+    assert lv != null;
+
+    return new FinishRequest() {
+      @Override
+      public String finish() throws IOException {
+        JSONObject result = new JSONObject();
+        JSONArray arr = new JSONArray();
+        result.put("values", arr);
+        for(Object o : idValues) {
+          arr.add(lv.get((String) o));
+        }
+        return result.toString();
+      }
+    };
+  }
+}

Added: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java?rev=1553272&view=auto
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java (added)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java Tue Dec 24 13:39:22 2013
@@ -0,0 +1,1076 @@
+package org.apache.lucene.server.handlers;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.text.Collator;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
+import org.apache.lucene.analysis.ar.ArabicStemFilter;
+import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
+import org.apache.lucene.analysis.br.BrazilianAnalyzer;
+import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
+import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.eu.BasqueAnalyzer;
+import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
+import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
+import org.apache.lucene.analysis.icu.segmentation.ICUTokenizerConfig;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.pattern.PatternTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
+import org.apache.lucene.document.FieldType.NumericType;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.suggest.analyzing.SuggestStopFilter;
+import org.apache.lucene.server.FieldDef;
+import org.apache.lucene.server.FinishRequest;
+import org.apache.lucene.server.GlobalState;
+import org.apache.lucene.server.IndexState;
+import org.apache.lucene.server.params.*;
+import org.apache.lucene.server.params.PolyType.PolyEntry;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.Version;
+import net.minidev.json.JSONObject;
+import net.minidev.json.JSONValue;
+import net.minidev.json.parser.ParseException;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.RuleBasedBreakIterator;
+
+public class RegisterFieldHandler extends Handler {
+
+  private final static List<Object> DEFAULT_ENGLISH_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : EnglishAnalyzer.getDefaultStopSet()) {
+      DEFAULT_ENGLISH_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_ARABIC_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : ArabicAnalyzer.getDefaultStopSet()) {
+      DEFAULT_ARABIC_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_ARMENIAN_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : ArmenianAnalyzer.getDefaultStopSet()) {
+      DEFAULT_ARMENIAN_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_BASQUE_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : BasqueAnalyzer.getDefaultStopSet()) {
+      DEFAULT_BASQUE_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_BRAZILIAN_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : BrazilianAnalyzer.getDefaultStopSet()) {
+      DEFAULT_BRAZILIAN_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_BULGARIAN_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : BulgarianAnalyzer.getDefaultStopSet()) {
+      DEFAULT_BULGARIAN_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_CJK_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : CJKAnalyzer.getDefaultStopSet()) {
+      DEFAULT_CJK_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_CATALAN_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : CatalanAnalyzer.getDefaultStopSet()) {
+      DEFAULT_CATALAN_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  private final static List<Object> DEFAULT_GERMAN_STOP_WORDS = new ArrayList<Object>();
+  static {
+    for(Object o : GermanAnalyzer.getDefaultStopSet()) {
+      DEFAULT_GERMAN_STOP_WORDS.add(new String((char[]) o));
+    }
+  }
+
+  // Breaks the recursion:
+  private final static WrapType ANALYZER_TYPE_WRAP = new WrapType();
+
+  public final static Param MATCH_VERSION_PARAM = new Param("matchVersion", "Lucene version to match.", new EnumType("LUCENE_40", "LUCENE_40",
+                                                                                                                     "LUCENE_41", "LUCENE_41",
+                                                                                                                     "LUCENE_42", "LUCENE_42",
+                                                                                                                     "LUCENE_43", "LUCENE_43"));
+
+  final static Type ANALYZER_TYPE =
+    new StructType(
+                   // nocommit cutover to PolyType
+                   new Param("class",
+                             "An existing Analyzer class.  Use either this, or define your own analysis chain by setting tokenizer and tokenFilter.",
+                             new PolyType(Analyzer.class,
+                                          new PolyEntry("ArabicAnalyzer", "Analyzer for Arabic (see @lucene:analyzers-common:org.apache.lucene.analysis.ar.ArabicAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_ARABIC_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("ArmenianAnalyzer", "Analyzer for Armenian. (see @lucene:analyzers-common:org.apache.lucene.analysis.hy.ArmenianAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_ARMENIAN_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("BasqueAnalyzer", "Analyzer for Basque. (see @lucene:analyzers-common:org.apache.lucene.analysis.eu.BasqueAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_BASQUE_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("BrazilianAnalyzer", "Analyzer for Brazilian Portuguese language (see @lucene:analyzers-common:org.apache.lucene.analysis.br.BrazilianAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_BRAZILIAN_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("BulgarianAnalyzer", "Analyzer for Bulgarian (see @lucene:analyzers-common:org.apache.lucene.analysis.bg.BulgarianAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_BULGARIAN_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("CatalanAnalyzer", "Analyzer for Catalan (see @lucene:analyzers-common:org.apache.lucene.analysis.ca.CatalanAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_CATALAN_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("CJKAnalyzer", "An Analyzer that tokenizes text with StandardTokenizer, normalizes content with CJKWidthFilter, folds case with LowerCaseFilter, forms bigrams of CJK with CJKBigramFilter, and filters stopwords with StopFilter (see @lucene:analyzers-common:org.apache.lucene.analysis.cjk.CJKAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis",
+                                                                  new ListType(new StringType()), DEFAULT_CJK_STOP_WORDS)),
+                                          new PolyEntry("CollationKeyAnalyzer", "Configures KeywordTokenizer with CollationAttributeFactory (see @lucene:analyzers-common:org.apache.lucene.collation.CollationKeyAnalyzer)",
+                                                        new Param("locale", "Locale", SearchHandler.LOCALE_TYPE)),
+                                          new PolyEntry("EnglishAnalyzer", "Analyzer for English. (see @lucene:analyzers-common:org.apache.lucene.analysis.en.EnglishAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis.",
+                                                                  new ListType(new StringType()), DEFAULT_ENGLISH_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("GermanAnalyzer", "Analyzer for German language (see @lucene:analyzers-common:org.apache.lucene.analysis.de.GermanAnalyzer)",
+                                                        new Param("stopWords", "Stop words to remove during analysis.",
+                                                                  new ListType(new StringType()), DEFAULT_GERMAN_STOP_WORDS),
+                                                        new Param("stemExclusionSet", "A set of terms not to be stemmed",
+                                                                  new ListType(new StringType()))),
+                                          new PolyEntry("StandardAnalyzer", "Filters StandardTokenizer with StandardFilter, LowerCaseFilter and StopFilter, using a list of English stop words (see @lucene:analyzers-common:org.apache.lucene.analysis.standard.StandardAnalyzer)",
+                                                        new Param("maxTokenLength", "Max token length.", new IntType(), StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH),
+                                                        new Param("stopWords", "Stop words to remove during analysis.",
+                                                                  new ListType(new StringType()), DEFAULT_ENGLISH_STOP_WORDS)),
+                                          new PolyEntry("WhitespaceAnalyzer", "An Analyzer that uses WhitespaceTokenizer (see @lucene:analyzers-common:org.apache.lucene.analysis.core.WhitespaceAnalyzer)")),
+                             "StandardAnalyzer"),
+                   new Param("positionIncrementGap", "How many positions to insert between separate values in a multi-valued field", new IntType(), 0),
+                   new Param("offsetGap", "How many offsets to insert between separate values in a multi-valued field", new IntType(), 1),
+                   new Param("tokenizer", "Tokenizer class (for a custom analysis chain).",
+                             new StructType(
+                                 new Param("class",
+                                           "Tokenizer class",
+                                           new PolyType(Tokenizer.class,
+                                                        new PolyEntry("WhitespaceTokenizer", "A WhitespaceTokenizer is a tokenizer that divides text at whitespace (see @lucene:analyzers-common:org.apache.lucene.analysis.core.WhitespaceTokenizer)", new StructType()),
+                                                        new PolyEntry("StandardTokenizer", "A grammar-based tokenizer constructed with JFlex (see @lucene:analyzers-common:org.apache.lucene.analysis.standard.StandardTokenizer)",
+                                                            new Param("maxTokenLength", "Max length of each token", new IntType(), StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH)),
+                                                        new PolyEntry("PatternTokenizer", "This tokenizer uses regex pattern matching to construct distinct tokens for the input stream (see @lucene:analyzers-common:org.apache.lucene.analysis.pattern.PatternTokenizer)",
+                                                            new Param("pattern", "Regular expression pattern", new StringType()),
+                                                                      new Param("group", "Group index for the tokens (-1 to do 'split')", new IntType(), -1)),
+                                                        new PolyEntry("ICUTokenizer", "Breaks text into words according to UAX #29: Unicode Text Segmentation (http://www.unicode.org/reports/tr29)",
+                                                            new Param("rules", "Customize the tokenization per-script",
+                                                               new ListType(
+                                                                   new StructType(
+                                                                       new Param("script", "Script", new StringType()),
+                                                                       new Param("rules", "Rules", new StringType()))))))))),
+                   new Param("tokenFilters", "Optional list of TokenFilters to apply after the Tokenizer",
+                             new ListType(
+                                 new StructType(
+                                     new Param("class", "TokenFilter class",
+                                         new PolyType(TokenFilter.class,
+                                                      new PolyEntry("ArabicStemFilter", "A TokenFilter that applies ArabicStemmer to stem Arabic words. (see @lucene:analyzers-common:org.apache.lucene.analysis.ar.ArabicStemFilter)"),
+                                                      new PolyEntry("StandardFilter", "Normalizes tokens extracted with StandardTokenizer. (see @lucene:analyzers-common:org.apache.lucene.analysis.standard.StandardFilter)"),
+                                                      new PolyEntry("EnglishPossessiveFilter", "TokenFilter that removes possessives (trailing 's) from words (see @lucene:analyzers-common:org.apache.lucene.analysis.en.EnglishPossessiveFilter)"),
+                                                      new PolyEntry("PorterStemFilter", "Transforms the token stream as per the Porter stemming algorithm (see @lucene:analyzers-common:org.apache.lucene.analysis.en.PorterStemFilter)"),
+                                                      new PolyEntry("SuggestStopFilter", "Like StopFilter except it will not remove the last token if that token was not followed by some token separator.",
+                                                                    new Param("stopWords", "Stop words to remove during analysis",
+                                                                        new ListType(new StringType()), DEFAULT_ENGLISH_STOP_WORDS)),
+                                                      new PolyEntry("EnglishMinimalStemFilter", "A TokenFilter that applies EnglishMinimalStemmer to stem English words (see @lucene:analyzers-common:org.apache.lucene.analysis.en.EnglishMinimalStemFilter)"),
+                                                      new PolyEntry("SetKeywordMarkerFilter", "Marks terms as keywords via the KeywordAttribute (see @lucene:analyzers-common:org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter)", new Param("keyWords", "List of tokens to mark as keywords", new ListType(new StringType()))),
+                                                      new PolyEntry("StopFilter", "Removes stop words from a token stream (see @lucene:analyzers-common:org.apache.lucene.analysis.core.StopFilter)",
+                                                          new Param("stopWords", "Stop words to remove during analysis",
+                                                                    new ListType(new StringType()), DEFAULT_ENGLISH_STOP_WORDS)),
+                                                      new PolyEntry("SynonymFilter", "Matches single- or multi-word synonyms and injects or replaces the match with a corresponding synonym",
+                                                          //new Param("synonymFile", "Local file to load synonyms from", new StringType()),   // nocommit TODO
+                                                          new Param("ignoreCase", "True if matching should be case insensitive", new BooleanType(), true),
+                                                          new Param("analyzer", "Analyzer to use to tokenize synonym inputs", ANALYZER_TYPE_WRAP),
+                                                          new Param("synonyms", "Specify synonyms inline (instead of synonymFile)",
+                                                              new ListType(
+                                                                  new StructType(
+                                                                      new Param("input", "String or list of strings with input token(s) to match", new OrType(new ListType(new StringType()), new StringType())),
+                                                                      // TODO: allow more than one token on the output?
+                                                                      new Param("output", "Single token to replace the matched tokens with", new StringType()),
+                                                                      new Param("replace", "True if the input tokens should be replaced with the output token; false if the input tokens should be preserved and the output token overlaid", new BooleanType(), true))))),
+                                                      new PolyEntry("LowerCaseFilter", "Normalizes token text to lower case (see @lucene:analyzers-common:org.apache.lucene.analysis.core.LowerCaseFilter)", new StructType())))))),
+                   MATCH_VERSION_PARAM);
+
+  static {
+    ANALYZER_TYPE_WRAP.set(ANALYZER_TYPE);
+  }
+
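+  // An analyzer spec matching ANALYZER_TYPE either names a prebuilt class:
+  //
+  //   {"class": "EnglishAnalyzer"}
+  //
+  // or defines a custom chain (hypothetical example; the filters run in the
+  // order listed):
+  //
+  //   {"tokenizer": {"class": "StandardTokenizer"},
+  //    "tokenFilters": [{"class": "EnglishPossessiveFilter"},
+  //                     {"class": "LowerCaseFilter"},
+  //                     {"class": "StopFilter"}]}
+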
+  // nocommit need not be separate TYPE anymore!
+  private static final StructType BM25_SIM_TYPE =
+    new StructType(new Param("k1", "Controls non-linear term frequency normalization (saturation).", new FloatType(), 1.2f),
+                   new Param("b", "Controls to what degree document length normalizes tf values.", new FloatType(), 0.75f));
+
+  // nocommit need not be separate TYPE anymore!
+  private static final StructType DEFAULT_SIM_TYPE = new StructType();
+
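+  // Inside a field definition, a per-field similarity spec might look like
+  // this (hypothetical example):
+  //
+  //   {"similarity": {"class": "BM25Similarity", "k1": 0.9, "b": 0.4}}
+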
+  private final static StructType FIELD_TYPE =
+    new StructType(
+        new Param("type", "Type of the value.",
+                  new EnumType("text", "Text that's tokenized and indexed, with the index-time analyzer.",
+                               "atom", "Text that's indexed as a single token, with DOCS_ONLY and omitting norms.",
+                               "boolean", "Boolean value.",
+                               "float", "Float value.",
+                               "double", "Double value.",
+                               "int", "Int value.",
+                               "long", "Long value.",
+                               // TODO: this is hacked up now ... only supports fixed "recency" blending ... ideally we would accept
+                               // a custom equation and parse & execute that:
+                               "virtual", "Virtual (computed at search time) field, e.g. for blended sorting.")),
+        new Param("index", "True if the value should be indexed.", new BooleanType(), false),
+        new Param("tokenize", "True if the value should be tokenized.", new BooleanType(), true),
+        new Param("store", "True if the value should be stored.", new BooleanType(), false),
+        new Param("multiValued", "True if this field may sometimes have more than one value.", new BooleanType(), false),
+        new Param("highlight", "True if the value should be indexed for highlighting.", new BooleanType(), false),
+        new Param("postingsFormat", "Which PostingsFormat should be used to index this field.",
+                  new StringType(), "Lucene41"),
+        new Param("docValuesFormat", "Which DocValuesFormat should be used to index this field.",
+                  new StringType(), "Lucene45"),
+        new Param("sort", "True if the value should be indexed into doc values for sorting.", new BooleanType(), false),
+        new Param("group", "True if the value should be indexed into doc values for grouping.", new BooleanType(), false),
+        new Param("facet", "Whether this field should index facets, and how.",
+                  new EnumType("no", "No facets are indexed.",
+                               "flat", "Facets are indexed with no hierarchy.",
+                               "hierarchy", "Facets are indexed and are hierarchical.",
+                               "numericRange", "Compute facet counts for custom numeric ranges"),
+                  "no"),
+        new Param("storeDocValues", "Whether to index the value into doc values.", new BooleanType(), false),
+        new Param("liveValues", "Enable live values for this field: whenever this field is retrieved during a search, the live (most recetly added) value will always be returned; set this to the field name of your id (primary key) field.  Uses @lucene:core:org.apache.lucene.index.LiveFieldValues under the hood.", new StringType()),
+        new Param("numericPrecisionStep", "If the value is numeric, what precision step to use during indexing.", new IntType(), NumericUtils.PRECISION_STEP_DEFAULT),
+        new Param("omitNorms", "True if norms are omitted.", new BooleanType(), false),
+        new Param("analyzer", "Analyzer to use for this field during indexing and searching.", ANALYZER_TYPE),
+        new Param("indexAnalyzer", "Analyzer to use for this field during indexing.", ANALYZER_TYPE),
+        new Param("searchAnalyzer", "Analyzer to use for this field during searching.", ANALYZER_TYPE),
+        new Param("indexOptions", "How the tokens should be indexed.",
+                  new EnumType("docs", "Index only doc ids (for binary search).",
+                               "docsFreqs", "Index doc ids and term frequencies.",
+                               "docsFreqsPositions", "Index doc ids, term frequencies and positions.",
+                               "docsFreqsPositionsOffsets", "Index doc ids, term frequencies, positions and offsets."),
+                  "docsFreqsPositions"),
+        new Param("recencyScoreBlend", "Only used with type=virtual, to describe how the virtual field blends with score.",
+                  new StructType(
+                                 new Param("timeStampField", "Field holding timestamp value (must be type long, with sort=true)", new StringType()),
+                                 new Param("maxBoost", "Maximum boost to apply to the relevance score (for the most recent matches)", new FloatType()),
+                                 new Param("range", "Age beyond which no boosting occurs", new LongType()))),
+        new Param("termVectors", "Whether/how term vectors should be indexed.",
+                  new EnumType("terms", "Index terms and freqs only.",
+                               "termsPositions", "Index terms, freqs and positions.",
+                               "termsPositionsOffsets", "Index terms, freqs, positions and offsets.",
+                               "termsPositionsOffsetsPayloads", "Index terms, freqs, positions, offsets and payloads."
+                               )),
+        new Param("similarity", "Which Similarity implementation to use for this field.",
+                  new StructType(
+                                 new Param("class",
+                                           "Which Similarity class to use.",
+                                           new PolyType(Similarity.class,
+                                               new PolyEntry("DefaultSimilarity", "Expert: Default scoring implementation. (see @lucene:core:org.apache.lucene.search.similarities.DefaultSimilarity)", DEFAULT_SIM_TYPE),
+                                               new PolyEntry("BM25Similarity", "BM25 Similarity (see @lucene:core:org.apache.lucene.search.similarities.BM25Similarity)", BM25_SIM_TYPE)),
+                                           "DefaultSimilarity")))
+                   );
+
+  public final static StructType TYPE =
+    new StructType(
+        new Param("indexName", "Index name", new StringType()),
+        new Param("fields", "New fields to register",
+            new StructType(new Param("*", "Register this field name with the provided type.  Note that the field name must be of the form [a-zA-Z_][a-zA-Z_0-9]*.  You can register multiple fields in one request.", FIELD_TYPE))));
+
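+  // A registerFields request might look like the following (hypothetical
+  // example; the index and field names are illustrative):
+  //
+  //   {"indexName": "index1",
+  //    "fields": {"body":  {"type": "text", "store": true, "highlight": true,
+  //                         "analyzer": {"class": "StandardAnalyzer"}},
+  //               "price": {"type": "float", "sort": true}}}
+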
+  public RegisterFieldHandler(GlobalState state) {
+    super(state);
+  }
+
+  @Override
+  public String getTopDoc() {
+    return "Registers one or more fields.  Fields must be registered before they can be added in a document (via @addDocument).  Pass a struct whose keys are the fields names to register and whose values define the type for that field.  Any number of fields may be registered in a single request, and once a field is registered it cannot be changed (write-once).  <p>This returns the full set of fields currently registered.";
+  }
+
+  @Override
+  public StructType getType() {
+    return TYPE;
+  }
+
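+  // Parses a type=virtual field.  Such a field might be registered like this
+  // (hypothetical example; "timestamp" must already be registered as
+  // type=long with sort=true):
+  //
+  //   {"type": "virtual",
+  //    "recencyScoreBlend": {"timeStampField": "timestamp",
+  //                          "maxBoost": 2.0, "range": 86400}}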
+  private FieldDef parseOneVirtualFieldType(Request r, IndexState state, Map<String,FieldDef> pendingFieldDefs, String name, JSONObject o) {
+    Request r2 = r.getStruct("recencyScoreBlend");
+    String timeStampField = r2.getString("timeStampField");
+    FieldDef fd;
+    try {
+      fd = state.getField(timeStampField);
+    } catch (IllegalArgumentException iae) {
+      fd = pendingFieldDefs.get(timeStampField);
+      if (fd == null) {
+        r2.fail("timeStampField", "field \"" + timeStampField + "\" was not yet registered");
+      }
+    }
+    if (fd.fieldType.docValueType() != DocValuesType.NUMERIC) {
+      r2.fail("timeStampField", "field \"" + fd.name + "\" must be registered with type=long and sort=true");
+    }
+    float maxBoost = r2.getFloat("maxBoost");
+    long range = r2.getLong("range");
+    return new FieldDef(name, null, "virtual", null, null, null, true, null, null, null, false, null, fd.name, maxBoost, range);
+  }
+
+  private FieldDef parseOneFieldType(Request r, IndexState state, Map<String,FieldDef> pendingFieldDefs, String name, JSONObject o) {
+    Request f = new Request(r, name, o, FIELD_TYPE);
+
+    // TODO: need to somehow get the field name into here so
+    // error messages tell me which field is wrong
+    String type = f.getEnum("type");
+    if (type.equals("virtual")) {
+      return parseOneVirtualFieldType(f, state, pendingFieldDefs, name, o);
+    }
+
+    FieldType ft = new FieldType();
+
+    boolean dv = f.getBoolean("storeDocValues");
+    boolean sorted = f.getBoolean("sort");
+    boolean grouped = f.getBoolean("group");
+
+    // nocommit: user must say which highlighter?  ie, we
+    // may index offsets into postings, or term vectors...
+    boolean highlighted = f.getBoolean("highlight");
+
+    if (highlighted && !type.equals("text") && !type.equals("atom")) {
+      f.fail("highlighted", "only type=text or type=atom fields can be highlighted");
+    }
+
+    boolean singleValued = !f.getBoolean("multiValued");
+    if (!singleValued) {
+      if (sorted) {
+        f.fail("multiValued", "field \"" + name + "\": cannot sort on multiValued fields");
+      }
+      if (grouped) {
+        f.fail("multiValued", "field \"" + name + "\": cannot group on multiValued fields");
+      }
+    }
+
+    if (type.equals("text")) {
+      ft.setIndexed(true);
+      ft.setTokenized(true);
+      if (sorted || grouped) {
+        ft.setDocValueType(DocValuesType.SORTED);
+      } else if (dv) {
+        ft.setDocValueType(DocValuesType.BINARY);
+      }
+      if (highlighted) {
+        ft.setStored(true);
+        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+      }
+    } else if (type.equals("atom")) {
+      if (f.hasParam("analyzer")) {
+        f.fail("analyzer", "no analyzer allowed with atom (it's hardwired to KeywordAnalyzer internally)");
+      }
+      ft.setIndexed(true);
+      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
+      ft.setOmitNorms(true);
+      ft.setTokenized(false);
+      if (sorted || grouped) {
+        ft.setDocValueType(DocValuesType.SORTED);
+      } else if (dv) {
+        ft.setDocValueType(DocValuesType.BINARY);
+      }
+    } else if (type.equals("boolean")) {
+      if (dv || sorted || grouped) {
+        ft.setDocValueType(DocValuesType.NUMERIC);
+      }
+    } else if (type.equals("long")) {
+      if (dv || sorted || grouped) {
+        ft.setDocValueType(DocValuesType.NUMERIC);
+      }
+    } else if (type.equals("int")) {
+      if (dv || sorted || grouped) {
+        ft.setDocValueType(DocValuesType.NUMERIC);
+      }
+    } else if (type.equals("double")) {
+      if (dv || sorted || grouped) {
+        ft.setDocValueType(DocValuesType.NUMERIC);
+      }
+    } else if (type.equals("float")) {
+      if (dv || sorted || grouped) {
+        ft.setDocValueType(DocValuesType.NUMERIC);
+      }
+    } else {
+      assert false;
+    }
+
+    if (f.hasParam("store")) {
+      ft.setStored(f.getBoolean("store"));
+      if (!ft.stored() && highlighted) {
+        f.fail("store", "store cannot be False when highlighted is True");
+      }
+    }
+
+    if (f.hasParam("index")) {
+      ft.setIndexed(f.getBoolean("index"));
+    }
+
+    if (f.hasParam("analyzer") && !ft.indexed()) {
+      f.fail("analyzer", "no analyzer allowed when indexed is false");
+    }
+
+    // TODO: multi-valued fields
+
+    if (type.equals("text") || type.equals("atom")) {
+
+      if (ft.indexed()) {
+        if (f.hasParam("tokenize")) {
+          ft.setTokenized(f.getBoolean("tokenize"));
+        }
+        if (f.hasParam("omitNorms")) {
+          ft.setOmitNorms(f.getBoolean("omitNorms"));
+        }
+
+        if (f.hasParam("termVectors")) {
+          String tv = f.getString("termVectors");
+          if (tv.equals("terms")) {
+            ft.setStoreTermVectors(true);
+          } else if (tv.equals("termsPositions")) {
+            ft.setStoreTermVectors(true);
+            ft.setStoreTermVectorPositions(true);
+          } else if (tv.equals("termsPositionsOffsets")) {
+            ft.setStoreTermVectors(true);
+            ft.setStoreTermVectorPositions(true);
+            ft.setStoreTermVectorOffsets(true);
+          } else if (tv.equals("termsPositionsOffsetsPayloads")) {
+            ft.setStoreTermVectors(true);
+            ft.setStoreTermVectorPositions(true);
+            ft.setStoreTermVectorOffsets(true);
+            ft.setStoreTermVectorPayloads(true);
+          } else {
+            assert false;
+          }
+        }
+
+        if (f.hasParam("indexOptions")) {
+          String io = f.getString("indexOptions");
+          if (io.equals("docs")) {
+            ft.setIndexOptions(IndexOptions.DOCS_ONLY);
+          } else if (io.equals("docsFreqs")) {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+          } else if (io.equals("docsFreqsPositions")) {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+          } else if (io.equals("docsFreqsPositionsOffsets")) {
+            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+          } else {
+            assert false;
+          }
+        }
+      }
+    } else if (type.equals("boolean")) {
+      ft.setIndexed(true);
+      ft.setOmitNorms(true);
+      ft.setTokenized(false);
+      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
+    } else if (ft.indexed()) {
+      ft.setNumericPrecisionStep(f.getInt("numericPrecisionStep"));
+      if (type.equals("float")) {
+        ft.setNumericType(NumericType.FLOAT);
+      } else if (type.equals("double")) {
+        ft.setNumericType(NumericType.DOUBLE);
+      } else if (type.equals("long")) {
+        ft.setNumericType(NumericType.LONG);
+      } else {
+        assert type.equals("int"): "type=" + type;
+        ft.setNumericType(NumericType.INT);
+      }
+    }
+
+    String pf = f.getString("postingsFormat");
+    try {
+      PostingsFormat.forName(pf);
+    } catch (IllegalArgumentException iae) {
+      f.fail("postingsFormat", "unrecognized postingsFormat \"" + pf + "\"");
+    }
+    String dvf = f.getString("docValuesFormat");
+    try {
+      DocValuesFormat.forName(dvf);
+    } catch (IllegalArgumentException iae) {
+      f.fail("docValuesFormat", "unrecognized docValuesFormat \"" + dvf + "\"");
+    }
+
+    Similarity sim;
+    if (f.hasParam("similarity")) {
+      Request s = f.getStruct("similarity");
+      Request.PolyResult pr = s.getPoly("class");
+      if (pr.name.equals("DefaultSimilarity")) {
+        sim = new DefaultSimilarity();
+      } else if (pr.name.equals("BM25Similarity")) {
+        sim = new BM25Similarity(pr.r.getFloat("k1"), pr.r.getFloat("b"));
+      } else {
+        assert false;
+        sim = null;
+      }
+    } else {
+      sim = new DefaultSimilarity();
+    }
+
+    Analyzer indexAnalyzer;
+    Analyzer searchAnalyzer;
+    Analyzer analyzer = getAnalyzer(state.matchVersion, f, "analyzer");
+    if (analyzer != null) {
+      indexAnalyzer = searchAnalyzer = analyzer;
+    } else {
+      indexAnalyzer = getAnalyzer(state.matchVersion, f, "indexAnalyzer");
+      searchAnalyzer = getAnalyzer(state.matchVersion, f, "searchAnalyzer");
+    }
+
+    if (type.equals("text") && ft.indexed()) {
+      if (indexAnalyzer == null) {
+        f.fail("indexAnalyzer", "field=\"" + name + "\": either analyzer or indexAnalyzer must be specified for an indexed text field");
+      }
+      if (searchAnalyzer == null) {
+        f.fail("searchAnalyzer", "field=\"" + name + "\": either analyzer or searchAnalyzer must be specified for an indexed text field");
+      }
+    }
+
+    if (indexAnalyzer == null) {
+      indexAnalyzer = dummyAnalyzer;
+    }
+
+    if (searchAnalyzer == null) {
+      searchAnalyzer = dummyAnalyzer;
+    }
+
+    String liveValuesIDField;
+    if (f.hasParam("liveValues")) {
+      // nocommit: sort of silly you cannot register id & live
+      // fields in same request...
+      FieldDef idField = state.getField(f, "liveValues");
+      liveValuesIDField = idField.name;
+      if (!type.equals("atom") && !type.equals("atom")) {
+        f.fail("liveValues", "only type=atom or type=text fields may have liveValues enabled");
+      }
+      if (!singleValued) {
+        f.fail("liveValues", "liveValues fields must not be multiValued");
+      }
+      if (!ft.stored()) {
+        f.fail("liveValues", "this field is not stored");
+      }
+      if (!idField.fieldType.stored()) {
+        f.fail("liveValues", "id field \"" + liveValuesIDField + "\" is not stored");
+      }
+      if (!idField.singleValued) {
+        f.fail("liveValues", "id field \"" + liveValuesIDField + "\" must not be multiValued");
+      }
+      if (!idField.valueType.equals("atom") && !idField.valueType.equals("text")) {
+        f.fail("liveValues", "id field \"" + liveValuesIDField + "\" must have type=atom or type=text");
+      }
+      // TODO: we could relax this, since
+      // PostingsHighlighter lets you pull from "private"
+      // source:
+      if (highlighted) {
+        f.fail("liveValues", "cannot highlight live fields");
+      }
+    } else {
+      liveValuesIDField = null;
+    }
+
+    String facet = f.getEnum("facet");
+    if (facet.equals("hierarchy") && type.equals("atom") && (ft.indexed() || ft.stored())) {
+      f.fail("facet", "facet=hierarchy fields cannot have type atom if it's indexed or stored");
+    }
+    if (facet.equals("numericRange")) {
+      if (!type.equals("long") && !type.equals("int") && !type.equals("float") && !type.equals("double")) {
+        f.fail("facet", "numericRange facets only applies to numeric types");
+      }
+      if (!ft.indexed()) {
+        f.fail("index", "facet=numericRange fields must have index=true");
+      }
+      // We index the field as NumericField, for drill-down, and store doc values, for dynamic facet counting
+      ft.setDocValueType(DocValuesType.NUMERIC);
+    }
+
+    ft.freeze();
+
+    return new FieldDef(name, ft, type, facet, pf, dvf, singleValued, sim, indexAnalyzer, searchAnalyzer, highlighted, liveValuesIDField, null, 0.0f, 0L);
+  }
+
+  /** Messy: we need this for indexed-but-not-tokenized
+   *  fields, solely for .getOffsetGap I think. */
+  public final static Analyzer dummyAnalyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        throw new UnsupportedOperationException();
+      }
+    };
+
+  private static List<String> toStringList(List<Object> l) {
+    List<String> words = new ArrayList<String>();
+    for(Object o : l) {
+      words.add((String) o);
+    }
+    return words;
+  }
+
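+  // Strips trailing "#..." comments from user-supplied ICU break rules, e.g.
+  // "$Letters = [:Letter:]; # all letters" becomes "$Letters = [:Letter:]; ".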
+  final static Pattern COMMENTS_PATTERN = Pattern.compile("#.*$", Pattern.MULTILINE);
+
+  static TokenStreamComponents buildCustomAnalysisChain(Version matchVersion, Request chain, Reader reader) {
+
+    Request t = chain.getStruct("tokenizer");
+
+    Request.PolyResult pr = t.getPoly("class");
+
+    // nocommit charFilters
+
+    Tokenizer tokenizer;
+    // nocommit use java7 string switch:
+    if (pr.name.equals("StandardTokenizer")) {
+      tokenizer = new StandardTokenizer(matchVersion, reader);
+      ((StandardTokenizer) tokenizer).setMaxTokenLength(pr.r.getInt("maxTokenLength"));
+    } else if (pr.name.equals("WhitespaceTokenizer")) {
+      tokenizer = new WhitespaceTokenizer(matchVersion, reader);
+    } else if (pr.name.equals("PatternTokenizer")) {
+      Pattern p;
+      try {
+        p = Pattern.compile(pr.r.getString("pattern"));
+      } catch (PatternSyntaxException pse) {
+        pr.r.fail("pattern", "failed to compile Pattern", pse);
+        // Dead code but compiler disagrees:
+        p = null;
+      }
+      tokenizer = new PatternTokenizer(reader, p, pr.r.getInt("group"));
+    } else if (pr.name.equals("ICUTokenizer")) {
+      final BreakIterator[] breakers;
+      if (pr.r.hasParam("rules")) {
+        breakers = new BreakIterator[UScript.CODE_LIMIT];
+        for(Object o : pr.r.getList("rules")) {
+          Request r2 = (Request) o;
+          String script = r2.getString("script");
+          String rules = r2.getString("rules");
+          rules = COMMENTS_PATTERN.matcher(rules).replaceAll("");
+          int code;
+          try {
+            code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, script);
+          } catch (IllegalArgumentException iae) {
+            r2.fail("script", "failed to parse as script code: " + iae.getMessage());
+            // Dead code but compiler disagrees:
+            code = -1;
+          }
+          try {
+            breakers[code] = new RuleBasedBreakIterator(rules);
+          } catch (IllegalArgumentException iae) {
+            r2.fail("rules", "failed to parse rules: " + iae.getMessage());
+          }
+        }
+      } else {
+        breakers = null;
+      }
+
+      ICUTokenizerConfig config = new DefaultICUTokenizerConfig() {
+        
+        @Override
+        public BreakIterator getBreakIterator(int script) {
+          if (breakers != null && breakers[script] != null) {
+            return (BreakIterator) breakers[script].clone();
+          } else {
+            return super.getBreakIterator(script);
+          }
+        }
+
+        // TODO: we could also allow codes->types mapping
+      };
+
+      tokenizer = new ICUTokenizer(reader, config);
+
+    } else {
+      // BUG
+      tokenizer = null;
+      assert false;
+    }
+
+    TokenStream last = tokenizer;
+    if (chain.hasParam("tokenFilters")) {
+      for(Object o : chain.getList("tokenFilters")) {
+        Request sub = (Request) o;
+        pr = sub.getPoly("class");
+        // nocommit use java7 string switch:
+        if (pr.name.equals("StandardFilter")) {
+          last = new StandardFilter(matchVersion, last);
+        } else if (pr.name.equals("EnglishPossessiveFilter")) {
+          last = new EnglishPossessiveFilter(matchVersion, last);
+        } else if (pr.name.equals("PorterStemFilter")) {
+          last = new PorterStemFilter(last);
+        } else if (pr.name.equals("ArabicStemFilter")) {
+          last = new ArabicStemFilter(last);
+        } else if (pr.name.equals("EnglishMinimalStemFilter")) {
+          last = new EnglishMinimalStemFilter(last);
+        } else if (pr.name.equals("LowerCaseFilter")) {
+          last = new LowerCaseFilter(matchVersion, last);
+        } else if (pr.name.equals("SetKeywordMarkerFilter")) {
+          CharArraySet set = new CharArraySet(matchVersion, toStringList(pr.r.getList("keyWords")), false);
+          last = new SetKeywordMarkerFilter(last, set);
+        } else if (pr.name.equals("StopFilter")) {
+          CharArraySet set = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          last = new StopFilter(matchVersion, last, set);
+        } else if (pr.name.equals("SuggestStopFilter")) {
+          CharArraySet set = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          last = new SuggestStopFilter(last, set);
+        } else if (pr.name.equals("SynonymFilter")) {
+          Analyzer a = getAnalyzer(matchVersion, pr.r, "analyzer");
+          if (a == null) {
+            pr.r.fail("analyzer", "analyzer is required");
+          }
+
+          try {
+            SynonymMap.Parser parser = new SynonymMap.Parser(true, a) {
+                @Override
+                public void parse(Reader in) throws IOException {
+                  // nocommit move parsing in here!
+                }
+              };
+
+            CharsRef scratch = new CharsRef();
+            CharsRef scratchOutput = new CharsRef();
+            for(Object o2 : pr.r.getList("synonyms")) {
+              Request syn = (Request) o2;
+              boolean replace = syn.getBoolean("replace");
+              CharsRef output = new CharsRef(syn.getString("output"));
+              if (!syn.isString("input")) {
+                for(Object o3 : syn.getList("input")) {
+                  parser.add(parser.analyze((String) o3, scratch),
+                             output,
+                             !replace);
+                }
+              } else {
+                parser.add(parser.analyze(syn.getString("input"), scratch),
+                           output,
+                           !replace);
+              }
+            }
+            last = new SynonymFilter(last, parser.build(), pr.r.getBoolean("ignoreCase"));
+          } catch (IOException ioe) {
+            throw new RuntimeException(ioe);
+          }
+        } else {
+          assert false: "unrecognized: " + pr.name;
+        }
+      }
+    }
+
+    return new TokenStreamComponents(tokenizer, last);
+  }
+
+  private static class CustomAnalyzer extends Analyzer {
+    private final String json;
+    private final Version matchVersion;
+    private int positionIncrementGap;
+    private int offsetGap;
+
+    public CustomAnalyzer(Version matchVersion, String json) {
+      this.matchVersion = matchVersion;
+      this.json = json;
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      JSONObject o;
+      try {
+        o = (JSONObject) JSONValue.parseStrict(json);
+      } catch (ParseException pe) {
+        // BUG
+        throw new RuntimeException(pe);
+      }
+      Request r = new Request(null, fieldName, o, (StructType) ANALYZER_TYPE);
+      positionIncrementGap = r.getInt("positionIncrementGap");
+      offsetGap = r.getInt("offsetGap");
+      return buildCustomAnalysisChain(matchVersion,
+                                      r,
+                                      reader);
+    }
+
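+    // Note: positionIncrementGap/offsetGap are assigned as a
+    // side effect of createComponents: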
+    @Override
+    public int getPositionIncrementGap(String fieldName) {
+      return positionIncrementGap;
+    }
+
+    @Override
+    public int getOffsetGap(String fieldName) {
+      return offsetGap;
+    }
+  }
+
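+  /** Returns the Analyzer described by the {@code name}
+   *  parameter of request {@code f}, either a predefined
+   *  analyzer class or a custom tokenizer/filter chain, or
+   *  null if the parameter is absent. */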
+  static Analyzer getAnalyzer(Version matchVersionGlobal, Request f, String name) {
+    Analyzer analyzer;
+    if (f.hasParam(name)) {
+      Request a = f.getStruct(name);
+      String jsonOrig = a.toString();
+
+      Version matchVersion;
+      if (a.hasParam("matchVersion")) {
+        matchVersion = getVersion(a.getEnum("matchVersion"));
+      } else {
+        matchVersion = matchVersionGlobal;
+      }
+
+      if (a.hasParam("class")) {
+        // Predefined analyzer class:
+        Request.PolyResult pr = a.getPoly("class");
+        // TODO: try to "share" a single instance of
+        // each?  Analyzer can be costly!
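+        // The language-specific analyzers below all follow the
+        // same pattern: stop words, plus an optional
+        // stem-exclusion set: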
+        if (pr.name.equals("StandardAnalyzer")) {
+          CharArraySet set = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          StandardAnalyzer sa = new StandardAnalyzer(matchVersion, set);
+          analyzer = sa;
+          sa.setMaxTokenLength(pr.r.getInt("maxTokenLength"));
+        } else if (pr.name.equals("EnglishAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new EnglishAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new EnglishAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("GermanAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new GermanAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new GermanAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("ArabicAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new ArabicAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new ArabicAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("ArmenianAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new ArmenianAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new ArmenianAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("BasqueAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new BasqueAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new BasqueAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("BrazilianAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new BrazilianAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new BrazilianAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("BulgarianAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new BulgarianAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new BulgarianAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("CatalanAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          if (pr.r.hasParam("stemExclusionSet")) {
+            CharArraySet stemExclusions = new CharArraySet(matchVersion, toStringList(pr.r.getList("stemExclusionSet")), false);
+            analyzer = new CatalanAnalyzer(matchVersion, stopWords, stemExclusions);
+          } else {
+            analyzer = new CatalanAnalyzer(matchVersion, stopWords);
+          }
+        } else if (pr.name.equals("CJKAnalyzer")) {
+          CharArraySet stopWords = new CharArraySet(matchVersion, toStringList(pr.r.getList("stopWords")), false);
+          analyzer = new CJKAnalyzer(matchVersion, stopWords);
+        } else if (pr.name.equals("CollationKeyAnalyzer")) {
+          Locale locale = SearchHandler.getLocale(pr.r.getStruct("locale"));
+          analyzer = new CollationKeyAnalyzer(matchVersion, Collator.getInstance(locale));
+        } else if (pr.name.equals("WhitespaceAnalyzer")) {
+          analyzer = new WhitespaceAnalyzer(matchVersion);
+        } else {
+          f.fail("class", "unrecognized analyzer class \"" + pr.name + "\"");
+          // Dead code but compiler disagrees:
+          analyzer = null;
+        }
+      } else if (a.hasParam("tokenizer")) {
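+        // Consume these now so missing or invalid values fail up front: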
+        a.getInt("positionIncrementGap");
+        a.getInt("offsetGap");
+        // Ensures the args are all correct:
+        buildCustomAnalysisChain(matchVersion, a, new StringReader(""));
+        analyzer = new CustomAnalyzer(matchVersion, jsonOrig);
+      } else {
+        f.fail(name, "either \"class\" or a custom \"tokenizer\"/\"tokenFilters\" chain is required");
+        analyzer = null;
+      }
+    } else {
+      analyzer = null;
+    }
+
+    return analyzer;
+  }
+
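+  /** Parses a matchVersion name such as LUCENE_40 into the
+   *  corresponding Version constant. */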
+  @SuppressWarnings("deprecation")
+  public static Version getVersion(String v) {
+    if (v.equals("LUCENE_40")) {
+      return Version.LUCENE_40;
+    } else if (v.equals("LUCENE_41")) {
+      return Version.LUCENE_41;
+    } else if (v.equals("LUCENE_42")) {
+      return Version.LUCENE_42;
+    } else if (v.equals("LUCENE_43")) {
+      return Version.LUCENE_43;
+    } else {
+      throw new IllegalArgumentException("unhandled version " + v);
+    }
+  }
+
+  @Override
+  public FinishRequest handle(final IndexState state, Request r, Map<String,List<String>> params) throws Exception {
+
+    assert state != null;
+
+    final Map<String,FieldDef> pendingFieldDefs = new HashMap<String,FieldDef>();
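+    // Original JSON per field, saved so finish() can persist it: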
+    final Map<String,String> saveStates = new HashMap<String,String>();
+
+    if (r.hasParam("fields")) {
+      r = r.getStruct("fields");
+
+      Set<String> seen = new HashSet<String>();
+
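+      // Pass 0 registers normal fields; pass 1 registers
+      // virtual fields, which may reference fields from pass 0: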
+      for(int pass=0;pass<2;pass++) {
+        Iterator<Map.Entry<String,Object>> it = r.getParams();
+        while(it.hasNext()) {
+          Map.Entry<String,Object> ent = it.next();
+          String fieldName = ent.getKey();
+
+          if (pass == 1 && seen.contains(fieldName)) {
+            continue;
+          }
+
+          if (!(ent.getValue() instanceof JSONObject)) {
+            r.fail("field \"" + fieldName + "\": expected object containing the field type but got: " + ent.getValue());
+          }
+
+          if (pass == 0 && "virtual".equals(((JSONObject) ent.getValue()).get("type"))) {
+            // Defer virtual fields to the 2nd pass, so any
+            // fields they reference are registered first, even
+            // when defined in this same request:
+            continue;
+          }
+
+          if (!IndexState.isSimpleName(fieldName)) {
+            r.fail("invalid field name \"" + fieldName + "\": must be [a-zA-Z_][a-zA-Z0-9]*");
+          }
+
+          if (fieldName.endsWith("_boost")) {
+            r.fail("invalid field name \"" + fieldName + "\": field names cannot end with _boost");
+          }
+
+          if (seen.contains(fieldName)) {
+            throw new IllegalArgumentException("field \"" + fieldName + "\" appears at least twice in this request");
+          }
+
+          seen.add(fieldName);
+
+          JSONObject fd = (JSONObject) ent.getValue();
+
+          saveStates.put(fieldName, fd.toString());
+
+          pendingFieldDefs.put(fieldName, parseOneFieldType(r, state, pendingFieldDefs, fieldName, fd));
+        }
+      }
+    }
+
+    return new FinishRequest() {
+      @Override
+      public String finish() throws IOException {
+        for(Map.Entry<String,FieldDef> ent : pendingFieldDefs.entrySet()) {
+          // Silly: we need JSONObject.clone...
+          JSONObject o;
+          try {
+            o = (JSONObject) JSONValue.parseStrict(saveStates.get(ent.getKey()));
+          } catch (ParseException pe) {
+            // BUG
+            assert false;
+            throw new RuntimeException(pe);
+          }
+
+          state.addField(ent.getValue(), o);
+        }
+
+        return state.getAllFieldsJSON();
+      }
+    };
+  }
+}

Added: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/ReleaseSnapshotHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/ReleaseSnapshotHandler.java?rev=1553272&view=auto
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/ReleaseSnapshotHandler.java (added)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/ReleaseSnapshotHandler.java Tue Dec 24 13:39:22 2013
@@ -0,0 +1,77 @@
+package org.apache.lucene.server.handlers;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.server.FinishRequest;
+import org.apache.lucene.server.GlobalState;
+import org.apache.lucene.server.IndexState;
+import org.apache.lucene.server.params.Param;
+import org.apache.lucene.server.params.Request;
+import org.apache.lucene.server.params.StringType;
+import org.apache.lucene.server.params.StructType;
+import org.apache.lucene.server.params.Type;
+
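+/** Handles the @releaseSnapshot command, dropping a snapshot
+ *  previously created with @createSnapshot so that its files
+ *  may be deleted. */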
+public class ReleaseSnapshotHandler extends Handler {
+
+  final static StructType TYPE = new StructType(
+                               new Param("indexName", "The index", new StringType()),
+                               new Param("id", "The id for this snapshot; this must have been previously created via @createSnapshot.", new StringType()));
+
+  @Override
+  public String getTopDoc() {
+    return "Releases a snapshot previously created with @createSnapshot.";
+  }
+
+  @Override
+  public StructType getType() {
+    return TYPE;
+  }
+
+  public ReleaseSnapshotHandler(GlobalState state) {
+    super(state);
+  }
+
+  @Override
+  public FinishRequest handle(final IndexState state, final Request r, Map<String,List<String>> params) throws Exception {
+
+    final IndexState.Gens gens = new IndexState.Gens(r, "id");
+
+    return new FinishRequest() {
+      @Override
+      public String finish() throws IOException {
+
+        // SearcherLifetimeManager pruning thread will drop
+        // the searcher (if it's old enough) next time it
+        // wakes up:
+        state.snapshots.release(gens.indexGen);
+        state.writer.getIndexWriter().deleteUnusedFiles();
+        state.snapshotGenToVersion.remove(gens.indexGen);
+
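+        // Also release the taxonomy snapshot and the saved
+        // state generation referenced by this id: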
+        state.taxoSnapshots.release(gens.taxoGen);
+        state.taxoInternalWriter.deleteUnusedFiles();
+        state.decRef(gens.stateGen);
+
+        return "{}";
+      }
+    };
+  }
+}
\ No newline at end of file