You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2011/02/24 03:11:42 UTC
svn commit: r1074015 [1/2] - in /lucene/dev/branches/realtime_search: ./
lucene/ lucene/src/java/org/apache/lucene/index/
lucene/src/java/org/apache/lucene/util/
lucene/src/java/org/apache/lucene/util/automaton/fst/
lucene/src/test/org/apache/lucene/ut...
Author: buschmi
Date: Thu Feb 24 02:11:39 2011
New Revision: 1074015
URL: http://svn.apache.org/viewvc?rev=1074015&view=rev
Log:
Merging r1073114 through r1074014 into realtime branch
Modified:
lucene/dev/branches/realtime_search/ (props changed)
lucene/dev/branches/realtime_search/lucene/ (props changed)
lucene/dev/branches/realtime_search/lucene/CHANGES.txt
lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/SmallFloat.java
lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java
lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
lucene/dev/branches/realtime_search/modules/ (props changed)
lucene/dev/branches/realtime_search/solr/ (props changed)
lucene/dev/branches/realtime_search/solr/CHANGES.txt (props changed)
lucene/dev/branches/realtime_search/solr/KEYS (props changed)
lucene/dev/branches/realtime_search/solr/LICENSE.txt (props changed)
lucene/dev/branches/realtime_search/solr/NOTICE.txt (props changed)
lucene/dev/branches/realtime_search/solr/README.txt (props changed)
lucene/dev/branches/realtime_search/solr/build.xml (props changed)
lucene/dev/branches/realtime_search/solr/client/ (props changed)
lucene/dev/branches/realtime_search/solr/common-build.xml (props changed)
lucene/dev/branches/realtime_search/solr/contrib/ (props changed)
lucene/dev/branches/realtime_search/solr/example/ (props changed)
lucene/dev/branches/realtime_search/solr/lib/ (props changed)
lucene/dev/branches/realtime_search/solr/site/ (props changed)
lucene/dev/branches/realtime_search/solr/src/ (props changed)
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeywordMarkerFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeywordTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/LengthFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/LetterTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/LimitTokenCountFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/NGramFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/NGramTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/NumericPayloadTokenFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/TokenOffsetPayloadTokenFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/TrimFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/TypeAsPayloadTokenFilterFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/WhitespaceTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/WikipediaTokenizerFactory.java
lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java
lucene/dev/branches/realtime_search/solr/testlogging.properties (props changed)
Modified: lucene/dev/branches/realtime_search/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/CHANGES.txt?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/realtime_search/lucene/CHANGES.txt Thu Feb 24 02:11:39 2011
@@ -739,6 +739,13 @@ Bug fixes
* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor,
which can be used to prevent loading the terms index into memory. (Shai Erera)
+* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during
+ indexing) had an underflow detection bug that caused floatToByte(f)==0 where
+ f was greater than 0, but slightly less than byteToFloat(1). This meant that
+ certain very small field norms (index_boost * length_norm) could have
+ been rounded down to 0 instead of being rounded up to the smallest
+ positive number. (yonik)
+
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight
@@ -1055,7 +1062,7 @@ Documentation
(Adriano Crestani via Robert Muir)
* LUCENE-2894: Use google-code-prettify for syntax highlighting in javadoc.
- (Koji Sekiguchi)
+ (Shinichiro Abe, Koji Sekiguchi)
================== Release 2.9.4 / 3.0.3 2010-12-03 ====================
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Thu Feb 24 02:11:39 2011
@@ -79,6 +79,13 @@ final class DocFieldProcessor extends Do
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION);
+
+ // If this segment only has docs that hit non-aborting exceptions,
+ // then no term vectors files will have been written; therefore we
+ // need to update the fieldInfos and clear the term vectors bits
+ if (!state.hasVectors) {
+ state.fieldInfos.clearVectors();
+ }
state.fieldInfos.write(state.directory, fileName);
}
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java Thu Feb 24 02:11:39 2011
@@ -102,7 +102,6 @@ public final class FieldInfos implements
return byNumber.size();
}
- @Override
public Iterator<FieldInfo> iterator() {
return byNumber.values().iterator();
}
@@ -404,6 +403,14 @@ public final class FieldInfos implements
return false;
}
+ void clearVectors() {
+ for (FieldInfo fi : this) {
+ fi.storeTermVector = false;
+ fi.storeOffsetWithTermVector = false;
+ fi.storePositionWithTermVector = false;
+ }
+ }
+
public boolean hasNorms() {
for (FieldInfo fi : this) {
if (!fi.omitNorms) {
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/SmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/SmallFloat.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/SmallFloat.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/SmallFloat.java Thu Feb 24 02:11:39 2011
@@ -39,7 +39,7 @@ public class SmallFloat {
int fzero = (63-zeroExp)<<numMantissaBits;
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-numMantissaBits);
- if (smallfloat < fzero) {
+ if (smallfloat <= fzero) {
return (bits<=0) ?
(byte)0 // negative numbers and zero both map to 0 byte
:(byte)1; // underflow is mapped to smallest non-zero number.
@@ -75,7 +75,7 @@ public class SmallFloat {
public static byte floatToByte315(float f) {
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-3);
- if (smallfloat < (63-15)<<3) {
+ if (smallfloat <= ((63-15)<<3)) {
return (bits<=0) ? (byte)0 : (byte)1;
}
if (smallfloat >= ((63-15)<<3) + 0x100) {
@@ -103,7 +103,7 @@ public class SmallFloat {
public static byte floatToByte52(float f) {
int bits = Float.floatToRawIntBits(f);
int smallfloat = bits >> (24-5);
- if (smallfloat < (63-2)<<5) {
+ if (smallfloat <= (63-2)<<5) {
return (bits<=0) ? (byte)0 : (byte)1;
}
if (smallfloat >= ((63-2)<<5) + 0x100) {
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java Thu Feb 24 02:11:39 2011
@@ -83,7 +83,7 @@ public class Builder<T> {
@SuppressWarnings("unchecked") final UnCompiledNode<T>[] f = (UnCompiledNode<T>[]) new UnCompiledNode[10];
frontier = f;
for(int idx=0;idx<frontier.length;idx++) {
- frontier[idx] = new UnCompiledNode<T>(this);
+ frontier[idx] = new UnCompiledNode<T>(this, idx);
}
}
@@ -201,7 +201,7 @@ public class Builder<T> {
// undecided on whether to prune it. later, it
// will be either compiled or pruned, so we must
// allocate a new node:
- frontier[idx] = new UnCompiledNode<T>(this);
+ frontier[idx] = new UnCompiledNode<T>(this, idx);
}
}
}
@@ -292,7 +292,7 @@ public class Builder<T> {
new UnCompiledNode[ArrayUtil.oversize(input.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(frontier, 0, next, 0, frontier.length);
for(int idx=frontier.length;idx<next.length;idx++) {
- next[idx] = new UnCompiledNode<T>(this);
+ next[idx] = new UnCompiledNode<T>(this, idx);
}
frontier = next;
}
@@ -424,12 +424,22 @@ public class Builder<T> {
boolean isFinal;
int inputCount;
+ /** This node's depth, starting from the automaton root. */
+ final int depth;
+
+ /**
+ * @param depth
+ * The node's depth starting from the automaton root. Needed for
+ * LUCENE-2934 (node expansion based on conditions other than the
+ * fanout size).
+ */
@SuppressWarnings("unchecked")
- public UnCompiledNode(Builder<T> owner) {
+ public UnCompiledNode(Builder<T> owner, int depth) {
this.owner = owner;
arcs = (Arc<T>[]) new Arc[1];
arcs[0] = new Arc<T>();
output = owner.NO_OUTPUT;
+ this.depth = depth;
}
public boolean isCompiled() {
@@ -441,6 +451,9 @@ public class Builder<T> {
isFinal = false;
output = owner.NO_OUTPUT;
inputCount = 0;
+
+ // We don't clear the depth here because it never changes
+ // for nodes on the frontier (even when reused).
}
public T getLastOutput(int labelToMatch) {
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Thu Feb 24 02:11:39 2011
@@ -25,6 +25,7 @@ import org.apache.lucene.store.IndexInpu
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
/** Represents an FST using a compact byte[] format.
* <p> The format is similar to what's used by Morfologik
@@ -47,11 +48,21 @@ public class FST<T> {
// this when number of arcs is > NUM_ARCS_ARRAY:
private final static int BIT_ARCS_AS_FIXED_ARRAY = 1 << 6;
- // If the node has >= this number of arcs, the arcs are
- // stored as a fixed array. Fixed array consumes more RAM
- // but enables binary search on the arcs (instead of
- // linear scan) on lookup by arc label:
- private final static int NUM_ARCS_FIXED_ARRAY = 10;
+ /**
+ * @see #shouldExpand(UnCompiledNode)
+ */
+ final static int FIXED_ARRAY_SHALLOW_DISTANCE = 3; // 0 => only root node.
+
+ /**
+ * @see #shouldExpand(UnCompiledNode)
+ */
+ final static int FIXED_ARRAY_NUM_ARCS_SHALLOW = 5;
+
+ /**
+ * @see #shouldExpand(UnCompiledNode)
+ */
+ final static int FIXED_ARRAY_NUM_ARCS_DEEP = 10;
+
private int[] bytesPerArc = new int[0];
// Increment version to change it
@@ -315,7 +326,7 @@ public class FST<T> {
int startAddress = writer.posWrite;
//System.out.println(" startAddr=" + startAddress);
- final boolean doFixedArray = node.numArcs >= NUM_ARCS_FIXED_ARRAY;
+ final boolean doFixedArray = shouldExpand(node);
final int fixedArrayStart;
if (doFixedArray) {
if (bytesPerArc.length < node.numArcs) {
@@ -518,6 +529,23 @@ public class FST<T> {
return readNextArc(arc);
}
+ /**
+ * Checks if <code>follow</code>'s target state is in expanded (or vector) format.
+ *
+ * @return Returns <code>true</code> if <code>follow</code> points to a state in an
+ * expanded array format.
+ */
+ boolean isExpandedTarget(Arc<T> follow) throws IOException {
+ if (follow.isFinal()) {
+ return false;
+ } else {
+ final BytesReader in = getBytesReader(follow.target);
+ final byte b = in.readByte();
+
+ return (b & BIT_ARCS_AS_FIXED_ARRAY) != 0;
+ }
+ }
+
/** In-place read; returns the arc. */
public Arc<T> readNextArc(Arc<T> arc) throws IOException {
if (arc.label == -1) {
@@ -712,6 +740,26 @@ public class FST<T> {
public int getArcWithOutputCount() {
return arcWithOutputCount;
}
+
+ /**
+ * Nodes are expanded if their depth (distance from the root node) is <=
+ * {@link #FIXED_ARRAY_SHALLOW_DISTANCE} and they have >= {@link #FIXED_ARRAY_NUM_ARCS_SHALLOW}
+ * arcs, or if they have >= {@link #FIXED_ARRAY_NUM_ARCS_DEEP} arcs at any depth.
+ *
+ * <p>
+ * Fixed array consumes more RAM but enables binary search on the arcs
+ * (instead of a linear scan) on lookup by arc label.
+ *
+ * @return <code>true</code> if <code>node</code> should be stored in an
+ * expanded (array) form.
+ *
+ * @see #FIXED_ARRAY_NUM_ARCS_DEEP
+ * @see Builder.UnCompiledNode#depth
+ */
+ private boolean shouldExpand(UnCompiledNode<T> node) {
+ return (node.depth <= FIXED_ARRAY_SHALLOW_DISTANCE && node.numArcs >= FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
+ node.numArcs >= FIXED_ARRAY_NUM_ARCS_DEEP;
+ }
// Non-static: writes to FST's byte[]
class BytesWriter extends DataOutput {
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java Thu Feb 24 02:11:39 2011
@@ -189,6 +189,8 @@ public final class Util {
*/
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates)
throws IOException {
+ final String expandedNodeColor = "blue";
+
// This is the start arc in the automaton (from the epsilon state to the first state
// with outgoing transitions.
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
@@ -219,7 +221,9 @@ public final class Util {
}
emitDotState(out, "initial", "point", "white", "");
- emitDotState(out, Integer.toString(startArc.target), stateShape, null, "");
+ emitDotState(out, Integer.toString(startArc.target), stateShape,
+ fst.isExpandedTarget(startArc) ? expandedNodeColor : null,
+ "");
out.write(" initial -> " + startArc.target + "\n");
final T NO_OUTPUT = fst.outputs.getNoOutput();
@@ -243,7 +247,9 @@ public final class Util {
while (true) {
// Emit the unseen state and add it to the queue for the next level.
if (arc.target >= 0 && !seen.get(arc.target)) {
- emitDotState(out, Integer.toString(arc.target), stateShape, null,
+ final boolean isExpanded = fst.isExpandedTarget(arc);
+ emitDotState(out, Integer.toString(arc.target), stateShape,
+ isExpanded ? expandedNodeColor : null,
labelStates ? Integer.toString(arc.target) : "");
seen.set(arc.target);
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
@@ -285,10 +291,10 @@ public final class Util {
}
sameLevelStates.clear();
}
-
+
// Emit terminating state (always there anyway).
out.write(" -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
- out.write(" {rank=sink; -1 } ");
+ out.write(" {rank=sink; -1 }\n");
out.write("}\n");
out.flush();
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/TestSmallFloat.java Thu Feb 24 02:11:39 2011
@@ -28,8 +28,8 @@ public class TestSmallFloat extends Luce
return Float.intBitsToFloat(bits);
}
- // original lucene floatToByte
- static byte orig_floatToByte(float f) {
+ // original lucene floatToByte (since lucene 1.3)
+ static byte orig_floatToByte_v13(float f) {
if (f < 0.0f) // round negatives up to zero
f = 0.0f;
@@ -53,6 +53,33 @@ public class TestSmallFloat extends Luce
return (byte)((exponent << 3) | mantissa); // pack into a byte
}
+ // This is the original lucene floatToByte (from v1.3)
+ // except with the underflow detection bug fixed for values like 5.8123817E-10f
+ static byte orig_floatToByte(float f) {
+ if (f < 0.0f) // round negatives up to zero
+ f = 0.0f;
+
+ if (f == 0.0f) // zero is a special case
+ return 0;
+
+ int bits = Float.floatToIntBits(f); // parse float into parts
+ int mantissa = (bits & 0xffffff) >> 21;
+ int exponent = (((bits >> 24) & 0x7f) - 63) + 15;
+
+ if (exponent > 31) { // overflow: use max value
+ exponent = 31;
+ mantissa = 7;
+ }
+
+ if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value
+ exponent = 0;
+ mantissa = 1;
+ }
+
+ return (byte)((exponent << 3) | mantissa); // pack into a byte
+ }
+
+
public void testByteToFloat() {
for (int i=0; i<256; i++) {
float f1 = orig_byteToFloat((byte)i);
@@ -68,6 +95,22 @@ public class TestSmallFloat extends Luce
}
public void testFloatToByte() {
+ assertEquals(0, orig_floatToByte_v13(5.8123817E-10f)); // verify the old bug (see LUCENE-2937)
+ assertEquals(1, orig_floatToByte(5.8123817E-10f)); // verify it's fixed in this test code
+ assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f)); // verify it's fixed
+
+ // test some constants
+ assertEquals(0, SmallFloat.floatToByte315(0));
+ assertEquals(1, SmallFloat.floatToByte315(Float.MIN_VALUE)); // underflow rounds up to smallest positive
+ assertEquals(255, SmallFloat.floatToByte315(Float.MAX_VALUE) & 0xff); // overflow rounds down to largest positive
+ assertEquals(255, SmallFloat.floatToByte315(Float.POSITIVE_INFINITY) & 0xff);
+
+ // all negatives map to 0
+ assertEquals(0, SmallFloat.floatToByte315(-Float.MIN_VALUE));
+ assertEquals(0, SmallFloat.floatToByte315(-Float.MAX_VALUE));
+ assertEquals(0, SmallFloat.floatToByte315(Float.NEGATIVE_INFINITY));
+
+
// up iterations for more exhaustive test after changing something
int num = 100000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
@@ -95,8 +138,8 @@ public class TestSmallFloat extends Luce
if (f==f) { // skip non-numbers
byte b1 = orig_floatToByte(f);
byte b2 = SmallFloat.floatToByte315(f);
- if (b1!=b2) {
- TestCase.fail("Failed floatToByte315 for float " + f);
+ if (b1!=b2 || b2==0 && f>0) {
+ fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i)));
}
}
if (i==Integer.MAX_VALUE) break;
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Thu Feb 24 02:11:39 2011
@@ -56,6 +56,7 @@ import org.apache.lucene.util.LineFileDo
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.fst.FST.Arc;
public class TestFSTs extends LuceneTestCase {
@@ -1322,4 +1323,85 @@ public class TestFSTs extends LuceneTest
assertEquals(b, seekResult.input);
assertEquals(42, (long) seekResult.output);
}
+
+ /**
+ * Test state expansion (array format) on close-to-root states. Creates
+ * synthetic input that has one expanded state on each level.
+ *
+ * @see "https://issues.apache.org/jira/browse/LUCENE-2933"
+ */
+ public void testExpandedCloseToRoot() throws Exception {
+ class SyntheticData {
+ FST<Object> compile(String[] lines) throws IOException {
+ final NoOutputs outputs = NoOutputs.getSingleton();
+ final Object nothing = outputs.getNoOutput();
+ final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+ int line = 0;
+ final BytesRef term = new BytesRef();
+ while (line < lines.length) {
+ String w = lines[line++];
+ if (w == null) {
+ break;
+ }
+ term.copy(w);
+ b.add(term, nothing);
+ }
+
+ return b.finish();
+ }
+
+ void generate(ArrayList<String> out, StringBuilder b, char from, char to,
+ int depth) {
+ if (depth == 0 || from == to) {
+ String seq = b.toString() + "_" + out.size() + "_end";
+ out.add(seq);
+ } else {
+ for (char c = from; c <= to; c++) {
+ b.append(c);
+ generate(out, b, from, c == to ? to : from, depth - 1);
+ b.deleteCharAt(b.length() - 1);
+ }
+ }
+ }
+
+ public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)
+ throws IOException {
+ if (fst.targetHasArcs(arc)) {
+ int childCount = 0;
+ for (arc = fst.readFirstTargetArc(arc, arc);;
+ arc = fst.readNextArc(arc), childCount++)
+ {
+ boolean expanded = fst.isExpandedTarget(arc);
+ int children = verifyStateAndBelow(fst, new FST.Arc<Object>().copyFrom(arc), depth + 1);
+
+ assertEquals(
+ expanded,
+ (depth <= FST.FIXED_ARRAY_SHALLOW_DISTANCE &&
+ children >= FST.FIXED_ARRAY_NUM_ARCS_SHALLOW) ||
+ children >= FST.FIXED_ARRAY_NUM_ARCS_DEEP);
+ if (arc.isLast()) break;
+ }
+
+ return childCount;
+ }
+ return 0;
+ }
+ }
+
+ // Sanity check.
+ assertTrue(FST.FIXED_ARRAY_NUM_ARCS_SHALLOW < FST.FIXED_ARRAY_NUM_ARCS_DEEP);
+ assertTrue(FST.FIXED_ARRAY_SHALLOW_DISTANCE >= 0);
+
+ SyntheticData s = new SyntheticData();
+
+ ArrayList<String> out = new ArrayList<String>();
+ StringBuilder b = new StringBuilder();
+ s.generate(out, b, 'a', 'i', 10);
+ String[] input = out.toArray(new String[out.size()]);
+ Arrays.sort(input);
+ FST<Object> fst = s.compile(input);
+ FST.Arc<Object> arc = fst.getFirstArc(new FST.Arc<Object>());
+ s.verifyStateAndBelow(fst, arc, 1);
+ }
}
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -21,7 +21,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.TokenStream;
-/** Factory for {@link ASCIIFoldingFilter} */
+/**
+ * Factory for {@link ASCIIFoldingFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.ASCIIFoldingFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory {
public ASCIIFoldingFilter create(TokenStream input) {
return new ASCIIFoldingFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.Ara
/**
- * Factory for {@link ArabicNormalizationFilter}
- **/
+ * Factory for {@link ArabicNormalizationFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.ArabicNormalizationFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class ArabicNormalizationFilterFactory extends BaseTokenFilterFactory{
public ArabicNormalizationFilter create(TokenStream input) {
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -21,8 +21,16 @@ import org.apache.lucene.analysis.ar.Ara
/**
- * Factory for {@link ArabicStemFilter}
- **/
+ * Factory for {@link ArabicStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.ArabicStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class ArabicStemFilterFactory extends BaseTokenFilterFactory{
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -21,7 +21,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
-/** Factory for {@link BrazilianStemFilter} */
+/**
+ * Factory for {@link BrazilianStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.BrazilianStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
public BrazilianStemFilter create(TokenStream in) {
return new BrazilianStemFilter(in);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
-/** Factory for {@link BulgarianStemFilter} */
+/**
+ * Factory for {@link BulgarianStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.BulgarianStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class BulgarianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new BulgarianStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java Thu Feb 24 02:11:39 2011
@@ -22,7 +22,16 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.cjk.CJKTokenizer;
import java.io.Reader;
-/** Factory for {@link CJKTokenizer} */
+/**
+ * Factory for {@link CJKTokenizer}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.CJKTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class CJKTokenizerFactory extends BaseTokenizerFactory {
public CJKTokenizer create(Reader in) {
return new CJKTokenizer(in);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -33,7 +33,7 @@ import java.util.StringTokenizer;
* The factory takes parameters:<br/>
* "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
* "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
- * "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.
+ * "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.<br/>
* "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
* "okPrefix" - do not change word capitalization if a word begins with something in this list.
* for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
@@ -43,6 +43,16 @@ import java.util.StringTokenizer;
* "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
* assumed to be correct.<br/>
*
+ * <pre class="prettyprint" >
+ * <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
+ * keep="java solr lucene" keepIgnoreCase="false"
+ * okPrefix="McK McD McA"/>
+ * </analyzer>
+ * </fieldType></pre>
+ *
* @version $Id$
* @since solr 1.3
*/
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -22,6 +22,15 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.standard.ClassicFilter;
/**
+ * Factory for {@link ClassicFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.ClassicFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ *
* @version $Id$
*/
public class ClassicFilterFactory extends BaseTokenFilterFactory {
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ClassicTokenizerFactory.java Thu Feb 24 02:11:39 2011
@@ -24,6 +24,14 @@ import java.io.Reader;
import java.util.Map;
/**
+ * Factory for {@link ClassicTokenizer}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.ClassicTokenizerFactory" maxTokenLength="120"/>
+ * </analyzer>
+ * </fieldType></pre>
+ *
* @version $Id$
*/
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -56,11 +56,19 @@ import org.apache.solr.util.plugin.Resou
* <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)
* <li>decomposition: 'no','canonical', or 'full' (optional)
* </ul>
- *
+ *
+ * <pre class="prettyprint" >
+ * <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
+ * </analyzer>
+ * </fieldType></pre>
+ *
* @see Collator
* @see Locale
* @see RuleBasedCollator
- * @since solr 1.5
+ * @since solr 3.1
*/
public class CollationKeyFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private Collator collator;
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -27,7 +27,15 @@ import org.apache.solr.common.ResourceLo
import org.apache.solr.util.plugin.ResourceLoaderAware;
/**
- * Constructs a CommonGramsFilter
+ * Constructs a {@link CommonGramsFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
*/
/*
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -29,10 +29,18 @@ import org.apache.solr.common.ResourceLo
import org.apache.solr.util.plugin.ResourceLoaderAware;
/**
- * Construct CommonGramsQueryFilter
+ * Construct {@link CommonGramsQueryFilter}.
*
- * This is pretty close to a straight copy from StopFilterFactory
+ * This is pretty close to a straight copy from {@link StopFilterFactory}.
*
+ * <pre class="prettyprint" >
+ * <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
*/
public class CommonGramsQueryFilterFactory extends BaseTokenFilterFactory
implements ResourceLoaderAware {
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,16 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cz.CzechStemFilter;
-/** Factory for {@link CzechStemFilter} */
+/**
+ * Factory for {@link CzechStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.CzechStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ */
public class CzechStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new CzechStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -31,8 +31,17 @@ import java.util.Map;
/**
*
- * Factory for {@link DelimitedPayloadTokenFilter}
- **/
+ * Factory for {@link DelimitedPayloadTokenFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ *
+ */
public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
public static final String ENCODER_ATTR = "encoder";
public static final String DELIMITER_ATTR = "delimiter";
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DictionaryCompoundWordTokenFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -28,7 +28,18 @@ import org.apache.lucene.analysis.TokenS
import java.util.Map;
import java.io.IOException;
-/** Factory for {@link DictionaryCompoundWordTokenFilter} */
+/**
+ * Factory for {@link DictionaryCompoundWordTokenFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
+ * minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArraySet dictionary;
private String dictFile;
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -21,6 +21,17 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter;
+/**
+ * Factory for {@link DoubleMetaphoneFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class DoubleMetaphoneFilterFactory extends BaseTokenFilterFactory
{
public static final String INJECT = "inject";
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -23,6 +23,14 @@ import org.apache.lucene.analysis.ngram.
/**
* Creates new instances of {@link EdgeNGramTokenFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="1" maxGramSize="1"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
*/
public class EdgeNGramFilterFactory extends BaseTokenFilterFactory {
private int maxGramSize = 0;
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EdgeNGramTokenizerFactory.java Thu Feb 24 02:11:39 2011
@@ -24,6 +24,13 @@ import java.util.Map;
/**
* Creates new instances of {@link EdgeNGramTokenizer}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.EdgeNGramTokenizerFactory" side="front" minGramSize="1" maxGramSize="1"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
*/
public class EdgeNGramTokenizerFactory extends BaseTokenizerFactory {
private int maxGramSize = 0;
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -27,7 +27,17 @@ import org.apache.lucene.analysis.util.C
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
-/** Factory for {@link ElisionFilter} */
+/**
+ * Factory for {@link ElisionFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private CharArraySet articles;
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
-/** Factory for {@link EnglishMinimalStemFilter} */
+/**
+ * Factory for {@link EnglishMinimalStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new EnglishMinimalStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
-/** Factory for {@link EnglishPossessiveFilter} */
+/**
+ * Factory for {@link EnglishPossessiveFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.EnglishPossessiveFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new EnglishPossessiveFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
-/** Factory for {@link FinnishLightStemFilter} */
+/**
+ * Factory for {@link FinnishLightStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.FinnishLightStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new FinnishLightStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
-/** Factory for {@link FrenchLightStemFilter} */
+/**
+ * Factory for {@link FrenchLightStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.FrenchLightStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new FrenchLightStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
-/** Factory for {@link FrenchMinimalStemFilter} */
+/**
+ * Factory for {@link FrenchMinimalStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.FrenchMinimalStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new FrenchMinimalStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gl.GalicianStemFilter;
-/** Factory for {@link GalicianStemFilter} */
+/**
+ * Factory for {@link GalicianStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.GalicianStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class GalicianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new GalicianStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanLightStemFilter;
-/** Factory for {@link GermanLightStemFilter} */
+/**
+ * Factory for {@link GermanLightStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.GermanLightStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class GermanLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new GermanLightStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
-/** Factory for {@link GermanMinimalStemFilter} */
+/**
+ * Factory for {@link GermanMinimalStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.GermanMinimalStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new GermanMinimalStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -22,7 +22,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.TokenStream;
-/** Factory for {@link GermanStemFilter} */
+/**
+ * Factory for {@link GermanStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.GermanStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class GermanStemFilterFactory extends BaseTokenFilterFactory {
public GermanStemFilter create(TokenStream in) {
return new GermanStemFilter(in);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -26,7 +26,17 @@ import org.apache.lucene.analysis.el.Gre
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-/** Factory for {@link GreekLowerCaseFilter} */
+/**
+ * Factory for {@link GreekLowerCaseFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.GreekLowerCaseFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory
{
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.el.GreekStemFilter;
-/** Factory for {@link GreekStemFilter} */
+/**
+ * Factory for {@link GreekStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.GreekStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class GreekStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -21,7 +21,18 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
-public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
+/**
+ * Factory for {@link HTMLStripCharFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_html" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <charFilter class="solr.HTMLStripCharFilterFactory"/>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
+ public class HTMLStripCharFilterFactory extends BaseCharFilterFactory {
public HTMLStripCharFilter create(CharStream input) {
return new HTMLStripCharFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
-/** Factory for {@link HindiNormalizationFilter} */
+/**
+ * Factory for {@link HindiNormalizationFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HindiNormalizationFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HindiNormalizationFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiStemFilter;
-/** Factory for {@link HindiStemFilter} */
+/**
+ * Factory for {@link HindiStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HindiStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class HindiStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HindiStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
-/** Factory for {@link HungarianLightStemFilter} */
+/**
+ * Factory for {@link HungarianLightStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HungarianLightStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HungarianLightStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -22,7 +22,15 @@ import org.apache.lucene.analysis.miscel
import org.apache.solr.analysis.BaseTokenFilterFactory;
/**
- * Factory for {@link HyphenatedWordsFilter}
+ * Factory for {@link HyphenatedWordsFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HyphenatedWordsFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
*/
public class HyphenatedWordsFilterFactory extends BaseTokenFilterFactory {
public HyphenatedWordsFilter create(TokenStream input) {
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/HyphenationCompoundWordTokenFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -33,7 +33,7 @@ import java.io.InputStream;
import org.xml.sax.InputSource;
/**
- * Factory for {@link HyphenationCompoundWordTokenFilter}
+ * Factory for {@link HyphenationCompoundWordTokenFilter}.
* <p>
* This factory accepts the following parameters:
* <ul>
@@ -48,6 +48,15 @@ import org.xml.sax.InputSource;
* to the stream. defaults to false.
* </ul>
* <p>
+ * <pre class="prettyprint" >
+ * <fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
+ * dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
* @see HyphenationCompoundWordTokenFilter
*/
public class HyphenationCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
-/** Factory for {@link IndicNormalizationFilter} */
+/**
+ * Factory for {@link IndicNormalizationFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.IndicNormalizationFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new IndicNormalizationFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -22,7 +22,17 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.id.IndonesianStemFilter;
-/** Factory for {@link IndonesianStemFilter} */
+/**
+ * Factory for {@link IndonesianStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class IndonesianStemFilterFactory extends BaseTokenFilterFactory {
private boolean stemDerivational = true;
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -20,7 +20,17 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
-/** Factory for {@link ItalianLightStemFilter} */
+/**
+ * Factory for {@link ItalianLightStemFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.ItalianLightStemFilterFactory"/>
+ * </analyzer>
+ * </fieldType></pre>
+ * @version $Id$
+ */
public class ItalianLightStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new ItalianLightStemFilter(input);
Modified: lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java?rev=1074015&r1=1074014&r2=1074015&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java (original)
+++ lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java Thu Feb 24 02:11:39 2011
@@ -28,6 +28,14 @@ import java.util.Set;
import java.io.IOException;
/**
+ * Factory for {@link KeepWordFilter}.
+ * <pre class="prettyprint" >
+ * <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
+ * <analyzer>
+ * <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ * <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false" enablePositionIncrements="false"/>
+ * </analyzer>
+ * </fieldType></pre>
* @version $Id$
*/
public class KeepWordFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {