You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2009/08/16 19:28:59 UTC
svn commit: r804726 - in /lucene/solr/trunk: ./
src/java/org/apache/solr/analysis/ src/java/org/apache/solr/handler/
src/java/org/apache/solr/handler/component/
src/java/org/apache/solr/highlight/ src/java/org/apache/solr/schema/
src/java/org/apache/so...
Author: yonik
Date: Sun Aug 16 17:28:58 2009
New Revision: 804726
URL: http://svn.apache.org/viewvc?rev=804726&view=rev
Log:
SOLR-1353: Implement and use reusable token streams for analysis
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/KeepWordFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/PhoneticFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/SolrAnalyzer.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenizerChain.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java
lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
lucene/solr/trunk/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
lucene/solr/trunk/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
lucene/solr/trunk/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
lucene/solr/trunk/src/java/org/apache/solr/schema/BoolField.java
lucene/solr/trunk/src/java/org/apache/solr/schema/FieldType.java
lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
lucene/solr/trunk/src/java/org/apache/solr/search/FieldQParserPlugin.java
lucene/solr/trunk/src/test/org/apache/solr/update/TestIndexingPerformance.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Sun Aug 16 17:28:58 2009
@@ -320,6 +320,9 @@
15. SOLR-1150: Load Documents for Highlighting one at a time rather than
all at once to avoid OOM with many large Documents. (Siddharth Gargate via Mark Miller)
+16. SOLR-1353: Implement and use reusable token streams for analysis. (yonik)
+
+
Bug Fixes
----------------------
1. SOLR-774: Fixed logging level display (Sean Timm via Otis Gospodnetic)
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java Sun Aug 16 17:28:58 2009
@@ -17,10 +17,8 @@
package org.apache.solr.analysis;
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.io.IOException;
import java.util.ArrayList;
@@ -190,52 +188,53 @@
* This is package protected since it is not useful without the Factory
*/
class CapitalizationFilter extends TokenFilter {
- protected final CapitalizationFilterFactory factory;
+ private final CapitalizationFilterFactory factory;
+ private final TermAttribute termAtt;
public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
super(in);
this.factory = factory;
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
@Override
- public Token next(Token token) throws IOException {
- Token t = input.next(token);
- if (t != null) {
-
- char[] termBuffer = t.termBuffer();
- int termBufferLength = t.termLength();
- char[] backup = null;
- if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
- //make a backup in case we exceed the word count
- System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
- }
- if (termBufferLength < factory.maxTokenLength) {
- int wordCount = 0;
-
- int lastWordStart = 0;
- for (int i = 0; i < termBufferLength; i++) {
- char c = termBuffer[i];
- if (c <= ' ' || c == '.') {
- int len = i - lastWordStart;
- if (len > 0) {
- factory.processWord(termBuffer, lastWordStart, len, wordCount++);
- lastWordStart = i + 1;
- i++;
- }
+ public boolean incrementToken() throws IOException {
+ if (!input.incrementToken()) return false;
+
+ char[] termBuffer = termAtt.termBuffer();
+ int termBufferLength = termAtt.termLength();
+ char[] backup = null;
+ if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
+ //make a backup in case we exceed the word count
+ System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
+ }
+ if (termBufferLength < factory.maxTokenLength) {
+ int wordCount = 0;
+
+ int lastWordStart = 0;
+ for (int i = 0; i < termBufferLength; i++) {
+ char c = termBuffer[i];
+ if (c <= ' ' || c == '.') {
+ int len = i - lastWordStart;
+ if (len > 0) {
+ factory.processWord(termBuffer, lastWordStart, len, wordCount++);
+ lastWordStart = i + 1;
+ i++;
}
}
+ }
- // process the last word
- if (lastWordStart < termBufferLength) {
- factory.processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
- }
+ // process the last word
+ if (lastWordStart < termBufferLength) {
+ factory.processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
+ }
- if (wordCount > factory.maxWordCount) {
- t.setTermBuffer(backup, 0, termBufferLength);
- }
+ if (wordCount > factory.maxWordCount) {
+ termAtt.setTermBuffer(backup, 0, termBufferLength);
}
}
- return t;
+
+ return true;
}
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java Sun Aug 16 17:28:58 2009
@@ -50,7 +50,8 @@
for(;;) {
if (!remainingTokens.isEmpty()) {
- clearAttributes(); restoreState(remainingTokens.removeFirst());
+ // clearAttributes(); // not currently necessary
+ restoreState(remainingTokens.removeFirst());
return true;
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java Sun Aug 16 17:28:58 2009
@@ -24,6 +24,7 @@
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
+import org.tartarus.snowball.SnowballProgram;
import java.io.IOException;
import java.io.File;
@@ -75,50 +76,9 @@
* English Porter2 filter that doesn't use reflection to
* adapt lucene to the snowball stemmer code.
*/
-class EnglishPorterFilter extends TokenFilter {
- private final CharArraySet protWords;
- private org.tartarus.snowball.ext.EnglishStemmer stemmer;
-
+@Deprecated
+class EnglishPorterFilter extends SnowballPorterFilter {
public EnglishPorterFilter(TokenStream source, CharArraySet protWords) {
- super(source);
- this.protWords = protWords;
- stemmer = new org.tartarus.snowball.ext.EnglishStemmer();
- }
-
-
- /**
- * the original code from lucene sandbox
- * public final Token next() throws IOException {
- * Token token = input.next();
- * if (token == null)
- * return null;
- * stemmer.setCurrent(token.termText());
- * try {
- * stemMethod.invoke(stemmer, EMPTY_ARGS);
- * } catch (Exception e) {
- * throw new RuntimeException(e.toString());
- * }
- * return new Token(stemmer.getCurrent(),
- * token.startOffset(), token.endOffset(), token.type());
- * }
- */
-
- @Override
- public Token next(Token token) throws IOException {
- Token result = input.next(token);
- if (result != null) {
- char[] termBuffer = result.termBuffer();
- int len = result.termLength();
- // if protected, don't stem. use this to avoid stemming collisions.
- if (protWords != null && protWords.contains(termBuffer, 0, len)) {
- return result;
- }
- stemmer.setCurrent(new String(termBuffer, 0, len));//ugh, wish the Stemmer took a char array
- stemmer.stem();
- String newstr = stemmer.getCurrent();
- result.setTermBuffer(newstr.toCharArray(), 0, newstr.length());
- }
- return result;
+ super(source, new org.tartarus.snowball.ext.EnglishStemmer(), protWords);
}
}
-
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/KeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/KeepWordFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/KeepWordFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/KeepWordFilter.java Sun Aug 16 17:28:58 2009
@@ -21,6 +21,8 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.tartarus.snowball.SnowballProgram;
import java.io.IOException;
import java.util.Set;
@@ -33,21 +35,20 @@
* @since solr 1.3
*/
public final class KeepWordFilter extends TokenFilter {
- final CharArraySet words;
+ private final CharArraySet words;
+ private final TermAttribute termAtt;
-
public KeepWordFilter(TokenStream in, Set<String> words, boolean ignoreCase ) {
super(in);
this.words = new CharArraySet(words, ignoreCase);
+ this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
}
@Override
- public final Token next(Token in) throws IOException {
- for (Token token=input.next(in); token!=null; token=input.next(token)) {
- if( words.contains( token.termBuffer(), 0, token.termLength() ) ) {
- return token;
- }
+ public boolean incrementToken() throws IOException {
+ while (input.incrementToken()) {
+ if (words.contains(termAtt.termBuffer(), 0, termAtt.termLength())) return true;
}
- return null;
+ return false;
}
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternReplaceFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternReplaceFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternReplaceFilter.java Sun Aug 16 17:28:58 2009
@@ -20,9 +20,12 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
+import java.util.Set;
import java.io.IOException;
import java.nio.CharBuffer;
@@ -40,10 +43,10 @@
* @see Pattern
*/
public final class PatternReplaceFilter extends TokenFilter {
- Pattern p;
- String replacement;
- boolean all = true;
-
+ private final Pattern p;
+ private final String replacement;
+ private final boolean all;
+ private final TermAttribute termAtt;
/**
* Constructs an instance to replace either the first, or all occurrences
*
@@ -63,21 +66,23 @@
this.p=p;
this.replacement = (null == replacement) ? "" : replacement;
this.all=all;
+ this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
}
-
- public final Token next(Token in) throws IOException {
- Token t = input.next(in);
- if (t == null)
- return null;
- CharSequence text = CharBuffer.wrap(t.termBuffer(), 0, t.termLength());
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (!input.incrementToken()) return false;
+
+ CharSequence text = CharBuffer.wrap(termAtt.termBuffer(), 0, termAtt.termLength());
Matcher m = p.matcher(text);
+
if (all) {
- t.setTermText(m.replaceAll(replacement));
+ termAtt.setTermBuffer(m.replaceAll(replacement));
} else {
- t.setTermText(m.replaceFirst(replacement));
+ termAtt.setTermBuffer(m.replaceFirst(replacement));
}
- return t;
+ return true;
}
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java Sun Aug 16 17:28:58 2009
@@ -20,6 +20,10 @@
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrConfig;
@@ -112,6 +116,31 @@
final Iterator<Token> iter = tokens.iterator();
return new TokenStream() {
@Override
+ public boolean incrementToken() throws IOException {
+ return super.incrementToken();
+ }
+
+ @Override
+ public void end() throws IOException {
+ super.end();
+ }
+
+ @Override
+ public Token next(Token reusableToken) throws IOException {
+ return super.next(reusableToken);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ }
+
+ @Override
public Token next() throws IOException {
if( iter.hasNext() ) {
return iter.next();
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/PhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/PhoneticFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/PhoneticFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/PhoneticFilter.java Sun Aug 16 17:28:58 2009
@@ -54,7 +54,8 @@
@Override
public boolean incrementToken() throws IOException {
if( save != null ) {
- clearAttributes(); restoreState(save);
+ // clearAttributes(); // not currently necessary
+ restoreState(save);
save = null;
return true;
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java Sun Aug 16 17:28:58 2009
@@ -25,6 +25,7 @@
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.util.StrUtils;
@@ -97,50 +98,35 @@
}
}
+
class SnowballPorterFilter extends TokenFilter {
private final CharArraySet protWords;
- private SnowballProgram stemmer;
+ private final SnowballProgram stemmer;
+ private final TermAttribute termAtt;
public SnowballPorterFilter(TokenStream source, SnowballProgram stemmer, CharArraySet protWords) {
super(source);
this.protWords = protWords;
this.stemmer = stemmer;
+ this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
}
-
- /**
- * the original code from lucene sandbox
- * public final Token next() throws IOException {
- * Token token = input.next();
- * if (token == null)
- * return null;
- * stemmer.setCurrent(token.termText());
- * try {
- * stemMethod.invoke(stemmer, EMPTY_ARGS);
- * } catch (Exception e) {
- * throw new RuntimeException(e.toString());
- * }
- * return new Token(stemmer.getCurrent(),
- * token.startOffset(), token.endOffset(), token.type());
- * }
- */
-
@Override
- public Token next(Token token) throws IOException {
- Token result = input.next(token);
- if (result != null) {
- char[] termBuffer = result.termBuffer();
- int len = result.termLength();
- // if protected, don't stem. use this to avoid stemming collisions.
- if (protWords != null && protWords.contains(termBuffer, 0, len)) {
- return result;
- }
- stemmer.setCurrent(new String(termBuffer, 0, len));//ugh, wish the Stemmer took a char array
- stemmer.stem();
- String newstr = stemmer.getCurrent();
- result.setTermBuffer(newstr.toCharArray(), 0, newstr.length());
+ public boolean incrementToken() throws IOException {
+ if (!input.incrementToken()) return false;
+
+ char[] termBuffer = termAtt.termBuffer();
+ int len = termAtt.termLength();
+ // if protected, don't stem. use this to avoid stemming collisions.
+ if (protWords != null && protWords.contains(termBuffer, 0, len)) {
+ return true;
}
- return result;
+
+ stemmer.setCurrent(new String(termBuffer, 0, len));//ugh, wish the Stemmer took a char array
+ stemmer.stem();
+ String newstr = stemmer.getCurrent();
+ termAtt.setTermBuffer(newstr.toCharArray(), 0, newstr.length());
+
+ return true;
}
}
-
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SolrAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SolrAnalyzer.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SolrAnalyzer.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SolrAnalyzer.java Sun Aug 16 17:28:58 2009
@@ -17,7 +17,10 @@
package org.apache.solr.analysis;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
+
+import java.io.Reader;
+import java.io.IOException;
/**
* @version $Id$
@@ -32,4 +35,45 @@
public int getPositionIncrementGap(String fieldName) {
return posIncGap;
}
+
+ /** wrap the reader in a CharStream, if appropriate */
+ public Reader charStream(Reader reader){
+ return reader;
+ }
+
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return getStream(fieldName, reader).getTokenStream();
+ }
+
+ public static class TokenStreamInfo {
+ private final Tokenizer tokenizer;
+ private final TokenStream tokenStream;
+ public TokenStreamInfo(Tokenizer tokenizer, TokenStream tokenStream) {
+ this.tokenizer = tokenizer;
+ this.tokenStream = tokenStream;
+ }
+ public Tokenizer getTokenizer() { return tokenizer; }
+ public TokenStream getTokenStream() { return tokenStream; }
+ }
+
+
+ public abstract TokenStreamInfo getStream(String fieldName, Reader reader);
+
+ @Override
+ public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ // if (true) return tokenStream(fieldName, reader);
+ TokenStreamInfo tsi = (TokenStreamInfo)getPreviousTokenStream();
+ if (tsi != null) {
+ tsi.getTokenizer().reset(charStream(reader));
+ // the consumer will currently call reset() on the TokenStream to hit all the filters.
+ // this isn't necessarily guaranteed by the APIs... but is currently done
+ // by lucene indexing in DocInverterPerField, and in the QueryParser
+ return tsi.getTokenStream();
+ } else {
+ tsi = getStream(fieldName, reader);
+ setPreviousTokenStream(tsi);
+ return tsi.getTokenStream();
+ }
+ }
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/SynonymFilter.java Sun Aug 16 17:28:58 2009
@@ -205,4 +205,9 @@
return result;
}
+ @Override
+ public void reset() throws IOException {
+ input.reset();
+ replacement = null;
+ }
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenizerChain.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenizerChain.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenizerChain.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TokenizerChain.java Sun Aug 16 17:28:58 2009
@@ -20,8 +20,10 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.CharReader;
+import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
+import java.io.IOException;
/**
* @version $Id$
@@ -50,23 +52,14 @@
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
- public Reader charStream(Reader reader){
- if( charFilters != null && charFilters.length > 0 ){
- CharStream cs = CharReader.get( reader );
- for (int i=0; i<charFilters.length; i++) {
- cs = charFilters[i].create(cs);
- }
- reader = cs;
- }
- return reader;
- }
-
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream ts = tokenizer.create(charStream(reader));
+ @Override
+ public TokenStreamInfo getStream(String fieldName, Reader reader) {
+ Tokenizer tk = (Tokenizer)tokenizer.create(charStream(reader));
+ TokenStream ts = tk;
for (int i=0; i<filters.length; i++) {
ts = filters[i].create(ts);
}
- return ts;
+ return new TokenStreamInfo(tk,ts);
}
public String toString() {
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java Sun Aug 16 17:28:58 2009
@@ -18,6 +18,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.DateField;
import static org.apache.solr.schema.TrieField.TrieTypes;
@@ -38,7 +39,6 @@
* @since solr 1.4
*/
public class TrieTokenizerFactory extends BaseTokenizerFactory {
- protected static final DateField dateField = new DateField();
protected final int precisionStep;
protected final TrieTypes type;
@@ -48,28 +48,71 @@
}
public TokenStream create(Reader input) {
- try {
- StringBuilder builder = new StringBuilder();
- char[] buf = new char[8];
- int len;
- while ((len = input.read(buf)) != -1)
- builder.append(buf, 0, len);
+ return new TrieTokenizer(input, type, precisionStep, TrieTokenizer.getNumericTokenStream(precisionStep));
+ }
+}
+
+class TrieTokenizer extends Tokenizer {
+ protected static final DateField dateField = new DateField();
+ protected final int precisionStep;
+ protected final TrieTypes type;
+ protected final NumericTokenStream ts;
+
+ static NumericTokenStream getNumericTokenStream(int precisionStep) {
+ return new NumericTokenStream(precisionStep);
+ }
+
+ public TrieTokenizer(Reader input, TrieTypes type, int precisionStep, NumericTokenStream ts) {
+ // must share the attribute source with the NumericTokenStream we delegate to
+ super(ts);
+ this.type = type;
+ this.precisionStep = precisionStep;
+ this.ts = ts;
+
+ try {
+ reset(input);
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
+ }
+ }
+
+ @Override
+ public void reset(Reader input) throws IOException {
+ try {
+ super.reset(input);
+ input = super.input;
+ char[] buf = new char[32];
+ int len = input.read(buf);
+ String v = new String(buf, 0, len);
switch (type) {
case INTEGER:
- return new NumericTokenStream(precisionStep).setIntValue(Integer.parseInt(builder.toString()));
+ ts.setIntValue(Integer.parseInt(v));
+ break;
case FLOAT:
- return new NumericTokenStream(precisionStep).setFloatValue(Float.parseFloat(builder.toString()));
+ ts.setFloatValue(Float.parseFloat(v));
+ break;
case LONG:
- return new NumericTokenStream(precisionStep).setLongValue(Long.parseLong(builder.toString()));
+ ts.setLongValue(Long.parseLong(v));
+ break;
case DOUBLE:
- return new NumericTokenStream(precisionStep).setDoubleValue(Double.parseDouble(builder.toString()));
+ ts.setDoubleValue(Double.parseDouble(v));
+ break;
case DATE:
- return new NumericTokenStream(precisionStep).setLongValue(dateField.parseMath(null, builder.toString()).getTime());
+ ts.setLongValue(dateField.parseMath(null, v).getTime());
+ break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
}
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
}
+
+ ts.reset();
}
-}
+
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ return ts.incrementToken();
+ }
+}
\ No newline at end of file
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/TrimFilter.java Sun Aug 16 17:28:58 2009
@@ -20,6 +20,8 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import java.io.IOException;
@@ -31,20 +33,24 @@
public final class TrimFilter extends TokenFilter {
final boolean updateOffsets;
+ private final TermAttribute termAtt;
+ private final OffsetAttribute offsetAtt;
+
public TrimFilter(TokenStream in, boolean updateOffsets) {
super(in);
this.updateOffsets = updateOffsets;
+
+ this.termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ this.offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
@Override
- public final Token next(Token in) throws IOException {
- Token t = input.next(in);
- if (null == t || null == t.termBuffer() || t.termLength() == 0){
- return t;
- }
- char[] termBuffer = t.termBuffer();
- int len = t.termLength();
+ public boolean incrementToken() throws IOException {
+ if (!input.incrementToken()) return false;
+
+ char[] termBuffer = termAtt.termBuffer();
+ int len = termAtt.termLength();
int start = 0;
int end = 0;
int endOff = 0;
@@ -59,24 +65,17 @@
}
if (start > 0 || end < len) {
if (start < end) {
- t.setTermBuffer(t.termBuffer(), start, (end - start));
+ termAtt.setTermBuffer(termBuffer, start, (end - start));
} else {
- t.setTermLength(0);
+ termAtt.setTermLength(0);
}
if (updateOffsets) {
- t.setStartOffset(t.startOffset() + start);
- if (start < end) {
- t.setEndOffset(t.endOffset() - endOff);
- } //else if end is less than, start, then the term length is 0, so, no need to bother w/ the end offset
+ int newStart = offsetAtt.startOffset()+start;
+ int newEnd = offsetAtt.endOffset() - (start<end ? endOff:0);
+ offsetAtt.setOffset(newStart, newEnd);
}
- /*t = new Token( t.termText().substring( start, end ),
- t.startOffset()+start,
- t.endOffset()-endOff,
- t.type() );*/
-
-
}
- return t;
+ return true;
}
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/WordDelimiterFilter.java Sun Aug 16 17:28:58 2009
@@ -657,6 +657,12 @@
}
}
+ @Override
+ public void reset() throws IOException {
+ input.reset();
+ queuePos=0;
+ queue.clear();
+ }
// questions:
// negative numbers? -42 indexed as just 42?
Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Sun Aug 16 17:28:58 2009
@@ -24,6 +24,7 @@
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
@@ -68,7 +69,14 @@
Analyzer analyzer = context.getAnalyzer();
if (!TokenizerChain.class.isInstance(analyzer)) {
- TokenStream tokenStream = analyzer.tokenStream(context.getFieldName(), new StringReader(value));
+
+ TokenStream tokenStream = null;
+ try {
+ tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
+ tokenStream.reset();
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+ }
NamedList<List<NamedList>> namedList = new SimpleOrderedMap<List<NamedList>>();
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
return namedList;
Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/QueryElevationComponent.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/QueryElevationComponent.java Sun Aug 16 17:28:58 2009
@@ -294,7 +294,9 @@
return query;
}
StringBuilder norm = new StringBuilder();
- TokenStream tokens = analyzer.tokenStream( null, new StringReader( query ) );
+ TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
+ tokens.reset();
+
Token token = tokens.next();
while( token != null ) {
norm.append( new String(token.termBuffer(), 0, token.termLength()) );
Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Sun Aug 16 17:28:58 2009
@@ -160,7 +160,8 @@
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
Collection<Token> result = new ArrayList<Token>();
Token token = null;
- TokenStream ts = analyzer.tokenStream("", new StringReader(q));
+ TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
+ ts.reset();
while ((token = ts.next()) != null){
result.add(token);
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Sun Aug 16 17:28:58 2009
@@ -286,7 +286,9 @@
}
catch (IllegalArgumentException e) {
// fall back to anaylzer
- tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[j])), 10);
+ TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docTexts[j]));
+ ts.reset();
+ tstream = new TokenOrderingFilter(ts, 10);
}
Highlighter highlighter;
Modified: lucene/solr/trunk/src/java/org/apache/solr/schema/BoolField.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/BoolField.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/BoolField.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/BoolField.java Sun Aug 16 17:28:58 2009
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.solr.request.XMLWriter;
import org.apache.solr.request.TextResponseWriter;
@@ -48,28 +49,43 @@
}
// avoid instantiating every time...
- protected final static Token TRUE_TOKEN = new Token("T",0,1);
- protected final static Token FALSE_TOKEN = new Token("F",0,1);
+ protected final static char[] TRUE_TOKEN = {'T'};
+ protected final static char[] FALSE_TOKEN = {'F'};
////////////////////////////////////////////////////////////////////////
// TODO: look into creating my own queryParser that can more efficiently
// handle single valued non-text fields (int,bool,etc) if needed.
-
protected final static Analyzer boolAnalyzer = new SolrAnalyzer() {
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new Tokenizer(reader) {
- boolean done=false;
- public Token next() throws IOException {
- if (done) return null;
- done=true;
- int ch = input.read();
- if (ch==-1) return null;
- return (ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN;
- }
- };
- }
- };
+ public TokenStreamInfo getStream(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new Tokenizer(reader) {
+ final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ boolean done = false;
+
+ @Override
+ public void reset(Reader input) throws IOException {
+ done = false;
+ super.reset(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ clearAttributes();
+ if (done) return false;
+ done = true;
+ int ch = input.read();
+ if (ch==-1) return false;
+ termAtt.setTermBuffer(
+ ((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN)
+ ,0,1);
+ return true;
+ }
+ };
+
+ return new TokenStreamInfo(tokenizer, tokenizer);
+ }
+ };
+
public Analyzer getAnalyzer() {
return boolAnalyzer;
Modified: lucene/solr/trunk/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/FieldType.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/FieldType.java Sun Aug 16 17:28:58 2009
@@ -23,6 +23,8 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
@@ -286,55 +288,38 @@
return toInternal(val);
}
- /*********
- // default analyzer for non-text fields.
- // Only reads 80 bytes, but that should be plenty for a single value.
- public Analyzer getAnalyzer() {
- if (analyzer != null) return analyzer;
-
- // the default analyzer...
- return new Analyzer() {
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new Tokenizer(reader) {
- final char[] cbuf = new char[80];
- public Token next() throws IOException {
- int n = input.read(cbuf,0,80);
- if (n<=0) return null;
- String s = toInternal(new String(cbuf,0,n));
- return new Token(s,0,n);
- };
- };
- }
- };
- }
- **********/
-
-
/**
* Default analyzer for types that only produce 1 verbatim token...
* A maximum size of chars to be read must be specified
*/
- protected final class DefaultAnalyzer extends SolrAnalyzer {
+ protected class DefaultAnalyzer extends SolrAnalyzer {
final int maxChars;
DefaultAnalyzer(int maxChars) {
this.maxChars=maxChars;
}
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new Tokenizer(reader) {
- char[] cbuf = new char[maxChars];
- public Token next() throws IOException {
+ public TokenStreamInfo getStream(String fieldName, Reader reader) {
+ Tokenizer ts = new Tokenizer(reader) {
+ final char[] cbuf = new char[maxChars];
+ final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+ final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+ @Override
+ public boolean incrementToken() throws IOException {
+ clearAttributes();
int n = input.read(cbuf,0,maxChars);
- if (n<=0) return null;
- String s = toInternal(new String(cbuf,0,n)); // virtual func on parent
- return new Token(s,0,n);
- };
+ if (n<=0) return false;
+ String s = toInternal(new String(cbuf,0,n));
+ termAtt.setTermBuffer(s);
+ offsetAtt.setOffset(0,n);
+ return true;
+ }
};
+
+ return new TokenStreamInfo(ts, ts);
}
}
-
/**
* Analyzer set by schema for text types to use when indexing fields
* of this type, subclasses can set analyzer themselves or override
Modified: lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java Sun Aug 16 17:28:58 2009
@@ -360,6 +360,11 @@
}
@Override
+ public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ return getAnalyzer(fieldName).reusableTokenStream(fieldName,reader);
+ }
+
+ @Override
public int getPositionIncrementGap(String fieldName) {
return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/FieldQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/FieldQParserPlugin.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/FieldQParserPlugin.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/FieldQParserPlugin.java Sun Aug 16 17:28:58 2009
@@ -24,6 +24,7 @@
import org.apache.lucene.search.*;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TextField;
@@ -65,7 +66,13 @@
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
- TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
+ TokenStream source = null;
+ try {
+ source = analyzer.reusableTokenStream(field, new StringReader(queryText));
+ source.reset();
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+ }
ArrayList<Token> lst = new ArrayList<Token>();
Token t;
int positionCount = 0;
Modified: lucene/solr/trunk/src/test/org/apache/solr/update/TestIndexingPerformance.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/update/TestIndexingPerformance.java?rev=804726&r1=804725&r2=804726&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/update/TestIndexingPerformance.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/update/TestIndexingPerformance.java Sun Aug 16 17:28:58 2009
@@ -22,8 +22,10 @@
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.common.util.StrUtils;
import java.io.IOException;
+import java.util.Arrays;
/** Bypass the normal Solr pipeline and just text indexing performance
* starting at the update handler. The same document is indexed repeatedly.
@@ -39,6 +41,12 @@
int iter=1000;
String iterS = System.getProperty("iter");
if (iterS != null) iter=Integer.parseInt(iterS);
+ boolean includeDoc = Boolean.parseBoolean(System.getProperty("includeDoc","true")); // include the time to create the document
+ String doc = System.getProperty("doc");
+ if (doc != null) {
+ StrUtils.splitSmart(doc,",",true);
+ }
+
SolrQueryRequest req = lrf.makeRequest();
IndexSchema schema = req.getSchema();
@@ -53,23 +61,43 @@
,"text","just how fast is this text indexing?"
};
- Document ldoc = new Document();
- for (int i=0; i<fields.length; i+=2) {
- String field = fields[i];
- String val = fields[i+1];
- Field f = schema.getField(field).createField(val, 1.0f);
- ldoc.add(f);
- }
+
+ /***
+ String[] fields = {
+ "a_i","1"
+ ,"b_i","2"
+ ,"c_i","3"
+ ,"d_i","4"
+ ,"e_i","5"
+ ,"f_i","6"
+ ,"g_i","7"
+ ,"h_i","8"
+ ,"i_i","9"
+ ,"j_i","0"
+ ,"k_i","0"
+ };
+ ***/
+
+ long start = System.currentTimeMillis();
AddUpdateCommand add = new AddUpdateCommand();
add.allowDups = true;
- add.doc = ldoc;
- long start = System.currentTimeMillis();
+
for (int i=0; i<iter; i++) {
+ if (includeDoc || add.doc==null) {
+ add.doc = new Document();
+ for (int j=0; j<fields.length; j+=2) {
+ String field = fields[j];
+ String val = fields[j+1];
+ Field f = schema.getField(field).createField(val, 1.0f);
+ add.doc.add(f);
+ }
+ }
updateHandler.addDoc(add);
}
long end = System.currentTimeMillis();
+ System.out.println("includeDoc="+includeDoc+" doc="+ Arrays.toString(fields));
System.out.println("iter="+iter +" time=" + (end-start) + " throughput=" + ((long)iter*1000)/(end-start));
//discard all the changes