You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/20 20:52:08 UTC
svn commit: r1061480 [5/5] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/idea/.idea/copyright/ lucene/ lucene/contrib/
lucene/contrib/demo/src/java/org/apache/lucene/demo/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ l...
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/Grouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/Grouping.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/Grouping.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/Grouping.java Thu Jan 20 19:52:03 2011
@@ -162,7 +162,7 @@ public class Grouping {
// if we aren't going to return any groups, disregard the offset
if (numGroups == 0) maxGroupToFind = 0;
- collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
+ collector = new TopGroupCollector(groupBy, context, searcher.weightSort(normalizeSort(sort)), maxGroupToFind);
/*** if we need a different algorithm when sort != group.sort
if (compareSorts(sort, groupSort)) {
@@ -185,9 +185,9 @@ public class Grouping {
int collectorOffset = format==Format.Simple ? 0 : offset;
if (groupBy instanceof StrFieldSource) {
- collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset);
+ collector2 = new Phase2StringGroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset);
} else {
- collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset);
+ collector2 = new Phase2GroupCollector(collector, groupBy, context, searcher.weightSort(groupSort), docsToCollect, needScores, collectorOffset);
}
return collector2;
}
@@ -306,11 +306,11 @@ public class Grouping {
return v;
}
- static TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException {
+ TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException {
if (sort==null || sort==byScoreDesc) {
return TopScoreDocCollector.create(numHits, true);
} else {
- return TopFieldCollector.create(sort, numHits, false, needScores, needScores, true);
+ return TopFieldCollector.create(searcher.weightSort(sort), numHits, false, needScores, needScores, true);
}
}
@@ -505,12 +505,12 @@ class TopGroupCollector extends GroupCol
int matches;
- public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
+ public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort weightedSort, int nGroups) throws IOException {
this.vs = groupByVS;
this.context = vsContext;
this.nGroups = nGroups = Math.max(1,nGroups); // we need a minimum of 1 for this collector
- SortField[] sortFields = sort.getSort();
+ SortField[] sortFields = weightedSort.getSort();
this.comparators = new FieldComparator[sortFields.length];
this.reversed = new int[sortFields.length];
for (int i = 0; i < sortFields.length; i++) {
@@ -719,7 +719,7 @@ class Phase2GroupCollector extends Colle
int docBase;
// TODO: may want to decouple from the phase1 collector
- public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
+ public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException {
boolean getSortFields = false;
if (topGroups.orderedGroups == null)
@@ -733,10 +733,10 @@ class Phase2GroupCollector extends Colle
}
SearchGroupDocs groupDocs = new SearchGroupDocs();
groupDocs.groupValue = group.groupValue;
- if (sort==null)
+ if (weightedSort==null)
groupDocs.collector = TopScoreDocCollector.create(docsPerGroup, true);
else
- groupDocs.collector = TopFieldCollector.create(sort, docsPerGroup, getSortFields, getScores, getScores, true);
+ groupDocs.collector = TopFieldCollector.create(weightedSort, docsPerGroup, getSortFields, getScores, getScores, true);
groupMap.put(groupDocs.groupValue, groupDocs);
}
@@ -791,8 +791,8 @@ class Phase2StringGroupCollector extends
final SearchGroupDocs[] groups;
final BytesRef spare = new BytesRef();
- public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort, int docsPerGroup, boolean getScores, int offset) throws IOException {
- super(topGroups, groupByVS, vsContext,sort,docsPerGroup,getScores,offset);
+ public Phase2StringGroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort weightedSort, int docsPerGroup, boolean getScores, int offset) throws IOException {
+ super(topGroups, groupByVS, vsContext,weightedSort,docsPerGroup,getScores,offset);
ordSet = new SentinelIntSet(groupMap.size(), -1);
groups = new SearchGroupDocs[ordSet.keys.length];
}
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java Thu Jan 20 19:52:03 2011
@@ -61,7 +61,7 @@ public class SolrConstantScoreQuery exte
private Map context;
public ConstantWeight(IndexSearcher searcher) throws IOException {
- this.similarity = getSimilarity(searcher);
+ this.similarity = searcher.getSimilarity();
this.context = ValueSource.newContext(searcher);
if (filter instanceof SolrFilter)
((SolrFilter)filter).createWeight(context, searcher);
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Thu Jan 20 19:52:03 2011
@@ -481,6 +481,30 @@ public class SolrIndexSearcher extends I
return fieldValueCache;
}
+ /** Returns a weighted sort according to this searcher */
+ public Sort weightSort(Sort sort) throws IOException {
+ if (sort == null) return null;
+ SortField[] sorts = sort.getSort();
+
+ boolean needsWeighting = false;
+ for (SortField sf : sorts) {
+ if (sf instanceof SolrSortField) {
+ needsWeighting = true;
+ break;
+ }
+ }
+ if (!needsWeighting) return sort;
+
+ SortField[] newSorts = Arrays.copyOf(sorts, sorts.length);
+ for (int i=0; i<newSorts.length; i++) {
+ if (newSorts[i] instanceof SolrSortField) {
+ newSorts[i] = ((SolrSortField)newSorts[i]).weight(this);
+ }
+ }
+
+ return new Sort(newSorts);
+ }
+
/**
* Returns the first document number containing the term <code>t</code>
@@ -1166,7 +1190,7 @@ public class SolrIndexSearcher extends I
if (cmd.getSort() == null) {
topCollector = TopScoreDocCollector.create(len, true);
} else {
- topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true);
+ topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
}
Collector collector = topCollector;
if( timeAllowed > 0 ) {
@@ -1276,7 +1300,7 @@ public class SolrIndexSearcher extends I
if (cmd.getSort() == null) {
topCollector = TopScoreDocCollector.create(len, true);
} else {
- topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true);
+ topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
}
DocSetCollector setCollector = new DocSetDelegateCollector(smallSetSize, maxDoc, topCollector);
@@ -1558,7 +1582,7 @@ public class SolrIndexSearcher extends I
// bit of a hack to tell if a set is sorted - do it better in the future.
boolean inOrder = set instanceof BitDocSet || set instanceof SortedIntDocSet;
- TopDocsCollector topCollector = TopFieldCollector.create(sort, nDocs, false, false, false, inOrder);
+ TopDocsCollector topCollector = TopFieldCollector.create(weightSort(sort), nDocs, false, false, false, inOrder);
DocIterator iter = set.iterator();
int base=0;
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/BoostedQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/BoostedQuery.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/BoostedQuery.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/BoostedQuery.java Thu Jan 20 19:52:03 2011
@@ -96,7 +96,7 @@ public class BoostedQuery extends Query
if(subQueryScorer == null) {
return null;
}
- return new BoostedQuery.CustomScorer(getSimilarity(searcher), context, this, subQueryScorer, boostVal);
+ return new BoostedQuery.CustomScorer(searcher.getSimilarity(), context, this, subQueryScorer, boostVal);
}
@Override
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/FunctionQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/FunctionQuery.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/FunctionQuery.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/FunctionQuery.java Thu Jan 20 19:52:03 2011
@@ -95,7 +95,7 @@ public class FunctionQuery extends Query
@Override
public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
- return new AllScorer(getSimilarity(searcher), context, this);
+ return new AllScorer(searcher.getSimilarity(), context, this);
}
@Override
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/ValueSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/ValueSource.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/ValueSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/ValueSource.java Thu Jan 20 19:52:03 2011
@@ -26,12 +26,13 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.Bits;
import org.apache.lucene.index.MultiFields;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.search.SolrSortField;
import java.io.IOException;
import java.io.Serializable;
import java.util.IdentityHashMap;
import java.util.Map;
-import java.util.Collections;
/**
* Instantiates {@link org.apache.solr.search.function.DocValues} for a particular reader.
@@ -61,24 +62,6 @@ public abstract class ValueSource implem
return description();
}
- /**
- * EXPERIMENTAL: This method is subject to change.
- * <br>WARNING: Sorted function queries are not currently weighted.
- * <p>
- * Get the SortField for this ValueSource. Uses the {@link #getValues(java.util.Map, AtomicReaderContext)}
- * to populate the SortField.
- *
- * @param reverse true if this is a reverse sort.
- * @return The {@link org.apache.lucene.search.SortField} for the ValueSource
- * @throws IOException if there was a problem reading the values.
- */
- public SortField getSortField(boolean reverse) throws IOException {
- //should we pass in the description for the field name?
- //Hmm, Lucene is going to intern whatever we pass in, not sure I like that
- //and we can't pass in null, either, as that throws an illegal arg. exception
- return new SortField(description(), new ValueSourceComparatorSource(), reverse);
- }
-
/**
* Implementations should propagate createWeight to sub-ValueSources which can optionally store
@@ -97,16 +80,56 @@ public abstract class ValueSource implem
return context;
}
- class ValueSourceComparatorSource extends FieldComparatorSource {
+ //
+ // Sorting by function
+ //
- public ValueSourceComparatorSource() {
+ /**
+ * EXPERIMENTAL: This method is subject to change.
+ * <br>WARNING: Sorted function queries are not currently weighted.
+ * <p>
+ * Get the SortField for this ValueSource. Uses the {@link #getValues(java.util.Map, AtomicReaderContext)}
+ * to populate the SortField.
+ *
+ * @param reverse true if this is a reverse sort.
+ * @return The {@link org.apache.lucene.search.SortField} for the ValueSource
+ * @throws IOException if there was a problem reading the values.
+ */
+ public SortField getSortField(boolean reverse) throws IOException {
+ return new ValueSourceSortField(reverse);
+ }
+
+ private static FieldComparatorSource dummyComparator = new FieldComparatorSource() {
+ @Override
+ public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unweighted use of sort " + fieldname);
+ }
+ };
+
+ class ValueSourceSortField extends SortField implements SolrSortField {
+ public ValueSourceSortField(boolean reverse) {
+ super(description(), dummyComparator, reverse);
+ }
+
+ @Override
+ public SortField weight(IndexSearcher searcher) throws IOException {
+ Map context = newContext(searcher);
+ createWeight(context, searcher);
+ return new SortField(getField(), new ValueSourceComparatorSource(context), getReverse());
+ }
+ }
+
+ class ValueSourceComparatorSource extends FieldComparatorSource {
+ private final Map context;
+ public ValueSourceComparatorSource(Map context) {
+ this.context = context;
}
public FieldComparator newComparator(String fieldname, int numHits,
int sortPos, boolean reversed) throws IOException {
- return new ValueSourceComparator(numHits);
+ return new ValueSourceComparator(context, numHits);
}
}
@@ -119,8 +142,10 @@ public abstract class ValueSource implem
private final double[] values;
private DocValues docVals;
private double bottom;
+ private Map fcontext;
- ValueSourceComparator(int numHits) {
+ ValueSourceComparator(Map fcontext, int numHits) {
+ this.fcontext = fcontext;
values = new double[numHits];
}
@@ -153,7 +178,7 @@ public abstract class ValueSource implem
}
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
- docVals = getValues(Collections.emptyMap(), context);
+ docVals = getValues(fcontext, context);
return this;
}
@@ -162,7 +187,7 @@ public abstract class ValueSource implem
}
public Comparable value(int slot) {
- return Double.valueOf(values[slot]);
+ return values[slot];
}
}
}
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java Thu Jan 20 19:52:03 2011
@@ -74,7 +74,6 @@ public class FileBasedSpellChecker exten
return null;
}
- @SuppressWarnings("unchecked")
private void loadExternalFileDictionary(SolrCore core) {
try {
@@ -92,7 +91,6 @@ public class FileBasedSpellChecker exten
new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()).
setMaxBufferedDocs(150).
setMergePolicy(mp).
- setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH).
setOpenMode(IndexWriterConfig.OpenMode.CREATE)
);
Modified: lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/update/SolrIndexConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/update/SolrIndexConfig.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/update/SolrIndexConfig.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/update/SolrIndexConfig.java Thu Jan 20 19:52:03 2011
@@ -53,7 +53,6 @@ public class SolrIndexConfig {
maxMergeDocs = -1;
mergeFactor = -1;
ramBufferSizeMB = 16;
- maxFieldLength = -1;
writeLockTimeout = -1;
commitLockTimeout = -1;
lockType = null;
@@ -71,7 +70,6 @@ public class SolrIndexConfig {
public final double ramBufferSizeMB;
- public final int maxFieldLength;
public final int writeLockTimeout;
public final int commitLockTimeout;
public final String lockType;
@@ -95,7 +93,6 @@ public class SolrIndexConfig {
mergeFactor=solrConfig.getInt(prefix+"/mergeFactor",def.mergeFactor);
ramBufferSizeMB = solrConfig.getDouble(prefix+"/ramBufferSizeMB", def.ramBufferSizeMB);
- maxFieldLength=solrConfig.getInt(prefix+"/maxFieldLength",def.maxFieldLength);
writeLockTimeout=solrConfig.getInt(prefix+"/writeLockTimeout", def.writeLockTimeout);
commitLockTimeout=solrConfig.getInt(prefix+"/commitLockTimeout", def.commitLockTimeout);
lockType=solrConfig.get(prefix+"/lockType", def.lockType);
@@ -153,9 +150,6 @@ public class SolrIndexConfig {
if (termIndexInterval != -1)
iwc.setTermIndexInterval(termIndexInterval);
- if (maxFieldLength != -1)
- iwc.setMaxFieldLength(maxFieldLength);
-
if (writeLockTimeout != -1)
iwc.setWriteLockTimeout(writeLockTimeout);
Modified: lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-copyfield-test.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-copyfield-test.xml?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-copyfield-test.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-copyfield-test.xml Thu Jan 20 19:52:03 2011
@@ -202,13 +202,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-required-fields.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-required-fields.xml?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-required-fields.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-required-fields.xml Thu Jan 20 19:52:03 2011
@@ -193,13 +193,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema.xml?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema.xml Thu Jan 20 19:52:03 2011
@@ -236,13 +236,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema12.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema12.xml?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema12.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema12.xml Thu Jan 20 19:52:03 2011
@@ -252,13 +252,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
@@ -286,14 +287,14 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
@@ -303,14 +304,14 @@
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
@@ -375,7 +376,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -384,7 +385,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -397,7 +398,7 @@
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
</fieldtype>
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/TestDistributedSearch.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/TestDistributedSearch.java Thu Jan 20 19:52:03 2011
@@ -95,6 +95,7 @@ public class TestDistributedSearch exten
// these queries should be exactly ordered and scores should exactly match
query("q","*:*", "sort",i1+" desc");
+ query("q","*:*", "sort","{!func}add("+i1+",5)"+" desc");
query("q","*:*", "sort",i1+" asc");
query("q","*:*", "sort",i1+" desc", "fl","*,score");
query("q","*:*", "sort",tlong+" asc", "fl","score"); // test legacy behavior - "score"=="*,score"
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java Thu Jan 20 19:52:03 2011
@@ -33,7 +33,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.ArrayList;
-import java.util.Collections;
public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
@@ -59,37 +58,6 @@ public class SnowballPorterFilterFactory
assertTokenStreamContents(stream, gold);
}
- /**
- * Tests the protected words mechanism of EnglishPorterFilterFactory
- */
- @Deprecated
- public void testProtectedOld() throws Exception {
- EnglishStemmer stemmer = new EnglishStemmer();
- String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
- String[] gold = new String[test.length];
- for (int i = 0; i < test.length; i++) {
- if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
- stemmer.setCurrent(test[i]);
- stemmer.stem();
- gold[i] = stemmer.getCurrent();
- } else {
- gold[i] = test[i];
- }
- }
-
- EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
- Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
- args.put(SnowballPorterFilterFactory.PROTECTED_TOKENS, "who-cares.txt");
- factory.init(args);
- List<String> lines = new ArrayList<String>();
- Collections.addAll(lines, "banks", "fledgling");
- factory.inform(new LinesMockSolrResourceLoader(lines));
- Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION,
- new StringReader(StrUtils.join(Arrays.asList(test), ' ')));
- TokenStream stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, gold);
- }
-
class LinesMockSolrResourceLoader implements ResourceLoader {
List<String> lines;
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java Thu Jan 20 19:52:03 2011
@@ -99,8 +99,7 @@ public class TestArbitraryIndexDir exten
Directory dir = newFSDirectory(newDir);
IndexWriter iw = new IndexWriter(
dir,
- new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)).
- setMaxFieldLength(1000)
+ new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40))
);
Document doc = new Document();
doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED));
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/JSONWriterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/JSONWriterTest.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/JSONWriterTest.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/JSONWriterTest.java Thu Jan 20 19:52:03 2011
@@ -66,21 +66,6 @@ public class JSONWriterTest extends Solr
}
@Test
- public void testPHPS() throws IOException {
- SolrQueryRequest req = req("dummy");
- SolrQueryResponse rsp = new SolrQueryResponse();
- QueryResponseWriter w = new PHPSerializedResponseWriter();
-
- StringWriter buf = new StringWriter();
- rsp.add("data1", "hello");
- rsp.add("data2", 42);
- rsp.add("data3", true);
- w.write(buf, req, rsp);
- assertEquals(buf.toString(), "a:3:{s:5:\"data1\";s:5:\"hello\";s:5:\"data2\";i:42;s:5:\"data3\";b:1;}");
- req.close();
- }
-
- @Test
public void testJSON() throws IOException {
SolrQueryRequest req = req("wt","json","json.nl","arrarr");
SolrQueryResponse rsp = new SolrQueryResponse();
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java Thu Jan 20 19:52:03 2011
@@ -169,6 +169,16 @@ public class SimpleFacetsTest extends So
,"//lst[@name='trait_s']/int[@name='Pig'][.='1']"
);
+ // test excluding main query
+ assertQ(req("q", "{!tag=main}id:43"
+ ,"facet", "true"
+ ,"facet.query", "{!key=foo}id:42"
+ ,"facet.query", "{!ex=main key=bar}id:42" // only matches when we exclude main query
+ )
+ ,"//lst[@name='facet_queries']/int[@name='foo'][.='0']"
+ ,"//lst[@name='facet_queries']/int[@name='bar'][.='1']"
+ );
+
assertQ("check counts for applied facet queries using filtering (fq)",
req("q", "id:[42 TO 47]"
,"facet", "true"
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/TestSort.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/TestSort.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/TestSort.java Thu Jan 20 19:52:03 2011
@@ -63,8 +63,7 @@ public class TestSort extends AbstractSo
IndexWriter iw = new IndexWriter(
dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)).
- setOpenMode(IndexWriterConfig.OpenMode.CREATE).
- setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH)
+ setOpenMode(IndexWriterConfig.OpenMode.CREATE)
);
final MyDoc[] mydocs = new MyDoc[ndocs];
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java Thu Jan 20 19:52:03 2011
@@ -326,17 +326,18 @@ public class TestFunctionQuery extends S
assertU(adoc("id",""+i, "text","batman"));
}
assertU(commit());
- assertU(adoc("id","120", "text","batman superman")); // in a segment by itself
+ assertU(adoc("id","120", "text","batman superman")); // in a smaller segment
+ assertU(adoc("id","121", "text","superman"));
assertU(commit());
- // batman and superman have the same idf in single-doc segment, but very different in the complete index.
+ // superman has a higher df (thus lower idf) in one segment, but reversed in the complete index
String q ="{!func}query($qq)";
String fq="id:120";
assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'");
assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'");
// test weighting through a function range query
- assertQ(req("fl","*,score", "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']");
+ assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']");
// test weighting through a complex function
q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)";
@@ -360,6 +361,14 @@ public class TestFunctionQuery extends S
// OK
}
+ // test that sorting by function weights correctly. superman should sort higher than batman due to idf of the whole index
+
+ assertQ(req("q", "*:*", "fq","id:120 OR id:121", "sort","{!func v=$sortfunc} desc", "sortfunc","query($qq)", "qq","text:(batman OR superman)")
+ ,"*//doc[1]/float[.='120.0']"
+ ,"*//doc[2]/float[.='121.0']"
+ );
+
+
purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity
}
Modified: lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (original)
+++ lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java Thu Jan 20 19:52:03 2011
@@ -284,8 +284,7 @@ public class IndexBasedSpellCheckerTest
Directory dir = newFSDirectory(altIndexDir);
IndexWriter iw = new IndexWriter(
dir,
- new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).
- setMaxFieldLength(IndexWriterConfig.UNLIMITED_FIELD_LENGTH)
+ new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
);
for (int i = 0; i < ALT_DOCS.length; i++) {
Document doc = new Document();
Modified: lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/analysis.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/analysis.jsp?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/analysis.jsp (original)
+++ lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/analysis.jsp Thu Jan 20 19:52:03 2011
@@ -24,6 +24,7 @@
org.apache.lucene.analysis.CharReader,
org.apache.lucene.analysis.CharStream,
org.apache.lucene.analysis.tokenattributes.*,
+ org.apache.lucene.util.AttributeReflector,
org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain,
@@ -31,7 +32,8 @@
org.apache.solr.schema.FieldType,
org.apache.solr.schema.SchemaField,
org.apache.solr.common.util.XML,
- javax.servlet.jsp.JspWriter,java.io.IOException
+ javax.servlet.jsp.JspWriter,java.io.IOException,
+ org.apache.noggit.CharArr
"%>
<%@ page import="java.io.Reader"%>
<%@ page import="java.io.StringReader"%>
@@ -39,8 +41,6 @@
<%@ page import="java.math.BigInteger" %>
<%-- $Id$ --%>
-<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%>
-<%-- $Name: $ --%>
<%@include file="header.jsp" %>
@@ -71,19 +71,19 @@
<table>
<tr>
<td>
- <strong>Field
+ <strong>Field
<select name="nt">
- <option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
- <option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
+ <option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
+ <option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
</select></strong>
</td>
<td>
- <input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
+ <input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
</td>
</tr>
<tr>
<td>
- <strong>Field value (Index)</strong>
+ <strong>Field value (Index)</strong>
<br/>
verbose output
<input name="verbose" type="checkbox"
@@ -94,19 +94,19 @@
<%= highlight ? "checked=\"true\"" : "" %> >
</td>
<td>
- <textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
+ <textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
</td>
</tr>
<tr>
<td>
- <strong>Field value (Query)</strong>
+ <strong>Field value (Query)</strong>
<br/>
verbose output
<input name="qverbose" type="checkbox"
<%= qverbose ? "checked=\"true\"" : "" %> >
</td>
<td>
- <textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
+ <textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
</td>
</tr>
<tr>
@@ -115,7 +115,7 @@
</td>
<td>
- <input class="stdbutton" type="submit" value="analyze">
+ <input class="stdbutton" type="submit" value="analyze">
</td>
</tr>
@@ -148,24 +148,28 @@
}
if (field!=null) {
- HashSet<Tok> matches = null;
+ HashSet<BytesRef> matches = null;
if (qval!="" && highlight) {
Reader reader = new StringReader(qval);
Analyzer analyzer = field.getType().getQueryAnalyzer();
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
+ TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
tstream.reset();
- List<AttributeSource> tokens = getTokens(tstream);
- matches = new HashSet<Tok>();
- for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); }
+ matches = new HashSet<BytesRef>();
+ while (tstream.incrementToken()) {
+ final BytesRef bytes = new BytesRef();
+ bytesAtt.toBytesRef(bytes);
+ matches.add(bytes);
+ }
}
if (val!="") {
out.println("<h3>Index Analyzer</h3>");
- doAnalyzer(out, field, val, false, verbose,matches);
+ doAnalyzer(out, field, val, false, verbose, matches);
}
if (qval!="") {
out.println("<h3>Query Analyzer</h3>");
- doAnalyzer(out, field, qval, true, qverbose,null);
+ doAnalyzer(out, field, qval, true, qverbose, null);
}
}
@@ -177,7 +181,7 @@
<%!
- private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
+ private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<BytesRef> match) throws Exception {
FieldType ft = field.getType();
Analyzer analyzer = queryAnalyser ?
@@ -240,7 +244,7 @@
tstream.reset();
List<AttributeSource> tokens = getTokens(tstream);
if (verbose) {
- writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
+ writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP);
}
writeTokens(out, tokens, ft, verbose, match);
}
@@ -249,52 +253,59 @@
static List<AttributeSource> getTokens(TokenStream tstream) throws IOException {
List<AttributeSource> tokens = new ArrayList<AttributeSource>();
-
- while (true) {
- if (!tstream.incrementToken())
- break;
- else {
- tokens.add(tstream.cloneAttributes());
- }
+ tstream.reset();
+ while (tstream.incrementToken()) {
+ tokens.add(tstream.cloneAttributes());
}
return tokens;
}
-
- private static class Tok {
- AttributeSource token;
- int pos;
- Tok(AttributeSource token, int pos) {
- this.token=token;
- this.pos=pos;
- }
-
- public boolean equals(Object o) {
- return ((Tok)o).token.toString().equals(token.toString());
- }
- public int hashCode() {
- return token.toString().hashCode();
- }
- public String toString() {
- return token.toString();
+ private static class ReflectItem {
+ final Class<? extends Attribute> attClass;
+ final String key;
+ final Object value;
+
+ ReflectItem(Class<? extends Attribute> attClass, String key, Object value) {
+ this.attClass = attClass;
+ this.key = key;
+ this.value = value;
}
- public String toPrintableString() {
- TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class);
- if (att instanceof CharTermAttribute)
- return att.toString();
- else {
- BytesRef bytes = new BytesRef();
- att.toBytesRef(bytes);
- return bytes.toString();
- }
+ }
+
+ private static class Tok {
+ final BytesRef bytes = new BytesRef();
+ final String rawText, text;
+ final int pos;
+ final List<ReflectItem> reflected = new ArrayList<ReflectItem>();
+
+ Tok(AttributeSource token, int pos, FieldType ft) {
+ this.pos = pos;
+ token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
+ rawText = (token.hasAttribute(CharTermAttribute.class)) ?
+ token.getAttribute(CharTermAttribute.class).toString() : null;
+ final CharArr textBuf = new CharArr(bytes.length);
+ ft.indexedToReadable(bytes, textBuf);
+ text = textBuf.toString();
+ token.reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ // leave out position and raw term
+ if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (CharTermAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
+ return;
+ reflected.add(new ReflectItem(attClass, key, value));
+ }
+ });
}
}
- private static interface ToStr {
- public String toStr(Object o);
+ private static interface TokToStr {
+ public String toStr(Tok o);
}
- private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException {
+ private static void printRow(JspWriter out, String header, String headerTitle, List<Tok>[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set<BytesRef> match) throws IOException {
// find the maximum number of terms for any position
int maxSz=1;
if (multival) {
@@ -308,7 +319,13 @@
out.println("<tr>");
if (idx==0 && verbose) {
if (header != null) {
- out.print("<th NOWRAP rowspan=\""+maxSz+"\">");
+ out.print("<th NOWRAP rowspan=\""+maxSz+"\"");
+ if (headerTitle != null) {
+ out.print(" title=\"");
+ XML.escapeCharData(headerTitle,out);
+ out.print("\"");
+ }
+ out.print(">");
XML.escapeCharData(header,out);
out.println("</th>");
}
@@ -317,7 +334,7 @@
for (int posIndex=0; posIndex<arrLst.length; posIndex++) {
List<Tok> lst = arrLst[posIndex];
if (lst.size() <= idx) continue;
- if (match!=null && match.contains(lst.get(idx))) {
+ if (match!=null && match.contains(lst.get(idx).bytes)) {
out.print("<td class=\"highlight\"");
} else {
out.print("<td class=\"debugdata\"");
@@ -340,15 +357,6 @@
}
- static String isPayloadString( Payload p ) {
- String sp = new String( p.getData() );
- for( int i=0; i < sp.length(); i++ ) {
- if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
- return "";
- }
- return "(" + sp + ")";
- }
-
static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
out.print("<h4>");
out.print(clazz.getName());
@@ -359,137 +367,93 @@
// readable, raw, pos, type, start/end
- static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
+ static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<BytesRef> match) throws IOException {
// Use a map to tell what tokens are in what positions
// because some tokenizers/filters may do funky stuff with
// very large increments, or negative increments.
HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
boolean needRaw=false;
- int pos=0;
+ int pos=0, reflectionCount = -1;
for (AttributeSource t : tokens) {
- if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
- needRaw=true;
- }
-
pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
List lst = map.get(pos);
if (lst==null) {
lst = new ArrayList(1);
map.put(pos,lst);
}
- Tok tok = new Tok(t,pos);
+ Tok tok = new Tok(t,pos,ft);
+ // sanity check
+ if (reflectionCount < 0) {
+ reflectionCount = tok.reflected.size();
+ } else {
+ if (reflectionCount != tok.reflected.size())
+ throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos);
+ }
+ if (tok.rawText != null && !tok.text.equals(tok.rawText)) {
+ needRaw=true;
+ }
lst.add(tok);
}
List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]);
- /* Jetty 6.1.3 miscompiles this generics version...
- Arrays.sort(arr, new Comparator<List<Tok>>() {
- public int compare(List<Tok> toks, List<Tok> toks1) {
- return toks.get(0).pos - toks1.get(0).pos;
- }
- }
- */
-
+ // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics:
Arrays.sort(arr, new Comparator() {
public int compare(Object toks, Object toks1) {
return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos;
}
- }
-
-
- );
+ });
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
if (verbose) {
- printRow(out,"term position", arr, new ToStr() {
- public String toStr(Object o) {
- return Integer.toString(((Tok)o).pos);
+ printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return Integer.toString(t.pos);
}
- }
- ,false
- ,verbose
- ,null);
- }
-
-
- printRow(out,"term text", arr, new ToStr() {
- public String toStr(Object o) {
- return ft.indexedToReadable( ((Tok)o).toPrintableString() );
- }
+ },false,verbose,null);
}
- ,true
- ,verbose
- ,match
- );
- if (needRaw) {
- printRow(out,"raw text", arr, new ToStr() {
- public String toStr(Object o) {
- // page is UTF-8, so anything goes.
- return ((Tok)o).toPrintableString();
- }
+ printRow(out, "term text", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return t.text;
}
- ,true
- ,verbose
- ,match
- );
- }
+ },true,verbose,match);
if (verbose) {
- printRow(out,"term type", arr, new ToStr() {
- public String toStr(Object o) {
- String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type();
- if (tt == null) {
- return "null";
- } else {
- return tt;
+ if (needRaw) {
+ printRow(out, "raw text", CharTermAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ // page is UTF-8, so anything goes.
+ return (t.rawText == null) ? "" : t.rawText;
}
- }
+ },true,verbose,match);
}
- ,true
- ,verbose,
- null
- );
- }
-
- if (verbose) {
- printRow(out,"source start,end", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
+
+ printRow(out, "raw bytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return t.bytes.toString();
}
- }
- ,true
- ,verbose
- ,null
- );
- }
+ },true,verbose,match);
- if (verbose) {
- printRow(out,"payload", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
- if( null != p ) {
- BigInteger bi = new BigInteger( p.getData() );
- String ret = bi.toString( 16 );
- if (ret.length() % 2 != 0) {
- // Pad with 0
- ret = "0"+ret;
+ for (int att=0; att < reflectionCount; att++) {
+ final ReflectItem item0 = arr[0].get(0).reflected.get(att);
+ final int i = att;
+ printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ final ReflectItem item = t.reflected.get(i);
+ if (item0.attClass != item.attClass || !item0.key.equals(item.key))
+ throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos);
+ if (item.value instanceof Payload) {
+ final Payload p = (Payload) item.value;
+ return new BytesRef(p.getData()).toString();
+ } else {
+ return (item.value != null) ? item.value.toString() : "";
}
- ret += isPayloadString( p );
- return ret;
}
- return "";
- }
+ },true,verbose, null);
}
- ,true
- ,verbose
- ,null
- );
}
out.println("</table>");
Modified: lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/replication/header.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/replication/header.jsp?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/replication/header.jsp (original)
+++ lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/replication/header.jsp Thu Jan 20 19:52:03 2011
@@ -19,7 +19,7 @@
<%@ page import="org.apache.solr.common.util.NamedList,
org.apache.solr.common.util.SimpleOrderedMap,
org.apache.solr.request.LocalSolrQueryRequest,
- org.apache.solr.request.SolrQueryResponse,
+ org.apache.solr.response.SolrQueryResponse,
org.apache.solr.request.SolrRequestHandler,
java.util.Map"%>
<%@ page import="org.apache.solr.handler.ReplicationHandler" %>