You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/07/19 17:59:32 UTC
svn commit: r1363400 [17/31] - in /lucene/dev/branches/pforcodec_3892: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/
dev-tools/idea/.idea/copyright/ dev-tools/idea/.idea/libraries/
dev-tools/idea/lucene/ dev-tools/maven/ dev-tools/maven/lucene...
Modified: lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCharBlockArray.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCharBlockArray.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCharBlockArray.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCharBlockArray.java Thu Jul 19 15:58:54 2012
@@ -5,9 +5,13 @@ import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
import org.junit.Test;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.CharBlockArray;
@@ -41,8 +45,12 @@ public class TestCharBlockArray extends
for (int i = 0; i < n; i++) {
random().nextBytes(buffer);
int size = 1 + random().nextInt(50);
-
- String s = new String(buffer, 0, size);
+ // This test is turning random bytes into a string,
+ // this is asking for trouble.
+ CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .onMalformedInput(CodingErrorAction.REPLACE);
+ String s = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
array.append(s);
builder.append(s);
}
@@ -50,8 +58,12 @@ public class TestCharBlockArray extends
for (int i = 0; i < n; i++) {
random().nextBytes(buffer);
int size = 1 + random().nextInt(50);
-
- String s = new String(buffer, 0, size);
+ // This test is turning random bytes into a string,
+ // this is asking for trouble.
+ CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .onMalformedInput(CodingErrorAction.REPLACE);
+ String s = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
array.append((CharSequence)s);
builder.append(s);
}
@@ -59,8 +71,12 @@ public class TestCharBlockArray extends
for (int i = 0; i < n; i++) {
random().nextBytes(buffer);
int size = 1 + random().nextInt(50);
-
- String s = new String(buffer, 0, size);
+ // This test is turning random bytes into a string,
+ // this is asking for trouble.
+ CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .onMalformedInput(CodingErrorAction.REPLACE);
+ String s = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
for (int j = 0; j < s.length(); j++) {
array.append(s.charAt(j));
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java Thu Jul 19 15:58:54 2012
@@ -1,11 +1,15 @@
package org.apache.lucene.facet.taxonomy.writercache.cl2o;
import java.io.File;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.CompactLabelToOrdinal;
@@ -46,7 +50,12 @@ public class TestCompactLabelToOrdinal e
random().nextBytes(buffer);
int size = 1 + random().nextInt(50);
- uniqueValues[i] = new String(buffer, 0, size);
+ // This test is turning random bytes into a string,
+ // this is asking for trouble.
+ CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .onMalformedInput(CodingErrorAction.REPLACE);
+ uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TerminatorChar) == -1) {
i++;
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java Thu Jul 19 15:58:54 2012
@@ -5,6 +5,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Arrays;
+import java.util.Locale;
import org.apache.lucene.util.encoding.DGapIntEncoder;
import org.apache.lucene.util.encoding.EightFlagsIntEncoder;
@@ -67,11 +68,11 @@ public class EncodingSpeed {
+ ") " + loopFactor + " times.");
System.out.println();
- String header = String.format(headerFormat, "Encoder", "Bits/Int",
+ String header = String.format(Locale.ROOT, headerFormat, "Encoder", "Bits/Int",
"Encode Time", "Encode Time", "Decode Time", "Decode Time");
System.out.println(header);
- String header2 = String.format(headerFormat, "", "", "[milliseconds]",
+ String header2 = String.format(Locale.ROOT, headerFormat, "", "", "[milliseconds]",
"[microsecond / int]", "[milliseconds]", "[microsecond / int]");
System.out.println(header2);
@@ -148,7 +149,7 @@ public class EncodingSpeed {
endTime = System.currentTimeMillis();
long decodeTime = endTime - startTime;
- System.out.println(String.format(resultsFormat, encoder, nf.format(baos
+ System.out.println(String.format(Locale.ROOT, resultsFormat, encoder, nf.format(baos
.size()
* 8.0 / data.length), encodeTime, nf.format(encodeTime
* 1000000.0 / (loopFactor * data.length)), decodeTime, nf
@@ -156,7 +157,7 @@ public class EncodingSpeed {
}
static {
- nf = NumberFormat.getInstance();
+ nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(4);
nf.setMinimumFractionDigits(4);
Modified: lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java Thu Jul 19 15:58:54 2012
@@ -9,16 +9,6 @@ import java.util.TreeSet;
import org.junit.Test;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.encoding.DGapIntEncoder;
-import org.apache.lucene.util.encoding.EightFlagsIntEncoder;
-import org.apache.lucene.util.encoding.FourFlagsIntEncoder;
-import org.apache.lucene.util.encoding.IntDecoder;
-import org.apache.lucene.util.encoding.IntEncoder;
-import org.apache.lucene.util.encoding.NOnesIntEncoder;
-import org.apache.lucene.util.encoding.SimpleIntEncoder;
-import org.apache.lucene.util.encoding.SortingIntEncoder;
-import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
-import org.apache.lucene.util.encoding.VInt8IntEncoder;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -142,7 +132,7 @@ public class EncodingTest extends Lucene
}
private static void decoding(ByteArrayOutputStream baos, IntDecoder decoder)
- throws IOException, InstantiationException, IllegalAccessException {
+ throws IOException {
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
decoder.reInit(bais);
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/build.xml?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/build.xml (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/build.xml Thu Jul 19 15:58:54 2012
@@ -35,8 +35,8 @@
<path refid="base.classpath"/>
</path>
- <target name="compile" depends="jar-queries,common.compile-core" description="Compiles grouping classes" />
- <target name="jar-core" depends="common.jar-core" />
+ <target name="init" depends="module-build.init,jar-queries"/>
+
<target name="javadocs" depends="javadocs-queries,compile-core">
<invoke-module-javadoc>
<links>
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java Thu Jul 19 15:58:54 2012
@@ -97,6 +97,11 @@ public class BlockGroupingCollector exte
public float score() {
return score;
}
+
+ @Override
+ public float freq() {
+ throw new UnsupportedOperationException(); // TODO: wtf does this class do?
+ }
@Override
public int docID() {
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java Thu Jul 19 15:58:54 2012
@@ -30,6 +30,7 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
/**
@@ -65,10 +66,9 @@ public abstract class DVAllGroupHeadsCol
* @param type The {@link Type} which is used to select a concrete implementation.
* @param diskResident Whether the values to group by should be disk resident
* @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
- * @throws IOException If I/O related errors occur
*/
@SuppressWarnings("unchecked")
- public static <T extends AbstractAllGroupHeadsCollector.GroupHead<?>> DVAllGroupHeadsCollector<T> create(String groupField, Sort sortWithinGroup, DocValues.Type type, boolean diskResident) throws IOException {
+ public static <T extends AbstractAllGroupHeadsCollector.GroupHead<?>> DVAllGroupHeadsCollector<T> create(String groupField, Sort sortWithinGroup, DocValues.Type type, boolean diskResident) {
switch (type) {
case VAR_INTS:
case FIXED_INTS_8:
@@ -89,7 +89,7 @@ public abstract class DVAllGroupHeadsCol
case BYTES_FIXED_SORTED:
return (DVAllGroupHeadsCollector) new GeneralAllGroupHeadsCollector.SortedBR(groupField, type, sortWithinGroup, diskResident);
default:
- throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
}
}
@@ -162,7 +162,7 @@ public abstract class DVAllGroupHeadsCol
private final Sort sortWithinGroup;
private final Map<Comparable<?>, GroupHead> groups;
- GeneralAllGroupHeadsCollector(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ GeneralAllGroupHeadsCollector(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) {
super(groupField, valueType, sortWithinGroup.getSort().length, diskResident);
this.sortWithinGroup = sortWithinGroup;
groups = new HashMap<Comparable<?>, GroupHead>();
@@ -218,7 +218,7 @@ public abstract class DVAllGroupHeadsCol
private DocValues.SortedSource source;
- SortedBR(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ SortedBR(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) {
super(groupField, valueType, sortWithinGroup, diskResident);
}
@@ -244,7 +244,7 @@ public abstract class DVAllGroupHeadsCol
private DocValues.Source source;
- BR(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ BR(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) {
super(groupField, valueType, sortWithinGroup, diskResident);
}
@@ -266,7 +266,7 @@ public abstract class DVAllGroupHeadsCol
private DocValues.Source source;
- Lng(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ Lng(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) {
super(groupField, valueType, sortWithinGroup, diskResident);
}
@@ -287,7 +287,7 @@ public abstract class DVAllGroupHeadsCol
private DocValues.Source source;
- Dbl(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ Dbl(String groupField, DocValues.Type valueType, Sort sortWithinGroup, boolean diskResident) {
super(groupField, valueType, sortWithinGroup, diskResident);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java Thu Jul 19 15:58:54 2012
@@ -77,7 +77,7 @@ public abstract class DVAllGroupsCollect
// Type erasure b/c otherwise we have inconvertible types...
return (DVAllGroupsCollector) new SortedBR(groupField, type, diskResident, initialSize);
default:
- throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java Thu Jul 19 15:58:54 2012
@@ -83,7 +83,7 @@ public abstract class DVDistinctValuesCo
// Type erasure b/c otherwise we have inconvertible types...
return (DVDistinctValuesCollector) new Sorted.BR(groupField, countField, (Collection) groups, diskResident, type);
default:
- throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java Thu Jul 19 15:58:54 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.search.grouping
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
+import java.util.Locale;
/**
* IDV based Implementations of {@link AbstractFirstPassGroupingCollector}.
@@ -74,7 +75,7 @@ public abstract class DVFirstPassGroupin
// Type erasure b/c otherwise we have inconvertible types...
return (DVFirstPassGroupingCollector) new SortedBR(groupSort, topNGroups, groupField, diskResident, type);
default:
- throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java Thu Jul 19 15:58:54 2012
@@ -29,6 +29,7 @@ import org.apache.lucene.util.UnicodeUti
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
/**
* An implementation of {@link AbstractGroupFacetCollector} that computes grouped facets based on docvalues.
@@ -84,12 +85,12 @@ public abstract class DVGroupFacetCollec
case BYTES_FIXED_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_VAR_DEREF:
- throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Group valueType %s not supported", groupDvType));
case BYTES_VAR_SORTED:
case BYTES_FIXED_SORTED:
return GroupSortedBR.createGroupSortedFacetCollector(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
default:
- throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Group valueType %s not supported", groupDvType));
}
}
@@ -133,12 +134,12 @@ public abstract class DVGroupFacetCollec
case BYTES_FIXED_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_VAR_DEREF:
- throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Facet valueType %s not supported", facetDvType));
case BYTES_VAR_SORTED:
case BYTES_FIXED_SORTED:
return new FacetSortedBR(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
default:
- throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Facet valueType %s not supported", facetDvType));
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java Thu Jul 19 15:58:54 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.util.SentinelIn
import java.io.IOException;
import java.util.Collection;
+import java.util.Locale;
/**
* IDV based implementation of {@link AbstractSecondPassGroupingCollector}.
@@ -87,7 +88,7 @@ public abstract class DVSecondPassGroupi
// Type erasure b/c otherwise we have inconvertible types...
return (DVSecondPassGroupingCollector) new SortedBR(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
default:
- throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java Thu Jul 19 15:58:54 2012
@@ -57,9 +57,8 @@ public abstract class TermAllGroupHeadsC
* @param groupField The field to group by
* @param sortWithinGroup The sort within each group
* @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
- * @throws IOException If I/O related errors occur
*/
- public static AbstractAllGroupHeadsCollector<?> create(String groupField, Sort sortWithinGroup) throws IOException {
+ public static AbstractAllGroupHeadsCollector<?> create(String groupField, Sort sortWithinGroup) {
return create(groupField, sortWithinGroup, DEFAULT_INITIAL_SIZE);
}
@@ -73,9 +72,8 @@ public abstract class TermAllGroupHeadsC
* the total number of expected unique groups. Be aware that the heap usage is
* 4 bytes * initialSize.
* @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
- * @throws IOException If I/O related errors occur
*/
- public static AbstractAllGroupHeadsCollector<?> create(String groupField, Sort sortWithinGroup, int initialSize) throws IOException {
+ public static AbstractAllGroupHeadsCollector<?> create(String groupField, Sort sortWithinGroup, int initialSize) {
boolean sortAllScore = true;
boolean sortAllFieldValue = true;
@@ -113,7 +111,7 @@ public abstract class TermAllGroupHeadsC
private Scorer scorer;
- GeneralAllGroupHeadsCollector(String groupField, Sort sortWithinGroup) throws IOException {
+ GeneralAllGroupHeadsCollector(String groupField, Sort sortWithinGroup) {
super(groupField, sortWithinGroup.getSort().length);
this.sortWithinGroup = sortWithinGroup;
groups = new HashMap<BytesRef, GroupHead>();
@@ -409,7 +407,7 @@ public abstract class TermAllGroupHeadsC
BytesRef[] sortValues;
int[] sortOrds;
- private GroupHead(int doc, BytesRef groupValue) throws IOException {
+ private GroupHead(int doc, BytesRef groupValue) {
super(groupValue, doc + readerContext.docBase);
sortValues = new BytesRef[sortsIndex.length];
sortOrds = new int[sortsIndex.length];
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java Thu Jul 19 15:58:54 2012
@@ -159,7 +159,7 @@ public class AllGroupHeadsCollectorTest
int numberOfRuns = _TestUtil.nextInt(random(), 3, 6);
for (int iter = 0; iter < numberOfRuns; iter++) {
if (VERBOSE) {
- System.out.println(String.format("TEST: iter=%d total=%d", iter, numberOfRuns));
+ System.out.println(String.format(Locale.ROOT, "TEST: iter=%d total=%d", iter, numberOfRuns));
}
final int numDocs = _TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
@@ -257,7 +257,7 @@ public class AllGroupHeadsCollectorTest
groupValue,
groups.get(random().nextInt(groups.size())),
groups.get(random().nextInt(groups.size())),
- new BytesRef(String.format("%05d", i)),
+ new BytesRef(String.format(Locale.ROOT, "%05d", i)),
contentStrings[random().nextInt(contentStrings.length)]
);
@@ -349,7 +349,7 @@ public class AllGroupHeadsCollectorTest
GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
System.out.println(
- String.format(
+ String.format(Locale.ROOT,
"Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d",
expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(),
expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId
@@ -361,7 +361,7 @@ public class AllGroupHeadsCollectorTest
GroupDoc actualGroupDoc = groupDocs[actualDocId];
String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
System.out.println(
- String.format(
+ String.format(Locale.ROOT,
"Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d",
actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(),
actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId
@@ -429,7 +429,7 @@ public class AllGroupHeadsCollectorTest
return true;
}
- private int[] createExpectedGroupHeads(String searchTerm, GroupDoc[] groupDocs, Sort docSort, boolean sortByScoreOnly, int[] fieldIdToDocID) throws IOException {
+ private int[] createExpectedGroupHeads(String searchTerm, GroupDoc[] groupDocs, Sort docSort, boolean sortByScoreOnly, int[] fieldIdToDocID) {
Map<BytesRef, List<GroupDoc>> groupHeads = new HashMap<BytesRef, List<GroupDoc>>();
for (GroupDoc groupDoc : groupDocs) {
if (!groupDoc.content.startsWith(searchTerm)) {
@@ -518,7 +518,7 @@ public class AllGroupHeadsCollectorTest
}
@SuppressWarnings({"unchecked","rawtypes"})
- private AbstractAllGroupHeadsCollector<?> createRandomCollector(String groupField, Sort sortWithinGroup, boolean canUseIDV, Type valueType) throws IOException {
+ private AbstractAllGroupHeadsCollector<?> createRandomCollector(String groupField, Sort sortWithinGroup, boolean canUseIDV, Type valueType) {
AbstractAllGroupHeadsCollector<? extends AbstractAllGroupHeadsCollector.GroupHead> collector;
if (random().nextBoolean()) {
ValueSource vs = new BytesRefFieldSource(groupField);
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java Thu Jul 19 15:58:54 2012
@@ -33,7 +33,6 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import java.io.IOException;
import java.util.HashMap;
public class AllGroupsCollectorTest extends LuceneTestCase {
@@ -127,7 +126,7 @@ public class AllGroupsCollectorTest exte
}
}
- private AbstractAllGroupsCollector<?> createRandomCollector(String groupField, boolean canUseIDV) throws IOException {
+ private AbstractAllGroupsCollector<?> createRandomCollector(String groupField, boolean canUseIDV) {
AbstractAllGroupsCollector<?> selected;
if (random().nextBoolean() && canUseIDV) {
boolean diskResident = random().nextBoolean();
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java Thu Jul 19 15:58:54 2012
@@ -450,7 +450,7 @@ public class DistinctValuesCollectorTest
countsVals.add(countValue);
Document doc = new Document();
- doc.add(new StringField("id", String.format("%09d", i), Field.Store.NO));
+ doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.NO));
if (groupValue != null) {
addField(doc, groupField, groupValue, dvType);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java Thu Jul 19 15:58:54 2012
@@ -284,7 +284,7 @@ public class GroupFacetCollectorTest ext
int counter = 1;
for (TermGroupFacetCollector.FacetEntry expectedFacetEntry : expectedFacetEntries) {
System.out.println(
- String.format(
+ String.format(Locale.ROOT,
"%d. Expected facet value %s with count %d",
counter++, expectedFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getCount()
)
@@ -297,7 +297,7 @@ public class GroupFacetCollectorTest ext
counter = 1;
for (TermGroupFacetCollector.FacetEntry actualFacetEntry : actualFacetEntries) {
System.out.println(
- String.format(
+ String.format(Locale.ROOT,
"%d. Actual facet value %s with count %d",
counter++, actualFacetEntry.getValue().utf8ToString(), actualFacetEntry.getCount()
)
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java Thu Jul 19 15:58:54 2012
@@ -18,19 +18,28 @@ package org.apache.lucene.search.groupin
*/
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.SortedBytesDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.CachingWrapperFilter;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.mutable.MutableValueStr;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -195,7 +204,7 @@ public class GroupingSearchTest extends
}
}
- private GroupingSearch createRandomGroupingSearch(String groupField, Sort groupSort, int docsInGroup, boolean canUseIDV) throws IOException {
+ private GroupingSearch createRandomGroupingSearch(String groupField, Sort groupSort, int docsInGroup, boolean canUseIDV) {
GroupingSearch groupingSearch;
if (random().nextBoolean()) {
ValueSource vs = new BytesRefFieldSource(groupField);
Modified: lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Thu Jul 19 15:58:54 2012
@@ -23,6 +23,7 @@ import org.apache.lucene.index.AtomicRea
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.IndexReaderContext;
@@ -44,7 +45,6 @@ import org.apache.lucene.search.grouping
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueStr;
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Thu Jul 19 15:58:54 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -128,6 +129,8 @@ public class TokenSources {
if (termsEnum.next() != null) {
DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false);
if (dpEnum != null) {
+ int doc = dpEnum.nextDoc();
+ assert doc >= 0 && doc != DocIdSetIterator.NO_MORE_DOCS;
int pos = dpEnum.nextPosition();
if (pos >= 0) {
return true;
@@ -191,7 +194,7 @@ public class TokenSources {
}
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
if (currentToken >= tokens.length) {
return false;
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Thu Jul 19 15:58:54 2012
@@ -105,7 +105,7 @@ public final class TokenStreamFromTermPo
};
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
if (this.tokensAtCurrentPosition.hasNext()) {
final Token next = this.tokensAtCurrentPosition.next();
clearAttributes();
@@ -119,7 +119,7 @@ public final class TokenStreamFromTermPo
}
@Override
- public void reset() throws IOException {
+ public void reset() {
this.tokensAtCurrentPosition = this.positionedTokens.iterator();
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Thu Jul 19 15:58:54 2012
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
@@ -43,7 +44,6 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.TermContext;
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Thu Jul 19 15:58:54 2012
@@ -17,10 +17,6 @@ package org.apache.lucene.search.vectorh
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
@@ -29,10 +25,19 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
-import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
protected String[] preTags, postTags;
@@ -48,6 +53,7 @@ public abstract class BaseFragmentsBuild
public static final String[] COLORED_POST_TAGS = { "</b>" };
private char multiValuedSeparator = ' ';
private final BoundaryScanner boundaryScanner;
+ private boolean discreteMultiValueHighlighting = false;
protected BaseFragmentsBuilder(){
this( new String[]{ "<b>" }, new String[]{ "</b>" } );
@@ -76,7 +82,7 @@ public abstract class BaseFragmentsBuild
public abstract List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src );
private static final Encoder NULL_ENCODER = new DefaultEncoder();
-
+
public String createFragment( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList ) throws IOException {
return createFragment( reader, docId, fieldName, fieldFragList,
@@ -102,14 +108,23 @@ public abstract class BaseFragmentsBuild
public String[] createFragments( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList, int maxNumFragments,
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
- if( maxNumFragments < 0 )
+
+ if( maxNumFragments < 0 ) {
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
+ }
- List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );
-
+ List<WeightedFragInfo> fragInfos = fieldFragList.getFragInfos();
List<String> fragments = new ArrayList<String>( maxNumFragments );
Field[] values = getFields( reader, docId, fieldName );
- if( values.length == 0 ) return null;
+ if( values.length == 0 ) {
+ return null;
+ }
+
+ if (discreteMultiValueHighlighting && values.length > 1) {
+ fragInfos = discreteMultiValueHighlighting(fragInfos, values);
+ }
+
+ fragInfos = getWeightedFragInfoList(fragInfos);
StringBuilder buffer = new StringBuilder();
int[] nextValueIndex = { 0 };
for( int n = 0; n < maxNumFragments && n < fragInfos.size(); n++ ){
@@ -125,14 +140,14 @@ public abstract class BaseFragmentsBuild
reader.document(docId, new StoredFieldVisitor() {
@Override
- public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+ public void stringField(FieldInfo fieldInfo, String value) {
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
fields.add(new Field(fieldInfo.name, value, ft));
}
@Override
- public Status needsField(FieldInfo fieldInfo) throws IOException {
+ public Status needsField(FieldInfo fieldInfo) {
return fieldInfo.name.equals(fieldName) ? Status.YES : Status.NO;
}
});
@@ -186,7 +201,92 @@ public abstract class BaseFragmentsBuild
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
return buffer.substring( startOffset, eo );
}
-
+
+ protected List<WeightedFragInfo> discreteMultiValueHighlighting(List<WeightedFragInfo> fragInfos, Field[] fields) {
+ Map<String, List<WeightedFragInfo>> fieldNameToFragInfos = new HashMap<String, List<WeightedFragInfo>>();
+ for (Field field : fields) {
+ fieldNameToFragInfos.put(field.name(), new ArrayList<WeightedFragInfo>());
+ }
+
+ fragInfos: for (WeightedFragInfo fragInfo : fragInfos) {
+ int fieldStart;
+ int fieldEnd = 0;
+ for (Field field : fields) {
+ if (field.stringValue().isEmpty()) {
+ fieldEnd++;
+ continue;
+ }
+ fieldStart = fieldEnd;
+ fieldEnd += field.stringValue().length() + 1; // + 1 for going to next field with same name.
+
+ if (fragInfo.getStartOffset() >= fieldStart && fragInfo.getEndOffset() >= fieldStart &&
+ fragInfo.getStartOffset() <= fieldEnd && fragInfo.getEndOffset() <= fieldEnd) {
+ fieldNameToFragInfos.get(field.name()).add(fragInfo);
+ continue fragInfos;
+ }
+
+ if (fragInfo.getSubInfos().isEmpty()) {
+ continue fragInfos;
+ }
+
+ Toffs firstToffs = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0);
+ if (fragInfo.getStartOffset() >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) {
+ continue;
+ }
+
+ int fragStart = fieldStart;
+ if (fragInfo.getStartOffset() > fieldStart && fragInfo.getStartOffset() < fieldEnd) {
+ fragStart = fragInfo.getStartOffset();
+ }
+
+ int fragEnd = fieldEnd;
+ if (fragInfo.getEndOffset() > fieldStart && fragInfo.getEndOffset() < fieldEnd) {
+ fragEnd = fragInfo.getEndOffset();
+ }
+
+
+ List<SubInfo> subInfos = new ArrayList<SubInfo>();
+ WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, fragInfo.getTotalBoost());
+
+ Iterator<SubInfo> subInfoIterator = fragInfo.getSubInfos().iterator();
+ while (subInfoIterator.hasNext()) {
+ SubInfo subInfo = subInfoIterator.next();
+ List<Toffs> toffsList = new ArrayList<Toffs>();
+ Iterator<Toffs> toffsIterator = subInfo.getTermsOffsets().iterator();
+ while (toffsIterator.hasNext()) {
+ Toffs toffs = toffsIterator.next();
+ if (toffs.getStartOffset() >= fieldStart && toffs.getEndOffset() <= fieldEnd) {
+ toffsList.add(toffs);
+ toffsIterator.remove();
+ }
+ }
+ if (!toffsList.isEmpty()) {
+ subInfos.add(new SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum()));
+ }
+
+ if (subInfo.getTermsOffsets().isEmpty()) {
+ subInfoIterator.remove();
+ }
+ }
+ fieldNameToFragInfos.get(field.name()).add(weightedFragInfo);
+ }
+ }
+
+ List<WeightedFragInfo> result = new ArrayList<WeightedFragInfo>();
+ for (List<WeightedFragInfo> weightedFragInfos : fieldNameToFragInfos.values()) {
+ result.addAll(weightedFragInfos);
+ }
+ Collections.sort(result, new Comparator<WeightedFragInfo>() {
+
+ public int compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2) {
+ return info1.getStartOffset() - info2.getStartOffset();
+ }
+
+ });
+
+ return result;
+ }
+
public void setMultiValuedSeparator( char separator ){
multiValuedSeparator = separator;
}
@@ -195,6 +295,14 @@ public abstract class BaseFragmentsBuild
return multiValuedSeparator;
}
+ public boolean isDiscreteMultiValueHighlighting() {
+ return discreteMultiValueHighlighting;
+ }
+
+ public void setDiscreteMultiValueHighlighting(boolean discreteMultiValueHighlighting) {
+ this.discreteMultiValueHighlighting = discreteMultiValueHighlighting;
+ }
+
protected String getPreTag( int num ){
return getPreTag( preTags, num );
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java Thu Jul 19 15:58:54 2012
@@ -17,12 +17,12 @@ package org.apache.lucene.search.vectorh
* limitations under the License.
*/
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* FieldFragList has a list of "frag info" that is used by FragmentsBuilder class
* to create fragments (snippets).
@@ -116,7 +116,11 @@ public abstract class FieldFragList {
public int getSeqnum(){
return seqnum;
}
-
+
+ public String getText(){
+ return text;
+ }
+
@Override
public String toString(){
StringBuilder sb = new StringBuilder();
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Thu Jul 19 15:58:54 2012
@@ -348,7 +348,7 @@ public class FieldQuery {
return map;
}
- void add( Query query, IndexReader reader ) throws IOException {
+ void add( Query query, IndexReader reader ) {
if( query instanceof TermQuery ){
addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java Thu Jul 19 15:58:54 2012
@@ -31,7 +31,6 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -46,14 +45,12 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.FixedBitSet;
public class HighlighterPhraseTest extends LuceneTestCase {
private static final String FIELD = "text";
- public void testConcurrentPhrase() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testConcurrentPhrase() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox jumped";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -94,8 +91,7 @@ public class HighlighterPhraseTest exten
}
}
- public void testConcurrentSpan() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testConcurrentSpan() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox jumped";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -129,19 +125,17 @@ public class HighlighterPhraseTest exten
}
@Override
- public void collect(int i) throws IOException {
+ public void collect(int i) {
bitset.set(this.baseDoc + i);
}
@Override
- public void setNextReader(AtomicReaderContext context)
- throws IOException {
+ public void setNextReader(AtomicReaderContext context) {
this.baseDoc = context.docBase;
}
@Override
- public void setScorer(org.apache.lucene.search.Scorer scorer)
- throws IOException {
+ public void setScorer(org.apache.lucene.search.Scorer scorer) {
// Do Nothing
}
});
@@ -165,8 +159,7 @@ public class HighlighterPhraseTest exten
}
}
- public void testSparsePhrase() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testSparsePhrase() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -208,8 +201,7 @@ public class HighlighterPhraseTest exten
}
}
- public void testSparsePhraseWithNoPositions() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testSparsePhraseWithNoPositions() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -248,8 +240,7 @@ public class HighlighterPhraseTest exten
}
}
- public void testSparseSpan() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -304,7 +295,7 @@ public class HighlighterPhraseTest exten
}
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
this.i++;
if (this.i >= this.tokens.length) {
return false;
@@ -344,7 +335,7 @@ public class HighlighterPhraseTest exten
}
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
this.i++;
if (this.i >= this.tokens.length) {
return false;
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Thu Jul 19 15:58:54 2012
@@ -1371,7 +1371,7 @@ public class HighlighterTest extends Bas
// now an ugly built of XML parsing to test the snippet is encoded OK
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
- org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes()));
+ org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes("UTF-8")));
Element root = doc.getDocumentElement();
NodeList nodes = root.getElementsByTagName("body");
Element body = (Element) nodes.item(0);
@@ -1458,7 +1458,7 @@ public class HighlighterTest extends Bas
}
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
if(iter.hasNext()) {
Token token = iter.next();
clearAttributes();
@@ -1509,7 +1509,7 @@ public class HighlighterTest extends Bas
}
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
if(iter.hasNext()) {
Token token = iter.next();
clearAttributes();
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java Thu Jul 19 15:58:54 2012
@@ -19,7 +19,8 @@ package org.apache.lucene.search.highlig
import java.io.IOException;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -27,7 +28,6 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -40,7 +40,6 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.LuceneTestCase;
// LUCENE-2874
@@ -57,7 +56,7 @@ public class TokenSourcesTest extends Lu
private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
this.i++;
if (this.i >= this.tokens.length) {
return false;
@@ -85,8 +84,7 @@ public class TokenSourcesTest extends Lu
}
}
- public void testOverlapWithOffset() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testOverlapWithOffset() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -129,8 +127,8 @@ public class TokenSourcesTest extends Lu
}
}
- public void testOverlapWithPositionsAndOffset() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testOverlapWithPositionsAndOffset()
+ throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -174,8 +172,8 @@ public class TokenSourcesTest extends Lu
}
}
- public void testOverlapWithOffsetExactPhrase() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
+ public void testOverlapWithOffsetExactPhrase()
+ throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
@@ -219,8 +217,7 @@ public class TokenSourcesTest extends Lu
}
public void testOverlapWithPositionsAndOffsetExactPhrase()
- throws CorruptIndexException, LockObtainFailedException, IOException,
- InvalidTokenOffsetsException {
+ throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory,
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Thu Jul 19 15:58:54 2012
@@ -317,13 +317,13 @@ public abstract class AbstractTestCase e
}
@Override
- public void reset( Reader input ) throws IOException {
- super.reset( input );
+ public void setReader( Reader input ) throws IOException {
+ super.setReader( input );
reset();
}
@Override
- public void reset() throws IOException {
+ public void reset() {
startTerm = 0;
nextStartOffset = 0;
snippet = null;
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/BreakIteratorBoundaryScannerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/BreakIteratorBoundaryScannerTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/BreakIteratorBoundaryScannerTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/BreakIteratorBoundaryScannerTest.java Thu Jul 19 15:58:54 2012
@@ -30,7 +30,7 @@ public class BreakIteratorBoundaryScanne
public void testOutOfRange() throws Exception {
StringBuilder text = new StringBuilder(TEXT);
- BreakIterator bi = BreakIterator.getWordInstance(Locale.ENGLISH);
+ BreakIterator bi = BreakIterator.getWordInstance(Locale.ROOT);
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
int start = TEXT.length() + 1;
@@ -44,7 +44,7 @@ public class BreakIteratorBoundaryScanne
public void testWordBoundary() throws Exception {
StringBuilder text = new StringBuilder(TEXT);
- BreakIterator bi = BreakIterator.getWordInstance(Locale.ENGLISH);
+ BreakIterator bi = BreakIterator.getWordInstance(Locale.ROOT);
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
int start = TEXT.indexOf("formance");
@@ -57,7 +57,8 @@ public class BreakIteratorBoundaryScanne
public void testSentenceBoundary() throws Exception {
StringBuilder text = new StringBuilder(TEXT);
- BreakIterator bi = BreakIterator.getSentenceInstance();
+ // we test this with default locale, it's randomized by LuceneTestCase
+ BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
int start = TEXT.indexOf("any application");
@@ -70,7 +71,8 @@ public class BreakIteratorBoundaryScanne
public void testLineBoundary() throws Exception {
StringBuilder text = new StringBuilder(TEXT);
- BreakIterator bi = BreakIterator.getLineInstance();
+ // we test this with default locale, it's randomized by LuceneTestCase
+ BreakIterator bi = BreakIterator.getLineInstance(Locale.getDefault());
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
int start = TEXT.indexOf("any application");
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java Thu Jul 19 15:58:54 2012
@@ -303,7 +303,7 @@ public class IndexTimeSynonymTest extend
int p = 0;
@Override
- public boolean incrementToken() throws IOException {
+ public boolean incrementToken() {
if( p >= tokens.length ) return false;
clearAttributes();
tokens[p++].copyTo(reusableToken);
Modified: lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Thu Jul 19 15:58:54 2012
@@ -17,20 +17,32 @@ package org.apache.lucene.search.vectorh
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util._TestUtil;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
@@ -175,4 +187,152 @@ public class SimpleFragmentsBuilderTest
sfb.setMultiValuedSeparator( '/' );
assertEquals( "//a b c//<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
}
+
+ public void testDiscreteMultiValueHighlighting() throws Exception {
+ makeIndexShortMV();
+
+ FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
+ FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
+ FieldPhraseList fpl = new FieldPhraseList( stack, fq );
+ SimpleFragListBuilder sflb = new SimpleFragListBuilder();
+ FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
+ SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+ sfb.setDiscreteMultiValueHighlighting(true);
+ assertEquals( "<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
+
+ make1dmfIndex("some text to highlight", "highlight other text");
+ fq = new FieldQuery( tq( "text" ), true, true );
+ stack = new FieldTermStack( reader, 0, F, fq );
+ fpl = new FieldPhraseList( stack, fq );
+ sflb = new SimpleFragListBuilder();
+ ffl = sflb.createFieldFragList( fpl, 32 );
+ String[] result = sfb.createFragments(reader, 0, F, ffl, 3);
+ assertEquals(2, result.length);
+ assertEquals("some <b>text</b> to highlight", result[0]);
+ assertEquals("other <b>text</b>", result[1]);
+
+ fq = new FieldQuery( tq( "highlight" ), true, true );
+ stack = new FieldTermStack( reader, 0, F, fq );
+ fpl = new FieldPhraseList( stack, fq );
+ sflb = new SimpleFragListBuilder();
+ ffl = sflb.createFieldFragList( fpl, 32 );
+ result = sfb.createFragments(reader, 0, F, ffl, 3);
+ assertEquals(2, result.length);
+ assertEquals("text to <b>highlight</b>", result[0]);
+ assertEquals("<b>highlight</b> other text", result[1]);
+ }
+
+ public void testRandomDiscreteMultiValueHighlighting() throws Exception {
+ String[] randomValues = new String[3 + random().nextInt(10 * RANDOM_MULTIPLIER)];
+ for (int i = 0; i < randomValues.length; i++) {
+ String randomValue;
+ do {
+ randomValue = _TestUtil.randomSimpleString(random());
+ } while ("".equals(randomValue));
+ randomValues[i] = randomValue;
+ }
+
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(
+ random(),
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+
+ FieldType customType = new FieldType(TextField.TYPE_STORED);
+ customType.setStoreTermVectors(true);
+ customType.setStoreTermVectorOffsets(true);
+ customType.setStoreTermVectorPositions(true);
+
+ int numDocs = randomValues.length * 5;
+ int numFields = 2 + random().nextInt(5);
+ int numTerms = 2 + random().nextInt(3);
+ List<Doc> docs = new ArrayList<Doc>(numDocs);
+ List<Document> documents = new ArrayList<Document>(numDocs);
+ Map<String, Set<Integer>> valueToDocId = new HashMap<String, Set<Integer>>();
+ for (int i = 0; i < numDocs; i++) {
+ Document document = new Document();
+ String[][] fields = new String[numFields][numTerms];
+ for (int j = 0; j < numFields; j++) {
+ String[] fieldValues = new String[numTerms];
+ fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
+ StringBuilder builder = new StringBuilder(fieldValues[0]);
+ for (int k = 1; k < numTerms; k++) {
+ fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
+ builder.append(' ').append(fieldValues[k]);
+ }
+ document.add(new Field(F, builder.toString(), customType));
+ fields[j] = fieldValues;
+ }
+ docs.add(new Doc(fields));
+ documents.add(document);
+ }
+ writer.addDocuments(documents);
+ writer.close();
+ IndexReader reader = DirectoryReader.open(dir);
+
+ try {
+ int highlightIters = 1 + random().nextInt(120 * RANDOM_MULTIPLIER);
+ for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++) {
+ String queryTerm = randomValues[random().nextInt(randomValues.length)];
+ int randomHit = valueToDocId.get(queryTerm).iterator().next();
+ List<StringBuilder> builders = new ArrayList<StringBuilder>();
+ for (String[] fieldValues : docs.get(randomHit).fieldValues) {
+ StringBuilder builder = new StringBuilder();
+ boolean hit = false;
+ for (int i = 0; i < fieldValues.length; i++) {
+ if (queryTerm.equals(fieldValues[i])) {
+ builder.append("<b>").append(queryTerm).append("</b>");
+ hit = true;
+ } else {
+ builder.append(fieldValues[i]);
+ }
+ if (i != fieldValues.length - 1) {
+ builder.append(' ');
+ }
+ }
+ if (hit) {
+ builders.add(builder);
+ }
+ }
+
+ FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
+ FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);
+
+ FieldPhraseList fpl = new FieldPhraseList(stack, fq);
+ SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
+ FieldFragList ffl = sflb.createFieldFragList(fpl, 300);
+
+ SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+ sfb.setDiscreteMultiValueHighlighting(true);
+ String[] actualFragments = sfb.createFragments(reader, randomHit, F, ffl, numFields);
+ assertEquals(builders.size(), actualFragments.length);
+ for (int i = 0; i < actualFragments.length; i++) {
+ assertEquals(builders.get(i).toString(), actualFragments[i]);
+ }
+ }
+ } finally {
+ reader.close();
+ dir.close();
+ }
+ }
+
+ private String getRandomValue(String[] randomValues, Map<String, Set<Integer>> valueToDocId, int docId) {
+ String value = randomValues[random().nextInt(randomValues.length)];
+ if (!valueToDocId.containsKey(value)) {
+ valueToDocId.put(value, new HashSet<Integer>());
+ }
+ valueToDocId.get(value).add(docId);
+ return value;
+ }
+
+ private static class Doc {
+
+ final String[][] fieldValues;
+
+ private Doc(String[][] fieldValues) {
+ this.fieldValues = fieldValues;
+ }
+ }
+
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/ivy-settings.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/ivy-settings.xml?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/ivy-settings.xml (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/ivy-settings.xml Thu Jul 19 15:58:54 2012
@@ -29,7 +29,7 @@
<!-- you might need to tweak this from china so it works -->
<ibiblio name="working-chinese-mirror" root="http://mirror.netcologne.de/maven2" m2compatible="true" />
- <chain name="default" returnFirst="true" checkmodified="true">
+ <chain name="default" returnFirst="true" checkmodified="true" changingPattern=".*SNAPSHOT">
<resolver ref="local"/>
<resolver ref="main"/>
<resolver ref="sonatype-releases" />
Modified: lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java Thu Jul 19 15:58:54 2012
@@ -21,6 +21,7 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.Query;
import java.io.IOException;
+import java.util.Locale;
/**
* Utility for query time joining using TermsQuery and TermsCollector.
@@ -85,7 +86,7 @@ public final class JoinUtil {
fromQuery
);
default:
- throw new IllegalArgumentException(String.format("Score mode %s isn't supported.", scoreMode));
+ throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java Thu Jul 19 15:58:54 2012
@@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRefHa
import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
+import java.util.Locale;
import java.util.Set;
class TermsIncludingScoreQuery extends Query {
@@ -69,7 +70,7 @@ class TermsIncludingScoreQuery extends Q
}
public String toString(String string) {
- return String.format("TermsIncludingScoreQuery{field=%s;originalQuery=%s}", field, unwrittenOriginalQuery);
+ return String.format(Locale.ROOT, "TermsIncludingScoreQuery{field=%s;originalQuery=%s}", field, unwrittenOriginalQuery);
}
@Override
@@ -207,6 +208,11 @@ class TermsIncludingScoreQuery extends Q
} while (docId != DocIdSetIterator.NO_MORE_DOCS);
return docId;
}
+
+ @Override
+ public float freq() {
+ return 1;
+ }
}
// This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
Modified: lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java Thu Jul 19 15:58:54 2012
@@ -74,7 +74,7 @@ class TermsQuery extends MultiTermQuery
private BytesRef seekTerm;
private int upto = 0;
- SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms) throws IOException {
+ SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms) {
super(tenum);
this.terms = terms;
Modified: lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Thu Jul 19 15:58:54 2012
@@ -161,6 +161,7 @@ public class ToChildBlockJoinQuery exten
private final Bits acceptDocs;
private float parentScore;
+ private float parentFreq = 1;
private int childDoc = -1;
private int parentDoc;
@@ -175,7 +176,7 @@ public class ToChildBlockJoinQuery exten
@Override
public Collection<ChildScorer> getChildren() {
- return Collections.singletonList(new ChildScorer(parentScorer, "BLOCK_JOIN"));
+ return Collections.singleton(new ChildScorer(parentScorer, "BLOCK_JOIN"));
}
@Override
@@ -218,6 +219,7 @@ public class ToChildBlockJoinQuery exten
if (childDoc < parentDoc) {
if (doScores) {
parentScore = parentScorer.score();
+ parentFreq = parentScorer.freq();
}
//System.out.println(" " + childDoc);
return childDoc;
@@ -248,6 +250,11 @@ public class ToChildBlockJoinQuery exten
}
@Override
+ public float freq() throws IOException {
+ return parentFreq;
+ }
+
+ @Override
public int advance(int childTarget) throws IOException {
assert childTarget >= parentBits.length() || !parentBits.get(childTarget);
@@ -269,6 +276,7 @@ public class ToChildBlockJoinQuery exten
}
if (doScores) {
parentScore = parentScorer.score();
+ parentFreq = parentScorer.freq();
}
final int firstChild = parentBits.prevSetBit(parentDoc-1);
//System.out.println(" firstChild=" + firstChild);
Modified: lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1363400&r1=1363399&r2=1363400&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Thu Jul 19 15:58:54 2012
@@ -327,6 +327,11 @@ public class ToParentBlockJoinCollector
public float score() {
return score;
}
+
+ @Override
+ public float freq() {
+ return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw?
+ }
@Override
public int docID() {