You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/21 01:43:28 UTC
svn commit: r1375322 - in /lucene/dev/branches/pforcodec_3892: ./ dev-tools/
dev-tools/idea/lucene/analysis/morfologik/
dev-tools/idea/lucene/analysis/phonetic/
dev-tools/idea/solr/contrib/extraction/ dev-tools/maven/ lucene/
lucene/core/ lucene/core/s...
Author: mikemccand
Date: Mon Aug 20 23:43:27 2012
New Revision: 1375322
URL: http://svn.apache.org/viewvc?rev=1375322&view=rev
Log:
LUCENE-3892: merge trunk
Added:
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/util/FilterIterator.java
- copied unchanged from r1375317, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FilterIterator.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/test/org/apache/lucene/util/TestFilterIterator.java
- copied unchanged from r1375317, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFilterIterator.java
lucene/dev/branches/pforcodec_3892/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientConfigurer.java
- copied unchanged from r1375317, lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientConfigurer.java
Modified:
lucene/dev/branches/pforcodec_3892/ (props changed)
lucene/dev/branches/pforcodec_3892/dev-tools/ (props changed)
lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/morfologik/morfologik.iml
lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/phonetic/phonetic.iml
lucene/dev/branches/pforcodec_3892/dev-tools/idea/solr/contrib/extraction/extraction.iml
lucene/dev/branches/pforcodec_3892/dev-tools/maven/pom.xml.template
lucene/dev/branches/pforcodec_3892/lucene/ (props changed)
lucene/dev/branches/pforcodec_3892/lucene/core/ (props changed)
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/package.html
lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java
lucene/dev/branches/pforcodec_3892/lucene/spatial/ (props changed)
lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java
lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgsParser.java
lucene/dev/branches/pforcodec_3892/lucene/test-framework/ (props changed)
lucene/dev/branches/pforcodec_3892/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
lucene/dev/branches/pforcodec_3892/solr/ (props changed)
lucene/dev/branches/pforcodec_3892/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/pforcodec_3892/solr/core/ (props changed)
lucene/dev/branches/pforcodec_3892/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
lucene/dev/branches/pforcodec_3892/solr/core/src/test-files/solr/collection1/conf/schema15.xml
lucene/dev/branches/pforcodec_3892/solr/core/src/test/org/apache/solr/update/TestUpdate.java
lucene/dev/branches/pforcodec_3892/solr/solrj/ (props changed)
lucene/dev/branches/pforcodec_3892/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java
lucene/dev/branches/pforcodec_3892/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpClientUtilTest.java
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/morfologik/morfologik.iml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/morfologik/morfologik.iml?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/morfologik/morfologik.iml (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/morfologik/morfologik.iml Mon Aug 20 23:43:27 2012
@@ -6,6 +6,7 @@
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/src/resources" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
</content>
<orderEntry type="inheritedJdk" />
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/phonetic/phonetic.iml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/phonetic/phonetic.iml?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/phonetic/phonetic.iml (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/idea/lucene/analysis/phonetic/phonetic.iml Mon Aug 20 23:43:27 2012
@@ -6,6 +6,7 @@
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/src/resources" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
</content>
<orderEntry type="inheritedJdk" />
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/idea/solr/contrib/extraction/extraction.iml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/idea/solr/contrib/extraction/extraction.iml?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/idea/solr/contrib/extraction/extraction.iml (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/idea/solr/contrib/extraction/extraction.iml Mon Aug 20 23:43:27 2012
@@ -15,6 +15,6 @@
<orderEntry type="library" name="Solr library" level="project" />
<orderEntry type="library" name="Solr extraction library" level="project" />
<orderEntry type="module" module-name="solr" />
- <orderEntry type="module" module-name="lucene" scope="TEST" />
+ <orderEntry type="module" module-name="lucene" />
</component>
</module>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/pom.xml.template?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/pom.xml.template Mon Aug 20 23:43:27 2012
@@ -47,7 +47,7 @@
<java.compat.version>1.6</java.compat.version>
<jetty.version>8.1.2.v20120308</jetty.version>
<slf4j.version>1.6.4</slf4j.version>
- <tika.version>1.1</tika.version>
+ <tika.version>1.2</tika.version>
<httpcomponents.version>4.1.3</httpcomponents.version>
<!-- RandomizedTesting library system properties -->
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java Mon Aug 20 23:43:27 2012
@@ -33,10 +33,9 @@ public abstract class Fields implements
* null if the field does not exist. */
public abstract Terms terms(String field) throws IOException;
- /** Returns the number of terms for all fields, or -1 if this
- * measure isn't stored by the codec. Note that, just like
- * other term measures, this measure does not take deleted
- * documents into account. */
+ /** Returns the number of fields or -1 if the number of
+ * distinct field names is unknown. If >= 0,
+ * {@link #iterator} will return as many field names. */
public abstract int size() throws IOException;
/** Returns the number of terms for all fields, or -1 if this
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/package.html?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/package.html (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/package.html Mon Aug 20 23:43:27 2012
@@ -21,6 +21,244 @@
</head>
<body>
Code to maintain and access indices.
-<!-- TODO: add a BASIC overview here, including code examples of using postings apis -->
+<!-- TODO: add IndexWriter, IndexWriterConfig, DocValues, etc etc -->
+<h2>Table Of Contents</h2>
+<p>
+ <ol>
+ <li><a href="#postings">Postings APIs</a>
+ <ul>
+ <li><a href="#fields">Fields</a></li>
+ <li><a href="#terms">Terms</a></li>
+ <li><a href="#documents">Documents</a></li>
+ <li><a href="#positions">Positions</a></li>
+ </ul>
+ </li>
+ <li><a href="#stats">Index Statistics</a>
+ <ul>
+ <li><a href="#termstats">Term-level</a></li>
+ <li><a href="#fieldstats">Field-level</a></li>
+ <li><a href="#segmentstats">Segment-level</a></li>
+ <li><a href="#documentstats">Document-level</a></li>
+ </ul>
+ </li>
+ </ol>
+</p>
+<a name="postings"></a>
+<h2>Postings APIs</h2>
+<a name="fields"></a>
+<h4>
+ Fields
+</h4>
+<p>
+{@link org.apache.lucene.index.Fields} is the initial entry point into the
+postings APIs; it can be obtained in several ways:
+<pre class="prettyprint">
+// access indexed fields for an index segment
+Fields fields = reader.fields();
+// access term vector fields for a specified document
+Fields fields = reader.getTermVectors(docid);
+</pre>
+Fields implements Java's Iterable interface, so it's easy to enumerate the
+list of fields:
+<pre class="prettyprint">
+// enumerate list of fields
+for (String field : fields) {
+ // access the terms for this field
+ Terms terms = fields.terms(field);
+}
+</pre>
+</p>
+<a name="terms"></a>
+<h4>
+ Terms
+</h4>
+<p>
+{@link org.apache.lucene.index.Terms} represents the collection of terms
+within a field, exposes some metadata and <a href="#fieldstats">statistics</a>,
+and an API for enumeration.
+<pre class="prettyprint">
+// metadata about the field
+System.out.println("positions? " + terms.hasPositions());
+System.out.println("offsets? " + terms.hasOffsets());
+System.out.println("payloads? " + terms.hasPayloads());
+// iterate through terms
+TermsEnum termsEnum = terms.iterator(null);
+BytesRef term = null;
+while ((term = termsEnum.next()) != null) {
+ doSomethingWith(termsEnum.term());
+}
+</pre>
+{@link org.apache.lucene.index.TermsEnum} provides an iterator over the list
+of terms within a field, some <a href="#termstats">statistics</a> about the term,
+and methods to access the term's <a href="#documents">documents</a> and
+<a href="#positions">positions</a>.
+<pre class="prettyprint">
+// seek to a specific term
+boolean found = termsEnum.seekExact(new BytesRef("foobar"), true);
+if (found) {
+ // get the document frequency
+ System.out.println(termsEnum.docFreq());
+ // enumerate through documents
+ DocsEnum docs = termsEnum.docs(null, null);
+ // enumerate through documents and positions
+ DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(null, null);
+}
+</pre>
+</p>
+<a name="documents"></a>
+<h4>
+ Documents
+</h4>
+<p>
+{@link org.apache.lucene.index.DocsEnum} is an extension of
+{@link org.apache.lucene.search.DocIdSetIterator} that iterates over the list of
+documents for a term, along with the term frequency within that document.
+<pre class="prettyprint">
+int docid;
+while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ System.out.println(docid);
+ System.out.println(docsEnum.freq());
+}
+</pre>
+</p>
+<a name="positions"></a>
+<h4>
+ Positions
+</h4>
+<p>
+{@link org.apache.lucene.index.DocsAndPositionsEnum} is an extension of
+{@link org.apache.lucene.index.DocsEnum} that additionally allows iteration
+over the positions at which a term occurred within the document, and any additional
+per-position information (offsets and payload).
+<pre class="prettyprint">
+int docid;
+while ((docid = docsAndPositionsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ System.out.println(docid);
+ int freq = docsAndPositionsEnum.freq();
+ for (int i = 0; i < freq; i++) {
+ System.out.println(docsAndPositionsEnum.nextPosition());
+ System.out.println(docsAndPositionsEnum.startOffset());
+ System.out.println(docsAndPositionsEnum.endOffset());
+ System.out.println(docsAndPositionsEnum.getPayload());
+ }
+}
+</pre>
+</p>
+<a name="stats"></a>
+<h2>Index Statistics</h2>
+<a name="termstats"></a>
+<h4>
+ Term statistics
+</h4>
+<p>
+ <ul>
+ <li>{@link org.apache.lucene.index.TermsEnum#docFreq}: Returns the number of
+ documents that contain at least one occurrence of the term. This statistic
+ is always available for an indexed term. Note that it will also count
+ deleted documents; when segments are merged, the statistic is updated as
+ those deleted documents are merged away.
+ <li>{@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
+ of occurrences of this term across all documents. Note that this statistic
+ is unavailable (returns <code>-1</code>) if term frequencies were omitted
+ from the index
+ ({@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_ONLY DOCS_ONLY})
+ for the field. Like docFreq(), it will also count occurrences that appear in
+ deleted documents.
+ </ul>
+</p>
+<a name="fieldstats"></a>
+<h4>
+ Field statistics
+</h4>
+<p>
+ <ul>
+ <li>{@link org.apache.lucene.index.Terms#size}: Returns the number of
+ unique terms in the field. This statistic may be unavailable
+ (returns <code>-1</code>) for some Terms implementations such as
+ {@link org.apache.lucene.index.MultiTerms}, where it cannot be efficiently
+ computed. Note that this count also includes terms that appear only
+ in deleted documents: when segments are merged such terms are also merged
+ away and the statistic is then updated.
+ <li>{@link org.apache.lucene.index.Terms#getDocCount}: Returns the number of
+ documents that contain at least one occurrence of any term for this field.
+ This can be thought of as a Field-level docFreq(). Like docFreq() it will
+ also count deleted documents.
+ <li>{@link org.apache.lucene.index.Terms#getSumDocFreq}: Returns the number of
+ postings (term-document mappings in the inverted index) for the field. This
+ can be thought of as the sum of {@link org.apache.lucene.index.TermsEnum#docFreq}
+ across all terms in the field, and like docFreq() it will also count postings
+ that appear in deleted documents.
+ <li>{@link org.apache.lucene.index.Terms#getSumTotalTermFreq}: Returns the number
+ of tokens for the field. This can be thought of as the sum of
+ {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
+ field, and like totalTermFreq() it will also count occurrences that appear in
+ deleted documents, and will be unavailable (returns <code>-1</code>) if term
+ frequencies were omitted from the index
+ ({@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_ONLY DOCS_ONLY})
+ for the field.
+ </ul>
+</p>
+<a name="segmentstats"></a>
+<h4>
+ Segment statistics
+</h4>
+<p>
+ <ul>
+ <li>{@link org.apache.lucene.index.IndexReader#maxDoc}: Returns the number of
+ documents (including deleted documents) in the index.
+ <li>{@link org.apache.lucene.index.IndexReader#numDocs}: Returns the number
+ of live documents (excluding deleted documents) in the index.
+ <li>{@link org.apache.lucene.index.IndexReader#numDeletedDocs}: Returns the
+ number of deleted documents in the index.
+ <li>{@link org.apache.lucene.index.Fields#size}: Returns the number of indexed
+ fields.
+ <li>{@link org.apache.lucene.index.Fields#getUniqueTermCount}: Returns the number
+ of indexed terms, the sum of {@link org.apache.lucene.index.Terms#size}
+ across all fields.
+ </ul>
+</p>
+<a name="documentstats"></a>
+<h4>
+ Document statistics
+</h4>
+<p>
+Document statistics are available during the indexing process for an indexed field: typically
+a {@link org.apache.lucene.search.similarities.Similarity} implementation will store some
+of these values (possibly in a lossy way), into the normalization value for the document in
+its {@link org.apache.lucene.search.similarities.Similarity#computeNorm} method.
+</p>
+<p>
+ <ul>
+ <li>{@link org.apache.lucene.index.FieldInvertState#getLength}: Returns the number of
+ tokens for this field in the document. Note that this is just the number
+ of times that {@link org.apache.lucene.analysis.TokenStream#incrementToken} returned
+ true, and is unrelated to the values in
+ {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}.
+ <li>{@link org.apache.lucene.index.FieldInvertState#getNumOverlap}: Returns the number
+ of tokens for this field in the document that had a position increment of zero. This
+ can be used to compute a document length that discounts artificial tokens
+ such as synonyms.
+ <li>{@link org.apache.lucene.index.FieldInvertState#getPosition}: Returns the accumulated
+ position value for this field in the document: computed from the values of
+ {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute} and including
+ {@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap}s across multivalued
+ fields.
+ <li>{@link org.apache.lucene.index.FieldInvertState#getOffset}: Returns the total
+ character offset value for this field in the document: computed from the values of
+ {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} returned by
+ {@link org.apache.lucene.analysis.TokenStream#end}, and including
+ {@link org.apache.lucene.analysis.Analyzer#getOffsetGap}s across multivalued
+ fields.
+ <li>{@link org.apache.lucene.index.FieldInvertState#getUniqueTermCount}: Returns the number
+ of unique terms encountered for this field in the document.
+ <li>{@link org.apache.lucene.index.FieldInvertState#getMaxTermFrequency}: Returns the maximum
+ frequency across all unique terms encountered for this field in the document.
+ </ul>
+</p>
+<p>
+Additional user-supplied statistics can be added to the document as DocValues fields and
+accessed via {@link org.apache.lucene.index.AtomicReader#docValues}.
+</p>
+<p>
</body>
</html>
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/util/WeakIdentityMap.java Mon Aug 20 23:43:27 2012
@@ -107,6 +107,8 @@ public final class WeakIdentityMap<K,V>
public Iterator<K> keyIterator() {
reap();
final Iterator<IdentityWeakReference> iterator = backingStore.keySet().iterator();
+ // IMPORTANT: Don't use oal.util.FilterIterator here:
+ // We need a *strong* reference to the current key after setNext()!!!
return new Iterator<K>() {
// holds strong reference to next element in backing iterator:
private Object next = null;
Modified: lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java Mon Aug 20 23:43:27 2012
@@ -35,10 +35,6 @@ public class SpatialArgs {
private Shape shape;
private double distPrecision = DEFAULT_DIST_PRECISION;
- // Useful for 'distance' calculations
- private Double min;
- private Double max;
-
public SpatialArgs(SpatialOperation operation) {
this.operation = operation;
}
@@ -60,12 +56,6 @@ public class SpatialArgs {
StringBuilder str = new StringBuilder();
str.append(operation.getName()).append('(');
str.append(shape.toString());
- if (min != null) {
- str.append(" min=").append(min);
- }
- if (max != null) {
- str.append(" max=").append(max);
- }
str.append(" distPrec=").append(String.format(Locale.ROOT, "%.2f%%", distPrecision / 100d));
str.append(')');
return str.toString();
@@ -111,19 +101,4 @@ public class SpatialArgs {
this.distPrecision = distPrecision;
}
- public Double getMin() {
- return min;
- }
-
- public void setMin(Double min) {
- this.min = min;
- }
-
- public Double getMax() {
- return max;
- }
-
- public void setMax(Double max) {
- this.max = max;
- }
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgsParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgsParser.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgsParser.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgsParser.java Mon Aug 20 23:43:27 2012
@@ -75,8 +75,6 @@ public class SpatialArgsParser {
body = v.substring(edx + 1).trim();
if (body.length() > 0) {
Map<String, String> aa = parseMap(body);
- args.setMin(readDouble(aa.remove("min")));
- args.setMax(readDouble(aa.remove("max")));
args.setDistPrecision(readDouble(aa.remove("distPrec")));
if (!aa.isEmpty()) {
throw new InvalidSpatialArgument("unused parameters: " + aa, null);
Modified: lucene/dev/branches/pforcodec_3892/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java Mon Aug 20 23:43:27 2012
@@ -20,9 +20,10 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
-import java.util.NoSuchElementException;
import java.util.Set;
+import org.apache.lucene.util.FilterIterator;
+
/**
* A {@link FilterAtomicReader} that exposes only a subset
* of fields from the underlying wrapped reader.
@@ -62,8 +63,9 @@ public final class FieldFilterAtomicRead
return null;
}
f = new FieldFilterFields(f);
- // we need to check for emptyness, so we can return null:
- return (f.iterator().next() == null) ? null : f;
+ // we need to check for emptiness, so we can return
+ // null:
+ return f.iterator().hasNext() ? f : null;
}
@Override
@@ -138,55 +140,16 @@ public final class FieldFilterAtomicRead
@Override
public int size() {
- // TODO: add faster implementation!
- int c = 0;
- final Iterator<String> it = iterator();
- while (it.next() != null) {
- c++;
- }
- return c;
+ // this information is not cheap, return -1 like MultiFields does:
+ return -1;
}
@Override
public Iterator<String> iterator() {
- final Iterator<String> in = super.iterator();
- return new Iterator<String>() {
- String cached = null;
-
- @Override
- public String next() {
- if (cached != null) {
- String next = cached;
- cached = null;
- return next;
- } else {
- String next = doNext();
- if (next == null) {
- throw new NoSuchElementException();
- } else {
- return next;
- }
- }
- }
-
- @Override
- public boolean hasNext() {
- return cached != null || (cached = doNext()) != null;
- }
-
- private String doNext() {
- while (in.hasNext()) {
- String field = in.next();
- if (hasField(field)) {
- return field;
- }
- }
- return null;
- }
-
+ return new FilterIterator<String>(super.iterator()) {
@Override
- public void remove() {
- throw new UnsupportedOperationException();
+ protected boolean predicateFunction(String field) {
+ return hasField(field);
}
};
}
Modified: lucene/dev/branches/pforcodec_3892/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/solr/CHANGES.txt?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/solr/CHANGES.txt (original)
+++ lucene/dev/branches/pforcodec_3892/solr/CHANGES.txt Mon Aug 20 23:43:27 2012
@@ -78,6 +78,11 @@ Bug Fixes
when requesting multiple stats.facet fields.
(Roman Kliewer via hossman)
+* SOLR-3743: Fixed issues with atomic updates and optimistic concurrency in
+ conjunction with stored copyField targets by making real-time get never
+ return copyField targets. (yonik)
+
+
Other Changes
----------------------
Modified: lucene/dev/branches/pforcodec_3892/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java (original)
+++ lucene/dev/branches/pforcodec_3892/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java Mon Aug 20 23:43:27 2012
@@ -255,6 +255,7 @@ public class RealTimeGetComponent extend
SchemaField sf = schema.getFieldOrNull(f.name());
Object val = null;
if (sf != null) {
+ if (!sf.stored() || schema.isCopyFieldTarget(sf)) continue;
val = sf.getType().toObject(f); // object or external string?
} else {
val = f.stringValue();
@@ -277,6 +278,10 @@ public class RealTimeGetComponent extend
Object existing = out.get(f.name());
if (existing == null) {
SchemaField sf = schema.getFieldOrNull(f.name());
+
+ // don't return copyField targets
+ if (sf != null && schema.isCopyFieldTarget(sf)) continue;
+
if (sf != null && sf.multiValued()) {
List<Object> vals = new ArrayList<Object>();
vals.add( f );
@@ -301,7 +306,7 @@ public class RealTimeGetComponent extend
// copy the stored fields only
Document out = new Document();
for (IndexableField f : doc.getFields()) {
- if (f.fieldType().stored()) {
+ if (f.fieldType().stored() ) {
out.add(f);
}
}
Modified: lucene/dev/branches/pforcodec_3892/solr/core/src/test-files/solr/collection1/conf/schema15.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/solr/core/src/test-files/solr/collection1/conf/schema15.xml?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/solr/core/src/test-files/solr/collection1/conf/schema15.xml (original)
+++ lucene/dev/branches/pforcodec_3892/solr/core/src/test-files/solr/collection1/conf/schema15.xml Mon Aug 20 23:43:27 2012
@@ -425,7 +425,7 @@
<field name="signatureField" type="string" indexed="true" stored="false"/>
<field name="uuid" type="uuid" stored="true" />
<field name="name" type="nametext" indexed="true" stored="true"/>
- <field name="text" type="text" indexed="true" stored="false"/>
+ <field name="text" type="text" indexed="true" stored="false" multiValued="true" />
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="title" type="nametext" indexed="true" stored="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
@@ -522,6 +522,9 @@
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>
+
+ <field name="copyfield_source" type="string" indexed="true" stored="true" multiValued="true"/>
+
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
@@ -587,5 +590,8 @@
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
+
+ <copyField source="copyfield_source" dest="text"/>
+ <copyField source="copyfield_source" dest="copyfield_dest_ss"/> <!-- copyField into another stored copyField - not best practice -->
</schema>
Modified: lucene/dev/branches/pforcodec_3892/solr/core/src/test/org/apache/solr/update/TestUpdate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/solr/core/src/test/org/apache/solr/update/TestUpdate.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/solr/core/src/test/org/apache/solr/update/TestUpdate.java (original)
+++ lucene/dev/branches/pforcodec_3892/solr/core/src/test/org/apache/solr/update/TestUpdate.java Mon Aug 20 23:43:27 2012
@@ -68,7 +68,7 @@ public class TestUpdate extends SolrTest
doUpdateTest(new Callable() {
@Override
public Object call() throws Exception {
- commit("softCommit","false");
+ assertU(commit("softCommit","false"));
return null;
}
});
@@ -82,15 +82,15 @@ public class TestUpdate extends SolrTest
long version;
- version = addAndGetVersion(sdoc("id","1", "val_i",5), null);
+ version = addAndGetVersion(sdoc("id","1", "val_i",5, "copyfield_source","a"), null);
afterUpdate.call();
- version = addAndGetVersion(sdoc("id","1", "val_is",map("add",10)), null);
+ version = addAndGetVersion(sdoc("id","1", "val_is",map("add",10), "copyfield_source",map("add","b")), null);
afterUpdate.call();
version = addAndGetVersion(sdoc("id","1", "val_is",map("add",5)), null);
afterUpdate.call();
- assertJQ(req("qt","/get", "id","1", "fl","id,*_i,*_is")
- ,"=={'doc':{'id':'1', 'val_i':5, 'val_is':[10,5]}}"
+ assertJQ(req("qt","/get", "id","1", "fl","id,*_i,*_is,copyfield_*")
+ ,"=={'doc':{'id':'1', 'val_i':5, 'val_is':[10,5], 'copyfield_source':['a','b']}}" // real-time get should not return stored copyfield targets
);
version = addAndGetVersion(sdoc("id","1", "val_is",map("add",-1), "val_i",map("set",100)), null);
@@ -101,6 +101,14 @@ public class TestUpdate extends SolrTest
);
+ // Do a search to get all stored fields back and make sure that the stored copyfield target only
+ // has one copy of the source. This may not be supported forever!
+ assertU(commit("softCommit","true"));
+ assertJQ(req("q","*:*", "fl","id,*_i,*_is,copyfield_*")
+ ,"/response/docs/[0]=={'id':'1', 'val_i':100, 'val_is':[10,5,-1], 'copyfield_source':['a','b'], 'copyfield_dest_ss':['a','b']}"
+ );
+
+
long version2;
try {
// try bad version added as a field in the doc
Modified: lucene/dev/branches/pforcodec_3892/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java (original)
+++ lucene/dev/branches/pforcodec_3892/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java Mon Aug 20 23:43:27 2012
@@ -78,9 +78,18 @@ public class HttpClientUtil {
static final DefaultHttpRequestRetryHandler NO_RETRY = new DefaultHttpRequestRetryHandler(
0, false);
+ private static HttpClientConfigurer configurer = new HttpClientConfigurer();
+
private HttpClientUtil(){}
/**
+ * Replace the {@link HttpClientConfigurer} class used in configuring the http
+ * clients with a custom implementation.
+ *
+ * @param newConfigurer the configurer to store in the static {@code configurer}
+ * field; {@code configureClient} delegates to whatever is stored there
+ */
+ public static void setConfigurer(HttpClientConfigurer newConfigurer) {
+ // NOTE(review): null is not rejected here; a null value would only surface
+ // later, when configureClient calls configurer.configure(...).
+ configurer = newConfigurer;
+ }
+ /**
* Creates new http client by using the provided configuration.
*
* @param params
@@ -103,38 +112,7 @@ public class HttpClientUtil {
*/
public static void configureClient(final DefaultHttpClient httpClient,
SolrParams config) {
-
- if (config.get(PROP_MAX_CONNECTIONS) != null) {
- setMaxConnections(httpClient, config.getInt(PROP_MAX_CONNECTIONS));
- }
-
- if (config.get(PROP_MAX_CONNECTIONS_PER_HOST) != null) {
- setMaxConnectionsPerHost(httpClient, config.getInt(PROP_MAX_CONNECTIONS_PER_HOST));
- }
-
- if (config.get(PROP_CONNECTION_TIMEOUT) != null) {
- setConnectionTimeout(httpClient, config.getInt(PROP_CONNECTION_TIMEOUT));
- }
-
- if (config.get(PROP_SO_TIMEOUT) != null) {
- setSoTimeout(httpClient, config.getInt(PROP_SO_TIMEOUT));
- }
-
- if (config.get(PROP_USE_RETRY) != null) {
- setUseRetry(httpClient, config.getBool(PROP_USE_RETRY));
- }
-
- if (config.get(PROP_FOLLOW_REDIRECTS) != null) {
- setFollowRedirects(httpClient, config.getBool(PROP_FOLLOW_REDIRECTS));
- }
-
- final String basicAuthUser = config.get(PROP_BASIC_AUTH_USER);
- final String basicAuthPass = config.get(PROP_BASIC_AUTH_PASS);
- setBasicAuth(httpClient, basicAuthUser, basicAuthPass);
-
- if (config.get(PROP_ALLOW_COMPRESSION) != null) {
- setAllowCompression(httpClient, config.getBool(PROP_ALLOW_COMPRESSION));
- }
+ configurer.configure(httpClient, config);
}
/**
Modified: lucene/dev/branches/pforcodec_3892/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpClientUtilTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpClientUtilTest.java?rev=1375322&r1=1375321&r2=1375322&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpClientUtilTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpClientUtilTest.java Mon Aug 20 23:43:27 2012
@@ -18,6 +18,8 @@ package org.apache.solr.client.solrj.imp
import static org.junit.Assert.assertEquals;
+import java.util.concurrent.atomic.AtomicInteger;
+
import org.apache.http.auth.AuthScope;
import org.apache.http.client.HttpClient;
import org.apache.http.client.params.ClientPNames;
@@ -25,6 +27,7 @@ import org.apache.http.impl.client.Defau
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.HttpConnectionParams;
import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
import org.junit.Test;
public class HttpClientUtilTest {
@@ -60,4 +63,31 @@ public class HttpClientUtilTest {
client.getConnectionManager().shutdown();
}
+ /**
+ * Checks that a configurer installed through HttpClientUtil.setConfigurer is
+ * the one that runs when a client is created via HttpClientUtil.createClient.
+ */
+ @Test
+ public void testReplaceConfigurer(){
+
+ try {
+ // receives the "custom-param" value observed by the custom configurer
+ final AtomicInteger counter = new AtomicInteger();
+ HttpClientConfigurer custom = new HttpClientConfigurer(){
+ @Override
+ protected void configure(DefaultHttpClient httpClient, SolrParams config) {
+ // keep the stock configuration, then record that this override ran
+ super.configure(httpClient, config);
+ counter.set(config.getInt("custom-param", -1));
+ }
+
+ };
+
+ HttpClientUtil.setConfigurer(custom);
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set("custom-param", 5);
+ // the client is only needed to trigger configuration; shut it down right away
+ HttpClientUtil.createClient(params).getConnectionManager().shutdown();
+ // 5 here means the custom configurer was invoked with the params above
+ assertEquals(5, counter.get());
+ } finally {
+ //restore default configurer
+ // (the configurer is installed via a static setter, so it must be reset for other tests)
+ HttpClientUtil.setConfigurer(new HttpClientConfigurer());
+ }
+
+ }
+
}