You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/01/04 15:53:21 UTC
svn commit: r1649347 [6/31] - in /lucene/dev/branches/lucene6005: ./
dev-tools/ dev-tools/idea/solr/contrib/dataimporthandler-extras/
dev-tools/idea/solr/contrib/extraction/
dev-tools/idea/solr/contrib/map-reduce/
dev-tools/idea/solr/contrib/velocity/ ...
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java Sun Jan 4 14:53:12 2015
@@ -36,6 +36,9 @@ import org.apache.lucene.util.FrequencyT
*/
public final class UsageTrackingFilterCachingPolicy implements FilterCachingPolicy {
+ // the hash code that we use as a sentinel in the ring buffer.
+ private static final int SENTINEL = Integer.MIN_VALUE;
+
static boolean isCostly(Filter filter) {
// This does not measure the cost of iterating over the filter (for this we
// already have the DocIdSetIterator#cost API) but the cost to build the
@@ -51,7 +54,7 @@ public final class UsageTrackingFilterCa
}
private final FilterCachingPolicy.CacheOnLargeSegments segmentPolicy;
- private final FrequencyTrackingRingBuffer<Integer> recentlyUsedFilters;
+ private final FrequencyTrackingRingBuffer recentlyUsedFilters;
private final int minFrequencyCostlyFilters;
private final int minFrequencyCheapFilters;
private final int minFrequencyOtherFilters;
@@ -96,17 +99,18 @@ public final class UsageTrackingFilterCa
if (minFrequencyCheapFilters > historySize || minFrequencyCostlyFilters > historySize || minFrequencyOtherFilters > historySize) {
throw new IllegalArgumentException("The minimum frequencies should be less than the size of the history of filters that are being tracked");
}
- this.recentlyUsedFilters = new FrequencyTrackingRingBuffer<>(historySize);
+ this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
this.minFrequencyCostlyFilters = minFrequencyCostlyFilters;
this.minFrequencyCheapFilters = minFrequencyCheapFilters;
this.minFrequencyOtherFilters = minFrequencyOtherFilters;
}
@Override
- public void onCache(Filter filter) {
- // Using the filter hash codes might help keep memory usage a bit lower
- // since some filters might have non-negligible memory usage?
- recentlyUsedFilters.add(filter.hashCode());
+ public void onUse(Filter filter) {
+ // we only track hash codes, which keeps memory usage lower than holding on to the filters themselves
+ synchronized (this) {
+ recentlyUsedFilters.add(filter.hashCode());
+ }
}
@Override
@@ -114,7 +118,10 @@ public final class UsageTrackingFilterCa
if (segmentPolicy.shouldCache(filter, context, set) == false) {
return false;
}
- final int frequency = recentlyUsedFilters.frequency(filter.hashCode());
+ final int frequency;
+ synchronized (this) {
+ frequency = recentlyUsedFilters.frequency(filter.hashCode());
+ }
if (frequency >= minFrequencyOtherFilters) {
return true;
} else if (isCostly(filter) && frequency >= minFrequencyCostlyFilters) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/package.html?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/package.html (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/package.html Sun Jan 4 14:53:12 2015
@@ -521,7 +521,7 @@ on the built-in available scoring models
back
out of Lucene (similar to Doug adding SpanQuery functionality).</p>
-<!-- TODO: integrate this better, its better served as an intro than an appendix -->
+<!-- TODO: integrate this better, it's better served as an intro than an appendix -->
<a name="algorithm"></a>
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java Sun Jan 4 14:53:12 2015
@@ -233,7 +233,7 @@ public abstract class Similarity {
/** The value for normalization of contained query clauses (e.g. sum of squared weights).
* <p>
* NOTE: a Similarity implementation might not use any query normalization at all,
- * its not required. However, if it wants to participate in query normalization,
+ * it's not required. However, if it wants to participate in query normalization,
* it can return a value here.
*/
public abstract float getValueForNormalization();
@@ -241,7 +241,7 @@ public abstract class Similarity {
/** Assigns the query normalization factor and boost from parent queries to this.
* <p>
* NOTE: a Similarity implementation might not use this normalized value at all,
- * its not required. However, its usually a good idea to at least incorporate
+ * it's not required. However, it's usually a good idea to at least incorporate
* the topLevelBoost (e.g. from an outer BooleanQuery) into its score.
*/
public abstract void normalize(float queryNorm, float topLevelBoost);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Sun Jan 4 14:53:12 2015
@@ -92,7 +92,7 @@ public class SpanTermQuery extends SpanQ
final TermState state;
if (termContext == null) {
// this happens with span-not query, as it doesn't include the NOT side in extractTerms()
- // so we seek to the term now in this segment..., this sucks because its ugly mostly!
+ // so we seek to the term now in this segment..., this sucks because it's ugly mostly!
final Terms terms = context.reader().terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java Sun Jan 4 14:53:12 2015
@@ -213,7 +213,7 @@ abstract class ByteBufferIndexInput exte
try {
return buffers[bi].getShort((int) (pos & chunkSizeMask));
} catch (IndexOutOfBoundsException ioobe) {
- // either its a boundary, or read past EOF, fall back:
+ // either it's a boundary, or read past EOF, fall back:
setPos(pos, bi);
return readShort();
} catch (NullPointerException npe) {
@@ -227,7 +227,7 @@ abstract class ByteBufferIndexInput exte
try {
return buffers[bi].getInt((int) (pos & chunkSizeMask));
} catch (IndexOutOfBoundsException ioobe) {
- // either its a boundary, or read past EOF, fall back:
+ // either it's a boundary, or read past EOF, fall back:
setPos(pos, bi);
return readInt();
} catch (NullPointerException npe) {
@@ -241,7 +241,7 @@ abstract class ByteBufferIndexInput exte
try {
return buffers[bi].getLong((int) (pos & chunkSizeMask));
} catch (IndexOutOfBoundsException ioobe) {
- // either its a boundary, or read past EOF, fall back:
+ // either it's a boundary, or read past EOF, fall back:
setPos(pos, bi);
return readLong();
} catch (NullPointerException npe) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/Directory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/Directory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/Directory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/Directory.java Sun Jan 4 14:53:12 2015
@@ -126,7 +126,7 @@ public abstract class Directory implemen
}
/**
- * Copies the file <i>src</i> to {@link Directory} <i>to</i> under the new
+ * Copies the file <i>src</i> in <i>from</i> to this directory under the new
* file name <i>dest</i>.
* <p>
* If you want to copy the entire source directory to the destination one, you
@@ -135,31 +135,22 @@ public abstract class Directory implemen
* <pre class="prettyprint">
* Directory to; // the directory to copy to
* for (String file : dir.listAll()) {
- * dir.copy(to, file, newFile, IOContext.DEFAULT); // newFile can be either file, or a new name
+ * to.copyFrom(dir, file, newFile, IOContext.DEFAULT); // newFile can be either file, or a new name
* }
* </pre>
* <p>
* <b>NOTE:</b> this method does not check whether <i>dest</i> exist and will
* overwrite it if it does.
*/
- public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
- IndexOutput os = null;
- IndexInput is = null;
+ public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException {
boolean success = false;
- try {
- os = to.createOutput(dest, context);
- is = openInput(src, context);
+ try (IndexInput is = from.openInput(src, context);
+ IndexOutput os = createOutput(dest, context)) {
os.copyBytes(is, is.length());
success = true;
} finally {
- if (success) {
- IOUtils.close(os, is);
- } else {
- IOUtils.closeWhileHandlingException(os, is);
- try {
- to.deleteFile(dest);
- } catch (Throwable t) {
- }
+ if (!success) {
+ IOUtils.deleteFilesIgnoringExceptions(this, dest);
}
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java Sun Jan 4 14:53:12 2015
@@ -17,9 +17,6 @@ package org.apache.lucene.store;
* limitations under the License.
*/
-import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.IOUtils;
-
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
@@ -29,12 +26,11 @@ import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.concurrent.Future;
-import static java.util.Collections.synchronizedSet;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.IOUtils;
/**
* Base class for Directory implementations that store index
@@ -116,7 +112,6 @@ import static java.util.Collections.sync
public abstract class FSDirectory extends BaseDirectory {
protected final Path directory; // The underlying filesystem directory
- protected final Set<String> staleFiles = synchronizedSet(new HashSet<String>()); // Files written, but not yet sync'ed
/** Create a new FSDirectory for the named location (ctor for subclasses).
* @param path the path of the directory
@@ -126,7 +121,7 @@ public abstract class FSDirectory extend
*/
protected FSDirectory(Path path, LockFactory lockFactory) throws IOException {
super(lockFactory);
- Files.createDirectories(path); // create directory, if it doesnt exist
+ Files.createDirectories(path); // create directory, if it doesn't exist
directory = path.toRealPath();
}
@@ -208,7 +203,6 @@ public abstract class FSDirectory extend
public void deleteFile(String name) throws IOException {
ensureOpen();
Files.delete(directory.resolve(name));
- staleFiles.remove(name);
}
/** Creates an IndexOutput for the file with the given name. */
@@ -224,25 +218,13 @@ public abstract class FSDirectory extend
Files.deleteIfExists(directory.resolve(name)); // delete existing, if any
}
- /**
- * Sub classes should call this method on closing an open {@link IndexOutput}, reporting the name of the file
- * that was closed. {@code FSDirectory} needs this information to take care of syncing stale files.
- */
- protected void onIndexOutputClosed(String name) {
- staleFiles.add(name);
- }
-
@Override
public void sync(Collection<String> names) throws IOException {
ensureOpen();
- Set<String> toSync = new HashSet<>(names);
- toSync.retainAll(staleFiles);
- for (String name : toSync) {
+ for (String name : names) {
fsync(name);
}
-
- staleFiles.removeAll(toSync);
}
@Override
@@ -279,8 +261,6 @@ public abstract class FSDirectory extend
*/
static final int CHUNK_SIZE = 8192;
- private final String name;
-
public FSIndexOutput(String name) throws IOException {
super("FSIndexOutput(path=\"" + directory.resolve(name) + "\")", new FilterOutputStream(Files.newOutputStream(directory.resolve(name))) {
// This implementation ensures, that we never write more than CHUNK_SIZE bytes:
@@ -294,16 +274,6 @@ public abstract class FSDirectory extend
}
}
}, CHUNK_SIZE);
- this.name = name;
- }
-
- @Override
- public void close() throws IOException {
- try {
- onIndexOutputClosed(name);
- } finally {
- super.close();
- }
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FilterDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FilterDirectory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FilterDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/FilterDirectory.java Sun Jan 4 14:53:12 2015
@@ -31,6 +31,15 @@ import java.util.Collection;
* @lucene.internal */
public class FilterDirectory extends Directory {
+ /** Returns the innermost {@link Directory} wrapped by <code>dir</code>, unwrapping for as
+ * long as the current directory is an instance of {@link FilterDirectory}. */
+ public static Directory unwrap(Directory dir) {
+ while (dir instanceof FilterDirectory) {
+ dir = ((FilterDirectory) dir).in;
+ }
+ return dir;
+ }
+
protected final Directory in;
/** Sole constructor, typically called from sub-classes. */
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java Sun Jan 4 14:53:12 2015
@@ -244,7 +244,7 @@ public class MMapDirectory extends FSDir
final String originalMessage;
final Throwable originalCause;
if (ioe.getCause() instanceof OutOfMemoryError) {
- // nested OOM confuses users, because its "incorrect", just print a plain message:
+ // nested OOM confuses users, because it's "incorrect", just print a plain message:
originalMessage = "Map failed";
originalCause = null;
} else {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java Sun Jan 4 14:53:12 2015
@@ -200,11 +200,21 @@ public class NRTCachingDirectory extends
// it for defensive reasons... or in case the app is
// doing something custom (creating outputs directly w/o
// using IndexWriter):
- for(String fileName : cache.listAll()) {
- unCache(fileName);
+ boolean success = false;
+ try {
+ if (cache.isOpen) {
+ for(String fileName : cache.listAll()) {
+ unCache(fileName);
+ }
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(cache, in);
+ } else {
+ IOUtils.closeWhileHandlingException(cache, in);
+ }
}
- cache.close();
- in.close();
}
/** Subclass can override this to customize logic; return
@@ -260,7 +270,7 @@ public class NRTCachingDirectory extends
}
@Override
- public Iterable<? extends Accountable> getChildResources() {
+ public Collection<Accountable> getChildResources() {
return Collections.singleton(Accountables.namedAccountable("cache", cache));
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java Sun Jan 4 14:53:12 2015
@@ -114,7 +114,7 @@ public final class NativeFSLockFactory e
Files.createFile(path);
} catch (IOException ignore) {
// we must create the file to have a truly canonical path.
- // if its already created, we don't care. if it cant be created, it will fail below.
+ // if it's already created, we don't care. if it can't be created, it will fail below.
}
final Path canonicalPath = path.toRealPath();
// Make sure nobody else in-process has this lock held
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java Sun Jan 4 14:53:12 2015
@@ -30,6 +30,7 @@ public class OutputStreamIndexOutput ext
private final BufferedOutputStream os;
private long bytesWritten = 0L;
+ private boolean flushedOnClose = false;
/**
* Creates a new {@link OutputStreamIndexOutput} with the given buffer size.
@@ -58,9 +59,14 @@ public class OutputStreamIndexOutput ext
try (final OutputStream o = os) {
// We want to make sure that os.flush() was running before close:
// BufferedOutputStream may ignore IOExceptions while flushing on close().
- // TODO: this is no longer an issue in Java 8:
- // http://hg.openjdk.java.net/jdk8/tl/jdk/rev/759aa847dcaf
- o.flush();
+ // We keep this also in Java 8, although it claims to be fixed there,
+ // because there are more bugs around this! See:
+ // # https://bugs.openjdk.java.net/browse/JDK-7015589
+ // # https://bugs.openjdk.java.net/browse/JDK-8054565
+ if (!flushedOnClose) {
+ flushedOnClose = true; // set this BEFORE calling flush!
+ o.flush();
+ }
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java Sun Jan 4 14:53:12 2015
@@ -94,7 +94,7 @@ public class RAMDirectory extends BaseDi
private RAMDirectory(Directory dir, boolean closeDir, IOContext context) throws IOException {
this();
for (String file : dir.listAll()) {
- dir.copy(this, file, file, context);
+ copyFrom(dir, file, file, context);
}
if (closeDir) {
dir.close();
@@ -105,7 +105,7 @@ public class RAMDirectory extends BaseDi
public final String[] listAll() {
ensureOpen();
// NOTE: this returns a "weakly consistent view". Unless we change Dir API, keep this,
- // and do not synchronize or anything stronger. its great for testing!
+ // and do not synchronize or anything stronger. it's great for testing!
// NOTE: fileMap.keySet().toArray(new String[0]) is broken in non Sun JDKs,
// and the code below is resilient to map changes during the array population.
Set<String> fileNames = fileMap.keySet();
@@ -143,7 +143,7 @@ public class RAMDirectory extends BaseDi
}
@Override
- public Iterable<? extends Accountable> getChildResources() {
+ public Collection<Accountable> getChildResources() {
return Accountables.namedAccountables("file", fileMap);
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java Sun Jan 4 14:53:12 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.store;
*/
import java.io.IOException;
+import java.util.Collection;
import java.util.Collections;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
@@ -193,7 +194,7 @@ public class RAMOutputStream extends Ind
}
@Override
- public Iterable<? extends Accountable> getChildResources() {
+ public Collection<Accountable> getChildResources() {
return Collections.singleton(Accountables.namedAccountable("file", file));
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RateLimitedDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RateLimitedDirectoryWrapper.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RateLimitedDirectoryWrapper.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/RateLimitedDirectoryWrapper.java Sun Jan 4 14:53:12 2015
@@ -51,9 +51,9 @@ public final class RateLimitedDirectoryW
}
@Override
- public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
+ public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException {
ensureOpen();
- in.copy(to, src, dest, context);
+ in.copyFrom(from, src, dest, context);
}
private RateLimiter getRateLimiter(IOContext.Context context) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java Sun Jan 4 14:53:12 2015
@@ -34,20 +34,30 @@ public final class TrackingDirectoryWrap
@Override
public void deleteFile(String name) throws IOException {
- createdFileNames.remove(name);
in.deleteFile(name);
+ createdFileNames.remove(name);
}
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
+ IndexOutput output = in.createOutput(name, context);
createdFileNames.add(name);
- return in.createOutput(name, context);
+ return output;
}
@Override
- public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
+ public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException {
+ in.copyFrom(from, src, dest, context);
createdFileNames.add(dest);
- in.copy(to, src, dest, context);
+ }
+
+ @Override
+ public void renameFile(String source, String dest) throws IOException {
+ in.renameFile(source, dest);
+ synchronized (createdFileNames) {
+ createdFileNames.add(dest);
+ createdFileNames.remove(source);
+ }
}
// maybe clone before returning.... all callers are
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountable.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountable.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountable.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountable.java Sun Jan 4 14:53:12 2015
@@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License.
*/
+import java.util.Collection;
import java.util.Collections;
/**
@@ -36,7 +37,7 @@ public interface Accountable {
* The result should be a point-in-time snapshot (to avoid race conditions).
* @see Accountables
*/
- default Iterable<? extends Accountable> getChildResources() {
+ default Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountables.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountables.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountables.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/Accountables.java Sun Jan 4 14:53:12 2015
@@ -112,15 +112,17 @@ public class Accountables {
}
/**
- * Returns an accountable with the provided description, bytes, and children.
+ * Returns an accountable with the provided description, children and bytes.
* <p>
* The resource descriptions are constructed in this format:
* {@code description [toString()]}
* <p>
* This is a point-in-time type safe view: consumers
- * will not be able to cast or manipulate the resources in any way..
+ * will not be able to cast or manipulate the resources in any way, provided
+ * that the passed in children Accountables (and all their descendants) were created
+ * with one of the namedAccountable functions.
*/
- private static Accountable namedAccountable(final String description, final Iterable<? extends Accountable> children, final long bytes) {
+ public static Accountable namedAccountable(final String description, final Collection<Accountable> children, final long bytes) {
return new Accountable() {
@Override
public long ramBytesUsed() {
@@ -128,7 +130,7 @@ public class Accountables {
}
@Override
- public Iterable<? extends Accountable> getChildResources() {
+ public Collection<Accountable> getChildResources() {
return children;
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java Sun Jan 4 14:53:12 2015
@@ -107,6 +107,11 @@ public class BitDocIdSet extends DocIdSe
this(maxDoc, false);
}
+ // pkg-private for testing
+ boolean dense() {
+ return denseSet != null;
+ }
+
/**
* Add the content of the provided {@link DocIdSetIterator} to this builder.
*/
@@ -157,8 +162,8 @@ public class BitDocIdSet extends DocIdSe
public void andNot(DocIdSetIterator it) throws IOException {
if (denseSet != null) {
denseSet.andNot(it);
- } else if (denseSet != null) {
- denseSet.andNot(it);
+ } else if (sparseSet != null) {
+ sparseSet.andNot(it);
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/FrequencyTrackingRingBuffer.java Sun Jan 4 14:53:12 2015
@@ -17,75 +17,202 @@ package org.apache.lucene.util;
* limitations under the License.
*/
-import java.util.ArrayDeque;
-import java.util.Collections;
-import java.util.Deque;
+import java.util.Arrays;
+import java.util.HashMap;
import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
/**
- * A ring buffer that tracks the frequency of the items that it contains.
- * This is typically useful to track popular recently-used items.
+ * A ring buffer that tracks the frequency of the integers that it contains.
+ * This is typically useful to track the hash codes of popular recently-used
+ * items.
*
- * This class is thread-safe.
+ * This data-structure requires 22 bytes per entry on average (between 16 and
+ * 28).
*
* @lucene.internal
*/
-public final class FrequencyTrackingRingBuffer<T> {
+public final class FrequencyTrackingRingBuffer implements Accountable {
- private final int maxSize;
- private final Deque<T> ringBuffer;
- private final ConcurrentMap<T, Integer> frequencies;
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FrequencyTrackingRingBuffer.class);
- /** Create a new ring buffer that will contain at most <code>size</code> items. */
- public FrequencyTrackingRingBuffer(int maxSize) {
+ private final int maxSize;
+ private final int[] buffer;
+ private int position;
+ private final IntBag frequencies;
+
+ /** Create a new ring buffer that will contain at most <code>maxSize</code> items.
+ * This buffer will initially contain <code>maxSize</code> times the
+ * <code>sentinel</code> value. */
+ public FrequencyTrackingRingBuffer(int maxSize, int sentinel) {
+ if (maxSize < 2) {
+ throw new IllegalArgumentException("maxSize must be at least 2");
+ }
this.maxSize = maxSize;
- this.ringBuffer = new ArrayDeque<>(maxSize);
- this.frequencies = new ConcurrentHashMap<>();
+ buffer = new int[maxSize];
+ position = 0;
+ frequencies = new IntBag(maxSize);
+
+ Arrays.fill(buffer, sentinel);
+ for (int i = 0; i < maxSize; ++i) {
+ frequencies.add(sentinel);
+ }
+ assert frequencies.frequency(sentinel) == maxSize;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED
+ + frequencies.ramBytesUsed()
+ + RamUsageEstimator.sizeOf(buffer);
}
/**
* Add a new item to this ring buffer, potentially removing the oldest
* entry from this buffer if it is already full.
*/
- public synchronized void add(T item) {
- // we need this method to be protected by a lock since it is important for
- // correctness that the ring buffer and the frequencies table have
- // consistent content
- if (item == null) {
- throw new IllegalArgumentException("null items are not supported");
- }
- assert ringBuffer.size() <= maxSize;
- if (ringBuffer.size() == maxSize) {
- // evict the oldest entry
- final T removed = ringBuffer.removeFirst();
- final int newFrequency = frequency(removed) - 1;
- if (newFrequency == 0) {
- // free for GC
- frequencies.remove(removed);
- } else {
- frequencies.put(removed, newFrequency);
- }
+ public void add(int i) {
+ // remove the previous value
+ final int removed = buffer[position];
+ final boolean removedFromBag = frequencies.remove(removed);
+ assert removedFromBag;
+ // add the new value
+ buffer[position] = i;
+ frequencies.add(i);
+ // increment the position
+ position += 1;
+ if (position == maxSize) {
+ position = 0;
}
-
- // add the new entry and update frequencies
- ringBuffer.addLast(item);
- frequencies.put(item, frequency(item) + 1);
}
/**
- * Returns the frequency of the provided item in the ring buffer.
+ * Returns the frequency of the provided key in the ring buffer.
*/
- public int frequency(T item) {
- // The use of a concurrent hash map allows us to not use a lock for this read-only method
- final Integer freq = frequencies.get(item);
- return freq == null ? 0 : freq;
+ public int frequency(int key) {
+ return frequencies.frequency(key);
}
// pkg-private for testing
- Map<T, Integer> asFrequencyMap() {
- return Collections.unmodifiableMap(frequencies);
+ Map<Integer, Integer> asFrequencyMap() {
+ return frequencies.asMap();
+ }
+
+ /**
+ * A bag of integers.
+ * Since in the context of the ring buffer the maximum size is known up-front
+ * there is no need to worry about resizing the underlying storage.
+ */
+ private static class IntBag implements Accountable {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntBag.class);
+
+ private final int[] keys;
+ private final int[] freqs;
+ private final int mask;
+
+ IntBag(int maxSize) {
+ // load factor of 2/3
+ int capacity = Math.max(2, maxSize * 3 / 2);
+ // round up to the next power of two
+ capacity = Integer.highestOneBit(capacity - 1) << 1;
+ assert capacity > maxSize;
+ keys = new int[capacity];
+ freqs = new int[capacity];
+ mask = capacity - 1;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED
+ + RamUsageEstimator.sizeOf(keys)
+ + RamUsageEstimator.sizeOf(freqs);
+ }
+
+ /** Return the frequency of the given key in the bag. */
+ int frequency(int key) {
+ for (int slot = key & mask; ; slot = (slot + 1) & mask) {
+ if (keys[slot] == key) {
+ return freqs[slot];
+ } else if (freqs[slot] == 0) {
+ return 0;
+ }
+ }
+ }
+
+ /** Increment the frequency of the given key by 1 and return its new frequency. */
+ int add(int key) {
+ for (int slot = key & mask; ; slot = (slot + 1) & mask) {
+ if (freqs[slot] == 0) {
+ keys[slot] = key;
+ return freqs[slot] = 1;
+ } else if (keys[slot] == key) {
+ return ++freqs[slot];
+ }
+ }
+ }
+
+ /** Decrement the frequency of the given key by one, or do nothing if the
+ * key is not present in the bag. Returns true iff the key was contained
+ * in the bag. */
+ boolean remove(int key) {
+ for (int slot = key & mask; ; slot = (slot + 1) & mask) {
+ if (freqs[slot] == 0) {
+ // no such key in the bag
+ return false;
+ } else if (keys[slot] == key) {
+ final int newFreq = --freqs[slot];
+ if (newFreq == 0) { // removed
+ relocateAdjacentKeys(slot);
+ }
+ return true;
+ }
+ }
+ }
+
+ private void relocateAdjacentKeys(int freeSlot) {
+ for (int slot = (freeSlot + 1) & mask; ; slot = (slot + 1) & mask) {
+ final int freq = freqs[slot];
+ if (freq == 0) {
+ // end of the collision chain, we're done
+ break;
+ }
+ final int key = keys[slot];
+ // the slot where <code>key</code> should be if there were no collisions
+ final int expectedSlot = key & mask;
+ // if the free slot is between the expected slot and the slot where the
+ // key is, then we can relocate there
+ if (between(expectedSlot, slot, freeSlot)) {
+ keys[freeSlot] = key;
+ freqs[freeSlot] = freq;
+ // slot is the new free slot
+ freqs[slot] = 0;
+ freeSlot = slot;
+ }
+ }
+ }
+
+ /** Given a chain of occupied slots between <code>chainStart</code>
+ * and <code>chainEnd</code>, return whether <code>slot</code> is
+ * between the start and end of the chain. */
+ private static boolean between(int chainStart, int chainEnd, int slot) {
+ if (chainStart <= chainEnd) {
+ return chainStart <= slot && slot <= chainEnd;
+ } else {
+ // the chain is across the end of the array
+ return slot >= chainStart || slot <= chainEnd;
+ }
+ }
+
+ Map<Integer, Integer> asMap() {
+ Map<Integer, Integer> map = new HashMap<>();
+ for (int i = 0; i < keys.length; ++i) {
+ if (freqs[i] > 0) {
+ map.put(keys[i], freqs[i]);
+ }
+ }
+ return map;
+ }
+
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/IOUtils.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/IOUtils.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/IOUtils.java Sun Jan 4 14:53:12 2015
@@ -17,8 +17,6 @@ package org.apache.lucene.util;
* limitations under the License.
*/
-import org.apache.lucene.store.Directory;
-
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
@@ -30,6 +28,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
+import java.nio.file.FileStore;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
@@ -41,6 +40,12 @@ import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FileSwitchDirectory;
+import org.apache.lucene.store.FilterDirectory;
+import org.apache.lucene.store.RAMDirectory;
+
/** This class emulates the new Java 7 "Try-With-Resources" statement.
* Remove once Lucene is on Java 7.
* @lucene.internal */
@@ -135,7 +140,7 @@ public final class IOUtils {
* the read charset doesn't match the expected {@link Charset}.
* <p>
* Decoding readers are useful to load configuration files, stopword lists or synonym files
- * to detect character set problems. However, its not recommended to use as a common purpose
+ * to detect character set problems. However, it's not recommended to use as a common purpose
* reader.
*
* @param stream the stream to wrap in a reader
@@ -155,7 +160,7 @@ public final class IOUtils {
* the read charset doesn't match the expected {@link Charset}.
* <p>
* Decoding readers are useful to load configuration files, stopword lists or synonym files
- * to detect character set problems. However, its not recommended to use as a common purpose
+ * to detect character set problems. However, it's not recommended to use as a common purpose
* reader.
* @param clazz the class used to locate the resource
* @param resource the resource name to load
@@ -412,4 +417,129 @@ public final class IOUtils {
// Throw original exception
throw exc;
}
+
+ /** If the dir is an {@link FSDirectory} or wraps one via possibly
+ * nested {@link FilterDirectory} or {@link FileSwitchDirectory},
+ * this returns {@link #spins(Path)} for the wrapped directory,
+ * false for a {@link RAMDirectory}, else, true.
+ *
+ * @throws IOException if the path of the wrapped {@link FSDirectory} does not exist.
+ *
+ * @lucene.internal */
+ public static boolean spins(Directory dir) throws IOException {
+ dir = FilterDirectory.unwrap(dir);
+ if (dir instanceof FileSwitchDirectory) {
+ FileSwitchDirectory fsd = (FileSwitchDirectory) dir;
+ // Spinning is contagious:
+ return spins(fsd.getPrimaryDir()) || spins(fsd.getSecondaryDir());
+ } else if (dir instanceof RAMDirectory) {
+ return false;
+ } else if (dir instanceof FSDirectory) {
+ return spins(((FSDirectory) dir).getDirectory());
+ } else {
+ return true;
+ }
+ }
+
+ /** Rough Linux-only heuristics to determine whether the provided
+ * {@code Path} is backed by spinning storage. For example, this
+ * returns false if the disk is a solid-state disk.
+ *
+ * @param path a location to check which must exist. The mount point will be determined from this location.
+ * @return false if the storage is non-rotational (e.g. an SSD), or true if it is spinning or could not be determined
+ * @throws IOException if {@code path} does not exist.
+ *
+ * @lucene.internal */
+ public static boolean spins(Path path) throws IOException {
+ // resolve symlinks (this will throw exception if the path does not exist)
+ path = path.toRealPath();
+
+ // Super cowboy approach, but seems to work!
+ if (!Constants.LINUX) {
+ return true; // no detection
+ }
+
+ try {
+ return spinsLinux(path);
+ } catch (Exception exc) {
+ // our crazy heuristics can easily trigger SecurityException, AIOOBE, etc ...
+ return true;
+ }
+ }
+
+ // following methods are package-private for testing ONLY
+
+ // note: requires a real or fake linux filesystem!
+ static boolean spinsLinux(Path path) throws IOException {
+ FileStore store = getFileStore(path);
+
+ // if fs type is tmpfs, it doesn't spin.
+ // this won't have a corresponding block device
+ if ("tmpfs".equals(store.type())) {
+ return false;
+ }
+
+ // get block device name
+ String devName = getBlockDevice(store);
+ // not a device (e.g. NFS server)
+ if (!devName.startsWith("/")) {
+ return true;
+ }
+
+ // resolve any symlinks to real block device (e.g. LVM)
+ // /dev/sda0 -> sda0
+ // /devices/XXX -> sda0
+ devName = path.getRoot().resolve(devName).toRealPath().getFileName().toString();
+
+ // now read:
+ Path sysinfo = path.getRoot().resolve("sys/block");
+ Path devinfo = sysinfo.resolve(devName);
+
+ // tear away partition numbers until we find it.
+ while (!Files.exists(devinfo)) {
+ if (!devName.isEmpty() && Character.isDigit(devName.charAt(devName.length()-1))) {
+ devName = devName.substring(0, devName.length()-1);
+ } else {
+ break; // give up
+ }
+ devinfo = sysinfo.resolve(devName);
+ }
+
+ // read first byte from rotational, it's a 1 if it spins.
+ Path info = devinfo.resolve("queue/rotational");
+ try (InputStream stream = Files.newInputStream(info)) {
+ return stream.read() == '1';
+ }
+ }
+
+ // Files.getFileStore(Path) useless here!
+ // don't complain, just try it yourself
+ static FileStore getFileStore(Path path) throws IOException {
+ FileStore store = Files.getFileStore(path);
+ String mount = getMountPoint(store);
+
+ // find the "matching" FileStore from system list, it's the one we want.
+ for (FileStore fs : path.getFileSystem().getFileStores()) {
+ if (mount.equals(getMountPoint(fs))) {
+ return fs;
+ }
+ }
+
+ // fall back to crappy one we got from Files.getFileStore
+ return store;
+ }
+
+ // these are hacks that are not guaranteed
+ static String getMountPoint(FileStore store) {
+ String desc = store.toString();
+ return desc.substring(0, desc.lastIndexOf('(') - 1);
+ }
+
+ // these are hacks that are not guaranteed
+ static String getBlockDevice(FileStore store) {
+ String desc = store.toString();
+ int start = desc.lastIndexOf('(');
+ int end = desc.indexOf(')', start);
+ return desc.substring(start+1, end);
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java Sun Jan 4 14:53:12 2015
@@ -200,10 +200,9 @@ public class QueryBuilder {
int numTokens = 0;
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
- boolean hasMoreTokens = false;
-
+ boolean hasMoreTokens = false;
+
try (TokenStream source = analyzer.tokenStream(field, queryText)) {
- source.reset();
buffer = new CachingTokenFilter(source);
buffer.reset();
@@ -230,15 +229,21 @@ public class QueryBuilder {
} catch (IOException e) {
throw new RuntimeException("Error analyzing query text", e);
}
-
+
// rewind the buffer stream
- buffer.reset();
+ try {
+ if (numTokens > 0) {
+ buffer.reset();//will never throw; the buffer is cached
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
- if (numTokens == 0)
+ if (numTokens == 0) {
return null;
- else if (numTokens == 1) {
+ } else if (numTokens == 1) {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java Sun Jan 4 14:53:12 2015
@@ -268,11 +268,8 @@ public class RoaringDocIdSet extends Doc
Iterator() throws IOException {
doc = -1;
- block = 0;
- while (docIdSets[block] == null) {
- block += 1;
- }
- sub = docIdSets[block].iterator();
+ block = -1;
+ sub = DocIdSetIterator.empty();
}
@Override
@@ -337,4 +334,8 @@ public class RoaringDocIdSet extends Doc
return cardinality;
}
+ @Override
+ public String toString() {
+ return "RoaringDocIdSet(cardinality=" + cardinality + ")";
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java Sun Jan 4 14:53:12 2015
@@ -515,4 +515,8 @@ public class SparseFixedBitSet extends B
return ramBytesUsed;
}
+ @Override
+ public String toString() {
+ return "SparseFixedBitSet(size=" + length + ",cardinality=~" + approximateCardinality() + ")"; // close the '(' opened after the class name
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java Sun Jan 4 14:53:12 2015
@@ -123,7 +123,7 @@ public final class UnicodeUtil {
(UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;
/** Maximum number of UTF8 bytes per UTF16 character. */
- public static final int MAX_UTF8_BYTES_PER_CHAR = 4;
+ public static final int MAX_UTF8_BYTES_PER_CHAR = 3;
/** Encode characters from a char[] source, starting at
* offset for length chars. It is the responsibility of the
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py Sun Jan 4 14:53:12 2015
@@ -325,7 +325,7 @@ def main():
minErrors = []
for i in xrange(len(stateMap2)-1):
w('// %s -> %s' % (i, stateMap2[i]))
- # we replace t-notation as its not relevant here
+ # we replace t-notation as it's not relevant here
st = stateMap2[i].replace('t', '')
v = eval(st)
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Sun Jan 4 14:53:12 2015
@@ -22,9 +22,10 @@ import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.util.ArrayList;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -40,8 +41,6 @@ import org.apache.lucene.util.Accountabl
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.Builder.UnCompiledNode;
@@ -120,7 +119,8 @@ public final class FST<T> implements Acc
*/
final static int FIXED_ARRAY_NUM_ARCS_DEEP = 10;
- private int[] bytesPerArc = new int[0];
+ // Reused temporarily while building the FST:
+ private int[] reusedBytesPerArc = new int[0];
// Increment version to change it
private final static String FILE_FORMAT_NAME = "FST";
@@ -180,8 +180,6 @@ public final class FST<T> implements Acc
private final boolean allowArrayArcs;
private Arc<T> cachedRootArcs[];
- private Arc<T> assertingCachedRootArcs[]; // only set wit assert
-
/** Represents a single arc. */
public final static class Arc<T> {
@@ -443,12 +441,12 @@ public final class FST<T> implements Acc
size += inCounts.ramBytesUsed();
}
size += cachedArcsBytesUsed;
- size += RamUsageEstimator.sizeOf(bytesPerArc);
+ size += RamUsageEstimator.sizeOf(reusedBytesPerArc);
return size;
}
@Override
- public Iterable<? extends Accountable> getChildResources() {
+ public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
if (packed) {
resources.add(Accountables.namedAccountable("node ref to address", nodeRefToAddress));
@@ -487,26 +485,22 @@ public final class FST<T> implements Acc
}
}
- // Caches first 128 labels
+ // Optionally caches first 128 labels
@SuppressWarnings({"rawtypes","unchecked"})
private void cacheRootArcs() throws IOException {
- cachedRootArcs = (Arc<T>[]) new Arc[0x80];
- readRootArcs(cachedRootArcs);
- cachedArcsBytesUsed += ramBytesUsed(cachedRootArcs);
-
- assert setAssertingRootArcs(cachedRootArcs);
- assert assertRootArcs();
- }
-
- public void readRootArcs(Arc<T>[] arcs) throws IOException {
+ // We should only be called once per FST:
+ assert cachedArcsBytesUsed == 0;
+
final Arc<T> arc = new Arc<>();
getFirstArc(arc);
- final BytesReader in = getBytesReader();
if (targetHasArcs(arc)) {
+ final BytesReader in = getBytesReader();
+ Arc<T>[] arcs = (Arc<T>[]) new Arc[0x80];
readFirstRealTargetArc(arc.target, arc, in);
+ int count = 0;
while(true) {
assert arc.label != END_LABEL;
- if (arc.label < cachedRootArcs.length) {
+ if (arc.label < arcs.length) {
arcs[arc.label] = new Arc<T>().copyFrom(arc);
} else {
break;
@@ -515,43 +509,19 @@ public final class FST<T> implements Acc
break;
}
readNextRealArc(arc, in);
+ count++;
+ }
+
+ int cacheRAM = (int) ramBytesUsed(arcs);
+
+ // Don't cache if there are only a few arcs or if the cache would use > 20% RAM of the FST itself:
+ if (count >= FIXED_ARRAY_NUM_ARCS_SHALLOW && cacheRAM < ramBytesUsed()/5) {
+ cachedRootArcs = arcs;
+ cachedArcsBytesUsed = cacheRAM;
}
}
}
- @SuppressWarnings({"rawtypes","unchecked"})
- private boolean setAssertingRootArcs(Arc<T>[] arcs) throws IOException {
- assertingCachedRootArcs = (Arc<T>[]) new Arc[arcs.length];
- readRootArcs(assertingCachedRootArcs);
- cachedArcsBytesUsed *= 2;
- return true;
- }
-
- private boolean assertRootArcs() {
- assert cachedRootArcs != null;
- assert assertingCachedRootArcs != null;
- for (int i = 0; i < cachedRootArcs.length; i++) {
- final Arc<T> root = cachedRootArcs[i];
- final Arc<T> asserting = assertingCachedRootArcs[i];
- if (root != null) {
- assert root.arcIdx == asserting.arcIdx;
- assert root.bytesPerArc == asserting.bytesPerArc;
- assert root.flags == asserting.flags;
- assert root.label == asserting.label;
- assert root.nextArc == asserting.nextArc;
- assert root.nextFinalOutput.equals(asserting.nextFinalOutput);
- assert root.node == asserting.node;
- assert root.numArcs == asserting.numArcs;
- assert root.output.equals(asserting.output);
- assert root.posArcsStart == asserting.posArcsStart;
- assert root.target == asserting.target;
- } else {
- assert root == null && asserting == null;
- }
- }
- return true;
- }
-
public T getEmptyOutput() {
return emptyOutput;
}
@@ -701,8 +671,8 @@ public final class FST<T> implements Acc
final boolean doFixedArray = shouldExpand(nodeIn);
if (doFixedArray) {
//System.out.println(" fixedArray");
- if (bytesPerArc.length < nodeIn.numArcs) {
- bytesPerArc = new int[ArrayUtil.oversize(nodeIn.numArcs, 1)];
+ if (reusedBytesPerArc.length < nodeIn.numArcs) {
+ reusedBytesPerArc = new int[ArrayUtil.oversize(nodeIn.numArcs, 1)];
}
}
@@ -776,10 +746,10 @@ public final class FST<T> implements Acc
// but record how many bytes each one took, and max
// byte size:
if (doFixedArray) {
- bytesPerArc[arcIdx] = (int) (bytes.getPosition() - lastArcStart);
+ reusedBytesPerArc[arcIdx] = (int) (bytes.getPosition() - lastArcStart);
lastArcStart = bytes.getPosition();
- maxBytesPerArc = Math.max(maxBytesPerArc, bytesPerArc[arcIdx]);
- //System.out.println(" bytes=" + bytesPerArc[arcIdx]);
+ maxBytesPerArc = Math.max(maxBytesPerArc, reusedBytesPerArc[arcIdx]);
+ //System.out.println(" bytes=" + reusedBytesPerArc[arcIdx]);
}
}
@@ -830,12 +800,12 @@ public final class FST<T> implements Acc
bytes.skipBytes((int) (destPos - srcPos));
for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
destPos -= maxBytesPerArc;
- srcPos -= bytesPerArc[arcIdx];
+ srcPos -= reusedBytesPerArc[arcIdx];
//System.out.println(" repack arcIdx=" + arcIdx + " srcPos=" + srcPos + " destPos=" + destPos);
if (srcPos != destPos) {
- //System.out.println(" copy len=" + bytesPerArc[arcIdx]);
- assert destPos > srcPos: "destPos=" + destPos + " srcPos=" + srcPos + " arcIdx=" + arcIdx + " maxBytesPerArc=" + maxBytesPerArc + " bytesPerArc[arcIdx]=" + bytesPerArc[arcIdx] + " nodeIn.numArcs=" + nodeIn.numArcs;
- bytes.copyBytes(srcPos, destPos, bytesPerArc[arcIdx]);
+ //System.out.println(" copy len=" + reusedBytesPerArc[arcIdx]);
+ assert destPos > srcPos: "destPos=" + destPos + " srcPos=" + srcPos + " arcIdx=" + arcIdx + " maxBytesPerArc=" + maxBytesPerArc + " reusedBytesPerArc[arcIdx]=" + reusedBytesPerArc[arcIdx] + " nodeIn.numArcs=" + nodeIn.numArcs;
+ bytes.copyBytes(srcPos, destPos, reusedBytesPerArc[arcIdx]);
}
}
}
@@ -1185,12 +1155,48 @@ public final class FST<T> implements Acc
return arc;
}
+ // LUCENE-5152: called only from asserts, to validate that the
+ // non-cached arc lookup would produce the same result, to
+ // catch callers that illegally modify shared structures with
+ // the result (we shallow-clone the Arc itself, but e.g. a BytesRef
+ // output is still shared):
+ private boolean assertRootCachedArc(int label, Arc<T> cachedArc) throws IOException {
+ Arc<T> arc = new Arc<>();
+ getFirstArc(arc);
+ BytesReader in = getBytesReader();
+ Arc<T> result = findTargetArc(label, arc, arc, in, false);
+ if (result == null) {
+ assert cachedArc == null;
+ } else {
+ assert cachedArc != null;
+ assert cachedArc.arcIdx == result.arcIdx;
+ assert cachedArc.bytesPerArc == result.bytesPerArc;
+ assert cachedArc.flags == result.flags;
+ assert cachedArc.label == result.label;
+ assert cachedArc.nextArc == result.nextArc;
+ assert cachedArc.nextFinalOutput.equals(result.nextFinalOutput);
+ assert cachedArc.node == result.node;
+ assert cachedArc.numArcs == result.numArcs;
+ assert cachedArc.output.equals(result.output);
+ assert cachedArc.posArcsStart == result.posArcsStart;
+ assert cachedArc.target == result.target;
+ }
+
+ return true;
+ }
+
// TODO: could we somehow [partially] tableize arc lookups
- // look automaton?
+ // like automaton?
/** Finds an arc leaving the incoming arc, replacing the arc in place.
* This returns null if the arc was not found, else the incoming arc. */
public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
+ return findTargetArc(labelToMatch, follow, arc, in, true);
+ }
+
+ /** Finds an arc leaving the incoming arc, replacing the arc in place.
+ * This returns null if the arc was not found, else the incoming arc. */
+ private Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in, boolean useRootArcCache) throws IOException {
if (labelToMatch == END_LABEL) {
if (follow.isFinal()) {
@@ -1211,12 +1217,13 @@ public final class FST<T> implements Acc
}
// Short-circuit if this arc is in the root arc cache:
- if (follow.target == startNode && labelToMatch < cachedRootArcs.length) {
-
+ if (useRootArcCache && cachedRootArcs != null && follow.target == startNode && labelToMatch < cachedRootArcs.length) {
+ final Arc<T> result = cachedRootArcs[labelToMatch];
+
// LUCENE-5152: detect tricky cases where caller
// modified previously returned cached root-arcs:
- assert assertRootArcs();
- final Arc<T> result = cachedRootArcs[labelToMatch];
+ assert assertRootCachedArc(labelToMatch, result);
+
if (result == null) {
return null;
} else {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/package.html?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/package.html (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/fst/package.html Sun Jan 4 14:53:12 2015
@@ -46,7 +46,7 @@ FST Construction example:
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
BytesRef scratchBytes = new BytesRef();
- IntsRef scratchInts = new IntsRef();
+ IntsRefBuilder scratchInts = new IntsRefBuilder();
for (int i = 0; i < inputValues.length; i++) {
scratchBytes.copyChars(inputValues[i]);
builder.add(Util.toIntsRef(scratchBytes, scratchInts), outputValues[i]);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java Sun Jan 4 14:53:12 2015
@@ -42,11 +42,6 @@ final class Direct16 extends PackedInts.
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readShort();
}
- // because packed ints have not always been byte-aligned
- final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 16) - 2L * valueCount);
- for (int i = 0; i < remaining; ++i) {
- in.readByte();
- }
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java Sun Jan 4 14:53:12 2015
@@ -42,11 +42,6 @@ final class Direct32 extends PackedInts.
for (int i = 0; i < valueCount; ++i) {
values[i] = in.readInt();
}
- // because packed ints have not always been byte-aligned
- final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 32) - 4L * valueCount);
- for (int i = 0; i < remaining; ++i) {
- in.readByte();
- }
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java Sun Jan 4 14:53:12 2015
@@ -40,11 +40,6 @@ final class Direct8 extends PackedInts.M
Direct8(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
in.readBytes(values, 0, valueCount);
- // because packed ints have not always been byte-aligned
- final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 8) - 1L * valueCount);
- for (int i = 0; i < remaining; ++i) {
- in.readByte();
- }
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java Sun Jan 4 14:53:12 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.util.packed;
* limitations under the License.
*/
-import static org.apache.lucene.util.BitUtil.zigZagDecode;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
@@ -50,14 +49,6 @@ public class MonotonicBlockPackedReader
/** Sole constructor. */
public static MonotonicBlockPackedReader of(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
- if (packedIntsVersion < PackedInts.VERSION_MONOTONIC_WITHOUT_ZIGZAG) {
- return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount, direct) {
- @Override
- protected long decodeDelta(long delta) {
- return zigZagDecode(delta);
- }
- };
- }
return new MonotonicBlockPackedReader(in, packedIntsVersion, blockSize, valueCount, direct);
}
@@ -71,11 +62,7 @@ public class MonotonicBlockPackedReader
subReaders = new PackedInts.Reader[numBlocks];
long sumBPV = 0;
for (int i = 0; i < numBlocks; ++i) {
- if (packedIntsVersion < PackedInts.VERSION_MONOTONIC_WITHOUT_ZIGZAG) {
- minValues[i] = in.readVLong();
- } else {
- minValues[i] = in.readZLong();
- }
+ minValues[i] = in.readZLong();
averages[i] = Float.intBitsToFloat(in.readInt());
final int bitsPerValue = in.readVInt();
sumBPV += bitsPerValue;
@@ -103,11 +90,7 @@ public class MonotonicBlockPackedReader
assert index >= 0 && index < valueCount;
final int block = (int) (index >>> blockShift);
final int idx = (int) (index & blockMask);
- return expected(minValues[block], averages[block], idx) + decodeDelta(subReaders[block].get(idx));
- }
-
- protected long decodeDelta(long delta) {
- return delta;
+ return expected(minValues[block], averages[block], idx) + subReaders[block].get(idx);
}
/** Returns the number of values */
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java Sun Jan 4 14:53:12 2015
@@ -47,11 +47,6 @@ final class Packed16ThreeBlocks extends
for (int i = 0; i < 3 * valueCount; ++i) {
blocks[i] = in.readShort();
}
- // because packed ints have not always been byte-aligned
- final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 48) - 3L * valueCount * 2);
- for (int i = 0; i < remaining; ++i) {
- in.readByte();
- }
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed64.java Sun Jan 4 14:53:12 2015
@@ -27,7 +27,7 @@ import org.apache.lucene.util.RamUsageEs
* Space optimized random access capable array of values with a fixed number of
* bits/value. Values are packed contiguously.
* </p><p>
- * The implementation strives to perform af fast as possible under the
+ * The implementation strives to perform as fast as possible under the
* constraint of contiguous bits, by avoiding expensive operations. This comes
* at the cost of code clarity.
* </p><p>
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java Sun Jan 4 14:53:12 2015
@@ -45,11 +45,6 @@ final class Packed8ThreeBlocks extends P
Packed8ThreeBlocks(int packedIntsVersion, DataInput in, int valueCount) throws IOException {
this(valueCount);
in.readBytes(blocks, 0, 3 * valueCount);
- // because packed ints have not always been byte-aligned
- final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, 24) - 3L * valueCount * 1);
- for (int i = 0; i < remaining; ++i) {
- in.readByte();
- }
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java Sun Jan 4 14:53:12 2015
@@ -65,9 +65,8 @@ public class PackedInts {
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
public final static String CODEC_NAME = "PackedInts";
- public final static int VERSION_START = 0; // PackedInts were long-aligned
- public final static int VERSION_BYTE_ALIGNED = 1;
public static final int VERSION_MONOTONIC_WITHOUT_ZIGZAG = 2;
+ public final static int VERSION_START = VERSION_MONOTONIC_WITHOUT_ZIGZAG;
public final static int VERSION_CURRENT = VERSION_MONOTONIC_WITHOUT_ZIGZAG;
/**
@@ -94,11 +93,7 @@ public class PackedInts {
@Override
public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) {
- if (packedIntsVersion < VERSION_BYTE_ALIGNED) {
- return 8L * (long) Math.ceil((double) valueCount * bitsPerValue / 64);
- } else {
- return (long) Math.ceil((double) valueCount * bitsPerValue / 8);
- }
+ return (long) Math.ceil((double) valueCount * bitsPerValue / 8);
}
},
@@ -889,32 +884,7 @@ public class PackedInts {
checkVersion(version);
switch (format) {
case PACKED:
- final long byteCount = format.byteCount(version, valueCount, bitsPerValue);
- if (byteCount != format.byteCount(VERSION_CURRENT, valueCount, bitsPerValue)) {
- assert version == VERSION_START;
- final long endPointer = in.getFilePointer() + byteCount;
- // Some consumers of direct readers assume that reading the last value
- // will make the underlying IndexInput go to the end of the packed
- // stream, but this is not true because packed ints storage used to be
- // long-aligned and is now byte-aligned, hence this additional
- // condition when reading the last value
- return new DirectPackedReader(bitsPerValue, valueCount, in) {
- @Override
- public long get(int index) {
- final long result = super.get(index);
- if (index == valueCount - 1) {
- try {
- in.seek(endPointer);
- } catch (IOException e) {
- throw new IllegalStateException("failed", e);
- }
- }
- return result;
- }
- };
- } else {
- return new DirectPackedReader(bitsPerValue, valueCount, in);
- }
+ return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java Sun Jan 4 14:53:12 2015
@@ -39,7 +39,7 @@ final class PackedReaderIterator extends
this.format = format;
this.packedIntsVersion = packedIntsVersion;
bulkOperation = BulkOperation.of(format, bitsPerValue);
- iterations = iterations(mem);
+ iterations = bulkOperation.computeIterations(valueCount, mem);
assert valueCount == 0 || iterations > 0;
nextBlocks = new byte[iterations * bulkOperation.byteBlockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.byteValueCount()], 0, 0);
@@ -47,15 +47,6 @@ final class PackedReaderIterator extends
position = -1;
}
- private int iterations(int mem) {
- int iterations = bulkOperation.computeIterations(valueCount, mem);
- if (packedIntsVersion < PackedInts.VERSION_BYTE_ALIGNED) {
- // make sure iterations is a multiple of 8
- iterations = (iterations + 7) & 0xFFFFFFF8;
- }
- return iterations;
- }
-
@Override
public LongsRef next(int count) throws IOException {
assert nextValues.length >= 0;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py Sun Jan 4 14:53:12 2015
@@ -78,11 +78,6 @@ if __name__ == '__main__':
f.write(" for (int i = 0; i < 3 * valueCount; ++i) {\n")
f.write(" blocks[i] = in.read%s();\n" %TYPES[bpv].title())
f.write(" }\n")
- f.write(" // because packed ints have not always been byte-aligned\n")
- f.write(" final int remaining = (int) (PackedInts.Format.PACKED.byteCount(packedIntsVersion, valueCount, %d) - 3L * valueCount * %d);\n" %(3 * bpv, bpv / 8))
- f.write(" for (int i = 0; i < remaining; ++i) {\n")
- f.write(" in.readByte();\n")
- f.write(" }\n")
f.write(" }\n")
f.write("""
Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java?rev=1649347&r1=1649346&r2=1649347&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java Sun Jan 4 14:53:12 2015
@@ -19,11 +19,13 @@ package org.apache.lucene.analysis;
import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
@@ -38,11 +40,18 @@ public class TestCachingTokenFilter exte
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = writer.newDocument();
+ AtomicInteger resetCount = new AtomicInteger(0);
TokenStream stream = new TokenStream() {
private int index = 0;
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ resetCount.incrementAndGet();
+ }
+
@Override
public boolean incrementToken() {
if (index == tokens.length) {
@@ -56,16 +65,20 @@ public class TestCachingTokenFilter exte
}
};
-
+
stream = new CachingTokenFilter(stream);
-
+
doc.addLargeText("preanalyzed", stream);
-
+
// 1) we consume all tokens twice before we add the doc to the index
+ assertFalse(((CachingTokenFilter)stream).isCached());
+ stream.reset();
+ assertFalse(((CachingTokenFilter) stream).isCached());
checkTokens(stream);
stream.reset();
checkTokens(stream);
-
+ assertTrue(((CachingTokenFilter)stream).isCached());
+
// 2) now add the document to the index and verify if all tokens are indexed
// don't reset the stream here, the DocumentWriter should do that implicitly
writer.addDocument(doc);
@@ -100,8 +113,24 @@ public class TestCachingTokenFilter exte
// 3) reset stream and consume tokens again
stream.reset();
checkTokens(stream);
+
+ assertEquals(1, resetCount.get());
+
dir.close();
}
+
+ public void testDoubleResetFails() throws IOException {
+ Analyzer analyzer = new MockAnalyzer(random());
+ final TokenStream input = analyzer.tokenStream("field", "abc");
+ CachingTokenFilter buffer = new CachingTokenFilter(input);
+ buffer.reset();//ok
+ try {
+ buffer.reset();//bad (this used to work which we don't want)
+ fail("didn't get expected exception");
+ } catch (IllegalStateException e) {
+ assertEquals("double reset()", e.getMessage());
+ }
+ }
private void checkTokens(TokenStream stream) throws IOException {
int count = 0;