You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2008/05/20 09:40:56 UTC
svn commit: r658136 [2/4] - in /lucene/java/trunk/src:
java/org/apache/lucene/ java/org/apache/lucene/analysis/
java/org/apache/lucene/document/ java/org/apache/lucene/index/
java/org/apache/lucene/search/ java/org/apache/lucene/util/
test/org/apache/l...
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldReaderException.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldReaderException.java?rev=658136&r1=658135&r2=658136&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldReaderException.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldReaderException.java Tue May 20 00:40:54 2008
@@ -1,79 +1,79 @@
-package org.apache.lucene.index;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- *
- *
- **/
-public class FieldReaderException extends RuntimeException{
- /**
- * Constructs a new runtime exception with <code>null</code> as its
- * detail message. The cause is not initialized, and may subsequently be
- * initialized by a call to {@link #initCause}.
- */
- public FieldReaderException() {
- }
-
- /**
- * Constructs a new runtime exception with the specified cause and a
- * detail message of <tt>(cause==null ? null : cause.toString())</tt>
- * (which typically contains the class and detail message of
- * <tt>cause</tt>).
- * <p>
- * This constructor is useful for runtime exceptions
- * that are little more than wrappers for other throwables.
- *
- * @param cause the cause (which is saved for later retrieval by the
- * {@link #getCause()} method). (A <tt>null</tt> value is
- * permitted, and indicates that the cause is nonexistent or
- * unknown.)
- * @since 1.4
- */
- public FieldReaderException(Throwable cause) {
- super(cause);
- }
-
- /**
- * Constructs a new runtime exception with the specified detail message.
- * The cause is not initialized, and may subsequently be initialized by a
- * call to {@link #initCause}.
- *
- * @param message the detail message. The detail message is saved for
- * later retrieval by the {@link #getMessage()} method.
- */
- public FieldReaderException(String message) {
- super(message);
- }
-
- /**
- * Constructs a new runtime exception with the specified detail message and
- * cause. <p>Note that the detail message associated with
- * <code>cause</code> is <i>not</i> automatically incorporated in
- * this runtime exception's detail message.
- *
- * @param message the detail message (which is saved for later retrieval
- * by the {@link #getMessage()} method).
- * @param cause the cause (which is saved for later retrieval by the
- * {@link #getCause()} method). (A <tt>null</tt> value is
- * permitted, and indicates that the cause is nonexistent or
- * unknown.)
- * @since 1.4
- */
- public FieldReaderException(String message, Throwable cause) {
- super(message, cause);
- }
-}
+package org.apache.lucene.index;
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ *
+ **/
+public class FieldReaderException extends RuntimeException{
+ /**
+ * Constructs a new runtime exception with <code>null</code> as its
+ * detail message. The cause is not initialized, and may subsequently be
+ * initialized by a call to {@link #initCause}.
+ */
+ public FieldReaderException() {
+ }
+
+ /**
+ * Constructs a new runtime exception with the specified cause and a
+ * detail message of <tt>(cause==null ? null : cause.toString())</tt>
+ * (which typically contains the class and detail message of
+ * <tt>cause</tt>).
+ * <p>
+ * This constructor is useful for runtime exceptions
+ * that are little more than wrappers for other throwables.
+ *
+ * @param cause the cause (which is saved for later retrieval by the
+ * {@link #getCause()} method). (A <tt>null</tt> value is
+ * permitted, and indicates that the cause is nonexistent or
+ * unknown.)
+ * @since 1.4
+ */
+ public FieldReaderException(Throwable cause) {
+ super(cause);
+ }
+
+ /**
+ * Constructs a new runtime exception with the specified detail message.
+ * The cause is not initialized, and may subsequently be initialized by a
+ * call to {@link #initCause}.
+ *
+ * @param message the detail message. The detail message is saved for
+ * later retrieval by the {@link #getMessage()} method.
+ */
+ public FieldReaderException(String message) {
+ super(message);
+ }
+
+ /**
+ * Constructs a new runtime exception with the specified detail message and
+ * cause. <p>Note that the detail message associated with
+ * <code>cause</code> is <i>not</i> automatically incorporated in
+ * this runtime exception's detail message.
+ *
+ * @param message the detail message (which is saved for later retrieval
+ * by the {@link #getMessage()} method).
+ * @param cause the cause (which is saved for later retrieval by the
+ * {@link #getCause()} method). (A <tt>null</tt> value is
+ * permitted, and indicates that the cause is nonexistent or
+ * unknown.)
+ * @since 1.4
+ */
+ public FieldReaderException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/FieldReaderException.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileDeleter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNameFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/IndexFileNames.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/IndexModifier.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListReader.java?rev=658136&r1=658135&r2=658136&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListReader.java Tue May 20 00:40:54 2008
@@ -1,273 +1,273 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.lucene.store.BufferedIndexInput;
-import org.apache.lucene.store.IndexInput;
-
-/**
- * This abstract class reads skip lists with multiple levels.
- *
- * See {@link MultiLevelSkipListWriter} for the information about the encoding
- * of the multi level skip lists.
- *
- * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
- * which defines the actual format of the skip data.
- */
-abstract class MultiLevelSkipListReader {
- // the maximum number of skip levels possible for this index
- private int maxNumberOfSkipLevels;
-
- // number of levels in this skip list
- private int numberOfSkipLevels;
-
- // Expert: defines the number of top skip levels to buffer in memory.
- // Reducing this number results in less memory usage, but possibly
- // slower performance due to more random I/Os.
- // Please notice that the space each level occupies is limited by
- // the skipInterval. The top level can not contain more than
- // skipLevel entries, the second top level can not contain more
- // than skipLevel^2 entries and so forth.
- private int numberOfLevelsToBuffer = 1;
-
- private int docCount;
- private boolean haveSkipped;
-
- private IndexInput[] skipStream; // skipStream for each level
- private long skipPointer[]; // the start pointer of each skip level
- private int skipInterval[]; // skipInterval of each level
- private int[] numSkipped; // number of docs skipped per level
-
- private int[] skipDoc; // doc id of current skip entry per level
- private int lastDoc; // doc id of last read skip entry with docId <= target
- private long[] childPointer; // child pointer of current skip entry per level
- private long lastChildPointer; // childPointer of last read skip entry with docId <= target
-
- private boolean inputIsBuffered;
-
- public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
- this.skipStream = new IndexInput[maxSkipLevels];
- this.skipPointer = new long[maxSkipLevels];
- this.childPointer = new long[maxSkipLevels];
- this.numSkipped = new int[maxSkipLevels];
- this.maxNumberOfSkipLevels = maxSkipLevels;
- this.skipInterval = new int[maxSkipLevels];
- this.skipStream [0]= skipStream;
- this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
- this.skipInterval[0] = skipInterval;
- for (int i = 1; i < maxSkipLevels; i++) {
- // cache skip intervals
- this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
- }
- skipDoc = new int[maxSkipLevels];
- }
-
-
- /** Returns the id of the doc to which the last call of {@link #skipTo(int)}
- * has skipped. */
- int getDoc() {
- return lastDoc;
- }
-
-
- /** Skips entries to the first beyond the current whose document number is
- * greater than or equal to <i>target</i>. Returns the current doc count.
- */
- int skipTo(int target) throws IOException {
- if (!haveSkipped) {
- // first time, load skip levels
- loadSkipLevels();
- haveSkipped = true;
- }
-
- // walk up the levels until highest level is found that has a skip
- // for this target
- int level = 0;
- while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
- level++;
- }
-
- while (level >= 0) {
- if (target > skipDoc[level]) {
- if (!loadNextSkip(level)) {
- continue;
- }
- } else {
- // no more skips on this level, go down one level
- if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
- seekChild(level - 1);
- }
- level--;
- }
- }
-
- return numSkipped[0] - skipInterval[0] - 1;
- }
-
- private boolean loadNextSkip(int level) throws IOException {
- // we have to skip, the target document is greater than the current
- // skip list entry
- setLastSkipData(level);
-
- numSkipped[level] += skipInterval[level];
-
- if (numSkipped[level] > docCount) {
- // this skip list is exhausted
- skipDoc[level] = Integer.MAX_VALUE;
- if (numberOfSkipLevels > level) numberOfSkipLevels = level;
- return false;
- }
-
- // read next skip entry
- skipDoc[level] += readSkipData(level, skipStream[level]);
-
- if (level != 0) {
- // read the child pointer if we are not on the leaf level
- childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
- }
-
- return true;
-
- }
-
- /** Seeks the skip entry on the given level */
- protected void seekChild(int level) throws IOException {
- skipStream[level].seek(lastChildPointer);
- numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
- skipDoc[level] = lastDoc;
- if (level > 0) {
- childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
- }
- }
-
- void close() throws IOException {
- for (int i = 1; i < skipStream.length; i++) {
- if (skipStream[i] != null) {
- skipStream[i].close();
- }
- }
- }
-
- /** initializes the reader */
- void init(long skipPointer, int df) {
- this.skipPointer[0] = skipPointer;
- this.docCount = df;
- Arrays.fill(skipDoc, 0);
- Arrays.fill(numSkipped, 0);
- Arrays.fill(childPointer, 0);
-
- haveSkipped = false;
- for (int i = 1; i < numberOfSkipLevels; i++) {
- skipStream[i] = null;
- }
- }
-
- /** Loads the skip levels */
- private void loadSkipLevels() throws IOException {
- numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
- if (numberOfSkipLevels > maxNumberOfSkipLevels) {
- numberOfSkipLevels = maxNumberOfSkipLevels;
- }
-
- skipStream[0].seek(skipPointer[0]);
-
- int toBuffer = numberOfLevelsToBuffer;
-
- for (int i = numberOfSkipLevels - 1; i > 0; i--) {
- // the length of the current level
- long length = skipStream[0].readVLong();
-
- // the start pointer of the current level
- skipPointer[i] = skipStream[0].getFilePointer();
- if (toBuffer > 0) {
- // buffer this level
- skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
- toBuffer--;
- } else {
- // clone this stream, it is already at the start of the current level
- skipStream[i] = (IndexInput) skipStream[0].clone();
- if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
- ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
- }
-
- // move base stream beyond the current level
- skipStream[0].seek(skipStream[0].getFilePointer() + length);
- }
- }
-
- // use base stream for the lowest level
- skipPointer[0] = skipStream[0].getFilePointer();
- }
-
- /**
- * Subclasses must implement the actual skip data encoding in this method.
- *
- * @param level the level skip data shall be read from
- * @param skipStream the skip stream to read from
- */
- protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
-
- /** Copies the values of the last read skip entry on this level */
- protected void setLastSkipData(int level) {
- lastDoc = skipDoc[level];
- lastChildPointer = childPointer[level];
- }
-
-
- /** used to buffer the top skip levels */
- private final static class SkipBuffer extends IndexInput {
- private byte[] data;
- private long pointer;
- private int pos;
-
- SkipBuffer(IndexInput input, int length) throws IOException {
- data = new byte[length];
- pointer = input.getFilePointer();
- input.readBytes(data, 0, length);
- }
-
- public void close() throws IOException {
- data = null;
- }
-
- public long getFilePointer() {
- return pointer + pos;
- }
-
- public long length() {
- return data.length;
- }
-
- public byte readByte() throws IOException {
- return data[pos++];
- }
-
- public void readBytes(byte[] b, int offset, int len) throws IOException {
- System.arraycopy(data, pos, b, offset, len);
- pos += len;
- }
-
- public void seek(long pos) throws IOException {
- this.pos = (int) (pos - pointer);
- }
-
- }
-}
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * This abstract class reads skip lists with multiple levels.
+ *
+ * See {@link MultiLevelSkipListWriter} for the information about the encoding
+ * of the multi level skip lists.
+ *
+ * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
+ * which defines the actual format of the skip data.
+ */
+abstract class MultiLevelSkipListReader {
+ // the maximum number of skip levels possible for this index
+ private int maxNumberOfSkipLevels;
+
+ // number of levels in this skip list
+ private int numberOfSkipLevels;
+
+ // Expert: defines the number of top skip levels to buffer in memory.
+ // Reducing this number results in less memory usage, but possibly
+ // slower performance due to more random I/Os.
+ // Please notice that the space each level occupies is limited by
+ // the skipInterval. The top level can not contain more than
+ // skipLevel entries, the second top level can not contain more
+ // than skipLevel^2 entries and so forth.
+ private int numberOfLevelsToBuffer = 1;
+
+ private int docCount;
+ private boolean haveSkipped;
+
+ private IndexInput[] skipStream; // skipStream for each level
+ private long skipPointer[]; // the start pointer of each skip level
+ private int skipInterval[]; // skipInterval of each level
+ private int[] numSkipped; // number of docs skipped per level
+
+ private int[] skipDoc; // doc id of current skip entry per level
+ private int lastDoc; // doc id of last read skip entry with docId <= target
+ private long[] childPointer; // child pointer of current skip entry per level
+ private long lastChildPointer; // childPointer of last read skip entry with docId <= target
+
+ private boolean inputIsBuffered;
+
+ public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
+ this.skipStream = new IndexInput[maxSkipLevels];
+ this.skipPointer = new long[maxSkipLevels];
+ this.childPointer = new long[maxSkipLevels];
+ this.numSkipped = new int[maxSkipLevels];
+ this.maxNumberOfSkipLevels = maxSkipLevels;
+ this.skipInterval = new int[maxSkipLevels];
+ this.skipStream [0]= skipStream;
+ this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
+ this.skipInterval[0] = skipInterval;
+ for (int i = 1; i < maxSkipLevels; i++) {
+ // cache skip intervals
+ this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
+ }
+ skipDoc = new int[maxSkipLevels];
+ }
+
+
+ /** Returns the id of the doc to which the last call of {@link #skipTo(int)}
+ * has skipped. */
+ int getDoc() {
+ return lastDoc;
+ }
+
+
+ /** Skips entries to the first beyond the current whose document number is
+ * greater than or equal to <i>target</i>. Returns the current doc count.
+ */
+ int skipTo(int target) throws IOException {
+ if (!haveSkipped) {
+ // first time, load skip levels
+ loadSkipLevels();
+ haveSkipped = true;
+ }
+
+ // walk up the levels until highest level is found that has a skip
+ // for this target
+ int level = 0;
+ while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
+ level++;
+ }
+
+ while (level >= 0) {
+ if (target > skipDoc[level]) {
+ if (!loadNextSkip(level)) {
+ continue;
+ }
+ } else {
+ // no more skips on this level, go down one level
+ if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
+ seekChild(level - 1);
+ }
+ level--;
+ }
+ }
+
+ return numSkipped[0] - skipInterval[0] - 1;
+ }
+
+ private boolean loadNextSkip(int level) throws IOException {
+ // we have to skip, the target document is greater than the current
+ // skip list entry
+ setLastSkipData(level);
+
+ numSkipped[level] += skipInterval[level];
+
+ if (numSkipped[level] > docCount) {
+ // this skip list is exhausted
+ skipDoc[level] = Integer.MAX_VALUE;
+ if (numberOfSkipLevels > level) numberOfSkipLevels = level;
+ return false;
+ }
+
+ // read next skip entry
+ skipDoc[level] += readSkipData(level, skipStream[level]);
+
+ if (level != 0) {
+ // read the child pointer if we are not on the leaf level
+ childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
+ }
+
+ return true;
+
+ }
+
+ /** Seeks the skip entry on the given level */
+ protected void seekChild(int level) throws IOException {
+ skipStream[level].seek(lastChildPointer);
+ numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
+ skipDoc[level] = lastDoc;
+ if (level > 0) {
+ childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
+ }
+ }
+
+ void close() throws IOException {
+ for (int i = 1; i < skipStream.length; i++) {
+ if (skipStream[i] != null) {
+ skipStream[i].close();
+ }
+ }
+ }
+
+ /** initializes the reader */
+ void init(long skipPointer, int df) {
+ this.skipPointer[0] = skipPointer;
+ this.docCount = df;
+ Arrays.fill(skipDoc, 0);
+ Arrays.fill(numSkipped, 0);
+ Arrays.fill(childPointer, 0);
+
+ haveSkipped = false;
+ for (int i = 1; i < numberOfSkipLevels; i++) {
+ skipStream[i] = null;
+ }
+ }
+
+ /** Loads the skip levels */
+ private void loadSkipLevels() throws IOException {
+ numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
+ if (numberOfSkipLevels > maxNumberOfSkipLevels) {
+ numberOfSkipLevels = maxNumberOfSkipLevels;
+ }
+
+ skipStream[0].seek(skipPointer[0]);
+
+ int toBuffer = numberOfLevelsToBuffer;
+
+ for (int i = numberOfSkipLevels - 1; i > 0; i--) {
+ // the length of the current level
+ long length = skipStream[0].readVLong();
+
+ // the start pointer of the current level
+ skipPointer[i] = skipStream[0].getFilePointer();
+ if (toBuffer > 0) {
+ // buffer this level
+ skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
+ toBuffer--;
+ } else {
+ // clone this stream, it is already at the start of the current level
+ skipStream[i] = (IndexInput) skipStream[0].clone();
+ if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
+ ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
+ }
+
+ // move base stream beyond the current level
+ skipStream[0].seek(skipStream[0].getFilePointer() + length);
+ }
+ }
+
+ // use base stream for the lowest level
+ skipPointer[0] = skipStream[0].getFilePointer();
+ }
+
+ /**
+ * Subclasses must implement the actual skip data encoding in this method.
+ *
+ * @param level the level skip data shall be read from
+ * @param skipStream the skip stream to read from
+ */
+ protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
+
+ /** Copies the values of the last read skip entry on this level */
+ protected void setLastSkipData(int level) {
+ lastDoc = skipDoc[level];
+ lastChildPointer = childPointer[level];
+ }
+
+
+ /** used to buffer the top skip levels */
+ private final static class SkipBuffer extends IndexInput {
+ private byte[] data;
+ private long pointer;
+ private int pos;
+
+ SkipBuffer(IndexInput input, int length) throws IOException {
+ data = new byte[length];
+ pointer = input.getFilePointer();
+ input.readBytes(data, 0, length);
+ }
+
+ public void close() throws IOException {
+ data = null;
+ }
+
+ public long getFilePointer() {
+ return pointer + pos;
+ }
+
+ public long length() {
+ return data.length;
+ }
+
+ public byte readByte() throws IOException {
+ return data[pos++];
+ }
+
+ public void readBytes(byte[] b, int offset, int len) throws IOException {
+ System.arraycopy(data, pos, b, offset, len);
+ pos += len;
+ }
+
+ public void seek(long pos) throws IOException {
+ this.pos = (int) (pos - pointer);
+ }
+
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListWriter.java?rev=658136&r1=658135&r2=658136&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListWriter.java Tue May 20 00:40:54 2008
@@ -1,151 +1,151 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMOutputStream;
-
-/**
- * This abstract class writes skip lists with multiple levels.
- *
- * Example for skipInterval = 3:
- * c (skip level 2)
- * c c c (skip level 1)
- * x x x x x x x x x x (skip level 0)
- * d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
- * 3 6 9 12 15 18 21 24 27 30 (df)
- *
- * d - document
- * x - skip data
- * c - skip data with child pointer
- *
- * Skip level i contains every skipInterval-th entry from skip level i-1.
- * Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))).
- *
- * Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
- * This guarantess a logarithmic amount of skips to find the target document.
- *
- * While this class takes care of writing the different skip levels,
- * subclasses must define the actual format of the skip data.
- *
- */
-abstract class MultiLevelSkipListWriter {
- // number of levels in this skip list
- private int numberOfSkipLevels;
-
- // the skip interval in the list with level = 0
- private int skipInterval;
-
- // for every skip level a different buffer is used
- private RAMOutputStream[] skipBuffer;
-
- protected MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) {
- this.skipInterval = skipInterval;
-
- // calculate the maximum number of skip levels for this document frequency
- numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));
-
- // make sure it does not exceed maxSkipLevels
- if (numberOfSkipLevels > maxSkipLevels) {
- numberOfSkipLevels = maxSkipLevels;
- }
- }
-
- protected void init() {
- skipBuffer = new RAMOutputStream[numberOfSkipLevels];
- for (int i = 0; i < numberOfSkipLevels; i++) {
- skipBuffer[i] = new RAMOutputStream();
- }
- }
-
- protected void resetSkip() {
- // creates new buffers or empties the existing ones
- if (skipBuffer == null) {
- init();
- } else {
- for (int i = 0; i < skipBuffer.length; i++) {
- skipBuffer[i].reset();
- }
- }
- }
-
- /**
- * Subclasses must implement the actual skip data encoding in this method.
- *
- * @param level the level skip data shall be writting for
- * @param skipBuffer the skip buffer to write to
- */
- protected abstract void writeSkipData(int level, IndexOutput skipBuffer) throws IOException;
-
- /**
- * Writes the current skip data to the buffers. The current document frequency determines
- * the max level is skip data is to be written to.
- *
- * @param df the current document frequency
- * @throws IOException
- */
- void bufferSkip(int df) throws IOException {
- int numLevels;
-
- // determine max level
- for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) {
- numLevels++;
- }
-
- long childPointer = 0;
-
- for (int level = 0; level < numLevels; level++) {
- writeSkipData(level, skipBuffer[level]);
-
- long newChildPointer = skipBuffer[level].getFilePointer();
-
- if (level != 0) {
- // store child pointers for all levels except the lowest
- skipBuffer[level].writeVLong(childPointer);
- }
-
- //remember the childPointer for the next level
- childPointer = newChildPointer;
- }
- }
-
- /**
- * Writes the buffered skip lists to the given output.
- *
- * @param output the IndexOutput the skip lists shall be written to
- * @return the pointer the skip list starts
- */
- long writeSkip(IndexOutput output) throws IOException {
- long skipPointer = output.getFilePointer();
- if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;
-
- for (int level = numberOfSkipLevels - 1; level > 0; level--) {
- long length = skipBuffer[level].getFilePointer();
- if (length > 0) {
- output.writeVLong(length);
- skipBuffer[level].writeTo(output);
- }
- }
- skipBuffer[0].writeTo(output);
-
- return skipPointer;
- }
-
-}
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+
+/**
+ * This abstract class writes skip lists with multiple levels.
+ *
+ * Example for skipInterval = 3:
+ * c (skip level 2)
+ * c c c (skip level 1)
+ * x x x x x x x x x x (skip level 0)
+ * d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
+ * 3 6 9 12 15 18 21 24 27 30 (df)
+ *
+ * d - document
+ * x - skip data
+ * c - skip data with child pointer
+ *
+ * Skip level i contains every skipInterval-th entry from skip level i-1.
+ * Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))).
+ *
+ * Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
+ * This guarantess a logarithmic amount of skips to find the target document.
+ *
+ * While this class takes care of writing the different skip levels,
+ * subclasses must define the actual format of the skip data.
+ *
+ */
+abstract class MultiLevelSkipListWriter {
+ // number of levels in this skip list
+ private int numberOfSkipLevels;
+
+ // the skip interval in the list with level = 0
+ private int skipInterval;
+
+ // for every skip level a different buffer is used
+ private RAMOutputStream[] skipBuffer;
+
+ protected MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) {
+ this.skipInterval = skipInterval;
+
+ // calculate the maximum number of skip levels for this document frequency
+ numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));
+
+ // make sure it does not exceed maxSkipLevels
+ if (numberOfSkipLevels > maxSkipLevels) {
+ numberOfSkipLevels = maxSkipLevels;
+ }
+ }
+
+ protected void init() {
+ skipBuffer = new RAMOutputStream[numberOfSkipLevels];
+ for (int i = 0; i < numberOfSkipLevels; i++) {
+ skipBuffer[i] = new RAMOutputStream();
+ }
+ }
+
+ protected void resetSkip() {
+ // creates new buffers or empties the existing ones
+ if (skipBuffer == null) {
+ init();
+ } else {
+ for (int i = 0; i < skipBuffer.length; i++) {
+ skipBuffer[i].reset();
+ }
+ }
+ }
+
+ /**
+ * Subclasses must implement the actual skip data encoding in this method.
+ *
+ * @param level the level skip data shall be writting for
+ * @param skipBuffer the skip buffer to write to
+ */
+ protected abstract void writeSkipData(int level, IndexOutput skipBuffer) throws IOException;
+
+ /**
+ * Writes the current skip data to the buffers. The current document frequency determines
+ * the max level is skip data is to be written to.
+ *
+ * @param df the current document frequency
+ * @throws IOException
+ */
+ void bufferSkip(int df) throws IOException {
+ int numLevels;
+
+ // determine max level
+ for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) {
+ numLevels++;
+ }
+
+ long childPointer = 0;
+
+ for (int level = 0; level < numLevels; level++) {
+ writeSkipData(level, skipBuffer[level]);
+
+ long newChildPointer = skipBuffer[level].getFilePointer();
+
+ if (level != 0) {
+ // store child pointers for all levels except the lowest
+ skipBuffer[level].writeVLong(childPointer);
+ }
+
+ //remember the childPointer for the next level
+ childPointer = newChildPointer;
+ }
+ }
+
+ /**
+ * Writes the buffered skip lists to the given output.
+ *
+ * @param output the IndexOutput the skip lists shall be written to
+ * @return the pointer the skip list starts
+ */
+ long writeSkip(IndexOutput output) throws IOException {
+ long skipPointer = output.getFilePointer();
+ if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;
+
+ for (int level = numberOfSkipLevels - 1; level > 0; level--) {
+ long length = skipBuffer[level].getFilePointer();
+ if (length > 0) {
+ output.writeVLong(length);
+ skipBuffer[level].writeTo(output);
+ }
+ }
+ skipBuffer[0].writeTo(output);
+
+ return skipPointer;
+ }
+
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/MultiLevelSkipListWriter.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java?rev=658136&r1=658135&r2=658136&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java Tue May 20 00:40:54 2008
@@ -1,163 +1,163 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Serializable;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-
- /**
- * A Payload is metadata that can be stored together with each occurrence
- * of a term. This metadata is stored inline in the posting list of the
- * specific term.
- * <p>
- * To store payloads in the index a {@link TokenStream} has to be used that
- * produces {@link Token}s containing payload data.
- * <p>
- * Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
- * to retrieve the payloads from the index.<br>
- *
- */
- public class Payload implements Serializable, Cloneable {
- /** the byte array containing the payload data */
- protected byte[] data;
-
- /** the offset within the byte array */
- protected int offset;
-
- /** the length of the payload data */
- protected int length;
-
- /** Creates an empty payload and does not allocate a byte array. */
- public Payload() {
- // nothing to do
- }
-
- /**
- * Creates a new payload with the the given array as data.
- * A reference to the passed-in array is held, i. e. no
- * copy is made.
- *
- * @param data the data of this payload
- */
- public Payload(byte[] data) {
- this(data, 0, data.length);
- }
-
- /**
- * Creates a new payload with the the given array as data.
- * A reference to the passed-in array is held, i. e. no
- * copy is made.
- *
- * @param data the data of this payload
- * @param offset the offset in the data byte array
- * @param length the length of the data
- */
- public Payload(byte[] data, int offset, int length) {
- if (offset < 0 || offset + length > data.length) {
- throw new IllegalArgumentException();
- }
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- /**
- * Sets this payloads data.
- * A reference to the passed-in array is held, i. e. no
- * copy is made.
- */
- public void setData(byte[] data) {
- setData(data, 0, data.length);
- }
-
- /**
- * Sets this payloads data.
- * A reference to the passed-in array is held, i. e. no
- * copy is made.
- */
- public void setData(byte[] data, int offset, int length) {
- this.data = data;
- this.offset = offset;
- this.length = length;
- }
-
- /**
- * Returns a reference to the underlying byte array
- * that holds this payloads data.
- */
- public byte[] getData() {
- return this.data;
- }
-
- /**
- * Returns the offset in the underlying byte array
- */
- public int getOffset() {
- return this.offset;
- }
-
- /**
- * Returns the length of the payload data.
- */
- public int length() {
- return this.length;
- }
-
- /**
- * Returns the byte at the given index.
- */
- public byte byteAt(int index) {
- if (0 <= index && index < this.length) {
- return this.data[this.offset + index];
- }
- throw new ArrayIndexOutOfBoundsException(index);
- }
-
- /**
- * Allocates a new byte array, copies the payload data into it and returns it.
- */
- public byte[] toByteArray() {
- byte[] retArray = new byte[this.length];
- System.arraycopy(this.data, this.offset, retArray, 0, this.length);
- return retArray;
- }
-
- /**
- * Copies the payload data to a byte array.
- *
- * @param target the target byte array
- * @param targetOffset the offset in the target byte array
- */
- public void copyTo(byte[] target, int targetOffset) {
- if (this.length > target.length + targetOffset) {
- throw new ArrayIndexOutOfBoundsException();
- }
- System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
- }
-
- /**
- * Clones this payload by creating a copy of the underlying
- * byte array.
- */
- public Object clone() {
- Payload clone = new Payload(this.toByteArray());
- return clone;
- }
-}
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Serializable;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+ /**
+ * A Payload is metadata that can be stored together with each occurrence
+ * of a term. This metadata is stored inline in the posting list of the
+ * specific term.
+ * <p>
+ * To store payloads in the index a {@link TokenStream} has to be used that
+ * produces {@link Token}s containing payload data.
+ * <p>
+ * Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
+ * to retrieve the payloads from the index.<br>
+ *
+ */
+ public class Payload implements Serializable, Cloneable {
+ /** the byte array containing the payload data */
+ protected byte[] data;
+
+ /** the offset within the byte array */
+ protected int offset;
+
+ /** the length of the payload data */
+ protected int length;
+
+ /** Creates an empty payload and does not allocate a byte array. */
+ public Payload() {
+ // nothing to do
+ }
+
+ /**
+ * Creates a new payload with the the given array as data.
+ * A reference to the passed-in array is held, i. e. no
+ * copy is made.
+ *
+ * @param data the data of this payload
+ */
+ public Payload(byte[] data) {
+ this(data, 0, data.length);
+ }
+
+ /**
+ * Creates a new payload with the the given array as data.
+ * A reference to the passed-in array is held, i. e. no
+ * copy is made.
+ *
+ * @param data the data of this payload
+ * @param offset the offset in the data byte array
+ * @param length the length of the data
+ */
+ public Payload(byte[] data, int offset, int length) {
+ if (offset < 0 || offset + length > data.length) {
+ throw new IllegalArgumentException();
+ }
+ this.data = data;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ /**
+ * Sets this payloads data.
+ * A reference to the passed-in array is held, i. e. no
+ * copy is made.
+ */
+ public void setData(byte[] data) {
+ setData(data, 0, data.length);
+ }
+
+ /**
+ * Sets this payloads data.
+ * A reference to the passed-in array is held, i. e. no
+ * copy is made.
+ */
+ public void setData(byte[] data, int offset, int length) {
+ this.data = data;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ /**
+ * Returns a reference to the underlying byte array
+ * that holds this payloads data.
+ */
+ public byte[] getData() {
+ return this.data;
+ }
+
+ /**
+ * Returns the offset in the underlying byte array
+ */
+ public int getOffset() {
+ return this.offset;
+ }
+
+ /**
+ * Returns the length of the payload data.
+ */
+ public int length() {
+ return this.length;
+ }
+
+ /**
+ * Returns the byte at the given index.
+ */
+ public byte byteAt(int index) {
+ if (0 <= index && index < this.length) {
+ return this.data[this.offset + index];
+ }
+ throw new ArrayIndexOutOfBoundsException(index);
+ }
+
+ /**
+ * Allocates a new byte array, copies the payload data into it and returns it.
+ */
+ public byte[] toByteArray() {
+ byte[] retArray = new byte[this.length];
+ System.arraycopy(this.data, this.offset, retArray, 0, this.length);
+ return retArray;
+ }
+
+ /**
+ * Copies the payload data to a byte array.
+ *
+ * @param target the target byte array
+ * @param targetOffset the offset in the target byte array
+ */
+ public void copyTo(byte[] target, int targetOffset) {
+ if (this.length > target.length + targetOffset) {
+ throw new ArrayIndexOutOfBoundsException();
+ }
+ System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
+ }
+
+ /**
+ * Clones this payload by creating a copy of the underlying
+ * byte array.
+ */
+ public Object clone() {
+ Payload clone = new Payload(this.toByteArray());
+ return clone;
+ }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/ComplexExplanation.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java?rev=658136&r1=658135&r2=658136&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java Tue May 20 00:40:54 2008
@@ -1,257 +1,257 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Collection;
-import java.util.Set;
-
-/**
- * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
- * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
- * This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be
- * combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost,
- * not the sum of the field scores (as BooleanQuery would give).
- * If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
- * another gets a higher score than "albino" matching both fields.
- * To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in
- * each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
- * The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that
- * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms
- * in the multiple fields.
- * @author Chuck Williams
- */
-public class DisjunctionMaxQuery extends Query {
-
- /* The subqueries */
- private ArrayList disjuncts = new ArrayList();
-
- /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
- private float tieBreakerMultiplier = 0.0f;
-
- /** Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries.
- * @param tieBreakerMultiplier this score of each non-maximum disjunct for a document is multiplied by this weight
- * and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that
- * 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique
- * word in the lower scored field (i.e., one that is not in any higher scored field.
- */
- public DisjunctionMaxQuery(float tieBreakerMultiplier) {
- this.tieBreakerMultiplier = tieBreakerMultiplier;
- }
-
- /**
- * Creates a new DisjunctionMaxQuery
- * @param disjuncts a Collection<Query> of all the disjuncts to add
- * @param tieBreakerMultiplier the weight to give to each matching non-maximum disjunct
- */
- public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) {
- this.tieBreakerMultiplier = tieBreakerMultiplier;
- add(disjuncts);
- }
-
- /** Add a subquery to this disjunction
- * @param query the disjunct added
- */
- public void add(Query query) {
- disjuncts.add(query);
- }
-
- /** Add a collection of disjuncts to this disjunction
- * via Iterable<Query>
- */
- public void add(Collection disjuncts) {
- this.disjuncts.addAll(disjuncts);
- }
-
- /** An Iterator<Query> over the disjuncts */
- public Iterator iterator() {
- return disjuncts.iterator();
- }
-
- /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */
- private class DisjunctionMaxWeight implements Weight {
-
- private Similarity similarity; // The similarity which we are associated.
- private ArrayList weights = new ArrayList(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts
-
- /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */
- public DisjunctionMaxWeight(Searcher searcher) throws IOException {
- this.similarity = searcher.getSimilarity();
- for (int i = 0; i < disjuncts.size(); i++)
- weights.add(((Query) disjuncts.get(i)).createWeight(searcher));
- }
-
- /* Return our associated DisjunctionMaxQuery */
- public Query getQuery() { return DisjunctionMaxQuery.this; }
-
- /* Return our boost */
- public float getValue() { return getBoost(); }
-
- /* Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */
- public float sumOfSquaredWeights() throws IOException {
- float max = 0.0f, sum = 0.0f;
- for (int i = 0; i < weights.size(); i++) {
- float sub = ((Weight) weights.get(i)).sumOfSquaredWeights();
- sum += sub;
- max = Math.max(max, sub);
- }
- return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * getBoost() * getBoost();
- }
-
- /* Apply the computed normalization factor to our subqueries */
- public void normalize(float norm) {
- norm *= getBoost(); // Incorporate our boost
- for (int i = 0 ; i < weights.size(); i++)
- ((Weight) weights.get(i)).normalize(norm);
- }
-
- /* Create the scorer used to score our associated DisjunctionMaxQuery */
- public Scorer scorer(IndexReader reader) throws IOException {
- DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, similarity);
- for (int i = 0 ; i < weights.size(); i++) {
- Weight w = (Weight) weights.get(i);
- Scorer subScorer = w.scorer(reader);
- if (subScorer == null) return null;
- result.add(subScorer);
- }
- return result;
- }
-
- /* Explain the score we computed for doc */
- public Explanation explain(IndexReader reader, int doc) throws IOException {
- if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc);
- ComplexExplanation result = new ComplexExplanation();
- float max = 0.0f, sum = 0.0f;
- result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");
- for (int i = 0 ; i < weights.size(); i++) {
- Explanation e = ((Weight) weights.get(i)).explain(reader, doc);
- if (e.isMatch()) {
- result.setMatch(Boolean.TRUE);
- result.addDetail(e);
- sum += e.getValue();
- max = Math.max(max, e.getValue());
- }
- }
- result.setValue(max + (sum - max)*tieBreakerMultiplier);
- return result;
- }
-
- } // end of DisjunctionMaxWeight inner class
-
- /* Create the Weight used to score us */
- protected Weight createWeight(Searcher searcher) throws IOException {
- return new DisjunctionMaxWeight(searcher);
- }
-
- /** Optimize our representation and our subqueries representations
- * @param reader the IndexReader we query
- * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */
- public Query rewrite(IndexReader reader) throws IOException {
- if (disjuncts.size() == 1) {
- Query singleton = (Query) disjuncts.get(0);
- Query result = singleton.rewrite(reader);
- if (getBoost() != 1.0f) {
- if (result == singleton) result = (Query)result.clone();
- result.setBoost(getBoost() * result.getBoost());
- }
- return result;
- }
- DisjunctionMaxQuery clone = null;
- for (int i = 0 ; i < disjuncts.size(); i++) {
- Query clause = (Query) disjuncts.get(i);
- Query rewrite = clause.rewrite(reader);
- if (rewrite != clause) {
- if (clone == null) clone = (DisjunctionMaxQuery)this.clone();
- clone.disjuncts.set(i, rewrite);
- }
- }
- if (clone != null) return clone;
- else return this;
- }
-
- /** Create a shallow copy of us -- used in rewriting if necessary
- * @return a copy of us (but reuse, don't copy, our subqueries) */
- public Object clone() {
- DisjunctionMaxQuery clone = (DisjunctionMaxQuery)super.clone();
- clone.disjuncts = (ArrayList)this.disjuncts.clone();
- return clone;
- }
-
-
- // inherit javadoc
- public void extractTerms(Set terms) {
- for (int i = 0; i < disjuncts.size(); i++) {
- ((Query)disjuncts.get(i)).extractTerms(terms);
- }
- }
-
-
- /** Prettyprint us.
- * @param field the field to which we are applied
- * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"
- */
- public String toString(String field) {
- StringBuffer buffer = new StringBuffer();
- buffer.append("(");
- for (int i = 0 ; i < disjuncts.size(); i++) {
- Query subquery = (Query) disjuncts.get(i);
- if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens
- buffer.append("(");
- buffer.append(subquery.toString(field));
- buffer.append(")");
- }
- else buffer.append(subquery.toString(field));
- if (i != disjuncts.size()-1) buffer.append(" | ");
- }
- buffer.append(")");
- if (tieBreakerMultiplier != 0.0f) {
- buffer.append("~");
- buffer.append(tieBreakerMultiplier);
- }
- if (getBoost() != 1.0) {
- buffer.append("^");
- buffer.append(getBoost());
- }
- return buffer.toString();
- }
-
- /** Return true iff we represent the same query as o
- * @param o another object
- * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
- */
- public boolean equals(Object o) {
- if (! (o instanceof DisjunctionMaxQuery) ) return false;
- DisjunctionMaxQuery other = (DisjunctionMaxQuery)o;
- return this.getBoost() == other.getBoost()
- && this.tieBreakerMultiplier == other.tieBreakerMultiplier
- && this.disjuncts.equals(other.disjuncts);
- }
-
- /** Compute a hash code for hashing us
- * @return the hash code
- */
- public int hashCode() {
- return Float.floatToIntBits(getBoost())
- + Float.floatToIntBits(tieBreakerMultiplier)
- + disjuncts.hashCode();
- }
-
-}
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Collection;
+import java.util.Set;
+
+/**
+ * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
+ * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
+ * This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be
+ * combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost,
+ * not the sum of the field scores (as BooleanQuery would give).
+ * If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
+ * another gets a higher score than "albino" matching both fields.
+ * To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in
+ * each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
+ * The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that
+ * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms
+ * in the multiple fields.
+ * @author Chuck Williams
+ */
+public class DisjunctionMaxQuery extends Query {
+
+ /* The subqueries */
+ private ArrayList disjuncts = new ArrayList();
+
+ /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
+ private float tieBreakerMultiplier = 0.0f;
+
+ /** Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries.
+ * @param tieBreakerMultiplier this score of each non-maximum disjunct for a document is multiplied by this weight
+ * and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that
+ * 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique
+ * word in the lower scored field (i.e., one that is not in any higher scored field.
+ */
+ public DisjunctionMaxQuery(float tieBreakerMultiplier) {
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ }
+
+ /**
+ * Creates a new DisjunctionMaxQuery
+ * @param disjuncts a Collection<Query> of all the disjuncts to add
+ * @param tieBreakerMultiplier the weight to give to each matching non-maximum disjunct
+ */
+ public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) {
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ add(disjuncts);
+ }
+
+ /** Add a subquery to this disjunction
+ * @param query the disjunct added
+ */
+ public void add(Query query) {
+ disjuncts.add(query);
+ }
+
+ /** Add a collection of disjuncts to this disjunction
+ * via Iterable<Query>
+ */
+ public void add(Collection disjuncts) {
+ this.disjuncts.addAll(disjuncts);
+ }
+
+ /** An Iterator<Query> over the disjuncts */
+ public Iterator iterator() {
+ return disjuncts.iterator();
+ }
+
+ /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */
+ private class DisjunctionMaxWeight implements Weight {
+
+ private Similarity similarity; // The similarity which we are associated.
+ private ArrayList weights = new ArrayList(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts
+
+ /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */
+ public DisjunctionMaxWeight(Searcher searcher) throws IOException {
+ this.similarity = searcher.getSimilarity();
+ for (int i = 0; i < disjuncts.size(); i++)
+ weights.add(((Query) disjuncts.get(i)).createWeight(searcher));
+ }
+
+ /* Return our associated DisjunctionMaxQuery */
+ public Query getQuery() { return DisjunctionMaxQuery.this; }
+
+ /* Return our boost */
+ public float getValue() { return getBoost(); }
+
+ /* Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */
+ public float sumOfSquaredWeights() throws IOException {
+ float max = 0.0f, sum = 0.0f;
+ for (int i = 0; i < weights.size(); i++) {
+ float sub = ((Weight) weights.get(i)).sumOfSquaredWeights();
+ sum += sub;
+ max = Math.max(max, sub);
+ }
+ return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * getBoost() * getBoost();
+ }
+
+ /* Apply the computed normalization factor to our subqueries */
+ public void normalize(float norm) {
+ norm *= getBoost(); // Incorporate our boost
+ for (int i = 0 ; i < weights.size(); i++)
+ ((Weight) weights.get(i)).normalize(norm);
+ }
+
+ /* Create the scorer used to score our associated DisjunctionMaxQuery */
+ public Scorer scorer(IndexReader reader) throws IOException {
+ DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, similarity);
+ for (int i = 0 ; i < weights.size(); i++) {
+ Weight w = (Weight) weights.get(i);
+ Scorer subScorer = w.scorer(reader);
+ if (subScorer == null) return null;
+ result.add(subScorer);
+ }
+ return result;
+ }
+
+ /* Explain the score we computed for doc */
+ public Explanation explain(IndexReader reader, int doc) throws IOException {
+ if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc);
+ ComplexExplanation result = new ComplexExplanation();
+ float max = 0.0f, sum = 0.0f;
+ result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");
+ for (int i = 0 ; i < weights.size(); i++) {
+ Explanation e = ((Weight) weights.get(i)).explain(reader, doc);
+ if (e.isMatch()) {
+ result.setMatch(Boolean.TRUE);
+ result.addDetail(e);
+ sum += e.getValue();
+ max = Math.max(max, e.getValue());
+ }
+ }
+ result.setValue(max + (sum - max)*tieBreakerMultiplier);
+ return result;
+ }
+
+ } // end of DisjunctionMaxWeight inner class
+
+ /* Create the Weight used to score us */
+ protected Weight createWeight(Searcher searcher) throws IOException {
+ return new DisjunctionMaxWeight(searcher);
+ }
+
+ /** Optimize our representation and our subqueries representations
+ * @param reader the IndexReader we query
+ * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */
+ public Query rewrite(IndexReader reader) throws IOException {
+ if (disjuncts.size() == 1) {
+ Query singleton = (Query) disjuncts.get(0);
+ Query result = singleton.rewrite(reader);
+ if (getBoost() != 1.0f) {
+ if (result == singleton) result = (Query)result.clone();
+ result.setBoost(getBoost() * result.getBoost());
+ }
+ return result;
+ }
+ DisjunctionMaxQuery clone = null;
+ for (int i = 0 ; i < disjuncts.size(); i++) {
+ Query clause = (Query) disjuncts.get(i);
+ Query rewrite = clause.rewrite(reader);
+ if (rewrite != clause) {
+ if (clone == null) clone = (DisjunctionMaxQuery)this.clone();
+ clone.disjuncts.set(i, rewrite);
+ }
+ }
+ if (clone != null) return clone;
+ else return this;
+ }
+
+ /** Create a shallow copy of us -- used in rewriting if necessary
+ * @return a copy of us (but reuse, don't copy, our subqueries) */
+ public Object clone() {
+ DisjunctionMaxQuery clone = (DisjunctionMaxQuery)super.clone();
+ clone.disjuncts = (ArrayList)this.disjuncts.clone();
+ return clone;
+ }
+
+
+ // inherit javadoc
+ public void extractTerms(Set terms) {
+ for (int i = 0; i < disjuncts.size(); i++) {
+ ((Query)disjuncts.get(i)).extractTerms(terms);
+ }
+ }
+
+
+ /** Prettyprint us.
+ * @param field the field to which we are applied
+ * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"
+ */
+ public String toString(String field) {
+ StringBuffer buffer = new StringBuffer();
+ buffer.append("(");
+ for (int i = 0 ; i < disjuncts.size(); i++) {
+ Query subquery = (Query) disjuncts.get(i);
+ if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens
+ buffer.append("(");
+ buffer.append(subquery.toString(field));
+ buffer.append(")");
+ }
+ else buffer.append(subquery.toString(field));
+ if (i != disjuncts.size()-1) buffer.append(" | ");
+ }
+ buffer.append(")");
+ if (tieBreakerMultiplier != 0.0f) {
+ buffer.append("~");
+ buffer.append(tieBreakerMultiplier);
+ }
+ if (getBoost() != 1.0) {
+ buffer.append("^");
+ buffer.append(getBoost());
+ }
+ return buffer.toString();
+ }
+
+ /** Return true iff we represent the same query as o
+ * @param o another object
+ * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us
+ */
+ public boolean equals(Object o) {
+ if (! (o instanceof DisjunctionMaxQuery) ) return false;
+ DisjunctionMaxQuery other = (DisjunctionMaxQuery)o;
+ return this.getBoost() == other.getBoost()
+ && this.tieBreakerMultiplier == other.tieBreakerMultiplier
+ && this.disjuncts.equals(other.disjuncts);
+ }
+
+ /** Compute a hash code for hashing us
+ * @return the hash code
+ */
+ public int hashCode() {
+ return Float.floatToIntBits(getBoost())
+ + Float.floatToIntBits(tieBreakerMultiplier)
+ + disjuncts.hashCode();
+ }
+
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java?rev=658136&r1=658135&r2=658136&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Tue May 20 00:40:54 2008
@@ -1,195 +1,195 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-/**
- * The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the the subquery scorers
- * is generated in document number order. The score for each document is the maximum of the scores computed
- * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
- * for the other subqueries that generate the document.
- * @author Chuck Williams
- */
-class DisjunctionMaxScorer extends Scorer {
-
- /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */
- private ArrayList subScorers = new ArrayList();
-
- /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
- private float tieBreakerMultiplier;
-
- private boolean more = false; // True iff there is a next document
- private boolean firstTime = true; // True iff next() has not yet been called
-
- /** Creates a new instance of DisjunctionMaxScorer
- * @param tieBreakerMultiplier Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result.
- * @param similarity -- not used since our definition involves neither coord nor terms directly */
- public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity) {
- super(similarity);
- this.tieBreakerMultiplier = tieBreakerMultiplier;
- }
-
- /** Add the scorer for a subquery
- * @param scorer the scorer of a subquery of our associated DisjunctionMaxQuery
- */
- public void add(Scorer scorer) throws IOException {
- if (scorer.next()) { // Initialize and retain only if it produces docs
- subScorers.add(scorer);
- more = true;
- }
- }
-
- /** Generate the next document matching our associated DisjunctionMaxQuery.
- * @return true iff there is a next document
- */
- public boolean next() throws IOException {
- if (!more) return false;
- if (firstTime) {
- heapify();
- firstTime = false;
- return true; // more would have been false if no subScorers had any docs
- }
- // Increment all generators that generated the last doc and adjust the heap.
- int lastdoc = ((Scorer) subScorers.get(0)).doc();
- do {
- if (((Scorer) subScorers.get(0)).next())
- heapAdjust(0);
- else {
- heapRemoveRoot();
- if (subScorers.isEmpty()) return (more = false);
- }
- } while ( ((Scorer) subScorers.get(0)).doc()==lastdoc );
- return true;
- }
-
- /** Determine the current document number. Initially invalid, until {@link #next()} is called the first time.
- * @return the document number of the currently generated document
- */
- public int doc() {
- return ((Scorer) subScorers.get(0)).doc();
- }
-
- /** Determine the current document score. Initially invalid, until {@link #next()} is called the first time.
- * @return the score of the current generated document
- */
- public float score() throws IOException {
- int doc = ((Scorer) subScorers.get(0)).doc();
- float[] sum = {((Scorer) subScorers.get(0)).score()}, max = {sum[0]};
- int size = subScorers.size();
- scoreAll(1, size, doc, sum, max);
- scoreAll(2, size, doc, sum, max);
- return max[0] + (sum[0] - max[0])*tieBreakerMultiplier;
- }
-
- // Recursively iterate all subScorers that generated last doc computing sum and max
- private void scoreAll(int root, int size, int doc, float[] sum, float[] max) throws IOException {
- if (root<size && ((Scorer) subScorers.get(root)).doc() == doc) {
- float sub = ((Scorer) subScorers.get(root)).score();
- sum[0] += sub;
- max[0] = Math.max(max[0], sub);
- scoreAll((root<<1)+1, size, doc, sum, max);
- scoreAll((root<<1)+2, size, doc, sum, max);
- }
- }
-
- /** Advance to the first document beyond the current whose number is greater than or equal to target.
- * @param target the minimum number of the next desired document
- * @return true iff there is a document to be generated whose number is at least target
- */
- public boolean skipTo(int target) throws IOException {
- if (firstTime) {
- if (!more) return false;
- heapify();
- firstTime = false;
- }
-
- while (subScorers.size()>0 && ((Scorer)subScorers.get(0)).doc()<target) {
- if (((Scorer)subScorers.get(0)).skipTo(target))
- heapAdjust(0);
- else
- heapRemoveRoot();
- }
- if ((subScorers.size()==0))
- return (more = false);
- return true;
- }
-
- /** Explain a score that we computed. UNSUPPORTED -- see explanation capability in DisjunctionMaxQuery.
- * @param doc the number of a document we scored
- * @return the Explanation for our score
- */
- public Explanation explain(int doc) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- // Organize subScorers into a min heap with scorers generating the earlest document on top.
- private void heapify() {
- int size = subScorers.size();
- for (int i=(size>>1)-1; i>=0; i--)
- heapAdjust(i);
- }
-
- /* The subtree of subScorers at root is a min heap except possibly for its root element.
- * Bubble the root down as required to make the subtree a heap.
- */
- private void heapAdjust(int root) {
- Scorer scorer=(Scorer)subScorers.get(root);
- int doc=scorer.doc();
- int i=root, size=subScorers.size();
- while (i<=(size>>1)-1) {
- int lchild=(i<<1)+1;
- Scorer lscorer=(Scorer)subScorers.get(lchild);
- int ldoc=lscorer.doc();
- int rdoc=Integer.MAX_VALUE, rchild=(i<<1)+2;
- Scorer rscorer=null;
- if (rchild<size) {
- rscorer=(Scorer)subScorers.get(rchild);
- rdoc=rscorer.doc();
- }
- if (ldoc<doc) {
- if (rdoc<ldoc) {
- subScorers.set(i, rscorer);
- subScorers.set(rchild, scorer);
- i=rchild;
- } else {
- subScorers.set(i, lscorer);
- subScorers.set(lchild, scorer);
- i=lchild;
- }
- } else if (rdoc<doc) {
- subScorers.set(i, rscorer);
- subScorers.set(rchild, scorer);
- i=rchild;
- } else return;
- }
- }
-
- // Remove the root Scorer from subScorers and re-establish it as a heap
- private void heapRemoveRoot() {
- int size=subScorers.size();
- if (size==1)
- subScorers.remove(0);
- else {
- subScorers.set(0, subScorers.get(size-1));
- subScorers.remove(size-1);
- heapAdjust(0);
- }
- }
-
-}
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the the subquery scorers
+ * is generated in document number order. The score for each document is the maximum of the scores computed
+ * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
+ * for the other subqueries that generate the document.
+ * @author Chuck Williams
+ */
+class DisjunctionMaxScorer extends Scorer {
+
+ /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */
+ private ArrayList subScorers = new ArrayList();
+
+ /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
+ private float tieBreakerMultiplier;
+
+ private boolean more = false; // True iff there is a next document
+ private boolean firstTime = true; // True iff next() has not yet been called
+
+ /** Creates a new instance of DisjunctionMaxScorer
+ * @param tieBreakerMultiplier Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result.
+ * @param similarity -- not used since our definition involves neither coord nor terms directly */
+ public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity) {
+ super(similarity);
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ }
+
+ /** Add the scorer for a subquery
+ * @param scorer the scorer of a subquery of our associated DisjunctionMaxQuery
+ */
+ public void add(Scorer scorer) throws IOException {
+ if (scorer.next()) { // Initialize and retain only if it produces docs
+ subScorers.add(scorer);
+ more = true;
+ }
+ }
+
+ /** Generate the next document matching our associated DisjunctionMaxQuery.
+ * @return true iff there is a next document
+ */
+ public boolean next() throws IOException {
+ if (!more) return false;
+ if (firstTime) {
+ heapify();
+ firstTime = false;
+ return true; // more would have been false if no subScorers had any docs
+ }
+ // Increment all generators that generated the last doc and adjust the heap.
+ int lastdoc = ((Scorer) subScorers.get(0)).doc();
+ do {
+ if (((Scorer) subScorers.get(0)).next())
+ heapAdjust(0);
+ else {
+ heapRemoveRoot();
+ if (subScorers.isEmpty()) return (more = false);
+ }
+ } while ( ((Scorer) subScorers.get(0)).doc()==lastdoc );
+ return true;
+ }
+
+ /** Determine the current document number. Initially invalid, until {@link #next()} is called the first time.
+ * @return the document number of the currently generated document
+ */
+ public int doc() {
+ return ((Scorer) subScorers.get(0)).doc();
+ }
+
+ /** Determine the current document score. Initially invalid, until {@link #next()} is called the first time.
+ * @return the score of the current generated document
+ */
+ public float score() throws IOException {
+ int doc = ((Scorer) subScorers.get(0)).doc();
+ float[] sum = {((Scorer) subScorers.get(0)).score()}, max = {sum[0]};
+ int size = subScorers.size();
+ scoreAll(1, size, doc, sum, max);
+ scoreAll(2, size, doc, sum, max);
+ return max[0] + (sum[0] - max[0])*tieBreakerMultiplier;
+ }
+
+ // Recursively iterate all subScorers that generated last doc computing sum and max
+ private void scoreAll(int root, int size, int doc, float[] sum, float[] max) throws IOException {
+ if (root<size && ((Scorer) subScorers.get(root)).doc() == doc) {
+ float sub = ((Scorer) subScorers.get(root)).score();
+ sum[0] += sub;
+ max[0] = Math.max(max[0], sub);
+ scoreAll((root<<1)+1, size, doc, sum, max);
+ scoreAll((root<<1)+2, size, doc, sum, max);
+ }
+ }
+
+ /** Advance to the first document beyond the current whose number is greater than or equal to target.
+ * @param target the minimum number of the next desired document
+ * @return true iff there is a document to be generated whose number is at least target
+ */
+ public boolean skipTo(int target) throws IOException {
+ if (firstTime) {
+ if (!more) return false;
+ heapify();
+ firstTime = false;
+ }
+
+ while (subScorers.size()>0 && ((Scorer)subScorers.get(0)).doc()<target) {
+ if (((Scorer)subScorers.get(0)).skipTo(target))
+ heapAdjust(0);
+ else
+ heapRemoveRoot();
+ }
+ if ((subScorers.size()==0))
+ return (more = false);
+ return true;
+ }
+
+ /** Explain a score that we computed. UNSUPPORTED -- see explanation capability in DisjunctionMaxQuery.
+ * @param doc the number of a document we scored
+ * @return the Explanation for our score
+ */
+ public Explanation explain(int doc) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ // Organize subScorers into a min heap with scorers generating the earlest document on top.
+ private void heapify() {
+ int size = subScorers.size();
+ for (int i=(size>>1)-1; i>=0; i--)
+ heapAdjust(i);
+ }
+
+ /* The subtree of subScorers at root is a min heap except possibly for its root element.
+ * Bubble the root down as required to make the subtree a heap.
+ */
+ private void heapAdjust(int root) {
+ Scorer scorer=(Scorer)subScorers.get(root);
+ int doc=scorer.doc();
+ int i=root, size=subScorers.size();
+ while (i<=(size>>1)-1) {
+ int lchild=(i<<1)+1;
+ Scorer lscorer=(Scorer)subScorers.get(lchild);
+ int ldoc=lscorer.doc();
+ int rdoc=Integer.MAX_VALUE, rchild=(i<<1)+2;
+ Scorer rscorer=null;
+ if (rchild<size) {
+ rscorer=(Scorer)subScorers.get(rchild);
+ rdoc=rscorer.doc();
+ }
+ if (ldoc<doc) {
+ if (rdoc<ldoc) {
+ subScorers.set(i, rscorer);
+ subScorers.set(rchild, scorer);
+ i=rchild;
+ } else {
+ subScorers.set(i, lscorer);
+ subScorers.set(lchild, scorer);
+ i=lchild;
+ }
+ } else if (rdoc<doc) {
+ subScorers.set(i, rscorer);
+ subScorers.set(rchild, scorer);
+ i=rchild;
+ } else return;
+ }
+ }
+
+ // Remove the root Scorer from subScorers and re-establish it as a heap
+ private void heapRemoveRoot() {
+ int size=subScorers.size();
+ if (size==1)
+ subScorers.remove(0);
+ else {
+ subScorers.set(0, subScorers.get(size-1));
+ subScorers.remove(size-1);
+ heapAdjust(0);
+ }
+ }
+
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/FilterManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/Hit.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/HitIterator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/SimilarityDelegator.java
------------------------------------------------------------------------------
svn:eol-style = native