You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@lucene.apache.org by GitBox <gi...@apache.org> on 2020/06/14 18:55:57 UTC

[GitHub] [lucene-solr] dweiss commented on a change in pull request #1573: Cleanup TermsHashPerField

dweiss commented on a change in pull request #1573:
URL: https://github.com/apache/lucene-solr/pull/1573#discussion_r439857921



##########
File path: lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
##########
@@ -19,203 +19,207 @@
 
 import java.io.IOException;
 
-import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.BytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.IntBlockPool;
 
+/**
+ * This class stores streams of information per term without knowing
+ * the size of the stream ahead of time. Each stream typically encodes one level
+ * of information like term frequency per document or term proximity. Internally
+ * this class allocates a linked list of slices that can be read by a {@link ByteSliceReader}
+ * for each term. Terms are first deduplicated in a {@link BytesRefHash} once this is done
+ * internal data-structures point to the current offset of each stream that can be written to.
+ */
 abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
   private static final int HASH_INIT_SIZE = 4;
 
-  final TermsHash termsHash;
-
-  final TermsHashPerField nextPerField;
-  protected final DocumentsWriterPerThread.DocState docState;
-  protected final FieldInvertState fieldState;
-  TermToBytesRefAttribute termAtt;
-  protected TermFrequencyAttribute termFreqAtt;
-
-  // Copied from our perThread
-  final IntBlockPool intPool;
+  private final TermsHashPerField nextPerField;
+  private final IntBlockPool intPool;
   final ByteBlockPool bytePool;
-  final ByteBlockPool termBytePool;
-
-  final int streamCount;
-  final int numPostingInt;
-
-  protected final FieldInfo fieldInfo;
-
-  final BytesRefHash bytesHash;
+  // for each term we store an integer per stream that points into the bytePool above
+  // the address is updated once data is written to the stream to point to the next free offset
+  // this the terms stream. The start address for the stream is stored in postingsArray.byteStarts[termId]

Review comment:
       "this the terms stream" — typo? Did you mean "this is the terms stream"?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org