You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by GitBox <gi...@apache.org> on 2019/04/28 19:25:25 UTC

[GitHub] [lucene-solr] dweiss commented on a change in pull request #657: LUCENE-8781: FST direct arc addressing

dweiss commented on a change in pull request #657: LUCENE-8781: FST direct arc addressing
URL: https://github.com/apache/lucene-solr/pull/657#discussion_r279208474
 
 

 ##########
 File path: lucene/core/src/java/org/apache/lucene/util/fst/FST.java
 ##########
 @@ -632,36 +641,85 @@ long addNode(Builder<T> builder, Builder.UnCompiledNode<T> nodeIn) throws IOExce
     if (doFixedArray) {
       final int MAX_HEADER_SIZE = 11; // header(byte) + numArcs(vint) + numBytes(vint)
       assert maxBytesPerArc > 0;
-      // 2nd pass just "expands" all arcs to take up a fixed
-      // byte size
+      // 2nd pass just "expands" all arcs to take up a fixed byte size
+
+      // If more than 1/2 of the "slots" would be occupied, write an arc array that may have holes in it
+      // so that we can address the arcs directly by label without binary search
+      // TODO: There's no need to write the labels in this case; we can just store the first label in the header
+      // TODO: tune this heuristic?
+      // TODO: write multiple direct blocks with gaps between?
+      int labelRange = nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label + 1;
+      boolean writeDirectly = builder.useDirectArcAddressing && labelRange > 0 && labelRange < 4 * nodeIn.numArcs;
 
       //System.out.println("write int @pos=" + (fixedArrayStart-4) + " numArcs=" + nodeIn.numArcs);
       // create the header
       // TODO: clean this up: or just rewind+reuse and deal with it
       byte header[] = new byte[MAX_HEADER_SIZE]; 
       ByteArrayDataOutput bad = new ByteArrayDataOutput(header);
       // write a "false" first arc:
-      bad.writeByte(ARCS_AS_FIXED_ARRAY);
-      bad.writeVInt(nodeIn.numArcs);
+      if (writeDirectly) {
+        bad.writeByte(ARCS_AS_DIRECT_ARRAY);
+        bad.writeVInt(labelRange);
+      } else {
+        bad.writeByte(ARCS_AS_FIXED_ARRAY);
+        bad.writeVInt(nodeIn.numArcs);
+      }
       bad.writeVInt(maxBytesPerArc);
       int headerLen = bad.getPosition();
       
       final long fixedArrayStart = startAddress + headerLen;
 
-      // expand the arcs in place, backwards
-      long srcPos = builder.bytes.getPosition();
-      long destPos = fixedArrayStart + nodeIn.numArcs*maxBytesPerArc;
-      assert destPos >= srcPos;
-      if (destPos > srcPos) {
-        builder.bytes.skipBytes((int) (destPos - srcPos));
-        for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
-          destPos -= maxBytesPerArc;
-          srcPos -= builder.reusedBytesPerArc[arcIdx];
-          //System.out.println("  repack arcIdx=" + arcIdx + " srcPos=" + srcPos + " destPos=" + destPos);
-          if (srcPos != destPos) {
-            //System.out.println("  copy len=" + builder.reusedBytesPerArc[arcIdx]);
-            assert destPos > srcPos: "destPos=" + destPos + " srcPos=" + srcPos + " arcIdx=" + arcIdx + " maxBytesPerArc=" + maxBytesPerArc + " reusedBytesPerArc[arcIdx]=" + builder.reusedBytesPerArc[arcIdx] + " nodeIn.numArcs=" + nodeIn.numArcs;
-            builder.bytes.copyBytes(srcPos, destPos, builder.reusedBytesPerArc[arcIdx]);
+      if (writeDirectly) {
 
 Review comment:
   Huge methods... move to a submethod?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org