You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/02/01 19:28:02 UTC

[GitHub] [pinot] richardstartin commented on a change in pull request #8101: intern strings extracted from small dictionaries

richardstartin commented on a change in pull request #8101:
URL: https://github.com/apache/pinot/pull/8101#discussion_r796926613



##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java
##########
@@ -41,84 +47,117 @@ public DataType getValueType() {
 
   @Override
   public String get(int dictId) {
-    return getUnpaddedString(dictId, getBuffer());
+    return internStringValue(dictId);
   }
 
   @Override
   public int getIntValue(int dictId) {
-    return Integer.parseInt(getUnpaddedString(dictId, getBuffer()));
+    return Integer.parseInt(internStringValue(dictId));
   }
 
   @Override
   public long getLongValue(int dictId) {
-    return Long.parseLong(getUnpaddedString(dictId, getBuffer()));
+    return Long.parseLong(internStringValue(dictId));
   }
 
   @Override
   public float getFloatValue(int dictId) {
-    return Float.parseFloat(getUnpaddedString(dictId, getBuffer()));
+    return Float.parseFloat(internStringValue(dictId));
   }
 
   @Override
   public double getDoubleValue(int dictId) {
-    return Double.parseDouble(getUnpaddedString(dictId, getBuffer()));
+    return Double.parseDouble(internStringValue(dictId));
   }
 
   @Override
   public String getStringValue(int dictId) {
-    return getUnpaddedString(dictId, getBuffer());
+    return internStringValue(dictId);
   }
 
   @Override
   public byte[] getBytesValue(int dictId) {
-    return BytesUtils.toBytes(getUnpaddedString(dictId, getBuffer()));
+    return BytesUtils.toBytes(internStringValue(dictId, getBuffer()));
   }
 
   @Override
   public void readIntValues(int[] dictIds, int length, int[] outValues) {
     byte[] buffer = getBuffer();
     for (int i = 0; i < length; i++) {
-      outValues[i] = Integer.parseInt(getUnpaddedString(dictIds[i], buffer));
+      outValues[i] = Integer.parseInt(internStringValue(dictIds[i], buffer));
     }
   }
 
   @Override
   public void readLongValues(int[] dictIds, int length, long[] outValues) {
     byte[] buffer = getBuffer();
     for (int i = 0; i < length; i++) {
-      outValues[i] = Long.parseLong(getUnpaddedString(dictIds[i], buffer));
+      outValues[i] = Long.parseLong(internStringValue(dictIds[i], buffer));
     }
   }
 
   @Override
   public void readFloatValues(int[] dictIds, int length, float[] outValues) {
     byte[] buffer = getBuffer();
     for (int i = 0; i < length; i++) {
-      outValues[i] = Float.parseFloat(getUnpaddedString(dictIds[i], buffer));
+      outValues[i] = Float.parseFloat(internStringValue(dictIds[i], buffer));
     }
   }
 
   @Override
   public void readDoubleValues(int[] dictIds, int length, double[] outValues) {
     byte[] buffer = getBuffer();
     for (int i = 0; i < length; i++) {
-      outValues[i] = Double.parseDouble(getUnpaddedString(dictIds[i], buffer));
+      outValues[i] = Double.parseDouble(internStringValue(dictIds[i], buffer));
     }
   }
 
   @Override
   public void readStringValues(int[] dictIds, int length, String[] outValues) {
     byte[] buffer = getBuffer();
     for (int i = 0; i < length; i++) {
-      outValues[i] = getUnpaddedString(dictIds[i], buffer);
+      outValues[i] = internStringValue(dictIds[i], buffer);
     }
   }
 
   @Override
   public void readBytesValues(int[] dictIds, int length, byte[][] outValues) {
     byte[] buffer = getBuffer();
     for (int i = 0; i < length; i++) {
-      outValues[i] = BytesUtils.toBytes(getUnpaddedString(dictIds[i], buffer));
+      outValues[i] = BytesUtils.toBytes(internStringValue(dictIds[i], buffer));
+    }
+  }
+
+  private String internStringValue(int dictId) {
+    if (_internTable == null) {
+      return getUnpaddedString(dictId, getBuffer());
+    }
+    String interned = _internTable[dictId];
+    if (interned == null) {
+      interned = getUnpaddedString(dictId, getBuffer());
+      _internTable[dictId] = interned;
+    }
+    return interned;
+  }
+
+  private String internStringValue(int dictId, byte[] buffer) {
+    if (_internTable == null) {
+      return getUnpaddedString(dictId, buffer);
+    }
+    String interned = _internTable[dictId];
+    if (interned == null) {
+      interned = getUnpaddedString(dictId, buffer);
+      _internTable[dictId] = interned;
+    }
+    return interned;
+  }
+
+  @Override
+  public void close()
+      throws IOException {
+    if (_internTable != null) {
+      Arrays.fill(_internTable, null);

Review comment:
       I don’t think we need this `Arrays.fill` call, but note that the intern table is final. I’ll just remove the call.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org