Posted to notifications@geode.apache.org by GitBox <gi...@apache.org> on 2020/07/01 19:41:47 UTC

[GitHub] [geode] agingade commented on a change in pull request #5329: GEODE-8029: Allow OplogEntryIdSet to Overflow

agingade commented on a change in pull request #5329:
URL: https://github.com/apache/geode/pull/5329#discussion_r448560371



##########
File path: geode-core/src/main/java/org/apache/geode/internal/cache/DiskStoreImpl.java
##########
@@ -3517,33 +3518,86 @@ public String toString() {
   }
 
   /**
-   * Set of OplogEntryIds (longs). Memory is optimized by using an int[] for ids in the unsigned int
-   * range.
+   * Set of OplogEntryIds (longs).
+   * Memory is optimized by using an int[] for ids in the unsigned int range.
+   * By default we can't have more than 805306401 ids for a load factor of 0.75, the internal lists
+   * are used to overcome this limit, allowing the disk-store to recover successfully (the internal
+   * class is **only** used during recovery to read all deleted entries).
    */
   static class OplogEntryIdSet {
-    private final IntOpenHashSet ints = new IntOpenHashSet((int) INVALID_ID);
-    private final LongOpenHashSet longs = new LongOpenHashSet((int) INVALID_ID);
+    private final List<IntOpenHashSet> allInts;
+    private final List<LongOpenHashSet> allLongs;
+    private final AtomicReference<IntOpenHashSet> currentInts;
+    private final AtomicReference<LongOpenHashSet> currentLongs;
+
+    // For testing purposes only.
+    @VisibleForTesting
+    OplogEntryIdSet(List<IntOpenHashSet> allInts, List<LongOpenHashSet> allLongs) {
+      this.allInts = allInts;
+      this.currentInts = new AtomicReference<>(this.allInts.get(0));
+
+      this.allLongs = allLongs;
+      this.currentLongs = new AtomicReference<>(this.allLongs.get(0));
+    }
+
+    public OplogEntryIdSet() {
+      IntOpenHashSet intHashSet = new IntOpenHashSet((int) INVALID_ID);
+      this.allInts = new ArrayList<>();
+      this.allInts.add(intHashSet);
+      this.currentInts = new AtomicReference<>(intHashSet);
+
+      LongOpenHashSet longHashSet = new LongOpenHashSet((int) INVALID_ID);
+      this.allLongs = new ArrayList<>();
+      this.allLongs.add(longHashSet);
+      this.currentLongs = new AtomicReference<>(longHashSet);
+    }
 
     public void add(long id) {
       if (id == 0) {
         throw new IllegalArgumentException();
-      } else if (id > 0 && id <= 0x00000000FFFFFFFFL) {
-        this.ints.add((int) id);
-      } else {
-        this.longs.add(id);
+      }
+
+      try {
+        if (id > 0 && id <= 0x00000000FFFFFFFFL) {
+          this.currentInts.get().add((int) id);
+        } else {
+          this.currentLongs.get().add(id);
+        }
+      } catch (IllegalArgumentException illegalArgumentException) {
+        // See GEODE-8029.
+        // Too many entries on the accumulated drf files, overflow and continue.
+        logger.warn(
+            "There are too many entries within the disk-store, please execute an offline compaction.",

Review comment:
       "Too many entries", this is referring to deleted entries, right? In that case how about changing it 
   to "large number of deleted entries"?

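    For illustration, a minimal sketch of how the warn call could read if that wording is adopted (the message text is only a suggestion, not the PR's actual change):

        // Suggested rewording only; same logger.warn(String, Throwable) overload as the original call.
        logger.warn(
            "There is a large number of deleted entries within the disk-store, "
                + "please execute an offline compaction.",
            illegalArgumentException);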
##########
File path: geode-core/src/main/java/org/apache/geode/internal/cache/DiskStoreImpl.java
##########
@@ -3517,33 +3518,86 @@ public String toString() {
   }
 
   /**
-   * Set of OplogEntryIds (longs). Memory is optimized by using an int[] for ids in the unsigned int
-   * range.
+   * Set of OplogEntryIds (longs).
+   * Memory is optimized by using an int[] for ids in the unsigned int range.
+   * By default we can't have more than 805306401 ids for a load factor of 0.75, the internal lists
+   * are used to overcome this limit, allowing the disk-store to recover successfully (the internal
+   * class is **only** used during recovery to read all deleted entries).
    */
   static class OplogEntryIdSet {
-    private final IntOpenHashSet ints = new IntOpenHashSet((int) INVALID_ID);
-    private final LongOpenHashSet longs = new LongOpenHashSet((int) INVALID_ID);
+    private final List<IntOpenHashSet> allInts;
+    private final List<LongOpenHashSet> allLongs;
+    private final AtomicReference<IntOpenHashSet> currentInts;
+    private final AtomicReference<LongOpenHashSet> currentLongs;
+
+    // For testing purposes only.
+    @VisibleForTesting
+    OplogEntryIdSet(List<IntOpenHashSet> allInts, List<LongOpenHashSet> allLongs) {
+      this.allInts = allInts;
+      this.currentInts = new AtomicReference<>(this.allInts.get(0));
+
+      this.allLongs = allLongs;
+      this.currentLongs = new AtomicReference<>(this.allLongs.get(0));
+    }
+
+    public OplogEntryIdSet() {
+      IntOpenHashSet intHashSet = new IntOpenHashSet((int) INVALID_ID);
+      this.allInts = new ArrayList<>();
+      this.allInts.add(intHashSet);
+      this.currentInts = new AtomicReference<>(intHashSet);
+
+      LongOpenHashSet longHashSet = new LongOpenHashSet((int) INVALID_ID);
+      this.allLongs = new ArrayList<>();
+      this.allLongs.add(longHashSet);
+      this.currentLongs = new AtomicReference<>(longHashSet);
+    }
 
     public void add(long id) {
       if (id == 0) {
         throw new IllegalArgumentException();
-      } else if (id > 0 && id <= 0x00000000FFFFFFFFL) {
-        this.ints.add((int) id);
-      } else {
-        this.longs.add(id);
+      }
+
+      try {
+        if (id > 0 && id <= 0x00000000FFFFFFFFL) {
+          this.currentInts.get().add((int) id);
+        } else {
+          this.currentLongs.get().add(id);
+        }
+      } catch (IllegalArgumentException illegalArgumentException) {
+        // See GEODE-8029.
+        // Too many entries on the accumulated drf files, overflow and continue.
+        logger.warn(
+            "There are too many entries within the disk-store, please execute an offline compaction.",
+            illegalArgumentException);
+
+        // Overflow to the next [Int|Long]OpenHashSet and continue.
+        if (id > 0 && id <= 0x00000000FFFFFFFFL) {

Review comment:
       @dschneider-pivotal
   Is there a scenario where the same ID is added many times? If so, earlier we had only one id set, so there was no chance of duplicates. Now, with the new approach, the same ID could be present in multiple id sets.
   One of the callers of add() is "offlineCompact()"; it iterates over live entries and calls add()... Will there be two live entries with the same id?
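
    If membership is all that matters during recovery, an id duplicated across the overflow sets would not change the answer. A minimal sketch, assuming the class exposes a contains(long) lookup (the method below is illustrative, not necessarily the PR's exact code):

        // Illustrative only: an id present in more than one overflow set is still
        // reported once by a membership check, although a size() that sums the
        // per-set sizes would count it twice.
        public boolean contains(long id) {
          if (id > 0 && id <= 0x00000000FFFFFFFFL) {
            return allInts.stream().anyMatch(set -> set.contains((int) id));
          } else {
            return allLongs.stream().anyMatch(set -> set.contains(id));
          }
        }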
   

##########
File path: geode-core/src/main/java/org/apache/geode/internal/cache/DiskStoreImpl.java
##########
@@ -3517,33 +3518,86 @@ public String toString() {
   }
 
   /**
-   * Set of OplogEntryIds (longs). Memory is optimized by using an int[] for ids in the unsigned int
-   * range.
+   * Set of OplogEntryIds (longs).
+   * Memory is optimized by using an int[] for ids in the unsigned int range.
+   * By default we can't have more than 805306401 ids for a load factor of 0.75, the internal lists
+   * are used to overcome this limit, allowing the disk-store to recover successfully (the internal
+   * class is **only** used during recovery to read all deleted entries).
    */
   static class OplogEntryIdSet {
-    private final IntOpenHashSet ints = new IntOpenHashSet((int) INVALID_ID);
-    private final LongOpenHashSet longs = new LongOpenHashSet((int) INVALID_ID);
+    private final List<IntOpenHashSet> allInts;
+    private final List<LongOpenHashSet> allLongs;
+    private final AtomicReference<IntOpenHashSet> currentInts;
+    private final AtomicReference<LongOpenHashSet> currentLongs;
+
+    // For testing purposes only.
+    @VisibleForTesting
+    OplogEntryIdSet(List<IntOpenHashSet> allInts, List<LongOpenHashSet> allLongs) {
+      this.allInts = allInts;
+      this.currentInts = new AtomicReference<>(this.allInts.get(0));
+
+      this.allLongs = allLongs;
+      this.currentLongs = new AtomicReference<>(this.allLongs.get(0));
+    }
+
+    public OplogEntryIdSet() {
+      IntOpenHashSet intHashSet = new IntOpenHashSet((int) INVALID_ID);
+      this.allInts = new ArrayList<>();
+      this.allInts.add(intHashSet);
+      this.currentInts = new AtomicReference<>(intHashSet);
+
+      LongOpenHashSet longHashSet = new LongOpenHashSet((int) INVALID_ID);
+      this.allLongs = new ArrayList<>();
+      this.allLongs.add(longHashSet);
+      this.currentLongs = new AtomicReference<>(longHashSet);
+    }
 
     public void add(long id) {
       if (id == 0) {
         throw new IllegalArgumentException();
-      } else if (id > 0 && id <= 0x00000000FFFFFFFFL) {
-        this.ints.add((int) id);
-      } else {
-        this.longs.add(id);
+      }
+
+      try {
+        if (id > 0 && id <= 0x00000000FFFFFFFFL) {
+          this.currentInts.get().add((int) id);
+        } else {
+          this.currentLongs.get().add(id);
+        }
+      } catch (IllegalArgumentException illegalArgumentException) {
+        // See GEODE-8029.
+        // Too many entries on the accumulated drf files, overflow and continue.
+        logger.warn(
+            "There are too many entries within the disk-store, please execute an offline compaction.",

Review comment:
       Do we need to pass "illegalArgumentException" to the warning message? It may not be useful for the end user.
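
    A minimal sketch of the alternative being hinted at, using the standard Logger overloads: warn the user without the stack trace and keep the exception at debug level for troubleshooting (illustrative only, not the PR's final code):

        // Illustrative alternative: keep the user-facing warning clean and push
        // the stack trace down to debug for troubleshooting.
        logger.warn(
            "There is a large number of deleted entries within the disk-store, "
                + "please execute an offline compaction.");
        logger.debug("Hash set overflow while recovering deleted entries", illegalArgumentException);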




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org