You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/06/15 14:54:46 UTC

[GitHub] [spark] tgravescs commented on a change in pull request #28412: [SPARK-31608][CORE][WEBUI] Add a new type of KVStore to make loading UI faster

tgravescs commented on a change in pull request #28412:
URL: https://github.com/apache/spark/pull/28412#discussion_r440225765



##########
File path: core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala
##########
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.history
+
+import java.io.IOException
+import java.util.Collection
+import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.util.kvstore._
+
+/**
+ * An implementation of KVStore that accelerates event logs loading.
+ *
+ * When rebuilding the application state from event logs, HybridStore will
+ * write data to InMemoryStore at first and use a background thread to dump
+ * data to LevelDB once the app store is restored. We don't expect write
+ * operations (except the case for caching) after calling switch to level DB.
+ */
+
+private[history] class HybridStore extends KVStore {
+
+  private val inMemoryStore = new InMemoryStore()
+
+  private var levelDB: LevelDB = null
+
+  // Flag to indicate whether we should use inMemoryStore or levelDB
+  private val shouldUseInMemoryStore = new AtomicBoolean(true)
+
+  // Flag to indicate whether this hybrid store is closed, use this flag
+  // to avoid starting background thread after the store is closed
+  private val closed = new AtomicBoolean(false)
+
+  // A background thread that dumps data from inMemoryStore to levelDB
+  private var backgroundThread: Thread = null
+
+  // A hash map that stores all classes that had been writen to inMemoryStore
+  private val klassMap = new ConcurrentHashMap[Class[_], Boolean]
+
+  override def getMetadata[T](klass: Class[T]): T = {
+    getStore().getMetadata(klass)
+  }
+
+  override def setMetadata(value: Object): Unit = {
+    getStore().setMetadata(value)
+  }
+
+  override def read[T](klass: Class[T], naturalKey: Object): T = {
+    getStore().read(klass, naturalKey)
+  }
+
+  override def write(value: Object): Unit = {
+    getStore().write(value)
+
+    if (backgroundThread == null) {
+      // New classes won't be dumped once the background thread is started
+      klassMap.putIfAbsent(value.getClass(), true)
+    }
+  }
+
+  override def delete(klass: Class[_], naturalKey: Object): Unit = {
+    if (backgroundThread != null) {
+      throw new IllegalStateException("delete() shouldn't be called after " +
+        "the hybrid store begins switching to levelDB")
+    }
+
+    getStore().delete(klass, naturalKey)
+  }
+
+  override def view[T](klass: Class[T]): KVStoreView[T] = {
+    getStore().view(klass)
+  }
+
+  override def count(klass: Class[_]): Long = {
+    getStore().count(klass)
+  }
+
+  override def count(klass: Class[_], index: String, indexedValue: Object): Long = {
+    getStore().count(klass, index, indexedValue)
+  }
+
+  override def close(): Unit = {
+    try {
+      closed.set(true)
+      if (backgroundThread != null && backgroundThread.isAlive()) {
+        // The background thread is still running, wait for it to finish
+        backgroundThread.join()
+      }
+    } finally {
+      inMemoryStore.close()
+      if (levelDB != null) {
+        levelDB.close()
+      }
+    }
+  }
+
+  override def removeAllByIndexValues[T](
+      klass: Class[T],
+      index: String,
+      indexValues: Collection[_]): Boolean = {
+    if (backgroundThread != null) {
+      throw new IllegalStateException("removeAllByIndexValues() shouldn't be " +
+        "called after the hybrid store begins switching to levelDB")
+    }
+
+    getStore().removeAllByIndexValues(klass, index, indexValues)
+  }
+
+  def setLevelDB(levelDB: LevelDB): Unit = {
+    this.levelDB = levelDB
+  }
+
+  /**
+   * This method is called when the writing is done for inMemoryStore. A
+   * background thread will be created and be started to dump data in inMemoryStore
+   * to levelDB. Once the dumping is completed, the underlying kvstore will be
+   * switched to levelDB.
+   */
+  def switchToLevelDB(listener: HybridStore.SwitchToLevelDBListener): Unit = {
+    if (closed.get) {
+      return
+    }
+
+    backgroundThread = new Thread(() => {
+      try {
+        for (klass <- klassMap.keys().asScala) {
+          val it = inMemoryStore.view(klass).closeableIterator()
+          while (it.hasNext()) {
+            levelDB.write(it.next())
+          }
+        }
+        listener.onSwitchToLevelDBSuccess()
+        shouldUseInMemoryStore.set(false)

Review comment:
       do we need to reset this to true if the exception is thrown in close?

##########
File path: core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
##########
@@ -128,6 +128,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   private val storePath = conf.get(LOCAL_STORE_DIR).map(new File(_))
   private val fastInProgressParsing = conf.get(FAST_IN_PROGRESS_PARSING)
 
+  private val hybridStoreEnabled = conf.get(History.HYBRID_STORE_ENABLED)
+
+  private val memoryManager = new HistoryServerMemoryManager(conf)

Review comment:
       memory manager only appears to be used with hybrid store, so we shouldn't create and initialize it if not using the hybrid store

##########
File path: core/src/main/scala/org/apache/spark/internal/config/History.scala
##########
@@ -195,4 +195,20 @@ private[spark] object History {
       .version("3.0.0")
       .booleanConf
       .createWithDefault(true)
+
+  val HYBRID_STORE_ENABLED = ConfigBuilder("spark.history.store.hybridStore.enabled")
+    .doc("Whether to use HybridStore as the store when parsing event logs. " +
+      "HybridStore will first write data to an in-memory store and having a background thread " +
+      "that dumps data to a disk store after the writing to in-memory store is completed.")
+    .version("3.1.0")
+    .booleanConf
+    .createWithDefault(false)
+
+  val MAX_IN_MEMORY_STORE_USAGE = ConfigBuilder("spark.history.store.hybridStore.maxMemoryUsage")

Review comment:
       you need to update monitoring.md doc

##########
File path: core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
##########
@@ -1167,6 +1172,17 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     // At this point the disk data either does not exist or was deleted because it failed to
     // load, so the event log needs to be replayed.
 
+    // If hybrid store is enabled, try it first.

Review comment:
       I would extend the comment to say try it first, fall back to leveldb store.

##########
File path: core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.history
+
+import java.util.concurrent.atomic.AtomicLong
+
+import scala.collection.mutable.HashMap
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.History._
+import org.apache.spark.util.Utils
+
+/**
+ * A class used to keep track of in-memory store usage by the SHS.
+ */
+private class HistoryServerMemoryManager(
+    conf: SparkConf) extends Logging {
+
+  private val maxUsage = conf.get(MAX_IN_MEMORY_STORE_USAGE)
+  private val currentUsage = new AtomicLong(0L)
+  private val active = new HashMap[(String, Option[String]), Long]()
+
+  def initialize(): Unit = {
+    logInfo("Initialized memory manager: " +
+      s"current usage = ${Utils.bytesToString(currentUsage.get())}, " +
+      s"max usage = ${Utils.bytesToString(maxUsage)}")
+  }
+
+  def lease(
+      appId: String,
+      attemptId: Option[String],
+      eventLogSize: Long,
+      isCompressed: Boolean): Unit = {
+    val memoryUsage = approximateMemoryUsage(eventLogSize, isCompressed)
+    if (memoryUsage + currentUsage.get > maxUsage) {
+      throw new RuntimeException("Not enough memory to create hybrid store " +
+        s"for app $appId / $attemptId.")
+    }
+    active.synchronized {
+      active(appId -> attemptId) = memoryUsage
+    }
+    currentUsage.addAndGet(memoryUsage)
+    logInfo(s"Leasing ${Utils.bytesToString(memoryUsage)} memory usage for " +
+      s"app $appId / $attemptId")
+  }
+
+  def release(appId: String, attemptId: Option[String]): Unit = {
+    val memoryUsage = active.synchronized { active.remove(appId -> attemptId) }
+
+    memoryUsage match {
+      case Some(m) =>
+        currentUsage.addAndGet(-m)
+        logInfo(s"Released ${Utils.bytesToString(m)} memory usage for " +
+          s"app $appId / $attemptId")
+      case None =>
+    }
+  }
+
+  private def approximateMemoryUsage(eventLogSize: Long, isCompressed: Boolean): Long = {
+    if (isCompressed) {
+      eventLogSize * 2

Review comment:
       how accurate is this in practice? Will depend on the codec chosen. did you try some large files?

##########
File path: core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
##########
@@ -1167,6 +1172,17 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     // At this point the disk data either does not exist or was deleted because it failed to
     // load, so the event log needs to be replayed.
 
+    // If hybrid store is enabled, try it first.
+    if (hybridStoreEnabled) {
+      try {
+        return createHybridStore(dm, appId, attempt, metadata)
+      } catch {
+        case e: Exception =>
+          logInfo(s"Failed to create HybridStore for $appId/${attempt.info.attemptId}." +
+            " Using LevelDB.", e)
+      }
+    }
+

Review comment:
       I find this function hard to read now. I would say at least put a comment here and possibly split the below logic into a separate function for easier readability.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org