You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2014/08/27 00:04:47 UTC

[4/6] git commit: ACCUMULO-3084 just warn about long timer delays

ACCUMULO-3084 just warn about long timer delays


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/1dc77838
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/1dc77838
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/1dc77838

Branch: refs/heads/master
Commit: 1dc77838353f3b253e407cfbb27f7de3cc69c815
Parents: 13d2094 c906e4b
Author: Eric C. Newton <er...@gmail.com>
Authored: Tue Aug 26 17:47:35 2014 -0400
Committer: Eric C. Newton <er...@gmail.com>
Committed: Tue Aug 26 17:47:35 2014 -0400

----------------------------------------------------------------------
 .../accumulo/server/GarbageCollectionLogger.java       | 13 +++++++++++++
 .../java/org/apache/accumulo/tserver/TabletServer.java |  5 +++--
 2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/1dc77838/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
index b66d0f7,0000000..d53f240
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
@@@ -1,103 -1,0 +1,116 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.server;
 +
 +import java.lang.management.GarbageCollectorMXBean;
 +import java.lang.management.ManagementFactory;
 +import java.util.HashMap;
 +import java.util.List;
 +
 +import org.apache.accumulo.core.conf.AccumuloConfiguration;
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.server.util.Halt;
 +import org.apache.log4j.Logger;
 +
 +public class GarbageCollectionLogger {
 +  private static final Logger log = Logger.getLogger(GarbageCollectionLogger.class);
 +  
 +  private final HashMap<String,Long> prevGcTime = new HashMap<String,Long>();
 +  private long lastMemorySize = 0;
 +  private long gcTimeIncreasedCount = 0;
++  private static long lastMemoryCheckTime = 0;
 +  
 +  public GarbageCollectionLogger() {
 +  }
 +
 +  public synchronized void logGCInfo(AccumuloConfiguration conf) {
++    final long now = System.currentTimeMillis();
++
 +    List<GarbageCollectorMXBean> gcmBeans = ManagementFactory.getGarbageCollectorMXBeans();
 +    Runtime rt = Runtime.getRuntime();
 +
 +    StringBuilder sb = new StringBuilder("gc");
 +
 +    boolean sawChange = false;
 +
 +    long maxIncreaseInCollectionTime = 0;
 +
 +    for (GarbageCollectorMXBean gcBean : gcmBeans) {
 +      Long prevTime = prevGcTime.get(gcBean.getName());
 +      long pt = 0;
 +      if (prevTime != null) {
 +        pt = prevTime;
 +      }
 +
 +      long time = gcBean.getCollectionTime();
 +
 +      if (time - pt != 0) {
 +        sawChange = true;
 +      }
 +
 +      long increaseInCollectionTime = time - pt;
 +      sb.append(String.format(" %s=%,.2f(+%,.2f) secs", gcBean.getName(), time / 1000.0, increaseInCollectionTime / 1000.0));
 +      maxIncreaseInCollectionTime = Math.max(increaseInCollectionTime, maxIncreaseInCollectionTime);
 +      prevGcTime.put(gcBean.getName(), time);
 +    }
 +
 +    long mem = rt.freeMemory();
 +    if (maxIncreaseInCollectionTime == 0) {
 +      gcTimeIncreasedCount = 0;
 +    } else {
 +      gcTimeIncreasedCount++;
 +      if (gcTimeIncreasedCount > 3 && mem < rt.maxMemory() * 0.05) {
 +        log.warn("Running low on memory");
 +        gcTimeIncreasedCount = 0;
 +      }
 +    }
 +
 +    if (mem > lastMemorySize) {
 +      sawChange = true;
 +    }
 +
 +    String sign = "+";
 +    if (mem - lastMemorySize <= 0) {
 +      sign = "";
 +    }
 +
 +    sb.append(String.format(" freemem=%,d(%s%,d) totalmem=%,d", mem, sign, (mem - lastMemorySize), rt.totalMemory()));
 +
 +    if (sawChange) {
 +      log.debug(sb.toString());
 +    }
 +
 +    final long keepAliveTimeout = conf.getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT);
++    if (lastMemoryCheckTime > 0 && lastMemoryCheckTime < now) {
++      final long diff = now - lastMemoryCheckTime;
++      if (diff > keepAliveTimeout) {
++        log.warn(String.format("Check for long GC pauses not called in a timely fashion %.1f", diff / 1000.));
++      }
++      lastMemoryCheckTime = now;
++      return;
++    }
++
 +    if (maxIncreaseInCollectionTime > keepAliveTimeout) {
 +      Halt.halt("Garbage collection may be interfering with lock keep-alive.  Halting.", -1);
 +    }
 +
 +    lastMemorySize = mem;
++    lastMemoryCheckTime = now;
 +  }
 +
 +
 +}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/1dc77838/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
index ce84bd3,f1a9f2e..b9505d9
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
@@@ -236,70 -235,28 +236,71 @@@ import org.apache.zookeeper.KeeperExcep
  
  import com.google.common.net.HostAndPort;
  
 -enum ScanRunState {
 -  QUEUED, RUNNING, FINISHED
 -}
 -
  public class TabletServer extends AbstractMetricsImpl implements org.apache.accumulo.tserver.metrics.TabletServerMBean {
 -  private static final Logger log = Logger.getLogger(TabletServer.class);
 -
 -  private static HashMap<String,Long> prevGcTime = new HashMap<String,Long>();
 -  private static long lastMemorySize = 0;
 -  private static long lastMemoryCheckTime = 0;
 -  private static long gcTimeIncreasedCount;
 +  static enum ScanRunState {
 +    QUEUED, RUNNING, FINISHED
 +  }
  
 +  private static final Logger log = Logger.getLogger(TabletServer.class);
 +  private static final String METRICS_PREFIX = "tserver";
    private static final long MAX_TIME_TO_WAIT_FOR_SCAN_RESULT_MILLIS = 1000;
    private static final long RECENTLY_SPLIT_MILLIES = 60 * 1000;
+   private static final long TIME_BETWEEN_GC_CHECKS = 5000;
 +  private static final Set<Column> EMPTY_COLUMNS = Collections.emptySet();
 +
 +  private final GarbageCollectionLogger gcLogger = new GarbageCollectionLogger();
 +  private final TransactionWatcher watcher = new TransactionWatcher();
 +  private final ZooCache masterLockCache = new ZooCache();
 +
 +  private final TabletServerLogger logger;
 +
 +  private final TabletServerMinCMetrics mincMetrics = new TabletServerMinCMetrics();
 +  public TabletServerMinCMetrics getMinCMetrics() {
 +    return mincMetrics;
 +  }
 +
 +  private final ServerConfiguration serverConfig;
 +  private final LogSorter logSorter;
 +  private final TabletStatsKeeper statsKeeper;
 +  private final AtomicInteger logIdGenerator = new AtomicInteger();
    
 -  private TabletServerLogger logger;
 +  private final VolumeManager fs;
 +  public Instance getInstance() {
 +    return serverConfig.getInstance();
 +  }
 +
 +  private final SortedMap<KeyExtent,Tablet> onlineTablets = Collections.synchronizedSortedMap(new TreeMap<KeyExtent,Tablet>());
 +  private final SortedSet<KeyExtent> unopenedTablets = Collections.synchronizedSortedSet(new TreeSet<KeyExtent>());
 +  private final SortedSet<KeyExtent> openingTablets = Collections.synchronizedSortedSet(new TreeSet<KeyExtent>());
 +  @SuppressWarnings("unchecked")
 +  private final Map<KeyExtent,Long> recentlyUnloadedCache = Collections.synchronizedMap(new LRUMap(1000));
  
 -  protected TabletServerMinCMetrics mincMetrics = new TabletServerMinCMetrics();
 +  private final TabletServerResourceManager resourceManager;
 +  private final SecurityOperation security;
  
 -  private ServerConfiguration serverConfig;
 -  private LogSorter logSorter = null;
 +  private final BlockingDeque<MasterMessage> masterMessages = new LinkedBlockingDeque<MasterMessage>();
 +
 +  private Thread majorCompactorThread;
 +
 +  private HostAndPort clientAddress;
 +
 +  private volatile boolean serverStopRequested = false;
 +  private volatile boolean majorCompactorDisabled = false;
 +  private volatile boolean shutdownComplete = false;
- 
++  
 +  private ZooLock tabletServerLock;
 +
 +  private TServer server;
 +
 +  private DistributedWorkQueue bulkFailedCopyQ;
 +
 +  private String lockID;
 +
 +  private static ObjectName OBJECT_NAME = null;
 +
 +  public static final AtomicLong seekCount = new AtomicLong(0);
 +  
 +  private final AtomicLong totalMinorCompactions = new AtomicLong(0);
  
    public TabletServer(ServerConfiguration conf, VolumeManager fs) {
      super();
@@@ -3281,7 -3539,7 +3282,7 @@@
        }
      };
  
-     SimpleTimer.getInstance(aconf).schedule(gcDebugTask, 0, 1000);
 -    SimpleTimer.getInstance().schedule(gcDebugTask, 0, 1000);
++    SimpleTimer.getInstance(aconf).schedule(gcDebugTask, 0, TIME_BETWEEN_GC_CHECKS);
  
      Runnable constraintTask = new Runnable() {