浏览代码

allow grace period for node not found in db before reaping

tags/v0.17.0
Jonathan Cobb 4 年前
父节点
当前提交
e9a200a95f
共有 1 个文件被更改,包括 23 次插入和 14 次删除
  1. +23
    -14
      bubble-server/src/main/java/bubble/cloud/compute/NodeReaper.java

+ 23
- 14
bubble-server/src/main/java/bubble/cloud/compute/NodeReaper.java 查看文件

@@ -12,13 +12,13 @@ import bubble.server.BubbleConfiguration;
import bubble.service.cloud.NetworkService;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.cobbzilla.util.collection.ExpirationMap;
import org.cobbzilla.util.daemon.SimpleDaemon;
import org.cobbzilla.util.network.NetworkUtil;
import org.cobbzilla.util.string.StringUtil;
import org.cobbzilla.util.time.TimeUtil;
import org.springframework.beans.factory.annotation.Autowired;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

@@ -33,6 +33,7 @@ public class NodeReaper extends SimpleDaemon {

// 30 minutes, in milliseconds.
// NOTE(review): presumably the wait before the daemon's first pass; its use is not visible in this excerpt.
private static final long STARTUP_DELAY = MINUTES.toMillis(30);
// 30 minutes, in milliseconds.
// NOTE(review): presumably the pause between reaper passes; its use is not visible in this excerpt.
private static final long KILL_CHECK_INTERVAL = MINUTES.toMillis(30);
// 45 minutes, in milliseconds. Grace period: processNode only stops a node that has no
// database record once it has been seen missing for longer than this.
private static final long MAX_TIME_NOT_IN_DB_BEFORE_DELETION = MINUTES.toMillis(45);
// 2 days, in milliseconds. Twice this value is passed to the unreachableSince ExpirationMap.
// NOTE(review): presumably the longest a node may stay unreachable before deletion; that check is not visible here.
private static final long MAX_DOWNTIME_BEFORE_DELETION = DAYS.toMillis(2);

// Compute driver this reaper works against: used to stop nodes and to build the log prefix.
private final ComputeServiceDriverBase compute;
@@ -48,7 +49,8 @@ public class NodeReaper extends SimpleDaemon {
// Passed to getDnsDriver(...) when removing a node's DNS records.
@Autowired private BubbleConfiguration configuration;
// Used to test whether a node known to the database is reachable.
@Autowired private NetworkService networkService;

// NOTE(review): this HashMap declaration looks like the pre-change line from the diff; the
// ExpirationMap declaration of the same field two lines below supersedes it. Only one may remain.
private final Map<String, Long> unreachableSince = new HashMap<>(100);
// Keyed by ip4; the value is the now() timestamp recorded when the node was first seen with no database record.
// NOTE(review): constructor arguments are presumably (initial capacity, expiration in millis) - confirm against ExpirationMap.
private final Map<String, Long> noNodeInDb = new ExpirationMap<>(100, MAX_TIME_NOT_IN_DB_BEFORE_DELETION*2);
// Keyed by node uuid; the entry is removed as soon as the node is reachable again.
// NOTE(review): the value is presumably the time the node was first found unreachable - the put() is not visible here.
private final Map<String, Long> unreachableSince = new ExpirationMap<>(100, MAX_DOWNTIME_BEFORE_DELETION*2);

// Builds the prefix used on log and error-report messages: the simple class name of the
// compute driver followed by ": ".
private String prefix() {
    final String driverName = compute.getClass().getSimpleName();
    return driverName + ": ";
}

@@ -68,20 +70,27 @@ public class NodeReaper extends SimpleDaemon {
if (wouldKillSelf(node)) return;
final var nodeFromDB = nodeDAO.findByIp4(node.getIp4());
if (nodeFromDB == null) {
final String message = prefix() + "processNode: no node exists with ip4=" + node.getIp4() + ", killing it";
log.warn(message);
reportError(message);
final var domain = domainDAO.findByUuid(node.getDomain());
final var dns = domain != null ? cloudDAO.findByUuid(domain.getPublicDns()) : null;
try {
if (dns != null) dns.getDnsDriver(configuration).deleteNode(node);
compute.stop(node);
} catch (Exception e) {
final String errMessage = prefix() + "processNode: error stopping node " + node.getIp4();
reportError(errMessage, e);
log.error(errMessage, e);
final Long notInDbSince = noNodeInDb.get(node.getIp4());
if (notInDbSince == null) {
noNodeInDb.put(node.getIp4(), now());

} else if (now() - notInDbSince > MAX_TIME_NOT_IN_DB_BEFORE_DELETION) {
final String message = prefix() + "processNode: no node exists with ip4=" + node.getIp4() + ", killing it";
log.warn(message);
reportError(message);
final var domain = domainDAO.findByUuid(node.getDomain());
final var dns = domain != null ? cloudDAO.findByUuid(domain.getPublicDns()) : null;
try {
if (dns != null) dns.getDnsDriver(configuration).deleteNode(node);
compute.stop(node);
} catch (Exception e) {
final String errMessage = prefix() + "processNode: error stopping node " + node.getIp4();
reportError(errMessage, e);
log.error(errMessage, e);
}
}
} else {
noNodeInDb.remove(nodeFromDB.getIp4());
if (networkService.isReachable(nodeFromDB)) {
unreachableSince.remove(nodeFromDB.getUuid());
} else {


正在加载...
取消
保存