Преглед изворни кода

improve stop network handling

tags/v0.12.9
Jonathan Cobb пре 4 година
родитељ
комит
a50b3ec2a1
12 измењених фајлова са 114 додато и 67 уклоњено
  1. +1
    -1
      bubble-server/src/main/java/bubble/dao/bill/AccountPlanDAO.java
  2. +3
    -3
      bubble-server/src/main/java/bubble/resources/account/AccountsResource.java
  3. +3
    -3
      bubble-server/src/main/java/bubble/resources/account/MeResource.java
  4. +4
    -2
      bubble-server/src/main/java/bubble/resources/cloud/NetworkActionsResource.java
  5. +1
    -1
      bubble-server/src/main/java/bubble/server/BubbleConfiguration.java
  6. +70
    -4
      bubble-server/src/main/java/bubble/service/cloud/NodeLaunchMonitor.java
  7. +1
    -1
      bubble-server/src/main/java/bubble/service/cloud/NodeLauncher.java
  8. +19
    -2
      bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeter.java
  9. +1
    -0
      bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeterConstants.java
  10. +9
    -49
      bubble-server/src/main/java/bubble/service/cloud/StandardNetworkService.java
  11. +1
    -0
      bubble-server/src/main/resources/message_templates/en_US/server/post_auth/ResourceMessages.properties
  12. +1
    -1
      bubble-web

+ 1
- 1
bubble-server/src/main/java/bubble/dao/bill/AccountPlanDAO.java Прегледај датотеку

@@ -94,7 +94,7 @@ public class AccountPlanDAO extends AccountOwnedEntityDAO<AccountPlan> {

public boolean isNotDeleted(String networkUuid) {
final AccountPlan accountPlan = findByNetwork(networkUuid);
return accountPlan != null && accountPlan.notDeleted() && accountPlan.notDeleted();
return accountPlan != null && accountPlan.notDeleting() && accountPlan.notDeleted();
}

@Override public Object preCreate(AccountPlan accountPlan) {


+ 3
- 3
bubble-server/src/main/java/bubble/resources/account/AccountsResource.java Прегледај датотеку

@@ -27,7 +27,7 @@ import bubble.service.account.MitmControlService;
import bubble.service.account.StandardAuthenticatorService;
import bubble.service.account.download.AccountDownloadService;
import bubble.service.boot.SelfNodeService;
import bubble.service.cloud.StandardNetworkService;
import bubble.service.cloud.NodeLaunchMonitor;
import lombok.extern.slf4j.Slf4j;
import org.cobbzilla.wizard.auth.ChangePasswordRequest;
import org.cobbzilla.wizard.model.HashedPassword;
@@ -168,14 +168,14 @@ public class AccountsResource {
return ok(accountDAO.update(c.account));
}

@Autowired private StandardNetworkService networkService;
@Autowired private NodeLaunchMonitor launchMonitor;

@GET @Path("/{id}"+EP_STATUS)
public Response listLaunchStatuses(@Context Request req,
@Context ContainerRequest ctx,
@PathParam("id") String id) {
final AccountContext c = new AccountContext(ctx, id);
return ok(networkService.listLaunchStatuses(c.account.getUuid()));
return ok(launchMonitor.listLaunchStatuses(c.account.getUuid()));
}

@Path("/{id}"+EP_PROMOTIONS)


+ 3
- 3
bubble-server/src/main/java/bubble/resources/account/MeResource.java Прегледај датотеку

@@ -27,7 +27,7 @@ import bubble.service.account.StandardAccountMessageService;
import bubble.service.account.StandardAuthenticatorService;
import bubble.service.account.download.AccountDownloadService;
import bubble.service.boot.BubbleModelSetupService;
import bubble.service.cloud.StandardNetworkService;
import bubble.service.cloud.NodeLaunchMonitor;
import com.fasterxml.jackson.databind.JsonNode;
import lombok.Cleanup;
import lombok.Getter;
@@ -369,13 +369,13 @@ public class MeResource {
return configuration.subResource(ReferralCodesResource.class, caller);
}

@Autowired private StandardNetworkService networkService;
@Autowired private NodeLaunchMonitor launchMonitor;

@GET @Path(EP_STATUS)
public Response listLaunchStatuses(@Context Request req,
@Context ContainerRequest ctx) {
final Account caller = userPrincipal(ctx);
return ok(networkService.listLaunchStatuses(caller.getUuid()));
return ok(launchMonitor.listLaunchStatuses(caller.getUuid()));
}

@Path(EP_PROMOTIONS)


+ 4
- 2
bubble-server/src/main/java/bubble/resources/cloud/NetworkActionsResource.java Прегледај датотеку

@@ -21,6 +21,7 @@ import bubble.model.cloud.*;
import bubble.server.BubbleConfiguration;
import bubble.service.account.StandardAuthenticatorService;
import bubble.service.backup.NetworkKeysService;
import bubble.service.cloud.NodeLaunchMonitor;
import bubble.service.cloud.NodeProgressMeterTick;
import bubble.service.cloud.StandardNetworkService;
import lombok.extern.slf4j.Slf4j;
@@ -49,6 +50,7 @@ public class NetworkActionsResource {

@Autowired private BubbleNodeDAO nodeDAO;
@Autowired private StandardNetworkService networkService;
@Autowired private NodeLaunchMonitor launchMonitor;
@Autowired private AccountMessageDAO messageDAO;
@Autowired private AccountPolicyDAO policyDAO;
@Autowired private NetworkKeysService keysService;
@@ -94,7 +96,7 @@ public class NetworkActionsResource {
@Context ContainerRequest ctx) {
final Account caller = userPrincipal(ctx);
final String account = caller.admin() ? network.getAccount() : caller.getUuid();
return ok(networkService.listLaunchStatuses(account, network.getUuid()));
return ok(launchMonitor.listLaunchStatuses(account, network.getUuid()));
}

@GET @Path(EP_STATUS+"/{uuid}")
@@ -103,7 +105,7 @@ public class NetworkActionsResource {
@PathParam("uuid") String uuid) {
final Account caller = userPrincipal(ctx);
final String account = caller.admin() ? network.getAccount() : caller.getUuid();
final NodeProgressMeterTick tick = networkService.getLaunchStatus(account, uuid);
final NodeProgressMeterTick tick = launchMonitor.getLaunchStatus(account, uuid);
return tick == null ? notFound(uuid) : ok(tick);
}



+ 1
- 1
bubble-server/src/main/java/bubble/server/BubbleConfiguration.java Прегледај датотеку

@@ -103,7 +103,7 @@ public class BubbleConfiguration extends PgRestServerConfiguration
}

@Getter @Setter private Boolean backupsEnabled = true;
public boolean backupsEnabled() { return !isSelfSage() && (backupsEnabled == null || backupsEnabled); }
public boolean backupsEnabled() { return (hasSageNode() || isSelfSage()) && (backupsEnabled == null || backupsEnabled); }

@Override public void registerConfigHandlerbarsHelpers(Handlebars handlebars) { registerUtilityHelpers(handlebars); }



+ 70
- 4
bubble-server/src/main/java/bubble/service/cloud/NodeLaunchMonitor.java Прегледај датотеку

@@ -6,22 +6,32 @@ package bubble.service.cloud;

import bubble.model.cloud.AnsibleInstallType;
import bubble.model.cloud.BubbleNetwork;
import bubble.notify.NewNodeNotification;
import bubble.server.BubbleConfiguration;
import lombok.Getter;
import lombok.NonNull;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.cobbzilla.util.collection.ExpirationEvictionPolicy;
import org.cobbzilla.util.collection.ExpirationMap;
import org.cobbzilla.util.daemon.SimpleDaemon;
import org.cobbzilla.wizard.cache.redis.RedisService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import static bubble.service.cloud.NodeProgressMeter.getProgressMeterKey;
import static bubble.service.cloud.NodeProgressMeter.getProgressMeterPrefix;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.cobbzilla.util.daemon.ZillaRuntime.*;
import static org.cobbzilla.util.json.JsonUtil.json;

@Service @Slf4j
public class NodeLaunchMonitor extends SimpleDaemon {
@@ -31,21 +41,29 @@ public class NodeLaunchMonitor extends SimpleDaemon {

@Getter private final long sleepTime = SECONDS.toMillis(15);

@Override public void processException(Exception e) { log.warn("processException: "+shortError(e)); }

@Autowired private BubbleConfiguration configuration;
@Autowired private RedisService redis;
@Autowired private StandardNetworkService networkService;

@Getter(lazy=true) private final RedisService networkSetupStatus = redis.prefixNamespace(getClass().getSimpleName()+"_status_");

private final Map<String, LauncherEntry> launcherThreads = new ConcurrentHashMap<>();
private final Map<String, String> canceledNetworks = new ExpirationMap<>(50, HOURS.toMillis(2));

public void register(String networkUuid, Thread t) {
public void register(String nnUuid, String networkUuid, Thread t) {
startIfNotRunning();
final LauncherEntry previousLaunch = launcherThreads.get(networkUuid);
if (previousLaunch != null && previousLaunch.isAlive()) {
log.warn("registerLauncher("+networkUuid+"): entry thread exists, stopping it: "+previousLaunch);
forceEndLauncher(previousLaunch);
}
launcherThreads.put(networkUuid, new LauncherEntry(networkUuid, t));
launcherThreads.put(networkUuid, new LauncherEntry(nnUuid, networkUuid, t));
}

public void cancel(String networkUuid) {
canceledNetworks.put(networkUuid, networkUuid);
final LauncherEntry previousLaunch = launcherThreads.get(networkUuid);
if (previousLaunch == null || !previousLaunch.isAlive()) {
log.warn("cancel("+networkUuid+"): entry does not thread exist, or is not alive: "+previousLaunch);
@@ -102,21 +120,69 @@ public class NodeLaunchMonitor extends SimpleDaemon {
}

private void forceEndLauncher(LauncherEntry entry) {
final NodeProgressMeter meter = getProgressMeter(entry.getNnUuid());
if (meter != null) meter.cancel();
terminate(entry.getThread(), LAUNCH_TERMINATE_TIMEOUT);
launcherThreads.remove(entry.getNetworkUuid());
}

@Override public void processException(Exception e) { log.warn("processException: "+shortError(e)); }
private final Map<String, NodeProgressMeter> progressMeters = new ExpirationMap<>(50, HOURS.toMillis(1), ExpirationEvictionPolicy.atime);

public NodeProgressMeter getProgressMeter(String nnId) { return progressMeters.get(nnId); }

public NodeProgressMeter getProgressMeter(@NonNull NewNodeNotification nn) {
return progressMeters.computeIfAbsent(nn.getUuid(), k -> new NodeProgressMeter(nn, getNetworkSetupStatus(), networkService, this));
}

public NodeProgressMeterTick getLaunchStatus(String accountUuid, String uuid) {
final String json = getNetworkSetupStatus().get(getProgressMeterKey(uuid, accountUuid));
if (json == null) return null;
try {
final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class);
if (!tick.hasAccount() || !tick.getAccount().equals(accountUuid)) {
log.warn("getLaunchStatus: tick.account != accountUuid, returning null");
return null;
}
return tick.setPattern(null);
} catch (Exception e) {
return die("getLaunchStatus: "+e);
}
}

public List<NodeProgressMeterTick> listLaunchStatuses(String accountUuid) {
return listLaunchStatuses(accountUuid, null);
}

public List<NodeProgressMeterTick> listLaunchStatuses(String accountUuid, String networkUuid) {
final RedisService stats = getNetworkSetupStatus();
final List<NodeProgressMeterTick> ticks = new ArrayList<>();
for (String key : stats.keys(getProgressMeterPrefix(accountUuid)+"*")) {
final String json = stats.get_withPrefix(key);
if (json != null) {
try {
final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class).setPattern(null);
if (networkUuid != null && tick.hasNetwork() && networkUuid.equals(tick.getNetwork())) {
ticks.add(tick);
}
} catch (Exception e) {
log.warn("currentTicks (bad json?): "+e);
}
}
}
return ticks;
}

@ToString
private static class LauncherEntry {

@Getter private final String nnUuid;
@Getter private final String networkUuid;
@Getter private final Thread thread;
@Getter private final long ctime;
@Getter private volatile long mtime;

public LauncherEntry(String networkUuid, Thread thread) {
public LauncherEntry(String nnUuid, String networkUuid, Thread thread) {
this.nnUuid = nnUuid;
this.networkUuid = networkUuid;
this.thread = thread;
this.ctime = now();


+ 1
- 1
bubble-server/src/main/java/bubble/service/cloud/NodeLauncher.java Прегледај датотеку

@@ -47,7 +47,7 @@ public class NodeLauncher implements Runnable {
final Thread launchThread = new Thread(new NodeLaunchThread(nodeRef, exceptionRef, networkService, newNodeRequest, launchMonitor));
launchThread.setDaemon(true);
launchThread.setName("NodeLaunchThread(network="+networkUuid+")");
launchMonitor.register(networkUuid, launchThread);
launchMonitor.register(newNodeRequest.getUuid(), networkUuid, launchThread);

log.info("NodeLauncher.run: launching node..."+newNodeRequest.getFqdn());
launchThread.start();


+ 19
- 2
bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeter.java Прегледај датотеку

@@ -67,7 +67,7 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable {
public NodeProgressMeter(NewNodeNotification nn,
RedisService redis,
StandardNetworkService networkService,
NodeLaunchMonitor launchMonitor) throws IOException {
NodeLaunchMonitor launchMonitor) {

this.nn = nn;
this.redis = redis;
@@ -83,7 +83,11 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable {
key = nn.getUuid();

final PipedInputStream pipeIn = new PipedInputStream(PIPE_SIZE);
connect(pipeIn);
try {
connect(pipeIn);
} catch (IOException e) {
die("NodeProgressMeter: error connecting pipe: "+shortError(e));
}

reader = new BufferedReader(new InputStreamReader(pipeIn));
writer = new BufferedWriter(new OutputStreamWriter(this));
@@ -199,6 +203,18 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable {
return new UncloseableNodeProgressMeter(this);
}

public void cancel () {
log.info("cancel: cancelling progress meter for network: "+nn.getNetworkName());
closed.set(true);
success.set(true);
_setCurrentTick(new NodeProgressMeterTick()
.setNetwork(nn.getNetwork())
.setAccount(nn.getAccount())
.setMessageKey(METER_CANCELED)
.setPercent(0));
background(this::close);
}

private class UncloseableNodeProgressMeter extends NodeProgressMeter {
private final NodeProgressMeter meter;
public UncloseableNodeProgressMeter(NodeProgressMeter meter) throws IOException {
@@ -206,5 +222,6 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable {
this.meter = meter;
}
@Override public void close() {}
@Override public void cancel() { meter.cancel(); }
}
}

+ 1
- 0
bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeterConstants.java Прегледај датотеку

@@ -68,6 +68,7 @@ public class NodeProgressMeterConstants {
public static final String METER_ERROR_ROLE_VALIDATION_ERRORS = "BUBBLE-ERROR: ROLE VALIDATION FAILED";

public static final String METER_COMPLETED = "meter_completed";
public static final String METER_CANCELED = "meter_canceled";
public static final String METER_START_OR_DNS_ERROR = "meter_start_or_dns_error";
public static final String METER_UNKNOWN_ERROR = "meter_unknown_error";



+ 9
- 49
bubble-server/src/main/java/bubble/service/cloud/StandardNetworkService.java Прегледај датотеку

@@ -76,10 +76,9 @@ import static bubble.server.BubbleConfiguration.ENV_DEBUG_NODE_INSTALL;
import static bubble.service.boot.StandardSelfNodeService.*;
import static bubble.service.cloud.NodeLaunchException.fatalLaunchFailure;
import static bubble.service.cloud.NodeLaunchException.launchFailureCanRetry;
import static bubble.service.cloud.NodeProgressMeter.getProgressMeterKey;
import static bubble.service.cloud.NodeProgressMeter.getProgressMeterPrefix;
import static bubble.service.cloud.NodeProgressMeterConstants.*;
import static java.util.concurrent.TimeUnit.*;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.commons.lang3.RandomStringUtils.randomAlphanumeric;
import static org.cobbzilla.util.daemon.Await.awaitAll;
import static org.cobbzilla.util.daemon.ZillaRuntime.*;
@@ -136,9 +135,9 @@ public class StandardNetworkService implements NetworkService {

@Autowired private RedisService redisService;
@Getter(lazy=true) private final RedisService networkLocks = redisService.prefixNamespace(getClass().getSimpleName()+"_lock_");
@Getter(lazy=true) private final RedisService networkSetupStatus = redisService.prefixNamespace(getClass().getSimpleName()+"_status_");

@NonNull public BubbleNode newNode(@NonNull final NewNodeNotification nn, NodeLaunchMonitor launchMonitor) {
@NonNull public BubbleNode newNode(@NonNull final NewNodeNotification nn,
NodeLaunchMonitor launchMonitor) {
final long start = now();
log.info("newNode starting:\n"+json(nn));
ComputeServiceDriver computeDriver = null;
@@ -148,7 +147,7 @@ public class StandardNetworkService implements NetworkService {
final BubbleNetwork network = nn.getNetworkObject();
final ExecutorService backgroundJobs = DaemonThreadFactory.fixedPool(3);
try {
progressMeter = new NodeProgressMeter(nn, getNetworkSetupStatus(), this, launchMonitor);
progressMeter = launchMonitor.getProgressMeter(nn);
progressMeter.write(METER_TICK_CONFIRMING_NETWORK_LOCK);

if (!confirmLock(nn.getNetwork(), lock)) {
@@ -536,7 +535,7 @@ public class StandardNetworkService implements NetworkService {
log.info(prefix+"starting");
final NotificationReceipt receipt = notificationService.notify(node, NotificationType.health_check, null);
if (receipt == null) {
log.info(prefix+" health_check failed, checking via cloud");
log.info(prefix+"health_check failed, checking via cloud");
final CloudService cloud = cloudDAO.findByUuid(node.getCloud());
if (cloud == null) {
log.warn(prefix+"cloud not found: "+node.getCloud());
@@ -705,6 +704,8 @@ public class StandardNetworkService implements NetworkService {
return true;
}

networkDAO.update(network.setState(BubbleNetworkState.stopping));

background(() -> {
String lock = null;
final String networkUuid = network.getUuid();
@@ -726,9 +727,6 @@ public class StandardNetworkService implements NetworkService {
}
}

network.setState(BubbleNetworkState.stopping);
networkDAO.update(network);

final ValidationResult validationResult = new ValidationResult();

// todo: parallel shutdown?
@@ -755,7 +753,7 @@ public class StandardNetworkService implements NetworkService {

} catch (RuntimeException e) {
log.error("stopNetwork: error stopping: "+e);
if (network != null) network.setState(BubbleNetworkState.error_stopping);
network.setState(BubbleNetworkState.error_stopping);
networkDAO.update(network);
throw e;

@@ -786,44 +784,6 @@ public class StandardNetworkService implements NetworkService {
return cloud;
}

public NodeProgressMeterTick getLaunchStatus(String accountUuid, String uuid) {
final String json = getNetworkSetupStatus().get(getProgressMeterKey(uuid, accountUuid));
if (json == null) return null;
try {
final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class);
if (!tick.hasAccount() || !tick.getAccount().equals(accountUuid)) {
log.warn("getLaunchStatus: tick.account != accountUuid, returning null");
return null;
}
return tick.setPattern(null);
} catch (Exception e) {
return die("getLaunchStatus: "+e);
}
}

public List<NodeProgressMeterTick> listLaunchStatuses(String accountUuid) {
return listLaunchStatuses(accountUuid, null);
}

public List<NodeProgressMeterTick> listLaunchStatuses(String accountUuid, String networkUuid) {
final RedisService stats = getNetworkSetupStatus();
final List<NodeProgressMeterTick> ticks = new ArrayList<>();
for (String key : stats.keys(getProgressMeterPrefix(accountUuid)+"*")) {
final String json = stats.get_withPrefix(key);
if (json != null) {
try {
final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class).setPattern(null);
if (networkUuid != null && tick.hasNetwork() && networkUuid.equals(tick.getNetwork())) {
ticks.add(tick);
}
} catch (Exception e) {
log.warn("currentTicks (bad json?): "+e);
}
}
}
return ticks;
}

private static class NodeLaunchAwait implements Runnable {

private final NodeProgressMeter progressMeter;


+ 1
- 0
bubble-server/src/main/resources/message_templates/en_US/server/post_auth/ResourceMessages.properties Прегледај датотеку

@@ -399,6 +399,7 @@ meter_tick_ready_check2=And what a lovely pie it is...
#meter_tick_ssh_keys=Setting up SSH keys

# Launch progress meter: success marker
meter_canceled=Bubble installation was canceled
meter_completed=Bubble installation completed successfully! On to your Bubble!

# Launch progress meter: errors


+ 1
- 1
bubble-web

@@ -1 +1 @@
Subproject commit 5ece6028d57ecf5b970d249e8ed35b70f32b23fb
Subproject commit 1ba5ed4572441a5ec253f455e4f00eea111bcacc

Loading…
Откажи
Сачувај