# Conflicts: # bubble-server/src/main/java/bubble/server/BubbleConfiguration.java # bubble-webpull/26/head
@@ -197,6 +197,7 @@ public class ApiConstants { | |||||
public static final String EP_PROMOTIONS = PROMOTIONS_ENDPOINT; | public static final String EP_PROMOTIONS = PROMOTIONS_ENDPOINT; | ||||
public static final String EP_FORK = "/fork"; | public static final String EP_FORK = "/fork"; | ||||
public static final String EP_NODE_MANAGER = "/nodeman"; | public static final String EP_NODE_MANAGER = "/nodeman"; | ||||
public static final String EP_UPGRADE = "/upgrade"; | |||||
public static final String DETECT_ENDPOINT = "/detect"; | public static final String DETECT_ENDPOINT = "/detect"; | ||||
public static final String EP_LOCALE = "/locale"; | public static final String EP_LOCALE = "/locale"; | ||||
@@ -33,6 +33,7 @@ import static org.cobbzilla.util.daemon.ZillaRuntime.*; | |||||
import static org.cobbzilla.util.http.HttpMethods.POST; | import static org.cobbzilla.util.http.HttpMethods.POST; | ||||
import static org.cobbzilla.util.http.HttpStatusCodes.*; | import static org.cobbzilla.util.http.HttpStatusCodes.*; | ||||
import static org.cobbzilla.util.http.HttpUtil.getResponse; | import static org.cobbzilla.util.http.HttpUtil.getResponse; | ||||
import static org.cobbzilla.util.json.JsonUtil.COMPACT_MAPPER; | |||||
import static org.cobbzilla.util.json.JsonUtil.json; | import static org.cobbzilla.util.json.JsonUtil.json; | ||||
import static org.cobbzilla.util.system.Sleep.sleep; | import static org.cobbzilla.util.system.Sleep.sleep; | ||||
import static org.cobbzilla.wizard.resources.ResourceUtil.invalidEx; | import static org.cobbzilla.wizard.resources.ResourceUtil.invalidEx; | ||||
@@ -54,6 +55,16 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
public static final String VULTR_SUBID = "SUBID"; | public static final String VULTR_SUBID = "SUBID"; | ||||
public static final String VULTR_V4_IP = "main_ip"; | public static final String VULTR_V4_IP = "main_ip"; | ||||
public static final String VULTR_V6_IP = "v6_main_ip"; | public static final String VULTR_V6_IP = "v6_main_ip"; | ||||
public static final String VULTR_LABEL = "label"; | |||||
public static final String VULTR_STATUS = "status"; | |||||
public static final String VULTR_STATUS_PENDING = "pending"; | |||||
public static final String VULTR_STATUS_ACTIVE = "active"; | |||||
public static final String VULTR_SERVER_STATE = "server_state"; | |||||
public static final String VULTR_STATE_NONE = "none"; | |||||
public static final String VULTR_STATE_OK = "ok"; | |||||
public static final String VULTR_STATE_LOCKED = "locked"; | |||||
public static final String CREATE_SERVER_URL = VULTR_API_BASE + "server/create"; | public static final String CREATE_SERVER_URL = VULTR_API_BASE + "server/create"; | ||||
public static final String DESTROY_SERVER_URL = VULTR_API_BASE + "server/destroy"; | public static final String DESTROY_SERVER_URL = VULTR_API_BASE + "server/destroy"; | ||||
@@ -129,6 +140,7 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
final HttpRequestBean serverRequest = auth(new HttpRequestBean(POST, CREATE_SERVER_URL, data)); | final HttpRequestBean serverRequest = auth(new HttpRequestBean(POST, CREATE_SERVER_URL, data)); | ||||
// create server, check response | // create server, check response | ||||
if (log.isInfoEnabled()) log.info("start: calling Vultr to start node: "+node.id()); | |||||
final HttpResponseBean serverResponse = serverRequest.curl(); // fixme: we can do better than shelling to curl | final HttpResponseBean serverResponse = serverRequest.curl(); // fixme: we can do better than shelling to curl | ||||
if (serverResponse.getStatus() != 200) return die("start: error creating server: " + serverResponse); | if (serverResponse.getStatus() != 200) return die("start: error creating server: " + serverResponse); | ||||
final JsonNode responseJson; | final JsonNode responseJson; | ||||
@@ -138,10 +150,11 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
return die("start: error creating server (error parsing response as JSON): " + serverResponse); | return die("start: error creating server (error parsing response as JSON): " + serverResponse); | ||||
} | } | ||||
final var subId = responseJson.get(VULTR_SUBID).textValue(); | final var subId = responseJson.get(VULTR_SUBID).textValue(); | ||||
if (log.isDebugEnabled()) log.debug("start: Vultr started node: "+node.id()+" SUBID="+subId); | |||||
node.setState(BubbleNodeState.booting); | node.setState(BubbleNodeState.booting); | ||||
node.setTag(TAG_INSTANCE_ID, subId); | node.setTag(TAG_INSTANCE_ID, subId); | ||||
nodeDAO.update(node); | |||||
// nodeDAO.update(node); | |||||
final long start = now(); | final long start = now(); | ||||
boolean startedOk = false; | boolean startedOk = false; | ||||
@@ -151,37 +164,37 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
sleep(SERVER_START_POLL_INTERVAL); | sleep(SERVER_START_POLL_INTERVAL); | ||||
final HttpResponseBean pollResponse = getResponse(poll); | final HttpResponseBean pollResponse = getResponse(poll); | ||||
if (pollResponse.getStatus() != OK) { | if (pollResponse.getStatus() != OK) { | ||||
return die("start: error polling subid: "+subId+": "+pollResponse); | |||||
return die("start: error polling node "+node.id()+" subid: "+subId+": "+pollResponse); | |||||
} | } | ||||
// todo: add timeout, if server doesn't come up within X minutes, try to kill it and report an error | |||||
final JsonNode serverNode = json(pollResponse.getEntityString(), JsonNode.class); | final JsonNode serverNode = json(pollResponse.getEntityString(), JsonNode.class); | ||||
if (log.isDebugEnabled()) log.debug("start: polled node "+node.id()+" json="+json(serverNode, COMPACT_MAPPER)); | |||||
if (serverNode != null) { | if (serverNode != null) { | ||||
if (serverNode.has("tag") | if (serverNode.has("tag") | ||||
&& serverNode.get("tag").textValue().equals(cloud.getUuid()) | && serverNode.get("tag").textValue().equals(cloud.getUuid()) | ||||
&& serverNode.has("status") | |||||
&& serverNode.has("server_state") | |||||
&& serverNode.has(VULTR_STATUS) | |||||
&& serverNode.has(VULTR_SERVER_STATE) | |||||
&& serverNode.has(VULTR_V4_IP)) { | && serverNode.has(VULTR_V4_IP)) { | ||||
final String status = serverNode.get("status").textValue(); | |||||
final String serverState = serverNode.get("server_state").textValue(); | |||||
final String status = serverNode.get(VULTR_STATUS).textValue(); | |||||
final String serverState = serverNode.get(VULTR_SERVER_STATE).textValue(); | |||||
final String ip4 = serverNode.get(VULTR_V4_IP).textValue(); | final String ip4 = serverNode.get(VULTR_V4_IP).textValue(); | ||||
final String ip6 = serverNode.get(VULTR_V6_IP).textValue(); | final String ip6 = serverNode.get(VULTR_V6_IP).textValue(); | ||||
// log.info("start: server_state="+serverState+", status="+status, "ip4="+ip4+", ip6="+ip6); | |||||
// if (log.isInfoEnabled()) log.info("start: server_state="+serverState+", status="+status, "ip4="+ip4+", ip6="+ip6); | |||||
if (ip4 != null && ip4.length() > 0 && !ip4.equals("0.0.0.0")) { | if (ip4 != null && ip4.length() > 0 && !ip4.equals("0.0.0.0")) { | ||||
node.setIp4(ip4); | node.setIp4(ip4); | ||||
nodeDAO.update(node); | |||||
// nodeDAO.update(node); | |||||
} | } | ||||
if (ip6 != null && ip6.length() > 0) { | if (ip6 != null && ip6.length() > 0) { | ||||
node.setIp6(ip6); | node.setIp6(ip6); | ||||
nodeDAO.update(node); | |||||
// nodeDAO.update(node); | |||||
} | } | ||||
if (status.equals("active") && (node.hasIp4() || node.hasIp6())) { | |||||
if (status.equals(VULTR_STATUS_ACTIVE) && (node.hasIp4() || node.hasIp6())) { | |||||
node.setState(BubbleNodeState.booted); | node.setState(BubbleNodeState.booted); | ||||
nodeDAO.update(node); | |||||
// nodeDAO.update(node); | |||||
} | } | ||||
if (serverState.equals("ok")) { | |||||
log.info("start: server is ready: "+node.id()); | |||||
if (serverState.equals(VULTR_STATE_OK)) { | |||||
if (log.isInfoEnabled()) log.info("start: server is ready: "+node.id()); | |||||
startedOk = true; | startedOk = true; | ||||
break; | break; | ||||
} | } | ||||
@@ -189,7 +202,7 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
} | } | ||||
} | } | ||||
if (!startedOk) { | if (!startedOk) { | ||||
log.error("start: timeout waiting for node to boot and become available, stopping it"); | |||||
if (log.isErrorEnabled()) log.error("start: timeout waiting for node "+node.id()+" to boot and become available, stopping it"); | |||||
stop(node); | stop(node); | ||||
} | } | ||||
return node; | return node; | ||||
@@ -210,45 +223,35 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
try { | try { | ||||
_stop(node); | _stop(node); | ||||
} catch (EntityNotFoundException e) { | } catch (EntityNotFoundException e) { | ||||
log.info("stop: node stopped"); | |||||
if (log.isInfoEnabled()) log.info("stop: node stopped"); | |||||
return node; | return node; | ||||
} catch (Exception e) { | } catch (Exception e) { | ||||
if (log.isInfoEnabled()) log.info("stop: _stop failed with: "+shortError(e)); | |||||
lastEx = e; | lastEx = e; | ||||
} | } | ||||
sleep(SERVER_STOP_CHECK_INTERVAL, "stop: waiting to try stopping again until node is not found"); | sleep(SERVER_STOP_CHECK_INTERVAL, "stop: waiting to try stopping again until node is not found"); | ||||
log.warn("stop: node still running: "+node.id()); | |||||
if (log.isWarnEnabled()) log.warn("stop: node still running: "+node.id()); | |||||
} | } | ||||
log.error("stop: error stopping node: "+node.id()); | |||||
if (log.isErrorEnabled()) log.error("stop: error stopping node: "+node.id()); | |||||
if (lastEx != null) throw lastEx; | if (lastEx != null) throw lastEx; | ||||
return die("stop: timeout stopping node: "+node.id()); | return die("stop: timeout stopping node: "+node.id()); | ||||
} | } | ||||
public BubbleNode _stop(BubbleNode node) throws IOException { | public BubbleNode _stop(BubbleNode node) throws IOException { | ||||
BubbleNode vultrNode; | |||||
final String ip4 = node.getIp4(); | |||||
if (!node.hasTag(TAG_INSTANCE_ID)) { | |||||
if (ip4 == null) { | |||||
throw notFoundEx(node.id()); | |||||
} | |||||
log.warn("stop: no "+TAG_INSTANCE_ID+" tag found on node ("+node.getFqdn()+"/"+ ip4 +"), searching based in ip4..."); | |||||
vultrNode = findByIp4(node, ip4); | |||||
} else { | |||||
// does the node still exist? | |||||
vultrNode = listNode(node); | |||||
if (vultrNode == null) { | |||||
vultrNode = findByIp4(node, ip4); | |||||
} | |||||
} | |||||
// does the node still exist? | |||||
BubbleNode vultrNode = listNode(node); | |||||
if (vultrNode == null) { | if (vultrNode == null) { | ||||
throw notFoundEx(node.id()); | throw notFoundEx(node.id()); | ||||
} | } | ||||
final String subId = vultrNode.getTag(TAG_INSTANCE_ID); | final String subId = vultrNode.getTag(TAG_INSTANCE_ID); | ||||
if (subId == null) { | if (subId == null) { | ||||
if (log.isErrorEnabled()) log.error("_stop: node "+node.id()+" is missing tag "+TAG_INSTANCE_ID+", cannot stop, throwing invalidEx"); | |||||
throw invalidEx("err.node.stop.error", "stop: no " + VULTR_SUBID + " on node, returning"); | throw invalidEx("err.node.stop.error", "stop: no " + VULTR_SUBID + " on node, returning"); | ||||
} | } | ||||
if (log.isInfoEnabled()) log.info("_stop: calling stopServer("+subId+") for node "+node.id()); | |||||
stopServer(subId); | stopServer(subId); | ||||
return node; | return node; | ||||
} | } | ||||
@@ -267,11 +270,11 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
.findFirst() | .findFirst() | ||||
.orElse(null); | .orElse(null); | ||||
if (found == null) { | if (found == null) { | ||||
log.warn("stop: no subid tag found on node ("+node.getFqdn()+"/"+ ip4 +") and no server had this ip4"); | |||||
if (log.isWarnEnabled()) log.warn("stop: no subid tag found on node ("+node.getFqdn()+"/"+ ip4 +") and no server had this ip4"); | |||||
return null; | return null; | ||||
} | } | ||||
if (!found.hasTag(TAG_INSTANCE_ID)) { | if (!found.hasTag(TAG_INSTANCE_ID)) { | ||||
log.warn("stop: no subid tag found on node ("+node.getFqdn()+"/"+ ip4 +"), cannot stop"); | |||||
if (log.isWarnEnabled()) log.warn("stop: no subid tag found on node ("+node.getFqdn()+"/"+ ip4 +"), cannot stop"); | |||||
return null; | return null; | ||||
} | } | ||||
return found; | return found; | ||||
@@ -288,16 +291,19 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
final JsonNode subId = jsonNode.get(VULTR_SUBID); | final JsonNode subId = jsonNode.get(VULTR_SUBID); | ||||
final JsonNode ip4 = jsonNode.get(VULTR_V4_IP); | final JsonNode ip4 = jsonNode.get(VULTR_V4_IP); | ||||
final JsonNode ip6 = jsonNode.get(VULTR_V6_IP); | final JsonNode ip6 = jsonNode.get(VULTR_V6_IP); | ||||
return (subId != null && node.hasTag(TAG_INSTANCE_ID) && subId.textValue().equals(node.getTag(TAG_INSTANCE_ID))) | |||||
|| (ip4 != null && node.hasIp4() && ip4.textValue().equals(node.getIp4())) | |||||
|| (ip6 != null && node.hasIp6() && ip6.textValue().equals(node.getIp6())) ? node : null; | |||||
if (log.isTraceEnabled()) log.trace("listNode("+node.id()+") found node: "+json(jsonNode, COMPACT_MAPPER)); | |||||
if (subId != null) node.setTag(TAG_INSTANCE_ID, subId.textValue()); | |||||
return node.setIp4(ip4 == null ? null : ip4.textValue()).setIp6(ip6 == null ? null : ip6.textValue()); | |||||
} catch (Exception e) { | } catch (Exception e) { | ||||
log.error("listNode: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse+": exception="+shortError(e)); | |||||
if (log.isErrorEnabled()) log.error("listNode: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse+": exception="+shortError(e)); | |||||
return null; | return null; | ||||
} | } | ||||
case NOT_FOUND: return null; | |||||
case NOT_FOUND: | |||||
if (log.isDebugEnabled()) log.debug("listNode("+node.id()+") returned 404 Not Found"); | |||||
return null; | |||||
default: | default: | ||||
log.error("listNode: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse); | |||||
if (log.isErrorEnabled()) log.error("listNode: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse); | |||||
return null; | return null; | ||||
} | } | ||||
} | } | ||||
@@ -320,7 +326,7 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
final String subid = iter.next(); | final String subid = iter.next(); | ||||
final ObjectNode server = (ObjectNode) entity.get(subid); | final ObjectNode server = (ObjectNode) entity.get(subid); | ||||
if (!filter.apply(server)) { | if (!filter.apply(server)) { | ||||
log.debug("Skipping node without cloud tag "+cloud.getUuid()+": "+subid); | |||||
if (log.isTraceEnabled()) log.trace("Skipping node without cloud tag "+cloud.getUuid()+": "+subid); | |||||
continue; | continue; | ||||
} | } | ||||
final String subId = server.has(VULTR_SUBID) ? server.get(VULTR_SUBID).textValue() : null; | final String subId = server.has(VULTR_SUBID) ? server.get(VULTR_SUBID).textValue() : null; | ||||
@@ -336,41 +342,53 @@ public class VultrDriver extends ComputeServiceDriverBase { | |||||
} | } | ||||
@Override public BubbleNode status(BubbleNode node) throws Exception { | @Override public BubbleNode status(BubbleNode node) throws Exception { | ||||
if (node.hasTag(TAG_INSTANCE_ID)) { | |||||
final BubbleNode found = listNode(node); | |||||
if (found == null) return node.setState(BubbleNodeState.stopped); | |||||
return node; | |||||
} else if (node.hasIp4()) { | |||||
// find by IPv4 | |||||
final HttpRequestBean listServerRequest = auth(new HttpRequestBean(LIST_SERVERS_URL)); | |||||
final HttpResponseBean listResponse = getResponse(listServerRequest); | |||||
switch (listResponse.getStatus()) { | |||||
case OK: | |||||
final JsonNode entity = json(listResponse.getEntityString(), JsonNode.class); | |||||
for (Iterator<String> iter = entity.fieldNames(); iter.hasNext(); ) { | |||||
final String subid = iter.next(); | |||||
final ObjectNode server = (ObjectNode) entity.get(subid); | |||||
final String ip4 = server.has(VULTR_V4_IP) ? server.get(VULTR_V4_IP).textValue() : ""; | |||||
if (ip4.equals(node.getIp4())) { | |||||
if (server.has("power_status") && server.get("power_status").textValue().equals("running") | |||||
&& server.has("server_state") && server.get("server_state").textValue().equals("ok")) { | |||||
final String ip6 = server.has(VULTR_V6_IP) ? server.get(VULTR_V6_IP).textValue() : null; | |||||
return node.setIp4(ip4).setIp6(ip6).setState(BubbleNodeState.running); | |||||
// find by label | |||||
final HttpRequestBean listServerRequest = auth(new HttpRequestBean(LIST_SERVERS_URL+"?"+VULTR_LABEL+"="+node.getFqdn())); | |||||
final HttpResponseBean listResponse = getResponse(listServerRequest); | |||||
switch (listResponse.getStatus()) { | |||||
case OK: | |||||
final JsonNode entity = json(listResponse.getEntityString(), JsonNode.class); | |||||
for (Iterator<String> iter = entity.fieldNames(); iter.hasNext(); ) { | |||||
final String subid = iter.next(); | |||||
final ObjectNode server = (ObjectNode) entity.get(subid); | |||||
final String label = server.has(VULTR_LABEL) ? server.get(VULTR_LABEL).textValue() : ""; | |||||
if (label.equals(node.getFqdn())) { | |||||
if (log.isDebugEnabled()) log.debug("status("+node.id()+"): found json: "+json(server, COMPACT_MAPPER)); | |||||
if (server.has(VULTR_SERVER_STATE) && server.has(VULTR_STATUS)) { | |||||
final String status = server.get(VULTR_STATUS).textValue(); | |||||
final String serverState = server.get(VULTR_SERVER_STATE).textValue(); | |||||
final String ip4 = server.has(VULTR_V4_IP) ? server.get(VULTR_V4_IP).textValue() : null; | |||||
final String ip6 = server.has(VULTR_V6_IP) ? server.get(VULTR_V6_IP).textValue() : null; | |||||
node.setIp4(ip4).setIp6(ip6); | |||||
if (status.equals(VULTR_STATUS_PENDING) || serverState.equals(VULTR_STATE_NONE)) { | |||||
if (log.isDebugEnabled()) log.debug("status("+node.id()+"): pending/none: returning node status==starting"); | |||||
return node.setState(BubbleNodeState.starting); | |||||
} | |||||
if (status.equals(VULTR_STATUS_ACTIVE)) { | |||||
if (serverState.equals(VULTR_STATE_OK)) { | |||||
if (log.isDebugEnabled()) log.debug("status(" + node.id() + "): active/ok: returning node status==running"); | |||||
return node.setState(BubbleNodeState.running); | |||||
} else if (serverState.equals(VULTR_STATE_LOCKED)) { | |||||
if (log.isDebugEnabled()) log.debug("status(" + node.id() + "): active/locked: returning node status==starting"); | |||||
return node.setState(BubbleNodeState.starting); | |||||
} | |||||
} | } | ||||
if (log.isDebugEnabled()) log.debug("status("+node.id()+"): status/state = "+status+"/"+serverState+": returning node status==unknown_error"); | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
} | } | ||||
} | } | ||||
case NOT_FOUND: case PRECONDITION_FAILED: | |||||
log.error("status: error response from API, returning unknown"); | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
} | |||||
log.error("status: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse); | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
default: | |||||
log.error("status: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse); | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
} | |||||
} else { | |||||
// Node has no IP4 | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
case NOT_FOUND: case PRECONDITION_FAILED: | |||||
log.error("status: error response from API, returning unknown"); | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
default: | |||||
log.error("status: error finding node "+node.id()+", status="+listResponse.getStatus()+": "+listResponse); | |||||
return node.setState(BubbleNodeState.unknown_error); | |||||
} | } | ||||
} | } | ||||
@@ -34,7 +34,8 @@ import java.util.Arrays; | |||||
import java.util.Collection; | import java.util.Collection; | ||||
import java.util.List; | import java.util.List; | ||||
import static bubble.ApiConstants.*; | |||||
import static bubble.ApiConstants.EP_NETWORKS; | |||||
import static bubble.ApiConstants.ROOT_NETWORK_UUID; | |||||
import static bubble.model.cloud.BubbleDomain.DOMAIN_NAME_MAXLEN; | import static bubble.model.cloud.BubbleDomain.DOMAIN_NAME_MAXLEN; | ||||
import static bubble.model.cloud.BubbleNetworkState.created; | import static bubble.model.cloud.BubbleNetworkState.created; | ||||
import static bubble.server.BubbleConfiguration.getDEFAULT_LOCALE; | import static bubble.server.BubbleConfiguration.getDEFAULT_LOCALE; | ||||
@@ -52,7 +53,7 @@ import static org.cobbzilla.wizard.model.crypto.EncryptedTypes.ENC_PAD; | |||||
@ECTypeChild(type=BubbleNode.class, backref="network") | @ECTypeChild(type=BubbleNode.class, backref="network") | ||||
}) | }) | ||||
@Entity @NoArgsConstructor @Accessors(chain=true) | @Entity @NoArgsConstructor @Accessors(chain=true) | ||||
@Slf4j @ToString(of={"name", "domainName", "installType"}) | |||||
@Slf4j @ToString(of={"name", "domainName", "installType", "state"}) | |||||
@ECIndexes({ | @ECIndexes({ | ||||
@ECIndex(unique=true, of={"account", "name"}), | @ECIndex(unique=true, of={"account", "name"}), | ||||
@ECIndex(unique=true, of={"name", "domainName"}) | @ECIndex(unique=true, of={"name", "domainName"}) | ||||
@@ -16,6 +16,6 @@ public enum BubbleNetworkState { | |||||
public boolean canStart() { return this == created || this == stopped; } | public boolean canStart() { return this == created || this == stopped; } | ||||
public boolean canStop() { return this != stopping && this != stopped && this != error_stopping; } | |||||
public boolean canStop() { return this != stopped && this != error_stopping; } | |||||
} | } |
@@ -254,4 +254,8 @@ public class BubbleNode extends IdentifiableBase implements HasNetwork, HasBubbl | |||||
} | } | ||||
if (!hasIp4() || !hasIp6()) throw new TimeoutException("waitForIpAddresses: timeout"); | if (!hasIp4() || !hasIp6()) throw new TimeoutException("waitForIpAddresses: timeout"); | ||||
} | } | ||||
@Transient @Getter @Setter private BubbleVersionInfo sageVersion; | |||||
public boolean hasSageVersion () { return sageVersion != null && sageVersion.valid(); } | |||||
} | } |
@@ -0,0 +1,17 @@ | |||||
package bubble.model.cloud; | |||||
import lombok.Getter; | |||||
import lombok.Setter; | |||||
import lombok.experimental.Accessors; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.empty; | |||||
@Accessors(chain=true) | |||||
public class BubbleVersionInfo { | |||||
@Getter @Setter private String version; | |||||
@Getter @Setter private String sha256; | |||||
public boolean valid() { return !empty(version) && !empty(sha256); } | |||||
} |
@@ -37,6 +37,10 @@ public enum NotificationType { | |||||
health_check, hello_to_sage, hello_from_sage, peer_hello, sync_password, | health_check, hello_to_sage, hello_from_sage, peer_hello, sync_password, | ||||
register_backup, retrieve_backup, backup_response, restore_complete, fork, | register_backup, retrieve_backup, backup_response, restore_complete, fork, | ||||
// upgrade notifications | |||||
upgrade_request (String.class), | |||||
upgrade_response (true), | |||||
// driver-level notifications | // driver-level notifications | ||||
// delegated dns driver notifications | // delegated dns driver notifications | ||||
@@ -32,7 +32,7 @@ public class NotificationHandler_hello_to_sage extends ReceivedNotificationHandl | |||||
log.info("hello_to_sage: returning peers: "+peers.stream().map(BubbleNode::getFqdn).collect(joining(", "))); | log.info("hello_to_sage: returning peers: "+peers.stream().map(BubbleNode::getFqdn).collect(joining(", "))); | ||||
node.setPeers(peers); | node.setPeers(peers); | ||||
notificationService.notify(node, hello_from_sage, node); | |||||
notificationService.notify(node, hello_from_sage, node.setSageVersion(configuration.getVersionInfo())); | |||||
} | } | ||||
} | } | ||||
} | } |
@@ -19,7 +19,8 @@ public class NotificationHandler_compute_driver_stop extends NotificationHandler | |||||
@Override protected BubbleNode handle(ReceivedNotification n, | @Override protected BubbleNode handle(ReceivedNotification n, | ||||
ComputeDriverNotification notification, | ComputeDriverNotification notification, | ||||
ComputeServiceDriver compute) throws Exception { | ComputeServiceDriver compute) throws Exception { | ||||
return nodeService.stopNode(compute, notification.getNode()); | |||||
nodeService.stopNode(compute, notification.getNode()); | |||||
return notification.getNode(); | |||||
} | } | ||||
} | } |
@@ -0,0 +1,18 @@ | |||||
package bubble.notify.upgrade; | |||||
import bubble.model.cloud.BubbleVersionInfo; | |||||
import bubble.notify.SynchronousNotification; | |||||
import lombok.AllArgsConstructor; | |||||
import lombok.Getter; | |||||
import lombok.NoArgsConstructor; | |||||
import lombok.Setter; | |||||
import lombok.experimental.Accessors; | |||||
@NoArgsConstructor @AllArgsConstructor @Accessors(chain=true) | |||||
public class JarUpgradeNotification extends SynchronousNotification { | |||||
@Getter @Setter private BubbleVersionInfo versionInfo; | |||||
@Override protected String getCacheKey() { return versionInfo.getSha256(); } | |||||
} |
@@ -0,0 +1,28 @@ | |||||
package bubble.notify.upgrade; | |||||
import bubble.dao.cloud.BubbleNodeDAO; | |||||
import bubble.model.cloud.BubbleNode; | |||||
import bubble.model.cloud.notify.ReceivedNotification; | |||||
import bubble.notify.DelegatedNotificationHandlerBase; | |||||
import bubble.service.boot.BubbleJarUpgradeService; | |||||
import org.springframework.beans.factory.annotation.Autowired; | |||||
import static bubble.model.cloud.notify.NotificationType.upgrade_response; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.die; | |||||
public class NotificationHandler_upgrade_request extends DelegatedNotificationHandlerBase { | |||||
@Autowired private BubbleNodeDAO nodeDAO; | |||||
@Autowired private BubbleJarUpgradeService upgradeService; | |||||
@Override public void handleNotification(ReceivedNotification n) { | |||||
final BubbleNode sender = nodeDAO.findByUuid(n.getFromNode()); | |||||
if (sender == null) { | |||||
die("sender not found: "+n.getFromNode()); | |||||
} else { | |||||
final String key = upgradeService.registerNodeUpgrade(sender.getUuid()); | |||||
notifySender(upgrade_response, n.getNotificationId(), sender, key); | |||||
} | |||||
} | |||||
} |
@@ -30,6 +30,7 @@ import bubble.service.account.StandardAuthenticatorService; | |||||
import bubble.service.backup.RestoreService; | import bubble.service.backup.RestoreService; | ||||
import bubble.service.bill.PromotionService; | import bubble.service.bill.PromotionService; | ||||
import bubble.service.boot.ActivationService; | import bubble.service.boot.ActivationService; | ||||
import bubble.service.boot.BubbleJarUpgradeService; | |||||
import bubble.service.boot.NodeManagerService; | import bubble.service.boot.NodeManagerService; | ||||
import bubble.service.boot.SageHelloService; | import bubble.service.boot.SageHelloService; | ||||
import bubble.service.cloud.DeviceIdService; | import bubble.service.cloud.DeviceIdService; | ||||
@@ -675,4 +676,32 @@ public class AuthResource { | |||||
return send(new FileSendableResource(patch)); | return send(new FileSendableResource(patch)); | ||||
} | } | ||||
@Autowired private BubbleJarUpgradeService upgradeService; | |||||
@GET @Path(EP_UPGRADE+"/{key}") | |||||
@Produces(APPLICATION_OCTET_STREAM) | |||||
public Response getUpgrade(@Context Request req, | |||||
@Context ContainerRequest ctx, | |||||
@PathParam("key") String key) { | |||||
final String nodeUuid = upgradeService.getNodeForKey(key); | |||||
if (nodeUuid == null) { | |||||
log.warn("getUpgrade: key not found: "+key); | |||||
return unauthorized(); | |||||
} | |||||
final BubbleNode node = nodeDAO.findByUuid(nodeUuid); | |||||
if (node == null) { | |||||
log.warn("getUpgrade: node not found: "+nodeUuid); | |||||
return unauthorized(); | |||||
} | |||||
final String remoteAddr = req.getRemoteAddr(); | |||||
if (!node.hasSameIp(remoteAddr)) { | |||||
log.warn("getUpgrade: node has wrong IP (request came from "+remoteAddr+"): "+node.id()); | |||||
return unauthorized(); | |||||
} | |||||
return send(new FileSendableResource(configuration.getBubbleJar())); | |||||
} | |||||
} | } |
@@ -26,6 +26,7 @@ import bubble.server.BubbleConfiguration; | |||||
import bubble.service.account.StandardAccountMessageService; | import bubble.service.account.StandardAccountMessageService; | ||||
import bubble.service.account.StandardAuthenticatorService; | import bubble.service.account.StandardAuthenticatorService; | ||||
import bubble.service.account.download.AccountDownloadService; | import bubble.service.account.download.AccountDownloadService; | ||||
import bubble.service.boot.BubbleJarUpgradeService; | |||||
import bubble.service.boot.BubbleModelSetupService; | import bubble.service.boot.BubbleModelSetupService; | ||||
import bubble.service.cloud.NodeLaunchMonitor; | import bubble.service.cloud.NodeLaunchMonitor; | ||||
import com.fasterxml.jackson.databind.JsonNode; | import com.fasterxml.jackson.databind.JsonNode; | ||||
@@ -65,8 +66,7 @@ import java.util.Map; | |||||
import static bubble.ApiConstants.*; | import static bubble.ApiConstants.*; | ||||
import static bubble.model.account.Account.validatePassword; | import static bubble.model.account.Account.validatePassword; | ||||
import static bubble.resources.account.AuthResource.forgotPasswordMessage; | import static bubble.resources.account.AuthResource.forgotPasswordMessage; | ||||
import static org.cobbzilla.util.daemon.ZillaRuntime.empty; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.errorString; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.*; | |||||
import static org.cobbzilla.util.http.HttpContentTypes.*; | import static org.cobbzilla.util.http.HttpContentTypes.*; | ||||
import static org.cobbzilla.util.json.JsonUtil.json; | import static org.cobbzilla.util.json.JsonUtil.json; | ||||
import static org.cobbzilla.wizard.resources.ResourceUtil.*; | import static org.cobbzilla.wizard.resources.ResourceUtil.*; | ||||
@@ -406,4 +406,13 @@ public class MeResource { | |||||
return ok(modelSetupService.setupModel(api, caller, modelFile)); | return ok(modelSetupService.setupModel(api, caller, modelFile)); | ||||
} | } | ||||
@Autowired private BubbleJarUpgradeService jarUpgradeService; | |||||
@POST @Path(EP_UPGRADE) | |||||
public Response uploadModel(@Context Request req, | |||||
@Context ContainerRequest ctx) { | |||||
background(() -> jarUpgradeService.upgrade()); | |||||
return ok(configuration.getPublicSystemConfigs()); | |||||
} | |||||
} | } |
@@ -11,10 +11,7 @@ import bubble.client.BubbleApiClient; | |||||
import bubble.cloud.CloudServiceDriver; | import bubble.cloud.CloudServiceDriver; | ||||
import bubble.dao.account.AccountDAO; | import bubble.dao.account.AccountDAO; | ||||
import bubble.dao.cloud.CloudServiceDAO; | import bubble.dao.cloud.CloudServiceDAO; | ||||
import bubble.model.cloud.AnsibleInstallType; | |||||
import bubble.model.cloud.BubbleNetwork; | |||||
import bubble.model.cloud.BubbleNetworkState; | |||||
import bubble.model.cloud.BubbleNode; | |||||
import bubble.model.cloud.*; | |||||
import bubble.model.device.DeviceSecurityLevel; | import bubble.model.device.DeviceSecurityLevel; | ||||
import bubble.server.listener.BubbleFirstTimeListener; | import bubble.server.listener.BubbleFirstTimeListener; | ||||
import bubble.service.backup.RestoreService; | import bubble.service.backup.RestoreService; | ||||
@@ -69,6 +66,7 @@ import static org.cobbzilla.util.io.FileUtil.abs; | |||||
import static org.cobbzilla.util.io.StreamUtil.loadResourceAsStream; | import static org.cobbzilla.util.io.StreamUtil.loadResourceAsStream; | ||||
import static org.cobbzilla.util.reflect.ReflectionUtil.copy; | import static org.cobbzilla.util.reflect.ReflectionUtil.copy; | ||||
import static org.cobbzilla.util.security.ShaUtil.sha256_file; | import static org.cobbzilla.util.security.ShaUtil.sha256_file; | ||||
import static org.cobbzilla.wizard.model.SemanticVersion.isNewerVersion; | |||||
@Configuration @NoArgsConstructor @Slf4j | @Configuration @NoArgsConstructor @Slf4j | ||||
public class BubbleConfiguration extends PgRestServerConfiguration | public class BubbleConfiguration extends PgRestServerConfiguration | ||||
@@ -95,6 +93,8 @@ public class BubbleConfiguration extends PgRestServerConfiguration | |||||
public static final String TAG_SECURITY_LEVELS = "securityLevels"; | public static final String TAG_SECURITY_LEVELS = "securityLevels"; | ||||
public static final String TAG_RESTORE_MODE = "awaitingRestore"; | public static final String TAG_RESTORE_MODE = "awaitingRestore"; | ||||
public static final String TAG_RESTORING_IN_PROGRESS = "restoringInProgress"; | public static final String TAG_RESTORING_IN_PROGRESS = "restoringInProgress"; | ||||
public static final String TAG_JAR_VERSION = "jarVersion"; | |||||
public static final String TAG_JAR_UPGRADE_AVAILABLE = "jarUpgradeAvailable"; | |||||
public static final String DEFAULT_LOCAL_STORAGE_DIR = HOME_DIR + "/.bubble_local_storage"; | public static final String DEFAULT_LOCAL_STORAGE_DIR = HOME_DIR + "/.bubble_local_storage"; | ||||
@@ -245,6 +245,24 @@ public class BubbleConfiguration extends PgRestServerConfiguration | |||||
} | } | ||||
return properties.getProperty(META_PROP_BUBBLE_VERSION); | return properties.getProperty(META_PROP_BUBBLE_VERSION); | ||||
} | } | ||||
@Getter(lazy=true) private final BubbleVersionInfo versionInfo = initBubbleVersionInfo(); | |||||
private BubbleVersionInfo initBubbleVersionInfo() { | |||||
return new BubbleVersionInfo() | |||||
.setVersion(getVersion()) | |||||
.setSha256(getJarSha()); | |||||
} | |||||
@Getter private BubbleVersionInfo sageVersionInfo; | |||||
public void setSageVersionInfo(BubbleVersionInfo version) { | |||||
sageVersionInfo = version; | |||||
final boolean isNewer = isNewerVersion(getVersionInfo().getVersion(), sageVersionInfo.getVersion()); | |||||
if (!jarUpgradeAvailable && isNewer) { | |||||
jarUpgradeAvailable = true; | |||||
refreshPublicSystemConfigs(); | |||||
} | |||||
} | |||||
public boolean hasSageVersionInfo () { return sageVersionInfo != null; } | |||||
@Getter private Boolean jarUpgradeAvailable = false; | |||||
@JsonIgnore public String getUnlockKey () { return BubbleFirstTimeListener.getUnlockKey(); } | @JsonIgnore public String getUnlockKey () { return BubbleFirstTimeListener.getUnlockKey(); } | ||||
@@ -320,7 +338,9 @@ public class BubbleConfiguration extends PgRestServerConfiguration | |||||
&& getBean(RestoreService.class).isSelfRestoreStarted()}, | && getBean(RestoreService.class).isSelfRestoreStarted()}, | ||||
{TAG_SSL_PORT, getDefaultSslPort()}, | {TAG_SSL_PORT, getDefaultSslPort()}, | ||||
{TAG_SUPPORT, getSupport()}, | {TAG_SUPPORT, getSupport()}, | ||||
{TAG_SECURITY_LEVELS, DeviceSecurityLevel.values()} | |||||
{TAG_SECURITY_LEVELS, DeviceSecurityLevel.values()}, | |||||
{TAG_JAR_VERSION, getVersion()}, | |||||
{TAG_JAR_UPGRADE_AVAILABLE, getJarUpgradeAvailable() ? getSageVersionInfo() : null} | |||||
})); | })); | ||||
} else { | } else { | ||||
// some things has to be refreshed all the time in some cases: | // some things has to be refreshed all the time in some cases: | ||||
@@ -0,0 +1,93 @@ | |||||
package bubble.service.boot; | |||||
import bubble.dao.cloud.BubbleBackupDAO; | |||||
import bubble.model.cloud.BackupStatus; | |||||
import bubble.model.cloud.BubbleBackup; | |||||
import bubble.model.cloud.BubbleVersionInfo; | |||||
import bubble.notify.upgrade.JarUpgradeNotification; | |||||
import bubble.server.BubbleConfiguration; | |||||
import bubble.service.backup.BackupService; | |||||
import bubble.service.notify.NotificationService; | |||||
import lombok.Getter; | |||||
import lombok.extern.slf4j.Slf4j; | |||||
import org.cobbzilla.util.http.HttpRequestBean; | |||||
import org.cobbzilla.wizard.cache.redis.RedisService; | |||||
import org.springframework.beans.factory.annotation.Autowired; | |||||
import org.springframework.stereotype.Service; | |||||
import java.io.File; | |||||
import static bubble.ApiConstants.AUTH_ENDPOINT; | |||||
import static bubble.ApiConstants.EP_UPGRADE; | |||||
import static bubble.client.BubbleNodeClient.nodeBaseUri; | |||||
import static bubble.model.cloud.notify.NotificationType.upgrade_request; | |||||
import static java.util.concurrent.TimeUnit.MINUTES; | |||||
import static java.util.concurrent.TimeUnit.SECONDS; | |||||
import static org.apache.commons.lang3.RandomStringUtils.randomAlphanumeric; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.now; | |||||
import static org.cobbzilla.util.http.HttpMethods.GET; | |||||
import static org.cobbzilla.util.io.FileUtil.*; | |||||
import static org.cobbzilla.util.system.Sleep.sleep; | |||||
import static org.cobbzilla.util.time.TimeUtil.DATE_FORMAT_YYYY_MM_DD_HH_mm_ss_SSS; | |||||
import static org.cobbzilla.wizard.cache.redis.RedisService.EX; | |||||
@Service @Slf4j | |||||
public class BubbleJarUpgradeService { | |||||
private static final long PRE_UPGRADE_BACKUP_TIMEOUT = MINUTES.toMillis(20); | |||||
@Autowired private BubbleConfiguration configuration; | |||||
@Autowired private BackupService backupService; | |||||
@Autowired private BubbleBackupDAO backupDAO; | |||||
@Autowired private NotificationService notificationService; | |||||
@Autowired private RedisService redis; | |||||
@Getter(lazy=true) private final RedisService nodeUpgradeRequests = redis.prefixNamespace(getClass().getName()); | |||||
public String registerNodeUpgrade(String nodeUuid) { | |||||
final String key = randomAlphanumeric(10) + "." + now(); | |||||
getNodeUpgradeRequests().set(key, nodeUuid, EX, MINUTES.toMillis(1)); | |||||
return key; | |||||
} | |||||
public String getNodeForKey(String key) { return getNodeUpgradeRequests().get(key); } | |||||
public void upgrade() { | |||||
if (!configuration.getJarUpgradeAvailable()) { | |||||
log.warn("upgrade: No upgrade available, returning"); | |||||
return; | |||||
} | |||||
final String currentVersion = configuration.getVersion(); | |||||
final BubbleVersionInfo sageVersion = configuration.getSageVersionInfo(); | |||||
final String newVersion = sageVersion.getVersion(); | |||||
BubbleBackup bubbleBackup = backupService.queueBackup("before_upgrade_" + currentVersion + "_to_" + newVersion + "_on_" + DATE_FORMAT_YYYY_MM_DD_HH_mm_ss_SSS.print(now())); | |||||
// monitor backup, ensure it completes | |||||
final long start = now(); | |||||
while (bubbleBackup.getStatus() != BackupStatus.backup_completed && now() - start < PRE_UPGRADE_BACKUP_TIMEOUT) { | |||||
sleep(SECONDS.toMillis(5), "waiting for backup to complete before upgrading"); | |||||
bubbleBackup = backupDAO.findByUuid(bubbleBackup.getUuid()); | |||||
} | |||||
if (bubbleBackup.getStatus() != BackupStatus.backup_completed) { | |||||
log.warn("upgrade: timeout waiting for backup to complete, status="+bubbleBackup.getStatus()); | |||||
return; | |||||
} | |||||
final File upgradeJar = new File(configuration.getBubbleJar().getParentFile(), ".upgrade.jar"); | |||||
if (upgradeJar.exists()) { | |||||
log.error("upgrade: jar already exists, not upgrading: "+abs(upgradeJar)); | |||||
return; | |||||
} | |||||
// ask the sage to allow us to download the upgrade | |||||
final String key = notificationService.notifySync(configuration.getSageNode(), upgrade_request, new JarUpgradeNotification(sageVersion)); | |||||
// request the jar from the sage | |||||
final String uri = nodeBaseUri(configuration.getSageNode(), configuration) + AUTH_ENDPOINT + EP_UPGRADE + "/" + key; | |||||
final HttpRequestBean requestBean = new HttpRequestBean(GET, uri); | |||||
final File newJar = temp(".jar"); | |||||
// move to upgrade location | |||||
renameOrDie(newJar, upgradeJar); | |||||
} | |||||
} |
@@ -9,50 +9,59 @@ import lombok.Getter; | |||||
public class NodeLaunchException extends RuntimeException { | public class NodeLaunchException extends RuntimeException { | ||||
public enum NodeLaunchExceptionType { fatal, canRetry, interrupted } | |||||
@Getter private final BubbleNode node; | @Getter private final BubbleNode node; | ||||
public boolean hasNode () { return node != null; } | public boolean hasNode () { return node != null; } | ||||
public String nodeSummary () { return node == null ? "null" : node.id()+"/"+node.getState(); } | public String nodeSummary () { return node == null ? "null" : node.id()+"/"+node.getState(); } | ||||
@Getter private final boolean fatal; | |||||
@Getter private final NodeLaunchExceptionType type; | |||||
private NodeLaunchException (BubbleNode node, Exception e, String message, boolean fatal) { | |||||
private NodeLaunchException (BubbleNode node, Exception e, String message, NodeLaunchExceptionType type) { | |||||
super(message, e); | super(message, e); | ||||
this.node = node; | this.node = node; | ||||
this.fatal = fatal; | |||||
this.type = type; | |||||
} | } | ||||
private NodeLaunchException (BubbleNode node, Exception e, boolean fatal) { | |||||
this(node, e, e.getMessage(), fatal); | |||||
private NodeLaunchException (BubbleNode node, Exception e, NodeLaunchExceptionType type) { | |||||
this(node, e, e.getMessage(), type); | |||||
} | } | ||||
private NodeLaunchException (BubbleNode node, String message, boolean fatal) { | |||||
this(node, null, message, fatal); | |||||
private NodeLaunchException (BubbleNode node, String message, NodeLaunchExceptionType type) { | |||||
this(node, null, message, type); | |||||
} | } | ||||
private NodeLaunchException (Exception e, String message, boolean fatal) { | |||||
this(null, e, message, fatal); | |||||
private NodeLaunchException (Exception e, String message, NodeLaunchExceptionType type) { | |||||
this(null, e, message, type); | |||||
} | } | ||||
private NodeLaunchException (String message, boolean fatal) { | |||||
this(null, null, message, fatal); | |||||
private NodeLaunchException (String message, NodeLaunchExceptionType type) { | |||||
this(null, null, message, type); | |||||
} | } | ||||
private NodeLaunchException (Exception e, boolean fatal) { | |||||
this(null, e, e.getMessage(), fatal); | |||||
private NodeLaunchException (Exception e, NodeLaunchExceptionType type) { | |||||
this(null, e, e.getMessage(), type); | |||||
} | } | ||||
public static <T> T fatalLaunchFailure (String message) { throw new NodeLaunchException(message, true); } | |||||
public static <T> T fatalLaunchFailure (Exception e, String message) { throw new NodeLaunchException(e, message, true); } | |||||
public static <T> T fatalLaunchFailure (Exception e) { throw new NodeLaunchException(e, true); } | |||||
public static <T> T fatalLaunchFailure (BubbleNode node, String message) { throw new NodeLaunchException(node, message, true); } | |||||
public static <T> T fatalLaunchFailure (BubbleNode node, Exception e) { throw new NodeLaunchException(node, e, true); } | |||||
public static <T> T fatalLaunchFailure (BubbleNode node, Exception e, String message) { throw new NodeLaunchException(node, e, message, true); } | |||||
public static <T> T launchFailureCanRetry (String message) { throw new NodeLaunchException(message, false); } | |||||
public static <T> T launchFailureCanRetry (Exception e, String message) { throw new NodeLaunchException(e, message, false); } | |||||
public static <T> T launchFailureCanRetry (Exception e) { throw new NodeLaunchException(e, false); } | |||||
public static <T> T launchFailureCanRetry (BubbleNode node, String message) { throw new NodeLaunchException(node, message, false); } | |||||
public static <T> T launchFailureCanRetry (BubbleNode node, Exception e) { throw new NodeLaunchException(node, e, false); } | |||||
public static <T> T launchFailureCanRetry (BubbleNode node, Exception e, String message) { throw new NodeLaunchException(node, e, message, false); } | |||||
public static <T> T fatalLaunchFailure (String message) { throw new NodeLaunchException(message, NodeLaunchExceptionType.fatal); } | |||||
public static <T> T fatalLaunchFailure (Exception e, String message) { throw new NodeLaunchException(e, message, NodeLaunchExceptionType.fatal); } | |||||
public static <T> T fatalLaunchFailure (Exception e) { throw new NodeLaunchException(e, NodeLaunchExceptionType.fatal); } | |||||
public static <T> T fatalLaunchFailure (BubbleNode node, String message) { throw new NodeLaunchException(node, message, NodeLaunchExceptionType.fatal); } | |||||
public static <T> T fatalLaunchFailure (BubbleNode node, Exception e) { throw new NodeLaunchException(node, e, NodeLaunchExceptionType.fatal); } | |||||
public static <T> T fatalLaunchFailure (BubbleNode node, Exception e, String message) { throw new NodeLaunchException(node, e, message, NodeLaunchExceptionType.fatal); } | |||||
public static <T> T launchFailureCanRetry (String message) { throw new NodeLaunchException(message, NodeLaunchExceptionType.canRetry); } | |||||
public static <T> T launchFailureCanRetry (Exception e, String message) { throw new NodeLaunchException(e, message, NodeLaunchExceptionType.canRetry); } | |||||
public static <T> T launchFailureCanRetry (Exception e) { throw new NodeLaunchException(e, NodeLaunchExceptionType.canRetry); } | |||||
public static <T> T launchFailureCanRetry (BubbleNode node, String message) { throw new NodeLaunchException(node, message, NodeLaunchExceptionType.canRetry); } | |||||
public static <T> T launchFailureCanRetry (BubbleNode node, Exception e) { throw new NodeLaunchException(node, e, NodeLaunchExceptionType.canRetry); } | |||||
public static <T> T launchFailureCanRetry (BubbleNode node, Exception e, String message) { throw new NodeLaunchException(node, e, message, NodeLaunchExceptionType.canRetry); } | |||||
public static <T> T launchInterrupted (String message) { throw new NodeLaunchException(message, NodeLaunchExceptionType.interrupted); } | |||||
public static <T> T launchInterrupted (Exception e, String message) { throw new NodeLaunchException(e, message, NodeLaunchExceptionType.interrupted); } | |||||
public static <T> T launchInterrupted (Exception e) { throw new NodeLaunchException(e, NodeLaunchExceptionType.interrupted); } | |||||
public static <T> T launchInterrupted (BubbleNode node, String message) { throw new NodeLaunchException(node, message, NodeLaunchExceptionType.interrupted); } | |||||
public static <T> T launchInterrupted (BubbleNode node, Exception e) { throw new NodeLaunchException(node, e, NodeLaunchExceptionType.interrupted); } | |||||
public static <T> T launchInterrupted (BubbleNode node, Exception e, String message) { throw new NodeLaunchException(node, e, message, NodeLaunchExceptionType.interrupted); } | |||||
} | } |
@@ -28,8 +28,7 @@ import java.util.concurrent.ConcurrentHashMap; | |||||
import static bubble.service.cloud.NodeProgressMeter.getProgressMeterKey; | import static bubble.service.cloud.NodeProgressMeter.getProgressMeterKey; | ||||
import static bubble.service.cloud.NodeProgressMeter.getProgressMeterPrefix; | import static bubble.service.cloud.NodeProgressMeter.getProgressMeterPrefix; | ||||
import static java.util.concurrent.TimeUnit.HOURS; | |||||
import static java.util.concurrent.TimeUnit.SECONDS; | |||||
import static java.util.concurrent.TimeUnit.*; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.*; | import static org.cobbzilla.util.daemon.ZillaRuntime.*; | ||||
import static org.cobbzilla.util.json.JsonUtil.json; | import static org.cobbzilla.util.json.JsonUtil.json; | ||||
@@ -37,7 +36,7 @@ import static org.cobbzilla.util.json.JsonUtil.json; | |||||
public class NodeLaunchMonitor extends SimpleDaemon { | public class NodeLaunchMonitor extends SimpleDaemon { | ||||
private static final long LAUNCH_ACTIVITY_TIMEOUT = SECONDS.toMillis(180); | private static final long LAUNCH_ACTIVITY_TIMEOUT = SECONDS.toMillis(180); | ||||
private static final long LAUNCH_TERMINATE_TIMEOUT = SECONDS.toMillis(5); | |||||
private static final long LAUNCH_TERMINATE_TIMEOUT = MINUTES.toMillis(6); | |||||
@Getter private final long sleepTime = SECONDS.toMillis(15); | @Getter private final long sleepTime = SECONDS.toMillis(15); | ||||
@@ -50,7 +49,6 @@ public class NodeLaunchMonitor extends SimpleDaemon { | |||||
@Getter(lazy=true) private final RedisService networkSetupStatus = redis.prefixNamespace(getClass().getSimpleName()+"_status_"); | @Getter(lazy=true) private final RedisService networkSetupStatus = redis.prefixNamespace(getClass().getSimpleName()+"_status_"); | ||||
private final Map<String, LauncherEntry> launcherThreads = new ConcurrentHashMap<>(); | private final Map<String, LauncherEntry> launcherThreads = new ConcurrentHashMap<>(); | ||||
private final Map<String, String> canceledNetworks = new ExpirationMap<>(50, HOURS.toMillis(2)); | |||||
public void register(String nnUuid, String networkUuid, Thread t) { | public void register(String nnUuid, String networkUuid, Thread t) { | ||||
startIfNotRunning(); | startIfNotRunning(); | ||||
@@ -63,7 +61,6 @@ public class NodeLaunchMonitor extends SimpleDaemon { | |||||
} | } | ||||
public void cancel(String networkUuid) { | public void cancel(String networkUuid) { | ||||
canceledNetworks.put(networkUuid, networkUuid); | |||||
final LauncherEntry previousLaunch = launcherThreads.get(networkUuid); | final LauncherEntry previousLaunch = launcherThreads.get(networkUuid); | ||||
if (previousLaunch == null || !previousLaunch.isAlive()) { | if (previousLaunch == null || !previousLaunch.isAlive()) { | ||||
log.warn("cancel("+networkUuid+"): entry does not thread exist, or is not alive: "+previousLaunch); | log.warn("cancel("+networkUuid+"): entry does not thread exist, or is not alive: "+previousLaunch); | ||||
@@ -13,8 +13,7 @@ import lombok.extern.slf4j.Slf4j; | |||||
import java.util.concurrent.atomic.AtomicReference; | import java.util.concurrent.atomic.AtomicReference; | ||||
import static java.util.concurrent.TimeUnit.SECONDS; | import static java.util.concurrent.TimeUnit.SECONDS; | ||||
import static org.cobbzilla.util.daemon.ZillaRuntime.die; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.shortError; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.*; | |||||
import static org.cobbzilla.util.system.Sleep.sleep; | import static org.cobbzilla.util.system.Sleep.sleep; | ||||
@AllArgsConstructor @Slf4j | @AllArgsConstructor @Slf4j | ||||
@@ -33,12 +32,13 @@ public class NodeLauncher implements Runnable { | |||||
try { | try { | ||||
for (int i=0; i<LAUNCH_MAX_RETRIES; i++) { | for (int i=0; i<LAUNCH_MAX_RETRIES; i++) { | ||||
if (i > 0 && !launchMonitor.isRegistered(networkUuid)) { | if (i > 0 && !launchMonitor.isRegistered(networkUuid)) { | ||||
throw new IllegalStateException("NodeLauncher.run: no longer registered: "+networkUuid); | |||||
log.warn("NodeLauncher.run: no longer registered: "+networkUuid); | |||||
return; | |||||
} | } | ||||
if (!lock.get().equals(newNodeRequest.getLock())) { | if (!lock.get().equals(newNodeRequest.getLock())) { | ||||
die("NodeLauncher.run: existingLock (" + lock.get() + ") is different than lock in NewNodeNotification: " + newNodeRequest.getLock()); | die("NodeLauncher.run: existingLock (" + lock.get() + ") is different than lock in NewNodeNotification: " + newNodeRequest.getLock()); | ||||
} | } | ||||
if (!networkService.confirmLock(networkUuid, lock.get())) { | |||||
if (!networkService.confirmNetLock(networkUuid, lock.get())) { | |||||
die("NodeLauncher.run: error confirming lock (" + lock.get() + ") for network: " + networkUuid); | die("NodeLauncher.run: error confirming lock (" + lock.get() + ") for network: " + networkUuid); | ||||
} | } | ||||
@@ -51,20 +51,37 @@ public class NodeLauncher implements Runnable { | |||||
log.info("NodeLauncher.run: launching node..."+newNodeRequest.getFqdn()); | log.info("NodeLauncher.run: launching node..."+newNodeRequest.getFqdn()); | ||||
launchThread.start(); | launchThread.start(); | ||||
launchThread.join(); | |||||
do { | |||||
launchThread.join(SECONDS.toMillis(5)); | |||||
if (log.isTraceEnabled()) log.trace("NodeLauncher.run: still waiting for thread join: "+newNodeRequest.getFqdn()+" stack="+stacktrace(launchThread)); | |||||
} while (launchThread.isAlive() && !launchThread.isInterrupted()); | |||||
if (launchThread.isInterrupted()) { | |||||
log.warn("NodeLauncher.run: launch interrupted while waiting for join, exiting early"); | |||||
return; | |||||
} | |||||
final Exception exception = exceptionRef.get(); | final Exception exception = exceptionRef.get(); | ||||
final BubbleNode node = nodeRef.get(); | final BubbleNode node = nodeRef.get(); | ||||
log.debug("NodeLauncher.run: node="+(node == null ? "null" : node.id())+", exception="+shortError(exception)); | log.debug("NodeLauncher.run: node="+(node == null ? "null" : node.id())+", exception="+shortError(exception)); | ||||
if (exception != null) { | if (exception != null) { | ||||
if (exception instanceof NodeLaunchException) { | if (exception instanceof NodeLaunchException) { | ||||
final NodeLaunchException launchException = (NodeLaunchException) exception; | final NodeLaunchException launchException = (NodeLaunchException) exception; | ||||
if (launchException.isFatal()) { | |||||
die("NodeLauncher.run: fatal launch exception: " + shortError(launchException)); | |||||
} else { | |||||
log.warn("NodeLauncher.run: nonfatal launch exception for node " + launchException.nodeSummary() + " : " + shortError(launchException)); | |||||
switch (launchException.getType()) { | |||||
case fatal: | |||||
die("NodeLauncher.run: fatal launch exception: " + shortError(launchException)); | |||||
break; | |||||
case interrupted: | |||||
log.warn("NodeLauncher.run: launch interrupted, exiting early"); | |||||
return; | |||||
case canRetry: | |||||
log.warn("NodeLauncher.run: nonfatal launch exception for node " + launchException.nodeSummary() + " : " + shortError(launchException)); | |||||
break; | |||||
default: | |||||
die("NodeLauncher.run: unknown launch exception (type="+launchException.getType()+"): "+shortError(launchException)); | |||||
} | } | ||||
} else { | } else { | ||||
die("NodeLauncher.run: fatal launch exception: " + shortError(exception)); | |||||
die("NodeLauncher.run: fatal launch exception: " + shortError(exception), exception); | |||||
} | } | ||||
} | } | ||||
if (node != null && node.isRunning()) { | if (node != null && node.isRunning()) { | ||||
@@ -59,7 +59,7 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable { | |||||
public void touch() { | public void touch() { | ||||
if (now() > lastTouch + MAX_TOUCH_INTERVAL) { | if (now() > lastTouch + MAX_TOUCH_INTERVAL) { | ||||
launchMonitor.touch(nn.getNetwork()); | launchMonitor.touch(nn.getNetwork()); | ||||
networkService.confirmLock(nn.getNetwork(), nn.getLock()); | |||||
networkService.confirmNetLock(nn.getNetwork(), nn.getLock()); | |||||
lastTouch = now(); | lastTouch = now(); | ||||
} | } | ||||
} | } | ||||
@@ -99,6 +99,10 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable { | |||||
public void write(String line) throws IOException { | public void write(String line) throws IOException { | ||||
touch(); | touch(); | ||||
if (closed.get()) { | |||||
log.warn("write("+line+"): stream closed, not writing"); | |||||
return; | |||||
} | |||||
writer.write(line.endsWith("\n") ? line : line+"\n"); | writer.write(line.endsWith("\n") ? line : line+"\n"); | ||||
writer.flush(); | writer.flush(); | ||||
} | } | ||||
@@ -6,10 +6,10 @@ package bubble.service.cloud; | |||||
import bubble.cloud.compute.ComputeServiceDriver; | import bubble.cloud.compute.ComputeServiceDriver; | ||||
import bubble.dao.cloud.BubbleDomainDAO; | import bubble.dao.cloud.BubbleDomainDAO; | ||||
import bubble.dao.cloud.BubbleNetworkDAO; | |||||
import bubble.dao.cloud.BubbleNodeDAO; | |||||
import bubble.dao.cloud.CloudServiceDAO; | import bubble.dao.cloud.CloudServiceDAO; | ||||
import bubble.model.cloud.*; | |||||
import bubble.model.cloud.BubbleDomain; | |||||
import bubble.model.cloud.BubbleNode; | |||||
import bubble.model.cloud.CloudService; | |||||
import bubble.server.BubbleConfiguration; | import bubble.server.BubbleConfiguration; | ||||
import lombok.extern.slf4j.Slf4j; | import lombok.extern.slf4j.Slf4j; | ||||
import org.cobbzilla.wizard.validation.SimpleViolationException; | import org.cobbzilla.wizard.validation.SimpleViolationException; | ||||
@@ -18,58 +18,36 @@ import org.springframework.stereotype.Service; | |||||
import javax.persistence.EntityNotFoundException; | import javax.persistence.EntityNotFoundException; | ||||
import static org.cobbzilla.util.daemon.ZillaRuntime.die; | |||||
import static org.cobbzilla.util.daemon.ZillaRuntime.shortError; | |||||
@Service @Slf4j | @Service @Slf4j | ||||
public class NodeService { | public class NodeService { | ||||
@Autowired private BubbleNodeDAO nodeDAO; | |||||
@Autowired private BubbleNetworkDAO networkDAO; | |||||
@Autowired private BubbleDomainDAO domainDAO; | @Autowired private BubbleDomainDAO domainDAO; | ||||
@Autowired private CloudServiceDAO cloudDAO; | @Autowired private CloudServiceDAO cloudDAO; | ||||
@Autowired private BubbleConfiguration configuration; | @Autowired private BubbleConfiguration configuration; | ||||
public BubbleNode stopNode(ComputeServiceDriver compute, BubbleNode node) { | |||||
public void stopNode(ComputeServiceDriver compute, BubbleNode node) { | |||||
log.info("stopNode: starting for node "+node.id()); | |||||
final BubbleDomain domain = domainDAO.findByUuid(node.getDomain()); | final BubbleDomain domain = domainDAO.findByUuid(node.getDomain()); | ||||
final CloudService dns = cloudDAO.findByUuid(domain.getPublicDns()); | final CloudService dns = cloudDAO.findByUuid(domain.getPublicDns()); | ||||
node.setState(BubbleNodeState.stopping); | |||||
if (node.hasUuid()) nodeDAO.update(node); | |||||
try { | try { | ||||
log.debug("stopNode: deleting dns entries for node: "+node.id()); | |||||
dns.getDnsDriver(configuration).deleteNode(node); | dns.getDnsDriver(configuration).deleteNode(node); | ||||
log.debug("stopNode: stopping instance for node: "+node.id()); | |||||
node = compute.stop(node); | node = compute.stop(node); | ||||
return safeUpdateNodeState(node, BubbleNodeState.stopped); | |||||
log.debug("stopNode: node stopped: "+node.id()); | |||||
} catch (EntityNotFoundException e) { | } catch (EntityNotFoundException e) { | ||||
log.info("stopNode: node not found by compute service: "+node.id()+": "+e); | |||||
return safeUpdateNodeState(node, BubbleNodeState.unreachable); | |||||
log.warn("stopNode: node not found by compute service: "+node.id()+": "+e); | |||||
} catch (SimpleViolationException e) { | } catch (SimpleViolationException e) { | ||||
log.info("stopNode: error stopping "+node.id()+": "+e); | |||||
log.warn("stopNode: error stopping "+node.id()+": "+e); | |||||
throw e; | throw e; | ||||
} catch (Exception e) { | } catch (Exception e) { | ||||
log.info("stopNode: error stopping "+node.id()); | |||||
return die("stopNode: "+e, e); | |||||
} | |||||
} | |||||
public BubbleNode safeUpdateNodeState(BubbleNode node, BubbleNodeState newState) { | |||||
// ensure node still exists | |||||
final BubbleNode existingNode = nodeDAO.findByUuid(node.getUuid()); | |||||
if (existingNode == null) { | |||||
log.warn("stopNode: node not found, not updating: " + node.id()); | |||||
return node; | |||||
} else { | |||||
// ensure network still exists | |||||
final BubbleNetwork network = networkDAO.findByUuid(node.getNetwork()); | |||||
if (network == null) { | |||||
log.warn("stopNode: node exists (" + node.id() + ") but network (" + node.getNetwork() + ") does not, deleting node"); | |||||
nodeDAO.delete(node.getUuid()); | |||||
return node; | |||||
} | |||||
return nodeDAO.update(node.setState(newState)); | |||||
log.warn("stopNode: error stopping "+node.id()+": "+shortError(e)); | |||||
} | } | ||||
} | } | ||||
@@ -49,6 +49,7 @@ import org.cobbzilla.util.io.TempDir; | |||||
import org.cobbzilla.util.system.Command; | import org.cobbzilla.util.system.Command; | ||||
import org.cobbzilla.util.system.CommandResult; | import org.cobbzilla.util.system.CommandResult; | ||||
import org.cobbzilla.util.system.CommandShell; | import org.cobbzilla.util.system.CommandShell; | ||||
import org.cobbzilla.util.system.SleepInterruptedException; | |||||
import org.cobbzilla.wizard.api.ApiException; | import org.cobbzilla.wizard.api.ApiException; | ||||
import org.cobbzilla.wizard.cache.redis.RedisService; | import org.cobbzilla.wizard.cache.redis.RedisService; | ||||
import org.cobbzilla.wizard.validation.MultiViolationException; | import org.cobbzilla.wizard.validation.MultiViolationException; | ||||
@@ -76,8 +77,7 @@ import static bubble.model.cloud.BubbleNode.TAG_ERROR; | |||||
import static bubble.server.BubbleConfiguration.DEBUG_NODE_INSTALL_FILE; | import static bubble.server.BubbleConfiguration.DEBUG_NODE_INSTALL_FILE; | ||||
import static bubble.server.BubbleConfiguration.ENV_DEBUG_NODE_INSTALL; | import static bubble.server.BubbleConfiguration.ENV_DEBUG_NODE_INSTALL; | ||||
import static bubble.service.boot.StandardSelfNodeService.*; | import static bubble.service.boot.StandardSelfNodeService.*; | ||||
import static bubble.service.cloud.NodeLaunchException.fatalLaunchFailure; | |||||
import static bubble.service.cloud.NodeLaunchException.launchFailureCanRetry; | |||||
import static bubble.service.cloud.NodeLaunchException.*; | |||||
import static bubble.service.cloud.NodeProgressMeterConstants.*; | import static bubble.service.cloud.NodeProgressMeterConstants.*; | ||||
import static java.util.concurrent.TimeUnit.MINUTES; | import static java.util.concurrent.TimeUnit.MINUTES; | ||||
import static java.util.concurrent.TimeUnit.SECONDS; | import static java.util.concurrent.TimeUnit.SECONDS; | ||||
@@ -136,7 +136,8 @@ public class StandardNetworkService implements NetworkService { | |||||
@Autowired private NodeLaunchMonitor launchMonitor; | @Autowired private NodeLaunchMonitor launchMonitor; | ||||
@Autowired private RedisService redisService; | @Autowired private RedisService redisService; | ||||
@Getter(lazy=true) private final RedisService networkLocks = redisService.prefixNamespace(getClass().getSimpleName()+"_lock_"); | |||||
@Getter(lazy=true) private final RedisService networkLocks = redisService.prefixNamespace(getClass().getSimpleName()+"_net_lock_"); | |||||
@Getter(lazy=true) private final RedisService nodeKillLocks = redisService.prefixNamespace(getClass().getSimpleName()+"_node_kill_lock_"); | |||||
@NonNull public BubbleNode newNode(@NonNull final NewNodeNotification nn, | @NonNull public BubbleNode newNode(@NonNull final NewNodeNotification nn, | ||||
NodeLaunchMonitor launchMonitor) { | NodeLaunchMonitor launchMonitor) { | ||||
@@ -148,11 +149,12 @@ public class StandardNetworkService implements NetworkService { | |||||
NodeProgressMeter progressMeter = null; | NodeProgressMeter progressMeter = null; | ||||
final BubbleNetwork network = nn.getNetworkObject(); | final BubbleNetwork network = nn.getNetworkObject(); | ||||
final ExecutorService backgroundJobs = DaemonThreadFactory.fixedPool(3); | final ExecutorService backgroundJobs = DaemonThreadFactory.fixedPool(3); | ||||
boolean killNode = false; | |||||
try { | try { | ||||
progressMeter = launchMonitor.getProgressMeter(nn); | progressMeter = launchMonitor.getProgressMeter(nn); | ||||
progressMeter.write(METER_TICK_CONFIRMING_NETWORK_LOCK); | progressMeter.write(METER_TICK_CONFIRMING_NETWORK_LOCK); | ||||
if (!confirmLock(nn.getNetwork(), lock)) { | |||||
if (!confirmNetLock(nn.getNetwork(), lock)) { | |||||
progressMeter.error(METER_ERROR_CONFIRMING_NETWORK_LOCK); | progressMeter.error(METER_ERROR_CONFIRMING_NETWORK_LOCK); | ||||
return launchFailureCanRetry("newNode: Error confirming network lock"); | return launchFailureCanRetry("newNode: Error confirming network lock"); | ||||
} | } | ||||
@@ -245,7 +247,7 @@ public class StandardNetworkService implements NetworkService { | |||||
final List<Future<?>> jobFutures = new ArrayList<>(); | final List<Future<?>> jobFutures = new ArrayList<>(); | ||||
// Start the cloud compute instance | // Start the cloud compute instance | ||||
final NodeStartJob startJob = new NodeStartJob(node, nodeDAO, computeDriver); | |||||
final NodeStartJob startJob = new NodeStartJob(node, computeDriver); | |||||
jobFutures.add(backgroundJobs.submit(startJob)); | jobFutures.add(backgroundJobs.submit(startJob)); | ||||
// Create DNS records for node | // Create DNS records for node | ||||
@@ -410,18 +412,14 @@ public class StandardNetworkService implements NetworkService { | |||||
log.info("newNode: ready in "+formatDuration(now() - start)); | log.info("newNode: ready in "+formatDuration(now() - start)); | ||||
} catch (Exception e) { | } catch (Exception e) { | ||||
log.error("newNode: "+e, e); | |||||
if (node != null) { | |||||
node.setState(BubbleNodeState.unknown_error); | |||||
nodeDAO.update(node); | |||||
if (!progressMeter.hasError()) progressMeter.error(METER_UNKNOWN_ERROR); | |||||
killNode(node, "error: "+e); | |||||
} | |||||
if (noNodesActive(network)) { | |||||
// if no nodes are running, then the network is stopped | |||||
networkDAO.update(network.setState(BubbleNetworkState.stopped)); | |||||
if (e instanceof SleepInterruptedException) { | |||||
log.warn("newNode: interrupted!"); | |||||
} else { | |||||
log.error("newNode: " + e, e); | |||||
} | } | ||||
killNode = node != null; | |||||
if (e instanceof NodeLaunchException) throw (NodeLaunchException) e; | if (e instanceof NodeLaunchException) throw (NodeLaunchException) e; | ||||
if (e instanceof SleepInterruptedException) launchInterrupted("newNode: interrupted: "+shortError(e)); | |||||
return die("newNode: "+e, e); | return die("newNode: "+e, e); | ||||
} finally { | } finally { | ||||
@@ -434,15 +432,11 @@ public class StandardNetworkService implements NetworkService { | |||||
} | } | ||||
} | } | ||||
if (node != null && !node.isRunning()) { | |||||
if (node != null && (killNode || !node.isRunning())) { | |||||
node.setState(BubbleNodeState.unknown_error); | node.setState(BubbleNodeState.unknown_error); | ||||
nodeDAO.update(node); | nodeDAO.update(node); | ||||
if (!progressMeter.hasError()) progressMeter.error(METER_UNKNOWN_ERROR); | if (!progressMeter.hasError()) progressMeter.error(METER_UNKNOWN_ERROR); | ||||
killNode(node, "error: node not running: "+node.id()+": "+node.getState()); | killNode(node, "error: node not running: "+node.id()+": "+node.getState()); | ||||
if (noNodesActive(network)) { | |||||
// if no nodes are running, then the network is stopped | |||||
networkDAO.update(network.setState(BubbleNetworkState.stopped)); | |||||
} | |||||
} | } | ||||
if (progressMeter != null) { | if (progressMeter != null) { | ||||
@@ -501,25 +495,30 @@ public class StandardNetworkService implements NetworkService { | |||||
} | } | ||||
public BubbleNode killNode(BubbleNode node, String message) { | public BubbleNode killNode(BubbleNode node, String message) { | ||||
if (node == null) return die("(but node was null?): "+message); | |||||
node.setState(BubbleNodeState.error_stopping); | |||||
node.setTag(TAG_ERROR, message); | |||||
if (node.hasUuid()) nodeDAO.update(node); | |||||
if (node == null) return die("killNode: node was null (message=" + message + ")"); | |||||
String lock = null; | |||||
try { | try { | ||||
stopNode(node); // kill it | |||||
} catch (Exception e) { | |||||
log.warn("killNode("+node.id()+"): error stopping: "+e); | |||||
} | |||||
node.setState(BubbleNodeState.error_stopped); | |||||
if (node.hasUuid()) nodeDAO.update(node); | |||||
lock = lockNode(node.getUuid()); | |||||
if (nodeDAO.findByUuid(node.getUuid()) == null) { | |||||
log.warn("killNode: node already deleted"); | |||||
return node; | |||||
} | |||||
final BubbleNetwork network = networkDAO.findByUuid(node.getNetwork()); | |||||
if (noNodesActive(network)) { | |||||
// if no nodes are running, then the network is stopped | |||||
networkDAO.update(network.setState(BubbleNetworkState.stopped)); | |||||
} | |||||
node.setState(BubbleNodeState.error_stopping); | |||||
node.setTag(TAG_ERROR, message); | |||||
if (node.hasUuid()) nodeDAO.update(node); | |||||
try { | |||||
stopNode(node); // kill it | |||||
} catch (Exception e) { | |||||
log.warn("killNode(" + node.id() + "): error stopping: " + e); | |||||
} | |||||
node.setState(BubbleNodeState.error_stopped); | |||||
nodeDAO.update(node); | |||||
return node; | |||||
return node; | |||||
} finally { | |||||
if (lock != null) unlockNode(node.getUuid(), lock); | |||||
} | |||||
} | } | ||||
protected String lockNetwork(String network) { | protected String lockNetwork(String network) { | ||||
@@ -529,7 +528,7 @@ public class StandardNetworkService implements NetworkService { | |||||
return lock; | return lock; | ||||
} | } | ||||
protected boolean confirmLock(String network, String lock) { | |||||
protected boolean confirmNetLock(String network, String lock) { | |||||
return getNetworkLocks().confirmLock(network, lock); | return getNetworkLocks().confirmLock(network, lock); | ||||
} | } | ||||
@@ -539,10 +538,27 @@ public class StandardNetworkService implements NetworkService { | |||||
log.info("unlockNetwork: unlocked "+network); | log.info("unlockNetwork: unlocked "+network); | ||||
} | } | ||||
public BubbleNode stopNode(BubbleNode node) { | |||||
protected String lockNode(String node) { | |||||
log.info("lockNode: locking "+node); | |||||
final String lock = getNodeKillLocks().lock(node, NET_LOCK_TIMEOUT, NET_DEADLOCK_TIMEOUT); | |||||
log.info("lockNode: locked "+node); | |||||
return lock; | |||||
} | |||||
protected boolean confirmNodeLock(String node, String lock) { | |||||
return getNodeKillLocks().confirmLock(node, lock); | |||||
} | |||||
protected void unlockNode(String node, String lock) { | |||||
log.info("unlockNode: unlocking "+node); | |||||
getNodeKillLocks().unlock(node, lock); | |||||
log.info("unlockNode: unlocked "+node); | |||||
} | |||||
public void stopNode(BubbleNode node) { | |||||
log.info("stopNode: stopping "+node.id()); | log.info("stopNode: stopping "+node.id()); | ||||
final CloudService cloud = cloudDAO.findByUuid(node.getCloud()); | final CloudService cloud = cloudDAO.findByUuid(node.getCloud()); | ||||
return nodeService.stopNode(cloud.getComputeDriver(configuration), node); | |||||
nodeService.stopNode(cloud.getComputeDriver(configuration), node); | |||||
} | } | ||||
public boolean isReachable(BubbleNode node) { | public boolean isReachable(BubbleNode node) { | ||||
@@ -550,6 +566,7 @@ public class StandardNetworkService implements NetworkService { | |||||
try { | try { | ||||
log.info(prefix+"starting"); | log.info(prefix+"starting"); | ||||
final NotificationReceipt receipt = notificationService.notify(node, NotificationType.health_check, null); | final NotificationReceipt receipt = notificationService.notify(node, NotificationType.health_check, null); | ||||
BubbleNodeState state = null; | |||||
if (receipt == null) { | if (receipt == null) { | ||||
log.info(prefix+"health_check failed, checking via cloud"); | log.info(prefix+"health_check failed, checking via cloud"); | ||||
final CloudService cloud = cloudDAO.findByUuid(node.getCloud()); | final CloudService cloud = cloudDAO.findByUuid(node.getCloud()); | ||||
@@ -559,14 +576,14 @@ public class StandardNetworkService implements NetworkService { | |||||
} | } | ||||
final BubbleNode status = cloud.getComputeDriver(configuration).status(node); | final BubbleNode status = cloud.getComputeDriver(configuration).status(node); | ||||
if (status != null) { | if (status != null) { | ||||
final BubbleNodeState state = status.getState(); | |||||
state = status.getState(); | |||||
if (state != null && state.active()) { | if (state != null && state.active()) { | ||||
log.info(prefix + "cloud status was: " + state + ", returning true"); | log.info(prefix + "cloud status was: " + state + ", returning true"); | ||||
return true; | return true; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
log.warn(prefix+"no way of reaching node, returning false"); | |||||
log.warn(prefix+"no way of reaching node "+node.id()+" (state="+state+"), returning false"); | |||||
return false; | return false; | ||||
} catch (Exception e) { | } catch (Exception e) { | ||||
@@ -5,35 +5,39 @@ | |||||
package bubble.service.cloud.job; | package bubble.service.cloud.job; | ||||
import bubble.cloud.compute.ComputeServiceDriver; | import bubble.cloud.compute.ComputeServiceDriver; | ||||
import bubble.dao.cloud.BubbleNodeDAO; | |||||
import bubble.model.cloud.BubbleNode; | import bubble.model.cloud.BubbleNode; | ||||
import bubble.model.cloud.BubbleNodeState; | |||||
import lombok.extern.slf4j.Slf4j; | |||||
import static bubble.service.cloud.NodeProgressMeterConstants.METER_ERROR_NO_IP; | import static bubble.service.cloud.NodeProgressMeterConstants.METER_ERROR_NO_IP; | ||||
import static bubble.service.cloud.NodeProgressMeterConstants.METER_ERROR_STARTING_NODE; | import static bubble.service.cloud.NodeProgressMeterConstants.METER_ERROR_STARTING_NODE; | ||||
import static org.cobbzilla.util.daemon.ZillaRuntime.shortError; | import static org.cobbzilla.util.daemon.ZillaRuntime.shortError; | ||||
@Slf4j | |||||
public class NodeStartJob implements Runnable { | public class NodeStartJob implements Runnable { | ||||
private BubbleNode node; | private BubbleNode node; | ||||
private final BubbleNodeDAO nodeDAO; | |||||
// private final BubbleNodeDAO nodeDAO; | |||||
private final ComputeServiceDriver computeDriver; | private final ComputeServiceDriver computeDriver; | ||||
public NodeStartJob(BubbleNode node, | public NodeStartJob(BubbleNode node, | ||||
BubbleNodeDAO nodeDAO, | |||||
// BubbleNodeDAO nodeDAO, | |||||
ComputeServiceDriver computeDriver) { | ComputeServiceDriver computeDriver) { | ||||
this.node = node; | this.node = node; | ||||
this.nodeDAO = nodeDAO; | |||||
// this.nodeDAO = nodeDAO; | |||||
this.computeDriver = computeDriver; | this.computeDriver = computeDriver; | ||||
} | } | ||||
@Override public void run() { | @Override public void run() { | ||||
try { | try { | ||||
node.setState(BubbleNodeState.booting); | |||||
nodeDAO.update(node); | |||||
// node.setState(BubbleNodeState.booting); | |||||
// nodeDAO.update(node); | |||||
log.debug("run: calling computeDriver.start("+node.id()+")"); | |||||
node = computeDriver.start(node); | node = computeDriver.start(node); | ||||
node.setState(BubbleNodeState.booted); | |||||
nodeDAO.update(node); | |||||
log.debug("run: computeDriver.start("+node.id()+") returned successfully"); | |||||
// node.setState(BubbleNodeState.booted); | |||||
// nodeDAO.update(node); | |||||
if (!node.hasIp4()) { | if (!node.hasIp4()) { | ||||
throw new NodeJobException(METER_ERROR_NO_IP, "node booted but has no IP"); | throw new NodeJobException(METER_ERROR_NO_IP, "node booted but has no IP"); | ||||
@@ -22,6 +22,7 @@ import org.cobbzilla.wizard.util.RestResponse; | |||||
import org.springframework.beans.factory.annotation.Autowired; | import org.springframework.beans.factory.annotation.Autowired; | ||||
import org.springframework.stereotype.Service; | import org.springframework.stereotype.Service; | ||||
import javax.net.ssl.SSLException; | |||||
import java.net.ConnectException; | import java.net.ConnectException; | ||||
import java.net.UnknownHostException; | import java.net.UnknownHostException; | ||||
import java.util.List; | import java.util.List; | ||||
@@ -123,17 +124,30 @@ public class NotificationService { | |||||
log.debug("_notify: <<<<< RECEIPT <<<<<< " + json(receipt, COMPACT_MAPPER) + " <<<<<<<<<<<<<<<<<<"); | log.debug("_notify: <<<<< RECEIPT <<<<<< " + json(receipt, COMPACT_MAPPER) + " <<<<<<<<<<<<<<<<<<"); | ||||
return receipt; | return receipt; | ||||
} catch (ConnectException | ConnectTimeoutException | UnknownHostException | ApiException e) { | |||||
} catch (ConnectException | ConnectTimeoutException | UnknownHostException | SSLException | ApiException e) { | |||||
notification.setStatus(NotificationSendStatus.error); | notification.setStatus(NotificationSendStatus.error); | ||||
notification.setException(e); | notification.setException(e); | ||||
sentNotificationDAO.update(notification); | sentNotificationDAO.update(notification); | ||||
throw new IllegalStateException("_notify: "+shortError(e), e); | |||||
return handleNotifyException(notification, e, true); | |||||
} catch (Exception e) { | } catch (Exception e) { | ||||
notification.setStatus(NotificationSendStatus.error); | notification.setStatus(NotificationSendStatus.error); | ||||
notification.setException(e); | notification.setException(e); | ||||
sentNotificationDAO.update(notification); | sentNotificationDAO.update(notification); | ||||
return die("_notify: "+shortError(e), e); | |||||
return handleNotifyException(notification, e, true); | |||||
} | |||||
} | |||||
} | |||||
public NotificationReceipt handleNotifyException(SentNotification notification, Exception e, boolean die) { | |||||
if (notification.getType() == NotificationType.health_check) { | |||||
log.error("_notify: health check failed for node "+notification.getToNode()+": "+shortError(e)); | |||||
return null; | |||||
} else { | |||||
if (die) { | |||||
return die("_notify: " + shortError(e), e); | |||||
} else { | |||||
throw new IllegalStateException("_notify: "+shortError(e), e); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -2139,7 +2139,6 @@ comix | |||||
comma | comma | ||||
commo | commo | ||||
comms | comms | ||||
commy | |||||
compo | compo | ||||
comps | comps | ||||
compt | compt | ||||
@@ -2165,7 +2164,6 @@ conte | |||||
conto | conto | ||||
conus | conus | ||||
convo | convo | ||||
cooch | |||||
cooed | cooed | ||||
cooee | cooee | ||||
cooer | cooer | ||||
@@ -2182,7 +2180,6 @@ coops | |||||
coopt | coopt | ||||
coost | coost | ||||
coots | coots | ||||
cooze | |||||
copal | copal | ||||
copay | copay | ||||
coped | coped | ||||
@@ -2307,7 +2304,6 @@ creds | |||||
creed | creed | ||||
creek | creek | ||||
creel | creel | ||||
creep | |||||
crees | crees | ||||
creme | creme | ||||
crems | crems | ||||
@@ -2346,7 +2342,6 @@ crome | |||||
crone | crone | ||||
cronk | cronk | ||||
crony | crony | ||||
crook | |||||
crool | crool | ||||
croon | croon | ||||
crops | crops | ||||
@@ -2403,8 +2398,6 @@ cully | |||||
culms | culms | ||||
culpa | culpa | ||||
culti | culti | ||||
cults | |||||
culty | |||||
cumec | cumec | ||||
cumin | cumin | ||||
cundy | cundy | ||||
@@ -2433,7 +2426,6 @@ curns | |||||
curny | curny | ||||
currs | currs | ||||
curry | curry | ||||
curse | |||||
cursi | cursi | ||||
curst | curst | ||||
curve | curve | ||||
@@ -52,6 +52,10 @@ | |||||
<logger name="bubble.service.stream" level="INFO" /> | <logger name="bubble.service.stream" level="INFO" /> | ||||
<!-- <logger name="bubble.service.account.StandardAccountMessageService" level="DEBUG" />--> | <!-- <logger name="bubble.service.account.StandardAccountMessageService" level="DEBUG" />--> | ||||
<!-- <logger name="bubble.dao.account.message.AccountMessageDAO" level="DEBUG" />--> | <!-- <logger name="bubble.dao.account.message.AccountMessageDAO" level="DEBUG" />--> | ||||
<logger name="bubble.service.cloud.job" level="DEBUG" /> | |||||
<logger name="bubble.service.cloud.NodeLauncher" level="DEBUG" /> | |||||
<logger name="bubble.service.cloud.NodeService" level="DEBUG" /> | |||||
<logger name="bubble.cloud.compute.vultr" level="DEBUG" /> | |||||
<logger name="bubble.resources.message" level="INFO" /> | <logger name="bubble.resources.message" level="INFO" /> | ||||
<logger name="bubble.app.analytics" level="DEBUG" /> | <logger name="bubble.app.analytics" level="DEBUG" /> | ||||
<logger name="bubble.app.passthru" level="DEBUG" /> | <logger name="bubble.app.passthru" level="DEBUG" /> | ||||
@@ -41,6 +41,14 @@ message_profile_update_success=Profile update was successful | |||||
downloading_notice=File download will start promptly... | downloading_notice=File download will start promptly... | ||||
downloading_failed=File download failed. Please retry from the start | downloading_failed=File download failed. Please retry from the start | ||||
# Jar upgrade | |||||
message_jar_upgrade_available=A new version of Bubble is available | |||||
message_jar_upgrade_version=The new Bubble version is | |||||
message_jar_current_version=Your current Bubble version is | |||||
button_label_jar_upgrade=Upgrade Your Bubble | |||||
button_label_jar_upgrading=Upgrading... | |||||
message_jar_upgrading=Your Bubble may be unresponsive for a minute or two while the upgrade occurs | |||||
# Account SSH key fields | # Account SSH key fields | ||||
form_title_ssh_keys=Account SSH Keys | form_title_ssh_keys=Account SSH Keys | ||||
form_title_add_ssh_key=Add SSH Key | form_title_add_ssh_key=Add SSH Key | ||||
@@ -0,0 +1,60 @@ | |||||
#!/bin/bash | |||||
BUBBLE_HOME="/home/bubble" | |||||
UPGRADE_JAR="${BUBBLE_HOME}/api/.upgrade.jar" | |||||
BUBBLE_JAR="${BUBBLE_HOME}/api/bubble.jar" | |||||
LOG=/tmp/bubble.upgrade.log | |||||
function die { | |||||
echo 1>&2 "${1}" | |||||
log "${1}" | |||||
exit 1 | |||||
} | |||||
function log { | |||||
echo "$(date): ${1}" >> ${LOG} | |||||
} | |||||
function verify_api_ok { | |||||
log "Restarting API..." | |||||
supervisorctl restart bubble || die "Error restarting bubble" | |||||
OK=255 | |||||
START_VERIFY=$(date +%s) | |||||
VERIFY_TIMEOUT=180 | |||||
VERIFY_URL="https://$(hostname):1443/api/auth/ready" | |||||
if [[ ${OK} -ne 0 && $(expr $(date +%s) - ${START_VERIFY} -le ${VERIFY_TIMEOUT}) ]] ; then | |||||
sleep 10s | |||||
log "Verifying ${VERIFY_URL} is OK...." | |||||
curl "${VERIFY_URL}" 2>&1 | tee -a ${LOG} | |||||
OK=$? | |||||
fi | |||||
if [[ ${OK} -eq 0 ]] ; then | |||||
echo "ok" | |||||
else | |||||
echo "error" | |||||
fi | |||||
} | |||||
BACKUP_JAR=$(mktemp /tmp/bubble.jar.XXXXXXX) | |||||
log "Backing up to ${BACKUP_JAR} ..." | |||||
cp ${BUBBLE_JAR} ${BACKUP_JAR} || die "Error backing up existing jar before upgrade ${BUBBLE_JAR} ${BACKUP_JAR}" | |||||
log "Upgrading..." | |||||
mv ${UPGRADE_JAR} ${BUBBLE_JAR} || die "Error moving ${UPGRADE_JAR} -> ${BUBBLE_JAR}" | |||||
log "Verifying upgrade..." | |||||
API_OK=$(verify_api_ok) | |||||
if [[ -z "${API_OK}" || "${API_OK}" != "ok" ]] ; then | |||||
log "Error starting upgraded API, reverting...." | |||||
cp ${BACKUP_JAR} ${BUBBLE_JAR} || die "Error restoring API jar from backup!" | |||||
API_OK=$(verify_api_ok) | |||||
if [[ -z "${API_OK}" || "${API_OK}" != "ok" ]] ; then | |||||
log "Error starting API from backup!" | |||||
fi | |||||
else | |||||
log "Upgrading web site files..." | |||||
cd ~bubble && jar xf ${BUBBLE_JAR} site && chown -R bubble:bubble site || die "Error updating web files..." | |||||
fi |
@@ -0,0 +1,23 @@ | |||||
#!/bin/bash | |||||
# | |||||
# Copyright (c) 2020 Bubble, Inc. All rights reserved. For personal (non-commercial) use, see license: https://getbubblenow.com/bubble-license/ | |||||
# | |||||
THIS_DIR="$(cd "$(dirname "${0}")" && pwd)" | |||||
LOG=/tmp/bubble.upgrade.log | |||||
function log { | |||||
echo "$(date): ${1}" >> ${LOG} | |||||
} | |||||
while : ; do | |||||
sleep 5 | |||||
if [[ -f "${UPGRADE_JAR}" ]] ; then | |||||
"${THIS_DIR}/bubble_upgrade.sh" | |||||
if [[ $? -eq 0 ]] ; then | |||||
log "Upgrade completed successfully" | |||||
else | |||||
log "Upgrade failed" | |||||
fi | |||||
rm -f "${UPGRADE_JAR}" | |||||
fi | |||||
done |
@@ -0,0 +1,5 @@ | |||||
[program:supervisor_bubble_upgrade_monitor] | |||||
stdout_logfile = /dev/null | |||||
stderr_logfile = /dev/null | |||||
command=/usr/local/sbin/supervisor_bubble_upgrade_monitor.sh |
@@ -101,27 +101,29 @@ | |||||
with_items: | with_items: | ||||
- init_bubble_db.sh | - init_bubble_db.sh | ||||
- name: Install refresh_bubble_ssh_keys monitor | |||||
- name: Install sbin monitors and scripts | |||||
copy: | copy: | ||||
src: "refresh_bubble_ssh_keys_monitor.sh" | |||||
dest: "/usr/local/sbin/refresh_bubble_ssh_keys_monitor.sh" | |||||
owner: root | |||||
group: root | |||||
mode: 0500 | |||||
- name: Install refresh_bubble_ssh_keys script | |||||
copy: | |||||
src: refresh_bubble_ssh_keys.sh | |||||
dest: /usr/local/sbin/refresh_bubble_ssh_keys.sh | |||||
src: "{{ item }}" | |||||
dest: "/usr/local/sbin/{{ item }}" | |||||
owner: root | owner: root | ||||
group: root | group: root | ||||
mode: 0500 | mode: 0500 | ||||
with_items: | |||||
- refresh_bubble_ssh_keys_monitor.sh | |||||
- refresh_bubble_ssh_keys.sh | |||||
- bubble_upgrade_monitor.sh | |||||
- bubble_upgrade.sh | |||||
- name: Install refresh_bubble_ssh_keys_monitor supervisor conf file | - name: Install refresh_bubble_ssh_keys_monitor supervisor conf file | ||||
copy: | copy: | ||||
src: supervisor_refresh_bubble_ssh_keys_monitor.conf | src: supervisor_refresh_bubble_ssh_keys_monitor.conf | ||||
dest: /etc/supervisor/conf.d/refresh_bubble_ssh_keys_monitor.conf | dest: /etc/supervisor/conf.d/refresh_bubble_ssh_keys_monitor.conf | ||||
- name: Install bubble_upgrade_monitor supervisor conf file | |||||
copy: | |||||
src: supervisor_bubble_upgrade_monitor.conf | |||||
dest: /etc/supervisor/conf.d/bubble_upgrade_monitor.conf | |||||
- name: Install packer for sage node | - name: Install packer for sage node | ||||
shell: su - bubble bash -c install_packer.sh | shell: su - bubble bash -c install_packer.sh | ||||
when: install_type == 'sage' | when: install_type == 'sage' |
@@ -28,7 +28,7 @@ public class MockNetworkService extends StandardNetworkService { | |||||
@Autowired private BubbleConfiguration configuration; | @Autowired private BubbleConfiguration configuration; | ||||
@Override protected String lockNetwork(String network) { return "lock"; } | @Override protected String lockNetwork(String network) { return "lock"; } | ||||
@Override protected boolean confirmLock(String network, String lock) { return true; } | |||||
@Override protected boolean confirmNetLock(String network, String lock) { return true; } | |||||
@Override protected void unlockNetwork(String network, String lock) {} | @Override protected void unlockNetwork(String network, String lock) {} | ||||
@Override public BubbleNode newNode(NewNodeNotification nn, NodeLaunchMonitor launchMonitor) { | @Override public BubbleNode newNode(NewNodeNotification nn, NodeLaunchMonitor launchMonitor) { | ||||
@@ -70,9 +70,7 @@ public class MockNetworkService extends StandardNetworkService { | |||||
return true; | return true; | ||||
} | } | ||||
@Override public BubbleNode stopNode(BubbleNode node) { | |||||
return node.setState(BubbleNodeState.stopped); | |||||
} | |||||
@Override public void stopNode(BubbleNode node) { node.setState(BubbleNodeState.stopped); } | |||||
@Override public BubbleNode killNode(BubbleNode node, String message) { | @Override public BubbleNode killNode(BubbleNode node, String message) { | ||||
return node.setState(BubbleNodeState.stopped); | return node.setState(BubbleNodeState.stopped); | ||||
@@ -1 +1 @@ | |||||
Subproject commit 38af7d4ed0f733e35f8a743a65331984c377d758 | |||||
Subproject commit 9dc962290999b5105549ed0d495152288f0d8281 |
@@ -1 +1 @@ | |||||
Subproject commit 009a52edb53315fcb7d90c9feed382970aa9a4b8 | |||||
Subproject commit 549884d63dc1d46f15c33cdcf5d9604deb821992 |
@@ -1 +1 @@ | |||||
Subproject commit 773a75f6cc2659bc330a5b4bdbb61d31affeec10 | |||||
Subproject commit 035690f052f72841bfedc5b25e3798fa22f7b2dd |