podlaunch/main.py

162 lines
6.1 KiB
Python
Raw Normal View History

2020-11-14 17:50:04 +00:00
import json
import os
import pathlib
import sys
import threading
import traceback
from datetime import datetime
2020-12-02 10:15:56 +00:00
from queue import SimpleQueue
from signal import signal, SIGHUP, SIGINT, SIGTERM, setitimer, SIGALRM, ITIMER_REAL, SIGUSR1, SIGUSR2, strsignal
2020-11-14 17:50:04 +00:00
import click
import sh
# noinspection PyUnresolvedReferences
from sh import podman
# Base directory that holds one sub-directory ("pod home") per pod identifier;
# PodKeeper joins the identifier onto this path.
SERVICES_BASE_PATH = "/docker/services/"
# Handle for the `systemd-notify` command, used by PodKeeper.run to report
# readiness/status when NOTIFY_SOCKET is present in the environment.
sdnotify = sh.Command("systemd-notify")
class PodKeeper:
    """Start a podman pod from a kube YAML file and supervise it until stopped.

    The instance exposes signal handlers (`destroy`, `reload`, `check`,
    `passthrough`) that only set flags; the actual work happens in the
    loop inside `run`, which wakes up whenever a handler sets `waiter`.
    """

    def __init__(self, network, replace, remove, identifier):
        """Validate the pod identifier and prepare supervision state.

        Args:
            network: Network name passed to ``podman play kube --network``;
                a falsy value omits the option.
            replace: If true, an already existing pod of the same name is
                stopped and removed before starting.
            remove: If true, the pod is removed after it has been stopped.
            identifier: Single path component naming the pod home below
                ``SERVICES_BASE_PATH``.

        Raises:
            ValueError: If ``identifier`` is not exactly one plain path
                component (contains separators, is ``..`` or a bare root).
            NotADirectoryError: If the pod home does not exist.
            FileNotFoundError: If ``pod-<identifier>.yaml`` is missing in
                the pod home.
        """
        self.podnet_args = ("--network", network) if network else ()
        self.replace = replace
        self.remove = remove

        identifier_path = pathlib.PurePath(identifier)
        # ".." and "/" both consist of exactly one part, so the length check
        # alone would let them escape the services directory.
        if len(identifier_path.parts) != 1 or identifier_path.name in ("", ".."):
            raise ValueError(f"identifier has path parts: {identifier_path}")

        self.podhome = pathlib.Path(SERVICES_BASE_PATH) / identifier_path
        if not self.podhome.exists():
            raise NotADirectoryError(f"pod home does not exist: {self.podhome}")

        self.podname = f"{identifier}_pod"
        self.podyaml = f"pod-{identifier}.yaml"
        podyaml_complete = self.podhome / self.podyaml
        if not podyaml_complete.exists():
            raise FileNotFoundError(f"pod definition does not exist: {podyaml_complete}")

        # Flags set by the signal handlers and consumed by run().
        self.stopping = threading.Event()
        self.reloading = threading.Event()
        self.checking = threading.Event()
        # Wake-up event for the run() loop; set by every handler.
        self.waiter = threading.Event()
        # Naive UTC timestamp; its isoformat() is handed to `podman logs --since`.
        self.last_check = datetime.utcnow()
        self.passing_signal = threading.Event()
        # Signal numbers waiting to be forwarded to the pod.
        self.pass_signal_nums = SimpleQueue()

    def destroy(self, signum, stackframe):
        """Signal handler: request that the run() loop terminates."""
        print("Destroy signal", signum, file=sys.stderr, flush=True)
        self.stopping.set()
        self.waiter.set()

    def reload(self, signum, stackframe):
        """Signal handler: request that SIGHUP is sent to the pod."""
        print("Reload signal", signum, file=sys.stderr, flush=True)
        self.reloading.set()
        self.waiter.set()

    def check(self, signum, stackframe):
        """Signal handler: request a state check of the pod's containers."""
        self.checking.set()
        self.waiter.set()

    def passthrough(self, signum, stackframe):
        """Signal handler: queue ``signum`` for forwarding to the pod."""
        # SimpleQueue.put() never blocks; its block/timeout arguments are
        # ignored, so they are not passed.
        self.pass_signal_nums.put(signum)
        self.passing_signal.set()
        self.waiter.set()

    def run(self):
        """Start the pod and process handler requests until stopping is set.

        The pod is always stopped (and optionally removed) via stop_pod()
        when the loop ends, including when an exception escapes the loop.
        """
        os.chdir(self.podhome)
        if self.replace and podman.pod.exists(self.podname, _ok_code=[0, 1]).exit_code == 0:
            print(f"Replacing existing pod {self.podname}", file=sys.stderr, flush=True)
            podman.pod.stop(self.podname)
            podman.pod.rm("-f", self.podname)
        print(f"Starting pod {self.podname} at {self.last_check}", file=sys.stderr, flush=True)
        podman.play.kube(self.podyaml, *self.podnet_args)
        try:
            if 'NOTIFY_SOCKET' in os.environ:
                sdnotify("--ready", f"--pid={os.getpid()}", "--status=Monitoring pod...")
            while not self.stopping.is_set():
                self.waiter.wait()
                # Clear before inspecting the flags so a request arriving
                # while they are processed triggers another iteration.
                self.waiter.clear()

                if self.passing_signal.is_set():
                    self.passing_signal.clear()
                    while not self.pass_signal_nums.empty():
                        signum = self.pass_signal_nums.get(block=True, timeout=2)
                        self.signal_pod(signum)

                if self.checking.is_set():
                    self.checking.clear()
                    self.check_pod()

                if self.reloading.is_set():
                    self.reloading.clear()
                    self.signal_pod(SIGHUP)

            if 'NOTIFY_SOCKET' in os.environ:
                sdnotify("--status=Stopping pod")
        finally:
            self.stop_pod()

    @staticmethod
    def _signal_arg(signum):
        """Return ``signum`` as the decimal string used for ``--signal``."""
        # Before Python 3.11, str() of a signal.Signals member (e.g. SIGHUP)
        # is "Signals.SIGHUP" rather than the number; int() normalises
        # both enum members and plain ints.
        return str(int(signum))

    def signal_pod(self, signum):
        """Send ``signum`` to the pod; failures are logged, not raised."""
        print(f"Sending signal '{strsignal(signum)}' to pod {self.podname}", file=sys.stderr, flush=True)
        try:
            podman.pod.kill("--signal", self._signal_arg(signum), self.podname)
        except sh.ErrorReturnCode:
            print("Error signaling pod", file=sys.stderr, flush=True)
            traceback.print_exc()

    def check_pod(self):
        """Inspect the pod and request stopping if any container is not running.

        For every container whose state is not "running", its logs since
        the previous check are printed to stderr.
        """
        new_timestamp = datetime.utcnow()
        inspect_command = podman.pod.inspect(self.podname)
        pod_description = json.loads(inspect_command.stdout)
        for container in pod_description["Containers"]:
            if container["State"] != "running":
                print(f"Container {container['Name']} exited", file=sys.stderr, flush=True)
                logs = podman.logs('--since', self.last_check.isoformat(), container['Name'])
                print(f"Log since last check:\n{logs}", file=sys.stderr, flush=True)
                self.stopping.set()
        self.last_check = new_timestamp

    def stop_pod(self):
        """Stop the pod (two attempts) and remove it if ``remove`` is set."""
        print("Stopping pod", self.podname, file=sys.stderr, flush=True)
        try:
            podman.pod.stop("-t", "19", self.podname)
            successful_stopped = True
        except sh.ErrorReturnCode:
            print(f"First stop of {self.podname} was not successful!", file=sys.stderr, flush=True)
            successful_stopped = False

        # The second stop is issued unconditionally; its failure is only
        # reported when the first attempt failed as well.
        try:
            podman.pod.stop("-t", "5", self.podname)
        except sh.ErrorReturnCode:
            if not successful_stopped:
                print(f"Second stop of {self.podname} was not successful!", file=sys.stderr, flush=True)

        if self.remove:
            try:
                podman.pod.rm(self.podname)
            except sh.ErrorReturnCode:
                print(f"Removal of {self.podname} was not successful!", file=sys.stderr, flush=True)
2020-11-14 17:50:04 +00:00
@click.command()
@click.option("--network", default="brodge", help="Network for the created pod")
@click.option("--replace/--no-replace", default=True, help="Controls replacement of previously running pod with the same name")
@click.option("--remove/--keep", default=True, help="Controls removal of pod after stopping")
@click.argument("identifier")
def main(network, replace, remove, identifier):
    # Command-line entry point: build the keeper, wire signals to its
    # handlers, arm the periodic check timer and hand over to the run loop.
    # (Deliberately a comment, not a docstring: click would display a
    # docstring as the command's help text.)
    pod_keeper = PodKeeper(network, replace, remove, identifier)

    # Registration order matches the handler table top to bottom.
    handler_table = (
        (SIGINT, pod_keeper.destroy),
        (SIGTERM, pod_keeper.destroy),
        (SIGHUP, pod_keeper.reload),
        (SIGALRM, pod_keeper.check),
        (SIGUSR1, pod_keeper.passthrough),
        (SIGUSR2, pod_keeper.passthrough),
    )
    for sig, handler in handler_table:
        signal(sig, handler)

    # First SIGALRM after 4 seconds, then one every 10 seconds.
    setitimer(ITIMER_REAL, 4.0, 10.0)
    pod_keeper.run()
# Run the click command only when executed as a script, not on import.
if __name__ == "__main__":
    main()