2020-11-14 17:50:04 +00:00
|
|
|
import json
|
2021-09-15 21:22:23 +00:00
|
|
|
import logging
|
2020-11-14 17:50:04 +00:00
|
|
|
import os
|
|
|
|
import pathlib
|
|
|
|
import sys
|
|
|
|
import threading
|
|
|
|
import traceback
|
2020-12-30 19:49:45 +00:00
|
|
|
from datetime import datetime, timedelta
|
2020-12-02 10:15:56 +00:00
|
|
|
from queue import SimpleQueue
|
|
|
|
from signal import signal, SIGHUP, SIGINT, SIGTERM, setitimer, SIGALRM, ITIMER_REAL, SIGUSR1, SIGUSR2, strsignal
|
2020-11-14 17:50:04 +00:00
|
|
|
|
|
|
|
import click
|
2021-09-15 21:22:23 +00:00
|
|
|
import sh
|
2020-11-14 17:50:04 +00:00
|
|
|
|
2023-09-22 22:36:58 +00:00
|
|
|
# Base directory under which each service identifier has its own pod home directory.
SERVICES_BASE_PATH = "/infra/services/"
|
2020-11-14 17:50:04 +00:00
|
|
|
|
2024-03-23 15:37:47 +00:00
|
|
|
# Pre-configured `sh` launcher whose commands write their stdout/stderr to this process's streams.
shlog = sh.bake(_out=sys.stdout, _err=sys.stderr)
|
2020-11-14 17:50:04 +00:00
|
|
|
# Handle for the `systemd-notify` executable, used to report readiness and status text.
sdnotify = sh.Command("systemd-notify")
|
|
|
|
|
|
|
|
|
|
|
|
class PodKeeper:
    """Supervise one podman pod that is created from a kube YAML definition.

    The signal-handler methods (`destroy`, `reload`, `check`, `passthrough`)
    only record a request in an event/queue and wake the main loop through
    ``waiter``; the actual podman calls are made from :meth:`run`.
    """

    def __init__(self, network, log_driver, log_level, replace, remove, identifier):
        """Validate *identifier* and prepare the state used by :meth:`run`.

        Args:
            network: Value for ``--network``; a falsy value omits the option.
            log_driver: Value for ``--log-driver``; a falsy value omits the option.
            log_level: Value for ``--log-level``; a falsy value omits the option.
            replace: If true, an already existing pod of the same name is
                stopped and removed before the new pod is started.
            remove: If true, the pod is removed after it has been stopped.
            identifier: Single path component naming the service directory
                below ``SERVICES_BASE_PATH``.

        Raises:
            ValueError: *identifier* is not exactly one path component.
            NotADirectoryError: The pod home directory does not exist.
            FileNotFoundError: ``pod-<identifier>.yaml`` is missing in the pod home.
        """
        # Extra options appended to `podman play kube`; only non-empty values are passed.
        podnet_args = ()
        for option, value in (
            ("--network", network),
            ("--log-driver", log_driver),
            ("--log-level", log_level),
        ):
            if value:
                podnet_args += (option, value)
        self.podnet_args = podnet_args
        self.replace = replace
        self.remove = remove

        # Reject anything that is not a plain name (e.g. "a/b" or an empty string).
        identifier_path = pathlib.PurePath(identifier)
        if len(identifier_path.parts) != 1:
            raise ValueError(f"identifier has path parts: {identifier_path}")

        self.podhome = pathlib.Path(SERVICES_BASE_PATH) / identifier_path
        if not self.podhome.exists():
            raise NotADirectoryError(f"pod home does not exist: {self.podhome}")

        self.podname = f"{identifier}_pod"
        self.podyaml = f"pod-{identifier}.yaml"
        podyaml_complete = self.podhome / self.podyaml
        if not podyaml_complete.exists():
            raise FileNotFoundError(f"pod definition does not exist: {podyaml_complete}")

        # Request flags set by the signal handlers and consumed by run().
        self.stopping = threading.Event()
        self.reloading = threading.Event()
        self.checking = threading.Event()
        # Wake-up flag the main loop blocks on.
        self.waiter = threading.Event()
        # Naive UTC timestamp of the last health check; used as the log window start.
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. Its
        # isoformat() is handed to `podman logs --since`, so confirm how podman
        # interprets offset-less timestamps before switching to aware datetimes.
        self.last_check = datetime.utcnow()
        # Signals to forward to the pod, queued by passthrough().
        self.passing_signal = threading.Event()
        self.pass_signal_nums = SimpleQueue()

    def destroy(self, signum, stackframe):
        """Signal handler: request shutdown of the main loop."""
        print("Destroy signal", signum, file=sys.stderr, flush=True)
        self.stopping.set()
        self.waiter.set()

    def reload(self, signum, stackframe):
        """Signal handler: request that SIGHUP is forwarded to the pod."""
        print("Reload signal", signum, file=sys.stderr, flush=True)
        self.reloading.set()
        self.waiter.set()

    def check(self, signum, stackframe):
        """Signal handler: request a health check of the pod's containers."""
        self.checking.set()
        self.waiter.set()

    def passthrough(self, signum, stackframe):
        """Signal handler: queue *signum* so the main loop forwards it to the pod."""
        # SimpleQueue.put never blocks; the block/timeout arguments are ignored by it.
        self.pass_signal_nums.put(signum)
        self.passing_signal.set()
        self.waiter.set()

    def run(self):
        """Start the pod and service signal requests until shutdown is requested.

        Changes the working directory to the pod home, optionally replaces an
        existing pod, starts the pod and then loops on ``waiter``. The pod is
        always stopped on the way out, and the ``sh.command`` logger level is
        always restored, even when the loop fails with an exception.

        Raises:
            sh.ErrorReturnCode: Presumably raised by `sh` when one of the
                unguarded podman calls exits with a non-zero status.
        """
        os.chdir(self.podhome)

        # Exit codes 0 and 1 of `podman pod exists` are both accepted; only 0 triggers the replacement.
        if self.replace and sh.podman.pod.exists(self.podname, _ok_code=[0, 1], _return_cmd=True).exit_code == 0:
            print(f"Replacing existing pod {self.podname}", file=sys.stderr, flush=True)
            shlog.podman.pod.stop(self.podname)
            shlog.podman.pod.rm("-f", self.podname)

        print(f"Starting pod {self.podname} at {self.last_check}", file=sys.stderr, flush=True)
        shlog.podman.play.kube(self.podyaml, *self.podnet_args)

        shlogger = logging.getLogger("sh.command")
        oldlevel = shlogger.level
        try:
            # Raise the `sh.command` logger threshold to ERROR while monitoring.
            shlogger.setLevel(logging.ERROR)

            if 'NOTIFY_SOCKET' in os.environ:
                sdnotify("--ready", f"--pid={os.getpid()}", "--status=Monitoring pod...")

            while not self.stopping.is_set():
                self.waiter.wait()
                # Clear the wake-up flag before looking at the request flags, so a
                # signal arriving while requests are processed wakes the next iteration.
                self.waiter.clear()

                if self.passing_signal.is_set():
                    self.passing_signal.clear()
                    while not self.pass_signal_nums.empty():
                        signum = self.pass_signal_nums.get(block=True, timeout=2)
                        self.signal_pod(signum)

                if self.checking.is_set():
                    self.checking.clear()
                    self.check_pod()

                if self.reloading.is_set():
                    self.reloading.clear()
                    self.signal_pod(SIGHUP)

            if 'NOTIFY_SOCKET' in os.environ:
                sdnotify("--status=Stopping pod")
        finally:
            # Restore the logger level on every exit path, not only on a clean shutdown.
            shlogger.setLevel(oldlevel)
            self.stop_pod()

    def signal_pod(self, signum):
        """Send *signum* to the pod; a failing podman call is reported, not raised."""
        print(f"Sending signal '{strsignal(signum)}' to pod {self.podname}", file=sys.stderr, flush=True)
        try:
            shlog.podman.pod.kill("--signal", str(signum), self.podname)
        except sh.ErrorReturnCode:
            print("Error signaling pod", file=sys.stderr, flush=True)
            traceback.print_exc()

    @staticmethod
    def _stopped_container_names(pod_description):
        """Return the names of all containers whose state is not ``running``.

        *pod_description* is the decoded output of ``podman pod inspect``. Both
        a single JSON object and a JSON array of such objects are accepted.
        """
        if isinstance(pod_description, list):
            pods = pod_description
        else:
            pods = [pod_description]
        return [
            container["Name"]
            for pod in pods
            for container in pod["Containers"]
            if container["State"] != "running"
        ]

    def check_pod(self):
        """Inspect the pod and request shutdown if any container is not running.

        For every such container the logs since shortly before the previous
        check are written to stderr. ``last_check`` is advanced afterwards.
        """
        new_timestamp = datetime.utcnow()
        inspect_command = sh.podman.pod.inspect(self.podname, _return_cmd=True)
        pod_description = json.loads(inspect_command.stdout)

        for container_name in self._stopped_container_names(pod_description):
            print(f"Container {container_name} exited", file=sys.stderr, flush=True)
            # Start the log window 10 seconds before the previous check.
            logs_since = self.last_check - timedelta(seconds=10)
            print("Log since last check (-10s):\n", file=sys.stderr, flush=True)
            shlog.podman.logs('--since', logs_since.isoformat(), container_name, _out=sys.stderr)
            self.stopping.set()

        self.last_check = new_timestamp

    def stop_pod(self):
        """Stop the pod (two attempts) and remove it when ``remove`` is set.

        Failures of the podman calls are reported on stderr and never raised.
        """
        print("Stopping pod", self.podname, file=sys.stderr, flush=True)
        try:
            shlog.podman.pod.stop("-t", "19", self.podname)
        except sh.ErrorReturnCode:
            print(f"First stop of {self.podname} was not successful!", file=sys.stderr, flush=True)
            successful_stopped = False
        else:
            successful_stopped = True

        # A second stop with a shorter timeout is always attempted; its failure is
        # only reported when the first attempt failed as well.
        try:
            shlog.podman.pod.stop("-t", "5", self.podname)
        except sh.ErrorReturnCode:
            if not successful_stopped:
                print(f"Second stop of {self.podname} was not successful!", file=sys.stderr, flush=True)

        if self.remove:
            try:
                shlog.podman.pod.rm(self.podname)
            except sh.ErrorReturnCode:
                print(f"Removal of {self.podname} was not successful!", file=sys.stderr, flush=True)
|
2020-11-14 17:50:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Command line entry point: builds a PodKeeper from the options, wires the
# process signals to its handler methods and runs it until shutdown.
# (Documented with comments on purpose: a docstring on a click command is
# shown as its --help text.)
# NOTE(review): the "--network" default "brodge" may be a site-specific network
# name or a typo for "bridge" -- confirm before changing.
@click.command()
@click.option("--network", default="brodge", help="Network for the created pod")
@click.option("--log-driver", default="journald", help="Logging driver for the created pod")
@click.option("--log-level", default="", help="Controls log-level on podman call")
@click.option(
    "--replace/--no-replace",
    default=True,
    help="Controls replacement of previously running pod with the "
         "same name",
)
@click.option("--remove/--keep", default=True, help="Controls removal of pod after stopping")
@click.option("--verbose", is_flag=True, default=False, help="Enable DEBUG logging")
@click.argument("identifier")
def main(network, log_driver, log_level, replace, remove, verbose, identifier):
    # Root logger threshold: DEBUG with --verbose, INFO otherwise.
    logging.root.setLevel(logging.DEBUG if verbose else logging.INFO)

    keeper = PodKeeper(
        network=network,
        log_driver=log_driver,
        log_level=log_level,
        replace=replace,
        remove=remove,
        identifier=identifier,
    )

    # Signal -> handler table, registered in this order.
    signal_handlers = (
        (SIGINT, keeper.destroy),
        (SIGTERM, keeper.destroy),
        (SIGHUP, keeper.reload),
        (SIGALRM, keeper.check),
        (SIGUSR1, keeper.passthrough),
        (SIGUSR2, keeper.passthrough),
    )
    for signum, handler in signal_handlers:
        signal(signum, handler)

    # Deliver SIGALRM after 4 seconds and then every 120 seconds to trigger the checks.
    setitimer(ITIMER_REAL, 4.0, 120.0)

    keeper.run()
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry: set up default logging, then hand over to the click command.
if __name__ == "__main__":
    logging.basicConfig()
    main()
|