diff --git a/main.py b/main.py
index 80b72a3..3e3c490 100644
--- a/main.py
+++ b/main.py
@@ -13,6 +13,8 @@ import click
 import sh
 
 SERVICES_BASE_PATH = "/infra/services/"
+# Seconds between periodic pod checks; also the look-back window for container logs.
+POD_CHECK_TIME = 120.0
 
 shlog = sh.bake(_out=sys.stdout, _err=sys.stderr)
 sdnotify = sh.Command("systemd-notify")
@@ -117,12 +119,24 @@ class PodKeeper:
     def check_pod(self):
+        """Inspect the pod; set self.stopping if it is missing or a container is not running."""
         new_timestamp = datetime.utcnow()
         inspect_command = sh.podman.pod.inspect(self.podname, _return_cmd=True)
-        pod_description = json.loads(inspect_command.stdout)
+        multiple_descriptions = json.loads(inspect_command.stdout)
+        # Podman before 5.0 prints a bare object for a single pod; 5.0+ always prints an array.
+        if isinstance(multiple_descriptions, dict):
+            multiple_descriptions = [multiple_descriptions]
+        if not multiple_descriptions:
+            print(f"No pod descriptions found for {self.podname}", file=sys.stderr, flush=True)
+            self.stopping.set()
+            return
+        if len(multiple_descriptions) != 1:
+            # Raise explicitly: an assert would be stripped when running under `python -O`.
+            raise RuntimeError(f"Single pod description expected for {self.podname}")
+        pod_description = multiple_descriptions[0]
         for container in pod_description["Containers"]:
             if container["State"] != "running":
                 print(f"Container {container['Name']} exited", file=sys.stderr, flush=True)
-                logs_since = self.last_check - timedelta(seconds=10)
-                print(f"Log since last check (-10s):\n", file=sys.stderr, flush=True)
+                logs_since = self.last_check - timedelta(seconds=POD_CHECK_TIME)
+                print(f"Log since last check (-{POD_CHECK_TIME}s):\n", file=sys.stderr, flush=True)
                 shlog.podman.logs('--since', logs_since.isoformat(), container['Name'], _out=sys.stderr)
                 self.stopping.set()
         self.last_check = new_timestamp
@@ -178,5 +192,5 @@ def main(network, log_driver, log_level, replace, remove, verbose, identifier):
     signal(SIGALRM, keeper.check)
     signal(SIGUSR1, keeper.passthrough)
     signal(SIGUSR2, keeper.passthrough)
-    setitimer(ITIMER_REAL, 4.0, 120.0)
+    setitimer(ITIMER_REAL, 3.0, POD_CHECK_TIME)
     keeper.run()