Compare commits

...

2 commits

Author SHA1 Message Date
Ben 828478916d
Update imagerebuild.py for sh 2.0 2024-05-28 14:58:34 +02:00
Ben 3cd0e7adfb
Support Podman 5 2024-04-03 17:52:13 +02:00
4 changed files with 64 additions and 44 deletions

View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.11" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.11" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View file

@ -8,11 +8,17 @@ from typing import Set, Callable
import click
import sh
from sh import podman
from sh import systemctl
SERVICES_BASE_PATH = "/infra/services/"
progressbar = click.progressbar
def hidden_progressbar(*args, **kwargs):
    """Create a ``click.progressbar`` and mark it as hidden.

    Every positional and keyword argument is forwarded unchanged to
    ``click.progressbar``. The resulting object gets its ``is_hidden``
    attribute set to ``True`` before being returned, so it can be used
    wherever a regular ``click.progressbar`` result is expected.

    NOTE(review): this relies on click honouring ``is_hidden`` when it is
    assigned after construction -- confirm against the installed click version.
    """
    silent_bar = click.progressbar(*args, **kwargs)
    silent_bar.is_hidden = True
    return silent_bar
def resolve_image_units():
services_path = pathlib.Path(SERVICES_BASE_PATH)
@ -20,33 +26,29 @@ def resolve_image_units():
logging.info(f"Found {len(services_set)} services: {str(services_set)}")
systemctl("daemon-reload")
sh.systemctl("daemon-reload")
def remove_masked_unit(
_item_set: Set[str],
item: str,
item_to_unit: Callable[[str], str] = lambda i: i,
):
load_state = systemctl.show(
load_state_output = sh.systemctl.show(
"--property=LoadState", "--value", item_to_unit(item)
)
load_state = load_state.stdout.strip().decode(
encoding="utf-8", errors="replace"
)
load_state = load_state_output.strip()
logging.debug(f"{item} load state: {repr(load_state)}")
if load_state == "masked":
logging.info(f"Removed masked entry: {item}")
_item_set.remove(item)
with click.progressbar(list(services_set), label="Checking service units..", show_pos=True) as bar:
with progressbar(list(services_set), label="Checking service units..", show_pos=True) as bar:
for service in bar:
remove_masked_unit(services_set, service, lambda srv: f"pod@{srv}.service")
def add_wants_to_image_units(_image_units: Set[str], unit: str):
wants = systemctl.show("--property=Wants", "--value", unit)
wants_list = (
wants.stdout.strip().decode(encoding="utf-8", errors="replace").split(" ")
)
wants_output = sh.systemctl.show("--property=Wants", "--value", unit)
wants_list = wants_output.strip().split(" ")
logging.debug(f"{unit} wants: {repr(wants_list)}")
for next_unit in wants_list:
if next_unit.startswith("image@") and next_unit.endswith(".service"):
@ -55,7 +57,7 @@ def resolve_image_units():
image_units: Set[str] = set()
with click.progressbar(
with progressbar(
length=len(services_set) * 2, label="Collecting container image services.."
) as bar:
for service in services_set:
@ -77,7 +79,7 @@ def resolve_image_units():
new_image_units
) # add new image units to all image units
with click.progressbar(
with progressbar(
list(image_units), label="Checking container image units..", show_pos=True
) as bar:
for image_unit in bar:
@ -88,39 +90,47 @@ def resolve_image_units():
@click.command()
@click.option("--verbose", is_flag=True, default=False, help="Enable INFO logging")
def main(verbose):
if verbose:
logging.root.setLevel(logging.INFO)
@click.option("-v", "--verbose", count=True, help="Enable INFO logging, use twice for DEBUG")
def main(verbose: int):
if verbose > 0:
if verbose > 2:
logging.root.setLevel(logging.DEBUG)
elif verbose == 2:
logging.root.setLevel(logging.DEBUG)
shlogger = logging.getLogger("sh")
shlogger.setLevel(logging.INFO)
else:
logging.root.setLevel(logging.INFO)
global progressbar
progressbar = hidden_progressbar
image_units = resolve_image_units()
image_tags: Set[str] = set()
with click.progressbar(image_units, label="Collecting container image tags..") as bar:
with progressbar(image_units, label="Collecting container image tags..") as bar:
for image_unit in bar:
environment = systemctl.show(
environment_output = sh.systemctl.show(
"--property=Environment",
"--value",
image_unit,
)
environment_list = (
environment.stdout.strip()
.decode(encoding="utf-8", errors="replace")
.split(" ")
)
environment_list = environment_output.strip().split(" ")
logging.debug(f"{image_unit} environment: {repr(environment_list)}")
for envvar in environment_list:
search_str = "IMAGE_TAG="
if envvar.startswith(search_str):
image_tags.add(envvar[len(search_str) :])
keylen = len(search_str)
image_tags.add(envvar[keylen:])
started_processes = []
with click.progressbar(
with progressbar(
length=len(image_tags), label="Untagging container images..", show_pos=True
) as bar:
for image_tag in image_tags:
process = podman.untag(
process = sh.podman.untag(
image_tag,
_return_cmd=True,
_bg=True,
_err_to_out=True,
_done=lambda cmd, success, exit_code: bar.update(1),
@ -131,7 +141,7 @@ def main(verbose):
for p in started_processes:
try:
p.wait()
except sh.ErrorReturnCode as error:
except sh.ErrorReturnCode:
# ignore missing image tags
if "image not known".encode() in p.stdout:
pass
@ -139,18 +149,16 @@ def main(verbose):
raise
started_processes = []
with click.progressbar(
with progressbar(
length=len(image_units), label="Building images..", show_pos=True
) as bar:
semaphore = multiprocessing.Semaphore(8)
for image_unit in image_units:
try:
systemctl("reset-failed", image_unit, _bg=False, _err_to_out=True)
sh.systemctl("reset-failed", image_unit, _bg=False, _err_to_out=True)
except sh.ErrorReturnCode as error:
if f"Unit {image_unit} not loaded".encode() in error.stdout:
logging.info(
f"Not resetting failed state for {image_unit}, unit not loaded"
)
logging.info(f"Not resetting failed state for {image_unit}, unit not loaded")
else:
raise
@ -162,7 +170,9 @@ def main(verbose):
logging.warning(f"{cmd.cmd}{tuple(cmd.call_args)} completed with exit code {exit_code}")
semaphore.release()
process = systemctl.restart(image_unit, _bg=True, _done=restart_done)
process = sh.systemctl.restart(
image_unit, _return_cmd=True, _bg=True, _done=restart_done
)
started_processes.append(process)
# join processes
[p.wait() for p in started_processes]

24
main.py
View file

@ -1,11 +1,12 @@
import datetime
import json
import logging
import os
import pathlib
import random
import sys
import threading
import traceback
from datetime import datetime, timedelta
from queue import SimpleQueue
from signal import signal, SIGHUP, SIGINT, SIGTERM, setitimer, SIGALRM, ITIMER_REAL, SIGUSR1, SIGUSR2, strsignal
@ -13,6 +14,8 @@ import click
import sh
SERVICES_BASE_PATH = "/infra/services/"
POD_CHECK_TIME = 120.0
RAND_OFFSET_MAX_SECONDS = 10.0
shlog = sh.bake(_out=sys.stdout, _err=sys.stderr)
sdnotify = sh.Command("systemd-notify")
@ -41,7 +44,7 @@ class PodKeeper:
self.reloading = threading.Event()
self.checking = threading.Event()
self.waiter = threading.Event()
self.last_check = datetime.utcnow()
self.last_check = datetime.datetime.now(datetime.UTC)
self.passing_signal = threading.Event()
self.pass_signal_nums = SimpleQueue()
@ -115,14 +118,20 @@ class PodKeeper:
traceback.print_exc()
def check_pod(self):
new_timestamp = datetime.utcnow()
new_timestamp = datetime.datetime.now(datetime.UTC)
inspect_command = sh.podman.pod.inspect(self.podname, _return_cmd=True)
pod_description = json.loads(inspect_command.stdout)
multiple_descriptions = json.loads(inspect_command.stdout)
if not multiple_descriptions:
print(f"No pod descriptions found for {self.podname}", file=sys.stderr, flush=True)
self.stopping.set()
return
assert len(multiple_descriptions) == 1, f"Single pod description expected for {self.podname}"
pod_description = multiple_descriptions[0]
for container in pod_description["Containers"]:
if container["State"] != "running":
print(f"Container {container['Name']} exited", file=sys.stderr, flush=True)
logs_since = self.last_check - timedelta(seconds=10)
print(f"Log since last check (-10s):\n", file=sys.stderr, flush=True)
logs_since = self.last_check - datetime.timedelta(seconds=POD_CHECK_TIME)
print(f"Log since last check (-{POD_CHECK_TIME}s):\n", file=sys.stderr, flush=True)
shlog.podman.logs('--since', logs_since.isoformat(), container['Name'], _out=sys.stderr)
self.stopping.set()
self.last_check = new_timestamp
@ -178,7 +187,8 @@ def main(network, log_driver, log_level, replace, remove, verbose, identifier):
signal(SIGALRM, keeper.check)
signal(SIGUSR1, keeper.passthrough)
signal(SIGUSR2, keeper.passthrough)
setitimer(ITIMER_REAL, 4.0, 120.0)
random_offset = random.random() * RAND_OFFSET_MAX_SECONDS
setitimer(ITIMER_REAL, 1.0 + random_offset, POD_CHECK_TIME + random_offset)
keeper.run()