Show recipes from database, if available

Add refetch option.
Strip whitespace from scraped HTML text before inserting into columns.
This commit is contained in:
Ben 2024-01-29 03:23:36 +01:00
parent 9b1cae051a
commit 82e7f26c89
Signed by: ben
GPG Key ID: 0F54A7ED232D3319
1 changed file with 195 additions and 124 deletions

View File

@ -10,9 +10,11 @@ from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webdriver import WebDriver
from sqlalchemy import String, create_engine, select, ForeignKey, Select, Table, Column from sqlalchemy import String, create_engine, select, ForeignKey, Select, Table, Column, delete
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, Session from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, Session
recipe_info_timeout = datetime.timedelta(days=30)
class Base(DeclarativeBase): class Base(DeclarativeBase):
pass pass
@ -31,6 +33,14 @@ class Resource(Base):
def by_label(cls, search: str) -> Select[tuple["Resource"]]: def by_label(cls, search: str) -> Select[tuple["Resource"]]:
return select(cls).where(cls.label.ilike(search)) return select(cls).where(cls.label.ilike(search))
def __repr__(self):
return (
f"Resource(id={self.id}, "
f"label={self.label}, "
f"wiki_url={self.wiki_url}, "
f"recipes_populated_at={self.recipes_populated_at})"
)
class Factory(Base): class Factory(Base):
__tablename__ = "factories" __tablename__ = "factories"
@ -43,6 +53,9 @@ class Factory(Base):
def by_label(cls, search: str) -> Select[tuple["Factory"]]: def by_label(cls, search: str) -> Select[tuple["Factory"]]:
return select(cls).where(cls.label.ilike(search)) return select(cls).where(cls.label.ilike(search))
def __repr__(self):
return f"Factory(id={self.id}, label={self.label}, wiki_url={self.wiki_url})"
ingredients_table = Table( ingredients_table = Table(
"recipe_ingredients", "recipe_ingredients",
@ -71,6 +84,9 @@ class ResourceFlow(Base):
amount: Mapped[str] amount: Mapped[str]
time: Mapped[str] time: Mapped[str]
def __repr__(self):
return f"ResourceFlow(id={self.id}, resource_id={self.resource_id}, amount={self.amount}, time={self.time})"
class Recipe(Base): class Recipe(Base):
__tablename__ = "recipes" __tablename__ = "recipes"
@ -83,101 +99,100 @@ class Recipe(Base):
) )
results: Mapped[list["ResourceFlow"]] = relationship(secondary=results_table, back_populates="result_of") results: Mapped[list["ResourceFlow"]] = relationship(secondary=results_table, back_populates="result_of")
def __repr__(self):
return f"Recipe(id={self.id}, factory={self.factory}, ingredients={self.ingredients}, results={self.results})"
def normalize_url(browser: WebDriver, href: str) -> str: def normalize_url(browser: WebDriver, href: str) -> str:
return urljoin(base=browser.current_url, url=href) return urljoin(base=browser.current_url, url=href)
def populate_recipes(browser: WebDriver, engine: sqlalchemy.Engine, input_resource_label: str): def populate_recipes(browser: WebDriver, engine: sqlalchemy.Engine, input_resource_label: str) -> int:
browser.find_element(By.CSS_SELECTOR, "button#\\3Ar1\\3A-tab-0").click() browser.find_element(By.CSS_SELECTOR, "button[id$='tab-0']").click()
recipes_html_elems = browser.find_elements(By.CSS_SELECTOR, "div[id$='tabpanel-0'] > div > div")
with Session(engine, autoflush=False) as session:
for recipe_idx in range(len(recipes_html_elems)):
recipe_html_elem = recipes_html_elems[recipe_idx]
factory_html_elem = recipe_html_elem.find_element(By.CSS_SELECTOR, ".flex-col > span > a")
factory_label = factory_html_elem.text.strip()
factory_url = urljoin(base=browser.current_url, url=factory_html_elem.get_attribute("href"))
print("recipe", recipe_idx, "produced in:", factory_label, factory_url)
recipes_html_elems = browser.find_elements(By.CSS_SELECTOR, "#\\3Ar1\\3A-tabpanel-0 > div > div") def extract_resource_flow(html_elem):
for recipe_idx in range(len(recipes_html_elems)): resource_img = html_elem.find_element(By.TAG_NAME, "img")
recipe_html_elem = recipes_html_elems[recipe_idx] resource_label = resource_img.get_attribute("alt").strip()
factory_html_elem = recipe_html_elem.find_element(By.CSS_SELECTOR, ".flex-col > span > a") wiki_url = normalize_url(
factory_label = factory_html_elem.text browser=browser,
factory_url = urljoin(base=browser.current_url, url=factory_html_elem.get_attribute("href")) href=html_elem.find_element(By.TAG_NAME, "a").get_attribute("href"),
print("recipe", recipe_idx, "produced in:", factory_label, factory_url) )
resource = Resource(label=resource_label, wiki_url=wiki_url)
amount = html_elem.find_element(By.CSS_SELECTOR, ".text-xs:nth-child(2)").text.strip()
time = html_elem.find_element(By.CSS_SELECTOR, ".text-xs:nth-child(3)").text.strip()
return ResourceFlow(resource=resource, amount=amount, time=time)
def extract_resource_flow(html_elem): ingredient_html_elems = recipe_html_elem.find_elements(
resource_img = html_elem.find_element(By.TAG_NAME, "img") By.CSS_SELECTOR, f".flex-row > div:nth-child(1) > div:has(> a)"
resource_label = resource_img.get_attribute("alt")
wiki_url = normalize_url(
browser=browser,
href=html_elem.find_element(By.TAG_NAME, "a").get_attribute("href"),
) )
kwargs = {} ingredients: list[ResourceFlow] = []
if resource_label == input_resource_label: for ingredient_idx in range(len(ingredient_html_elems)):
kwargs['recipes_populated_at'] = datetime.datetime.utcnow() resource_flow = extract_resource_flow(ingredient_html_elems[ingredient_idx])
resource = Resource(label=resource_label, wiki_url=wiki_url, **kwargs) ingredients.append(resource_flow)
amount = html_elem.find_element(By.CSS_SELECTOR, ".text-xs:nth-child(2)").text print(
time = html_elem.find_element(By.CSS_SELECTOR, ".text-xs:nth-child(3)").text "recipe",
return ResourceFlow(resource=resource, amount=amount, time=time) recipe_idx,
"ingredient",
ingredient_idx,
"name:",
resource_flow.resource.label,
)
print(
"recipe",
recipe_idx,
"ingredient",
ingredient_idx,
"count:",
resource_flow.amount,
)
print(
"recipe",
recipe_idx,
"ingredient",
ingredient_idx,
"time:",
resource_flow.time,
)
result_html_elems = recipe_html_elem.find_elements(
By.CSS_SELECTOR, f".flex-row > div:nth-child(3) > div:has(> a)"
)
results: list[ResourceFlow] = []
for result_idx in range(len(result_html_elems)):
resource_flow = extract_resource_flow(result_html_elems[result_idx])
results.append(resource_flow)
print(
"recipe",
recipe_idx,
"result",
result_idx,
"name:",
resource_flow.resource.label,
)
print(
"recipe",
recipe_idx,
"result",
result_idx,
"count:",
resource_flow.amount,
)
print(
"recipe",
recipe_idx,
"result",
result_idx,
"time:",
resource_flow.time,
)
ingredient_html_elems = recipe_html_elem.find_elements(
By.CSS_SELECTOR, f".flex-row > div:nth-child(1) > div:has(> a)"
)
ingredients: list[ResourceFlow] = []
for ingredient_idx in range(len(ingredient_html_elems)):
resource_flow = extract_resource_flow(ingredient_html_elems[ingredient_idx])
ingredients.append(resource_flow)
print(
"recipe",
recipe_idx,
"ingredient",
ingredient_idx,
"name:",
resource_flow.resource.label,
)
print(
"recipe",
recipe_idx,
"ingredient",
ingredient_idx,
"count:",
resource_flow.amount,
)
print(
"recipe",
recipe_idx,
"ingredient",
ingredient_idx,
"time:",
resource_flow.time,
)
result_html_elems = recipe_html_elem.find_elements(
By.CSS_SELECTOR, f".flex-row > div:nth-child(3) > div:has(> a)"
)
results: list[ResourceFlow] = []
for result_idx in range(len(result_html_elems)):
resource_flow = extract_resource_flow(result_html_elems[result_idx])
results.append(resource_flow)
print(
"recipe",
recipe_idx,
"result",
result_idx,
"name:",
resource_flow.resource.label,
)
print(
"recipe",
recipe_idx,
"result",
result_idx,
"count:",
resource_flow.amount,
)
print(
"recipe",
recipe_idx,
"result",
result_idx,
"time:",
resource_flow.time,
)
with Session(engine, autoflush=False) as session:
factory = session.scalars(Factory.by_label(factory_label)).one_or_none() factory = session.scalars(Factory.by_label(factory_label)).one_or_none()
if not factory: if not factory:
factory = Factory(label=factory_label, wiki_url=factory_url) factory = Factory(label=factory_label, wiki_url=factory_url)
@ -193,62 +208,118 @@ def populate_recipes(browser: WebDriver, engine: sqlalchemy.Engine, input_resour
session.add(recipe) session.add(recipe)
session.commit() session.commit()
updated_resource = session.scalars(Resource.by_label(input_resource_label)).one()
updated_resource.recipes_populated_at = datetime.datetime.utcnow()
res_id = updated_resource.id
session.commit()
return res_id
@click.command() @click.command()
@click.option("--result", is_flag=True) @click.option("--result", is_flag=True)
@click.option("--debug", is_flag=True) @click.option("--debug", is_flag=True)
@click.option("--refetch", is_flag=True)
@click.argument("search") @click.argument("search")
def main(result: bool, debug: bool, search: str): def main(result: bool, debug: bool, refetch: bool, search: str):
engine = create_engine("sqlite:///file.db", echo=debug) engine = create_engine("sqlite:///file.db", echo=debug)
Base.metadata.create_all(bind=engine) Base.metadata.create_all(bind=engine)
if result and search: if result and search:
wiki_search = True
resource_label = search
with Session(engine) as session: with Session(engine) as session:
for obj in session.scalars(Resource.by_label(search)): resources = session.scalars(Resource.by_label(resource_label)).all()
print(obj) if len(resources) == 0:
print("Could not find existing resources matching the search string.. starting wiki search")
else:
for idx in range(1, len(resources) + 1):
print(f"{idx}: {resources[idx - 1].label}")
user_choice = click.prompt(
"Chose a resource to continue or 0 to continue with a wiki search", default=1
)
if user_choice != 0:
res_id = resources[user_choice - 1].id
resource_label = resources[user_choice - 1].label
wiki_search = False
session.commit()
firefox_options = Options() firefox_options = Options()
firefox_options.add_argument("--width=1600")
firefox_options.add_argument("--height=1015")
if not debug: if not debug:
firefox_options.add_argument("--headless") firefox_options.add_argument("--headless")
browser = Firefox(options=firefox_options) browser = Firefox(options=firefox_options)
browser.implicitly_wait(5)
try: try:
browser.get("https://wiki.kyrium.space/") if wiki_search:
browser.set_window_size(1600, 1015) browser.get("https://wiki.kyrium.space/")
search_bar = browser.find_element(By.CSS_SELECTOR, "nav input[placeholder='Search for an item...']") search_bar = browser.find_element(By.CSS_SELECTOR, "nav input[placeholder='Search for an item...']")
search_bar.click() search_bar.click()
search_bar.send_keys(search) search_bar.send_keys(search)
search_button = browser.find_element(By.CSS_SELECTOR, "nav button[type='submit']") search_button = browser.find_element(By.CSS_SELECTOR, "nav button[type='submit']")
search_button.click() search_button.click()
browser.implicitly_wait(5) choices = browser.find_elements(By.CSS_SELECTOR, "body > div > .container:nth-child(1) a.items-center")
choices = browser.find_elements(By.CSS_SELECTOR, "body > div > .container:nth-child(1) a.items-center") if not choices:
if not choices: print("No wiki entries found for this result")
print("No wiki entries found for this result") return
return elif len(choices) > 1:
elif len(choices) > 1: default_choice = 1
default_choice = 1 choice_names: list[str] = []
choice_names: list[str] = [] for choice_idx in range(1, len(choices) + 1):
for choice_idx in range(1, len(choices) + 1): recipe_choice = choices[choice_idx - 1]
recipe_choice = choices[choice_idx - 1] name = recipe_choice.find_element(By.TAG_NAME, "img").get_attribute("alt")
name = recipe_choice.find_element(By.TAG_NAME, "img").get_attribute("alt") choice_names.append(name)
choice_names.append(name) if name.casefold() == search.casefold():
if name.casefold() == search.casefold(): default_choice = choice_idx
default_choice = choice_idx print(f"{choice_idx}: {name}")
print(f"{choice_idx}: {name}") user_choice = click.prompt("Chose a recipe to continue…", default=default_choice)
user_choice = click.prompt("Chose a recipe to continue…", default=default_choice) if not user_choice:
if not user_choice: user_choice = default_choice
user_choice = default_choice else:
user_choice = int(user_choice)
link_html_elem = choices[user_choice - 1]
else: else:
user_choice = int(user_choice) link_html_elem = choices[0]
link_html_elem = choices[user_choice - 1] resource_label = link_html_elem.find_element(By.TAG_NAME, "img").get_attribute("alt")
# FIXME: check if resource_label is in database
if debug:
print("resource_label:", resource_label)
link_html_elem.click()
res_id = populate_recipes(browser=browser, engine=engine, input_resource_label=resource_label)
else: else:
link_html_elem = choices[0] with Session(engine) as session:
input_resource = session.get(Resource, res_id)
input_resource_url = input_resource.wiki_url
resource_label = input_resource.label
refetch = (
refetch
or input_resource.recipes_populated_at is None
or datetime.datetime.utcnow() - input_resource.recipes_populated_at > recipe_info_timeout
)
if refetch:
print("Deleting recipes for", input_resource)
for flow in session.scalars(select(ResourceFlow).where(ResourceFlow.resource_id == res_id)):
if flow.result_of:
for flow2 in flow.result_of.ingredients:
session.delete(flow2)
for flow2 in flow.result_of.results:
session.delete(flow2)
session.delete(flow.result_of)
session.commit()
if refetch:
print("Refetching recipes for", resource_label)
browser.get(input_resource_url)
res_id = populate_recipes(browser=browser, engine=engine, input_resource_label=resource_label)
resource_label = link_html_elem.find_element(By.TAG_NAME, "img").get_attribute("alt") with Session(engine) as session:
# FIXME: check if resource_label is in database stmt = select(Recipe).join(Recipe.results).filter(ResourceFlow.resource_id == res_id)
if debug: for recipe in session.scalars(stmt):
print("resource_label:", resource_label) print(recipe)
link_html_elem.click() for flow in recipe.ingredients:
populate_recipes(browser=browser, engine=engine, input_resource_label=resource_label) print("ingredient:", flow.resource, flow)
for flow in recipe.results:
print("result: ", flow.resource, flow)
finally: finally:
if not debug: if not debug:
browser.quit() browser.quit()