diff --git a/downloader.py b/downloader.py index c2aca1b..f4df59c 100644 --- a/downloader.py +++ b/downloader.py @@ -118,6 +118,14 @@ def get_year_options(page: Page) -> list[str]: def get_invoice_links(page: Page) -> list[dict]: links = [] + seen = set() + + def _add_link(url: str, label: str) -> None: + if not url or url in seen: + return + seen.add(url) + links.append({"url": url, "label": label}) + rows = page.locator("table.billing-table tbody tr, table#billingTable tbody tr, table tbody tr").all() for row in rows: anchors = row.locator("a[href*='billing_invoice'], a[href*='invoice']").all() @@ -127,7 +135,7 @@ def get_invoice_links(page: Page) -> list[dict]: if href: if not href.startswith("http"): href = f"{BASE_URL}/{href.lstrip('/')}" - links.append({"url": href, "label": text}) + _add_link(href, text) if not links: all_anchors = page.locator("a[href*='invoice']").all() @@ -137,7 +145,29 @@ def get_invoice_links(page: Page) -> list[dict]: if href and "invoice" in href.lower(): if not href.startswith("http"): href = f"{BASE_URL}/{href.lstrip('/')}" - links.append({"url": href, "label": text}) + _add_link(href, text) + + if not links: + print_links = page.locator("a.no-print[data-reference-object-id], a[data-reference-object-id]").all() + for anchor in print_links: + href = anchor.get_attribute("href") or "" + if href: + if not href.startswith("http"): + href = f"{BASE_URL}/{href.lstrip('/')}" + _add_link(href, anchor.inner_text().strip()) + continue + + try: + anchor.scroll_into_view_if_needed() + with page.expect_popup() as popup_info: + anchor.click() + popup = popup_info.value + popup.wait_for_load_state("domcontentloaded") + label = anchor.inner_text().strip() or anchor.get_attribute("data-reference-object-id") or popup.title() + _add_link(popup.url, label) + popup.close() + except Exception as e: + logger.warning("Failed to open invoice popup: %s", e) return links @@ -285,6 +315,12 @@ def download_all_invoices() -> list[Path]: ) if not invoices: + logger.warning( + "No invoices found for group '%s', year '%s'. Browser will remain open for inspection.", + group["label"], + year, + ) + input("No invoices found. Inspect the browser, then press Enter to continue...") continue year_dir.mkdir(parents=True, exist_ok=True)