diff --git a/downloader.py b/downloader.py index 41e3c3f..3e13f91 100644 --- a/downloader.py +++ b/downloader.py @@ -121,9 +121,12 @@ def get_invoice_links(page: Page) -> list[dict]: seen = set() def _add_link(url: str, label: str, **meta) -> None: - if not url or url in seen: + if not url: return - seen.add(url) + dedupe_key = meta.get("reference_id") or url + if dedupe_key in seen: + return + seen.add(dedupe_key) entry = {"url": url, "label": label} entry.update(meta) links.append(entry)