From 03bb94db2daa6b5e3eb2496225e293694af03cf9 Mon Sep 17 00:00:00 2001
From: Jan Bader
Date: Tue, 7 Apr 2026 23:28:04 +0200
Subject: [PATCH] fix: deduping by url

_add_link() now keys its `seen` set on meta["reference_id"] when that
value is present and truthy, and falls back to the URL otherwise.
Previously the URL alone was used as the dedupe key. Calls with an
empty URL are still ignored.

---
 downloader.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/downloader.py b/downloader.py
index 41e3c3f..3e13f91 100644
--- a/downloader.py
+++ b/downloader.py
@@ -121,9 +121,12 @@ def get_invoice_links(page: Page) -> list[dict]:
     seen = set()
 
     def _add_link(url: str, label: str, **meta) -> None:
-        if not url or url in seen:
+        if not url:
             return
-        seen.add(url)
+        dedupe_key = meta.get("reference_id") or url
+        if dedupe_key in seen:
+            return
+        seen.add(dedupe_key)
         entry = {"url": url, "label": label}
         entry.update(meta)
         links.append(entry)