convert to backblaze fetcher

This commit is contained in:
Jan Bader
2026-04-05 22:01:46 +02:00
parent 66e1c9e0e0
commit a9bb2460c6
15 changed files with 333 additions and 1620 deletions

View File

@@ -1,54 +1,26 @@
"""
Configuration for Content Extractor
"""
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
class Config:
    """Configuration settings for content extractor.

    Every setting is a class attribute read from the process environment
    once, when this class body executes. The second argument to
    ``os.getenv`` is the fallback used when the variable is unset.
    Boolean settings are enabled only by the string ``"true"`` (any
    letter case); every other value yields ``False``.
    """

    # Backblaze account credentials; validate() rejects empty values.
    BACKBLAZE_EMAIL = os.getenv("BACKBLAZE_EMAIL", "")
    BACKBLAZE_PASSWORD = os.getenv("BACKBLAZE_PASSWORD", "")

    # Obsidian vault path (default to common locations)
    OBSIDIAN_VAULT_PATH = os.getenv(
        "OBSIDIAN_VAULT_PATH",
        os.path.expanduser("~/Obsidian Vault"),  # Default location
    )

    # Invoice fields. NOTE(review): how these are consumed is not visible
    # in this file -- confirm against the caller.
    INVOICE_VAT_ID = os.getenv("INVOICE_VAT_ID", "")
    INVOICE_DOCUMENT_TYPE = os.getenv("INVOICE_DOCUMENT_TYPE", "")
    INVOICE_COMPANY = os.getenv("INVOICE_COMPANY", "")
    INVOICE_NOTES = os.getenv("INVOICE_NOTES", "")

    # Output directory; created by validate() if it does not exist yet.
    OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./invoices")

    # Browser settings (for Instagram and dynamic content)
    BROWSER_HEADLESS = os.getenv("BROWSER_HEADLESS", "true").lower() == "true"
    # 30 seconds (presumably expressed in milliseconds -- confirm with the
    # browser driver that consumes it). Defined once; a second identical
    # assignment further down was redundant.
    BROWSER_TIMEOUT = int(os.getenv("BROWSER_TIMEOUT", "30000"))

    # Content extraction settings
    MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", "10000"))  # Max chars
    GENERATE_SUMMARY = os.getenv("GENERATE_SUMMARY", "true").lower() == "true"

    # OpenAI/OpenRouter settings
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
    OPENAI_URL = os.getenv("OPENAI_URL", "https://api.openai.com/v1/chat/completions")
    OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
    OPENAI_TIMEOUT = int(os.getenv("OPENAI_TIMEOUT", "30"))
    OPENAI_LOG_PAYLOAD = os.getenv("OPENAI_LOG_PAYLOAD", "false").lower() == "true"

    # YouTube settings
    YOUTUBE_LANGUAGE = os.getenv("YOUTUBE_LANGUAGE", "en")

    # Instagram settings (requires browser automation)
    INSTAGRAM_WAIT_TIME = int(os.getenv("INSTAGRAM_WAIT_TIME", "5"))  # seconds

    # Logging
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE = os.getenv("LOG_FILE", "content_extractor.log")

    @classmethod
    def validate(cls):
        """Validate configuration and prepare the output directory.

        A missing Obsidian vault only produces a printed warning. Missing
        Backblaze credentials are fatal. The output directory is created
        (including parents) only after the credential check has passed.

        Returns:
            True when validation succeeds.

        Raises:
            ValueError: if BACKBLAZE_EMAIL or BACKBLAZE_PASSWORD is empty.
        """
        # Check if Obsidian vault path exists (non-fatal: warn only).
        if not Path(cls.OBSIDIAN_VAULT_PATH).exists():
            print(f"⚠️ Warning: Obsidian vault path does not exist: {cls.OBSIDIAN_VAULT_PATH}")
            print(" You can set OBSIDIAN_VAULT_PATH environment variable or use --obsidian-path flag")

        # The credential check must run before any return; previously an
        # early `return True` made it (and the mkdir below) unreachable.
        if not cls.BACKBLAZE_EMAIL or not cls.BACKBLAZE_PASSWORD:
            raise ValueError("BACKBLAZE_EMAIL and BACKBLAZE_PASSWORD must be set")

        Path(cls.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
        return True