Files
backblaze-invoices-downloader/config.py
naki c997e764b5 feat: Initial commit - Content Extractor for YouTube, Instagram, and blogs
- YouTube extraction with transcript support
- Instagram reel extraction via browser automation
- Blog/article web scraping
- Auto-save to Obsidian vaults
- Smart key point generation
- Configurable via .env file
- Quick extract shell script

Tech stack: Python, requests, beautifulsoup4, playwright, youtube-transcript-api
2026-03-05 13:02:58 +05:30

48 lines
1.6 KiB
Python

"""
Configuration for Content Extractor
"""
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from a .env file (via python-dotenv) into the
# process environment. This has to run before the Config class below is
# defined, because Config reads its settings with os.getenv() once, while the
# class body executes at import time.
load_dotenv()
class Config:
    """Configuration settings for content extractor.

    Every attribute is read from the process environment exactly once, when
    this class body executes (i.e. at import time), and falls back to the
    default given in the corresponding ``os.getenv`` call.

    Parsing rules:
        * Boolean settings are ``True`` only when the value equals ``"true"``
          (case-insensitive); any other value yields ``False``.
        * Integer settings are parsed with ``int()``, so a non-numeric value
          raises ``ValueError`` while the module is being imported.
    """

    # Obsidian vault path (defaults to a common location). A leading "~" is
    # expanded for the environment-supplied value as well as for the default,
    # so "OBSIDIAN_VAULT_PATH=~/vault" in a .env file resolves to the user's
    # home directory instead of a literal "~" directory.
    OBSIDIAN_VAULT_PATH: str = os.path.expanduser(
        os.getenv("OBSIDIAN_VAULT_PATH", "~/Obsidian Vault")
    )

    # Browser settings (for Instagram and dynamic content)
    BROWSER_HEADLESS: bool = os.getenv("BROWSER_HEADLESS", "true").lower() == "true"
    # Default 30000, described in the original as 30 seconds
    # (presumably milliseconds -- confirm against the browser automation code).
    BROWSER_TIMEOUT: int = int(os.getenv("BROWSER_TIMEOUT", "30000"))

    # Content extraction settings
    MAX_CONTENT_LENGTH: int = int(os.getenv("MAX_CONTENT_LENGTH", "10000"))  # Max chars
    GENERATE_SUMMARY: bool = os.getenv("GENERATE_SUMMARY", "true").lower() == "true"

    # YouTube settings
    YOUTUBE_LANGUAGE: str = os.getenv("YOUTUBE_LANGUAGE", "en")

    # Instagram settings (requires browser automation)
    INSTAGRAM_WAIT_TIME: int = int(os.getenv("INSTAGRAM_WAIT_TIME", "5"))  # seconds

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", "content_extractor.log")

    @classmethod
    def validate(cls) -> bool:
        """Validate configuration.

        Checks whether the configured Obsidian vault path exists and prints a
        warning to stdout when it does not. A missing vault is treated as
        non-fatal.

        Returns:
            Always ``True``, whether or not the warning was printed.
        """
        # Expand "~" here as well: OBSIDIAN_VAULT_PATH may have been reassigned
        # after import with an unexpanded value. os.path.expanduser leaves the
        # path unchanged when no home directory can be determined.
        vault_path = Path(os.path.expanduser(cls.OBSIDIAN_VAULT_PATH))
        if not vault_path.exists():
            print(f"⚠️ Warning: Obsidian vault path does not exist: {cls.OBSIDIAN_VAULT_PATH}")
            print(" You can set OBSIDIAN_VAULT_PATH environment variable or use --obsidian-path flag")
        return True