feat: Initial commit - Content Extractor for YouTube, Instagram, and blogs
- YouTube extraction with transcript support
- Instagram reel extraction via browser automation
- Blog/article web scraping
- Auto-save to Obsidian vaults
- Smart key point generation
- Configurable via .env file
- Quick extract shell script

Tech stack: Python, requests, beautifulsoup4, playwright, youtube-transcript-api
This commit is contained in:
47
config.py
Normal file
47
config.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""
|
||||
Configuration for Content Extractor
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class Config:
    """Configuration settings for content extractor.

    Every setting is a class attribute that is resolved once, while this
    class body executes, by reading the process environment with
    ``os.getenv`` and falling back to a hard-coded default. Changing the
    environment afterwards does not update these attributes.

    The integer settings are parsed with ``int()``, so a non-numeric value
    in the environment raises ``ValueError`` when the class is defined.
    """

    # Obsidian vault path (default to common locations).
    # "~" is expanded on the final value, not only on the default: a path
    # read from the environment is not guaranteed to have gone through shell
    # tilde expansion. A blank value is treated as unset, because
    # Path("") resolves to the current directory and would pass validation.
    OBSIDIAN_VAULT_PATH: str = os.path.expanduser(
        os.getenv("OBSIDIAN_VAULT_PATH") or "~/Obsidian Vault"  # Default location
    )

    # Browser settings (for Instagram and dynamic content).
    # Boolean flags are true only for the literal string "true"
    # (case-insensitive); any other value is false.
    BROWSER_HEADLESS: bool = os.getenv("BROWSER_HEADLESS", "true").lower() == "true"
    BROWSER_TIMEOUT: int = int(os.getenv("BROWSER_TIMEOUT", "30000"))  # 30 seconds

    # Content extraction settings
    MAX_CONTENT_LENGTH: int = int(os.getenv("MAX_CONTENT_LENGTH", "10000"))  # Max chars
    GENERATE_SUMMARY: bool = os.getenv("GENERATE_SUMMARY", "true").lower() == "true"

    # YouTube settings
    YOUTUBE_LANGUAGE: str = os.getenv("YOUTUBE_LANGUAGE", "en")

    # Instagram settings (requires browser automation)
    INSTAGRAM_WAIT_TIME: int = int(os.getenv("INSTAGRAM_WAIT_TIME", "5"))  # seconds

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", "content_extractor.log")

    @classmethod
    def validate(cls) -> bool:
        """Validate configuration.

        Prints a two-line warning to stdout when ``OBSIDIAN_VAULT_PATH``
        does not exist on disk. A missing vault is reported, not treated as
        an error.

        Returns:
            Always ``True``.
        """
        # Check if Obsidian vault path exists
        if not Path(cls.OBSIDIAN_VAULT_PATH).exists():
            print(f"⚠️ Warning: Obsidian vault path does not exist: {cls.OBSIDIAN_VAULT_PATH}")
            print(" You can set OBSIDIAN_VAULT_PATH environment variable or use --obsidian-path flag")
        return True
|
||||
Reference in New Issue
Block a user