feat: Initial commit - Content Extractor for YouTube, Instagram, and blogs

- YouTube extraction with transcript support
- Instagram reel extraction via browser automation
- Blog/article web scraping
- Auto-save to Obsidian vaults
- Smart key point generation
- Configurable via .env file
- Quick extract shell script

Tech stack: Python, requests, beautifulsoup4, playwright, youtube-transcript-api
This commit is contained in:
naki
2026-03-05 13:02:58 +05:30
commit c997e764b5
12 changed files with 1302 additions and 0 deletions

128
obsidian_writer.py Normal file
View File

@@ -0,0 +1,128 @@
"""
Obsidian Note Writer
Saves extracted content as markdown notes in Obsidian vault.
"""
import os
from pathlib import Path
from datetime import datetime
from typing import Optional
class ObsidianWriter:
    """Write content to an Obsidian vault as markdown notes.

    Notes are stored as UTF-8 ``.md`` files below ``vault_path``. Filenames
    are sanitized the same way by every method, so a note created with
    ``save_note`` can later be found again by ``append_to_note`` using the
    same (unsanitized) title.
    """

    # Longest filename stem (without the ".md" extension) that is kept.
    MAX_FILENAME_LENGTH = 100
    # Stem used when sanitizing a title leaves nothing behind (e.g. "???").
    DEFAULT_FILENAME = "untitled"
    # Characters rejected by common filesystems, plus ASCII control
    # characters (newlines/tabs in scraped titles would otherwise end up in
    # the filename).
    _INVALID_FILENAME_CHARS = '<>:"/\\|?*' + "".join(map(chr, range(32)))
    _FILENAME_STRIP_TABLE = str.maketrans("", "", _INVALID_FILENAME_CHARS)

    def __init__(self, vault_path: str):
        """Remember the vault location and make sure the directory exists.

        Args:
            vault_path: Path to the vault; a leading ``~`` is expanded.
        """
        self.vault_path = Path(vault_path).expanduser()
        self._validate_vault()

    def _validate_vault(self):
        """Create the vault directory if missing and warn if it is not a vault.

        A missing ``.obsidian`` directory only produces a warning; notes are
        still written to the path.
        """
        if not self.vault_path.exists():
            print(f"⚠️ Creating Obsidian vault directory: {self.vault_path}")
            self.vault_path.mkdir(parents=True, exist_ok=True)

        # Check if it looks like an Obsidian vault
        obsidian_config = self.vault_path / ".obsidian"
        if not obsidian_config.exists():
            print(f"⚠️ Warning: {self.vault_path} doesn't look like an Obsidian vault")
            print(" (No .obsidian directory found)")
            print(" Notes will still be saved, but you may want to set the correct vault path")

    def _note_dir(self, folder: Optional[str] = None, subfolder: Optional[str] = None) -> Path:
        """Return the directory a note lives in (the directory is not created).

        ``subfolder`` is only applied when ``folder`` is given; without a
        folder the vault root is returned.
        """
        if not folder:
            return self.vault_path
        note_dir = self.vault_path / folder
        return note_dir / subfolder if subfolder else note_dir

    def save_note(
        self,
        content: str,
        filename: str,
        folder: Optional[str] = None,
        subfolder: Optional[str] = None
    ) -> Path:
        """
        Save a new note to the Obsidian vault.

        An existing note is never overwritten: if ``<filename>.md`` already
        exists, ``<filename>_1.md``, ``<filename>_2.md``, ... is used instead.

        Args:
            content: Markdown content to save
            filename: Filename without .md extension (sanitized before use)
            folder: Folder in vault (default: root)
            subfolder: Subfolder within folder (optional; ignored when no
                folder is given)

        Returns:
            Path to saved file

        Raises:
            Exception: If the file cannot be written.
        """
        note_dir = self._note_dir(folder, subfolder)
        # Create directory if it doesn't exist
        note_dir.mkdir(parents=True, exist_ok=True)

        filename = self._sanitize_filename(filename)
        filepath = note_dir / f"{filename}.md"

        # Handle duplicate filenames by appending a numeric suffix
        counter = 1
        while filepath.exists():
            filepath = note_dir / f"{filename}_{counter}.md"
            counter += 1

        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(content)
        except Exception as e:
            # Same exception type and message as before; chained so the
            # original traceback is preserved for debugging.
            raise Exception(f"Failed to save note: {str(e)}") from e

        print(f"✅ Note saved: {filepath.name}")
        return filepath

    def _sanitize_filename(self, filename: str) -> str:
        """Return ``filename`` made safe for use as a filename stem.

        Removes characters that are invalid on common filesystems and ASCII
        control characters, truncates to ``MAX_FILENAME_LENGTH`` characters
        and strips surrounding whitespace. If nothing is left, returns
        ``DEFAULT_FILENAME`` so the note is never saved as a bare ``.md``.
        """
        cleaned = filename.translate(self._FILENAME_STRIP_TABLE)
        cleaned = cleaned[:self.MAX_FILENAME_LENGTH].strip()
        return cleaned or self.DEFAULT_FILENAME

    def create_daily_note(self, content: str) -> Path:
        """Create today's daily note, or append to it if it already exists.

        The note is named ``YYYY-MM-DD.md`` (local date) inside the
        ``Daily Notes`` folder.

        Returns:
            Path to the daily note.
        """
        today = datetime.now().strftime("%Y-%m-%d")
        # Go through append_to_note so repeated calls on the same day update
        # one note instead of producing "<date>_1.md", "<date>_2.md", ...
        return self.append_to_note(today, content, folder="Daily Notes")

    def append_to_note(
        self,
        filename: str,
        content: str,
        folder: Optional[str] = None,
        subfolder: Optional[str] = None
    ) -> Path:
        """Append content to an existing note, creating the note if needed.

        Appended content is separated from the existing text by a markdown
        horizontal rule.

        Args:
            filename: Filename without .md extension (sanitized before use)
            content: Markdown content to append
            folder: Folder in vault (default: root)
            subfolder: Subfolder within folder (optional; ignored when no
                folder is given)

        Returns:
            Path to the note that was written.

        Raises:
            Exception: If the file cannot be written.
        """
        # Sanitize exactly like save_note does; otherwise a title containing
        # invalid characters would never match the file save_note created.
        filename = self._sanitize_filename(filename)
        filepath = self._note_dir(folder, subfolder) / f"{filename}.md"

        # If file doesn't exist, create it
        if not filepath.exists():
            return self.save_note(content, filename, folder, subfolder)

        try:
            with open(filepath, 'a', encoding='utf-8') as f:
                f.write("\n\n---\n\n")
                f.write(content)
        except Exception as e:
            raise Exception(f"Failed to append to note: {str(e)}") from e

        print(f"✅ Content appended to: {filepath.name}")
        return filepath