Add AI summarization
This commit is contained in:
@@ -92,6 +92,12 @@ BROWSER_TIMEOUT=30000
|
||||
MAX_CONTENT_LENGTH=10000
|
||||
GENERATE_SUMMARY=true
|
||||
|
||||
# OpenAI/OpenRouter (OpenRouter model IDs are provider-prefixed, e.g. openai/gpt-4o-mini)
|
||||
OPENAI_API_KEY=your_key_here
|
||||
OPENAI_URL=https://openrouter.ai/api/v1/chat/completions
|
||||
OPENAI_MODEL=gpt-4o-mini
|
||||
OPENAI_TIMEOUT=30
|
||||
|
||||
# YouTube
|
||||
YOUTUBE_LANGUAGE=en
|
||||
|
||||
|
||||
@@ -27,6 +27,12 @@ class Config:
|
||||
MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH", "10000")) # Max chars
|
||||
GENERATE_SUMMARY = os.getenv("GENERATE_SUMMARY", "true").lower() == "true"
|
||||
|
||||
# OpenAI/OpenRouter settings
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
||||
OPENAI_URL = os.getenv("OPENAI_URL", "https://api.openai.com/v1/chat/completions")
|
||||
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
|
||||
OPENAI_TIMEOUT = int(os.getenv("OPENAI_TIMEOUT", "30"))
|
||||
|
||||
# YouTube settings
|
||||
YOUTUBE_LANGUAGE = os.getenv("YOUTUBE_LANGUAGE", "en")
|
||||
|
||||
|
||||
17
main.py
17
main.py
@@ -23,6 +23,7 @@ from extractors.blog_extractor import BlogExtractor
|
||||
from extractors.instagram_extractor import InstagramExtractor
|
||||
from obsidian_writer import ObsidianWriter
|
||||
from config import Config
|
||||
from summarizer import summarize_text, SummarizationError
|
||||
|
||||
|
||||
def detect_source_type(url: str) -> str:
|
||||
@@ -102,6 +103,22 @@ def main():
|
||||
print("❌ No content could be extracted")
|
||||
sys.exit(1)
|
||||
|
||||
# Generate AI summary + key points
|
||||
if args.summarize or Config.GENERATE_SUMMARY:
|
||||
source_text = "\n\n".join(
|
||||
part for part in [content.get("description", ""), content.get("content", "")]
|
||||
if part
|
||||
).strip()
|
||||
if source_text:
|
||||
try:
|
||||
summary_result = summarize_text(source_text, max_points=3)
|
||||
if summary_result.get("summary"):
|
||||
content["description"] = summary_result["summary"]
|
||||
if summary_result.get("key_points"):
|
||||
content["key_points"] = summary_result["key_points"]
|
||||
except SummarizationError as e:
|
||||
print(f"⚠️ Summarization failed: {e}")
|
||||
|
||||
# Generate output filename
|
||||
if args.output:
|
||||
filename = args.output
|
||||
|
||||
84
summarizer.py
Normal file
84
summarizer.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
OpenAI/OpenRouter summarizer utility.
|
||||
|
||||
Uses OPENAI_API_KEY and OPENAI_URL from environment (via Config).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from config import Config
|
||||
|
||||
|
||||
class SummarizationError(RuntimeError):
    """Error signalling that a summarization request or its response handling failed."""
|
||||
|
||||
|
||||
def _strip_code_fence(raw: str) -> str:
    """Return *raw* without a surrounding Markdown code fence, if it has one."""
    cleaned = raw.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Drop the opening fence line, which may carry a language tag such as "json".
    _, newline, remainder = cleaned.partition("\n")
    cleaned = remainder if newline else cleaned[3:]
    cleaned = cleaned.rstrip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _parse_model_reply(raw: str, max_points: int) -> Dict[str, List[str] | str]:
    """Decode the model's reply text into the ``summary`` / ``key_points`` dict."""
    candidate = _strip_code_fence(raw)
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        # Some models add prose around the object; retry on the outermost braces.
        start, end = candidate.find("{"), candidate.rfind("}")
        if start == -1 or end < start:
            raise
        parsed = json.loads(candidate[start : end + 1])
    if not isinstance(parsed, dict):
        raise ValueError("expected a JSON object")

    summary = parsed.get("summary")
    summary = summary.strip() if isinstance(summary, str) else ""

    points = parsed.get("key_points") or []
    if isinstance(points, str):
        # A bare string would otherwise be iterated character by character.
        points = [points]
    elif not isinstance(points, list):
        points = []
    key_points = [p.strip() for p in points if isinstance(p, str) and p.strip()]

    # The prompt only *asks* for max_points items; enforce the cap here.
    return {"summary": summary, "key_points": key_points[: max(max_points, 0)]}


def summarize_text(text: str, max_points: int = 3) -> Dict[str, List[str] | str]:
    """
    Summarize text into a short summary and key points.

    Sends a chat-completion request to ``Config.OPENAI_URL`` using
    ``Config.OPENAI_MODEL``, ``Config.OPENAI_API_KEY`` and
    ``Config.OPENAI_TIMEOUT``, and parses the JSON object found in the
    first choice's message content.

    Args:
        text: Content to summarize. Empty or whitespace-only input
            short-circuits without making a request.
        max_points: Number of key points requested in the prompt; also the
            upper bound on the number of key points returned.

    Returns:
        {
            "summary": "string",
            "key_points": ["point 1", "point 2", ...]
        }

    Raises:
        SummarizationError: If the API key is not set, the HTTP request
            fails, or the response cannot be parsed.
    """
    if not text or not text.strip():
        return {"summary": "", "key_points": []}

    if not Config.OPENAI_API_KEY:
        raise SummarizationError("OPENAI_API_KEY is not set")

    payload = {
        "model": Config.OPENAI_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a precise summarizer. Return JSON only with keys "
                    "`summary` and `key_points` (array of strings). Do not add extra keys."
                ),
            },
            {
                "role": "user",
                "content": (
                    "Summarize the following content in 2-4 sentences and provide "
                    f"{max_points} key points.\n\n"
                    f"CONTENT:\n{text}"
                ),
            },
        ],
        "temperature": 0.2,
        "max_tokens": 400,
    }

    headers = {
        "Authorization": f"Bearer {Config.OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            Config.OPENAI_URL,
            headers=headers,
            json=payload,
            timeout=Config.OPENAI_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()
    except Exception as exc:
        # Deliberately broad: callers only handle SummarizationError, so any
        # transport or decoding failure is converted at this boundary.
        raise SummarizationError(f"Request failed: {exc}") from exc

    try:
        content = data["choices"][0]["message"]["content"]
        return _parse_model_reply(content, max_points)
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as exc:
        # ValueError also covers json.JSONDecodeError; AttributeError covers a
        # null message content.
        raise SummarizationError(f"Invalid response format: {exc}") from exc
|
||||
Reference in New Issue
Block a user