Big update: finished the blog, added the feeds
feed_gen.py (new file, 84 lines)
@@ -0,0 +1,84 @@
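"""Generate RSS and Atom feeds for the blog.

Walks POSTS_DIR for .xhtml posts, extracts each post's title, date and
content, and writes feed.rss and feed.atom into FEED_OUTPUT_DIR. Paths
are relative, so run this from the site root.
"""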
import os
import datetime
from bs4 import BeautifulSoup
from feedgen.feed import FeedGenerator
from pytz import UTC  # Ensures timezone-aware datetime
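# Third-party dependencies (PyPI names): beautifulsoup4, lxml, feedgen, pytz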

# Configuration
BASE_URL = 'https://purplebored.pl'
POSTS_DIR = './blog/posts'
FEED_OUTPUT_DIR = './feeds'
AUTHOR_NAME = 'Purplebored'
AUTHOR_EMAIL = 'purplebored@posteo.com'

def parse_xhtml_post(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'lxml')

    # Try <meta name="og:title" content="...">
    og_title = soup.find('meta', attrs={'name': 'og:title'})
    if og_title and og_title.get('content'):
        title = og_title['content']
    else:
        # Fallback to <title> tag
        title_tag = soup.find('title')
        title = title_tag.get_text() if title_tag else 'Untitled'

    # Parse <meta name="date" content="YYYY-MM-DD">
    date_meta = soup.find('meta', {'name': 'date'})
    if date_meta and date_meta.get('content'):
        pub_date = datetime.datetime.strptime(date_meta['content'], '%Y-%m-%d')
    else:
        pub_date = datetime.datetime.fromtimestamp(os.path.getmtime(filepath))

    pub_date = pub_date.replace(tzinfo=UTC)

    # Extract article or body content
    content = soup.find('article') or soup.find('body')

    # Generate relative URL
    rel_path = os.path.relpath(filepath, POSTS_DIR)
    url = f'{BASE_URL}/blog/posts/{rel_path}'.replace('\\', '/')

    return {
        'title': title,
        'url': url,
        'date': pub_date,
        'content': str(content)
    }
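
# For reference, a minimal post that parse_xhtml_post() can handle might
# look like this (hypothetical markup, not a real post from the blog):
#
#   <html xmlns="http://www.w3.org/1999/xhtml">
#     <head>
#       <title>Sample Post</title>
#       <meta name="og:title" content="Sample Post" />
#       <meta name="date" content="2024-01-31" />
#     </head>
#     <body><article><p>Hello.</p></article></body>
#   </html>
#
# which would yield the title 'Sample Post', a UTC-aware date of
# 2024-01-31, and the <article> element as the entry content.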

def generate_feeds(posts):
    fg = FeedGenerator()
    fg.id(BASE_URL)
    fg.title('Purplebored Blog')
    fg.author({'name': AUTHOR_NAME, 'email': AUTHOR_EMAIL})
    fg.link(href=BASE_URL, rel='alternate')
    fg.language('en')
    fg.description('A blog about snuff reviews and other thoughts.')

    for post in sorted(posts, key=lambda x: x['date'], reverse=True):
        fe = fg.add_entry()
        fe.id(post['url'])
        fe.title(post['title'])
        fe.link(href=post['url'])
        fe.published(post['date'])
        fe.content(post['content'], type='xhtml')

    os.makedirs(FEED_OUTPUT_DIR, exist_ok=True)
    fg.rss_file(os.path.join(FEED_OUTPUT_DIR, 'feed.rss'))
    fg.atom_file(os.path.join(FEED_OUTPUT_DIR, 'feed.atom'))
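
# A quick sanity check of the generated feeds (assumes the third-party
# `feedparser` package, which this script does not otherwise use):
#
#   import feedparser
#   d = feedparser.parse('feeds/feed.atom')
#   print(d.feed.title, len(d.entries))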

def main():
    posts = []
    for root, dirs, files in os.walk(POSTS_DIR):
        for file in files:
            if file.endswith('.xhtml'):
                filepath = os.path.join(root, file)
                post_data = parse_xhtml_post(filepath)
                posts.append(post_data)

    generate_feeds(posts)
    print(f'✅ Generated {len(posts)} posts in RSS and Atom feeds.')

if __name__ == '__main__':
    main()