"""Generate RSS and Atom feeds from the XHTML posts under POSTS_DIR."""

import datetime
import os

from bs4 import BeautifulSoup
from feedgen.feed import FeedGenerator
from pytz import UTC  # Ensures timezone-aware datetimes

# Configuration
BASE_URL = 'https://purplebored.pl'
POSTS_DIR = './blog/posts'
FEED_OUTPUT_DIR = './feeds'
AUTHOR_NAME = 'Purplebored'
AUTHOR_EMAIL = 'purplebored@posteo.com'


def parse_xhtml_post(filepath):
    """Parse one XHTML post into a dict of feed-entry fields.

    Returns a dict with keys 'title', 'url', 'date', 'content';
    'date' is always UTC-aware.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'lxml')

    # The Open Graph protocol puts og:title in the `property` attribute;
    # also accept `name="og:title"` for posts written the old way.
    og_title = (soup.find('meta', attrs={'property': 'og:title'})
                or soup.find('meta', attrs={'name': 'og:title'}))
    if og_title and og_title.get('content'):
        title = og_title['content']
    else:
        # Fall back to the <title> tag.
        title_tag = soup.find('title')
        title = title_tag.get_text() if title_tag else 'Untitled'

    # Parse <meta name="date" content="YYYY-MM-DD">; otherwise fall back
    # to the file's modification time.
    date_meta = soup.find('meta', {'name': 'date'})
    if date_meta and date_meta.get('content'):
        pub_date = datetime.datetime.strptime(date_meta['content'], '%Y-%m-%d')
        pub_date = pub_date.replace(tzinfo=UTC)
    else:
        # fromtimestamp(..., tz=UTC) converts the epoch mtime directly to
        # UTC; the previous naive-then-replace approach stamped *local*
        # time as UTC, skewing feed dates by the machine's UTC offset.
        pub_date = datetime.datetime.fromtimestamp(
            os.path.getmtime(filepath), tz=UTC)

    # Prefer the <article> element; fall back to <body>.
    content = soup.find('article') or soup.find('body')

    # Build the absolute URL from the path relative to POSTS_DIR,
    # normalising Windows path separators.
    rel_path = os.path.relpath(filepath, POSTS_DIR)
    url = f'{BASE_URL}/blog/posts/{rel_path}'.replace('\\', '/')

    return {
        'title': title,
        'url': url,
        'date': pub_date,
        # str(None) would inject the literal text 'None' into the feed.
        'content': str(content) if content is not None else '',
    }


def generate_feeds(posts):
    """Write feed.rss and feed.atom for *posts* into FEED_OUTPUT_DIR."""
    fg = FeedGenerator()
    fg.id(BASE_URL)
    fg.title('Purplebored Blog')
    fg.author({'name': AUTHOR_NAME, 'email': AUTHOR_EMAIL})
    fg.link(href=BASE_URL, rel='alternate')
    fg.language('en')
    fg.description('A blog about snuff reviews and other thoughts.')

    # Add entries newest-first.
    for post in sorted(posts, key=lambda x: x['date'], reverse=True):
        fe = fg.add_entry()
        fe.id(post['url'])
        fe.title(post['title'])
        fe.link(href=post['url'])
        fe.published(post['date'])
        fe.content(post['content'], type='xhtml')

    os.makedirs(FEED_OUTPUT_DIR, exist_ok=True)
    fg.rss_file(os.path.join(FEED_OUTPUT_DIR, 'feed.rss'))
    fg.atom_file(os.path.join(FEED_OUTPUT_DIR, 'feed.atom'))


def main():
    """Walk POSTS_DIR, parse every .xhtml post, and emit both feeds."""
    posts = []
    for root, _dirs, files in os.walk(POSTS_DIR):
        for file in files:
            if file.endswith('.xhtml'):
                filepath = os.path.join(root, file)
                posts.append(parse_xhtml_post(filepath))
    generate_feeds(posts)
    print(f'✅ Generated {len(posts)} posts in RSS and Atom feeds.')


if __name__ == '__main__':
    main()