Big update: finished the blog, added the feeds
feed_gen.py (new file, 84 lines)
@@ -0,0 +1,84 @@
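"""Generate RSS and Atom feeds for the blog.

Walks POSTS_DIR for .xhtml posts, extracts each post's title, date and
content, and writes feed.rss and feed.atom into FEED_OUTPUT_DIR. Paths
are relative, so run this from the site root.
"""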
import os
import datetime
from bs4 import BeautifulSoup
from feedgen.feed import FeedGenerator
from pytz import UTC  # Ensures timezone-aware datetime
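# Third-party dependencies (PyPI names): beautifulsoup4, lxml, feedgen, pytz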

# Configuration
BASE_URL = 'https://purplebored.pl'
POSTS_DIR = './blog/posts'
FEED_OUTPUT_DIR = './feeds'
AUTHOR_NAME = 'Purplebored'
AUTHOR_EMAIL = 'purplebored@posteo.com'

def parse_xhtml_post(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'lxml')

    # Try <meta name="og:title" content="...">
    og_title = soup.find('meta', attrs={'name': 'og:title'})
    if og_title and og_title.get('content'):
        title = og_title['content']
    else:
        # Fallback to <title> tag
        title_tag = soup.find('title')
        title = title_tag.get_text() if title_tag else 'Untitled'

    # Parse <meta name="date" content="YYYY-MM-DD">
    date_meta = soup.find('meta', {'name': 'date'})
    if date_meta and date_meta.get('content'):
        pub_date = datetime.datetime.strptime(date_meta['content'], '%Y-%m-%d')
    else:
        pub_date = datetime.datetime.fromtimestamp(os.path.getmtime(filepath))

    pub_date = pub_date.replace(tzinfo=UTC)

    # Extract article or body content
    content = soup.find('article') or soup.find('body')

    # Generate relative URL
    rel_path = os.path.relpath(filepath, POSTS_DIR)
    url = f'{BASE_URL}/blog/posts/{rel_path}'.replace('\\', '/')

    return {
        'title': title,
        'url': url,
        'date': pub_date,
        'content': str(content)
    }
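
# For reference, a minimal post that parse_xhtml_post() can handle might
# look like this (hypothetical markup, not a real post from the blog):
#
#   <html xmlns="http://www.w3.org/1999/xhtml">
#     <head>
#       <title>Sample Post</title>
#       <meta name="og:title" content="Sample Post" />
#       <meta name="date" content="2024-01-31" />
#     </head>
#     <body><article><p>Hello.</p></article></body>
#   </html>
#
# which would yield the title 'Sample Post', a UTC-aware date of
# 2024-01-31, and the <article> element as the entry content.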

def generate_feeds(posts):
    fg = FeedGenerator()
    fg.id(BASE_URL)
    fg.title('Purplebored Blog')
    fg.author({'name': AUTHOR_NAME, 'email': AUTHOR_EMAIL})
    fg.link(href=BASE_URL, rel='alternate')
    fg.language('en')
    fg.description('A blog about snuff reviews and other thoughts.')

    for post in sorted(posts, key=lambda x: x['date'], reverse=True):
        fe = fg.add_entry()
        fe.id(post['url'])
        fe.title(post['title'])
        fe.link(href=post['url'])
        fe.published(post['date'])
        fe.content(post['content'], type='xhtml')

    os.makedirs(FEED_OUTPUT_DIR, exist_ok=True)
    fg.rss_file(os.path.join(FEED_OUTPUT_DIR, 'feed.rss'))
    fg.atom_file(os.path.join(FEED_OUTPUT_DIR, 'feed.atom'))
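
# A quick sanity check of the generated feeds (assumes the third-party
# `feedparser` package, which this script does not otherwise use):
#
#   import feedparser
#   d = feedparser.parse('feeds/feed.atom')
#   print(d.feed.title, len(d.entries))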

def main():
    posts = []
    for root, dirs, files in os.walk(POSTS_DIR):
        for file in files:
            if file.endswith('.xhtml'):
                filepath = os.path.join(root, file)
                post_data = parse_xhtml_post(filepath)
                posts.append(post_data)

    generate_feeds(posts)
    print(f'✅ Generated {len(posts)} posts in RSS and Atom feeds.')

if __name__ == '__main__':
    main()