chg: [rssmerge] Markdown output improved and code clean-up

Alexandre Dulaunoy 2024-03-09 18:22:30 +01:00
parent 779f49d143
commit 502018fcbb
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD
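
The diff replaces the old string-concatenation print() calls with f-strings, improves the
Markdown output (each entry is now prefixed with the feed's domain, extracted via
urllib.parse.urlparse, and suffixed with its timestamp), rewrites the header comment, and
drops a leftover debug comment. Purely as an illustration (the entry values below are
hypothetical), a line produced by the new Markdown output looks like:

- git.foo.be [A sample entry](https://git.foo.be/adulau/rss-tools) @Sat Mar  9 18:22:30 2024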

@@ -1,21 +1,16 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
-# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
+# a at foo dot be - Alexandre Dulaunoy - https://git.foo.be/adulau/rss-tools
 #
-# rssmerge.py is a simple script to gather rss feed and merge them in reverse
-# time order. Useful to keep track of recent events.
+# rssmerge.py is a simple script designed to aggregate RSS feeds and merge them in reverse chronological order.
+# It outputs the merged content in text, HTML, or Markdown format. This tool is useful for tracking recent events
+# from various feeds and publishing them on your website.
 #
-# this is still an early prototype and assume that you have full control of the
-# remote rss feeds (if not you may have some security issues).
+# Sample usage:
 #
-# TODO : - rss 2.0 and atom output
-# - full html output
-#
-# example of use :
-# python3 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
-# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
-# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
+# python3 rssmerge.py "https://git.foo.be/adulau.rss" "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom"
+# "https://github.com/adulau.atom" -o markdown --maxitem 20
 
 import feedparser
 import sys, os
@@ -25,28 +20,23 @@ import hashlib
 from optparse import OptionParser
 import html
 from bs4 import BeautifulSoup
+from urllib.parse import urlparse
 
 feedparser.USER_AGENT = "rssmerge.py +https://github.com/adulau/rss-tools"
 
 
 def RenderMerge(itemlist, output="text"):
     i = 0
     if output == "text":
         for item in itemlist:
             i = i + 1
             # Keep consistent datetime representation if not use allitem[item[1]]['updated']
-            timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
-            print(
-                str(i)
-                + ":"
-                + allitem[item[1]]["title"]
-                + ":"
-                + timetuple.ctime()
-                + ":"
-                + allitem[item[1]]["link"]
-            )
+            link = allitem[item[1]]["link"]
+            title = html.escape(allitem[item[1]]["title"])
+            timestamp = datetime.datetime.fromtimestamp(
+                allitem[item[1]]["epoch"]
+            ).ctime()
+            print(f'{i}:{title}:{timestamp}:{link}')
             if i == int(options.maxitem):
                 break
@ -56,16 +46,12 @@ def RenderMerge(itemlist, output="text"):
for item in itemlist: for item in itemlist:
i = i + 1 i = i + 1
# Keep consistent datetime representation if not use allitem[item[1]]['updated'] # Keep consistent datetime representation if not use allitem[item[1]]['updated']
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"]) link = allitem[item[1]]["link"]
print( title = html.escape(allitem[item[1]]["title"])
'<li><a href="' timestamp = datetime.datetime.fromtimestamp(
+ str(str(allitem[item[1]]["link"])) allitem[item[1]]["epoch"]
+ '">' ).ctime()
+ str(str(html.escape(allitem[item[1]]["title"]))) print(f'<li><a href="{link}"> {title}</a> --- (<i>{timestamp}</i>)</li>')
+ "</a> --- (<i>"
+ timetuple.ctime()
+ "</i>)</li>"
)
if i == int(options.maxitem): if i == int(options.maxitem):
break break
print("</ul>") print("</ul>")
@ -73,14 +59,13 @@ def RenderMerge(itemlist, output="text"):
if output == "markdown": if output == "markdown":
for item in itemlist: for item in itemlist:
i = i + 1 i = i + 1
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"]) title = html.escape(allitem[item[1]]["title"])
print( link = allitem[item[1]]["link"]
"- [" timestamp = datetime.datetime.fromtimestamp(
+ str(html.escape(allitem[item[1]]["title"])) allitem[item[1]]["epoch"]
+ "](" ).ctime()
+ str(allitem[item[1]]["link"]) domain = urlparse(allitem[item[1]]["link"]).netloc
+ ")" print(f'- {domain} [{title}]({link}) @{timestamp}')
)
if i == int(options.maxitem): if i == int(options.maxitem):
break break
@ -118,9 +103,6 @@ pattern = "%Y-%m-%d %H:%M:%S"
allitem = {} allitem = {}
for url in args: for url in args:
# print url
d = feedparser.parse(url) d = feedparser.parse(url)
for el in d.entries: for el in d.entries:
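
A minimal standalone sketch of the refactored Markdown rendering shown above, for testing the
formatting in isolation. The item dictionary and its values are hypothetical; in rssmerge.py
the entries come from the parsed feeds (allitem):

    import datetime
    import html
    from urllib.parse import urlparse

    # Hypothetical feed entry, shaped like the per-item data rssmerge.py keeps per entry.
    item = {
        "title": "Example entry & notes",
        "link": "https://git.foo.be/adulau/rss-tools",
        "epoch": 1709999950,
    }

    # Same steps as the new markdown branch: escape the title, keep the link,
    # render the epoch with ctime(), and take the domain from the link.
    title = html.escape(item["title"])
    link = item["link"]
    timestamp = datetime.datetime.fromtimestamp(item["epoch"]).ctime()
    domain = urlparse(link).netloc

    print(f'- {domain} [{title}]({link}) @{timestamp}')
    # Example output (timestamp rendered in local time):
    # - git.foo.be [Example entry &amp; notes](https://git.foo.be/adulau/rss-tools) @Sat Mar  9 16:19:10 2024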