chg: [rssmerge] Convert very old script (17 years old ;-) to Python 3

- Add a very basic Markdown list output
- Fix RSS feed with no `modified` time
This commit is contained in:
Alexandre Dulaunoy 2024-02-11 11:17:49 +01:00
parent 54a82c0ce9
commit 659d74535e
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD

View file

@ -4,101 +4,149 @@
# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny # a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
# #
# rssmerge.py is a simple script to gather rss feed and merge them in reverse # rssmerge.py is a simple script to gather rss feed and merge them in reverse
# time order. Useful to keep track of recent events. # time order. Useful to keep track of recent events.
# #
# this is still an early prototype and assume that you have full control of the # this is still an early prototype and assume that you have full control of the
# remote rss feeds (if not you may have some security issues). # remote rss feeds (if not you may have some security issues).
# #
# TODO : - rss 2.0 and atom output # TODO : - rss 2.0 and atom output
# - full html output # - full html output
# #
# example of use : # example of use :
# python2.5 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk" # python3 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi? # "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc # p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
import feedparser import feedparser
import sys,os import sys, os
import time import time
import datetime import datetime
import md5 import hashlib
from optparse import OptionParser from optparse import OptionParser
import cgi import html
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/" feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
def RenderMerge(itemlist, output="text", items=None, maxitem=None):
    """Print the merged feed entries to stdout in the requested format.

    Args:
        itemlist: sequence of ``(epoch, key)`` tuples, already in display
            order; only the key (index 1) is used, to look entries up.
        output: ``"text"``, ``"phtml"`` or ``"markdown"``. Any other value
            prints nothing.
        items: mapping from key to an entry dict holding ``"title"``,
            ``"link"`` and ``"epoch"``. Defaults to the module-level
            ``allitem`` filled in by the script body.
        maxitem: maximum number of entries to print. Defaults to the
            module-level ``options.maxitem``. A value of zero or less
            prints every entry.
    """
    # Fall back to the script's globals so existing two-argument calls
    # keep working unchanged.
    if items is None:
        items = allitem
    if maxitem is None:
        maxitem = options.maxitem

    limit = int(maxitem)
    selected = itemlist[:limit] if limit > 0 else itemlist
    entries = [items[pair[1]] for pair in selected]

    if output == "text":
        for position, entry in enumerate(entries, start=1):
            # Render from the stored epoch (not the feed's own "updated"
            # string) so every feed uses the same date representation.
            stamp = datetime.datetime.fromtimestamp(entry["epoch"]).ctime()
            print(
                "{}:{}:{}:{}".format(position, entry["title"], stamp, entry["link"])
            )

    elif output == "phtml":
        print("<ul>")
        for entry in entries:
            stamp = datetime.datetime.fromtimestamp(entry["epoch"]).ctime()
            # Both values come from remote feeds: escape the link as well as
            # the title so a crafted URL cannot break out of the href.
            print(
                '<li><a href="{}">{}</a> --- (<i>{}</i>)</li>'.format(
                    html.escape(str(entry["link"])),
                    html.escape(str(entry["title"])),
                    stamp,
                )
            )
        print("</ul>")

    elif output == "markdown":
        for entry in entries:
            print(
                "- [{}]({})".format(
                    html.escape(str(entry["title"])), str(entry["link"])
                )
            )
# Command-line handling. Defaults and types are declared on the options
# themselves so that `options.maxitem` is always an int and
# `options.output` is always one of the supported formats.
usage = "usage: %prog [options] url"
parser = OptionParser(usage)
parser.add_option(
    "-m",
    "--maxitem",
    dest="maxitem",
    type="int",
    default=200,
    help="maximum item to list in the feed, default 200",
)
parser.add_option(
    "-o",
    "--output",
    dest="output",
    type="choice",
    choices=["text", "phtml", "markdown"],
    default="text",
    help="output format (text, phtml, markdown), default text",
)

# Layout of str(datetime), e.g. 2007-11-10 11:25:51. No longer needed by the
# epoch computation below; the name is kept in case other code refers to it.
pattern = "%Y-%m-%d %H:%M:%S"

(options, args) = parser.parse_args()

# Maps md5(link) -> entry dict; keying on the link hash means an entry that
# appears in several feeds is kept only once (last one wins).
allitem = {}

for url in args:
    d = feedparser.parse(url)
    for el in d.entries:
        # Prefer the modification time and fall back to the publication
        # time for feeds that carry no `modified` element.
        if "modified_parsed" in el:
            stamp = el.modified_parsed
        else:
            stamp = el.get("published_parsed")
        link = el.get("link")

        # An entry without a usable date or link cannot be ordered or
        # rendered; skip it instead of aborting the whole merge.
        if not stamp or not link:
            print(
                "rssmerge: skipping entry without date or link in " + str(url),
                file=sys.stderr,
            )
            continue

        # NOTE(review): time.mktime() interprets the struct_time as local
        # time; this matches the previous behaviour and shifts all entries
        # by the same offset, so the relative order is what matters here.
        elepoch = int(time.mktime(stamp))

        linkkey = hashlib.md5(link.encode("utf-8")).hexdigest()
        allitem[linkkey] = {
            "link": str(link),
            "epoch": elepoch,
            "updated": el.get("updated", ""),
            "title": el.get("title", ""),
        }

# Newest first: sort the (epoch, key) pairs in descending order.
itemlist = sorted(
    ((entry["epoch"], key) for key, entry in allitem.items()), reverse=True
)

RenderMerge(itemlist, options.output)