chg: [rssmerge] Convert very old script (17 years old ;-) to Python 3

- Add a very basic Markdown list output
- Fix RSS feed with no `modified` time
This commit is contained in:
Alexandre Dulaunoy 2024-02-11 11:17:49 +01:00
parent 54a82c0ce9
commit 659d74535e
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD

View file

@ -13,92 +13,140 @@
# - full html output # - full html output
# #
# example of use : # example of use :
# python2.5 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk" # python3 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi? # "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc # p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
import feedparser import feedparser
import sys,os import sys, os
import time import time
import datetime import datetime
import md5 import hashlib
from optparse import OptionParser from optparse import OptionParser
import cgi import html
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/" feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
def RenderMerge(itemlist, output="text", items=None, maxitem=None):
    """Print the merged feed items in the requested output format.

    itemlist -- list of (epoch, key) tuples, expected newest-first
    output   -- "text" (plain lines), "phtml" (an HTML <ul> fragment) or
                "markdown" (a bullet list of links)
    items    -- mapping key -> item dict with "title", "link" and "epoch";
                defaults to the module-level `allitem` built by the fetch loop
    maxitem  -- stop after this many items; defaults to options.maxitem
    """
    if items is None:
        items = allitem
    if maxitem is None:
        maxitem = options.maxitem
    maxitem = int(maxitem)

    i = 0
    if output == "text":
        for item in itemlist:
            i = i + 1
            # Keep a consistent datetime representation instead of the
            # feed-supplied `updated` string.
            timetuple = datetime.datetime.fromtimestamp(items[item[1]]["epoch"])
            print(
                str(i)
                + ":"
                + items[item[1]]["title"]
                + ":"
                + timetuple.ctime()
                + ":"
                + items[item[1]]["link"]
            )
            if i == maxitem:
                break

    if output == "phtml":
        print("<ul>")
        for item in itemlist:
            i = i + 1
            timetuple = datetime.datetime.fromtimestamp(items[item[1]]["epoch"])
            # Titles are HTML-escaped; links are emitted as-is inside href.
            print(
                '<li><a href="'
                + str(items[item[1]]["link"])
                + '">'
                + str(html.escape(items[item[1]]["title"]))
                + "</a> --- (<i>"
                + timetuple.ctime()
                + "</i>)</li>"
            )
            if i == maxitem:
                break
        print("</ul>")

    if output == "markdown":
        for item in itemlist:
            i = i + 1
            print(
                "- ["
                + str(html.escape(items[item[1]]["title"]))
                + "]("
                + str(items[item[1]]["link"])
                + ")"
            )
            if i == maxitem:
                break
usage = "usage: %prog [options] url"
parser = OptionParser(usage)
parser.add_option(
    "-m",
    "--maxitem",
    dest="maxitem",
    help="maximum item to list in the feed, default 200",
)
parser.add_option(
    "-o",
    "--output",
    dest="output",
    help="output format (text, phtml, markdown), default text",
)

# Timestamp layout used to normalise entry dates, e.g. 2007-11-10 11:25:51
pattern = "%Y-%m-%d %H:%M:%S"

(options, args) = parser.parse_args()

if options.output is None:
    options.output = "text"
if options.maxitem is None:
    options.maxitem = 200

# Merged entries from every fetched feed, keyed by the MD5 of the entry link.
allitem = {}
for url in args:
    d = feedparser.parse(url)
    for el in d.entries:
        # Some feeds carry a `modified` time, others only `published`;
        # fall back so entries without a modified time are still merged.
        if "modified_parsed" in el:
            eldatetime = datetime.datetime.fromtimestamp(
                time.mktime(el.modified_parsed)
            )
        else:
            eldatetime = datetime.datetime.fromtimestamp(
                time.mktime(el.published_parsed)
            )
        # Round-trip through `pattern` to normalise every feed's date to
        # the same epoch representation.
        elepoch = int(time.mktime(time.strptime(str(eldatetime), pattern)))
        # Key on the link so the same entry seen in several feeds is
        # stored only once.
        linkkey = hashlib.md5(el.link.encode("utf-8")).hexdigest()
        allitem[linkkey] = {
            "link": str(el.link),
            "epoch": elepoch,
            "updated": el.updated,
            "title": el.title,
        }
# Order every merged entry newest-first by its normalised epoch.
# (Keys are unique MD5 digests, so there are no ties and sorted(...,
# reverse=True) matches the original sort()+reverse().)
itemlist = sorted(
    ((item["epoch"], key) for key, item in allitem.items()), reverse=True
)
RenderMerge(itemlist, options.output)