mirror of
https://github.com/adulau/rss-tools.git
synced 2024-12-04 15:57:12 +00:00
chg: [rssmerge] Convert very old script (17 years old ;-) to Python 3
- Add a very basic Markdown list output - Fix RSS feed with no `modified` time
This commit is contained in:
parent
54a82c0ce9
commit
659d74535e
1 changed file with 102 additions and 54 deletions
142
bin/rssmerge.py
142
bin/rssmerge.py
|
@ -13,92 +13,140 @@
|
||||||
# - full html output
|
# - full html output
|
||||||
#
|
#
|
||||||
# example of use :
|
# example of use :
|
||||||
# python2.5 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
|
# python3 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
|
||||||
# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
|
# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
|
||||||
# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
|
# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
import sys,os
|
import sys, os
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import md5
|
import hashlib
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
import cgi
|
import html
|
||||||
|
|
||||||
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
|
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
|
||||||
|
|
||||||
def RenderMerge(itemlist,output="text"):
|
|
||||||
|
|
||||||
i = 0
|
def RenderMerge(itemlist, output="text"):
|
||||||
|
|
||||||
if output == "text" :
|
i = 0
|
||||||
for item in itemlist:
|
|
||||||
i = i + 1
|
|
||||||
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
|
||||||
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]['epoch'])
|
|
||||||
|
|
||||||
print str(i)+":"+allitem[item[1]]['title']+":"+timetuple.ctime()+":"+allitem[item[1]]['link']
|
if output == "text":
|
||||||
|
for item in itemlist:
|
||||||
|
i = i + 1
|
||||||
|
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
||||||
|
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
||||||
|
|
||||||
if i == int(options.maxitem):
|
print(
|
||||||
break
|
str(i)
|
||||||
|
+ ":"
|
||||||
|
+ allitem[item[1]]["title"]
|
||||||
|
+ ":"
|
||||||
|
+ timetuple.ctime()
|
||||||
|
+ ":"
|
||||||
|
+ allitem[item[1]]["link"]
|
||||||
|
)
|
||||||
|
|
||||||
if output == "phtml" :
|
if i == int(options.maxitem):
|
||||||
print "<ul>"
|
break
|
||||||
for item in itemlist:
|
|
||||||
i = i + 1
|
if output == "phtml":
|
||||||
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
print("<ul>")
|
||||||
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]['epoch'])
|
for item in itemlist:
|
||||||
print "<li><a href=\""+unicode(allitem[item[1]]['link']).encode("utf-8")+"\">"+unicode(cgi.escape(allitem[item[1]]['title'])).encode("utf-8")+"</a> --- (<i>"+timetuple.ctime()+"</i>)</li>"
|
i = i + 1
|
||||||
if i == int(options.maxitem):
|
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
||||||
break
|
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
||||||
print "</ul>"
|
print(
|
||||||
|
'<li><a href="'
|
||||||
|
+ str(str(allitem[item[1]]["link"]))
|
||||||
|
+ '">'
|
||||||
|
+ str(str(html.escape(allitem[item[1]]["title"])))
|
||||||
|
+ "</a> --- (<i>"
|
||||||
|
+ timetuple.ctime()
|
||||||
|
+ "</i>)</li>"
|
||||||
|
)
|
||||||
|
if i == int(options.maxitem):
|
||||||
|
break
|
||||||
|
print("</ul>")
|
||||||
|
|
||||||
|
if output == "markdown":
|
||||||
|
for item in itemlist:
|
||||||
|
i = i + 1
|
||||||
|
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
||||||
|
print(
|
||||||
|
"- ["
|
||||||
|
+ str(html.escape(allitem[item[1]]["title"]))
|
||||||
|
+ "]("
|
||||||
|
+ str(allitem[item[1]]["link"])
|
||||||
|
+ ")"
|
||||||
|
)
|
||||||
|
if i == int(options.maxitem):
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
usage = "usage: %prog [options] url"
|
usage = "usage: %prog [options] url"
|
||||||
parser = OptionParser(usage)
|
parser = OptionParser(usage)
|
||||||
|
|
||||||
parser.add_option("-m","--maxitem",dest="maxitem",help="maximum item to list in the feed, default 200")
|
parser.add_option(
|
||||||
parser.add_option("-o","--output",dest="output",help="output format (text, phtml), default text")
|
"-m",
|
||||||
|
"--maxitem",
|
||||||
|
dest="maxitem",
|
||||||
|
help="maximum item to list in the feed, default 200",
|
||||||
|
)
|
||||||
|
parser.add_option(
|
||||||
|
"-o",
|
||||||
|
"--output",
|
||||||
|
dest="output",
|
||||||
|
help="output format (text, phtml, markdown), default text",
|
||||||
|
)
|
||||||
|
|
||||||
#2007-11-10 11:25:51
|
# 2007-11-10 11:25:51
|
||||||
pattern = '%Y-%m-%d %H:%M:%S'
|
pattern = "%Y-%m-%d %H:%M:%S"
|
||||||
|
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
if options.output == None:
|
if options.output == None:
|
||||||
options.output = "text"
|
options.output = "text"
|
||||||
|
|
||||||
if options.maxitem == None:
|
if options.maxitem == None:
|
||||||
options.maxitem = 200
|
options.maxitem = 200
|
||||||
|
|
||||||
allitem = {}
|
allitem = {}
|
||||||
|
|
||||||
for url in args:
|
for url in args:
|
||||||
|
|
||||||
#print url
|
# print url
|
||||||
|
|
||||||
d = feedparser.parse(url)
|
d = feedparser.parse(url)
|
||||||
|
|
||||||
for el in d.entries:
|
|
||||||
|
|
||||||
eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
|
|
||||||
elepoch = int(time.mktime(time.strptime(str(eldatetime), pattern)))
|
|
||||||
linkkey = md5.new(el.link).hexdigest()
|
|
||||||
allitem[linkkey] = {}
|
|
||||||
allitem[linkkey]['link'] = str(el.link)
|
|
||||||
allitem[linkkey]['epoch'] = int(elepoch)
|
|
||||||
allitem[linkkey]['updated'] = el.updated
|
|
||||||
allitem[linkkey]['title'] = el.title
|
|
||||||
|
|
||||||
|
for el in d.entries:
|
||||||
|
if "modified_parsed" in el:
|
||||||
|
eldatetime = datetime.datetime.fromtimestamp(
|
||||||
|
time.mktime(el.modified_parsed)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
eldatetime = datetime.datetime.fromtimestamp(
|
||||||
|
time.mktime(el.published_parsed)
|
||||||
|
)
|
||||||
|
elepoch = int(time.mktime(time.strptime(str(eldatetime), pattern)))
|
||||||
|
h = hashlib.md5()
|
||||||
|
h.update(el.link.encode("utf-8"))
|
||||||
|
linkkey = h.hexdigest()
|
||||||
|
allitem[linkkey] = {}
|
||||||
|
allitem[linkkey]["link"] = str(el.link)
|
||||||
|
allitem[linkkey]["epoch"] = int(elepoch)
|
||||||
|
allitem[linkkey]["updated"] = el.updated
|
||||||
|
allitem[linkkey]["title"] = el.title
|
||||||
|
|
||||||
|
|
||||||
itemlist = []
|
itemlist = []
|
||||||
|
|
||||||
for something in allitem.keys():
|
for something in list(allitem.keys()):
|
||||||
epochkeytuple = (allitem[something]['epoch'],something)
|
epochkeytuple = (allitem[something]["epoch"], something)
|
||||||
itemlist.append (epochkeytuple)
|
itemlist.append(epochkeytuple)
|
||||||
|
|
||||||
itemlist.sort()
|
itemlist.sort()
|
||||||
itemlist.reverse()
|
itemlist.reverse()
|
||||||
|
|
||||||
RenderMerge(itemlist,options.output)
|
RenderMerge(itemlist, options.output)
|
||||||
|
|
Loading…
Reference in a new issue