Initial rss-tools crappy code from 2007 imported.

Commit fe766ccbbc: 6 changed files with 535 additions and 0 deletions

README.md (new file, 52 lines)
@@ -0,0 +1,52 @@

RSS tools
=========

Following an old idea from 2007 published in my blog post [RSS Everything?](http://www.foo.be/cgi-bin/wiki.pl/2007-02-11_RSS_Everything), this is a set of tools to work on RSS (Really Simple Syndication) feeds in the [Unix way](http://en.wikipedia.org/wiki/Unix_philosophy).

The code committed in this repository is old Python code from 2007: it might break your PC, kill your cat, or the Flying Spaghetti Monster might lose a ball.

Forks and pull requests are more than welcome. You have been warned: the code was just there to experiment with RSS workflows.

Requirements
------------

* Python 2.x
* Feedparser

rsscluster.py
-------------

rsscluster.py is a simple script to cluster items from an RSS feed based on a time interval (expressed in number of days). The maxitem option sets the maximum number of items kept after the clustering. An example use is for del.icio.us/pinboard.in, where you can have a lot of bookmarks during one day and you want to cluster them into one single item per defined time slot, in RSS or in (X)HTML.

    rsscluster.py --interval 2 --maxitem 20 "http://del.icio.us/rss/adulau" >adulau.xml
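
For illustration only, a clustered item in the generated feed looks roughly like this (dates, link, and hash are hypothetical; the guid is the MD5 of the description text, as build_rss computes it):

    <item>
      <title>clustered data of Sat, 10 Nov 2007 11:25:51 +0000 for http://del.icio.us/rss/adulau</title>
      <pubDate>Sat, 10 Nov 2007 11:25:51 +0000</pubDate>
      <description> &lt;a href="http://example.com/bookmark"&gt;a bookmark&lt;/a&gt; from: Sat Nov 10 ... to: Thu Nov  8 ...</description>
      <guid>0d599f0ec05c3bda8c3b8a68c32a1b47</guid>
    </item>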

rsscount.py
-----------

rsscount.py is a simple script to count how many items are in an RSS feed per day. This is used to build the [wiki creativity index](http://www.foo.be/cgi-bin/wiki.pl/WikiCreativityIndex). There is no limit on the number of url arguments.

    rsscount.py "<rss_url>" | sort
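
The output is one line per day: the date (YYYYMMDD) and the item count, separated by a tab. A run could look like this (counts are hypothetical):

    20071108	3
    20071110	12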

rssdir.py
---------

rssdir.py is a quick-and-dirty script to rssify any directory on the filesystem.

    rssdir.py --prefix http://www.foo.be/cours/ . >rss.xml
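
Each file becomes one item, most recently modified first. An entry of the resulting rss.xml looks roughly like this (filename hypothetical; the title is the file path, the description and guid are the prefix plus the path):

    <item>
      <title>./slides/lesson1.pdf</title>
      <pubDate>Sat, 10 Nov 2007 11:25:51 +0000</pubDate>
      <description>http://www.foo.be/cours/./slides/lesson1.pdf</description>
      <guid>http://www.foo.be/cours/./slides/lesson1.pdf</guid>
    </item>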

rssinternetdraft.py
-------------------

rssinternetdraft.py is a simple test that reads a mbox file and generates an RSS feed from the subjects.
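
The mbox path is hardcoded to /var/spool/mail/ietf in the script, so a run is simply (output filename is up to you):

    rssinternetdraft.py >ids.xml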

rssmerge.py
-----------

rssmerge.py is a simple script to gather RSS feeds and merge them in reverse time order. Useful to keep track of recent events.

    rssmerge.py --maxitem 30 --output phtml "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
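
With --output phtml, the result is an HTML fragment ready for server-side inclusion, roughly like this (link, title, and date are hypothetical):

    <ul>
    <li><a href="http://example.com/entry">An entry title</a> --- (<i>Sat Nov 10 11:25:51 2007</i>)</li>
    </ul>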

bin/rsscluster.py (new file, 148 lines)
@@ -0,0 +1,148 @@

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
#
# rsscluster.py is a simple script to cluster items from an rss feed based on a
# time interval (expressed in number of days). The maxitem is the
# maximum number of items kept after the clustering.
#
# an example use is for del.icio.us where you can have a lot of bookmarks during
# one day and you want to cluster them in one single item in RSS or in (X)HTML.
#
# example of use :
# python2.5 rsscluster.py --interval 5 --maxitem 20 "http://del.icio.us/rss/adulau" >adulau.xml

import feedparser
import sys, os
import time
import datetime
import xml.etree.ElementTree as ET
import hashlib
from optparse import OptionParser

#print sys.stdout.encoding
version = "0.2"

feedparser.USER_AGENT = "rsscluster.py " + version + " +http://www.foo.be/"


def date_as_rfc(value):
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))


def build_rss(myitem, maxitem):

    RSSroot = ET.Element('rss', {'version': '2.0'})
    RSSchannel = ET.SubElement(RSSroot, 'channel')

    ET.SubElement(RSSchannel, 'title').text = 'RSS cluster of ' + str(url) + ' per ' + options.interval + ' days'
    ET.SubElement(RSSchannel, 'link').text = str(url)
    ET.SubElement(RSSchannel, 'description').text = 'RSS cluster of ' + str(url) + ' per ' + options.interval + ' days'
    ET.SubElement(RSSchannel, 'generator').text = 'by rsscluster.py ' + version
    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time())

    for bloodyitem in myitem[0:maxitem]:

        RSSitem = ET.SubElement(RSSchannel, 'item')
        ET.SubElement(RSSitem, 'title').text = 'clustered data of ' + date_as_rfc(float(bloodyitem[0])) + " for " + str(url)
        ET.SubElement(RSSitem, 'pubDate').text = date_as_rfc(float(bloodyitem[0]))
        ET.SubElement(RSSitem, 'description').text = bloodyitem[1]
        h = hashlib.md5()
        h.update(bloodyitem[1])
        ET.SubElement(RSSitem, 'guid').text = h.hexdigest()

    RSSfeed = ET.ElementTree(RSSroot)
    feed = ET.tostring(RSSroot)
    return feed

def complete_feed(myfeed):

    myheader = '<?xml version="1.0"?>'
    return myheader + str(myfeed)


def DaysInSec(val):
    # convert a number of days into seconds
    return int(val) * 24 * 60 * 60

usage = "usage: %prog [options] url"
parser = OptionParser(usage)

parser.add_option("-m", "--maxitem", dest="maxitem", help="maximum number of items to list in the feed, default 200")
parser.add_option("-i", "--interval", dest="interval", help="time interval expressed in days, default 1 day")

#2007-11-10 11:25:51
pattern = '%Y-%m-%d %H:%M:%S'

(options, args) = parser.parse_args()

if options.interval is None:
    # default time interval of one day
    options.interval = "1"

if options.maxitem is None:
    options.maxitem = 200


if len(args) != 1:
    parser.print_help()
    parser.error("incorrect number of arguments")

allitem = {}
url = args[0]

d = feedparser.parse(url)

interval = DaysInSec(options.interval)

previousepoch = []
clusteredepoch = []
tcluster = []

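# Clustering pass: walk the feed entries in the order delivered by the feed,
# accumulate the HTML links of entries into clusteredepoch, and flush the
# accumulated text into tcluster (with its "from:"/"to:" time span) whenever
# the gap between the first entry of the current cluster and the current
# entry exceeds the configured interval.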
for el in d.entries:

    eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
    elepoch = int(time.mktime(time.strptime(unicode(eldatetime), pattern)))

    if len(previousepoch):

        #print el.link, int(previousepoch[0])-int(elepoch), interval

        if len(clusteredepoch):
            value = clusteredepoch.pop()
        else:
            value = ""

        clusteredepoch.append(value + " <a href=\"" + el.link + "\">" + el.title + "</a>")

        if not ((int(previousepoch[0]) - int(elepoch)) < interval):

            value = clusteredepoch.pop()

            # keep the start epoch around: previousepoch.pop() below may empty the list
            startepoch = previousepoch[0]
            starttimetuple = datetime.datetime.fromtimestamp(startepoch)
            endttimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
            clusteredepoch.append(value + " from: " + unicode(starttimetuple.ctime()) + " to: " + unicode(endttimetuple.ctime()))
            startdatelist = unicode(startepoch), unicode(clusteredepoch[len(clusteredepoch) - 1])
            tcluster.append(startdatelist)
            del previousepoch[0:len(previousepoch)]
            del clusteredepoch[0:len(clusteredepoch)]
    else:
        clusteredepoch.append(" <a href=\"" + el.link + "\">" + el.title + "</a>")

    previousepoch.append(elepoch)

# if the last cluster list was not complete, we add the time period information.
if len(previousepoch):
    value = clusteredepoch.pop()
    startepoch = previousepoch[0]
    starttimetuple = datetime.datetime.fromtimestamp(startepoch)
    endttimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
    clusteredepoch.append(value + " from: " + unicode(starttimetuple.ctime()) + " to: " + unicode(endttimetuple.ctime()))
    # keep the trailing (still open) cluster as well
    tcluster.append((unicode(startepoch), unicode(clusteredepoch[len(clusteredepoch) - 1])))
    del previousepoch[0:len(previousepoch)]


tcluster.sort()
tcluster.reverse()
print complete_feed(build_rss(tcluster, int(options.maxitem)))


bin/rsscount.py (new file, 61 lines)
@@ -0,0 +1,61 @@

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
#
# rsscount.py is a simple script to count how many items are in a RSS feed per day
#
# The output is the day (YYYYMMDD) and the number of items, separated with a tab.
#
# This is used to build statistics like the wiki creativity index.
#

import feedparser
import sys, os
import time
import datetime
from optparse import OptionParser


feedparser.USER_AGENT = "rsscount.py +http://www.foo.be/"

usage = "usage: %prog url(s)"
parser = OptionParser(usage)


(options, args) = parser.parse_args()

# parse_args() returns a (possibly empty) list, so test for emptiness
if not args:
    parser.print_help()
    sys.exit(1)


counteditem = {}

for url in args:

    d = feedparser.parse(url)

    for el in d.entries:

        try:
            eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
        except AttributeError:
            # discard RSS without pubDate grrr...
            break

        eventdate = eldatetime.isoformat(' ').split(' ', 1)
        edate = eventdate[0].replace("-", "")

        if edate in counteditem:
            counteditem[edate] = counteditem[edate] + 1
        else:
            counteditem[edate] = 1


for k in counteditem.keys():

    print unicode(k).encode("utf-8") + "\t" + unicode(counteditem[k]).encode("utf-8")


bin/rssdir.py (new file, 117 lines)
@@ -0,0 +1,117 @@

# rssdir.py
# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
#
# rssdir.py is a quick-and-dirty script to rssify any directory on the filesystem.
#
# an example of use on the current directory :
#
# python2.5 /usr/local/bin/rssdir.py --prefix http://www.foo.be/cours/ . >rss.xml
#
# You don't really need python2.5 except for ElementTree, but you are free to install it.

import os, fnmatch
import time
import sys
import xml.etree.ElementTree as ET
from optparse import OptionParser

version = "0.1"

# recursive list file function from the ASPN cookbook
def all_files(root, patterns='*', single_level=False, yield_folders=False):
    patterns = patterns.split(';')
    for path, subdirs, files in os.walk(root):
        if yield_folders:
            files.extend(subdirs)
        files.sort()
        for name in files:
            for pattern in patterns:
                if fnmatch.fnmatch(name, pattern):
                    yield os.path.join(path, name)
                    break
        if single_level:
            break

def date_files(filelist):
    date_filename_list = []

    for filename in filelist:
        stats = os.stat(filename)
        # stats[8] is st_mtime, the last modification time
        last_update = stats[8]
        date_filename_tuple = last_update, filename
        date_filename_list.append(date_filename_tuple)

    return date_filename_list


def date_as_rfc(value):
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))

def build_rss(myitem, maxitem):

    RSSroot = ET.Element('rss', {'version': '2.0'})
    RSSchannel = ET.SubElement(RSSroot, 'channel')

    ET.SubElement(RSSchannel, 'title').text = 'RSS feed of ' + str(title)
    ET.SubElement(RSSchannel, 'link').text = link
    ET.SubElement(RSSchannel, 'description').text = 'A directory RSSified by rssdir.py ' + version
    ET.SubElement(RSSchannel, 'generator').text = 'A directory RSSified by rssdir.py ' + version
    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time())

    for bloodyitem in myitem[0:maxitem]:

        RSSitem = ET.SubElement(RSSchannel, 'item')
        ET.SubElement(RSSitem, 'title').text = bloodyitem[1]
        ET.SubElement(RSSitem, 'pubDate').text = date_as_rfc(bloodyitem[0])
        ET.SubElement(RSSitem, 'description').text = prefixurl + bloodyitem[1]
        ET.SubElement(RSSitem, 'guid').text = prefixurl + bloodyitem[1]

    RSSfeed = ET.ElementTree(RSSroot)
    feed = ET.tostring(RSSroot)
    return feed


def complete_feed(myfeed):

    myheader = '<?xml version="1.0"?>'
    return myheader + str(myfeed)

usage = "usage: %prog [options] directory"
parser = OptionParser(usage)

parser.add_option("-p", "--prefix", dest="prefix", help="http prefix to be used for each entry, default none")
parser.add_option("-t", "--title", dest="title", help="set a title to the rss feed, default using prefix", type="string")
parser.add_option("-l", "--link", dest="link", help="http link set, default is prefix and none if prefix not set")
parser.add_option("-m", "--maxitem", dest="maxitem", help="maximum item to list in the feed, default 32", type="int")

(options, args) = parser.parse_args()

if len(args) != 1:
    parser.error("incorrect number of arguments, one directory expected")

if options.prefix is None:
    prefixurl = ''
else:
    prefixurl = options.prefix

if options.link is None:
    link = options.prefix
else:
    link = options.link

if options.title is None:
    title = options.prefix
else:
    title = options.title

if options.maxitem is None:
    maxitem = 32
else:
    maxitem = options.maxitem


mylist = date_files(all_files(args[0]))

mylist.sort()
mylist.reverse()

print complete_feed(build_rss(mylist, maxitem))


bin/rssinternetdraft.py (new file, 53 lines)
@@ -0,0 +1,53 @@

#
# quick-and-dirty(tm) script to gather IETF Internet-Draft announces
# from a mbox and to generate a nice RSS feed of the recent announces.
#
# for more information : http://www.foo.be/ietf/id/

import mailbox
import time
import re
import xml.etree.ElementTree as ET

date_rfc2822 = "%a, %d %b %Y %H:%M:%S"

tmsg = []

def date_as_rfc(value):
    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))

def build_rss(myitem, maxitem):

    RSSroot = ET.Element('rss', {'version': '2.0'})
    RSSchannel = ET.SubElement(RSSroot, 'channel')

    ET.SubElement(RSSchannel, 'title').text = 'Latest Internet-Draft (IDs) Published - IETF - custom RSS feed'
    ET.SubElement(RSSchannel, 'link').text = 'http://www.foo.be/ietf/id/'
    ET.SubElement(RSSchannel, 'description').text = 'Latest Internet-Draft (IDs) Published - IETF - custom RSS feed'
    ET.SubElement(RSSchannel, 'generator').text = 'rssany extended for parsing IETF IDs - http://www.foo.be/cgi-bin/wiki.pl/RssAny'
    # ET.SubElement( RSSchannel, 'pubDate' ).text = date_as_rfc(time.time())
    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time() - 10000)

    for bloodyitem in myitem[0:maxitem]:
        RSSitem = ET.SubElement(RSSchannel, 'item')
        ET.SubElement(RSSitem, 'title').text = bloodyitem[1]
        ET.SubElement(RSSitem, 'pubDate').text = date_as_rfc(bloodyitem[0])
        ET.SubElement(RSSitem, 'description').text = '<pre>' + bloodyitem[2] + '</pre>'
        ET.SubElement(RSSitem, 'guid').text = "http://tools.ietf.org/html/" + bloodyitem[3]
        ET.SubElement(RSSitem, 'link').text = "http://tools.ietf.org/html/" + bloodyitem[3]
    RSSfeed = ET.ElementTree(RSSroot)
    feed = ET.tostring(RSSroot)
    return feed

for message in mailbox.mbox('/var/spool/mail/ietf'):
    subject = message['subject']
    date = message['date']
    # drop the trailing timezone part of the Date header before parsing
    date_epoch = int(time.mktime(time.strptime(date[0:-12], date_rfc2822)))
    message_id = message['Message-Id']
    body = message.get_payload()[0].get_payload()
    # the draft name is the part of the subject after ':' and before the first '.'
    id = subject.split(":")[1].split(".")[0]
    tmsg.append([date_epoch, subject, body, id])

tmsg.sort()
tmsg.reverse()
print build_rss(tmsg, 100)


bin/rssmerge.py (new file, 104 lines)
@@ -0,0 +1,104 @@

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
#
# rssmerge.py is a simple script to gather rss feeds and merge them in reverse
# time order. Useful to keep track of recent events.
#
# this is still an early prototype and assumes that you have full control of the
# remote rss feeds (if not, you may have some security issues).
#
# TODO : - rss 2.0 and atom output
#        - full html output
#
# example of use :
# python2.5 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc

import feedparser
import sys, os
import time
import datetime
import md5
from optparse import OptionParser
import cgi

feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"

def RenderMerge(itemlist, output="text"):

    i = 0

    if output == "text":
        for item in itemlist:
            i = i + 1
            # Keep consistent datetime representation if not use allitem[item[1]]['updated']
            timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]['epoch'])

            print str(i) + ":" + allitem[item[1]]['title'] + ":" + timetuple.ctime() + ":" + allitem[item[1]]['link']

            if i == int(options.maxitem):
                break

    if output == "phtml":
        print "<ul>"
        for item in itemlist:
            i = i + 1
            # Keep consistent datetime representation if not use allitem[item[1]]['updated']
            timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]['epoch'])
            print "<li><a href=\"" + unicode(allitem[item[1]]['link']).encode("utf-8") + "\">" + unicode(cgi.escape(allitem[item[1]]['title'])).encode("utf-8") + "</a> --- (<i>" + timetuple.ctime() + "</i>)</li>"
            if i == int(options.maxitem):
                break
        print "</ul>"

usage = "usage: %prog [options] url(s)"
parser = OptionParser(usage)

parser.add_option("-m", "--maxitem", dest="maxitem", help="maximum number of items to list in the feed, default 200")
parser.add_option("-o", "--output", dest="output", help="output format (text, phtml), default text")

#2007-11-10 11:25:51
pattern = '%Y-%m-%d %H:%M:%S'

(options, args) = parser.parse_args()

if options.output is None:
    options.output = "text"

if options.maxitem is None:
    options.maxitem = 200

allitem = {}

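# Gather pass: fetch every feed and index each entry by the MD5 of its link,
# so an entry seen in several feeds is only kept once; the entry's epoch is
# stored alongside so the merged list can be sorted by time below.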
for url in args:

    #print url

    d = feedparser.parse(url)

    for el in d.entries:

        eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
        elepoch = int(time.mktime(time.strptime(str(eldatetime), pattern)))
        linkkey = md5.new(el.link).hexdigest()
        allitem[linkkey] = {}
        allitem[linkkey]['link'] = str(el.link)
        allitem[linkkey]['epoch'] = int(elepoch)
        allitem[linkkey]['updated'] = el.updated
        allitem[linkkey]['title'] = el.title


itemlist = []

for something in allitem.keys():
    epochkeytuple = (allitem[something]['epoch'], something)
    itemlist.append(epochkeytuple)

itemlist.sort()
itemlist.reverse()

RenderMerge(itemlist, options.output)