commit fe766ccbbc6c9807e78aef287a43b5b630e750e3
Author: Alexandre Dulaunoy
Date:   Sun Apr 14 14:05:30 2013 +0200

    Initial rss-tools crappy code from 2007 imported.

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..61320ea
--- /dev/null
+++ b/README.md
@@ -0,0 +1,52 @@
+RSS tools
+=========
+
+Following an old idea from 2007 published in a blog post called [RSS Everything?](http://www.foo.be/cgi-bin/wiki.pl/2007-02-11_RSS_Everything), this is a set of tools to
+work on RSS (Really Simple Syndication) feeds in a [Unix way](http://en.wikipedia.org/wiki/Unix_philosophy).
+
+The code committed in this repository is old Python code from 2007: it might break your PC, kill your cat, or the Flying Spaghetti Monster might lose a ball.
+
+Forks and pull requests are more than welcome. You have been warned: the code was just there to experiment with RSS workflows.
+
+Requirements
+------------
+
+* Python 2.x
+* Feedparser
+
+rsscluster.py
+-------------
+
+rsscluster.py is a simple script to cluster items from an RSS feed based on a time interval (expressed in number of days).
+The maxitem is the maximum number of items kept after the clustering. An example use is for del.icio.us/pinboard.in, where
+you can have a lot of bookmarks during one day and you want to cluster them into a single item per defined time slot, in RSS or in (X)HTML.
+
+    rsscluster.py --interval 2 --maxitem 20 "http://del.icio.us/rss/adulau" >adulau.xml
+
+rsscount.py
+-----------
+
+rsscount.py is a simple script to count how many items are in an RSS feed per day. This is used to build the [wiki creativity index](http://www.foo.be/cgi-bin/wiki.pl/WikiCreativityIndex). There is no limit on the number of url arguments.
+
+    rsscount.py "" | sort
+
+rssdir.py
+---------
+
+rssdir.py is a quick-and-dirty script to rssify any directory on the filesystem.
+
+    rssdir.py --prefix http://www.foo.be/cours/ . >rss.xml
+
+rssinternetdraft.py
+-------------------
+
+rssinternetdraft.py is a simple test to read an mbox file and generate an RSS feed from the message subjects.
+
+rssmerge.py
+-----------
+
+rssmerge.py is a simple script to gather RSS feeds and merge them in reverse time order. Useful to keep track of recent events.
+
+    python2.5 rssmerge.py --maxitem 30 --output phtml "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
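All of the generators below build RSS 2.0 the same way: create an `rss` root with a `channel`, append one `item` per entry, and serialize with `ET.tostring`. A minimal sketch of that shared skeleton (the function name and arguments are illustrative, not part of the scripts):

    import time
    import xml.etree.ElementTree as ET

    def minimal_feed(title, link, items):
        # items: (epoch, item_title) tuples, newest first
        def rfc822(epoch):
            return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(epoch))
        root = ET.Element('rss', {'version': '2.0'})
        channel = ET.SubElement(root, 'channel')
        ET.SubElement(channel, 'title').text = title
        ET.SubElement(channel, 'link').text = link
        ET.SubElement(channel, 'pubDate').text = rfc822(time.time())
        for epoch, item_title in items:
            item = ET.SubElement(channel, 'item')
            ET.SubElement(item, 'title').text = item_title
            ET.SubElement(item, 'pubDate').text = rfc822(epoch)
        return ET.tostring(root)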
diff --git a/bin/rsscluster.py b/bin/rsscluster.py
new file mode 100644
index 0000000..43e4f77
--- /dev/null
+++ b/bin/rsscluster.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
+#
+# rsscluster.py is a simple script to cluster items from an RSS feed based on a
+# time interval (expressed in number of days). The maxitem is the maximum
+# number of items kept after the clustering.
+#
+# An example use is for del.icio.us where you can have a lot of bookmarks during
+# one day and you want to cluster them in one single item in RSS or in (X)HTML.
+#
+# example of use :
+# python2.5 rsscluster.py --interval 5 --maxitem 20 "http://del.icio.us/rss/adulau" >adulau.xml
+
+import feedparser
+import sys, os
+import time
+import datetime
+import xml.etree.ElementTree as ET
+import hashlib
+from optparse import OptionParser
+
+#print sys.stdout.encoding
+version = "0.2"
+
+feedparser.USER_AGENT = "rsscluster.py " + version + " +http://www.foo.be/"
+
+
+def date_as_rfc(value):
+    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))
+
+
+def build_rss(myitem, maxitem):
+
+    RSSroot = ET.Element('rss', {'version': '2.0'})
+    RSSchannel = ET.SubElement(RSSroot, 'channel')
+
+    ET.SubElement(RSSchannel, 'title').text = 'RSS cluster of ' + str(url) + ' per ' + options.interval + ' days'
+    ET.SubElement(RSSchannel, 'link').text = str(url)
+    ET.SubElement(RSSchannel, 'description').text = 'RSS cluster of ' + str(url) + ' per ' + options.interval + ' days'
+    ET.SubElement(RSSchannel, 'generator').text = 'by rsscluster.py ' + version
+    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time())
+
+    for bloodyitem in myitem[0:maxitem]:
+
+        RSSitem = ET.SubElement(RSSchannel, 'item')
+        ET.SubElement(RSSitem, 'title').text = 'clustered data of ' + date_as_rfc(float(bloodyitem[0])) + " for " + str(url)
+        ET.SubElement(RSSitem, 'pubDate').text = date_as_rfc(float(bloodyitem[0]))
+        ET.SubElement(RSSitem, 'description').text = bloodyitem[1]
+        h = hashlib.md5()
+        h.update(bloodyitem[1].encode('utf-8'))
+        ET.SubElement(RSSitem, 'guid').text = h.hexdigest()
+
+    RSSfeed = ET.ElementTree(RSSroot)
+    feed = ET.tostring(RSSroot)
+    return feed
+
+
+def complete_feed(myfeed):
+
+    myheader = '<?xml version="1.0" encoding="UTF-8"?>'
+    return myheader + str(myfeed)
+
+
+def DaysInSec(val):
+
+    return int(val) * 24 * 60 * 60
+
+
+usage = "usage: %prog [options] url"
+parser = OptionParser(usage)
+
+parser.add_option("-m", "--maxitem", dest="maxitem", help="maximum item to list in the feed, default 200")
+parser.add_option("-i", "--interval", dest="interval", help="time interval expressed in days, default 1 day")
+
+# 2007-11-10 11:25:51
+pattern = '%Y-%m-%d %H:%M:%S'
+
+(options, args) = parser.parse_args()
+
+if options.interval is None:
+    options.interval = "1"
+
+if options.maxitem is None:
+    options.maxitem = 200
+
+if len(args) != 1:
+    parser.print_help()
+    parser.error("incorrect number of arguments")
+
+allitem = {}
+url = args[0]
+
+d = feedparser.parse(url)
+
+interval = DaysInSec(options.interval)
+
+previousepoch = []
+clusteredepoch = []
+tcluster = []
+
+for el in d.entries:
+
+    eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
+    elepoch = int(time.mktime(time.strptime(unicode(eldatetime), pattern)))
+
+    if len(previousepoch):
+
+        #print el.link, int(previousepoch[0])-int(elepoch), interval
+
+        if len(clusteredepoch):
+            value = clusteredepoch.pop()
+        else:
+            value = ""
+
+        clusteredepoch.append(value + " " + el.title)
+
+        if not ((int(previousepoch[0]) - int(elepoch)) < interval):
+
+            value = clusteredepoch.pop()
+
+            # capture the cluster start before popping, so a one-entry
+            # cluster does not raise an IndexError
+            startepoch = previousepoch[0]
+            starttimetuple = datetime.datetime.fromtimestamp(startepoch)
+            endttimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
+            clusteredepoch.append(value + " from: " + unicode(starttimetuple.ctime()) + " to: " + unicode(endttimetuple.ctime()))
+            startdatelist = unicode(startepoch), unicode(clusteredepoch[len(clusteredepoch) - 1])
+            tcluster.append(startdatelist)
+            del previousepoch[0:len(previousepoch)]
+            del clusteredepoch[0:len(clusteredepoch)]
+    else:
+        clusteredepoch.append(" " + el.title)
+    previousepoch.append(elepoch)
+
+# if the last cluster list was not complete, we add the time period information
+if len(previousepoch):
+    value = clusteredepoch.pop()
+    startepoch = previousepoch[0]
+    starttimetuple = datetime.datetime.fromtimestamp(startepoch)
+    endttimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
+    clusteredepoch.append(value + " from: " + unicode(starttimetuple.ctime()) + " to: " + unicode(endttimetuple.ctime()))
+    tcluster.append((unicode(startepoch), unicode(clusteredepoch[len(clusteredepoch) - 1])))
+    del previousepoch[0:len(previousepoch)]
+
+tcluster.sort()
+tcluster.reverse()
+print complete_feed(build_rss(tcluster, int(options.maxitem)))
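The clustering rule is easy to lose in the feed plumbing above: an entry joins the current cluster while its distance to the first (newest) entry of that cluster stays below the interval, otherwise it closes the cluster and starts a new one. The same logic in isolation (function and variable names are mine, for illustration):

    def cluster_epochs(epochs, interval_seconds):
        # epochs: entry timestamps, newest first, as feedparser yields them
        clusters = []
        current = []
        for epoch in epochs:
            if current and (current[0] - epoch) >= interval_seconds:
                clusters.append(current)
                current = []
            current.append(epoch)
        if current:
            clusters.append(current)
        return clusters

    # cluster_epochs([1000, 990, 500, 490], 100) -> [[1000, 990], [500, 490]]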
diff --git a/bin/rsscount.py b/bin/rsscount.py
new file mode 100644
index 0000000..1f1b735
--- /dev/null
+++ b/bin/rsscount.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
+#
+# rsscount.py is a simple script to count how many items are in an RSS feed per day.
+#
+# The output is the date (YYYYMMDD) and the number of items, separated by a tab.
+#
+# This is used to build statistics like the wiki creativity index.
+#
+
+import feedparser
+import sys, os
+import time
+import datetime
+from optparse import OptionParser
+
+
+feedparser.USER_AGENT = "rsscount.py +http://www.foo.be/"
+
+
+usage = "usage: %prog url(s)"
+parser = OptionParser(usage)
+
+
+(options, args) = parser.parse_args()
+
+if not args:
+    parser.print_help()
+    sys.exit(0)
+
+
+counteditem = {}
+
+for url in args:
+
+    d = feedparser.parse(url)
+
+    for el in d.entries:
+
+        try:
+            eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
+        except AttributeError:
+            # discard RSS without pubDate grrr...
+            break
+
+        eventdate = eldatetime.isoformat(' ').split(' ', 1)
+        edate = eventdate[0].replace("-", "")
+
+        if counteditem.has_key(edate):
+            counteditem[edate] = counteditem[edate] + 1
+        else:
+            counteditem[edate] = 1
+
+
+for k in counteditem.keys():
+
+    print unicode(k).encode("utf-8") + "\t" + unicode(counteditem[k]).encode("utf-8")
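The counting loop boils down to bucketing entry dates by day. An equivalent, self-contained version (a hypothetical helper, using dict.get instead of has_key):

    import datetime

    def count_per_day(entry_datetimes):
        # entry_datetimes: datetime.datetime objects taken from feed entries
        counted = {}
        for dt in entry_datetimes:
            key = dt.strftime("%Y%m%d")  # e.g. '20071110', the format rsscount.py prints
            counted[key] = counted.get(key, 0) + 1
        return counted

    # count_per_day([datetime.datetime(2007, 11, 10, 11, 25),
    #                datetime.datetime(2007, 11, 10, 12, 0)]) -> {'20071110': 2}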
diff --git a/bin/rssdir.py b/bin/rssdir.py
new file mode 100644
index 0000000..f363c4d
--- /dev/null
+++ b/bin/rssdir.py
@@ -0,0 +1,117 @@
+# rssdir.py
+# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
+#
+# rssdir is a quick-and-dirty script to rssify any directory on the filesystem.
+#
+# an example of use on the current directory :
+#
+# python2.5 /usr/local/bin/rssdir.py --prefix http://www.foo.be/cours/ . >rss.xml
+#
+# python2.5 is not really needed, except for ElementTree (in the standard
+# library since 2.5), but you are free to install it.
+
+import os, fnmatch
+import time
+import sys
+import xml.etree.ElementTree as ET
+from optparse import OptionParser
+
+version = "0.1"
+
+# recursive file listing function from the ASPN cookbook
+def all_files(root, patterns='*', single_level=False, yield_folders=False):
+    patterns = patterns.split(';')
+    for path, subdirs, files in os.walk(root):
+        if yield_folders:
+            files.extend(subdirs)
+        files.sort()
+        for name in files:
+            for pattern in patterns:
+                if fnmatch.fnmatch(name, pattern):
+                    yield os.path.join(path, name)
+                    break
+        if single_level:
+            break
+
+def date_files(filelist):
+    date_filename_list = []
+
+    for filename in filelist:
+        stats = os.stat(filename)
+        last_update = stats[8]  # st_mtime
+        date_filename_tuple = last_update, filename
+        date_filename_list.append(date_filename_tuple)
+
+    return date_filename_list
+
+def date_as_rfc(value):
+    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))
+
+
+def build_rss(myitem, maxitem):
+
+    RSSroot = ET.Element('rss', {'version': '2.0'})
+    RSSchannel = ET.SubElement(RSSroot, 'channel')
+
+    ET.SubElement(RSSchannel, 'title').text = 'RSS feed of ' + str(title)
+    ET.SubElement(RSSchannel, 'link').text = link
+    ET.SubElement(RSSchannel, 'description').text = 'A directory RSSified by rssdir.py ' + version
+    ET.SubElement(RSSchannel, 'generator').text = 'A directory RSSified by rssdir.py ' + version
+    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time())
+
+    for bloodyitem in myitem[0:maxitem]:
+
+        RSSitem = ET.SubElement(RSSchannel, 'item')
+        ET.SubElement(RSSitem, 'title').text = bloodyitem[1]
+        ET.SubElement(RSSitem, 'pubDate').text = date_as_rfc(bloodyitem[0])
+        ET.SubElement(RSSitem, 'description').text = prefixurl + bloodyitem[1]
+        ET.SubElement(RSSitem, 'guid').text = prefixurl + bloodyitem[1]
+
+    RSSfeed = ET.ElementTree(RSSroot)
+    feed = ET.tostring(RSSroot)
+    return feed
+
+
+def complete_feed(myfeed):
+
+    myheader = '<?xml version="1.0" encoding="UTF-8"?>'
+    return myheader + str(myfeed)
+
+
+usage = "usage: %prog [options] directory"
+parser = OptionParser(usage)
+
+parser.add_option("-p", "--prefix", dest="prefix", help="http prefix to be used for each entry, default none")
+parser.add_option("-t", "--title", dest="title", help="set a title to the rss feed, default using prefix", type="string")
+parser.add_option("-l", "--link", dest="link", help="http link set, default is prefix and none if prefix not set")
+parser.add_option("-m", "--maxitem", dest="maxitem", help="maximum item to list in the feed, default 32", type="int")
+
+(options, args) = parser.parse_args()
+
+if len(args) != 1:
+    parser.print_help()
+    parser.error("incorrect number of arguments")
+
+if options.prefix is None:
+    prefixurl = ''
+else:
+    prefixurl = options.prefix
+
+if options.link is None:
+    link = options.prefix
+else:
+    link = options.link
+
+if options.title is None:
+    title = options.prefix
+else:
+    title = options.title
+
+if options.maxitem is None:
+    maxitem = 32
+else:
+    maxitem = options.maxitem
+
+
+mylist = date_files(all_files(args[0]))
+
+mylist.sort()
+mylist.reverse()
+
+print complete_feed(build_rss(mylist, maxitem))
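rssdir.py's pipeline is: walk the tree, pair every file with its mtime, sort newest first, hand the list to build_rss. The same pairing and ordering, condensed (an illustrative rewrite, not the script's API):

    import os

    def newest_first(root):
        # (mtime, path) tuples, newest first -- the shape rssdir.py
        # hands to build_rss()
        entries = []
        for path, subdirs, files in os.walk(root):
            for name in files:
                full = os.path.join(path, name)
                entries.append((os.path.getmtime(full), full))
        entries.sort()
        entries.reverse()
        return entries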
diff --git a/bin/rssinternetdraft.py b/bin/rssinternetdraft.py
new file mode 100644
index 0000000..8105ba4
--- /dev/null
+++ b/bin/rssinternetdraft.py
@@ -0,0 +1,53 @@
+#
+# quick-and-dirty(tm) script to gather IETF Internet-Draft announces
+# from an mbox and to generate a nice RSS feed of the recent announces.
+#
+# for more information : http://www.foo.be/ietf/id/
+
+import mailbox
+import time
+import re
+import xml.etree.ElementTree as ET
+
+date_rfc2822 = "%a, %d %b %Y %H:%M:%S"
+
+tmsg = []
+
+def date_as_rfc(value):
+    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))
+
+def build_rss(myitem, maxitem):
+
+    RSSroot = ET.Element('rss', {'version': '2.0'})
+    RSSchannel = ET.SubElement(RSSroot, 'channel')
+
+    ET.SubElement(RSSchannel, 'title').text = 'Latest Internet-Drafts (IDs) Published - IETF - custom RSS feed'
+    ET.SubElement(RSSchannel, 'link').text = 'http://www.foo.be/ietf/id/'
+    ET.SubElement(RSSchannel, 'description').text = 'Latest Internet-Drafts (IDs) Published - IETF - custom RSS feed'
+    ET.SubElement(RSSchannel, 'generator').text = 'rssany extended for parsing IETF IDs - http://www.foo.be/cgi-bin/wiki.pl/RssAny'
+#    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time())
+    ET.SubElement(RSSchannel, 'pubDate').text = date_as_rfc(time.time() - 10000)
+
+    for bloodyitem in myitem[0:maxitem]:
+        RSSitem = ET.SubElement(RSSchannel, 'item')
+        ET.SubElement(RSSitem, 'title').text = bloodyitem[1]
+        ET.SubElement(RSSitem, 'pubDate').text = date_as_rfc(bloodyitem[0])
+        ET.SubElement(RSSitem, 'description').text = '<pre>' + bloodyitem[2] + '</pre>'
+        ET.SubElement(RSSitem, 'guid').text = "http://tools.ietf.org/html/" + bloodyitem[3]
+        ET.SubElement(RSSitem, 'link').text = "http://tools.ietf.org/html/" + bloodyitem[3]
+    RSSfeed = ET.ElementTree(RSSroot)
+    feed = ET.tostring(RSSroot)
+    return feed
+
+for message in mailbox.mbox('/var/spool/mail/ietf'):
+    subject = message['subject']
+    date = message['date']
+    date_epoch = int(time.mktime(time.strptime(date[0:-12], date_rfc2822)))
+    message_id = message['Message-Id']
+    body = message.get_payload()[0].get_payload()
+    draft_id = subject.split(":")[1].split(".")[0]
+    tmsg.append([date_epoch, subject, body, draft_id])
+
+tmsg.sort()
+tmsg.reverse()
+print build_rss(tmsg, 100)
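The draft name is derived from the announce subject with a dense one-liner. Unpacked, with a hypothetical subject line (the exact announce format is assumed from 2007-era I-D announcements):

    def draft_id_from_subject(subject):
        # "I-D ACTION:draft-ietf-foo-bar-02.txt" -> "draft-ietf-foo-bar-02"
        return subject.split(":")[1].split(".")[0].strip()

    # draft_id_from_subject("I-D ACTION:draft-ietf-foo-bar-02.txt")
    # -> 'draft-ietf-foo-bar-02'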
diff --git a/bin/rssmerge.py b/bin/rssmerge.py
new file mode 100644
index 0000000..684e234
--- /dev/null
+++ b/bin/rssmerge.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
+#
+# rssmerge.py is a simple script to gather rss feeds and merge them in reverse
+# time order. Useful to keep track of recent events.
+#
+# this is still an early prototype and assumes that you have full control of the
+# remote rss feeds (if not, you may have some security issues).
+#
+# TODO : - rss 2.0 and atom output
+#        - full html output
+#
+# example of use :
+# python2.5 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
+# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom"
+# "http://a.6f2.net/cgi-bin/gitweb.cgi?p=adulau/.git;a=rss"
+# "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
+
+import feedparser
+import sys, os
+import time
+import datetime
+import hashlib
+from optparse import OptionParser
+import cgi
+
+feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
+
+def RenderMerge(itemlist, output="text"):
+
+    i = 0
+
+    if output == "text":
+        for item in itemlist:
+            i = i + 1
+            # keep a consistent datetime representation (otherwise use allitem[item[1]]['updated'])
+            timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]['epoch'])
+
+            print str(i) + ":" + allitem[item[1]]['title'].encode('utf-8') + ":" + timetuple.ctime() + ":" + allitem[item[1]]['link']
+
+            if i == int(options.maxitem):
+                break
+
+    if output == "phtml":
+        # phtml: emit the merged list as an (X)HTML fragment,
+        # escaped with cgi.escape
+        print "<ul>"
+        for item in itemlist:
+            i = i + 1
+            timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]['epoch'])
+            print "<li><a href=\"" + cgi.escape(allitem[item[1]]['link'], True) + "\">" + cgi.escape(allitem[item[1]]['title']).encode('utf-8') + "</a> (" + timetuple.ctime() + ")</li>"
+            if i == int(options.maxitem):
+                break
+        print "</ul>"
+
+
+usage = "usage: %prog [options] url"
+parser = OptionParser(usage)
+
+parser.add_option("-m", "--maxitem", dest="maxitem", help="maximum item to list in the feed, default 200")
+parser.add_option("-o", "--output", dest="output", help="output format (text, phtml), default text")
+
+# 2007-11-10 11:25:51
+pattern = '%Y-%m-%d %H:%M:%S'
+
+(options, args) = parser.parse_args()
+
+if options.output is None:
+    options.output = "text"
+
+if options.maxitem is None:
+    options.maxitem = 200
+
+allitem = {}
+
+for url in args:
+
+    #print url
+
+    d = feedparser.parse(url)
+
+    for el in d.entries:
+
+        eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
+        elepoch = int(time.mktime(time.strptime(str(eldatetime), pattern)))
+        linkkey = hashlib.md5(el.link.encode('utf-8')).hexdigest()
+        allitem[linkkey] = {}
+        allitem[linkkey]['link'] = str(el.link)
+        allitem[linkkey]['epoch'] = int(elepoch)
+        allitem[linkkey]['updated'] = el.updated
+        allitem[linkkey]['title'] = el.title
+
+
+itemlist = []
+
+for something in allitem.keys():
+    epochkeytuple = (allitem[something]['epoch'], something)
+    itemlist.append(epochkeytuple)
+
+itemlist.sort()
+itemlist.reverse()
+
+RenderMerge(itemlist, options.output)
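rssmerge.py's merge strategy, reduced to its essence: key every entry by the MD5 of its link, so the same item seen in several feeds is kept only once, then sort by epoch, newest first. A standalone sketch of that idea (names and the tuple shape are illustrative):

    import hashlib

    def merge_entries(feeds):
        # feeds: iterable of iterables of (epoch, link, title) tuples;
        # on duplicate links the last feed wins, as in rssmerge.py
        merged = {}
        for feed in feeds:
            for epoch, link, title in feed:
                key = hashlib.md5(link.encode('utf-8')).hexdigest()
                merged[key] = (epoch, link, title)
        entries = merged.values()
        entries.sort()
        entries.reverse()  # newest first
        return entries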