diff --git a/bin/rsscluster.py b/bin/rsscluster.py
index 43e4f77..b6e2d8a 100644
--- a/bin/rsscluster.py
+++ b/bin/rsscluster.py
@@ -7,81 +7,103 @@
-# time interval (expressed in number of days). The maxitem is the
-# number of item maximum after the clustering.
+# time interval (expressed in number of days). The maxitem option is the
+# maximum number of items kept after the clustering.
#
-# an example use is for del.icio.us where you can have a lot of bookmarks during
-# one day and you want to cluster them in one single item in RSS or in (X)HTML.
+# an example use is for Mastodon where you can have a lot of toots during
+# one day and you want to cluster them into a single item in RSS or (X)HTML.
-#
-# example of use :
-# python2.5 rsscluster.py --interval 5 --maxitem 20 "http://del.icio.us/rss/adulau" >adulau.xml
+#
+# example of use:
+# python3 rsscluster.py --interval 5 --maxitem 20 "https://paperbay.org/@a.rss" >adulau.xml
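+#
+# entries closer together than the interval are merged into a single RSS
+# item whose description concatenates their titles (or summaries) and
+# records the covered time span ("from: ... to: ...")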
import feedparser
-import sys,os
+import sys
+import os
import time
import datetime
import xml.etree.ElementTree as ET
import hashlib
from optparse import OptionParser
-#print sys.stdout.encoding
version = "0.2"
-feedparser.USER_AGENT = "rsscluster.py "+ version + " +http://www.foo.be/"
+feedparser.USER_AGENT = "rsscluster.py " + version + " +http://www.foo.be/"
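+# render an epoch as the RFC 822 date string expected in an RSS 2.0
+# <pubDate>, e.g. date_as_rfc(0) == "Thu, 01 Jan 1970 00:00:00 +0000"
+# (day and month names assume the default C locale)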
def date_as_rfc(value):
- return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(value))
+    # use gmtime(), not localtime(): the hard-coded +0000 offset is only
+    # correct when the formatted time is actually UTC
+    return time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(value))
-def build_rss(myitem,maxitem):
+def build_rss(myitem, maxitem):
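+    # NOTE: relies on the globals `url` and `options` set at option-parsing time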
- RSSroot = ET.Element( 'rss', {'version':'2.0'} )
- RSSchannel = ET.SubElement( RSSroot, 'channel' )
+ RSSroot = ET.Element("rss", {"version": "2.0"})
+ RSSchannel = ET.SubElement(RSSroot, "channel")
- ET.SubElement( RSSchannel, 'title' ).text = 'RSS cluster of ' + str(url) +' per '+options.interval+' days'
- ET.SubElement( RSSchannel, 'link' ).text = str(url)
- ET.SubElement( RSSchannel, 'description' ).text = 'RSS cluster of ' + str(url) +' per '+options.interval+' days'
- ET.SubElement( RSSchannel, 'generator' ).text = 'by rsscluster.py ' + version
- ET.SubElement( RSSchannel, 'pubDate' ).text = date_as_rfc(time.time())
+    feed_title = (
+        "RSS cluster of " + str(url) + " per " + str(options.interval) + " days"
+    )
+    ET.SubElement(RSSchannel, "title").text = feed_title
+    ET.SubElement(RSSchannel, "link").text = str(url)
+    ET.SubElement(RSSchannel, "description").text = feed_title
+ ET.SubElement(RSSchannel, "generator").text = "by rsscluster.py " + version
+ ET.SubElement(RSSchannel, "pubDate").text = date_as_rfc(time.time())
- for bloodyitem in myitem[0:maxitem]:
+ for bloodyitem in myitem[0:maxitem]:
- RSSitem = ET.SubElement ( RSSchannel, 'item' )
- ET.SubElement( RSSitem, 'title' ).text = 'clustered data of ' + date_as_rfc(float(bloodyitem[0])) +" for "+ str(url)
- ET.SubElement( RSSitem, 'pubDate' ).text = date_as_rfc(float(bloodyitem[0]))
- ET.SubElement( RSSitem, 'description').text = bloodyitem[1]
+ RSSitem = ET.SubElement(RSSchannel, "item")
+ ET.SubElement(RSSitem, "title").text = (
+ "clustered data of "
+ + date_as_rfc(float(bloodyitem[0]))
+ + " for "
+ + str(url)
+ )
+ ET.SubElement(RSSitem, "pubDate").text = date_as_rfc(float(bloodyitem[0]))
+ ET.SubElement(RSSitem, "description").text = bloodyitem[1]
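+        # derive a stable guid from the md5 of the clustered text, so
+        # re-running over the same data keeps item identifiers unchanged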
h = hashlib.md5()
- h.update(bloodyitem[1])
- ET.SubElement( RSSitem, 'guid').text = h.hexdigest()
+ h.update(bloodyitem[1].encode("utf-8"))
+        ET.SubElement(RSSitem, "guid", {"isPermaLink": "false"}).text = h.hexdigest()
- RSSfeed = ET.ElementTree(RSSroot)
- feed = ET.tostring(RSSroot)
- return feed
+    # encoding="unicode" makes tostring() return str instead of bytes
+    feed = ET.tostring(RSSroot, encoding="unicode")
+    return feed
def complete_feed(myfeed):
- myheader = ''
- return myheader + str(myfeed)
+    # prepend the XML declaration, which ET.tostring() does not emit itself
+    myheader = '<?xml version="1.0" encoding="UTF-8"?>\n'
+    return myheader + str(myfeed)
+
def DaysInSec(val):
- return int(val)*24*60*60
+ return int(val) * 24 * 60 * 60
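+# e.g. DaysInSec(5) == 432000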
+
usage = "usage: %prog [options] url"
parser = OptionParser(usage)
-parser.add_option("-m","--maxitem",dest="maxitem",help="maximum item to list in the feed, default 200")
-parser.add_option("-i","--interval",dest="interval",help="time interval expressed in days, default 1 day")
+parser.add_option(
+    "-m",
+    "--maxitem",
+    dest="maxitem",
+    help="maximum number of items to list in the feed (default: 200)",
+)
+parser.add_option(
+    "-i",
+    "--interval",
+    dest="interval",
+    help="time interval expressed in days (default: 1)",
+)
-#2007-11-10 11:25:51
-pattern = '%Y-%m-%d %H:%M:%S'
(options, args) = parser.parse_args()
-if options.interval == None:
- options.output = 1
+if options.interval is None:
+    options.interval = 1
-if options.maxitem == None:
-    options.maxitem = 200
+if options.maxitem is None:
+    options.maxitem = 200
if len(args) != 1:
@@ -100,35 +125,52 @@ clusteredepoch = []
tcluster = []
for el in d.entries:
+    # prefer the entry's modification time and fall back to its publication time
+    if "modified_parsed" in el:
+        eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
+    else:
+        eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.published_parsed))
-    eldatetime = datetime.datetime.fromtimestamp(time.mktime(el.modified_parsed))
-    elepoch = int(time.mktime(time.strptime(unicode(eldatetime), pattern)))
+    # the datetime already carries the epoch; no strptime() round-trip needed
+    elepoch = int(time.mktime(eldatetime.timetuple()))
if len(previousepoch):
-        #print el.link, int(previousepoch[0])-int(elepoch), interval
if len(clusteredepoch):
value = clusteredepoch.pop()
else:
value = ""
-        clusteredepoch.append(value+" "+el.title+"")
+        if "title" in el:
+            clusteredepoch.append(value + " " + el.title)
+        else:
+            clusteredepoch.append(value + " " + el.summary)
-
-
- if not ((int(previousepoch[0])-int(elepoch)) < interval):
+        # flush the cluster once the gap to the previous entry reaches the interval
+        if int(previousepoch[0]) - int(elepoch) >= interval:
value = clusteredepoch.pop()
starttimetuple = datetime.datetime.fromtimestamp(previousepoch[0])
-            endttimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
+            # capture the newest epoch before pop() can empty the list
+            firstepoch = previousepoch[0]
+            endtimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
- clusteredepoch.append(value+ " from: "+unicode(starttimetuple.ctime())+" to: "+unicode(endttimetuple.ctime()))
- startdatelist = unicode(previousepoch[0]),unicode(clusteredepoch[len(clusteredepoch)-1])
- tcluster.append(startdatelist)
- del previousepoch[0:len(previousepoch)]
- del clusteredepoch[0:len(clusteredepoch)]
+            clusteredepoch.append(
+                value
+                + " from: "
+                + str(starttimetuple.ctime())
+                + " to: "
+                + str(endtimetuple.ctime())
+            )
+            # record (start epoch, clustered text) even for a one-entry window;
+            # guarding on `if previousepoch:` here would silently drop it
+            startdatelist = str(firstepoch), str(clusteredepoch[-1])
+            tcluster.append(startdatelist)
+            previousepoch.clear()
+            clusteredepoch.clear()
else:
-        clusteredepoch.append(" "+el.title+"")
+        if "title" in el:
+            clusteredepoch.append(" " + el.title)
+        else:
+            clusteredepoch.append(" " + el.summary)
+
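+    # remember this entry's epoch; the next iteration compares against it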
previousepoch.append(elepoch)
# if last cluster list was not complete, we add the time period information.
@@ -136,13 +178,16 @@ if len(previousepoch):
value = clusteredepoch.pop()
starttimetuple = datetime.datetime.fromtimestamp(previousepoch[0])
-    endttimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
+    firstepoch = previousepoch[0]
+    endtimetuple = datetime.datetime.fromtimestamp(previousepoch.pop())
- clusteredepoch.append(value+ " from: "+unicode(starttimetuple.ctime())+" to: "+unicode(endttimetuple.ctime()))
- del previousepoch[0:len(previousepoch)]
+    clusteredepoch.append(
+        value
+        + " from: "
+        + str(starttimetuple.ctime())
+        + " to: "
+        + str(endtimetuple.ctime())
+    )
+    # the final cluster must be appended too, or it would be computed and
+    # then silently discarded
+    startdatelist = str(firstepoch), str(clusteredepoch[-1])
+    tcluster.append(startdatelist)
+    previousepoch.clear()
tcluster.sort()
tcluster.reverse()
-print complete_feed(build_rss(tcluster,int(options.maxitem)))
-
-
-
+print(complete_feed(build_rss(tcluster, int(options.maxitem))))
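+# the emitted XML's well-formedness can be checked with e.g.:
+#   xmllint --noout adulau.xml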