commit 359fabc38e6c6aae87ae82d5b9e166ed38b3cc19 Author: Alexandre Dulaunoy Date: Sun Jan 9 18:43:18 2011 +0100 Initial version of wikirc2text diff --git a/README.md b/README.md new file mode 100644 index 0000000..45df70a --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +wikirc2text +=========== + +wikirc2text is a simple Python script to dump MediaWiki +RecentChanges as single text lines. The script is able to +keep the state of the already seen lines to avoid +outputting the same changes again. + +The idea behind the script is to have a simple command-line +interface to dump Wiki RecentChanges feed into other programs +like sendxmpp (http://sendxmpp.platon.sk/). As I didn't +want to flood the XMPP conference room with the same output, +I made wikirc2text to track the already seen lines. + +Usage +----- + + Usage: wikirc2text.py url(s) + + Options: + -h, --help show this help message and exit + -s, --state keep state of existing rcline seen and don't output + them + -c STATESEC, --cleanstate=STATESEC + expire states existing more than number of seconds + specified + +### Sample usage + + + % python wikirc2text.py "http://en.ekopedia.org/w/index.php?title=Special:RecentChanges&feed=atom" --state | head -4 + Ekopedia - Recent changes [en] + http://en.ekopedia.org/w/index.php?title=Ecological_rucksack&diff=8004&oldid=prev by Wibil @ Sun, 09 Jan 2011 08:09:03 +0000 + http://en.ekopedia.org/w/index.php?title=User:Wibil/to_do_list&diff=8003&oldid=prev by Wibil @ Sun, 09 Jan 2011 06:31:28 +0000 + http://en.ekopedia.org/User:Social_Networking by Social Networking @ Sat, 08 Jan 2011 20:19:42 +0000 + +If you make a second request just afterwards, there is no output because there are no new changes. +If you don't use the --state option, you'll get all the latest changes without checking +the state cache. + + % python wikirc2text.py "http://en.ekopedia.org/w/index.php?title=Special:RecentChanges&feed=atom" --state + +You can expire states older than an interval specified in seconds.
+ + % python wikirc2text.py -c 60 + 29 states deleted + +### Usage with sendxmpp + + % python wikirc2text.py --state "http://www.hackerspace.lu/w/index.php?title=Special:RecentChanges&feed=atom" | head -5 |sendxmpp -u yourbotname -p yourbotpassword -r yourbotressource -j an.xmpp.server -t -c aconference@conference.somewhere + +### Software required + +* Python 2.4 and up +* Universal Feed Parser - http://www.feedparser.org/ + diff --git a/wikirc2text.py b/wikirc2text.py new file mode 100644 index 0000000..59cf643 --- /dev/null +++ b/wikirc2text.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# +# wikirc2text - dump MediaWiki RecentChanges into text lines while keeping +# the state of the already seen lines. +# +# Copyright (C) 2011 Alexandre Dulaunoy (a AT foo.be) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import feedparser
import sys
from optparse import OptionParser
import time
import datetime

# Shelf file holding one entry per already-printed line:
# key = md5 hex digest of the line, value = epoch seconds when first seen.
STATE_FILE = "wikircstate.shelve"

feedparser.USER_AGENT = "wikirc2text.py"
usage = "usage: %s url(s)" % sys.argv[0]
parser = OptionParser(usage)

parser.add_option("-s", "--state", action="store_true", dest="state", help="keep state of existing rcline seen and don't output them", default=False)
# type="float" lets optparse reject a non-numeric interval with a usage
# error instead of a ValueError traceback later on.
parser.add_option("-c", "--cleanstate", dest="statesec", type="float", help="expire states existing more than number of seconds specified")

(options, args) = parser.parse_args()

# Compare against None (not truthiness) so that "-c 0" still means
# "expire everything" rather than "option not given".
if not args and options.statesec is None:
    print(usage)
    sys.exit()


def _now_epoch():
    """Return the current local time as whole epoch seconds (a float)."""
    return time.mktime(datetime.datetime.now().timetuple())


def core():
    """Print one text line per RecentChanges entry of every URL in ``args``.

    For each feed the feed title is printed first, then one line per entry
    of the form ``<link> by <author> @ <date>``.  When ``options.state`` is
    set, each line is hashed and looked up in the state shelf: lines already
    recorded are skipped, new lines are recorded with the current time and
    printed.
    """
    s = None
    if options.state:
        # Imported lazily so a run without --state needs neither module.
        import shelve
        import hashlib
        s = shelve.open(STATE_FILE)

    try:
        for url in args:
            d = feedparser.parse(url)
            title = d.feed.get('title')
            if title is None:
                # Typically a failed fetch or a document that is not a feed.
                sys.stderr.write("%s: no feed title found\n" % url)
            else:
                print(title)
            for e in d['entries']:
                nicedate = time.strftime("%a, %d %b %Y %H:%M:%S +0000", e.updated_parsed)
                rcline = e.links[0]['href'] + " by " + e.author_detail['name'] + " @ " + nicedate
                if s is None:
                    print(rcline)
                    continue
                shkey = hashlib.md5(rcline.encode('utf-8')).hexdigest()
                if shkey not in s:
                    s[shkey] = _now_epoch()
                    print(rcline)
    finally:
        # Always flush and release the shelf, even if a feed raised.
        if s is not None:
            s.close()


def expirecache(seconds):
    """Delete every state older than ``seconds`` and print how many were removed.

    ``seconds`` may be a number or a numeric string.
    """
    import shelve
    maxage = float(seconds)
    s = shelve.open(STATE_FILE)
    sdeleted = 0
    try:
        cepoch = _now_epoch()
        # Iterate over a snapshot of the keys: deleting from the shelf
        # while iterating over it directly is not safe.
        for k in list(s.keys()):
            if cepoch - s[k] > maxage:
                del s[k]
                sdeleted += 1
    finally:
        s.close()
    print("%s states deleted" % sdeleted)


if options.statesec is None:
    core()
else:
    expirecache(options.statesec)