mirror of
https://github.com/adulau/wikirc2text.git
synced 2024-12-22 08:46:02 +00:00
Initial version of wikirc2text
This commit is contained in:
commit
359fabc38e
2 changed files with 142 additions and 0 deletions
56
README.md
Normal file
56
README.md
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
wikirc2text
|
||||||
|
===========
|
||||||
|
|
||||||
|
wikirc2text is a simple Python script to dump MediaWiki
|
||||||
|
RecentChanges as single lines of text. The script is able to
|
||||||
|
keep the state of the already seen lines so that it does not
|
||||||
|
output the same changes again.
|
||||||
|
|
||||||
|
The idea behind the script is to have a simple command-line
|
||||||
|
interface to dump a wiki RecentChanges feed into other programs
|
||||||
|
like sendxmpp (http://sendxmpp.platon.sk/). As I didn't
|
||||||
|
want to flood the XMPP conference room with the same output,
|
||||||
|
I made wikirc2text to track the already seen lines.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
|
||||||
|
Usage: wikirc2text.py url(s)
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-s, --state keep state of existing rcline seen and don't output
|
||||||
|
them
|
||||||
|
-c STATESEC, --cleanstate=STATESEC
|
||||||
|
expire states existing more than number of seconds
|
||||||
|
specified
|
||||||
|
|
||||||
|
### Sample usage
|
||||||
|
|
||||||
|
|
||||||
|
% python wikirc2text.py "http://en.ekopedia.org/w/index.php?title=Special:RecentChanges&feed=atom" --state | head -4
|
||||||
|
Ekopedia - Recent changes [en]
|
||||||
|
http://en.ekopedia.org/w/index.php?title=Ecological_rucksack&diff=8004&oldid=prev by Wibil @ Sun, 09 Jan 2011 08:09:03 +0000
|
||||||
|
http://en.ekopedia.org/w/index.php?title=User:Wibil/to_do_list&diff=8003&oldid=prev by Wibil @ Sun, 09 Jan 2011 06:31:28 +0000
|
||||||
|
http://en.ekopedia.org/User:Social_Networking by Social Networking @ Sat, 08 Jan 2011 20:19:42 +0000
|
||||||
|
|
||||||
|
If you make a second request right afterwards, there is no output because there are no new changes.
|
||||||
|
If you don't use the --state option, you'll get all the latest changes without checking
|
||||||
|
the state cache.
|
||||||
|
|
||||||
|
% python wikirc2text.py "http://en.ekopedia.org/w/index.php?title=Special:RecentChanges&feed=atom" --state
|
||||||
|
|
||||||
|
You can expire state entries that are older than an interval specified in seconds.
|
||||||
|
|
||||||
|
% python wikirc2text.py -c 60
|
||||||
|
29 states deleted
|
||||||
|
|
||||||
|
### Usage with sendxmpp
|
||||||
|
|
||||||
|
% python wikirc2text.py --state "http://www.hackerspace.lu/w/index.php?title=Special:RecentChanges&feed=atom" | head -5 |sendxmpp -u yourbotname -p yourbotpassword -r yourbotressource -j an.xmpp.server -t -c aconference@conference.somewhere
|
||||||
|
|
||||||
|
### Software required
|
||||||
|
|
||||||
|
* Python 2.5 and up (the --state option relies on hashlib, which was added in Python 2.5)
|
||||||
|
* Universal Feed Parser - http://www.feedparser.org/
|
||||||
|
|
86
wikirc2text.py
Normal file
86
wikirc2text.py
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# wikirc2text - dump MediaWiki RecentChanges into text line while keeping
|
||||||
|
# the state of the already seen lines.
|
||||||
|
#
|
||||||
|
# Copyright (C) 2011 Alexandre Dulaunoy (a AT foo.be)
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
import feedparser
|
||||||
|
import sys
|
||||||
|
from optparse import OptionParser
|
||||||
|
import time
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
# Identify this script to the remote wiki when feedparser fetches a feed.
feedparser.USER_AGENT = "wikirc2text.py"
usage = "usage: %s url(s)" % sys.argv[0]
parser = OptionParser(usage)

parser.add_option("-s", "--state", action="store_true", dest="state", help="keep state of existing rcline seen and don't output them", default=False)
parser.add_option("-c", "--cleanstate", dest="statesec", help="expire states existing more than number of seconds specified")

(options, args) = parser.parse_args()

# Validate the command line BEFORE touching the state file, so a usage
# error never leaves a shelf opened (and never closed) behind it.
if len(args) < 1 and not options.statesec:
    print(usage)
    # sys.exit() rather than the site-provided exit() helper, which is not
    # guaranteed to exist (e.g. when running with "python -S").
    sys.exit()

# The shelf is only needed when feeds are actually dumped. In -c/--cleanstate
# mode expirecache() opens the shelf itself; opening it here as well would
# hold a second, never-closed handle on the same file.
if options.state and not options.statesec:
    import shelve
    import hashlib
    s = shelve.open("wikircstate.shelve")
|
||||||
|
|
||||||
|
def core():
    """Print the RecentChanges entries of every feed URL in ``args``.

    For each URL the feed title is printed first, followed by one line per
    entry of the form ``<link> by <author> @ <date>``.

    When ``options.state`` is set, each line is keyed by its MD5 hex digest
    in the module-level shelf ``s``: lines whose key is already stored are
    skipped, new ones are stored with the current epoch time and printed.
    The shelf is closed before returning, even if fetching or formatting a
    feed raises.
    """
    try:
        for url in args:
            d = feedparser.parse(url)
            print(d.feed.title)
            for e in d['entries']:
                # The "+0000" suffix is a literal part of the format string.
                nicedate = time.strftime("%a, %d %b %Y %H:%M:%S +0000", e.updated_parsed)
                rcline = e.links[0]['href'] + " by " + e.author_detail['name'] + " @ " + nicedate
                if options.state:
                    shkey = hashlib.md5(rcline.encode('utf-8')).hexdigest()
                    if shkey in s:
                        # Already reported in a previous run.
                        continue
                    # Remember when the line was first seen so that
                    # expirecache() can age it out later.
                    s[shkey] = time.mktime(datetime.datetime.now().timetuple())
                print(rcline)
    finally:
        # Always flush/close the state file so recorded lines are not lost
        # when a later feed fails.
        if options.state:
            s.close()
|
||||||
|
|
||||||
|
def expirecache(seconds):
    """Delete state entries older than ``seconds`` and print how many were removed.

    Opens the ``wikircstate.shelve`` shelf, removes every entry whose stored
    epoch timestamp is more than ``seconds`` older than the current time,
    closes the shelf and prints ``"<n> states deleted"``.

    seconds -- maximum age in seconds; anything accepted by ``float()``
               (the command line passes it as a string).

    Raises ValueError if ``seconds`` cannot be converted to a float.
    """
    # Imported locally: the module-level import of shelve only happens when
    # --state is given, which is not required for -c/--cleanstate.
    import shelve

    maxage = float(seconds)
    s = shelve.open("wikircstate.shelve")
    sdeleted = 0
    try:
        # One reference time for the whole sweep.
        cepoch = time.mktime(datetime.datetime.now().timetuple())
        # Iterate over a snapshot of the keys: deleting from the shelf while
        # iterating over it directly is not safe.
        for k in list(s.keys()):
            if cepoch - s[k] > maxage:
                del s[k]
                sdeleted += 1
    finally:
        s.close()
    print("%s states deleted" % str(sdeleted))
|
||||||
|
|
||||||
|
# Entry point: with -c/--cleanstate expire old state entries, otherwise
# dump the RecentChanges of the feeds given on the command line.
if options.statesec:
    expirecache(options.statesec)
else:
    core()
|
||||||
|
|
Loading…
Reference in a new issue