mirror of
https://github.com/adulau/wikirc2text.git
synced 2024-12-22 00:35:59 +00:00
Initial version of wikirc2text
This commit is contained in:
commit
359fabc38e
2 changed files with 142 additions and 0 deletions
56
README.md
Normal file
56
README.md
Normal file
|
@ -0,0 +1,56 @@
|
|||
wikirc2text
|
||||
===========
|
||||
|
||||
wikirc2text is a simple Python script to dump MediaWiki
|
||||
RecentChanges as single lines of text. The script is able to
|
||||
keep the state of the already seen lines, to avoid
|
||||
outputting the same changes again.
|
||||
|
||||
The idea behind the script is to have a simple command-line
|
||||
interface to dump a wiki RecentChanges feed into other programs
|
||||
like sendxmpp (http://sendxmpp.platon.sk/). As I didn't
|
||||
want to flood the XMPP conference room with the same output,
|
||||
I made wikirc2text to track the already seen lines.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Usage: wikirc2text.py url(s)
|
||||
|
||||
Options:
|
||||
-h, --help show this help message and exit
|
||||
-s, --state keep state of existing rcline seen and don't output
|
||||
them
|
||||
-c STATESEC, --cleanstate=STATESEC
|
||||
expire states existing more than number of seconds
|
||||
specified
|
||||
|
||||
### Sample usage
|
||||
|
||||
|
||||
% python wikirc2text.py "http://en.ekopedia.org/w/index.php?title=Special:RecentChanges&feed=atom" --state | head -4
|
||||
Ekopedia - Recent changes [en]
|
||||
http://en.ekopedia.org/w/index.php?title=Ecological_rucksack&diff=8004&oldid=prev by Wibil @ Sun, 09 Jan 2011 08:09:03 +0000
|
||||
http://en.ekopedia.org/w/index.php?title=User:Wibil/to_do_list&diff=8003&oldid=prev by Wibil @ Sun, 09 Jan 2011 06:31:28 +0000
|
||||
http://en.ekopedia.org/User:Social_Networking by Social Networking @ Sat, 08 Jan 2011 20:19:42 +0000
|
||||
|
||||
If you run a second request right afterwards, there is no output because there are no new changes.
|
||||
If you don't use the --state option, you'll get all the latest changes without checking
|
||||
the state cache.
|
||||
|
||||
% python wikirc2text.py "http://en.ekopedia.org/w/index.php?title=Special:RecentChanges&feed=atom" --state
|
||||
|
||||
You can expire the states that are older than an interval specified in seconds.
|
||||
|
||||
% python wikirc2text.py -c 60
|
||||
29 states deleted
|
||||
|
||||
### Usage with sendxmpp
|
||||
|
||||
% python wikirc2text.py --state "http://www.hackerspace.lu/w/index.php?title=Special:RecentChanges&feed=atom" | head -5 |sendxmpp -u yourbotname -p yourbotpassword -r yourbotressource -j an.xmpp.server -t -c aconference@conference.somewhere
|
||||
|
||||
### Software required
|
||||
|
||||
* Python 2.4 and up
|
||||
* Universal Feed Parser - http://www.feedparser.org/
|
||||
|
86
wikirc2text.py
Normal file
86
wikirc2text.py
Normal file
|
@ -0,0 +1,86 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# wikirc2text - dump MediaWiki RecentChanges into text line while keeping
|
||||
# the state of the already seen lines.
|
||||
#
|
||||
# Copyright (C) 2011 Alexandre Dulaunoy (a AT foo.be)
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import feedparser
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
import time
|
||||
import datetime
|
||||
|
||||
# User-Agent string that feedparser sends when fetching the feeds.
feedparser.USER_AGENT = "wikirc2text.py"

usage = "usage: %s url(s)" % sys.argv[0]
parser = OptionParser(usage)

parser.add_option("-s", "--state", action="store_true", dest="state", help="keep state of existing rcline seen and don't output them", default=False)
parser.add_option("-c", "--cleanstate", dest="statesec", help="expire states existing more than number of seconds specified")

(options, args) = parser.parse_args()

# Nothing to do without at least one feed URL or a --cleanstate request.
# Checked before the shelf is opened so a usage error never leaves it open.
if len(args) < 1 and not options.statesec:
    print(usage)
    # sys.exit() rather than the site-provided exit(), which is meant for
    # interactive sessions and is missing when Python runs with -S.
    sys.exit()

if options.state:
    # Only needed when tracking already-seen lines.
    import shelve
    import hashlib
    s = shelve.open("wikircstate.shelve")
|
||||
|
||||
def core():
    """Print the RecentChanges lines of every feed URL in ``args``.

    For each URL the feed title is printed, followed by one line per entry
    of the form ``<link> by <author> @ <date>``.

    When ``options.state`` is set, each line is keyed by the MD5 hex digest
    of its UTF-8 encoding in the module-level shelf ``s``: a line whose key
    is already stored is skipped, and a new key is stored together with the
    current epoch time so that it can be expired later.  The shelf is
    closed when the function ends, even if a feed or an entry raises
    part-way through.
    """
    # Imported locally: the module only imports hashlib when --state is set.
    import hashlib

    try:
        for url in args:
            d = feedparser.parse(url)
            print(d.feed.title)
            for e in d['entries']:
                nicedate = time.strftime("%a, %d %b %Y %H:%M:%S +0000", e.updated_parsed)
                rcline = e.links[0]['href'] + " by " + e.author_detail['name'] + " @ " + nicedate
                if not options.state:
                    print(rcline)
                    continue
                shkey = hashlib.md5(rcline.encode('utf-8')).hexdigest()
                if shkey not in s:
                    # Remember when the line was first seen (epoch seconds).
                    s[shkey] = time.time()
                    print(rcline)
    finally:
        if options.state:
            s.close()
|
||||
|
||||
def expirecache(seconds):
    """Delete the stored states older than ``seconds`` and print the count.

    Opens the ``wikircstate.shelve`` shelf, removes every entry whose stored
    epoch timestamp is more than ``seconds`` in the past, closes the shelf
    and prints ``"<n> states deleted"``.

    Args:
        seconds: maximum age of a state, as a number or a numeric string
            (anything ``float()`` accepts).

    Raises:
        ValueError: if ``seconds`` cannot be converted to a float.
    """
    import shelve

    # Convert once, before the shelf is opened, so bad input fails early.
    max_age = float(seconds)
    now = time.time()
    sdeleted = 0

    s = shelve.open("wikircstate.shelve")
    try:
        # Iterate over a snapshot of the keys: entries are deleted in the
        # loop, and a live view must not be modified while it is iterated.
        for k in list(s.keys()):
            if now - s[k] > max_age:
                del s[k]
                sdeleted += 1
    finally:
        s.close()

    print("%s states deleted" % str(sdeleted))
|
||||
|
||||
# Entry point: expire old states when --cleanstate was given, otherwise
# dump the RecentChanges feeds.
if options.statesec:
    expirecache(options.statesec)
else:
    core()
|
||||
|
Loading…
Reference in a new issue