mirror of
https://github.com/adulau/rss-tools.git
synced 2024-11-22 01:57:10 +00:00
chg: [rssmerge] Markdown output improved and code clean-up
This commit is contained in:
parent
779f49d143
commit
502018fcbb
1 changed file with 27 additions and 45 deletions
|
@ -1,21 +1,16 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# a at foo dot be - Alexandre Dulaunoy - http://www.foo.be/cgi-bin/wiki.pl/RssAny
|
# a at foo dot be - Alexandre Dulaunoy - https://git.foo.be/adulau/rss-tools
|
||||||
#
|
#
|
||||||
# rssmerge.py is a simple script to gather rss feed and merge them in reverse
|
# rssmerge.py is a simple script designed to aggregate RSS feeds and merge them in reverse chronological order.
|
||||||
# time order. Useful to keep track of recent events.
|
# It outputs the merged content in text, HTML, or Markdown format. This tool is useful for tracking recent events
|
||||||
|
# from various feeds and publishing them on your website.
|
||||||
#
|
#
|
||||||
# this is still an early prototype and assume that you have full control of the
|
# Sample usage:
|
||||||
# remote rss feeds (if not you may have some security issues).
|
|
||||||
#
|
#
|
||||||
# TODO : - rss 2.0 and atom output
|
# python3 rssmerge.py "https://git.foo.be/adulau.rss" "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom"
|
||||||
# - full html output
|
# "https://github.com/adulau.atom" -o markdown --maxitem 20
|
||||||
#
|
|
||||||
# example of use :
|
|
||||||
# python3 rssmerge.py --output phtml --maxitem 20 "http://www.foo.be/cgi-bin/wiki.pl?action=journal&tile=AdulauMessyDesk"
|
|
||||||
# "http://api.flickr.com/services/feeds/photos_public.gne?id=31797858@N00&lang=en-us&format=atom" "http://a.6f2.net/cgi-bin/gitweb.cgi?
|
|
||||||
# p=adulau/.git;a=rss" "http://www.librarything.com/rss/reviews/adulau" > /tmp/test.inc
|
|
||||||
|
|
||||||
import feedparser
|
import feedparser
|
||||||
import sys, os
|
import sys, os
|
||||||
|
@ -25,28 +20,23 @@ import hashlib
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
import html
|
import html
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
feedparser.USER_AGENT = "rssmerge.py +https://github.com/adulau/rss-tools"
|
feedparser.USER_AGENT = "rssmerge.py +https://github.com/adulau/rss-tools"
|
||||||
|
|
||||||
|
|
||||||
def RenderMerge(itemlist, output="text"):
|
def RenderMerge(itemlist, output="text"):
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
|
|
||||||
if output == "text":
|
if output == "text":
|
||||||
for item in itemlist:
|
for item in itemlist:
|
||||||
i = i + 1
|
i = i + 1
|
||||||
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
||||||
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
link = allitem[item[1]]["link"]
|
||||||
print(
|
title = html.escape(allitem[item[1]]["title"])
|
||||||
str(i)
|
timestamp = datetime.datetime.fromtimestamp(
|
||||||
+ ":"
|
allitem[item[1]]["epoch"]
|
||||||
+ allitem[item[1]]["title"]
|
).ctime()
|
||||||
+ ":"
|
print(f'{i}:{title}:{timestamp}:{link}')
|
||||||
+ timetuple.ctime()
|
|
||||||
+ ":"
|
|
||||||
+ allitem[item[1]]["link"]
|
|
||||||
)
|
|
||||||
|
|
||||||
if i == int(options.maxitem):
|
if i == int(options.maxitem):
|
||||||
break
|
break
|
||||||
|
@ -56,16 +46,12 @@ def RenderMerge(itemlist, output="text"):
|
||||||
for item in itemlist:
|
for item in itemlist:
|
||||||
i = i + 1
|
i = i + 1
|
||||||
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
||||||
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
link = allitem[item[1]]["link"]
|
||||||
print(
|
title = html.escape(allitem[item[1]]["title"])
|
||||||
'<li><a href="'
|
timestamp = datetime.datetime.fromtimestamp(
|
||||||
+ str(str(allitem[item[1]]["link"]))
|
allitem[item[1]]["epoch"]
|
||||||
+ '">'
|
).ctime()
|
||||||
+ str(str(html.escape(allitem[item[1]]["title"])))
|
print(f'<li><a href="{link}"> {title}</a> --- (<i>{timestamp}</i>)</li>')
|
||||||
+ "</a> --- (<i>"
|
|
||||||
+ timetuple.ctime()
|
|
||||||
+ "</i>)</li>"
|
|
||||||
)
|
|
||||||
if i == int(options.maxitem):
|
if i == int(options.maxitem):
|
||||||
break
|
break
|
||||||
print("</ul>")
|
print("</ul>")
|
||||||
|
@ -73,14 +59,13 @@ def RenderMerge(itemlist, output="text"):
|
||||||
if output == "markdown":
|
if output == "markdown":
|
||||||
for item in itemlist:
|
for item in itemlist:
|
||||||
i = i + 1
|
i = i + 1
|
||||||
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
title = html.escape(allitem[item[1]]["title"])
|
||||||
print(
|
link = allitem[item[1]]["link"]
|
||||||
"- ["
|
timestamp = datetime.datetime.fromtimestamp(
|
||||||
+ str(html.escape(allitem[item[1]]["title"]))
|
allitem[item[1]]["epoch"]
|
||||||
+ "]("
|
).ctime()
|
||||||
+ str(allitem[item[1]]["link"])
|
domain = urlparse(allitem[item[1]]["link"]).netloc
|
||||||
+ ")"
|
print(f'- {domain} [{title}]({link}) @{timestamp}')
|
||||||
)
|
|
||||||
if i == int(options.maxitem):
|
if i == int(options.maxitem):
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -118,9 +103,6 @@ pattern = "%Y-%m-%d %H:%M:%S"
|
||||||
allitem = {}
|
allitem = {}
|
||||||
|
|
||||||
for url in args:
|
for url in args:
|
||||||
|
|
||||||
# print url
|
|
||||||
|
|
||||||
d = feedparser.parse(url)
|
d = feedparser.parse(url)
|
||||||
|
|
||||||
for el in d.entries:
|
for el in d.entries:
|
||||||
|
|
Loading…
Reference in a new issue