mirror of
https://github.com/adulau/rss-tools.git
synced 2024-11-07 12:06:25 +00:00
fix: [rssmerge] Improved the title text used for RSS feed items that come
without a title - the title is now built from the summary text, with the HTML code removed - an option has been added to set the maximum length of that summary
This commit is contained in:
parent
5957d24d85
commit
85381a7673
1 changed file with 17 additions and 8 deletions
|
@ -24,6 +24,7 @@ import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
import html
|
import html
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
|
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
|
||||||
|
|
||||||
|
@ -37,7 +38,6 @@ def RenderMerge(itemlist, output="text"):
|
||||||
i = i + 1
|
i = i + 1
|
||||||
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
|
||||||
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
|
||||||
|
|
||||||
print(
|
print(
|
||||||
str(i)
|
str(i)
|
||||||
+ ":"
|
+ ":"
|
||||||
|
@ -92,12 +92,21 @@ parser.add_option(
|
||||||
"-m",
|
"-m",
|
||||||
"--maxitem",
|
"--maxitem",
|
||||||
dest="maxitem",
|
dest="maxitem",
|
||||||
|
default=200,
|
||||||
help="maximum item to list in the feed, default 200",
|
help="maximum item to list in the feed, default 200",
|
||||||
)
|
)
|
||||||
|
parser.add_option(
|
||||||
|
"-s",
|
||||||
|
"--summarysize",
|
||||||
|
dest="summarysize",
|
||||||
|
default=60,
|
||||||
|
help="maximum size of the summary if a title is not present",
|
||||||
|
)
|
||||||
parser.add_option(
|
parser.add_option(
|
||||||
"-o",
|
"-o",
|
||||||
"--output",
|
"--output",
|
||||||
dest="output",
|
dest="output",
|
||||||
|
default="text",
|
||||||
help="output format (text, phtml, markdown), default text",
|
help="output format (text, phtml, markdown), default text",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -106,12 +115,6 @@ pattern = "%Y-%m-%d %H:%M:%S"
|
||||||
|
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
if options.output == None:
|
|
||||||
options.output = "text"
|
|
||||||
|
|
||||||
if options.maxitem == None:
|
|
||||||
options.maxitem = 200
|
|
||||||
|
|
||||||
allitem = {}
|
allitem = {}
|
||||||
|
|
||||||
for url in args:
|
for url in args:
|
||||||
|
@ -137,7 +140,13 @@ for url in args:
|
||||||
allitem[linkkey]["link"] = str(el.link)
|
allitem[linkkey]["link"] = str(el.link)
|
||||||
allitem[linkkey]["epoch"] = int(elepoch)
|
allitem[linkkey]["epoch"] = int(elepoch)
|
||||||
allitem[linkkey]["updated"] = el.updated
|
allitem[linkkey]["updated"] = el.updated
|
||||||
allitem[linkkey]["title"] = el.title
|
if "title" in el:
|
||||||
|
allitem[linkkey]["title"] = html.unescape(el.title)
|
||||||
|
else:
|
||||||
|
cleantext = BeautifulSoup(el.summary, "lxml").text
|
||||||
|
allitem[linkkey]["title"] = cleantext[: options.summarysize]
|
||||||
|
|
||||||
|
print(allitem[linkkey]["title"])
|
||||||
|
|
||||||
|
|
||||||
itemlist = []
|
itemlist = []
|
||||||
|
|
Loading…
Reference in a new issue