fix: [rssmerge] Improved the summary text when it's coming from an RSS feed

without a title

- Title is built from the summary text and the HTML code is removed
- An option (-s/--summarysize) has been added to set the maximum length of the summary text used as the title
This commit is contained in:
Alexandre Dulaunoy 2024-02-11 11:54:31 +01:00
parent 5957d24d85
commit 85381a7673
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD

View file

@ -24,6 +24,7 @@ import datetime
import hashlib
from optparse import OptionParser
import html
from bs4 import BeautifulSoup
feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/"
@ -37,7 +38,6 @@ def RenderMerge(itemlist, output="text"):
i = i + 1
# Keep consistent datetime representation if not use allitem[item[1]]['updated']
timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"])
print(
str(i)
+ ":"
@ -92,12 +92,21 @@ parser.add_option(
"-m",
"--maxitem",
dest="maxitem",
default=200,
help="maximum item to list in the feed, default 200",
)
# Maximum number of characters kept from the summary when an entry has no
# title. type="int" makes optparse convert a command-line value to an
# integer; without it "-s 80" is stored as the string "80", which cannot be
# used as a slice bound when the summary is truncated.
parser.add_option(
    "-s",
    "--summarysize",
    dest="summarysize",
    type="int",
    default=60,
    help="maximum size of the summary if a title is not present",
)
# Output format selector, stored in options.output.
parser.add_option(
    "-o",
    "--output",
    dest="output",
    default="text",
    help="output format (text, phtml, markdown), default text",
)
@ -106,12 +115,6 @@ pattern = "%Y-%m-%d %H:%M:%S"
# Parse the command line: `options` holds the option values and `args` the
# remaining positional arguments (iterated as feed URLs below).
(options, args) = parser.parse_args()

# Fallbacks in case an option comes back unset. None is a singleton, so it is
# tested by identity rather than equality.
if options.output is None:
    options.output = "text"
if options.maxitem is None:
    options.maxitem = 200

# Collected feed entries, keyed by linkkey.
allitem = {}
for url in args:
@ -137,7 +140,13 @@ for url in args:
allitem[linkkey]["link"] = str(el.link)
allitem[linkkey]["epoch"] = int(elepoch)
allitem[linkkey]["updated"] = el.updated
allitem[linkkey]["title"] = el.title
if "title" in el:
allitem[linkkey]["title"] = html.unescape(el.title)
else:
cleantext = BeautifulSoup(el.summary, "lxml").text
allitem[linkkey]["title"] = cleantext[: options.summarysize]
print(allitem[linkkey]["title"])
itemlist = []