From 85381a7673d673a4dbcfa887e60f7e9730a314e1 Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Sun, 11 Feb 2024 11:54:31 +0100 Subject: [PATCH] fix: [rssmerge] Improved the summary text when it's coming from RSS feed without Title - Title is built from the summary text and the HTML code is removed - An option has been added to set the limit of the summary --- bin/rssmerge.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/bin/rssmerge.py b/bin/rssmerge.py index 4ddf6bf..990070c 100644 --- a/bin/rssmerge.py +++ b/bin/rssmerge.py @@ -24,6 +24,7 @@ import datetime import hashlib from optparse import OptionParser import html +from bs4 import BeautifulSoup feedparser.USER_AGENT = "rssmerge.py +http://www.foo.be/" @@ -37,7 +38,6 @@ def RenderMerge(itemlist, output="text"): i = i + 1 # Keep consistent datetime representation if not use allitem[item[1]]['updated'] timetuple = datetime.datetime.fromtimestamp(allitem[item[1]]["epoch"]) - print( str(i) + ":" @@ -92,12 +92,21 @@ parser.add_option( "-m", "--maxitem", dest="maxitem", + default=200, help="maximum item to list in the feed, default 200", ) +parser.add_option( + "-s", + "--summarysize", + dest="summarysize", + default=60, + help="maximum size of the summary if a title is not present", +) parser.add_option( "-o", "--output", dest="output", + default="text", help="output format (text, phtml, markdown), default text", ) @@ -106,12 +115,6 @@ pattern = "%Y-%m-%d %H:%M:%S" (options, args) = parser.parse_args() -if options.output == None: - options.output = "text" - -if options.maxitem == None: - options.maxitem = 200 - allitem = {} for url in args: @@ -137,7 +140,13 @@ for url in args: allitem[linkkey]["link"] = str(el.link) allitem[linkkey]["epoch"] = int(elepoch) allitem[linkkey]["updated"] = el.updated - allitem[linkkey]["title"] = el.title + if "title" in el: + allitem[linkkey]["title"] = html.unescape(el.title) + else: + cleantext = BeautifulSoup(el.summary, "lxml").text + allitem[linkkey]["title"] = cleantext[: options.summarysize] + + print(allitem[linkkey]["title"]) itemlist = []