zs

Zeitungsschau rss to email converter
git clone git://r-36.net/zs
Log | Files | Refs | LICENSE

commit 86b129e4386789f5331765d7b96098e6c2c7bde7
parent 4010b280ba3871965b852aad19c26f9331e861bd
Author: Christoph Lohmann <20h@r-36.net>
Date:   Sun, 15 Apr 2018 11:35:39 +0200

Allow relative URIs in article links.

+ Add feeduri for better filter handling.

Diffstat:
zeitungsschau/feed.py | 11++++++++---
zeitungsschau/feeddb.py | 4++--
zeitungsschau/feedemail.py | 21++++++++++++++++++++-
3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py @@ -414,9 +414,14 @@ def fetch(uri): ftype = "twtxt" if ftype == "xml": - return (rcode, parseatomfeed(fval)) + rval = (rcode, parseatomfeed(fval)) elif ftype == "twtxt": - return (rcode, parsetwtxtfeed(fval.decode("utf-8"), uri)) + rval = (rcode, parsetwtxtfeed(fval.decode("utf-8"), uri)) else: - return (rcode, parsejsonfeed(fval.decode("utf-8"))) + rval = (rcode, parsejsonfeed(fval.decode("utf-8"))) + + if rval[1] != None: + rval[1]["feeduri"] = uri + + return rval diff --git a/zeitungsschau/feeddb.py b/zeitungsschau/feeddb.py @@ -215,7 +215,7 @@ class feeddb(object): feed["articles"] = history[-2048:] for metakey in ("link", "title", "updated", "author", \ - "email"): + "email", "feeduri"): if metakey in curfeed: feed[metakey] = curfeed[metakey] @@ -232,7 +232,7 @@ class feeddb(object): return rfeed for metakey in ("link", "title", "updated", "author", \ - "email", "toemail"): + "email", "toemail", "feeduri"): if metakey in feed: rfeed[metakey] = feed[metakey] diff --git a/zeitungsschau/feedemail.py b/zeitungsschau/feedemail.py @@ -13,6 +13,7 @@ from email.header import Header import time import subprocess import lxml.html +import urllib.parse import html2text @@ -96,9 +97,27 @@ def send(feed, to, smtphost="localhost", smtpport=None, ssl="False",\ msg["Subject"] = subject if "link" in article: - msg["X-RSS-URL"] = article["link"] + if "://" not in article["link"]: + aurl = urllib.parse.urljoin(feed["feeduri"],\ + article["link"]) + if "gopher://" in aurl: + urls = urllib.parse.urlparse(aurl, \ + allow_fragments=False) + if urls.path.startswith("/0"): + aurl = "%s://%s%s" % \ + (urls.scheme, urls.netloc, \ + urls.path.replace(\ + "/0", "/1", 1)) + if len(urls.query) > 0: + aurl = "%s?%s" % \ + (aurl, urls.query) + else: + aurl = article["link"] + msg["X-RSS-URL"] = aurl if "link" in feed: msg["X-RSS-Feed"] = feed["link"] + else: + msg["X-RSS-Feed"] = feed["feeduri"] if "id" in article: msg["X-RSS-ID"] = 
article["id"] if "uuid" in article: