From 51f1d330a4de8551063bab5aba5080f93842155c Mon Sep 17 00:00:00 2001 From: pictuga Date: Sun, 5 Dec 2021 12:09:01 +0100 Subject: [PATCH] Fn to access data_files & pkg files --- morss/feeds.py | 22 +++++++--------------- morss/util.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 morss/util.py diff --git a/morss/feeds.py b/morss/feeds.py index 906db4d..b3c13c9 100644 --- a/morss/feeds.py +++ b/morss/feeds.py @@ -17,9 +17,7 @@ import csv import json -import os.path import re -import sys from copy import deepcopy from datetime import datetime from fnmatch import fnmatch @@ -30,6 +28,7 @@ from dateutil import tz from lxml import etree from .readabilite import parse as html_parse +from .util import * json.encoder.c_make_encoder = None @@ -52,7 +51,7 @@ except NameError: def parse_rules(filename=None): if not filename: - filename = os.path.join(os.path.dirname(__file__), 'feedify.ini') + filename = pkg_path('feedify.ini') config = RawConfigParser() config.read(filename) @@ -66,18 +65,9 @@ def parse_rules(filename=None): # for each rule if rules[section][arg].startswith('file:'): - paths = [os.path.join(sys.prefix, 'share/morss/www', rules[section][arg][5:]), - os.path.join(os.path.dirname(__file__), '../www', rules[section][arg][5:]), - os.path.join(os.path.dirname(__file__), '../..', rules[section][arg][5:])] - - for path in paths: - try: - file_raw = open(path).read() - file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw) - rules[section][arg] = file_clean - - except IOError: - pass + file_raw = open(data_path(rules[section][arg][5:])).read() + file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw) + rules[section][arg] = file_clean elif '\n' in rules[section][arg]: rules[section][arg] = rules[section][arg].split('\n')[1:] @@ -810,6 +800,8 @@ class FeedJSON(Feed, ParserJSON): if __name__ == '__main__': + import sys + from . 
# This file is part of morss
#
# Copyright (C) 2013-2020 pictuga
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.

import os.path
import sys


def pkg_path(path=''):
    """Return *path* joined onto the directory that contains this module.

    With the default empty *path*, the module's own directory is returned.
    """
    return os.path.join(os.path.dirname(__file__), path)


# Base directory selected by the first successful data_path() lookup. Once
# set, data_path() joins onto it directly and no longer probes the filesystem.
data_path_base = None


def data_path(path=''):
    """Return the location of the data file *path*.

    Several candidate base directories are probed in order; the first one
    under which *path* exists is remembered in the module-level
    ``data_path_base`` and reused for every later call (later calls do not
    check that the requested file exists).

    An empty *path* designates the data directory itself, so it is matched
    against directories rather than regular files.

    Raises:
        IOError: if *path* is not found under any candidate base directory.
    """
    global data_path_base

    if data_path_base is not None:
        return os.path.join(data_path_base, path)

    pkg_dir = pkg_path()
    bases = [
        os.path.join(sys.prefix, 'share/morss/www'),
        os.path.join(pkg_dir, './../../../../share/morss/www'),
        os.path.join(pkg_dir, '../www'),
        os.path.join(pkg_dir, '../..'),
    ]

    # os.path.isfile() is always false for a directory, which made the
    # default path='' impossible to resolve; probe for a directory instead.
    found = os.path.isfile if path else os.path.isdir

    for base in bases:
        full_path = os.path.join(base, path)

        if found(full_path):
            data_path_base = base
            return full_path

    # Still an IOError for existing callers, but now says what was missing
    # and where it was looked for.
    raise IOError('data file not found: %r (looked in: %s)'
                  % (path, ', '.join(bases)))