Add functions to access data files and package files
continuous-integration/drone Build is running Details
continuous-integration/drone/push Build is passing Details

master
pictuga 2021-12-05 12:09:01 +01:00
parent 11bc9f643e
commit 51f1d330a4
2 changed files with 57 additions and 15 deletions

View File

@ -17,9 +17,7 @@
import csv import csv
import json import json
import os.path
import re import re
import sys
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from fnmatch import fnmatch from fnmatch import fnmatch
@ -30,6 +28,7 @@ from dateutil import tz
from lxml import etree from lxml import etree
from .readabilite import parse as html_parse from .readabilite import parse as html_parse
from .util import *
json.encoder.c_make_encoder = None json.encoder.c_make_encoder = None
@ -52,7 +51,7 @@ except NameError:
def parse_rules(filename=None): def parse_rules(filename=None):
if not filename: if not filename:
filename = os.path.join(os.path.dirname(__file__), 'feedify.ini') filename = pkg_path('feedify.ini')
config = RawConfigParser() config = RawConfigParser()
config.read(filename) config.read(filename)
@ -66,18 +65,9 @@ def parse_rules(filename=None):
# for each rule # for each rule
if rules[section][arg].startswith('file:'): if rules[section][arg].startswith('file:'):
paths = [os.path.join(sys.prefix, 'share/morss/www', rules[section][arg][5:]), file_raw = open(data_path(rules[section][arg][5:])).read()
os.path.join(os.path.dirname(__file__), '../www', rules[section][arg][5:]), file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw)
os.path.join(os.path.dirname(__file__), '../..', rules[section][arg][5:])] rules[section][arg] = file_clean
for path in paths:
try:
file_raw = open(path).read()
file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw)
rules[section][arg] = file_clean
except IOError:
pass
elif '\n' in rules[section][arg]: elif '\n' in rules[section][arg]:
rules[section][arg] = rules[section][arg].split('\n')[1:] rules[section][arg] = rules[section][arg].split('\n')[1:]
@ -810,6 +800,8 @@ class FeedJSON(Feed, ParserJSON):
if __name__ == '__main__': if __name__ == '__main__':
import sys
from . import crawler from . import crawler
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss') req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')

50
morss/util.py 100644
View File

@ -0,0 +1,50 @@
# This file is part of morss
#
# Copyright (C) 2013-2020 pictuga <contact@pictuga.com>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import os.path
import sys
def pkg_path(path=''):
    """Return the absolute path of *path* relative to this package's directory.

    With no argument, returns the package directory itself (with a trailing
    separator, as produced by os.path.join with an empty component).
    """
    package_dir = os.path.dirname(__file__)
    return os.path.join(package_dir, path)
# Cached base directory for data files; resolved lazily on first successful
# lookup so repeated calls skip the candidate-directory probing.
data_path_base = None


def data_path(path=''):
    """Return the absolute path of data file *path* (e.g. files under www/).

    Probes a list of candidate base directories (system share dir, editable
    checkout layouts) and caches the first one that contains *path* in the
    module-level ``data_path_base``, so subsequent lookups reuse it.

    Raises:
        IOError: if *path* is not found under any candidate base directory.
    """
    global data_path_base

    # Fast path: a base directory was already discovered by an earlier call.
    if data_path_base is not None:
        return os.path.join(data_path_base, path)

    bases = [
        os.path.join(sys.prefix, 'share/morss/www'),
        os.path.join(pkg_path(), './../../../../share/morss/www'),
        os.path.join(pkg_path(), '../www'),
        os.path.join(pkg_path(), '../..')
    ]

    for base in bases:
        full_path = os.path.join(base, path)

        if os.path.isfile(full_path):
            # Remember the winning base for future calls, then return the
            # already-computed path directly (no need to recurse).
            data_path_base = base
            return full_path

    # No candidate directory contains the requested file.
    raise IOError('data file not found: %s' % path)