Add functions to access data files and package files
continuous-integration/drone Build is running Details
continuous-integration/drone/push Build is passing Details

master
pictuga 2021-12-05 12:09:01 +01:00
parent 11bc9f643e
commit 51f1d330a4
2 changed files with 57 additions and 15 deletions

View File

@ -17,9 +17,7 @@
import csv import csv
import json import json
import os.path
import re import re
import sys
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from fnmatch import fnmatch from fnmatch import fnmatch
@ -30,6 +28,7 @@ from dateutil import tz
from lxml import etree from lxml import etree
from .readabilite import parse as html_parse from .readabilite import parse as html_parse
from .util import *
json.encoder.c_make_encoder = None json.encoder.c_make_encoder = None
@ -52,7 +51,7 @@ except NameError:
def parse_rules(filename=None): def parse_rules(filename=None):
if not filename: if not filename:
filename = os.path.join(os.path.dirname(__file__), 'feedify.ini') filename = pkg_path('feedify.ini')
config = RawConfigParser() config = RawConfigParser()
config.read(filename) config.read(filename)
@ -66,18 +65,9 @@ def parse_rules(filename=None):
# for each rule # for each rule
if rules[section][arg].startswith('file:'): if rules[section][arg].startswith('file:'):
paths = [os.path.join(sys.prefix, 'share/morss/www', rules[section][arg][5:]), file_raw = open(data_path(rules[section][arg][5:])).read()
os.path.join(os.path.dirname(__file__), '../www', rules[section][arg][5:]), file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw)
os.path.join(os.path.dirname(__file__), '../..', rules[section][arg][5:])] rules[section][arg] = file_clean
for path in paths:
try:
file_raw = open(path).read()
file_clean = re.sub('<[/?]?(xsl|xml)[^>]+?>', '', file_raw)
rules[section][arg] = file_clean
except IOError:
pass
elif '\n' in rules[section][arg]: elif '\n' in rules[section][arg]:
rules[section][arg] = rules[section][arg].split('\n')[1:] rules[section][arg] = rules[section][arg].split('\n')[1:]
@ -810,6 +800,8 @@ class FeedJSON(Feed, ParserJSON):
if __name__ == '__main__': if __name__ == '__main__':
import sys
from . import crawler from . import crawler
req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss') req = crawler.adv_get(sys.argv[1] if len(sys.argv) > 1 else 'https://www.nytimes.com/', follow='rss')

50
morss/util.py 100644
View File

@ -0,0 +1,50 @@
# This file is part of morss
#
# Copyright (C) 2013-2020 pictuga <contact@pictuga.com>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
import os.path
import sys
def pkg_path(path=''):
    """Return the absolute path of *path* relative to this package's directory.

    With no argument, returns the package directory itself (with a trailing
    separator, as produced by os.path.join with an empty component).
    """
    package_dir = os.path.dirname(__file__)
    return os.path.join(package_dir, path)
# Cached base directory for data files; resolved lazily on first successful
# lookup so repeated calls skip the candidate-directory probing.
data_path_base = None


def data_path(path=''):
    """Return the absolute path of data file *path* (e.g. files under www/).

    Probes a list of candidate base directories (system share dir, editable
    checkout layouts) and caches the first one that contains *path* in the
    module-level ``data_path_base``, so subsequent lookups reuse it.

    Raises:
        IOError: if *path* is not found under any candidate base directory.
    """
    global data_path_base

    # Fast path: a base directory was already discovered by an earlier call.
    if data_path_base is not None:
        return os.path.join(data_path_base, path)

    bases = [
        os.path.join(sys.prefix, 'share/morss/www'),
        os.path.join(pkg_path(), './../../../../share/morss/www'),
        os.path.join(pkg_path(), '../www'),
        os.path.join(pkg_path(), '../..')
    ]

    for base in bases:
        full_path = os.path.join(base, path)

        if os.path.isfile(full_path):
            # Remember the winning base for future calls, then return the
            # already-computed path directly (no need to recurse).
            data_path_base = base
            return full_path

    # No candidate directory contains the requested file.
    raise IOError('data file not found: %s' % path)