39b0a1d7cc 
					 
					
						
						
							
							setup.py: fix deps & files  
						
						 
						
						
						
						
					 
					
						2020-04-14 17:36:42 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65803b328d 
					 
					
						
						
							
							New git url and updated date in provided index.html  
						
						 
						
						
						
						
					 
					
						2020-04-13 15:30:32 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6b7c0eb33 
					 
					
						
						
							
							Fix app definition for uwsgi  
						
						 
						
						
						
						
					 
					
						2020-04-13 15:30:09 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67c096ad5b 
					 
					
						
						
							
							feeds: add fake path to default html parser  
						
						 
						
						... 
						
						
						
						Without it, some websites were accidentally matching it (false positives) 
						
						
					 
					
						2020-04-12 13:00:56 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f018437544 
					 
					
						
						
							
							crawler: make mysql backend thread safe  
						
						 
						
						
						
						
					 
					
						2020-04-12 12:53:05 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e5e8d24a4 
					 
					
						
						
							
							Timezone fixes  
						
						 
						
						
						
						
					 
					
						2020-04-10 20:33:59 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee78a7875a 
					 
					
						
						
							
							morss: focus on the most recent feed items  
						
						 
						
						
						
						
					 
					
						2020-04-10 16:08:13 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e7b9d95ee 
					 
					
						
						
							
							feeds: properly use html template  
						
						 
						
						
						
						
					 
					
						2020-04-09 20:00:51 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						987a719c4e 
					 
					
						
						
							
							feeds: try all parsers regardless of contenttype  
						
						 
						
						... 
						
						
						
						Turns out some websites send the wrong contenttype (json for html, html for xml, etc.) 
						
						
					 
					
						2020-04-09 19:17:51 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47b33f4baa 
					 
					
						
						
							
							morss: specify server output encoding  
						
						 
						
						
						
						
					 
					
						2020-04-09 19:10:45 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c7f512583 
					 
					
						
						
							
							feeds: handle several errors  
						
						 
						
						
						
						
					 
					
						2020-04-09 19:09:10 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a32f5a8536 
					 
					
						
						
							
							readabilite: add debug option (also used by :get)  
						
						 
						
						
						
						
					 
					
						2020-04-09 19:08:13 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63a06524b7 
					 
					
						
						
							
							morss: various encoding fixes  
						
						 
						
						
						
						
					 
					
						2020-04-09 19:06:51 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b0f80c6d3c 
					 
					
						
						
							
							morss: fix csv output encoding  
						
						 
						
						
						
						
					 
					
						2020-04-09 19:05:50 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78cea10ead 
					 
					
						
						
							
							morss: replace :getpage with :get  
						
						 
						
						... 
						
						
						
						Also provides readabilite debugging 
						
						
					 
					
						2020-04-09 18:43:20 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e5a82ff1f4 
					 
					
						
						
							
							crawler: drop auto-referer  
						
						 
						
						... 
						
						
						
						Was solving some issues. But creating even more issues. 
						
						
					 
					
						2020-04-07 10:39:21 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f3d1f92b39 
					 
					
						
						
							
							Detect encoding every time  
						
						 
						
						
						
						
					 
					
						2020-04-07 10:38:36 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7691df5257 
					 
					
						
						
							
							Use wrapper for http calls  
						
						 
						
						
						
						
					 
					
						2020-04-07 10:30:17 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ae0dbc175 
					 
					
						
						
							
							README: mention csv output  
						
						 
						
						
						
						
					 
					
						2020-04-07 09:24:32 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f1d0431e68 
					 
					
						
						
							
							morss: drop :html, replaced with :reader  
						
						 
						
						... 
						
						
						
						README updated accordingly 
						
						
					 
					
						2020-04-07 09:23:29 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a09831415f 
					 
					
						
						
							
							feeds: fix bug when mimetype matches nothing  
						
						 
						
						
						
						
					 
					
						2020-04-06 18:53:07 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bfad6b7a4a 
					 
					
						
						
							
							readabilite: clean before counting  
						
						 
						
						... 
						
						
						
						To remove links which are not kept anyway 
						
						
					 
					
						2020-04-06 16:55:39 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b8c3e51e7 
					 
					
						
						
							
							readabilite: fix threshold feature  
						
						 
						
						... 
						
						
						
						Awkward typo... 
						
						
					 
					
						2020-04-06 16:52:06 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc9e425247 
					 
					
						
						
							
							readabilite: don't clean-out the top 10% nodes  
						
						 
						
						... 
						
						
						
						Loosen up the code once again to limit over-kill 
						
						
					 
					
						2020-04-06 14:26:28 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f48e18bb1 
					 
					
						
						
							
							readabilite: put scores directly in html node  
						
						 
						
						... 
						
						
						
						Probably slower but makes code somewhat cleaner... 
						
						
					 
					
						2020-04-06 14:21:41 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31cac921c7 
					 
					
						
						
							
							README: remove ref to iTunes  
						
						 
						
						
						
						
					 
					
						2020-04-05 22:20:33 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a82ec96eb7 
					 
					
						
						
							
							Delete feedify.py leftover code  
						
						 
						
						... 
						
						
						
						iTunes integration untested, unreliable and not working... 
						
						
					 
					
						2020-04-05 22:16:52 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aad2398e69 
					 
					
						
						
							
							feeds: turns out lxml.etree doesn't have drop_tag  
						
						 
						
						
						
						
					 
					
						2020-04-05 21:50:38 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eeac630855 
					 
					
						
						
							
							crawler: add more "realistic" headers  
						
						 
						
						
						
						
					 
					
						2020-04-05 21:11:57 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e136b0feb2 
					 
					
						
						
							
							readabilite: loosen the slayer  
						
						 
						
						... 
						
						
						
						Previous impl. led to too many empty results 
						
						
					 
					
						2020-04-05 20:47:30 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cf32af6c0 
					 
					
						
						
							
							readabilite: also use BS  
						
						 
						
						
						
						
					 
					
						2020-04-05 20:46:42 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						568e7d7dd2 
					 
					
						
						
							
							feeds: make BS's output bytes for lxml's sake  
						
						 
						
						
						
						
					 
					
						2020-04-05 20:46:04 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3617f86e9d 
					 
					
						
						
							
							morss: make cgi_encode more robust  
						
						 
						
						
						
						
					 
					
						2020-04-05 16:43:11 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d90756b337 
					 
					
						
						
							
							morss: drop 'keep' option  
						
						 
						
						... 
						
						
						
						Because the Firefox behaviour it is working around is no longer in use 
						
						
					 
					
						2020-04-05 16:37:27 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40c69f17d2 
					 
					
						
						
							
							feeds: parse html with BS  
						
						 
						
						... 
						
						
						
						More robust & to make it consistent with :getpage 
						
						
					 
					
						2020-04-05 16:12:41 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99461ea185 
					 
					
						
						
							
							crawler: fix var name issues (private_cache)  
						
						 
						
						
						
						
					 
					
						2020-04-05 16:11:36 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf86c1e962 
					 
					
						
						
							
							crawler: make AutoUA match http(s) type  
						
						 
						
						
						
						
					 
					
						2020-04-05 16:07:51 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d20f6237bd 
					 
					
						
						
							
							crawler: replace ContentNegoHandler with AlternateHandler  
						
						 
						
						... 
						
						
						
						More basic. Sends the same headers no matter what. Makes requests more "replicable".
Also, drop "text/xml" from RSS contenttype, too broad, matches garbage 
						
						
					 
					
						2020-04-05 16:05:59 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a4d68d72c 
					 
					
						
						
							
							crawler: drop 'basic' toggle  
						
						 
						
						... 
						
						
						
						Can't even remember the use case 
						
						
					 
					
						2020-04-05 16:03:06 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6811138fd 
					 
					
						
						
							
							morss: use redirected url in :getpage  
						
						 
						
						... 
						
						
						
						Still have to find how to do the same thing with feeds... 
						
						
					 
					
						2020-04-04 20:04:57 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35b702fffd 
					 
					
						
						
							
							morss: default values for feed creation  
						
						 
						
						
						
						
					 
					
						2020-04-04 19:39:32 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a88886767 
					 
					
						
						
							
							morss: get_page to act as a basic proxy (for iframes)  
						
						 
						
						
						
						
					 
					
						2020-04-04 16:37:15 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1653394cf7 
					 
					
						
						
							
							morss: cgi_dispatcher to be able to create extra functions  
						
						 
						
						
						
						
					 
					
						2020-04-04 16:35:16 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8a90cf414 
					 
					
						
						
							
							morss: move url/options parsing to own function  
						
						 
						
						... 
						
						
						
						For future re-use 
						
						
					 
					
						2020-04-04 16:33:52 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdbaf0f8a7 
					 
					
						
						
							
							morss/cgi: fix handling of special chars in url  
						
						 
						
						
						
						
					 
					
						2020-04-04 16:21:37 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0e447a2a6 
					 
					
						
						
							
							ItemFix: clean up Pocket links  
						
						 
						
						
						
						
					 
					
						2020-04-04 16:20:39 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6817e01b4 
					 
					
						
						
							
							sheet.xsl: set font to "sans"  
						
						 
						
						... 
						
						
						
						Browsers don't all have the same default font. Overriding for consistency 
						
						
					 
					
						2020-04-03 17:47:19 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c3091d64c 
					 
					
						
						
							
							morss: code spacing  
						
						 
						
						... 
						
						
						
						One of those commits that make me feel useful 
						
						
					 
					
						2020-03-21 23:41:46 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37b4e144a9 
					 
					
						
						
							
							morss: small fixes  
						
						 
						
						... 
						
						
						
						Includes dropping off ftp support 
						
						
					 
					
						2020-03-21 23:30:18 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd4b7b5bb2 
					 
					
						
						
							
							morss: convert HTML feeds to XML ones for completeness  
						
						 
						
						
						
						
					 
					
						2020-03-21 23:27:42 +01:00