crawler: improve HTML tag iteration code
	
		
			
	
		
	
	
		
	
		
			All checks were successful
		
		
	
	
		
			
				
	
				continuous-integration/drone/push Build is passing
				
			
		
		
	
	
				
					
				
			
		
			All checks were successful
		
		
	
	continuous-integration/drone/push Build is passing
				
			Ignores tags without attributes. Avoids a bug with unclosed tags.
This commit is contained in:
		@@ -368,7 +368,7 @@ class BrowserlyHeaderHandler(BaseHandler):
 | 
				
			|||||||
def iter_html_tag(html_str, tag_name):
 | 
					def iter_html_tag(html_str, tag_name):
 | 
				
			||||||
    " To avoid parsing whole pages when looking for a simple tag "
 | 
					    " To avoid parsing whole pages when looking for a simple tag "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    re_tag = r'<%s(\s*[^>])*>' % tag_name
 | 
					    re_tag = r'<%s\s+[^>]+>' % tag_name
 | 
				
			||||||
    re_attr = r'(?P<key>[^=\s]+)=[\'"](?P<value>[^\'"]+)[\'"]'
 | 
					    re_attr = r'(?P<key>[^=\s]+)=[\'"](?P<value>[^\'"]+)[\'"]'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for tag_match in re.finditer(re_tag, html_str):
 | 
					    for tag_match in re.finditer(re_tag, html_str):
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user