readabilite: comment the clean_html function
This commit is contained in:
		@@ -112,6 +112,7 @@ def clean_html(root):
 | 
				
			|||||||
        # Step 1. Do we keep the node?
 | 
					        # Step 1. Do we keep the node?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if item.tag in tags_junk:
 | 
					        if item.tag in tags_junk:
 | 
				
			||||||
 | 
					            # remove shitty tags
 | 
				
			||||||
            item.getparent().remove(item)
 | 
					            item.getparent().remove(item)
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -119,15 +120,18 @@ def clean_html(root):
 | 
				
			|||||||
            and len(list(item.iterchildren())) <= 1 \
 | 
					            and len(list(item.iterchildren())) <= 1 \
 | 
				
			||||||
            and not (item.text or '').strip() \
 | 
					            and not (item.text or '').strip() \
 | 
				
			||||||
            and not (item.tail or '').strip():
 | 
					            and not (item.tail or '').strip():
 | 
				
			||||||
 | 
					            # remove div with only one item inside
 | 
				
			||||||
            item.drop_tag()
 | 
					            item.drop_tag()
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        class_id = item.get('class', '') + item.get('id', '')
 | 
					        class_id = item.get('class', '') + item.get('id', '')
 | 
				
			||||||
        if regex_bad.match(class_id) is not None:
 | 
					        if regex_bad.match(class_id) is not None:
 | 
				
			||||||
 | 
					            # remove shitty class/id
 | 
				
			||||||
            item.getparent().remove(item)
 | 
					            item.getparent().remove(item)
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if isinstance(item, lxml.html.HtmlComment):
 | 
					        if isinstance(item, lxml.html.HtmlComment):
 | 
				
			||||||
 | 
					            # remove comments
 | 
				
			||||||
            item.getparent().remove(item)
 | 
					            item.getparent().remove(item)
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user