- 01
- 02
- 03
- 04
- 05
- 06
- 07
- 08
- 09
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
# class SomethingParser(object):
def _get_images(self, soup, basedomain):
soup = soup.get_soup()
fb_images = soup.findAll(name="meta", attrs={
"property": lambda property: property and property.lower() == "og:image"})
images = (soup.body or soup).findAll(name="img", src=True)
get_source = lambda img: img.get("content") \
if img.get("content", None) \
else img.get("src") \
if img.get("src").startswith("http://") \
else "http://"+img.get("src").lstrip("/") \
if img.get("src").startswith("//") \
else urljoin("http://"+basedomain, img.get("src").lstrip("/"))
return list(set(map(get_source, fb_images + images)))
def get_images(self):
    """Return the page's image URLs, or an empty list when unavailable.

    Delegates to ``self._get_images(self.soup, self.basedomain)`` when
    ``self.basedomain`` is truthy. An empty list is returned when no base
    domain is set or when a ``URLError`` is raised along the way.
    """
    found = []
    try:
        if self.basedomain:
            found = self._get_images(self.soup, self.basedomain)
    except URLError:
        # Best effort: a URL error yields "no images" rather than a failure.
        found = []
    return found