1. Python / Говнокод #25447

    0

    1. 01
    2. 02
    3. 03
    4. 04
    5. 05
    6. 06
    7. 07
    8. 08
    9. 09
    10. 10
    11. 11
    12. 12
    13. 13
    14. 14
    15. 15
    16. 16
    17. 17
    18. 18
    19. 19
    20. 20
    21. 21
    22. 22
    23. 23
    24. 24
    25. 25
    26. 26
    27. 27
    28. 28
    29. 29
    30. 30
    31. 31
    32. 32
    33. 33
    34. 34
    35. 35
    36. 36
    37. 37
    38. 38
    39. 39
    40. 40
    41. 41
    42. 42
    43. 43
    44. 44
    45. 45
    46. 46
    47. 47
    48. 48
    49. 49
    50. 50
    51. 51
    52. 52
    53. 53
    54. 54
    55. 55
    56. 56
    import sqlite3
    from bs4 import BeautifulSoup
    import requests, hashlib
    from io import open as iopen
    from urlparse import urlsplit
    
    def md5sum(filename, blocksize=65536):
    	hash = hashlib.md5()
    	with open(filename, "rb") as f:
    		for block in iter(lambda: f.read(blocksize), b""):
    			hash.update(block)
    	return hash.hexdigest()
    
    def parse_image_url(url):
    	html_doc = requests.get(url).text
    	soup = BeautifulSoup(html_doc, 'html.parser')
    	first = soup.find(class_='postContainer')
    	two = first.find_all('img')
    	requests_image(two[1].get('src'))
    
    def unic_check(file_name):
    	check_sum = md5sum(file_name)
    	if c.execute("SELECT * FROM sums WHERE sum = '%s'" % check_sum) != None:
    		cur.close()
    		conn.close()
    		return
    	else:
    		c.execute("INSERT INTO sums VALUES (%s)" % check_sum)
    		c.commit()
    		cur.close()
    		conn.close()
    		return
    
    def requests_image(file_url):
    	suffix_list = ['jpg', 'gif', 'png', 'tif', 'svg',]
    	file_name =  urlsplit(file_url)[2].split('/')[-1]
    	file_suffix = file_name.split('.')[1]
    	i = requests.get(file_url)
    	if file_suffix in suffix_list and i.status_code == requests.codes.ok:
    		with iopen(file_name, 'wb') as file:
    			file.write(i.content)
    	else:
    		return False
    	unic_check(file_name)
    
    def main():
    	Anime_types = ['http://anime.reactor.cc/tag/Anime+%D0%9D%D1%8F%D1%88%D0%B8', 'http://anime.reactor.cc/tag/Anime+Cosplay', 'http://anime.reactor.cc/tag/Anime+%D0%9A%D0%BE%D0%BC%D0%B8%D0%BA%D1%81%D1%8B', 'http://anime.reactor.cc/tag/Anime+Art']
    	global conn
    	global c
    	conn = sqlite3.connect('anime.db')
    	c = conn.cursor()
    	for x in Anime_types:
    		parse_image_url(x)
    		
    if __name__ == "__main__":
    	main()

    Запостил: marataziat, 13 Марта 2019

    Комментарии (20) RSS

    Добавить комментарий