Python 爬虫需要的库
pip install builtwith
该模块将 URL 作为参数，下载该 URL 并对其进行分析，然后返回该网站使用的技术。下面是使用该模块的一个例子。
import builtwith
builtwith.parse('http://example.webscraping.com')
{'web-servers': ['Nginx'], 'web-frameworks': ['Web2py', 'Twitter Bootstrap'], 'programming-languages': ['Python'], 'javascript-frameworks': ['jQuery', 'Modernizr', 'jQuery UI']}
寻找网站所有者：pip install python-whois
import whois
print(whois.whois('http://example.webscraping.com/'))
{ "domain_name": "WEBSCRAPING.COM", "registrar": "GoDaddy.com, LLC", "whois_server": "whois.godaddy.com", "referral_url": null, "updated_date": [ "2013-08-20 08:08:30", "2013-08-20 08:08:29" ], "creation_date": "2004-06-26 18:01:19", "expiration_date": "2020-06-26 18:01:19", "name_servers": [ "NS1.WEBFACTION.COM", "NS2.WEBFACTION.COM", "NS3.WEBFACTION.COM", "NS4.WEBFACTION.COM" ], "status": [ "clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited", "clientRenewProhibited https://icann.org/epp#clientRenewProhibited", "clientTransferProhibited https://icann.org/epp#clientTransferProhibited", "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited", "clientTransferProhibited http://www.icann.org/epp#clientTransferProhibited", "clientUpdateProhibited http://www.icann.org/epp#clientUpdateProhibited", "clientRenewProhibited http://www.icann.org/epp#clientRenewProhibited", "clientDeleteProhibited http://www.icann.org/epp#clientDeleteProhibited" ], "emails": "abuse@godaddy.com", "dnssec": "unsigned", "name": null, "org": null, "address": null, "city": null, "state": "Victoria", "zipcode": null, "country": "AU" }