Python 半自动爬虫简介
一、爬虫步骤
1. 找到想要爬取数据的网址,并获取该页面的源代码(下面以天猫超市页面的源代码为例):
<!DOCTYPE HTML>
<html>
<head>
<!-- Character encoding declaration; kept as the first metadata element. -->
<meta charset="gbk">
<!-- Rendering hints. "renderer" is presumably read by dual-engine browsers (TODO confirm);
     X-UA-Compatible asks Internet Explorer for its newest document mode. -->
<meta name="renderer" content="webkit">
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<!-- DNS prefetch hints for five external hosts. -->
<link rel="dns-prefetch" href="//g.alicdn.com">
<link rel="dns-prefetch" href="//img.alicdn.com">
<link rel="dns-prefetch" href="//gm.mmstat.com">
<link rel="dns-prefetch" href="//ald.taobao.com">
<link rel="dns-prefetch" href="//bar.tmall.com">
<!-- Page identifier; presumably consumed by the site's tracking scripts (TODO confirm). -->
<meta name="spm-id" content="a3204.7933263">
<title>-天猫超市-天猫Tmall.com-上天猫,就购了-理想生活上天猫</title>
<!-- Favicon. -->
<link rel="shortcut icon" href="//img.alicdn.com/tfs/TB1XlF3RpXXXXc6XXXXXXXXXXXX-16-16.png" type="image/x-icon">
<script>
// Create the global page configuration object if it does not exist yet,
// then fill in the page-level settings through a local alias.
(function (win) {
  var cfg = win.g_config = win.g_config || {};
  cfg.devId = "pc";
  cfg.headerVersion = '1.0.0';
  // pageId and bizId carry the same value for this page.
  cfg.pageId = 'chaoshi';
  cfg.bizId = 'chaoshi';
  cfg.isMarket = true;
  cfg.loadModulesLater = true;
  cfg.sl = 'vm';
})(window);
</script>
<style>
/* Footer width overrides; !important makes them win over other footer rules. */
#footer .footer-info {
  width: auto !important;
}

#footer .tmall-intro {
  width: 990px !important;
}
</style>
<script>
// Extend the existing global g_config: set the search-bar and mini-bag flags,
// and make sure the nested tmallConfig.commonJS objects exist before
// switching the mini bag off there as well.
(function (cfg) {
  cfg.notInitSearchBar = true;
  cfg.closeMiniBag = true;

  var tmall = cfg.tmallConfig = cfg.tmallConfig || {};
  var common = tmall.commonJS = tmall.commonJS || {};
  common.miniBag = { off: true };
})(g_config);
</script>