腾讯微博模拟登录+数据抓取(Java实现)
不多说,贴出相关代码。 参数实体:
package token.def;

import java.io.Serializable;
import java.util.Properties;

/**
 * Carries the intermediate values and the final result of a simulated
 * Tencent Weibo login performed by {@code token.exe.TencentWeiboLoginer}.
 *
 * <p>When a verification code (captcha) is required, the loginer fills in
 * {@code saltUin}, {@code dataRedirect}, {@code loginSig}, {@code loginUrl}
 * and {@code imgURl} so that the caller can resume the login later. When the
 * login completes, {@code prop} holds the parsed authorization result.
 */
public class TLoginParams implements Serializable {

    private static final long serialVersionUID = 6120319409538285515L;

    /** Third field of the "check" response; used as the salt when hashing the password. */
    private String saltUin;
    /** Value of the {@code data-redirect} attribute read from the authorize page. */
    private String dataRedirect;
    /** The {@code login_sig} value extracted from the login page script. */
    private String loginSig;
    /** URL of the login page; sent as the Referer of later requests. */
    private String loginUrl;
    /** URL of the verification code image. */
    private String imgURl;
    /** Not written by any code visible in this article; presumably a captcha cookie. */
    private String imgCookie;
    /** {@code true} by default; set to {@code false} when the server rejects the verification code. */
    private boolean isLogin = true;
    /** Key/value pairs parsed from the final authorization response. */
    private Properties prop;

    public String getSaltUin() {
        return saltUin;
    }

    public void setSaltUin(String saltUin) {
        this.saltUin = saltUin;
    }

    public String getDataRedirect() {
        return dataRedirect;
    }

    public void setDataRedirect(String dataRedirect) {
        this.dataRedirect = dataRedirect;
    }

    public String getLoginSig() {
        return loginSig;
    }

    public void setLoginSig(String loginSig) {
        this.loginSig = loginSig;
    }

    public String getLoginUrl() {
        return loginUrl;
    }

    public void setLoginUrl(String loginUrl) {
        this.loginUrl = loginUrl;
    }

    public String getImgURl() {
        return imgURl;
    }

    public void setImgURl(String imgURl) {
        this.imgURl = imgURl;
    }

    public String getImgCookie() {
        return imgCookie;
    }

    public void setImgCookie(String imgCookie) {
        this.imgCookie = imgCookie;
    }

    public boolean isLogin() {
        return isLogin;
    }

    public void setLogin(boolean isLogin) {
        this.isLogin = isLogin;
    }

    public Properties getProp() {
        return prop;
    }

    public void setProp(Properties prop) {
        this.prop = prop;
    }

    @Override
    public String toString() {
        return "TLoginParams [saltUin=" + saltUin + ", dataRedirect=" + dataRedirect
                + ", loginSig=" + loginSig + ", loginUrl=" + loginUrl
                + ", imgURl=" + imgURl + ", imgCookie=" + imgCookie
                + ", isLogin=" + isLogin + ", prop=" + prop + "]";
    }
}
加密实现:
package token.exe;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.util.Locale;

/**
 * Computes the hashed password parameter sent with the simulated login request.
 */
public class TencentWeiboEncryption {

    /** Upper-case hex alphabet; the index of a character is its numeric value. */
    private static final String HEXSTRING = "0123456789ABCDEF";

    /**
     * Returns the MD5 digest of the given text as an upper-case hex string.
     *
     * <p>The text is converted to bytes with ISO-8859-1 so that strings built by
     * {@link #hexchar2bin(String)} map back to the exact bytes they represent.
     *
     * @param originalText text to hash
     * @return 32 upper-case hexadecimal characters
     * @throws Exception if the MD5 algorithm or the charset is unavailable; the
     *         failure is propagated instead of being replaced by an empty hash
     */
    private static String md5(String originalText) throws Exception {
        byte[] input = originalText.getBytes("ISO-8859-1");
        byte[] digest = MessageDigest.getInstance("MD5").digest(input);
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (int i = 0; i < digest.length; i++) {
            int value = digest[i] & 0xFF;
            hex.append(HEXSTRING.charAt(value >>> 4));
            hex.append(HEXSTRING.charAt(value & 0x0F));
        }
        return hex.toString();
    }

    /**
     * Decodes an upper-case hex string into a string with one ISO-8859-1
     * character per decoded byte.
     *
     * @param md5str upper-case hexadecimal text of even length
     * @return the decoded bytes as an ISO-8859-1 string
     * @throws IllegalArgumentException if the length is odd or a character is
     *         not one of {@code 0-9A-F}
     * @throws UnsupportedEncodingException if ISO-8859-1 is unavailable
     */
    private static String hexchar2bin(String md5str) throws UnsupportedEncodingException {
        int length = md5str.length();
        if (length % 2 != 0) {
            throw new IllegalArgumentException("Hex string must have an even length, got " + length);
        }
        ByteArrayOutputStream baos = new ByteArrayOutputStream(length / 2);
        for (int i = 0; i < length; i += 2) {
            int high = HEXSTRING.indexOf(md5str.charAt(i));
            int low = HEXSTRING.indexOf(md5str.charAt(i + 1));
            // indexOf returns -1 for a non-hex character; writing that would
            // silently emit a 0xFF byte, so reject it instead.
            if (high < 0 || low < 0) {
                throw new IllegalArgumentException("Invalid hex digit near index " + i);
            }
            baos.write(high << 4 | low);
        }
        return new String(baos.toByteArray(), "ISO-8859-1");
    }

    /**
     * Computes the hashed password:
     * {@code MD5( MD5( bin(MD5(password)) + bin(salt) ) + upper(verifycode) )},
     * where {@code bin} decodes hex text to raw bytes.
     *
     * @param qq salt as escaped hex text, e.g. {@code \x00\x00\x07\x5b}; every
     *        literal {@code \x} is removed before decoding
     * @param password plain-text password
     * @param verifycode verification code; upper-cased before hashing
     * @return the final digest as 32 upper-case hexadecimal characters
     * @throws IllegalArgumentException if the salt is not valid hex after
     *         removing the {@code \x} markers
     * @throws Exception if MD5 or ISO-8859-1 is unavailable
     */
    public static String getPassword(String qq, String password, String verifycode) throws Exception {
        // A fixed locale keeps upper-casing stable (e.g. 'i' under a Turkish default locale).
        String saltHex = qq.replace("\\x", "").toUpperCase(Locale.ENGLISH);
        String passwordDigest = hexchar2bin(md5(password));
        String salted = md5(passwordDigest + hexchar2bin(saltHex));
        return md5(salted + verifycode.toUpperCase(Locale.ENGLISH));
    }
}
微博登陆实现:
package token.exe;

import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;
import
javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.http.Header;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.params.HttpClientParams;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.message.BasicHeader;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.params.SyncBasicHttpParams;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import token.TencentWeiboOAuth;
import token.def.TLoginParams;

import com.tencent.weibo.beans.RouteCfg;

/**
 * Simulates the browser login flow of Tencent Weibo over HTTP and returns the
 * key/value pairs of the final authorization response.
 *
 * <p>The flow is: load the OAuth authorize page, load the login page to obtain
 * {@code login_sig}, ask the "check" endpoint whether a verification code is
 * required, submit the hashed password, and finally follow the returned
 * check URL to read the authorization result.
 */
public class TencentWeiboLoginer {

    private final DefaultHttpClient httpClient;

    // Default connection settings.
    private static final int CONNECT_TIME_OUT = 5000;
    private static final int SOCKET_TIME_OUT = 5000;
    private static final int MAX_CONNECTIONS_PRE_HOST = 20;
    private static final int MAX_TOTAL_CONNECTIONS = 200;

    /** Creates a loginer with the default timeouts and pool sizes and no proxy. */
    public TencentWeiboLoginer() {
        this(CONNECT_TIME_OUT, SOCKET_TIME_OUT, MAX_CONNECTIONS_PRE_HOST, MAX_TOTAL_CONNECTIONS, null, null);
    }

    /**
     * Creates a loginer backed by a pooled HTTP client.
     *
     * @param connectTimeOut connection timeout, stored as {@code CONNECTION_TIMEOUT}
     * @param socketTimeOut socket timeout, stored as {@code SO_TIMEOUT}
     * @param maxConnectionsPreHost default maximum number of connections per route
     * @param maxTotalConnections maximum number of connections in the pool
     * @param routeCfgs per-host connection limits; may be {@code null}
     * @param proxy proxy to route requests through; may be {@code null}
     * @throws IllegalStateException if the TLS context cannot be created
     */
    public TencentWeiboLoginer(int connectTimeOut, int socketTimeOut, int maxConnectionsPreHost,
            int maxTotalConnections, List<RouteCfg> routeCfgs, HttpHost proxy) {
        // Connection manager shared by all requests of this instance.
        ThreadSafeClientConnManager connManager = new ThreadSafeClientConnManager(createSchemeRegistry());
        connManager.setDefaultMaxPerRoute(maxConnectionsPreHost);
        connManager.setMaxTotal(maxTotalConnections);

        // Connection parameters.
        HttpParams httpParams = new SyncBasicHttpParams();
        httpParams.setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, connectTimeOut);
        httpParams.setParameter(CoreConnectionPNames.SO_TIMEOUT, socketTimeOut);

        // Protocol parameters.
        HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setUseExpectContinue(httpParams, false);

        // Enable cookies.
        HttpClientParams.setCookiePolicy(httpParams, CookiePolicy.BROWSER_COMPATIBILITY);

        // Override the maximum number of connections for specific host/port pairs.
        if (routeCfgs != null) {
            for (RouteCfg routeCfg : routeCfgs) {
                HttpHost host = new HttpHost(routeCfg.getHost(), routeCfg.getPort());
                connManager.setMaxForRoute(new HttpRoute(host), routeCfg.getMaxConnetions());
            }
        }

        httpClient = new DefaultHttpClient(connManager, httpParams);

        // Headers sent with every request.
        List<Header> headers = new ArrayList<Header>();
        headers.add(new BasicHeader(HttpHeaders.ACCEPT,
                "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
        headers.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3"));
        headers.add(new BasicHeader(HttpHeaders.ACCEPT_CHARSET, "UTF-8"));
        headers.add(new BasicHeader(HttpHeaders.USER_AGENT,
                "Mozilla/5.0 (Windows NT 5.1; rv:25.0) Gecko/20100101 Firefox/25.0"));
        headers.add(new BasicHeader(HttpHeaders.CONNECTION, "keep-alive"));
        headers.add(new BasicHeader("X-Forwarded-For", "192.168.0.1"));
        headers.add(new BasicHeader("Client-IP", "192.168.0.1"));
        headers.add(new BasicHeader("API-RemoteIP", "192.168.0.1"));
        httpClient.getParams().setParameter("http.default-headers", headers);

        if (proxy != null) {
            httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        }
    }

    /**
     * Builds the scheme registry with both the http and https schemes.
     *
     * <p>WARNING: the https scheme uses a trust manager that accepts every
     * certificate and a hostname verifier that accepts every host name, so the
     * identity of the server is not verified.
     *
     * @return registry with "http" on port 80 and "https" on port 443
     * @throws IllegalStateException if the TLS context cannot be created or initialised
     */
    private static SchemeRegistry createSchemeRegistry() {
        SchemeRegistry schemeRegistry = new SchemeRegistry();
        // Plain http is registered as well so that an http URL handed back by
        // the server does not fail with an unregistered-scheme error.
        schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));

        X509TrustManager trustAll = new X509TrustManager() {
            @Override
            public X509Certificate[] getAcceptedIssuers() {
                // The interface contract expects a non-null (possibly empty) array.
                return new X509Certificate[0];
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                // Intentionally empty: every server certificate is accepted.
            }

            @Override
            public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                // Intentionally empty: every client certificate is accepted.
            }
        };

        try {
            SSLContext ssl = SSLContext.getInstance("TLS");
            ssl.init(null, new TrustManager[] { trustAll }, null);
            SSLSocketFactory sslSocketFactory = new SSLSocketFactory(ssl);
            sslSocketFactory.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
            schemeRegistry.register(new Scheme("https", 443, sslSocketFactory));
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("TLS is not available", e);
        } catch (KeyManagementException e) {
            throw new IllegalStateException("Could not initialise the TLS context", e);
        }
        return schemeRegistry;
    }

    /**
     * Simulates a Tencent Weibo login.
     *
     * <p>The result has three possible shapes:
     * <ul>
     * <li>{@code getProp() != null}: login succeeded; the properties hold the
     *     authorization result;</li>
     * <li>{@code getProp() == null} and {@code isLogin()}: a verification code
     *     is required; the returned object carries the values needed by
     *     {@link #doLoginByVC} plus the image URL;</li>
     * <li>{@code !isLogin()}: the server rejected the login.</li>
     * </ul>
     *
     * @param username account name
     * @param password plain-text password
     * @return the login state described above
     */
    public TLoginParams doLogin(String username, String password) {
        Properties properties = initProperties();
        String clientID = properties.getProperty("client_id");
        String redirectURI = properties.getProperty("redirect_uri");

        HashMap<String, String> urlMap = getUrlMap(clientID, redirectURI);
        String dataRedirect = urlMap.get("data-redirect");

        HashMap<String, String> loginInfoMap = preLogin(urlMap);
        String loginSig = loginInfoMap.get("login_sig");
        String loginUrl = loginInfoMap.get("login_url");

        HashMap<String, String> checkMap = isHasVC(dataRedirect, username, loginSig, loginUrl);
        String vc = checkMap.get("vc");
        String saltUin = checkMap.get("saltUin");

        TLoginParams tLoginParams = new TLoginParams();
        if (Integer.parseInt(checkMap.get("isHasVC")) != 0) {
            // A verification code is required: hand back everything needed to resume.
            tLoginParams.setDataRedirect(dataRedirect);
            tLoginParams.setLoginSig(loginSig);
            tLoginParams.setLoginUrl(loginUrl);
            tLoginParams.setSaltUin(saltUin);
            tLoginParams.setImgURl(getVCode(username));
            return tLoginParams;
        }

        String loginOutcome = finalLogin(vc, saltUin, dataRedirect, username, password, loginSig, loginUrl);
        return completeLogin(tLoginParams, loginUrl, loginOutcome);
    }

    /**
     * Completes a login that required a verification code.
     *
     * @param vc verification code typed by the user
     * @param saltUin salt returned by the earlier {@link #doLogin} call
     * @param dataRedirect redirect URL returned by the earlier {@link #doLogin} call
     * @param username account name
     * @param password plain-text password
     * @param loginSig login signature returned by the earlier {@link #doLogin} call
     * @param loginUrl login page URL returned by the earlier {@link #doLogin} call
     * @return parameters whose {@code isLogin()} is {@code false} when the server
     *         rejected the attempt (for example a wrong verification code),
     *         otherwise parameters carrying the authorization result
     */
    public TLoginParams doLoginByVC(String vc, String saltUin, String dataRedirect, String username,
            String password, String loginSig, String loginUrl) {
        String loginOutcome = finalLogin(vc, saltUin, dataRedirect, username, password, loginSig, loginUrl);
        return completeLogin(new TLoginParams(), loginUrl, loginOutcome);
    }

    /**
     * Turns the outcome of {@link #finalLogin} into the public result: a URL is
     * followed to obtain the authorization properties, anything else is a
     * server-side error message and marks the login as failed.
     */
    private TLoginParams completeLogin(TLoginParams tLoginParams, String loginUrl, String loginOutcome) {
        if (!isHttpUrl(loginOutcome)) {
            // Passing an error message to authorize() would issue a request to a bogus URL.
            tLoginParams.setLogin(false);
            return tLoginParams;
        }
        tLoginParams.setProp(authorize(loginUrl, loginOutcome));
        return tLoginParams;
    }

    /** Returns whether the value is an absolute http or https URL. */
    private static boolean isHttpUrl(String value) {
        return value != null && (value.startsWith("http://") || value.startsWith("https://"));
    }

    /**
     * Loads the OAuth authorize page and reads the redirect and proxy URLs
     * from its first {@code noscript} element.
     *
     * @param clientID application id
     * @param redirectURI application callback address
     * @return map with the keys {@code data-redirect} and {@code data-proxy}
     * @throws IllegalStateException if the page has no {@code noscript} element
     */
    private HashMap<String, String> getUrlMap(String clientID, String redirectURI) {
        String url = "https://open.t.qq.com/cgi-bin/oauth2/authorize?"
                + "client_id=" + clientID
                + "&response_type=code"
                + "&redirect_uri=" + redirectURI
                + "&forcelogin=true";
        Header[] headers = new BasicHeader[] { new BasicHeader(HttpHeaders.HOST, "open.t.qq.com") };

        Document document = Jsoup.parse(httpGetDatas(url, headers));
        Element element = document.getElementsByTag("noscript").first();
        if (element == null) {
            throw new IllegalStateException("Authorize page contains no <noscript> element");
        }

        HashMap<String, String> map = new HashMap<String, String>();
        map.put("data-redirect", element.attr("data-redirect"));
        map.put("data-proxy", element.attr("data-proxy"));
        return map;
    }

    /**
     * Loads the login page and extracts {@code login_sig} from the first
     * script in its head.
     *
     * @param urlMap map returned by {@link #getUrlMap}
     * @return map with the keys {@code login_sig} and {@code login_url}
     * @throws IllegalStateException if the page does not have the expected structure
     */
    private HashMap<String, String> preLogin(HashMap<String, String> urlMap) {
        String url = "https://ui.ptlogin2.qq.com/cgi-bin/login?appid=46000101"
                + "&s_url=" + urlEncode(urlMap.get("data-redirect"))
                + "&proxy_url=" + urlEncode(urlMap.get("data-proxy"))
                + "&f_url=loginerroralert"
                + "&style=13"
                + "&daid=6"
                + "&pt_no_auth=1"
                + "&hide_close_icon=1"
                + "&link_target=blank"
                + "&target=blank"
                + "&hide_title_bar=1"
                + "&no_drop_domain=1"
                + "&dummy=1"
                + "&bgcolor=ffffff"
                + "&r=" + Math.random();
        Header[] headers = new BasicHeader[] { new BasicHeader(HttpHeaders.HOST, "ui.ptlogin2.qq.com") };

        Document document = Jsoup.parse(httpGetDatas(url, headers));
        Element headElement = document.getElementsByTag("head").first();
        Element scriptElement = headElement == null ? null : headElement.getElementsByTag("script").first();
        if (scriptElement == null) {
            throw new IllegalStateException("Login page contains no script in its head");
        }

        // The value sits between the first double quote after "login_sig:" and
        // the closing quote that precedes ",clientip".
        String script = scriptElement.html();
        int start = script.indexOf("login_sig:");
        int end = script.indexOf("\",clientip");
        if (start < 0 || end < start) {
            throw new IllegalStateException("login_sig not found in the login page script");
        }
        String loginSigField = script.substring(start, end);
        String loginSig = loginSigField.substring(loginSigField.indexOf("\"") + 1);

        HashMap<String, String> loginMap = new HashMap<String, String>();
        loginMap.put("login_sig", loginSig);
        loginMap.put("login_url", url);
        return loginMap;
    }

    /**
     * Asks the "check" endpoint whether a verification code is required.
     *
     * @param dataRedirect redirect URL obtained from the authorize page
     * @param username account name
     * @param loginSig login signature obtained by {@link #preLogin}
     * @param loginUrl login page URL, sent as the Referer
     * @return map with the keys {@code isHasVC} (first response field, "0" when
     *         no code is needed), {@code vc} (second field) and {@code saltUin}
     *         (third field)
     * @throws IllegalStateException if the response does not have the expected shape
     */
    private HashMap<String, String> isHasVC(String dataRedirect, String username, String loginSig,
            String loginUrl) {
        String url = "https://ssl.ptlogin2.qq.com/check?"
                + "regmaster="
                + "&uin=" + username
                + "&appid=46000101"
                + "&js_ver=10052"
                + "&js_type=1"
                + "&login_sig=" + loginSig
                + "&u1=" + urlEncode(dataRedirect)
                + "&r=" + Math.random();
        Header[] headers = new BasicHeader[] { new BasicHeader(HttpHeaders.REFERER, loginUrl) };

        String[] fields = callbackFields(httpGetDatas(url, headers));
        HashMap<String, String> checkVCMap = new HashMap<String, String>();
        checkVCMap.put("isHasVC", quotedField(fields, 0));
        checkVCMap.put("vc", quotedField(fields, 1));
        checkVCMap.put("saltUin", quotedField(fields, 2));
        return checkVCMap;
    }

    /**
     * Builds the URL of the verification code image for the given user.
     *
     * @param username account name
     * @return image URL; a random suffix makes every URL distinct
     */
    public String getVCode(String username) {
        return "https://ssl.captcha.qq.com/getimage?"
                + "uin=" + username
                + "&aid=46000101"
                + "&" + Math.random();
    }

    /**
     * Downloads the verification code image and writes it to {@code vc.jpg} in
     * the working directory. I/O failures are reported on stderr only.
     *
     * @param url verification code image URL
     */
    public void saveVCodeImg(String url) {
        HttpGet getImages = new HttpGet(url);
        FileOutputStream fileWrite = null;
        try {
            HttpResponse response = httpClient.execute(getImages);
            byte[] imageBytes = EntityUtils.toByteArray(response.getEntity());
            fileWrite = new FileOutputStream("vc.jpg");
            fileWrite.write(imageBytes);
        } catch (IOException e) {
            // Covers ClientProtocolException as well; release the connection.
            getImages.abort();
            e.printStackTrace();
        } finally {
            if (fileWrite != null) {
                try {
                    fileWrite.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Submits the hashed password to the login endpoint.
     *
     * @param vc verification code (or the value returned by the check request)
     * @param saltUin salt used to hash the password
     * @param dataRedirect redirect URL obtained from the authorize page
     * @param username account name
     * @param password plain-text password
     * @param loginSig login signature obtained by {@link #preLogin}
     * @param loginUrl login page URL, sent as the Referer
     * @return the check URL (third response field) when the fifth response
     *         field is the success message, otherwise that fifth field, i.e.
     *         the server's error message
     * @throws IllegalStateException if the password cannot be hashed or the
     *         response does not have the expected shape
     */
    private String finalLogin(String vc, String saltUin, String dataRedirect, String username,
            String password, String loginSig, String loginUrl) {
        String p;
        try {
            p = TencentWeiboEncryption.getPassword(saltUin, password, vc);
        } catch (Exception e) {
            // Sending "p=null" to the server would only hide the real problem.
            throw new IllegalStateException("Could not compute the password hash", e);
        }

        String url = "https://ssl.ptlogin2.qq.com/login?"
                + "u=" + urlEncode(username)
                + "&p=" + p
                + "&verifycode=" + vc
                + "&aid=46000101"
                + "&u1=" + urlEncode(dataRedirect)
                + "&h=1"
                + "&ptredirect=1"
                + "&ptlang=2052"
                + "&daid=6"
                + "&from_ui=1"
                + "&dumy="
                + "&low_login_enable=0"
                // Was "®master=": the "&reg" prefix had been decoded as an HTML entity.
                + "&regmaster="
                + "&fp=loginerroralert"
                + "&action=2-20-" + new Date().getTime()
                + "&mibao_css="
                + "&t=1"
                + "&g=1"
                + "&js_ver=10052"
                + "&js_type=1"
                + "&login_sig=" + loginSig
                + "&pt_rsa=0";
        Header[] headers = new BasicHeader[] { new BasicHeader(HttpHeaders.REFERER, loginUrl) };

        String[] fields = callbackFields(httpGetDatas(url, headers));
        String checkUrl = quotedField(fields, 2);
        String loginResult = quotedField(fields, 4);
        // NOTE(review): compared byte-for-byte with the server message; confirm the
        // punctuation (ASCII vs full-width) matches the live response.
        if (loginResult.equals("登录成功!")) {
            return checkUrl;
        }
        return loginResult;
    }

    /**
     * Requests the check URL and parses the authorization result.
     *
     * <p>The {@code content} attribute of the first {@code meta} element is
     * read; everything after its first {@code ?} is split on {@code &} and
     * loaded as {@code key=value} properties.
     *
     * @param loginUrl login page URL, sent as the Referer
     * @param checkSigUrl URL returned by a successful {@link #finalLogin}
     * @return the parsed properties, or {@code null} if {@code checkSigUrl} is {@code null}
     * @throws IllegalStateException if the response has no {@code meta} element
     *         or its content cannot be parsed
     */
    private Properties authorize(String loginUrl, String checkSigUrl) {
        if (checkSigUrl == null) {
            return null;
        }
        Header[] headers = new BasicHeader[] { new BasicHeader(HttpHeaders.REFERER, loginUrl) };
        Document document = Jsoup.parse(httpGetDatas(checkSigUrl, headers));
        Element element = document.getElementsByTag("meta").first();
        if (element == null) {
            throw new IllegalStateException("Authorization response contains no <meta> element");
        }

        String content = element.attr("content");
        String query = content.substring(content.indexOf("?") + 1);
        String propStr = query.replace("&", "\n");

        Properties prop = new Properties();
        try {
            // Properties.load(InputStream) decodes ISO-8859-1, so encode with the same charset.
            prop.load(new ByteArrayInputStream(propStr.getBytes("ISO-8859-1")));
        } catch (IOException e) {
            throw new IllegalStateException("Could not parse the authorization response", e);
        }
        return prop;
    }

    /**
     * Issues a GET request and returns the response body as text.
     *
     * @param url page to request
     * @param headers request headers
     * @return response body
     * @throws NullPointerException if {@code url} is {@code null}
     * @throws IllegalStateException if the request fails
     */
    private String httpGetDatas(String url, Header[] headers) {
        if (url == null) {
            throw new NullPointerException("URL is null");
        }
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeaders(headers);
        try {
            HttpResponse httpResponse = httpClient.execute(httpGet);
            return EntityUtils.toString(httpResponse.getEntity());
        } catch (IOException e) {
            // Covers ClientProtocolException as well; release the connection.
            httpGet.abort();
            // The query string may carry the account name and password hash; keep it out of the message.
            int queryStart = url.indexOf('?');
            String target = queryStart < 0 ? url : url.substring(0, queryStart);
            throw new IllegalStateException("GET request failed: " + target, e);
        }
    }

    /**
     * Splits a response of the form {@code name(field,field,...);} into its
     * raw, still quoted fields.
     */
    private static String[] callbackFields(String response) {
        int start = response.indexOf("(") + 1;
        int end = response.indexOf(");");
        if (end < start) {
            throw new IllegalStateException("Response is not of the form name(...);");
        }
        return response.substring(start, end).split(",");
    }

    /** Returns the text between the first and the last single quote of the given field. */
    private static String quotedField(String[] fields, int index) {
        if (index >= fields.length) {
            throw new IllegalStateException(
                    "Response has " + fields.length + " fields, expected at least " + (index + 1));
        }
        String raw = fields[index];
        int start = raw.indexOf("'") + 1;
        int end = raw.lastIndexOf("'");
        if (end < start) {
            throw new IllegalStateException("Response field " + index + " is not quoted");
        }
        return raw.substring(start, end);
    }

    /** URL-encodes a value as UTF-8. */
    private static String urlEncode(String value) {
        try {
            return URLEncoder.encode(value, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Loads {@code cfg.properties} from the context class loader.
     *
     * @return the loaded configuration
     * @throws IllegalStateException if the resource is missing or unreadable
     */
    public Properties initProperties() {
        InputStream inputStream = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("cfg.properties");
        if (inputStream == null) {
            throw new IllegalStateException("cfg.properties was not found on the classpath");
        }
        Properties properties = new Properties();
        try {
            properties.load(inputStream);
        } catch (IOException e) {
            throw new IllegalStateException("Could not read cfg.properties", e);
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return properties;
    }

    /**
     * Console demo: logs in, asks for the verification code on stdin when one
     * is required (with one retry), and prints the resulting OAuth instance.
     */
    public static void main(String[] args) {
        TencentWeiboLoginer loginer = new TencentWeiboLoginer();
        TLoginParams tLoginParams = loginer.doLogin("", "");

        if (tLoginParams.getProp() != null) {
            // No verification code was required.
            Properties codeProp = tLoginParams.getProp();
            System.out.println(TencentWeiboOAuth.getOAuthV2Instance(codeProp));
            return;
        }
        if (!tLoginParams.isLogin()) {
            System.err.println("登录失败");
            return;
        }

        // A verification code is required.
        String saltUin = tLoginParams.getSaltUin();
        String dataRedirect = tLoginParams.getDataRedirect();
        String loginSig = tLoginParams.getLoginSig();
        String loginUrl = tLoginParams.getLoginUrl();
        String imgUrl = tLoginParams.getImgURl();
        // The verification code URL to hand back to the user.
        System.err.println(imgUrl);
        // Test fetching the verification code again.
        imgUrl = loginer.getVCode("");
        // Save the image so that it can be inspected while testing.
        loginer.saveVCodeImg(imgUrl);

        // One scanner serves both prompts; a second one on System.in could lose buffered input.
        Scanner input = new Scanner(System.in);
        String vc = input.nextLine();
        TLoginParams loginresult = loginer.doLoginByVC(vc, saltUin, dataRedirect, "", "", loginSig, loginUrl);

        if (!loginresult.isLogin()) {
            // Wrong verification code: fetch a new one and try once more.
            System.err.println("验证码错误!重新录入");
            imgUrl = loginer.getVCode("");
            loginer.saveVCodeImg(imgUrl);
            String vc1 = input.nextLine();
            Properties codeProp = loginer.doLoginByVC(vc1, saltUin, dataRedirect, "", "", loginSig, loginUrl)
                    .getProp();
            System.out.println(TencentWeiboOAuth.getOAuthV2Instance(codeProp));
        } else {
            // The verification code was accepted: print the result.
            Properties codeProp = loginresult.getProp();
            System.out.println(TencentWeiboOAuth.getOAuthV2Instance(codeProp));
        }
    }
}
上述代码完整模拟了腾讯微博的登陆过程,并最终获得授权
参考地址:http://www.cnblogs.com/zhengbing/p/3459249.html
作者:EliteQing
出处:http://www.cnblogs.com/liinux/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。
欢迎加入网络爬虫QQ群:322937592 ;数据分析&网络爬虫
网络爬虫模拟登录开源项目ghost-login:ghost-login
微信订阅号:网络爬虫AI数据分析【WebCrawlerAIDA】