今日头条反反爬思路总结
前言
Python 爬虫相关技术实操,爬取今日头条(www.toutiao.com)相关思路总结
一、分析步骤
1.1、内容刷新机制分析
浏览器访问域名 www.toutiao.com 进入头条主页, 滚动窗口查看更多内容, 新内容是动态加载的; 打开浏览器开发者工具(F12)监控 Elements 面板下的内容, 动态生成的标签如下(头条主页内容采用异步请求动态加载)
1.2、网络请求分析
重新加载页面资源(Ctrl + F5), 切换到 Network 面板下 XHR 选项卡查看所有的 XHR 类型的请求
初始的所有 XHR 类型请求:
向下滚动窗口, 直到加载新的内容停止滚动, 此时出现新的 XHR 类型请求:
对比两个相似的请求:
点击以查看其详细信息 -> Headers 中包含了:
- Request Headers(请求头)
- Query String Parameters(查询字符串参数)
- ?min_behot_time=0.... 如下:
- ?max_behot_time=156955... 如下:
对比后发现这两个请求只有部分请求参数是变动的:
- min_behot_time / max_behot_time
- as
- cp
接下来的分析着重考虑这三个参数的生成机制
1.3、参数生成机制分析
Preview(预览响应内容)查看响应数据和其结构
?min_behot_time=0.... 如下:
?min_behot_time=... 这个请求的响应中, 包含了 ?max_behot_time=... 这个请求所需的 max_behot_time 参数值, 即 "next: {max_behot_time: 1569556156}"
分析 min_behot_time=... 这个请求可能为获取到初始的动态加载内容
?min_behot_time=0.... 如下:
?max_behot_time=0.... 如下:
利用正则全局搜索参数 as 和 cp,发现一个名为 index.d337d64118bf9b864485.js 的文件中存在匹配项,继而发现自定制的加密算法:
接下来采用 Debug 调试 JavaScript 代码, 了解上述参数的具体生成机制
1.4、Debug 调试网页 JavaScript 代码
首先找到 index.d337d64118bf9b864485.js 文件:
打开文件, 接着找到加密函数 a() 添加断点(了解其机制):
接下来要找到 ?min_behot_time=... 和 ?max_behot_time=... 这两个异步请求是如何发起的
Initiator 标记请求是由哪个对象或进程发起的(请求源):
- 跳转到 Sources 面板:
- 查看格式化后的代码, 发现发起请求的外层函数 l(t) , 添加断点进行调试(了解其机制):
- 清空所有 XHR 类型的请求:
清空后:
- 重新加载页面(Ctrl + F5), 在第一个断点处暂停:
- 这时查看所有的 XHR 类型的请求:
证明了上面的分析结果: 『分析 min_behot_time=... 这个请求可能为获取到初始的动态加载内容』
1.4.1、Debug
- 获取请求路径:
- 获取动态参数 as 和 cp:
变量 i 赋值时 (0, o.default)(t) 发生跳转, 相当于执行了 k(t):
调用 k(t) 后一系列连锁调用:
分析上面的代码发现其非常类似 MD5 信息摘要算法:
可查询 md5.js 进行比对:
/*
* A JavaScript implementation of the RSA Data Security, Inc. MD5 Message
* Digest Algorithm, as defined in RFC 1321.
* Version 2.1 Copyright (C) Paul Johnston 1999 - 2002.
* Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
* Distributed under the BSD License
* See http://pajhome.org.uk/crypt/md5 for more info.
*/
// Tunable settings read by the helper functions of this script. The defaults
// work in most cases; change them only to match what the server side expects.

// Hex output case: 0 selects lowercase digits, 1 selects uppercase digits.
var hexcase = 0;
// Base-64 pad character; set to "=" for strict RFC compliance.
var b64pad = "";
// Bits consumed per input character: 8 for ASCII, 16 for Unicode.
var chrsz = 8;
/*
 * Public entry points. Every function takes string arguments and returns the
 * result encoded as hex (hex_*), base-64 (b64_*) or a raw string (str_*).
 * The *_md5 variants hash one string; the *_hmac_md5 variants take a key and
 * the data and delegate to core_hmac_md5.
 */
function hex_md5(s) {
  var words = str2binl(s);
  return binl2hex(core_md5(words, s.length * chrsz));
}

function b64_md5(s) {
  var words = str2binl(s);
  return binl2b64(core_md5(words, s.length * chrsz));
}

function str_md5(s) {
  var words = str2binl(s);
  return binl2str(core_md5(words, s.length * chrsz));
}

function hex_hmac_md5(key, data) {
  var mac = core_hmac_md5(key, data);
  return binl2hex(mac);
}

function b64_hmac_md5(key, data) {
  var mac = core_hmac_md5(key, data);
  return binl2b64(mac);
}

function str_hmac_md5(key, data) {
  var mac = core_hmac_md5(key, data);
  return binl2str(mac);
}
/*
 * Self-test: hashes the string "abc" with hex_md5 and compares the result
 * with the expected digest. Returns true when the two agree.
 */
function md5_vm_test()
{
  var expected = "900150983cd24fb0d6963f7d28e17f72";
  var actual = hex_md5("abc");
  return actual == expected;
}
/*
* Calculate the MD5 of an array of little-endian words, and a bit length.
*
* x   - array of 32-bit words holding the message; it is modified in place
*       by the padding step below.
* len - message length in bits.
*
* Returns a new array of four 32-bit words (a, b, c, d).
*/
function core_md5(x, len)
{
/* append padding: set a 0x80 byte at bit offset len, then store the bit
   length in word 14 of the last 16-word block */
x[len >> 5] |= 0x80 << ((len) % 32);
x[(((len + 64) >>> 9) << 4) + 14] = len;
/* initial values of the four state words */
var a = 1732584193;
var b = -271733879;
var c = -1732584194;
var d = 271733878;
/* process the message one 16-word block at a time */
for(var i = 0; i < x.length; i += 16)
{
/* remember the state on entry; it is added back after the 64 steps */
var olda = a;
var oldb = b;
var oldc = c;
var oldd = d;
/* steps 1-16: md5_ff */
a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936);
d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586);
c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819);
b = md5_ff(b, c, d, a, x[i+ 3], 22, -1044525330);
a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897);
d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426);
c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341);
b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983);
a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416);
d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417);
c = md5_ff(c, d, a, b, x[i+10], 17, -42063);
b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162);
a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682);
d = md5_ff(d, a, b, c, x[i+13], 12, -40341101);
c = md5_ff(c, d, a, b, x[i+14], 17, -1502002290);
b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329);
/* steps 17-32: md5_gg */
a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510);
d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632);
c = md5_gg(c, d, a, b, x[i+11], 14, 643717713);
b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302);
a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691);
d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083);
c = md5_gg(c, d, a, b, x[i+15], 14, -660478335);
b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848);
a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438);
d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690);
c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961);
b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501);
a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467);
d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784);
c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473);
b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734);
/* steps 33-48: md5_hh */
a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558);
d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463);
c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562);
b = md5_hh(b, c, d, a, x[i+14], 23, -35309556);
a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060);
d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353);
c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632);
b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640);
a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174);
d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222);
c = md5_hh(c, d, a, b, x[i+ 3], 16, -722521979);
b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189);
a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487);
d = md5_hh(d, a, b, c, x[i+12], 11, -421815835);
c = md5_hh(c, d, a, b, x[i+15], 16, 530742520);
b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651);
/* steps 49-64: md5_ii */
a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844);
d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415);
c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905);
b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055);
a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571);
d = md5_ii(d, a, b, c, x[i+ 3], 10, -1894986606);
c = md5_ii(c, d, a, b, x[i+10], 15, -1051523);
b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799);
a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359);
d = md5_ii(d, a, b, c, x[i+15], 10, -30611744);
c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380);
b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649);
a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070);
d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379);
c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259);
b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551);
/* fold this block's result into the running state (wrapping 32-bit add) */
a = safe_add(a, olda);
b = safe_add(b, oldb);
c = safe_add(c, oldc);
d = safe_add(d, oldd);
}
return Array(a, b, c, d);
}
/*
 * md5_cmn, md5_ff, md5_gg, md5_hh and md5_ii implement the four basic
 * operations the algorithm uses.
 *
 * md5_cmn is the part they share: it adds q, a, x and t, rotates the sum
 * left by s bits, then adds b. Every addition goes through safe_add.
 */
function md5_cmn(q, a, b, x, s, t)
{
  var sum = safe_add(safe_add(a, q), safe_add(x, t));
  var rotated = bit_rol(sum, s);
  return safe_add(rotated, b);
}
/* Step used for the first 16 steps of core_md5: mixes b, c, d as (b & c) | (~b & d). */
function md5_ff(a, b, c, d, x, s, t)
{
  var mixed = (b & c) | ((~b) & d);
  return md5_cmn(mixed, a, b, x, s, t);
}
/* Step used for steps 17-32 of core_md5: mixes b, c, d as (b & d) | (c & ~d). */
function md5_gg(a, b, c, d, x, s, t)
{
  var mixed = (b & d) | (c & (~d));
  return md5_cmn(mixed, a, b, x, s, t);
}
/* Step used for steps 33-48 of core_md5: mixes b, c, d as b ^ c ^ d. */
function md5_hh(a, b, c, d, x, s, t)
{
  var mixed = b ^ c ^ d;
  return md5_cmn(mixed, a, b, x, s, t);
}
/* Step used for steps 49-64 of core_md5: mixes b, c, d as c ^ (b | ~d). */
function md5_ii(a, b, c, d, x, s, t)
{
  var mixed = c ^ (b | (~d));
  return md5_cmn(mixed, a, b, x, s, t);
}
/*
 * Calculate the HMAC-MD5 of a key and some data (both strings).
 * Returns the word array produced by the outer core_md5 call.
 */
function core_hmac_md5(key, data)
{
  var keyWords = str2binl(key);
  // Keys longer than 16 words are replaced by their own digest.
  if (keyWords.length > 16) {
    keyWords = core_md5(keyWords, key.length * chrsz);
  }
  // Build the two 16-word pads by XOR-ing the key words with fixed patterns.
  var innerPad = [];
  var outerPad = [];
  for (var n = 0; n < 16; n++) {
    innerPad.push(keyWords[n] ^ 0x36363636);
    outerPad.push(keyWords[n] ^ 0x5C5C5C5C);
  }
  // Inner hash covers pad + data; outer hash covers pad + inner digest.
  var innerDigest = core_md5(innerPad.concat(str2binl(data)), 512 + data.length * chrsz);
  return core_md5(outerPad.concat(innerDigest), 512 + 128);
}
/*
 * Add two integers with wrap-around at 2^32. The sum is built from 16-bit
 * halves to work around bugs in some JS interpreters.
 */
function safe_add(x, y)
{
  var low = (x & 0xFFFF) + (y & 0xFFFF);
  var carry = low >> 16;
  var high = (x >> 16) + (y >> 16) + carry;
  return (high << 16) | (low & 0xFFFF);
}
/*
 * Rotate a 32-bit number left by cnt bits: the bits shifted out at the top
 * re-enter at the bottom.
 */
function bit_rol(num, cnt)
{
  var wrapped = num >>> (32 - cnt);
  var shifted = num << cnt;
  return shifted | wrapped;
}
/*
 * Pack a string into an array of little-endian 32-bit words, chrsz bits per
 * character. With chrsz set to 8 (ASCII), the high byte of any character
 * above 255 is silently dropped.
 */
function str2binl(str)
{
  var words = [];
  var charMask = (1 << chrsz) - 1;
  for (var pos = 0; pos < str.length; pos++) {
    var bitOffset = pos * chrsz;
    var code = str.charCodeAt(pos) & charMask;
    // Word index is bitOffset / 32; the character lands at bitOffset % 32.
    words[bitOffset >> 5] |= code << (bitOffset % 32);
  }
  return words;
}
/*
 * Unpack an array of little-endian 32-bit words into a string, reading
 * chrsz bits per character.
 */
function binl2str(bin)
{
  var charMask = (1 << chrsz) - 1;
  var totalBits = bin.length * 32;
  var pieces = [];
  for (var bit = 0; bit < totalBits; bit += chrsz) {
    var word = bin[bit >> 5];
    pieces.push(String.fromCharCode((word >>> (bit % 32)) & charMask));
  }
  return pieces.join("");
}
/*
 * Render an array of little-endian 32-bit words as a hex string, two digits
 * per byte, lowest byte of each word first. hexcase picks the digit case.
 */
function binl2hex(binarray)
{
  var digits = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
  var byteCount = binarray.length * 4;
  var out = "";
  for (var n = 0; n < byteCount; n++) {
    var word = binarray[n >> 2];
    var shift = (n % 4) * 8;
    // High nibble first, then low nibble.
    out += digits.charAt((word >> (shift + 4)) & 0xF);
    out += digits.charAt((word >> shift) & 0xF);
  }
  return out;
}
/*
 * Render an array of little-endian 32-bit words as a base-64 string.
 * Bytes are consumed three at a time; positions that fall past the end of
 * the input emit b64pad instead of a base-64 character.
 */
function binl2b64(binarray)
{
  var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  var totalBits = binarray.length * 32;

  // Byte `index` of the input, counting from the low byte of word 0.
  function byteAt(index) {
    return (binarray[index >> 2] >> (8 * (index % 4))) & 0xFF;
  }

  var out = "";
  for (var offset = 0; offset < binarray.length * 4; offset += 3) {
    var group = (byteAt(offset) << 16) | (byteAt(offset + 1) << 8) | byteAt(offset + 2);
    for (var k = 0; k < 4; k++) {
      var pastEnd = offset * 8 + k * 6 > totalBits;
      out += pastEnd ? b64pad : alphabet.charAt((group >> (6 * (3 - k))) & 0x3F);
    }
  }
  return out;
}
获得结果值,利用 Python 标准库 hashlib 验证结果是否与以上分析相吻合:
结果相同, 证明上述分析正确:
拼接查询参数:
构建异步请求:
设置请求头:
发起请求后回到最初断点处:
完成上述步骤后, 在 Network 面板下 XHR 选项卡中可以看到 ?min_behot_time=0 请求已完成, 其中查询字符串参数完全符合上述步骤中所生成的动态参数
到此分析过程结束
二、部分代码展示
导入相关模块:
import datetime
import json
import hashlib
import math
import time
from urllib.parse import urlencode
import execjs
import requests
import xlsxwriter
from pymongo import MongoClient
配置:
# Create the database connection
client = MongoClient("localhost", 27017)
# Initialise the database handle
db = client["toutiao"]
# Target address
url = "https://www.toutiao.com/"
# Request headers.
# The User-Agent is assembled by implicit string concatenation, so every
# piece except the last must end with its own separating space; without it
# the product tokens run together ("...x64)AppleWebKit/...Gecko)Chrome/...").
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3907.0 Safari/537.36 Edg/79.0.279.0"
    )
}
# Instantiate the session object
session = requests.Session()
# Request the home page once so the session obtains cookies
session.get(url=url, headers=headers)
# Initial URL template. The last two placeholders are the `as` and `cp`
# values; the first one presumably takes the min_behot_time/max_behot_time
# query pair -- confirm against the caller.
start_url = "https://www.toutiao.com/api/pc/feed/?{}&category=__all__&utm_source=toutiao&widen=1&tadrequire=true&as={}&cp={}"
2.1、参数生成方式(一)
def get_md5(_str):
    """Return the MD5 digest of ``_str`` as a lowercase hex string.

    The text is converted to bytes with ``str.encode()`` (UTF-8 by default)
    before hashing.
    """
    md5 = hashlib.md5()
    md5.update(_str.encode())
    return md5.hexdigest()
def get_params(timestamp=None):
    """Build the ``as`` and ``cp`` query parameters for the feed request.

    Both values interleave the upper-case hex form of a Unix timestamp (in
    seconds) with characters from the upper-case MD5 hex digest of the same
    timestamp written in decimal.

    Args:
        timestamp: Unix time in seconds, as an int or a numeric string.
            Defaults to the current time.

    Returns:
        dict with the keys ``"as"`` and ``"cp"``. When the hex timestamp is
        not exactly 8 characters long, a fixed fallback pair is returned.
    """
    if timestamp is None:
        timestamp = math.floor(time.time())
    seconds = int(timestamp)
    t = str(seconds)
    # The page script upper-cases the hex timestamp (toString(16).toUpperCase());
    # hex() alone yields lower-case digits and so produces different values.
    e = format(seconds, "X")
    if len(e) != 8:
        return {
            "as": "479BB4B7254C150",
            "cp": "7E0AC8874BB0985"
        }
    i = get_md5(t).upper()
    head, tail = i[:5], i[-5:]
    # "as": first five digest characters interleaved with hex digits 0-4.
    s = "".join(head[k] + e[k] for k in range(5))
    # "cp": hex digits 3-7 interleaved with the last five digest characters.
    l = "".join(e[k + 3] + tail[k] for k in range(5))
    return {
        "as": "A1" + s + e[-3:],
        "cp": e[:3] + l + "E1"
    }
2.2、参数生成方式(二)
基于以上分析, 将网页中的相关 JavaScript 代码整理为文件 gener-params.js, 内容如下:
/*
* m(n): returns the lowercase hex digest of n, computed by the digest
* routine below (its constants and step order match the md5.js reference
* listing). All helpers are private to this function; the entry point is the
* final call k(n, 0, 0), which selects the "no key, hex output" path.
*/
function m (n) {
// s: add two integers with wrap-around at 2^32, built from 16-bit halves.
function s(t, e) {
var i = (65535 & t) + (65535 & e),
n = (t >> 16) + (e >> 16) + (i >> 16);
return n << 16 | 65535 & i
}
// o: rotate the 32-bit value t left by e bits.
function o(t, e) {
return t << e | t >>> 32 - e
}
// r: shared step -- add t, e, n and r, rotate left by a bits, then add i.
function r(t, e, i, n, a, r) {
return s(o(s(s(e, t), s(n, r)), a), i)
}
// l, u, c, d: the four step functions; each mixes e, i, n with a different
// bitwise expression and delegates to r.
function l(t, e, i, n, a, s, o) {
return r(e & i | ~e & n, t, e, a, s, o)
}
function u(t, e, i, n, a, s, o) {
return r(e & n | i & ~n, t, e, a, s, o)
}
function c(t, e, i, n, a, s, o) {
return r(e ^ i ^ n, t, e, a, s, o)
}
function d(t, e, i, n, a, s, o) {
return r(i ^ (e | ~n), t, e, a, s, o)
}
// h: digest core. t is an array of 32-bit words (modified in place by the
// padding step), e is the message length in bits. Returns four state words.
function h(t, e) {
// Padding: set a 128 (0x80) byte at bit offset e, then store the bit length.
t[e >> 5] |= 128 << e % 32,
t[(e + 64 >>> 9 << 4) + 14] = e;
var i, n, a, o, r, h = 1732584193,
_ = -271733879,
m = -1732584194,
p = 271733878;
// The whole loop body is one comma expression: save the state, run 64
// steps over the current 16-word block, then add the saved state back.
for (i = 0; i < t.length; i += 16)
n = h,
a = _,
o = m,
r = p,
// steps 1-16: l
h = l(h, _, m, p, t[i], 7, -680876936),
p = l(p, h, _, m, t[i + 1], 12, -389564586),
m = l(m, p, h, _, t[i + 2], 17, 606105819),
_ = l(_, m, p, h, t[i + 3], 22, -1044525330),
h = l(h, _, m, p, t[i + 4], 7, -176418897),
p = l(p, h, _, m, t[i + 5], 12, 1200080426),
m = l(m, p, h, _, t[i + 6], 17, -1473231341),
_ = l(_, m, p, h, t[i + 7], 22, -45705983),
h = l(h, _, m, p, t[i + 8], 7, 1770035416),
p = l(p, h, _, m, t[i + 9], 12, -1958414417),
m = l(m, p, h, _, t[i + 10], 17, -42063),
_ = l(_, m, p, h, t[i + 11], 22, -1990404162),
h = l(h, _, m, p, t[i + 12], 7, 1804603682),
p = l(p, h, _, m, t[i + 13], 12, -40341101),
m = l(m, p, h, _, t[i + 14], 17, -1502002290),
_ = l(_, m, p, h, t[i + 15], 22, 1236535329),
// steps 17-32: u
h = u(h, _, m, p, t[i + 1], 5, -165796510),
p = u(p, h, _, m, t[i + 6], 9, -1069501632),
m = u(m, p, h, _, t[i + 11], 14, 643717713),
_ = u(_, m, p, h, t[i], 20, -373897302),
h = u(h, _, m, p, t[i + 5], 5, -701558691),
p = u(p, h, _, m, t[i + 10], 9, 38016083),
m = u(m, p, h, _, t[i + 15], 14, -660478335),
_ = u(_, m, p, h, t[i + 4], 20, -405537848),
h = u(h, _, m, p, t[i + 9], 5, 568446438),
p = u(p, h, _, m, t[i + 14], 9, -1019803690),
m = u(m, p, h, _, t[i + 3], 14, -187363961),
_ = u(_, m, p, h, t[i + 8], 20, 1163531501),
h = u(h, _, m, p, t[i + 13], 5, -1444681467),
p = u(p, h, _, m, t[i + 2], 9, -51403784),
m = u(m, p, h, _, t[i + 7], 14, 1735328473),
_ = u(_, m, p, h, t[i + 12], 20, -1926607734),
// steps 33-48: c
h = c(h, _, m, p, t[i + 5], 4, -378558),
p = c(p, h, _, m, t[i + 8], 11, -2022574463),
m = c(m, p, h, _, t[i + 11], 16, 1839030562),
_ = c(_, m, p, h, t[i + 14], 23, -35309556),
h = c(h, _, m, p, t[i + 1], 4, -1530992060),
p = c(p, h, _, m, t[i + 4], 11, 1272893353),
m = c(m, p, h, _, t[i + 7], 16, -155497632),
_ = c(_, m, p, h, t[i + 10], 23, -1094730640),
h = c(h, _, m, p, t[i + 13], 4, 681279174),
p = c(p, h, _, m, t[i], 11, -358537222),
m = c(m, p, h, _, t[i + 3], 16, -722521979),
_ = c(_, m, p, h, t[i + 6], 23, 76029189),
h = c(h, _, m, p, t[i + 9], 4, -640364487),
p = c(p, h, _, m, t[i + 12], 11, -421815835),
m = c(m, p, h, _, t[i + 15], 16, 530742520),
_ = c(_, m, p, h, t[i + 2], 23, -995338651),
// steps 49-64: d
h = d(h, _, m, p, t[i], 6, -198630844),
p = d(p, h, _, m, t[i + 7], 10, 1126891415),
m = d(m, p, h, _, t[i + 14], 15, -1416354905),
_ = d(_, m, p, h, t[i + 5], 21, -57434055),
h = d(h, _, m, p, t[i + 12], 6, 1700485571),
p = d(p, h, _, m, t[i + 3], 10, -1894986606),
m = d(m, p, h, _, t[i + 10], 15, -1051523),
_ = d(_, m, p, h, t[i + 1], 21, -2054922799),
h = d(h, _, m, p, t[i + 8], 6, 1873313359),
p = d(p, h, _, m, t[i + 15], 10, -30611744),
m = d(m, p, h, _, t[i + 6], 15, -1560198380),
_ = d(_, m, p, h, t[i + 13], 21, 1309151649),
h = d(h, _, m, p, t[i + 4], 6, -145523070),
p = d(p, h, _, m, t[i + 11], 10, -1120210379),
m = d(m, p, h, _, t[i + 2], 15, 718787259),
_ = d(_, m, p, h, t[i + 9], 21, -343485551),
// add the state saved at the top of this iteration
h = s(h, n),
_ = s(_, a),
m = s(m, o),
p = s(p, r);
return [h, _, m, p]
}
// _: unpack little-endian 32-bit words into a string, one character per byte.
function _(t) {
var e, i = "";
for (e = 0; e < 32 * t.length; e += 8)
i += String.fromCharCode(t[e >> 5] >>> e % 32 & 255);
return i
}
// m (inner, shadows the outer m): pack a string into little-endian 32-bit
// words, 8 bits per character; the array is pre-filled with zeros.
function m(t) {
var e, i = [];
for (i[(t.length >> 2) - 1] = void 0, e = 0; e < i.length; e += 1)
i[e] = 0;
for (e = 0; e < 8 * t.length; e += 8)
i[e >> 5] |= (255 & t.charCodeAt(e / 8)) << e % 32;
return i
}
// p: raw (byte-string) digest of the byte string t.
function p(t) {
return _(h(m(t), 8 * t.length))
}
// f: keyed digest of data e under key t. The key words are XOR-ed with
// 909522486 (0x36363636) and 1549556828 (0x5C5C5C5C) to form the inner and
// outer pads; keys longer than 16 words are digested first.
function f(t, e) {
var i, n, a = m(t),
s = [],
o = [];
for (s[15] = o[15] = void 0, a.length > 16 && (a = h(a, 8 * t.length)), i = 0; i < 16; i += 1)
s[i] = 909522486 ^ a[i],
o[i] = 1549556828 ^ a[i];
return n = h(s.concat(m(e)), 512 + 8 * e.length),
_(h(o.concat(n), 640))
}
// g: encode each character code of t as two lowercase hex digits.
function g(t) {
var e, i, n = "0123456789abcdef",
a = "";
for (i = 0; i < t.length; i += 1)
e = t.charCodeAt(i),
a += n.charAt(e >>> 4 & 15) + n.charAt(15 & e);
return a
}
// v: convert t to a string of UTF-8 bytes (one character per byte).
function v(t) {
return unescape(encodeURIComponent(t))
}
// w: raw digest of t after UTF-8 conversion.
function w(t) {
return p(v(t))
}
// y: hex digest of t.
function y(t) {
return g(w(t))
}
// b: raw keyed digest of e under key t.
function b(t, e) {
return f(v(t), v(e))
}
// x: hex keyed digest of e under key t.
function x(t, e) {
return g(b(t, e))
}
// k: dispatcher. t is the data, e an optional key, i selects raw output.
//   key and raw -> b(e, t); key only -> x(e, t);
//   raw only    -> w(t);    neither  -> y(t).
function k(t, e, i) {
return e ? i ? b(e, t) : x(e, t) : i ? w(t) : y(t)
}
return k(n, 0,0);
}
/*
 * o(s): builds the `as` and `cp` request parameters from a Unix timestamp in
 * seconds.
 *
 * s - the timestamp, as a decimal string or a number.
 *
 * Returns an object { as, cp }. Both values interleave the upper-case hex
 * timestamp with characters of the upper-case digest m(timestamp). When the
 * hex timestamp is not exactly 8 characters long, a fixed fallback pair is
 * returned instead.
 */
function o(s) {
    // Always parse as decimal; without a radix a "0x..." input is read as hex.
    var seconds = parseInt(s, 10);
    var hexTime = seconds.toString(16).toUpperCase();
    if (hexTime.length !== 8) {
        return {
            as: "479BB4B7254C150",
            cp: "7E0AC8874BB0985"
        };
    }
    var digest = m(seconds).toString().toUpperCase();
    var head = digest.slice(0, 5);
    var tail = digest.slice(-5);
    var asBody = "";
    var cpBody = "";
    // Dedicated loop index: the original reused the parameter name `s` and
    // shadowed the function's own name with a local `o`.
    for (var k = 0; k < 5; k++) {
        // as: digest characters 0-4 interleaved with hex digits 0-4.
        asBody += head.charAt(k) + hexTime.charAt(k);
        // cp: hex digits 3-7 interleaved with the last five digest characters.
        cpBody += hexTime.charAt(k + 3) + tail.charAt(k);
    }
    return {
        as: "A1" + asBody + hexTime.slice(-3),
        cp: hexTime.slice(0, 3) + cpBody + "E1"
    };
}
定义获取参数的方法:
def get_params(timestamp=None):
    """Generate the ``as``/``cp`` parameters by running gener-params.js.

    Reads ``./gener-params.js``, compiles it with execjs and calls its ``o``
    function with the timestamp as a decimal string.

    Args:
        timestamp: Unix time in seconds, as an int or a numeric string.
            Defaults to the current time.

    Returns:
        Whatever ``o`` returns, as converted by execjs (an object with the
        keys ``as`` and ``cp`` in the script).
    """
    if timestamp is None:
        timestamp = math.floor(time.time())
    timestamp = str(int(timestamp))
    with open("./gener-params.js", 'r', encoding="utf-8") as f:
        js = f.read()
    result = execjs.compile(js)
    return result.call("o", timestamp)
定义生成 xlsx 文件的方法:
def data2xls(data_list):
    """Write the records in ``data_list`` to ``<today's date>.xlsx``.

    Row 0 holds the column titles; each record then fills one row, with one
    cell per title (missing keys are written as ``None``).

    Args:
        data_list: iterable of dict-like records; values are looked up with
            ``record.get(title)`` for every entry of ``title_list``.
    """
    title_list = ["chinese_tag", "media_avatar_url", "title", "abstract", "tag", "source_url", "source", "media_url"]
    # Column widths, in the same order as title_list.
    length_list = [20, 65, 70, 255, 25, 30, 20, 75]

    workbook = xlsxwriter.Workbook('{}.xlsx'.format(datetime.date.today()))
    cell_format = workbook.add_format({'border': 1, 'text_wrap': 1})
    merge_format = workbook.add_format({'bold': True, 'border': 1, 'text_wrap': 1})
    worksheet = workbook.add_worksheet("首页新闻")

    # Header row and column widths.
    for col, (title, width) in enumerate(zip(title_list, length_list)):
        worksheet.write(0, col, title, merge_format)
        worksheet.set_column(col, col, width)

    # One row per record, one cell per column. The previous single loop wrote
    # only cell (row, i) for record i -- a diagonal -- and raised IndexError
    # as soon as there were more records than columns.
    for row, record in enumerate(data_list, start=1):
        for col, title in enumerate(title_list):
            worksheet.write(row, col, record.get(title), cell_format)

    workbook.close()
三、部分数据展示
【END】