简单抓取小程序大全,并展示

前言:想利用小程序导航页面来提升网站的流量,于是找到了小程序导航网站 www.xcxdh666.com。

分析网页

      1发现网站其实也是用异步分页请求加载数据的,所以根本用不着xpath解析html,直接分析其请求URL

      2点击“加载更多”找到请求,发现其实就是pageNum、category两个参数

      3所以直接请求URL,带入参数,分析其返回的json结果

编写代码

        1首先建立接收类型

            public class XcxApplet

{

public int id{get;set;}

public string categoryName{get;set;}

public string name {get;set;}

public string saomaUrl{get;set;}

public string sum{get;set;}

public string logoUrl{get;set;}

}

public class Result

{

public List<XcxApplet> dataList{get;set;}

public string category{get;set;}

public int status{get;set;}

public int pageNum{get;set;}

}

2 封装请求页面方法

public static string GetPostPage(this string posturl,string postData)

{

Encoding encoding=Encoding.UTF8;

byte[] data=null;

if(!string.IsNullOrEmpty(postData)) data=encoding.GetBytes(postData);

try

{

//设置参数

var request=WebRequest.Create(posturl) as HttpWebRequest;

if(request ==null) return string.Empty;

var cookieContainer=new CookieContainer();

request.cookieContainer=cookieContainer();

request.AllowAutoRedirect=true;

request.Method="POST";

request.ContentType="application/x-www-form=urlencoded";

if(data !=null)

{

request.ContentLength=data.Length;

Stream outstream=request.GetRequestStream();

outstream.Write(data,0,data.Length);

outstream.Close();

}

//发送请求并获取相应回应数据

var response=request.GetResponse() as HttpWebResponse;

if(response==null)return string.Empty;

//直到request.GetResponse()程序才开始向目标网页发送POST请求

Stream instream =response.GetResponseStream();

if(instream==null)return string.Empty;

var sr=new StreamReader(instream,encoding);

//返回结果网页(html)代码

string content=sr.ReadToEnd();

string err=string.Empty;

return content;

}

catch(Exception ex)

{

string err=ex.Message;

return string.Empty;

}

}

3 图片url处理,思路就是把接口返回的图片URL对应的图片下载到本地,或者上传到自己的图片服务器,我这里是用七牛云存储图片的

这里你可以改成下载到本地返回本地的URL就好

public string QiniuUplod(string imgurl)

{

  var accessKey="你的accesskey";

 var secretkey="你的secretkey";

//生成(上传)凭证时需要使用此Mac

//这个示例单独使用了一个Setting类,其中包含AccessKEY和SecretKey

//实际应用中,请自行设置您的AccessKey和SecretKey

Mac mac=new Mac(accessKey,secretKey);

string bucket="siyoku";

string saveKey=imgurl.Substring(imgurl.LastIndexOf('/')+1,imgurl.Length-imgurl.LastIndexof('/')-1);

//使用前请确保AK和BUCKET正确,否则此函数会抛出异常

Qiniu.Common.Config.AutoZone(accessKey,bucket,false);

//上传策略

PutPolicy  putPolicy=new PutPolicy();

putPolicy.Scope=bucket+":"+saveKey;

putPolicy.Scope=bucket;

putPolicy.SetExpires(3600);

string jstr=putPolicy.ToJsonString();

string token=Auth.CreateUploadToken(mac,jstr);

try

{

var wReq=System.Net.WebRequest.Create(imgurl) as System.Net.HttpWebRequest;

var resp=wReq.GetResponse() as System.Net.HttpWebResponse;

using(var stream=resp.GetResponseStream())

{

FormUploader fu= new FormUploader;

var result=fu.UploadStream(stream,saveKey,token);

var x=Newtonsoft.Json.JsonConvert.DeserializeObject<QiniuResult>(result.Text);

return $"http://img.siyouku.cn/{x.key}";

}

}

catch (Exception ex)

{

return "";

}

}

4 最后是请求主体方法

public ActionResult GetxcxList()

{

Stopwatch watch=new Stopwatch();

watch.Start();

var result=new Result();

for(int j=0;j<54;j++)

{

string url=$"https://www.xcxdh666.com/pageList.htm?pageNum={j}";

var str=url.GetPostPage(null);

if(str !=null)

{

result=str.JsonConvert<Result>();

}

result.dataList.ForEach(i=>

{

if(!Db.Applet.Any(x=>x.Name==i.name))

{

var x=new Applet()

{

 CategoryName=string.IsNullOrEmpty(i.categoryName)?"其它":i.categoryName,

Name=i.name,

SaomiaoUrl=QiniuUpload($"http://img.xcxdh666.com/wxppnav/{i.saomaUrl}",

summary=i.sum,

LogoUrl=QiniuUpload($"http://img.xcxdh666.com/wxappnav/{i.logoUrl}"),

SortNum=j,

CreateUser="wenqing",

CreateTime=DateTime.Now

};

Db.Applet.Add(x);

}

});

Db.SaveChanges();

}

watch.Stop();

return Content("派取完成!本次请求总共耗时:"+watch.ElapsedMilliseconds);

}

}

posted @ 2017-07-03 15:04  维尼熊320  阅读(772)  评论(0编辑  收藏  举报