下载网页中远程图片的方法
在编辑网站内容时, 经常会把别的网站的内容复制后再粘贴到自己的后台编辑器中, 但其中的图片保存的只是远程URL, 图片本身并未复制到本地. 下面这个方法能把复制回来的HTML代码中的图片URL转换成指定的URL, 并把图片下载到指定的物理路径.
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
namespace EFPlatform.Helper
{
    /// <summary>
    /// Helper for localising remote images referenced from an HTML fragment.
    /// </summary>
    public class WebRequestHelper
    {
        #region RequestFileFromHtml

        // Built once and reused: constructing a RegexOptions.Compiled regex on every
        // call repeats the (costly) compilation each time.
        // NOTE(review): the pattern itself is unchanged. Because \S is greedy, two URLs
        // that are not separated by whitespace can be captured as a single match.
        private static readonly Regex RemoteImagePattern = new Regex(
            @"((http|https|ftp):(\/\/|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(gif|jpg|png)))",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Finds absolute http/https/ftp URLs ending in .gif, .jpg or .png inside
        /// <paramref name="html"/>, downloads each distinct one and rewrites the HTML so
        /// that it points at the downloaded copy.
        /// </summary>
        /// <param name="html">HTML text to scan. Null or empty input is returned unchanged.</param>
        /// <param name="fileUrl">
        /// URL prefix for the rewritten links; the generated file name is appended to it as-is
        /// (no separator is inserted).
        /// </param>
        /// <param name="filePath">
        /// Path prefix the files are saved under; the generated file name is appended to it
        /// as-is, so it must already end with a directory separator.
        /// </param>
        /// <param name="reservedHost">
        /// Host whose images are left untouched (compared case-insensitively). Null means
        /// no host is excluded.
        /// </param>
        /// <returns>
        /// The HTML with every successfully downloaded image URL replaced by
        /// <paramref name="fileUrl"/> + generated file name. URLs whose download failed are
        /// left as they were.
        /// </returns>
        public static string RequestFileFromHtml(string html, string fileUrl, string filePath, string reservedHost)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            MatchCollection matches = RemoteImagePattern.Matches(html);
            if (matches.Count == 0)
            {
                return html;
            }

            // Distinct URIs in first-seen order (the position feeds the file name), plus
            // every literal spelling found in the HTML for each of them. Uri equality
            // canonicalises the text, so one Uri may correspond to several spellings.
            List<Uri> urlList = new List<Uri>();
            Dictionary<Uri, List<string>> spellings = new Dictionary<Uri, List<string>>();

            foreach (Match match in matches)
            {
                Uri url;
                if (!Uri.TryCreate(match.Value, UriKind.Absolute, out url))
                {
                    // The pattern is looser than Uri parsing; skip what cannot be parsed.
                    continue;
                }

                if (string.Equals(url.Host, reservedHost, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                List<string> literals;
                if (!spellings.TryGetValue(url, out literals))
                {
                    literals = new List<string>();
                    spellings.Add(url, literals);
                    urlList.Add(url);
                }

                if (!literals.Contains(match.Value))
                {
                    literals.Add(match.Value);
                }
            }

            using (WebClient client = new WebClient())
            {
                for (int i = 0; i < urlList.Count; i++)
                {
                    Uri url = urlList[i];
                    string absolute = url.AbsoluteUri;
                    string fileExt = absolute.Substring(absolute.LastIndexOf('.'));
                    string fileName = string.Format("{0:yyMMddHHmmssff}{1}{2}", DateTime.Now, i, fileExt);

                    try
                    {
                        client.DownloadFile(url, filePath + fileName);
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: if an image cannot be fetched or saved,
                        // keep its original remote URL in the HTML and carry on.
                        continue;
                    }

                    string localUrl = fileUrl + fileName;

                    // Replace the text exactly as it appeared in the HTML; the canonical
                    // AbsoluteUri can differ from it (host casing, escaping, separators).
                    foreach (string literal in spellings[url])
                    {
                        html = html.Replace(literal, localUrl);
                    }

                    html = html.Replace(absolute, localUrl);
                }
            }

            return html;
        }

        #endregion
    }
}
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
namespace EFPlatform.Helper
{
    /// <summary>
    /// Helper for localising remote images referenced from an HTML fragment.
    /// </summary>
    public class WebRequestHelper
    {
        #region RequestFileFromHtml

        // Built once and reused: constructing a RegexOptions.Compiled regex on every
        // call repeats the (costly) compilation each time.
        // NOTE(review): the pattern itself is unchanged. Because \S is greedy, two URLs
        // that are not separated by whitespace can be captured as a single match.
        private static readonly Regex RemoteImagePattern = new Regex(
            @"((http|https|ftp):(\/\/|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(gif|jpg|png)))",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Finds absolute http/https/ftp URLs ending in .gif, .jpg or .png inside
        /// <paramref name="html"/>, downloads each distinct one and rewrites the HTML so
        /// that it points at the downloaded copy.
        /// </summary>
        /// <param name="html">HTML text to scan. Null or empty input is returned unchanged.</param>
        /// <param name="fileUrl">
        /// URL prefix for the rewritten links; the generated file name is appended to it as-is
        /// (no separator is inserted).
        /// </param>
        /// <param name="filePath">
        /// Path prefix the files are saved under; the generated file name is appended to it
        /// as-is, so it must already end with a directory separator.
        /// </param>
        /// <param name="reservedHost">
        /// Host whose images are left untouched (compared case-insensitively). Null means
        /// no host is excluded.
        /// </param>
        /// <returns>
        /// The HTML with every successfully downloaded image URL replaced by
        /// <paramref name="fileUrl"/> + generated file name. URLs whose download failed are
        /// left as they were.
        /// </returns>
        public static string RequestFileFromHtml(string html, string fileUrl, string filePath, string reservedHost)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            MatchCollection matches = RemoteImagePattern.Matches(html);
            if (matches.Count == 0)
            {
                return html;
            }

            // Distinct URIs in first-seen order (the position feeds the file name), plus
            // every literal spelling found in the HTML for each of them. Uri equality
            // canonicalises the text, so one Uri may correspond to several spellings.
            List<Uri> urlList = new List<Uri>();
            Dictionary<Uri, List<string>> spellings = new Dictionary<Uri, List<string>>();

            foreach (Match match in matches)
            {
                Uri url;
                if (!Uri.TryCreate(match.Value, UriKind.Absolute, out url))
                {
                    // The pattern is looser than Uri parsing; skip what cannot be parsed.
                    continue;
                }

                if (string.Equals(url.Host, reservedHost, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                List<string> literals;
                if (!spellings.TryGetValue(url, out literals))
                {
                    literals = new List<string>();
                    spellings.Add(url, literals);
                    urlList.Add(url);
                }

                if (!literals.Contains(match.Value))
                {
                    literals.Add(match.Value);
                }
            }

            using (WebClient client = new WebClient())
            {
                for (int i = 0; i < urlList.Count; i++)
                {
                    Uri url = urlList[i];
                    string absolute = url.AbsoluteUri;
                    string fileExt = absolute.Substring(absolute.LastIndexOf('.'));
                    string fileName = string.Format("{0:yyMMddHHmmssff}{1}{2}", DateTime.Now, i, fileExt);

                    try
                    {
                        client.DownloadFile(url, filePath + fileName);
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: if an image cannot be fetched or saved,
                        // keep its original remote URL in the HTML and carry on.
                        continue;
                    }

                    string localUrl = fileUrl + fileName;

                    // Replace the text exactly as it appeared in the HTML; the canonical
                    // AbsoluteUri can differ from it (host casing, escaping, separators).
                    foreach (string literal in spellings[url])
                    {
                        html = html.Replace(literal, localUrl);
                    }

                    html = html.Replace(absolute, localUrl);
                }
            }

            return html;
        }

        #endregion
    }
}