HttpClient: download a file (web crawler)
package com.opensource.httpclient.bfs;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
/**
 * Downloads the resource behind a URL with Apache HttpClient and stores the
 * response body in a local file whose name is derived from the URL and the
 * response's Content-Type header.
 */
public class DownLoadFile
{
    /** Directory prefix that downloaded files are written under. */
    private static final String DOWNLOAD_DIR = "D:\\";

    /** Regex character class of characters replaced by '_' in file names. */
    private static final String ILLEGAL_FILE_NAME_CHARS = "[\\\\?/:*|<>\"]";

    /** Media type assumed when no Content-Type is supplied. */
    private static final String DEFAULT_CONTENT_TYPE = "application/octet-stream";

    /**
     * Builds a file name from a URL and the Content-Type of the resource.
     * <p>
     * The URL scheme prefix (for example {@code http://} or {@code https://})
     * is dropped, characters that are not valid in file names are replaced
     * with {@code '_'}, and an extension is appended: {@code .html} when the
     * media type contains "html", otherwise the media subtype (the text after
     * the last '/'). Content-Type parameters such as {@code ; charset=UTF-8}
     * are ignored.
     *
     * @param url         URL of the resource; must not be {@code null}
     * @param contentType value of the Content-Type header; {@code null} or
     *                    blank is treated as {@code application/octet-stream}
     * @return the derived file name (without any directory part)
     */
    public String getFileNameByUrl(String url, String contentType)
    {
        String name = stripScheme(url).replaceAll(ILLEGAL_FILE_NAME_CHARS, "_");

        String mediaType = (contentType == null) ? "" : contentType;
        // Drop parameters such as "; charset=UTF-8" so they cannot end up in the extension.
        int paramStart = mediaType.indexOf(';');
        if (paramStart >= 0)
        {
            mediaType = mediaType.substring(0, paramStart);
        }
        mediaType = mediaType.trim();
        if (mediaType.length() == 0)
        {
            mediaType = DEFAULT_CONTENT_TYPE;
        }

        if (mediaType.indexOf("html") != -1)
        {
            return name + ".html";
        }
        String extension = mediaType.substring(mediaType.lastIndexOf("/") + 1)
            .replaceAll(ILLEGAL_FILE_NAME_CHARS, "_");
        return name + "." + extension;
    }

    /**
     * Removes a leading {@code scheme://} prefix from the URL, if present.
     * A "://" that appears later in the string (for example inside a query
     * parameter) is left alone.
     */
    private static String stripScheme(String url)
    {
        int schemeEnd = url.indexOf("://");
        if (schemeEnd > 0 && url.substring(0, schemeEnd).matches("[A-Za-z][A-Za-z0-9+.\\-]*"))
        {
            return url.substring(schemeEnd + 3);
        }
        return url;
    }

    /**
     * Writes the bytes to the given file, replacing any existing content.
     * An {@link IOException} is not propagated; it is reported on standard
     * error, as before, so existing callers keep working.
     *
     * @param data     bytes to write
     * @param filePath path of the file to create or overwrite
     */
    public void saveToLocal(byte[] data, String filePath)
    {
        try
        {
            writeBytes(data, filePath);
        }
        catch (IOException e)
        {
            System.err.println("Failed to save " + filePath);
            e.printStackTrace();
        }
    }

    /** Writes the bytes to the file in one call and always closes the stream. */
    private static void writeBytes(byte[] data, String filePath)
        throws IOException
    {
        FileOutputStream out = new FileOutputStream(new File(filePath));
        try
        {
            out.write(data);
            out.flush();
        }
        finally
        {
            out.close();
        }
    }

    /**
     * Fetches the URL with an HTTP GET and saves the response body locally.
     *
     * @param url URL to download
     * @return path of the saved file, or {@code null} when the server did not
     *         answer with status 200 or sent no body
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException             if the request fails or the file cannot be written
     */
    public String downloadFile(String url)
        throws ClientProtocolException, IOException
    {
        HttpClient httpClient = new DefaultHttpClient();
        try
        {
            HttpGet get = new HttpGet(url);
            HttpResponse rsp = httpClient.execute(get);
            if (rsp.getStatusLine().getStatusCode() != HttpStatus.SC_OK)
            {
                System.err.println("Method failed: " + rsp.getStatusLine());
                return null;
            }

            HttpEntity entity = rsp.getEntity();
            if (entity == null)
            {
                System.err.println("No response body for: " + url);
                return null;
            }

            // The Content-Type header is optional; fall back to a generic type.
            Header[] header = rsp.getHeaders("Content-Type");
            String contentType = (header.length > 0) ? header[0].getValue() : DEFAULT_CONTENT_TYPE;

            // Save the entity content itself, not the response object's toString().
            byte[] body = EntityUtils.toByteArray(entity);
            if (body == null)
            {
                body = new byte[0];
            }

            String filePath = DOWNLOAD_DIR + getFileNameByUrl(url, contentType);
            // Write directly so that a failed save surfaces as an IOException
            // instead of returning the path of a file that was never written.
            writeBytes(body, filePath);
            return filePath;
        }
        finally
        {
            // Release the connections held by this one-off client.
            httpClient.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads a sample page and prints the path it was saved to.
     */
    public static void main(String[] args)
        throws ClientProtocolException, IOException
    {
        DownLoadFile downLoadFile = new DownLoadFile();
        String temp = downLoadFile.downloadFile("http://www.huawei.com/cn/");
        System.out.println(temp);
    }
}