Java 读取网页内容出现乱码的解决方法
1.首先判断网页的编码
URL url =
new
URL(
"http://www.qq.com"
);
URLConnection connection = url.openConnection();
Map<String,List<String>> headers = connection.getHeaderFields();
String ctype = headers.get(
"Content-Type"
).get(
0
);
//判断获取网页编码 text/html; charset=GB2312
String charset = ctype.substring(ctype.lastIndexOf(
"="
) +
1
);
// GB2312
2. 按检测到的编码读取网页内容
// Step 2: read the page body, decoding it with the charset detected above.
// Decoding with the platform default charset is what produces garbled text.
StringBuffer sb = new StringBuffer();
// Read from the connection that was already opened for the headers rather than
// calling url.openStream() again, which would issue another request.
// try-with-resources closes the reader (and the underlying stream) even if reading fails.
try (BufferedReader br = new BufferedReader(
        new InputStreamReader(connection.getInputStream(), charset))) {
    String str;
    // readLine() strips line terminators, so the lines are concatenated as-is.
    while ((str = br.readLine()) != null) {
        sb.append(str);
    }
}