iOS 之解析 HTML 的两种方式
1.最近没什么事做,就研究了一下 HTML 文本的解析。解析的时候遇到了一些问题,不过现在都已经解决了。我使用了两种方式去解析 HTML 页面,下面就来说一下这两种方式分别是怎么解析的。
第一种方法:使用正则表达式(需要解析的数据:http://rss.sina.com.cn/sports/global/focus.xml)
使用多线程(GCD)的方式在后台线程下载 HTML 数据,下载完成后回到主线程进行解析并刷新界面:
/**
 * Downloads the RSS feed text on a background queue and, on success, passes
 * it to -updatview: on the main queue.
 *
 * When the download or the UTF-8 decoding fails, the error is logged and
 * -updatview: is not called, so a nil string is never handed to the parser.
 */
-(void)getNews{
    // The blocking download runs on a global queue so the caller returns immediately.
    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
        // URL of the feed to request.
        NSString *requestStr = @"http://rss.sina.com.cn/sports/global/focus.xml";
        NSURL *feedURL = [NSURL URLWithString:requestStr];

        NSError *loadError = nil;
        NSString *htmlStr = [NSString stringWithContentsOfURL:feedURL
                                                     encoding:NSUTF8StringEncoding
                                                        error:&loadError];
        // Check the return value (not the error pointer) to detect failure.
        if (htmlStr == nil) {
            NSLog(@"failed to load %@: %@", requestStr, loadError);
            return;
        }

        // Hop back to the main queue before touching the table view.
        dispatch_async(dispatch_get_main_queue(), ^{
            NSLog(@"call back the data is :%@",htmlStr);
            [self updatview:htmlStr];
        });
    });
}
//<item>需要解析的数据
//<title>
//<![CDATA[热身-世界波扳平后遭绝杀 AC米兰1-2客负里昂]]>
//</title>
//<link>http://go.rss.sina.com.cn/redirect.php?url=http://sports.sina.com.cn/g/seriea/2015-07-19/04407658051.shtml</link>
//<author>WWW.SINA.COM.CN</author>
//<guid>http://go.rss.sina.com.cn/redirect.php?url=http://sports.sina.com.cn/g/seriea/2015-07-19/04407658051.shtml</guid>
//<category>
//<![CDATA[国际足坛-焦点新闻]]>
//</category>
//<pubDate>Sat, 18 Jul 2015 20:40:57 GMT</pubDate>
//<comments>http://comment.news.sina.com.cn/cgi-bin/comment/comment.cgi?channel=gn&newsid=6-12-4810592</comments>
//<description>
//<![CDATA[]]>
//</description>
//</item>
/**
 * Returns the text captured by group 1 of `pattern` for every match found in
 * `text`, in the order the matches occur.
 *
 * Returns an empty array when `text` is nil/empty or `pattern` cannot be
 * compiled. `.` keeps its default meaning (it does not cross line breaks).
 */
static NSArray *CapturedStringsInText(NSString *pattern, NSString *text) {
    NSMutableArray *captures = [NSMutableArray array];
    if (text.length == 0) {
        return captures;
    }

    NSError *patternError = nil;
    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:pattern
                                                                           options:0
                                                                             error:&patternError];
    if (regex == nil) {
        NSLog(@"invalid pattern %@: %@", pattern, patternError);
        return captures;
    }

    NSArray *matches = [regex matchesInString:text
                                      options:0
                                        range:NSMakeRange(0, text.length)];
    for (NSTextCheckingResult *match in matches) {
        NSRange captured = [match rangeAtIndex:1];
        if (captured.location != NSNotFound) {
            [captures addObject:[text substringWithRange:captured]];
        }
    }
    return captures;
}

/**
 * Removes up to `count` objects from the front of `array`; never raises when
 * the array is shorter than `count`.
 */
static void DropLeadingObjects(NSMutableArray *array, NSUInteger count) {
    [array removeObjectsInRange:NSMakeRange(0, MIN(count, array.count))];
}

/**
 * Extracts titles, links and publication dates from the feed text with
 * regular expressions, appends them to titleArrray / urlArray / dataArray,
 * and reloads the table view.
 *
 * @param htmlStr The raw feed text. nil or empty text yields no new entries.
 *
 * Changes relative to the earlier implementation:
 *  - Non-greedy capture groups are used, so several elements on one line are
 *    not merged and a `]` inside a title is preserved.
 *  - The leading header entries are dropped with a bounds check, so a short
 *    or empty feed no longer raises NSRangeException.
 *  - Header entries are dropped from the freshly parsed values only, so a
 *    repeated call does not delete items added by a previous call.
 */
-(void)updatview:(NSString *)htmlStr{
    // Number of leading matches that are discarded from each list.
    // NOTE(review): presumably these are the channel-level <title>/<link>/<pubDate>
    // entries that precede the first <item> — confirm against the live feed.
    static const NSUInteger kHeaderTitleCount = 3;
    static const NSUInteger kHeaderLinkCount = 3;
    static const NSUInteger kHeaderDateCount = 1;

    // Titles: every CDATA section except empty ones and the category label.
    NSMutableArray *titles = [NSMutableArray array];
    for (NSString *text in CapturedStringsInText(@"<!\\[CDATA\\[(.*?)\\]\\]>", htmlStr)) {
        if (text.length == 0 || [text isEqualToString:@"国际足坛-焦点新闻"]) {
            continue;
        }
        [titles addObject:text];
    }

    // Links and publication dates: the text between the opening and closing tags.
    NSMutableArray *links = [CapturedStringsInText(@"<link>(.*?)</link>", htmlStr) mutableCopy];
    NSMutableArray *dates = [CapturedStringsInText(@"<pubDate>(.*?)</pubDate>", htmlStr) mutableCopy];

    DropLeadingObjects(titles, kHeaderTitleCount);
    DropLeadingObjects(links, kHeaderLinkCount);
    DropLeadingObjects(dates, kHeaderDateCount);

    // Append to the existing data sources, as before.
    [self.titleArrray addObjectsFromArray:titles];
    [self.urlArray addObjectsFromArray:links];
    [self.dataArray addObjectsFromArray:dates];

    NSLog(@"%@,%@,%@" ,self.titleArrray , self.urlArray, self.dataArray);
    [self.NewTableView reloadData];
}
第二种方式:(使用了第三方库TFHpple)
/**
 * Fetches the NBA RSS feed synchronously, parses it with TFHpple and stores
 * the title text of every <item> in self.articles.
 *
 * self.articles is always replaced with a fresh array. When the request
 * returns no data, the error is logged and the array is left empty. Items
 * without a title are skipped, because -addObject: raises on nil.
 *
 * NOTE: the request is synchronous and blocks the calling thread until it
 * completes.
 */
- (void)jiexi {
    NSURL *feedURL = [NSURL URLWithString:@"http://rss.sina.com.cn/sports/basketball/nba.xml"];
    NSURLRequest *request = [NSURLRequest requestWithURL:feedURL];

    NSError *requestError = nil;
    NSData *data = [NSURLConnection sendSynchronousRequest:request
                                         returningResponse:nil
                                                     error:&requestError];

    self.articles = [NSMutableArray array];
    if (data.length == 0) {
        NSLog(@"failed to load %@: %@", feedURL, requestError);
        return;
    }

    TFHpple *doc = [[TFHpple alloc] initWithXMLData:data];
    NSArray *items = [doc searchWithXPathQuery:@"//item"];

    for (TFHppleElement *item in items) {
        Article *article = [[Article alloc] init];
        // Copy the <title> and <link> children of this item into the model.
        for (TFHppleElement *element in item.children) {
            if ([@"title" isEqualToString:element.tagName]) {
                article.title = element.content;
            } else if ([@"link" isEqualToString:element.tagName]) {
                article.link = element.content;
            }
        }
        // Only the title string is stored; skip items that had none.
        if (article.title != nil) {
            [self.articles addObject:article.title];
        }
    }
}
在这里遇到了一个问题:昨晚取到的数据老是显示不出来,很是郁闷。原来是解析出来的数据里面有好多空格和换行,把空格和换行去掉之后,数据就可以正常加载了:
/// Reports one row per entry in self.articles.
-(NSInteger)tableView:(UITableView *)tableView numberOfRowsInSection:(NSInteger)section{
    NSUInteger rowCount = [self.articles count];
    return rowCount;
}
/// Builds (or reuses) a default-style cell whose label shows the article
/// string for the row, with surrounding whitespace and all CR/LF characters
/// removed.
-(UITableViewCell *)tableView:(UITableView *)tableView cellForRowAtIndexPath:(NSIndexPath *)indexPath
{
    NSString *reuseIdentifier = @"cell";
    UITableViewCell *rowCell = [tableView dequeueReusableCellWithIdentifier:reuseIdentifier];
    if (rowCell == nil) {
        rowCell = [[UITableViewCell alloc] initWithStyle:UITableViewCellStyleDefault
                                         reuseIdentifier:reuseIdentifier];
    }

    NSString *rawTitle = [self.articles objectAtIndex:indexPath.row];

    // Trim leading/trailing whitespace and newlines first, then strip any
    // carriage returns and line feeds left inside the text.
    NSCharacterSet *blankSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *trimmedTitle = [rawTitle stringByTrimmingCharactersInSet:blankSet];
    NSString *singleLineTitle =
        [[trimmedTitle stringByReplacingOccurrencesOfString:@"\r" withString:@""]
                       stringByReplacingOccurrencesOfString:@"\n" withString:@""];

    UILabel *titleLabel = rowCell.textLabel;
    titleLabel.text = singleLineTitle;
    titleLabel.numberOfLines = 0;
    return rowCell;
}