iOS中使用正则表达式去掉HTML中的标签元素获得纯文本的方法

content是根据网址获得的网页源码字符串

/// Strips HTML tags and line-feed characters from a page-source string and
/// returns the remaining plain text.
///
/// Every `<...>` span and every "\n" is deleted in a single regex pass. Matches
/// are replaced with the empty string directly rather than with a sentinel
/// character that is split on afterwards, so characters that legitimately
/// occur in the text (notably "-") are preserved.
///
/// HTML entities (e.g. `&amp;`) are not decoded and "\r" is not removed.
///
/// @param content The HTML source to clean. May be nil or empty.
/// @return The text with tags and newlines removed; @"" when content is nil
///         or empty.
- (NSString *)changeToString:(NSString *)content
{
    // Messaging nil yields 0, so this guard covers both nil and @"" and keeps
    // nil from ever reaching NSRegularExpression.
    if (content.length == 0) {
        return @"";
    }

    // The pattern is a compile-time constant, so build the regex only once.
    // It cannot fail to compile, hence the ignored error out-parameter.
    static NSRegularExpression *tagOrNewlineRegex;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        tagOrNewlineRegex = [NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"
                                                                      options:0
                                                                        error:nil];
    });

    return [tagOrNewlineRegex stringByReplacingMatchesInString:content
                                                       options:0
                                                         range:NSMakeRange(0, content.length)
                                                  withTemplate:@""];
}
posted @ 2015-10-07 14:33  求真求道  阅读(441)  评论(0)  编辑  收藏  举报