asp数据采集
asp数据采集
数据采集程序
'On Error Resume Next
Server.Scripttimeout=300

'---------------------------------------------------------------------
'采集数据
Function getHTTPData(url)
dim http
set http=Server.createobject("Msxml2.XMLHTTP")
if instr(url,"http://")=0 then url="http://"&url
Http.open "GET",url,false
Http.send()
if Http.Status<>200 then exit function
getHTTPData=bytesToBSTR(Http.responseBody,"UTF-8")
set http=nothing
if err.number<>0 then err.Clear
sCharset=""
End function
'---------------------------------------------------------------------
Function BytesToBstr(body,Cset)
dim objstream
set objstream = Server.CreateObject("adodb.stream")
objstream.Type = 1
objstream.Mode =3
objstream.Open
objstream.Write body
objstream.Position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
set objstream = nothing
End Function
'---------------------------------------------------------------------
'服务器登录
Function login(url)
dim http
set http=Server.createobject("Msxml2.XMLHTTP")
if instr(url,"http://")=0 then url="http://"&url
Http.open "GET",url,false
Http.send()
if Http.Status<>200 then exit function
set http=nothing
if err.number<>0 then err.Clear
End function
'---------------------------------------------------------------------
'正则替换
Function ReplaceText(fString,patrn, replStr)
Set regEx = New RegExp
regEx.Pattern = patrn
regEx.IgnoreCase = True
regEx.Global = True
ReplaceText = regEx.Replace(fString, replStr)
End Function
'---------------------------------------------------------------------
'去标签 包括内容
Function ReplaceTag(str, tag)
Set regEx = New RegExp
regEx.Pattern = "<"&tag&"[^>]*?>.*?<\/"&tag&">"
regEx.IgnoreCase = True
regEx.Global = True
ReplaceTag=regEx.Replace(str, "")
End Function
'---------------------------------------------------------------------
'去标签 不包括内容
Function ReplaceTab(str, tag)
Set regEx = New RegExp
regEx.Pattern = "<\/?"&tag&"[^>]*>"
regEx.IgnoreCase = True
regEx.Global = True
ReplaceTab=regEx.Replace(str, "")
End Function
'---------------------------------------------------------------------
'去标签属性 保留标签
Function ReplaceinnerTag(str, tag)
Set regEx = New RegExp
regEx.Pattern = "(<\/?"&tag&")[^>]*>"
regEx.IgnoreCase = True
regEx.Global = True
ReplaceinnerTag=regEx.Replace(str, "$1>")
End Function
'---------------------------------------------------------------------
'按正则取数据
Function getText(fString, patrn,n)
dim Matches, tStr
tStr = fString
Set re = New Regexp
re.IgnoreCase = True
re.Global = True
re.Pattern = patrn
set Matches = re.Execute(tStr)
set re = nothing
rStr = ""
For Each Match in Matches
rStr = Match.SubMatches(n)
exit for
Next
getText = rStr
End Function
'---------------------------------------------------------------------
'数据过滤
Function Encode_text(str)
If Isnull(str) Then
Encode_text = ""
Exit Function
End If
str = ReplaceText(str, "<\/?br[^>]*>" , vbCrlf )
str = ReplaceText(str, "<\/?p[^>]*>" , vbCrlf )
str = ReplaceTab(str, "[a-zA-Z]")
str = ReplaceText(str, "\n\s*\r" ,Chr(10)&Chr(13))
str = Replace(str, "&" , "&" )
str = Replace(str, ";" , ";" )
str = Replace(str, "&" , "&" )
str = Replace(str,Chr(34), """ )
str = Replace(str, "'" , "'" )
str = Replace(str, "<" , "<" )
str = Replace(str, ">" , ">" )
str = Replace(str, "(" , "(" )
str = Replace(str, ")" , ")" )
str = Replace(str, "*" , "*" )
str = Replace(str, "%" , "%" )
str = Replace(str,vbCrlf, "<br/>" )
Encode_text = str
End Function
'---------------------------------------------------------------------
'通过Matches取数据
dim Matches
sub setMatches(str,sRe)
Set re = New Regexp
re.IgnoreCase = True
re.Global = True
re.Pattern = sRe
set Matches = re.Execute(str)
set re=nothing
end sub
'---------------------------------------------------------------------
例子
'例子
call setMatches(textcontent, re)
For Each Match in Matches
response.write Match.value
Next
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· .NET周刊【3月第1期 2025-03-02】
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· [AI/GPT/综述] AI Agent的设计模式综述