通过referer url获取用户在搜索引擎中输入的关键字和关键字所在页数[asp]
Posted on 2006-12-31 10:03 nasdaqhe 阅读(1692) 评论(2) 编辑 收藏 举报首先必须分析referer url中哪部分是关键字,哪部分是页数。例如在hao123输入关键字"我们",在搜索结果中的第7页访问你的网站,那么来源的url就是:http://www.baidu.com/s?lm=0&si=&rn=10&tn=sitehao123&ie=gb2312&ct=0&wd=%CE%D2%C3%C7&pn=60&cl=3
上面的url中,wd=%CE%D2%C3%C7就是关键字部分,其中%CE%D2%C3%C7是"我们"两字通过urlencode得到的CED2是"我"字的GB编码,C3C7是"们"字的GB编码,只要使用urlDecode我们就可以得到原来用户输入了什么关键字访问了自己的网站。而pn=60则是你网站所在的页数,百度是每页10条记录,因此pn=0就是第一页,pn=60是第7页。若url中没有pn部分,则表示是第一页。一般来说,百度对关键字是使用GB编码(如"我们"二字编码成%CE%D2%C3%C7),除非url中含有ie=utf8,那么将采用utf8对关键字编码("我们"二字将编译成%E6%88%91%E4%BB%AC)。相对于百度,google则默认采用utf8对关键字编码,那么就需要将utf8编码转为unicode再通过chrw还原为原来的文字。
一下列出各个搜索引擎的关键字和页数的String表示
名称 | 关键字String | 页数String |
www.baidu | wd/word | pn |
image.baidu | word | pn |
cache.baidu | word | pn |
tom | word | pn |
sou.china | query | pgNum |
sogou | query | page |
114.vnet | kw | start |
seek.3721 | name/p | page |
sj53 | wd | pn |
iask | k | p |
------------------------------------上面为多数采用gb对关键字进行编码,下面为多数采用utf8对关键字进行编码
yahoo | p | b |
q | start | |
baidugoo | Keyword | pn |
search.msn | q | first |
以上只是大致的结果,其实还有很多细致的地方,需要大家自己去发现,我不多说了。
下面是原程序
<%
function shortAgent(agentString)
shortAgent="Other"
if instr(agentString,"MSIE")>=1 then
shortAgent=mid(agentString,instr(agentString,"MSIE"),8)
end if
if instr(agentString,"Opera")>=1 then
shortAgent="Opera"
end if
end function
%>
<%
Function URLDecode(enStr)
dim deStr
dim c,i,v
deStr=""
for i=1 to len(enStr)
c=Mid(enStr,i,1)
if c="%" then
v=eval("&h"+Mid(enStr,i+1,2))
if v<128 then
deStr=deStr&chr(v)
i=i+2
else
if isvalidhex(mid(enstr,i,3)) then
if isvalidhex(mid(enstr,i+3,3)) then
v=eval("&h"+Mid(enStr,i+1,2)+Mid(enStr,i+4,2))
deStr=deStr&chr(v)
i=i+5
else
v=eval("&h"+Mid(enStr,i+1,2)+cstr(hex(asc(Mid(enStr,i+3,1)))))
deStr=deStr&chr(v)
i=i+3
end if
else
destr=destr&c
end if
end if
else
if c="+" then
deStr=deStr&" "
else
deStr=deStr&c
end if
end if
next
URLDecode=deStr
end function
function isvalidhex(str)
isvalidhex=true
str=ucase(str)
if len(str)<>3 then isvalidhex=false:exit function
if left(str,1)<>"%" then isvalidhex=false:exit function
c=mid(str,2,1)
if not (((c>="0") and (c<="9")) or ((c>="A") and (c<="Z"))) then isvalidhex=false:exit function
c=mid(str,3,1)
if not (((c>="0") and (c<="9")) or ((c>="A") and (c<="Z"))) then isvalidhex=false:exit function
end function
function getKw(URLString,kwString)
getKw=""
kwString=kwString&"="
kwArray=split(URLString,"&")
for i=0 to UBound(kwArray)
kwStart=instr(kwArray(i),kwString)
if kwStart=1 then
getKw=mid(kwArray(i),kwStart+len(kwString),len(kwArray(i)))
end if
next
end function
function getPn(URLString,pnString)
getPn=0
pnString=pnString&"="
pnArray=split(URLString,"&")
for i=0 to UBound(pnArray)
pnStart=instr(pnArray(i),pnString)
if pnStart=1 then
getPn=mid(pnArray(i),pnStart+len(pnString),len(pnArray(i)))
end if
next
end function
function calculatePn(pn_tmp,pn_type,pn_beg,pn_base)
if pn_type=0 then
if pn_tmp=0 then pn_tmp=1
calculatePn=pn_tmp
else
if pn_beg=0 then calculatePn=Int((pn_tmp+1)/pn_base)+1
if pn_beg=1 then calculatePn=Int(pn_tmp/pn_base)+1
end if
end function
%>
<%
function UTF2GB(UTFStr)
for Dig=1 to len(UTFStr)
'如果UTF8编码文字以%开头则进行转换
if mid(UTFStr,Dig,1)="%" then
'UTF8编码文字大于8则转换为汉字
if len(UTFStr) >= Dig+8 then
GBStr=GBStr & ConvChinese(mid(UTFStr,Dig,9))
Dig=Dig+8
else
GBStr=GBStr & mid(UTFStr,Dig,1)
end if
else
GBStr=GBStr & mid(UTFStr,Dig,1)
end if
next
UTF2GB=GBStr
end function
'UTF8编码文字将转换为汉字
function ConvChinese(x)
A=split(mid(x,2),"%")
i=0
j=0
for i=0 to ubound(A)
A(i)=c16to2(A(i))
' Response.Write(A(i)&"<br/>")
next
for i=0 to ubound(A)-1
DigS=instr(A(i),"0")
Unicode=""
for j=1 to DigS-1
if j=1 then
A(i)=right(A(i),len(A(i))-DigS)
Unicode=Unicode & A(i)
else
i=i+1
A(i)=right(A(i),len(A(i))-2)
Unicode=Unicode & A(i)
end if
'Response.Write(Unicode&"<br/>")
next
'Response.Write(c2to16(Unicode))
if len(c2to16(Unicode))=4 then
ConvChinese=ConvChinese & chrw(int("&H" & c2to16(Unicode)))
else
ConvChinese=ConvChinese & chr(int("&H" & c2to16(Unicode)))
end if
next
end function
'二进制代码转换为十六进制代码
function c2to16(x)
i=1
for i=1 to len(x) step 4
c2to16=c2to16 & hex(c2to10(mid(x,i,4)))
next
end function
'二进制代码转换为十进制代码
function c2to10(x)
c2to10=0
if x="0" then exit function
i=0
for i= 0 to len(x) -1
if mid(x,len(x)-i,1)="1" then c2to10=c2to10+2^(i)
next
end function
'十六进制代码转换为二进制代码
function c16to2(x)
i=0
for i=1 to len(trim(x))
tempstr= c10to2(cint(int("&h" & mid(x,i,1))))
do while len(tempstr)<4
tempstr="0" & tempstr
loop
c16to2=c16to2 & tempstr
next
end function
'十进制代码转换为二进制代码
function c10to2(x)
mysign=sgn(x)
x=abs(x)
DigS=1
do
if x<2^DigS then
exit do
else
DigS=DigS+1
end if
loop
tempnum=x
i=0
for i=DigS to 1 step-1
if tempnum>=2^(i-1) then
tempnum=tempnum-2^(i-1)
c10to2=c10to2 & "1"
else
c10to2=c10to2 & "0"
end if
next
if mysign=-1 then c10to2="-" & c10to2
end function
%>
<%
function resolveRef(URLStringAll)
URLString=split(URLStringAll,"?")(1)
URLHead=split(URLStringAll,"?")(0)
if instr(URLHead,"http://www.baidu.com")>=1 or instr(URLHead,"http://www1.baidu.com")>=1 then
if getKw(URLString,"wd")="" then
kw=URLDecode(getKw(URLString,"word"))
else
kw=URLDecode(getKw(URLString,"wd"))
end if
pn=calculatePn(getPn(URLString,"pn"),1,0,10)
exit function
end if
if instr(URLHead,"http://image.baidu.com")>=1 then
kw=URLDecode(getKw(URLString,"word"))
pn=calculatePn(getPn(URLString,"pn"),1,0,16)
exit function
end if
if instr(URLHead,"http://cache.baidu.com")>=1 then
kw=URLDecode(getKw(URLString,"word"))
kw=replace(kw,";","")
kw=replace(kw,","," ")
exit function
end if
if instr(URLHead,"http://search.tom.com")>=1 then
if getKw(URLString,"word")="" then
kw=URLDecode(getKw(URLString,"w"))
else
kw=URLDecode(getKw(URLString,"word"))
end if
pn=calculatePn(getPn(URLString,"pn"),1,0,10)
exit function
end if
if instr(URLHead,"http://sou.china.com")>=1 then
kw=URLDecode(getKw(URLString,"query"))
pn=calculatePn(getPn(URLString,"pgNum"),0,0,10)
exit function
end if
if instr(URLHead,"sogou.com")>=1 then
kw=URLDecode(getKw(URLString,"query"))
pn=calculatePn(getPn(URLString,"page"),0,0,10)
exit function
end if
if instr(URLHead,"http://search.114.vnet.cn")>=1 then
kw=URLDecode(getKw(URLString,"kw"))
pn=calculatePn(getPn(URLString,"start"),1,0,10)
exit function
end if
if instr(URLHead,"http://seek.3721.com")>=1 then
kw=URLDecode(getKw(URLString,"name"))
if kw="" then kw=URLDecode(getKw(URLString,"p"))
pn=calculatePn(getPn(URLString,"page"),0,0,10)
exit function
end if
if instr(URLHead,"http://search.msn.com.cn")>=1 then
kw=UTF2GB(getKw(URLString,"q"))
pn=calculatePn(getPn(URLString,"first"),1,1,10)
exit function
end if
if instr(URLHead,"http://www.sj53.com")>=1 then
kw=URLDecode(getKw(URLString,"wd"))
pn=calculatePn(getPn(URLString,"pn"),1,0,10)
exit function
end if
if instr(URLHead,"iask.com")>=1 then
kw=URLDecode(getKw(URLString,"k"))
pn=calculatePn(getPn(URLString,"p"),0,0,10)
exit function
end if
if instr(URLHead,"http://www.baidugoo.com")>=1 then
kw=UTF2GB(getKw(URLString,"Keyword"))
pn=calculatePn(getPn(URLString,"pn"),0,0,10)
exit function
end if
if instr(URLHead,"yahoo.com")>=1 then
if instr(LCase(URLStringAll),"ei=utf-8")>=1 then
if getKw(URLString,"p")="" then
kw=UTF2GB(getKw(URLString,"keyword"))
else
kw=UTF2GB(getKw(URLString,"p"))
end if
pn=calculatePn(getPn(URLString,"b"),1,1,10)
else
if getKw(URLString,"p")="" then
kw=URLDecode(getKw(URLString,"keyword"))
else
kw=URLDecode(getKw(URLString,"p"))
end if
pn=calculatePn(getPn(URLString,"b"),1,1,10)
end if
exit function
end if
if instr(URLStringAll,"http://www.google")>=1 then
kw=UTF2GB(getKw(URLString,"q"))
pn=calculatePn(getPn(URLString,"start"),1,0,10)
exit function
end if
end function
%>
function shortAgent(agentString)
shortAgent="Other"
if instr(agentString,"MSIE")>=1 then
shortAgent=mid(agentString,instr(agentString,"MSIE"),8)
end if
if instr(agentString,"Opera")>=1 then
shortAgent="Opera"
end if
end function
%>
<%
Function URLDecode(enStr)
dim deStr
dim c,i,v
deStr=""
for i=1 to len(enStr)
c=Mid(enStr,i,1)
if c="%" then
v=eval("&h"+Mid(enStr,i+1,2))
if v<128 then
deStr=deStr&chr(v)
i=i+2
else
if isvalidhex(mid(enstr,i,3)) then
if isvalidhex(mid(enstr,i+3,3)) then
v=eval("&h"+Mid(enStr,i+1,2)+Mid(enStr,i+4,2))
deStr=deStr&chr(v)
i=i+5
else
v=eval("&h"+Mid(enStr,i+1,2)+cstr(hex(asc(Mid(enStr,i+3,1)))))
deStr=deStr&chr(v)
i=i+3
end if
else
destr=destr&c
end if
end if
else
if c="+" then
deStr=deStr&" "
else
deStr=deStr&c
end if
end if
next
URLDecode=deStr
end function
function isvalidhex(str)
isvalidhex=true
str=ucase(str)
if len(str)<>3 then isvalidhex=false:exit function
if left(str,1)<>"%" then isvalidhex=false:exit function
c=mid(str,2,1)
if not (((c>="0") and (c<="9")) or ((c>="A") and (c<="Z"))) then isvalidhex=false:exit function
c=mid(str,3,1)
if not (((c>="0") and (c<="9")) or ((c>="A") and (c<="Z"))) then isvalidhex=false:exit function
end function
function getKw(URLString,kwString)
getKw=""
kwString=kwString&"="
kwArray=split(URLString,"&")
for i=0 to UBound(kwArray)
kwStart=instr(kwArray(i),kwString)
if kwStart=1 then
getKw=mid(kwArray(i),kwStart+len(kwString),len(kwArray(i)))
end if
next
end function
function getPn(URLString,pnString)
getPn=0
pnString=pnString&"="
pnArray=split(URLString,"&")
for i=0 to UBound(pnArray)
pnStart=instr(pnArray(i),pnString)
if pnStart=1 then
getPn=mid(pnArray(i),pnStart+len(pnString),len(pnArray(i)))
end if
next
end function
function calculatePn(pn_tmp,pn_type,pn_beg,pn_base)
if pn_type=0 then
if pn_tmp=0 then pn_tmp=1
calculatePn=pn_tmp
else
if pn_beg=0 then calculatePn=Int((pn_tmp+1)/pn_base)+1
if pn_beg=1 then calculatePn=Int(pn_tmp/pn_base)+1
end if
end function
%>
<%
function UTF2GB(UTFStr)
for Dig=1 to len(UTFStr)
'如果UTF8编码文字以%开头则进行转换
if mid(UTFStr,Dig,1)="%" then
'UTF8编码文字大于8则转换为汉字
if len(UTFStr) >= Dig+8 then
GBStr=GBStr & ConvChinese(mid(UTFStr,Dig,9))
Dig=Dig+8
else
GBStr=GBStr & mid(UTFStr,Dig,1)
end if
else
GBStr=GBStr & mid(UTFStr,Dig,1)
end if
next
UTF2GB=GBStr
end function
'UTF8编码文字将转换为汉字
function ConvChinese(x)
A=split(mid(x,2),"%")
i=0
j=0
for i=0 to ubound(A)
A(i)=c16to2(A(i))
' Response.Write(A(i)&"<br/>")
next
for i=0 to ubound(A)-1
DigS=instr(A(i),"0")
Unicode=""
for j=1 to DigS-1
if j=1 then
A(i)=right(A(i),len(A(i))-DigS)
Unicode=Unicode & A(i)
else
i=i+1
A(i)=right(A(i),len(A(i))-2)
Unicode=Unicode & A(i)
end if
'Response.Write(Unicode&"<br/>")
next
'Response.Write(c2to16(Unicode))
if len(c2to16(Unicode))=4 then
ConvChinese=ConvChinese & chrw(int("&H" & c2to16(Unicode)))
else
ConvChinese=ConvChinese & chr(int("&H" & c2to16(Unicode)))
end if
next
end function
'二进制代码转换为十六进制代码
function c2to16(x)
i=1
for i=1 to len(x) step 4
c2to16=c2to16 & hex(c2to10(mid(x,i,4)))
next
end function
'二进制代码转换为十进制代码
function c2to10(x)
c2to10=0
if x="0" then exit function
i=0
for i= 0 to len(x) -1
if mid(x,len(x)-i,1)="1" then c2to10=c2to10+2^(i)
next
end function
'十六进制代码转换为二进制代码
function c16to2(x)
i=0
for i=1 to len(trim(x))
tempstr= c10to2(cint(int("&h" & mid(x,i,1))))
do while len(tempstr)<4
tempstr="0" & tempstr
loop
c16to2=c16to2 & tempstr
next
end function
'十进制代码转换为二进制代码
function c10to2(x)
mysign=sgn(x)
x=abs(x)
DigS=1
do
if x<2^DigS then
exit do
else
DigS=DigS+1
end if
loop
tempnum=x
i=0
for i=DigS to 1 step-1
if tempnum>=2^(i-1) then
tempnum=tempnum-2^(i-1)
c10to2=c10to2 & "1"
else
c10to2=c10to2 & "0"
end if
next
if mysign=-1 then c10to2="-" & c10to2
end function
%>
<%
function resolveRef(URLStringAll)
URLString=split(URLStringAll,"?")(1)
URLHead=split(URLStringAll,"?")(0)
if instr(URLHead,"http://www.baidu.com")>=1 or instr(URLHead,"http://www1.baidu.com")>=1 then
if getKw(URLString,"wd")="" then
kw=URLDecode(getKw(URLString,"word"))
else
kw=URLDecode(getKw(URLString,"wd"))
end if
pn=calculatePn(getPn(URLString,"pn"),1,0,10)
exit function
end if
if instr(URLHead,"http://image.baidu.com")>=1 then
kw=URLDecode(getKw(URLString,"word"))
pn=calculatePn(getPn(URLString,"pn"),1,0,16)
exit function
end if
if instr(URLHead,"http://cache.baidu.com")>=1 then
kw=URLDecode(getKw(URLString,"word"))
kw=replace(kw,";","")
kw=replace(kw,","," ")
exit function
end if
if instr(URLHead,"http://search.tom.com")>=1 then
if getKw(URLString,"word")="" then
kw=URLDecode(getKw(URLString,"w"))
else
kw=URLDecode(getKw(URLString,"word"))
end if
pn=calculatePn(getPn(URLString,"pn"),1,0,10)
exit function
end if
if instr(URLHead,"http://sou.china.com")>=1 then
kw=URLDecode(getKw(URLString,"query"))
pn=calculatePn(getPn(URLString,"pgNum"),0,0,10)
exit function
end if
if instr(URLHead,"sogou.com")>=1 then
kw=URLDecode(getKw(URLString,"query"))
pn=calculatePn(getPn(URLString,"page"),0,0,10)
exit function
end if
if instr(URLHead,"http://search.114.vnet.cn")>=1 then
kw=URLDecode(getKw(URLString,"kw"))
pn=calculatePn(getPn(URLString,"start"),1,0,10)
exit function
end if
if instr(URLHead,"http://seek.3721.com")>=1 then
kw=URLDecode(getKw(URLString,"name"))
if kw="" then kw=URLDecode(getKw(URLString,"p"))
pn=calculatePn(getPn(URLString,"page"),0,0,10)
exit function
end if
if instr(URLHead,"http://search.msn.com.cn")>=1 then
kw=UTF2GB(getKw(URLString,"q"))
pn=calculatePn(getPn(URLString,"first"),1,1,10)
exit function
end if
if instr(URLHead,"http://www.sj53.com")>=1 then
kw=URLDecode(getKw(URLString,"wd"))
pn=calculatePn(getPn(URLString,"pn"),1,0,10)
exit function
end if
if instr(URLHead,"iask.com")>=1 then
kw=URLDecode(getKw(URLString,"k"))
pn=calculatePn(getPn(URLString,"p"),0,0,10)
exit function
end if
if instr(URLHead,"http://www.baidugoo.com")>=1 then
kw=UTF2GB(getKw(URLString,"Keyword"))
pn=calculatePn(getPn(URLString,"pn"),0,0,10)
exit function
end if
if instr(URLHead,"yahoo.com")>=1 then
if instr(LCase(URLStringAll),"ei=utf-8")>=1 then
if getKw(URLString,"p")="" then
kw=UTF2GB(getKw(URLString,"keyword"))
else
kw=UTF2GB(getKw(URLString,"p"))
end if
pn=calculatePn(getPn(URLString,"b"),1,1,10)
else
if getKw(URLString,"p")="" then
kw=URLDecode(getKw(URLString,"keyword"))
else
kw=URLDecode(getKw(URLString,"p"))
end if
pn=calculatePn(getPn(URLString,"b"),1,1,10)
end if
exit function
end if
if instr(URLStringAll,"http://www.google")>=1 then
kw=UTF2GB(getKw(URLString,"q"))
pn=calculatePn(getPn(URLString,"start"),1,0,10)
exit function
end if
end function
%>