可以在此基础上结合正则表达式做出更好的效果，也希望大家能分享一下 XMLHTTP 的 Session 共享技术。
<html>
<head>
<title>AUTOGET</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
</head>
<body bgcolor="#FFFFFF">
<%
'=================================================
' FileName   : Getit.Asp
' Intro      : Automatically fetch data from a remote web site
' Author     : Babyt
' URL        : http://blog.csdn.net/babyt
' Created    : 2002-02    Last update: 2004-09
' DB table   : data
' Table fields:
'   UID      -> Long -> keeps the ID of the page
'   UContent -> Text -> keeps the content of the page (HTML)
'=================================================
' Page-level driver: opens getit.mdb, opens the "data" table as an
' updatable recordset, runs GetPart over [intMin, intMax] and then
' releases the recordset and the connection.
' conn, rs, comeFrom, myErr1, myErr2, myCount, intMin, intMax and intStep
' are page-level variables that GetPart reads (and, for myCount, writes).

Server.ScriptTimeout = 5000
'On Error Resume Next

Dim conn, rs, sql
Dim comeFrom, myErr1, myErr2, myCount
Dim intMin, intMax, intStep

Set conn = Server.CreateObject("ADODB.Connection")
' The Jet provider keyword is "Data Source" (two words).
conn.Open "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Server.MapPath("getit.mdb")

Set rs = Server.CreateObject("ADODB.Recordset")
sql = "select * from data"
' 1 = adOpenKeyset, 3 = adLockOptimistic: the recordset must be updatable
' because GetPart appends rows with AddNew/Update.
rs.Open sql, conn, 1, 3

'========================================================
' Base URL of the remote page; the numeric ID is appended to it.
comeFrom = "http://www.xxx.com/U.asp?ID="
' Marker texts the remote site returns for missing / hidden records.
' They are matched against the fetched page, so they must stay verbatim.
myErr1 = "该资料不存在"
myErr2 = "该资料已隐藏"
'========================================================

'***************************************************************
' Only change the start (intMin), the end (intMax) and the step
' (intStep) below. The original author suggests ranges of about
' 50,000 IDs per run, estimated at a bit over two hours, with no
' manual intervention needed while it runs.
'***************************************************************
intMin = 0
intMax = 10000
' Batch size used by GetPart
intStep = 100

'==========================================================
' Do not change the code below
'==========================================================
Call GetPart(intMin)
Response.Write "已经转换完成" & intMin & "~~" & intMax & "之间的数据"

rs.Close
Set rs = Nothing
conn.Close
Set conn = Nothing
%>
</body>
</html>
<%
' Fetches a URL with XMLHTTP and returns the response decoded as GB2312 text.
'
' Url     : absolute URL to request (synchronous GET, empty credentials).
' Returns : the decoded page text, or "" when the request or the decoding
'           raised an error.
'
' NOTE(review): Microsoft.XMLHTTP is the client-side component; for
' server-side use Microsoft documents MSXML2.ServerXMLHTTP. The ProgID is
' left unchanged here to avoid altering proxy/cookie behaviour - confirm
' before switching.
Function GetBody(Url)
    Dim objXML, rawBody

    ' Default result so that every failure path yields a plain empty string
    ' instead of Empty or a raw byte array.
    GetBody = ""

    ' Network failures are expected during a long crawl; they are converted
    ' into the empty-string result below rather than aborting the page.
    On Error Resume Next

    Set objXML = CreateObject("Microsoft.XMLHTTP")
    objXML.Open "Get", Url, False, "", ""
    objXML.Send
    rawBody = objXML.ResponseBody

    If Err.Number = 0 Then
        GetBody = BytesToBstr(rawBody, "GB2312")
    End If

    If Err.Number <> 0 Then
        GetBody = ""
        Err.Clear
    End If

    Set objXML = Nothing
End Function
' Converts binary data to a string using ADODB.Stream.
'
' strBody  : binary data (e.g. XMLHTTP ResponseBody).
' CodeBase : character set name used to decode the bytes (e.g. "GB2312").
' Returns  : the decoded text; "" when strBody is Null, Empty or zero-length.
Function BytesToBstr(strBody, CodeBase)
    Const adTypeBinary = 1
    Const adTypeText = 2
    Const adModeReadWrite = 3

    Dim objStream

    BytesToBstr = ""

    ' Nothing to decode: writing an empty/Null value to the stream would
    ' raise an error, so return the empty string instead.
    If IsNull(strBody) Then Exit Function
    If LenB(strBody) = 0 Then Exit Function

    Set objStream = Server.CreateObject("Adodb.Stream")
    objStream.Type = adTypeBinary
    objStream.Mode = adModeReadWrite
    objStream.Open
    objStream.Write strBody

    ' Rewind, then switch the stream to text mode so that ReadText decodes
    ' the bytes with the requested charset.
    objStream.Position = 0
    objStream.Type = adTypeText
    objStream.Charset = CodeBase
    BytesToBstr = objStream.ReadText

    objStream.Close
    Set objStream = Nothing
End Function
' Main routine: crawls the IDs from iStart up to intMax in batches.
'
' iStart : first ID to fetch.
'
' Uses page-level variables: comeFrom (base URL), myErr1 / myErr2 (marker
' texts of "not found" / "hidden" pages), intMax (last ID), intStep (batch
' size), rs (open, updatable recordset with UID and UContent fields) and
' myCount (reset per batch; number of rows stored in the current batch).
'
' For every ID the page comeFrom & ID is fetched with GetBody. Pages that
' contain one of the marker texts, and pages that could not be fetched
' (empty result), are skipped; all others are appended to rs. After each
' batch a summary line with the row count and elapsed seconds is written.
'
' Changes against the previous version:
'   * batches are processed in a loop instead of one recursive call per
'     batch, so large ranges cannot exhaust the script stack;
'   * no ID is skipped between two batches (the old code restarted at
'     iGo + 1 although iGo already pointed at the next unprocessed ID);
'   * the invalid Response.Execute call was removed (Response has no such
'     method);
'   * the elapsed time is reported in seconds in every case.
Function GetPart(iStart)
    Dim iGo, batchStart, batchEnd, nextStart
    Dim time1, time2, content

    batchStart = iStart

    Do
        time1 = Timer()
        myCount = 0

        ' A batch covers intStep + 1 IDs, clipped to intMax.
        batchEnd = batchStart + intStep
        If batchEnd > intMax Then batchEnd = intMax

        For iGo = batchStart To batchEnd
            ' Simple data processing: quotes are escaped while the page is
            ' inspected and restored again before it is stored.
            content = GetBody(comeFrom & iGo)
            content = Replace(content, Chr(34), "&quot;")

            If Len(content) = 0 Then
                ' Fetch failed or the page was empty - nothing to store.
            ElseIf InStr(content, myErr1) > 0 Or InStr(content, myErr2) > 0 Then
                ' Skip pages that only carry an error message.
            Else
                ' Write the page to the database.
                rs.AddNew
                rs("UID") = iGo
                rs("UContent") = Replace(content, "&quot;", Chr(34))
                rs.Update
                myCount = myCount + 1
                Response.Write iGo & "<BR>"
                Response.Flush
            End If
        Next

        ' Batch summary. Timer() returns seconds; CLng avoids the overflow
        ' CInt would hit above 32767.
        time2 = Timer()
        Response.Write "<font color=red>成功抓取" & myCount & "条记录,"
        Response.Write "耗时:" & CLng(time2 - time1) & "秒</font><BR>"
        Response.Flush

        ' Always move forward by at least one ID so that a zero or negative
        ' intStep cannot stall the loop.
        nextStart = batchEnd + 1
        If nextStart <= batchStart Then nextStart = batchStart + 1
        batchStart = nextStart
    Loop While batchStart <= intMax
End Function
%>