asp采集抓取网上房产信息的代码_ASP教程-查字典教程网
asp采集抓取网上房产信息的代码
asp采集抓取网上房产信息的代码
发布时间:2016-12-29 来源:查字典编辑
摘要:复制代码代码如下:UntitledDocument=15)thenexitforendifNextSetregEx=nothingSetMa...

复制代码 代码如下:

<%@LANGUAGE="VBSCRIPT"CODEPAGE="936"%>

<>

<>

<!DOCTYPEHTMLPUBLIC"-//W3C//DTDHTML4.01Transitional//EN"

"http://www.w3.org/TR/html4/loose.dtd">

<html>

<head>

<title>UntitledDocument</title>

<metahttp-equiv="Content-Type"content="text/html;charset=gb2312">

<metahttp-equiv="refresh"content="300;URL=steal_house.asp">

</head>

<body>

<%

onerrorresumenext

'

Server.ScriptTimeout=999999

'========================================================

'字符编码函数

'====================================================

FunctionBytesToBstr(body,code)

dimobjstream

setobjstream=Server.CreateObject("adodb.stream")

objstream.Type=1

objstream.Mode=3

objstream.Open

objstream.Writebody

objstream.Position=0

objstream.Type=2

objstream.Charset=code

BytesToBstr=objstream.ReadText

objstream.Close

setobjstream=nothing

EndFunction

'取行字符串在另一字符串中的出现位置

FunctionNewstring(wstr,strng)

Newstring=Instr(lcase(wstr),lcase(strng))

ifNewstring<=0thenNewstring=Len(wstr)

EndFunction

'替换字符串函数

functionReplaceStr(ori,str1,str2)

ReplaceStr=replace(ori,str1,str2)

endfunction

'====================================================

functionReadXml(url,code,start,ends)

setoSend=createobject("Microsoft.XMLHTTP")

SourceCode=oSend.open("GET",url,false)

oSend.send()

ReadXml=BytesToBstr(oSend.responseBody,code)

start=Instr(ReadXml,start)

ReadXml=mid(ReadXml,start)

ends=Instr(ReadXml,ends)

ReadXml=left(ReadXml,ends-1)

endfunction

functionSubStr(body,start,ends)

start=Instr(body,start)

SubStr=mid(body,start+len(start)+1)

ends=Instr(SubStr,ends)

SubStr=left(SubStr,ends-1)

endfunction

dimgetcont,NewsContent

dimurl,title

url="http://www.***.com"'新闻网址

getcont=ReadXml(url,"gb2312","<tableclass=k2border=""0""","</table>")

getcont=RegexHtml(getcont)

dimKeyId,NewsClass,City,Position,HouseType,Level,Area,Price,Demostra

dimContactMan,Contact

fori=2toubound(getcont)

response.Write(getcont(i)&"__<br>")

tempLink=mid(getcont(i),instr(getcont(i),"href=""")+6,instr(getcont(i),"""

onClick")-10)

tempLink=replace(tempLink,"../","")

response.Write(i&":"&tempLink&"<br>")

NewsContent=ReadXml(tempLink,"gb2312","<tdvalign=""bottom""

width=""400"">","<hrwidth=""760""

noshadesize=""1""color=""#808080"">

")

NewsContent=RemoveHtml(NewsContent)

NewsContent=replace(NewsContent,VbCrLf,"")

NewsContent=replace(NewsContent,vbNewLine,"")

NewsContent=replace(NewsContent,"","")

NewsContent=replace(NewsContent,"","")

NewsContent=replace(NewsContent,"","")

NewsContent=replace(NewsContent,"n","")

NewsContent=replace(NewsContent,chr(10),"")

NewsContent=replace(NewsContent,chr(13),"")

'===============getContent=======================

response.Write(NewsContent)

KeyId=SubStr(NewsContent,"列号:","信息类别:")

NewsClass=SubStr(NewsContent,"类别:","所在城市:")

City=SubStr(NewsContent,"城市:","房屋具体位置:")

Position=SubStr(NewsContent,"位置:","房屋类型:")

HouseType=SubStr(NewsContent,"类型:","楼层:")

Level=SubStr(NewsContent,"楼层:","使用面积:")

Area=SubStr(NewsContent,"面积:","房价:")

Price=SubStr(NewsContent,"房价:","其他说明:")

Demostra=SubStr(NewsContent,"说明:","联系人:")

ContactMan=SubStr(NewsContent,"联系人:","联系方式:")

Contact=SubStr(NewsContent,"联系方式:","信息来源:")

response.Write("总序列号:"&KeyId&"<br>")

response.Write("信息类别:"&NewsClass&"<br>")

response.Write("所在城市:"&City&"<br>")

response.Write("房屋具体位置:"&Position&"<br>")

response.Write("房屋类型:"&HouseType&"<br>")

response.Write("楼层:"&Level&"<br>")

response.Write("使用面积:"&Area&"<br>")

response.Write("房价:"&Price&"<br>")

response.Write("其他说明:"&Demostra&"<br>")

response.Write("联系人:"&ContactMan&"<br>")

response.Write("联系方式:"&Contact&"<br>")

'title=RemoveHTML(aa(i))

'response.Write("title:"&title)

forn=0toapplication.Contents.count

if(application.Contents(n)=KeyId)then

ifexit=true

endif

next

ifnotifexitthen

application(time&i)=KeyId

'添加到数据库

'====================================================

setrs=server.CreateObject("adodb.recordset")

rs.open"selecttop1*fromnewsorderbyiddesc",conn,3,3

rs.addnew

rs("NewsClass")=NewsClass

rs("City")=City

rs("Position")=Position

rs("HouseType")=HouseType

rs("Level")=Level

rs("Area")=Area

rs("Price")=Price

rs("Demostra")=Demostra

rs("ContactMan")=ContactMan

rs("Contact")=Contact

rs.update

rs.close

setrs=nothing

endif

'==================================================

next

functionRemoveTag(body)

SetregEx=NewRegExp

regEx.Pattern="<[a].*?</[a]>"

regEx.IgnoreCase=True

regEx.Global=True

SetMatches=regEx.Execute(body)

dimi,arr(15),ifexit

i=0

j=0

ForEachMatchinMatches

TempStr=Match.Value

TempStr=replace(TempStr,"<td>","")

TempStr=replace(TempStr,"</td>","")

TempStr=replace(TempStr,"<tr>","")

TempStr=replace(TempStr,"</tr>","")

arr(i)=TempStr

i=i+1

if(i>=15)then

exitfor

endif

Next

SetregEx=nothing

SetMatches=nothing

RemoveTag=arr

endfunction

functionRegexHtml(body)

dimr_arr(47),r_temp

SetregEx2=NewRegExp

regEx2.Pattern="<a.*?</a>"

regEx2.IgnoreCase=True

regEx2.Global=True

SetMatches2=regEx2.Execute(body)

iii=0

ForEachMatchinMatches2

r_arr(iii)=Match.Value

iii=iii+1

Next

RegexHtml=r_arr

setregEx2=nothing

setMatches2=nothing

endfunction

'======================================================

conn.close

setconn=nothing

%>

</body>

</html>

相关阅读
推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
  • 大家都在看
  • 小编推荐
  • 猜你喜欢
  • 最新ASP教程学习
    热门ASP教程学习
    编程开发子分类