java 抓取网页内容实现代码
java 抓取网页内容实现代码
发布时间:2016-12-28 来源:查字典编辑
摘要:复制代码代码如下:packagetest;importjava.io.BufferedReader;importjava.io.IOExce...

复制代码 代码如下:

package test;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.net.Authenticator;

import java.net.HttpURLConnection;

import java.net.PasswordAuthentication;

import java.net.URL;

import java.net.URLConnection;

import java.util.Properties;

public class URLTest {

// 一个public方法,返回字符串,错误则返回"error open url"

public static String getContent(String strUrl) {

try {

URL url = new URL(strUrl);

BufferedReader br = new BufferedReader(new InputStreamReader(url

.openStream()));

String s = "";

StringBuffer sb = new StringBuffer("");

while ((s = br.readLine()) != null) {

sb.append(s + "/r/n");

}

br.close();

return sb.toString();

} catch (Exception e) {

return "error open url:" + strUrl;

}

}

public static void initProxy(String host, int port, final String username,

final String password) {

Authenticator.setDefault(new Authenticator() {

protected PasswordAuthentication getPasswordAuthentication() {

return new PasswordAuthentication(username,

new String(password).toCharArray());

}

});

System.setProperty("http.proxyType", "4");

System.setProperty("http.proxyPort", Integer.toString(port));

System.setProperty("http.proxyHost", host);

System.setProperty("http.proxySet", "true");

}

public static void main(String[] args) throws IOException {

String url = "http://www.jb51.net";

String proxy = "http://192.168.22.81";

int port = 80;

String username = "username";

String password = "password";

String curLine = "";

String content = "";

URL server = new URL(url);

initProxy(proxy, port, username, password);

HttpURLConnection connection = (HttpURLConnection) server

.openConnection();

connection.connect();

InputStream is = connection.getInputStream();

BufferedReader reader = new BufferedReader(new

InputStreamReader(is));

while ((curLine = reader.readLine()) != null) {

content = content + curLine+ "/r/n";

}

System.out.println("content= " + content);

is.close();

System.out.println(getContent(url));

}

}

推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
相关阅读
网友关注
最新Java学习
热门Java学习
编程开发子分类