Java获取网络文件并插入数据库的代码
Java获取网络文件并插入数据库的代码
发布时间:2016-12-28 来源:查字典编辑
摘要:获取百度的歌曲名、歌手和链接!!复制代码 代码如下:package webTools; import java.io.BufferedReader; ...

获取百度的歌曲名,歌手和链接!!

复制代码 代码如下:

package webTools;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;

import java.io.UnsupportedEncodingException;

import java.net.MalformedURLException;

import java.net.URL;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import dbTools.DBTools;

public class IOTOWeb {

public String getHtmlContent(String htmlURL) {

URL url = null;

String rowContent = "";

StringBuffer htmlContent = new StringBuffer();

try {

url = new URL(htmlURL);

BufferedReader in = new BufferedReader(new InputStreamReader(url

.openStream(), "gb2312"));

while ((rowContent = in.readLine()) != null) {

htmlContent.append(rowContent);

}

in.close();

} catch (MalformedURLException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (UnsupportedEncodingException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

return htmlContent.toString();

}

public List getLink(String htmlContent) {

ArrayList listLink = new ArrayList();

String regex = "<td[^>]*>[(]*<a[^>]*href=("([^"]*)"|'([^']*)'|([^s>]*))[^>]*>(.*?)[)]*[s]*</td>";

Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);

Matcher matcher = pattern.matcher(htmlContent);

while (matcher.find()) {

listLink.add(matcher.group());

}

return listLink;

}

public List<String> getHref(String htmlContent) {

String regex;

List listtHref = new ArrayList();

regex = "href=("([^"]*)"|'([^']*)'|([^s>]*))"";

Pattern pa = Pattern.compile(regex, Pattern.DOTALL);

Matcher ma = pa.matcher(htmlContent);

while (ma.find()) {

listtHref.add(ma.group().replaceFirst("href="", "").replace(""",

""));

}

return listtHref;

}

public List<String> getPerson(String htmlContent) {

String regex;

List list = new ArrayList();

regex = "(<a[^>]*href=("([^"]*)"|'([^']*)'|([^s>]*))[^>]*>(.*?))";

Pattern pa = Pattern.compile(regex, Pattern.DOTALL);

Matcher ma = pa.matcher(htmlContent);

while (ma.find()) {

list.add(ma.group().replaceFirst("href="", "").replace(""", ""));

}

return list;

}

public List<String> getSongName(String htmlContent) {

String regex;

List listPerson = new ArrayList();

regex = "<a[^>]*href=("([^"]*)"|'([^']*)'|([^s>]*))[^>]*>(.*?)</a>s";

Pattern pa = Pattern.compile(regex, Pattern.DOTALL);

Matcher ma = pa.matcher(htmlContent);

while (ma.find()) {

listPerson.add(ma.group());

}

return listPerson;

}

public String getMainContent(String htmlContent) {

String regex = "<table width="100%" align="center" cellpadding="0" cellspacing="0">(.*?)</table>";

StringBuffer mainContent = new StringBuffer();

Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);

Matcher matcher = pattern.matcher(htmlContent);

while (matcher.find()) {

mainContent.append(matcher.group());

}

return mainContent.toString();

}

public String outTag(final String s) {

return s.replaceAll("<.*?>", "");

}

DBTools dbTools = new DBTools();

public void getFromBaiduMap3(String htmlURL) throws Throwable {

HashMap htmlContentMap = new HashMap();

String htmlContent = getHtmlContent(htmlURL);

String mainContent = getMainContent(htmlContent);

List listLink = getLink(mainContent);

for (int j = 0; j < listLink.size(); j++) {

String tdTag = listLink.get(j).toString();

List songNameList = getSongName(tdTag);

String songName = outTag(songNameList.get(0).toString());

List personList = getPerson(tdTag);

String songPerson = "";

if (personList.size() != 0) {

for (int n = 0; n < personList.size(); n++) {

// System.out.println(personList.get(n).toString());

songPerson = outTag(personList.get(n).toString());

}

} else {

songPerson = "无";

}

// System.out.print(songNameList.get(0).toString());

List hrefList = getHref(songNameList.get(0).toString());

String songHref = hrefList.get(0).toString();

System.out.println();

String sql = "insert into song(songName,songPerson,songHref) values(?,?,?)";

ArrayList list_values = new ArrayList();

list_values.add(songName);

list_values.add(songPerson);

list_values.add(songHref);

dbTools.update(sql, list_values);

}

}

}

DBTools数据库连接类:

复制代码 代码如下:

package dbTools;

import java.util.ArrayList;

import java.sql.*;

/**
 * Minimal JDBC helper that runs parameterised statements on one MySQL
 * connection opened in the constructor.
 *
 * <p>The connection is never closed by this class. Statements and result sets
 * are local to each call and are always closed, including on failure.
 */
public class DBTools {

    // NOTE(review): URL and credentials are hard-coded in source; they should
    // come from configuration rather than being committed with the code.
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/TestURL";
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "zhuyi";

    /** Stays {@code null} when the constructor could not connect. */
    private Connection connection;

    /**
     * Loads the MySQL driver and opens the connection.
     *
     * <p>Failures are printed, not thrown, so construction always succeeds;
     * {@link #query} and {@link #update} then fail with a {@link SQLException}.
     */
    public DBTools() {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterised SELECT and returns all rows as strings.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders, in order
     * @return one {@code String[]} per row, holding each column read with
     *         {@code getString}
     * @throws Throwable a {@link SQLException} when there is no connection or
     *                   the database reports an error
     */
    public ArrayList<String[]> query(String sql, ArrayList<?> list_values) throws Throwable {
        PreparedStatement statement = openStatement(sql);
        try {
            bind(statement, list_values);
            ResultSet rows = statement.executeQuery();
            try {
                // The column count is the same for every row; read it once.
                int columnCount = rows.getMetaData().getColumnCount();
                ArrayList<String[]> listRows = new ArrayList<String[]>();
                while (rows.next()) {
                    String[] rowinfo = new String[columnCount];
                    for (int i = 0; i < columnCount; i++) {
                        rowinfo[i] = rows.getString(i + 1);
                    }
                    listRows.add(rowinfo);
                }
                return listRows;
            } finally {
                rows.close();
            }
        } finally {
            statement.close();
        }
    }

    /**
     * Runs a parameterised INSERT/UPDATE/DELETE.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param list_values values bound to the placeholders, in order
     * @throws Throwable a {@link SQLException} when there is no connection or
     *                   the database reports an error
     */
    public void update(String sql, ArrayList<?> list_values) throws Throwable {
        PreparedStatement statement = openStatement(sql);
        try {
            bind(statement, list_values);
            statement.executeUpdate();
        } finally {
            statement.close();
        }
    }

    /** Prepares {@code sql}, failing clearly when the constructor could not connect. */
    private PreparedStatement openStatement(String sql) throws SQLException {
        if (connection == null) {
            throw new SQLException("No database connection available for " + JDBC_URL);
        }
        return connection.prepareStatement(sql);
    }

    /** Binds {@code values} to the placeholders of {@code statement}, starting at index 1. */
    private static void bind(PreparedStatement statement, ArrayList<?> values) throws SQLException {
        for (int i = 0; i < values.size(); i++) {
            statement.setObject(i + 1, values.get(i));
        }
    }
}

Servlet调用:

复制代码 代码如下:

package controller;

import java.io.IOException;

import java.io.PrintWriter;

import java.util.List;

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import webTools.IOTOWeb;

public class TestURL extends HttpServlet {

/**

* Constructor of the object.

*/

public TestURL() {

super();

}

/**

* Destruction of the servlet. <br>

*/

public void destroy() {

super.destroy(); // Just puts "destroy" string in log

// Put your code here

}

/**

* The doGet method of the servlet. <br>

*

* This method is called when a form has its tag value method equals to get.

*

* @param request

* the request send by the client to the server

* @param response

* the response send by the server to the client

* @throws ServletException

* if an error occurred

* @throws IOException

* if an error occurred

*/

public void doGet(HttpServletRequest request, HttpServletResponse response)

throws ServletException, IOException {

try {

IOTOWeb iotoWeb = new IOTOWeb();

iotoWeb.getFromBaiduMap3("http://list.mp3.baidu.com/topso/mp3topsong.html?id=1?top2");

} catch (Throwable e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

}

/**

* The doPost method of the servlet. <br>

*

* This method is called when a form has its tag value method equals to

* post.

*

* @param request

* the request send by the client to the server

* @param response

* the response send by the server to the client

* @throws ServletException

* if an error occurred

* @throws IOException

* if an error occurred

*/

public void doPost(HttpServletRequest request, HttpServletResponse response)

throws ServletException, IOException {

response.setContentType("text/html");

PrintWriter out = response.getWriter();

out

.println("<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">");

out.println("<HTML>");

out.println(" <HEAD><TITLE>A Servlet</TITLE></HEAD>");

out.println(" <BODY>");

out.print(" This is ");

out.print(this.getClass());

out.println(", using the POST method");

out.println(" </BODY>");

out.println("</HTML>");

out.flush();

out.close();

}

/**

* Initialization of the servlet. <br>

*

* @throws ServletException

* if an error occurs

*/

public void init() throws ServletException {

// Put your code here

}

}

获取金书网的图书名:

复制代码 代码如下:

package webTools;

import java.io.BufferedReader;

import java.io.InputStreamReader;

import java.net.URL;

import java.util.ArrayList;

import java.util.List;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import dbTools.DBTools;

/**
 * Fetches a book page, takes the text of its first {@code <span>} element as
 * the book name and stores it in the {@code bookinfo} table.
 */
public class GetBook {

    /** First {@code <span>} whose content holds no {@code >} character. */
    private static final Pattern FIRST_SPAN = Pattern.compile("<span>[^>]*</span>", Pattern.DOTALL);

    /** Database helper used to insert the book names. */
    DBTools dbtools = new DBTools();

    /**
     * Downloads a page and returns its text with all lines concatenated
     * (line terminators are dropped, as {@code readLine} strips them).
     *
     * @param htmlURL address of the page to fetch
     * @return the page text decoded as gb2312
     * @throws Throwable if the URL is malformed or the page cannot be read
     */
    public String getHtmlContent(String htmlURL) throws Throwable {
        StringBuilder htmlContent = new StringBuilder();
        BufferedReader in = new BufferedReader(new InputStreamReader(
                new URL(htmlURL).openStream(), "gb2312"));
        try {
            String rowContent;
            while ((rowContent = in.readLine()) != null) {
                htmlContent.append(rowContent);
            }
        } finally {
            // Close even when reading fails, so the connection is not leaked.
            in.close();
        }
        return htmlContent.toString();
    }

    /**
     * Finds the first {@code <span>...</span>} element.
     *
     * @param htmlContent HTML to search
     * @return the element including its tags, or the empty string if none matches
     */
    public String getBookName(String htmlContent) {
        Matcher matcher = FIRST_SPAN.matcher(htmlContent);
        return matcher.find() ? matcher.group() : "";
    }

    /**
     * Removes everything that looks like a tag.
     *
     * @param s text possibly containing markup
     * @return {@code s} without any {@code <...>} sequences
     */
    public String outTag(final String s) {
        return s.replaceAll("<.*?>", "");
    }

    /**
     * Scrapes one book page and, when a non-empty name is found, prints it and
     * inserts it into {@code bookinfo}.
     *
     * @param htmlURL address of the book page
     * @throws Throwable if the page cannot be fetched or the insert fails
     */
    public void getFromJINSHU(String htmlURL) throws Throwable {
        String bookName = outTag(getBookName(getHtmlContent(htmlURL)));
        if (bookName.length() == 0) {
            return;
        }
        System.out.println(bookName);

        ArrayList<Object> list_values = new ArrayList<Object>();
        list_values.add(bookName);
        dbtools.update("insert into bookinfo(bookName) values(?)", list_values);
    }
}

调用Servlet:

复制代码 代码如下:

package controller;

import java.io.IOException;

import java.io.PrintWriter;

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import webTools.GetBook;

public class TestBook extends HttpServlet {

/**

* Constructor of the object.

*/

public TestBook() {

super();

}

/**

* Destruction of the servlet. <br>

*/

public void destroy() {

super.destroy(); // Just puts "destroy" string in log

// Put your code here

}

/**

* The doGet method of the servlet. <br>

*

* This method is called when a form has its tag value method equals to get.

*

* @param request

* the request send by the client to the server

* @param response

* the response send by the server to the client

* @throws ServletException

* if an error occurred

* @throws IOException

* if an error occurred

*/

int i = 1;

public void doGet(HttpServletRequest request, HttpServletResponse response)

throws ServletException, IOException {

GetBook bookinfo = new GetBook();

for (; i < 10000; i++) {

String bookURL = "http://www.golden-book.com/booksinfo/12/" + i

+ ".html";

try {

bookinfo.getFromJINSHU(bookURL);

} catch (Throwable e) {

i++;

doPost(request, response);

}

}

}

/**

* The doPost method of the servlet. <br>

*

* This method is called when a form has its tag value method equals to

* post.

*

* @param request

* the request send by the client to the server

* @param response

* the response send by the server to the client

* @throws ServletException

* if an error occurred

* @throws IOException

* if an error occurred

*/

public void doPost(HttpServletRequest request, HttpServletResponse response)

throws ServletException, IOException {

GetBook bookinfo = new GetBook();

for (; i < 10000; i++) {

String bookURL = "http://www.golden-book.com/booksinfo/12/" + i

+ ".html";

try {

bookinfo.getFromJINSHU(bookURL);

} catch (Throwable e) {

i++;

doGet(request, response);

}

}

}

/**

* Initialization of the servlet. <br>

*

* @throws ServletException

* if an error occurs

*/

public void init() throws ServletException {

// Put your code here

}

}

每种功能的实现方法有很多,希望各位可以交流不同的思想和方法。可以加QQ412546724。呵呵

推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
相关阅读
网友关注
最新Java学习
热门Java学习
编程开发子分类