java jsoup 网络爬虫 学习例子(四)抓取网页链接插入mysql数据库

java jsoup 网络爬虫 学习例子(四)抓取网页链接插入mysql数据库:本文使用 jsoup 抓取指定网页中的链接(a[href]、img[src]、option[value]、link[href]),并通过 JDBC 将链接名称和地址插入 MySQL 数据库的 link 表。示例代码位于 com.iteye.injavawetrust.jsoup 包,包含 GetLink、Link、DBUtil、Constants、JsoupUtil 五个类。

java jsoup 网络爬虫 学习例子(四) 抓取网页链接插入mysql数据库

package com.iteye.injavawetrust.jsoup;

import java.io.IOException;
import java.util.Iterator;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Fetches a page with jsoup and stores every link-like reference found in it
 * (anchors, images, select options and link tags) in the {@code link} table.
 *
 * @author InJavaWeTrust
 */
public class GetLink {

    private JsoupUtil ju = JsoupUtil.getInstance();
    private DBUtil du = DBUtil.getInstance();

    /**
     * Downloads {@code url} (5 second timeout) and inserts one row per matching element.
     * An {@link IOException} while fetching is printed and otherwise ignored.
     *
     * @param url address of the page to scan
     */
    public void getLink(String url) {
        try {
            Document document = Jsoup.connect(url).timeout(5000).get();

            // Anchors: visible text is the name, href is the target.
            Elements hrefs = document.select("a[href]");
            Iterator<Element> hrefIter = hrefs.iterator();
            while (hrefIter.hasNext()) {
                Element href = hrefIter.next();
                save(href.text(), href.attr("href"));
            }

            // Images: alt text is the name, src is the target.
            Elements srcs = document.select("img[src]");
            Iterator<Element> srcIter = srcs.iterator();
            while (srcIter.hasNext()) {
                Element src = srcIter.next();
                save(src.attr("alt"), src.attr("src"));
            }

            // Select options: option text is the name, value is the target.
            Elements opts = document.select("option[value]");
            Iterator<Element> optIter = opts.iterator();
            while (optIter.hasNext()) {
                Element opt = optIter.next();
                save(opt.text(), opt.attr("value"));
            }

            // Link tags (stylesheets, icons, ...): href is the target.
            Elements links = document.select("link[href]");
            Iterator<Element> linkIter = links.iterator();
            while (linkIter.hasNext()) {
                Element li = linkIter.next();
                save(li.text(), li.attr("href"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds a Link with a fresh id and hands it to DBUtil for insertion. */
    private void save(String name, String target) {
        Link link = new Link();
        link.setId(ju.getUUID());
        link.setUrlName(name);
        link.setUrl(target);
        // Parameterized insert: scraped text may contain quotes or other SQL metacharacters.
        du.insert(link);
    }

    public static void main(String[] args) {
        new GetLink().getLink(Constants.URL);
    }
}

// ===== Link.java =====
package com.iteye.injavawetrust.jsoup;

import java.io.Serializable;
import java.util.Date;

/**
 * Plain data holder for one row of the {@code link} table.
 *
 * @author InJavaWeTrust
 */
public class Link implements Serializable {

    private static final long serialVersionUID = 1165098694307553167L;

    /** ID */
    private String id;

    /** link name */
    private String urlName;

    /** link url */
    private String url;

    /** insert db date */
    private Date date;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrlName() {
        return urlName;
    }

    public void setUrlName(String urlName) {
        this.urlName = urlName;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public Date getDate() {
        return date;
    }

    public void setDate(Date date) {
        this.date = date;
    }
}

// ===== DBUtil.java =====
package com.iteye.injavawetrust.jsoup;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Singleton JDBC helper. Every insert opens its own connection and always
 * closes it again, so no JDBC object is kept in shared state.
 *
 * @author InJavaWeTrust
 */
public class DBUtil {

    /** Parameterized form of the statement produced by JsoupUtil.getInsertSql. */
    private static final String INSERT_LINK_SQL =
            "insert into link (id, urlname, url, date) values (?, ?, ?, NOW())";

    private DBUtil() {
    }

    private static final DBUtil instance = new DBUtil();

    public static DBUtil getInstance() {
        return instance;
    }

    /**
     * Opens a new database connection using the settings in Constants.
     *
     * @return the new connection, or {@code null} if it could not be opened
     *         (the failure is printed to stderr)
     */
    public Connection connection() {
        try {
            Class.forName(Constants.DRIVER);
        } catch (ClassNotFoundException e1) {
            // Still try DriverManager below: the driver may already be registered.
            e1.printStackTrace();
        }
        try {
            return DriverManager.getConnection(Constants.DBURL, Constants.USER, Constants.PASSWORD);
        } catch (SQLException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Closes the given JDBC resources; any argument may be {@code null}.
     * Closing is best-effort: a failure to close one resource is ignored so the
     * remaining ones are still closed.
     *
     * @param rs   result set to close
     * @param st   statement to close
     * @param conn connection to close
     */
    public void release(ResultSet rs, Statement st, Connection conn) {
        if (null != rs) {
            try {
                rs.close();
            } catch (Exception ignored) {
                // best-effort close
            }
        }
        if (null != st) {
            try {
                st.close();
            } catch (Exception ignored) {
                // best-effort close
            }
        }
        if (null != conn) {
            try {
                conn.close();
            } catch (Exception ignored) {
                // best-effort close
            }
        }
    }

    /**
     * Executes a ready-made SQL statement. Errors are printed, not thrown.
     * The SQL text is executed as-is, so it must not contain unescaped
     * untrusted input; prefer {@link #insert(Link)} for scraped data.
     *
     * @param sql statement to execute
     */
    public void insert(String sql) {
        Connection connection = connection();
        if (null == connection) {
            return; // the connection failure was already reported by connection()
        }
        Statement statement = null;
        try {
            statement = connection.createStatement();
            statement.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // Runs on failure too, so the connection is never leaked.
            release(null, statement, connection);
        }
    }

    /**
     * Inserts one link row using bound parameters, so quotes and other special
     * characters in the scraped name or url cannot break or alter the statement.
     * Errors are printed, not thrown.
     *
     * @param link row to insert; id, urlName and url are written, date is set by NOW()
     */
    public void insert(Link link) {
        Connection connection = connection();
        if (null == connection) {
            return; // the connection failure was already reported by connection()
        }
        PreparedStatement statement = null;
        try {
            statement = connection.prepareStatement(INSERT_LINK_SQL);
            statement.setString(1, link.getId());
            statement.setString(2, link.getUrlName());
            statement.setString(3, link.getUrl());
            statement.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            release(null, statement, connection);
        }
    }
}

// ===== Constants.java =====
package com.iteye.injavawetrust.jsoup;

/**
 * Connection settings and the start URL used by the example.
 * NOTE(review): the credentials are hard-coded for demonstration only.
 *
 * @author InJavaWeTrust
 */
public class Constants {

    /** mysql driver class name */
    public static final String DRIVER = "com.mysql.jdbc.Driver";

    /** JDBC connection url */
    public static final String DBURL = "jdbc:mysql://localhost:3306/jsoupdb?useUnicode=true&characterEncoding=utf-8";

    /** username */
    public static final String USER = "root";

    /** password */
    public static final String PASSWORD = "root";

    /** an arbitrary URL picked for the example */
    public static final String URL = "http://www.hrbhuade.net/html/main/index.htm";
}

// ===== JsoupUtil.java =====
package com.iteye.injavawetrust.jsoup;

import java.util.UUID;

/**
 * Singleton with small helpers for the crawler example.
 *
 * @author InJavaWeTrust
 */
public class JsoupUtil {

    private JsoupUtil() {
    }

    private static final JsoupUtil instance = new JsoupUtil();

    public static JsoupUtil getInstance() {
        return instance;
    }

    /**
     * Generates a random UUID with the four dashes removed.
     *
     * @return 32-character UUID
     */
    public String getUUID() {
        return UUID.randomUUID().toString().replace("-", "");
    }

    /**
     * Builds an insert statement for the given link by string concatenation.
     * The values are NOT escaped: a name or url containing a single quote yields
     * broken or unsafe SQL. Kept for existing callers; use DBUtil.insert(Link)
     * for data that comes from scraped pages.
     *
     * @param link Link obj
     * @return sql
     */
    public String getInsertSql(Link link) {
        return "insert into link (id, urlname, url, date) values ('"
                + link.getId() + "','"
                + link.getUrlName() + "','"
                + link.getUrl() + "',NOW())";
    }
}
      
     
    
  

link 表

-- Recreate the table that receives the scraped links.
DROP TABLE IF EXISTS `link`;

CREATE TABLE `link` (
  `id`      varchar(32)  NOT NULL,      -- 32-character UUID generated by the application
  `urlname` varchar(200) DEFAULT NULL,  -- link name (text / alt of the element)
  `url`     varchar(200) DEFAULT NULL,  -- link target (href / src / value)
  `date`    datetime     DEFAULT NULL,  -- insertion time, filled with NOW()
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请联系我们举报,一经查实,本站将立刻删除。

发布者:全栈程序员-站长,转载请注明出处:https://javaforall.net/226244.html原文链接:https://javaforall.net

(0)
上一篇 2026年3月17日 上午7:31
下一篇 2026年3月17日 上午7:31


相关推荐

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注

关注全栈程序员社区公众号