import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class TencentVideo { public static void getTencentVideoName(String url) { List<Map<String,String>> resultList = new ArrayList<Map<String,String>>(); Document document = null; int pageSize = 30; int index = 1; try { for(int i = 0 ; i < 167; i ++) { String urlget = url + (i*pageSize); Thread.sleep(1000); System.out.println("URL:" + urlget.toString()); document = Jsoup.connect(urlget).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36") //加上cookie信息 .cookie("auth", "token") //设置超时 .timeout(30000) //用get()方式请求网址,也可以post()方式 .get(); Elements elements = document.select("li.list_item"); if(elements == null || "".equals(elements.toString())) { break; } for (Element ele : elements) { Map<String,String> obj = new HashMap<String,String>(); Elements name = ele.select("strong.figure_title"); String nameStr = name.select("a").attr("title"); String leader = ele.select("div.figure_desc").text(); String count = ele.select("div.figure_count").text(); String remark = ele.select("span.figure_info").text(); String score = ele.select("div.figure_score").text(); System.out.println("================== " + index + " ====================="); System.out.println("名称:" + nameStr.toString()); System.out.println("主演:" + leader.toString()); System.out.println("评分:" + score.toString()); System.out.println("描述:" + remark.toString()); System.out.println("点播量:" + count.toString()); obj.put("name", nameStr); obj.put("lead", leader); obj.put("desc", remark); obj.put("score", score); obj.put("dianbo", count); resultList.add(obj); index ++; } } new ExportExcel().exportTencentExcle(resultList); } catch (IOException e) { e.printStackTrace(); }catch (Exception ae) { 
ae.printStackTrace(); } } public static void main(String[] args) { //电影 //getTencentVideoName("http://v.qq.com/x/list/movie?itype=-1&offset="); //电视剧 // getTencentVideoName("http://v.qq.com/x/list/tv?feature=-1&offset="); //动漫 // getTencentVideoName("http://v.qq.com/x/list/cartoon?itype=-1&offset="); //少儿 // getTencentVideoName("http://v.qq.com/x/list/child?iarea=-1&offset="); //综艺 // getTencentVideoName("http://v.qq.com/x/list/variety?exclusive=-1&offset="); //演唱会 // getTencentVideoName("http://v.qq.com/x/list/music?istate=2&offset="); //纪录片 // getTencentVideoName("http://v.qq.com/x/list/doco?itrailer=-1&offset="); //电影独播 // getTencentVideoName("https://v.qq.com/x/list/movie?characteristic=5&offset="); //电视剧独播 getTencentVideoName("https://v.qq.com/x/list/tv?feature=44&offset="); } }
// 来源:博客园
// 作者:julian_chang
// 链接:https://www.cnblogs.com/julian-chang/p/11818397.html