从搜狐下载每日交易数据的爬虫程序

谁说胖子不能爱 提交于 2020-05-08 10:55:04

网易不行还有搜狐。搜狐提供的每日股票交易数据可比网易的强多了:近四千只股票4月份的交易数据共八万余条,一气呵成就下载完了。看来以后要靠它当主力。

程序:

package com.ufo.hy.agumaster.crawler.daytransact;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ufo.hy.agumaster.entity.DayTransact;

/**
 * Downloads daily trading records for a single stock from Sohu's historical
 * quote endpoint and converts each row into a {@link DayTransact}.
 *
 * <p>Usage: call {@link #download(String, String, String, String)} and then read
 * the rows with {@link #getDtList()}. Each call to {@code download} replaces the
 * previous result.
 *
 * <p>Only the fields present in the Sohu rows are populated (day, open, close,
 * change, change percent, low, high, volume, amount, turnover rate); the other
 * {@code DayTransact} fields are left at their defaults.
 */
public class SohuDTCrawler {
    /** JSON parser reused across downloads instead of being rebuilt per call. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Result of the most recent download; empty (never null) before the first one. */
    private List<DayTransact> dtList = new ArrayList<>();

    /**
     * Returns the rows collected by the most recent {@link #download} call.
     *
     * @return the collected rows; empty if nothing has been downloaded yet or
     *         if the last download failed before any row was parsed
     */
    public List<DayTransact> getDtList() {
        return dtList;
    }

    /**
     * Fetches the daily records of one stock and stores them for {@link #getDtList()}.
     *
     * <p>Failures (network errors, unexpected response format, unparsable numbers)
     * are reported on standard error and do not propagate; rows parsed before the
     * failure remain in the list.
     *
     * @param originalCode stock code, used in the request URL (prefixed with
     *                     {@code cn_}) and copied into every record
     * @param name         stock name, copied into every record
     * @param fromDate     start date passed as the {@code start} query parameter
     *                     (the sample call uses the form {@code yyyyMMdd})
     * @param toDate       end date passed as the {@code end} query parameter
     */
    public void download(String originalCode, String name, String fromDate, String toDate) {
        dtList = new ArrayList<>();
        try {
            // NOTE(review): the "query" data entry and "auth" cookie look like
            // leftovers from a jsoup sample; they are kept so the request sent to
            // the server is unchanged — confirm whether they can be dropped.
            Document doc = Jsoup.connect(getReqUrl(originalCode, fromDate, toDate))
                    .ignoreContentType(true)
                    .data("query", "Java")
                    .userAgent("Mozilla")
                    .cookie("auth", "token")
                    .timeout(30000)
                    .get();

            JsonNode root = MAPPER.readTree(unwrapJsonp(doc.text()));

            // The previous fixed-offset parsing stripped one extra character on each
            // side of the callback parentheses, i.e. the stock object arrives wrapped
            // in an array. Accept both the wrapped and the bare-object form.
            JsonNode stockNode = root.isArray() ? root.path(0) : root;

            // path("hq") yields a missing node when the key is absent, which
            // iterates as empty, so an error payload simply produces no rows.
            for (JsonNode row : stockNode.path("hq")) {
                dtList.add(toDayTransact(originalCode, name, row));
            }
        } catch (Exception ex) {
            System.err.println("Failed to download day transactions for code " + originalCode);
            ex.printStackTrace();
        }
    }

    /**
     * Extracts the JSON payload from a JSONP response of the form
     * {@code callbackName(payload)}, independent of the callback's name length.
     *
     * @param rawText response body text
     * @return the text between the first {@code (} and the last {@code )}
     * @throws IllegalArgumentException if the text is not wrapped in parentheses
     */
    private static String unwrapJsonp(String rawText) {
        int open = rawText.indexOf('(');
        int close = rawText.lastIndexOf(')');
        if (open < 0 || close <= open) {
            throw new IllegalArgumentException("Response is not in JSONP form: " + rawText);
        }
        return rawText.substring(open + 1, close).trim();
    }

    /** Maps one "hq" row (positional array of text cells) onto a DayTransact. */
    private static DayTransact toDayTransact(String code, String name, JsonNode row) {
        DayTransact dt = new DayTransact();
        dt.setCode(code);
        dt.setName(name);
        dt.setDay(row.get(0).asText());
        dt.setTopen(parseNumber(row.get(1)));
        dt.setTclose(parseNumber(row.get(2)));
        dt.setChg(parseNumber(row.get(3)));
        dt.setPchg(parsePercent(row.get(4)));
        dt.setLow(parseNumber(row.get(5)));
        dt.setHigh(parseNumber(row.get(6)));
        dt.setVoturnover(Long.parseLong(row.get(7).asText()));
        dt.setVaturnover(parseNumber(row.get(8)));
        dt.setTurnover(parsePercent(row.get(9)));
        return dt;
    }

    /** Parses a cell's text as a double. */
    private static double parseNumber(JsonNode cell) {
        return Double.parseDouble(cell.asText());
    }

    /** Parses a cell such as "-1.85%" as a double, dropping the percent sign. */
    private static double parsePercent(JsonNode cell) {
        return Double.parseDouble(cell.asText().replace("%", ""));
    }

    /**
     * Builds the request URL for the historical-quote endpoint.
     *
     * @param code      stock code without the {@code cn_} prefix
     * @param startDate value of the {@code start} parameter
     * @param endDate   value of the {@code end} parameter
     * @return the full request URL, asking for a JSONP response wrapped in the
     *         {@code historySearchHandler} callback
     */
    private String getReqUrl(String code, String startDate, String endDate) {
        return "http://q.stock.sohu.com/hisHq?code=cn_" + code
                + "&start=" + startDate
                + "&end=" + endDate
                + "&stat=1&order=D&period=d&callback=historySearchHandler&rt=jsonp";
    }

    /** Demo entry point: downloads ten days of one stock and prints every row. */
    public static void main(String[] args) {
        SohuDTCrawler crawler = new SohuDTCrawler();
        crawler.download("002101", "广东鸿图", "20200401", "20200410");

        for (DayTransact dt : crawler.getDtList()) {
            System.out.println(dt);
        }
    }
}

用到的实体类:

package com.ufo.hy.agumaster.entity;

import com.fasterxml.jackson.databind.JsonNode;

/**
 * 每日交易数据实体类
 * @author ufo
 *
 */
public class DayTransact {
    /** Number of columns the String[] constructor requires. */
    private static final int EXPECTED_COLUMNS = 15;

    private long    id;            // ID
    private String  day;           // trading date
    private String  code;          // stock code
    private String  name;          // stock name
    private double  tclose;        // closing price
    private double  high;          // highest price
    private double  low;           // lowest price
    private double  topen;         // opening price
    private double  lclose;        // previous day's closing price
    private double  chg;           // price change (amount)
    private double  pchg;          // price change (percentage)
    private double  turnover;      // turnover rate
    private long    voturnover;    // trading volume
    private double  vaturnover;    // trading amount
    private double  tcap;          // total market capitalisation
    private double  mcap;          // circulating market capitalisation

    /** Creates an empty record; every field keeps its default value. */
    public DayTransact() {

    }

    /**
     * Placeholder constructor: it currently ignores {@code transNode} and leaves
     * every field at its default value.
     *
     * @param transNode not read
     */
    public DayTransact(JsonNode transNode) {

    }

    /**
     * Builds a record from a 15-element array of text columns.
     *
     * <p>Column 0 is stored as the day; columns 1 and 2 are not read (code and
     * name must be set through the setters). Columns 3 to 14 are parsed, in this
     * order, as tclose, high, low, topen, lclose, chg, pchg, turnover,
     * voturnover (long), vaturnover, tcap and mcap.
     *
     * @param arr the 15 column values
     * @throws ArrayIndexOutOfBoundsException if {@code arr} does not have exactly 15 elements
     * @throws NumberFormatException if a numeric column cannot be parsed; the message names
     *         the field and includes the whole line, and the original parse failure is
     *         attached as the cause
     */
    public DayTransact(String[] arr) {
        if (arr.length != EXPECTED_COLUMNS) {
            throw new ArrayIndexOutOfBoundsException("Array size should be 15 but now it is " + arr.length);
        }

        // Joined once up front so every error message can show the full input line.
        String dataLine = String.join(",", arr);

        day = arr[0];
        tclose = parseDoubleField("tclose", arr[3], dataLine);
        high = parseDoubleField("high", arr[4], dataLine);
        low = parseDoubleField("low", arr[5], dataLine);
        topen = parseDoubleField("topen", arr[6], dataLine);
        lclose = parseDoubleField("lclose", arr[7], dataLine);
        chg = parseDoubleField("chg", arr[8], dataLine);
        pchg = parseDoubleField("pchg", arr[9], dataLine);
        turnover = parseDoubleField("turnover", arr[10], dataLine);
        voturnover = parseLongField("voturnover", arr[11], dataLine);
        vaturnover = parseDoubleField("vaturnover", arr[12], dataLine);
        tcap = parseDoubleField("tcap", arr[13], dataLine);
        mcap = parseDoubleField("mcap", arr[14], dataLine);
    }

    /** Parses one column as a double, rethrowing failures with field and line context. */
    private static double parseDoubleField(String field, String value, String dataLine) {
        try {
            return Double.parseDouble(value);
        } catch (NumberFormatException ex) {
            throw fieldError(field, value, dataLine, ex);
        }
    }

    /** Parses one column as a long, rethrowing failures with field and line context. */
    private static long parseLongField(String field, String value, String dataLine) {
        try {
            return Long.parseLong(value);
        } catch (NumberFormatException ex) {
            throw fieldError(field, value, dataLine, ex);
        }
    }

    /** Builds the contextual NumberFormatException and keeps the original failure as its cause. */
    private static NumberFormatException fieldError(String field, String value, String dataLine,
            NumberFormatException cause) {
        NumberFormatException wrapped = new NumberFormatException(
                "Can not get " + field + " from string:" + value + " dataLine:" + dataLine);
        // NumberFormatException has no (message, cause) constructor, so attach it afterwards.
        wrapped.initCause(cause);
        return wrapped;
    }

    /**
     * Renders every field as space-separated {@code label:value} pairs on one line.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("id:").append(id);
        sb.append(" 日期day:").append(day);
        sb.append(" 代号code:").append(code);
        sb.append(" 名称name:").append(name);
        sb.append(" 收盘价tclose:").append(tclose);
        sb.append(" 最高价high:").append(high);
        sb.append(" 最低价low:").append(low);
        sb.append(" 开盘价topen:").append(topen);
        sb.append(" 前日收盘价lclose:").append(lclose);
        sb.append(" 涨跌额chg:").append(chg);
        sb.append(" 涨跌幅pchg:").append(pchg);
        sb.append(" 换手率turnover:").append(turnover);
        sb.append(" 成交量voturnover:").append(voturnover);
        sb.append(" 成交金额vaturnover:").append(vaturnover);
        sb.append(" 总市值tcap:").append(tcap);
        sb.append(" 流通市值mcap:").append(mcap);

        return sb.toString();
    }

    public long getId() {
        return id;
    }
    public void setId(long id) {
        this.id = id;
    }
    public String getDay() {
        return day;
    }
    public void setDay(String day) {
        this.day = day;
    }
    public String getCode() {
        return code;
    }
    public void setCode(String code) {
        this.code = code;
    }
    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public double getTclose() {
        return tclose;
    }
    public void setTclose(double tclose) {
        this.tclose = tclose;
    }
    public double getHigh() {
        return high;
    }
    public void setHigh(double high) {
        this.high = high;
    }
    public double getLow() {
        return low;
    }
    public void setLow(double low) {
        this.low = low;
    }
    public double getTopen() {
        return topen;
    }
    public void setTopen(double topen) {
        this.topen = topen;
    }
    public double getLclose() {
        return lclose;
    }
    public void setLclose(double lclose) {
        this.lclose = lclose;
    }
    public double getChg() {
        return chg;
    }
    public void setChg(double chg) {
        this.chg = chg;
    }
    public double getPchg() {
        return pchg;
    }
    public void setPchg(double pchg) {
        this.pchg = pchg;
    }
    public double getTurnover() {
        return turnover;
    }
    public void setTurnover(double turnover) {
        this.turnover = turnover;
    }
    public long getVoturnover() {
        return voturnover;
    }
    public void setVoturnover(long voturnover) {
        this.voturnover = voturnover;
    }
    public double getVaturnover() {
        return vaturnover;
    }
    public void setVaturnover(double vaturnover) {
        this.vaturnover = vaturnover;
    }
    public double getTcap() {
        return tcap;
    }
    public void setTcap(double tcap) {
        this.tcap = tcap;
    }
    public double getMcap() {
        return mcap;
    }
    public void setMcap(double mcap) {
        this.mcap = mcap;
    }
}

执行情况:

id:0 日期day:2020-04-10 代号code:002101 名称name:广东鸿图 收盘价tclose:7.95 最高价high:8.23 最低价low:7.81 开盘价topen:8.11 前日收盘价lclose:0.0 涨跌额chg:-0.15 涨跌幅pchg:-1.85 换手率turnover:2.2 成交量voturnover:93679 成交金额vaturnover:7500.99 总市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-09 代号code:002101 名称name:广东鸿图 收盘价tclose:8.1 最高价high:8.18 最低价low:7.94 开盘价topen:8.13 前日收盘价lclose:0.0 涨跌额chg:0.0 涨跌幅pchg:0.0 换手率turnover:2.75 成交量voturnover:116902 成交金额vaturnover:9441.65 总市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-08 代号code:002101 名称name:广东鸿图 收盘价tclose:8.1 最高价high:8.27 最低价low:8.03 开盘价topen:8.06 前日收盘价lclose:0.0 涨跌额chg:-0.13 涨跌幅pchg:-1.58 换手率turnover:2.75 成交量voturnover:116971 成交金额vaturnover:9499.67 总市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-07 代号code:002101 名称name:广东鸿图 收盘价tclose:8.23 最高价high:8.28 最低价low:7.9 开盘价topen:8.04 前日收盘价lclose:0.0 涨跌额chg:0.33 涨跌幅pchg:4.18 换手率turnover:3.76 成交量voturnover:159804 成交金额vaturnover:12937.74 总市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-03 代号code:002101 名称name:广东鸿图 收盘价tclose:7.9 最高价high:8.11 最低价low:7.82 开盘价topen:8.11 前日收盘价lclose:0.0 涨跌额chg:-0.3 涨跌幅pchg:-3.66 换手率turnover:3.24 成交量voturnover:138091 成交金额vaturnover:10978.95 总市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-02 代号code:002101 名称name:广东鸿图 收盘价tclose:8.2 最高价high:8.2 最低价low:7.58 开盘价topen:7.7 前日收盘价lclose:0.0 涨跌额chg:0.45 涨跌幅pchg:5.81 换手率turnover:4.54 成交量voturnover:193364 成交金额vaturnover:15326.84 总市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-01 代号code:002101 名称name:广东鸿图 收盘价tclose:7.75 最高价high:8.07 最低价low:7.62 开盘价topen:7.62 前日收盘价lclose:0.0 涨跌额chg:0.31 涨跌幅pchg:4.17 换手率turnover:3.7 成交量voturnover:157608 成交金额vaturnover:12279.22 总市值tcap:0.0 流通市值mcap:0.0

希望此程序对你也有用.

--2020年5月7日--

 

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!