Date Format getting disturb when creating .CSV file in Java

偶尔善良 提交于 2020-01-16 01:10:30

问题


I am creating a web scraper and then store the data in the .CSV file. My program is running fine but, there is a problem that the website from where I am retrieving data have a date which is in (Month Day, Year) format. So when I save the data in .CSV file it will consider the Year as another column due to which all the data gets manipulated. I actually want to store that data into (MM-MON-YYYY) and store Validity date in one column. I am posting my code below. Kindly, help me out. Thanks!

P.S: I am sorry for not writing the format I want in the original post.

package com.mufapscraping;

//import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
//import java.util.Collections;
import java.util.Iterator;
//import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ComMufapScraping {

    boolean writeCSVToConsole = true;
    boolean writeCSVToFile = true;
    //String destinationCSVFile = "C:\\convertedCSV.csv";
    boolean sortTheList = true;
    boolean writeToConsole;
    boolean writeToFile;
    public static Document doc = null;
    public static Elements tbodyElements = null;
    public static Elements elements = null;
    public static Elements tdElements = null;
    public static Elements trElement2 = null;
    public static String Dcomma = ", 2";
    public static ArrayList<Elements> sampleList = new ArrayList<Elements>();

    public static void createConnection() throws IOException {
        System.setProperty("http.proxyHost", "191.1.1.123");
        System.setProperty("http.proxyPort", "8080");
        String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
        doc = Jsoup.connect(tempUrl).get();
    }

    public static void parsingHTML() throws Exception {
        for (int i = 1; i <= 1; i++) {

            tbodyElements = doc.getElementsByTag("tbody");
            //Element table = doc.getElementById("dataTable");

            if (tbodyElements.isEmpty()) {
                throw new Exception("Table is not found");
            }
            elements = tbodyElements.get(0).getElementsByTag("tr");

            for (Element trElement : elements) {
                trElement2 = trElement.getElementsByTag("tr");
                tdElements = trElement.getElementsByTag("td");
                FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true);
                for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) {
                    if (it.hasNext()) {
                        sb.append("  \n  ");
                    }
                    for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) {
                        Element tdElement = it.next();
                        sb.append(tdElement.text());
                        if (it2.hasNext()) {
                            sb.append("   ,   ");
                        }

                    }

                    System.out.println(sb.toString());
                    sb.flush();
                    sb.close();
                }

                System.out.println(sampleList.add(tdElements));
                /* for (Elements elements2 : zakazky) {
                System.out.println(elements2);
            }*/

            }
        }
    }

    public static void main(String[] args) throws IOException, Exception {
        createConnection();
        parsingHTML();

    }

}


回答1:


Instead of appeding directly the element text in the FileWriter, format it first then append it.

So, replace the following line:

sb.append(tdElement.text());

into

sb.append(formatData(tdElement.text()));

private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US);
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("dd-MMM-YYYY", Locale.US);

public static String formatData(String text) {
    String tmp = null;

    try {
        Date d = FORMATTER_MMM_d_yyyy.parse(text);
        tmp = FORMATTER_dd_MMM_yyyy.format(d);
    } catch (ParseException pe) {
        tmp = text;
    }

    return tmp;
}

SAMPLE

public static void main(String[] args) {
    String[] fields = new String[] { //
            "ABL Cash Fund", //
            "AA(f)", //
            "Apr 18, 2016", //
            "10.4729" //
    };

    for (String field : fields) {
        System.out.format("%s\n%s\n\n", field, formatData(field));
    }
}

OUTPUT

ABL Cash Fund
ABL Cash Fund

AA(f)
AA(f)

Apr 18, 2016
18-Apr-2016

10.4729
10.4729



回答2:


Instead of using the method getElementsByTag many times you can use cssSelector which can be much easier and enables you to get the same output in few lines of code

public static void main (String []args) throws IOException{
    String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
    Document doc = Jsoup.connect(tempUrl).get();

    Elements trElements = doc.select("#dataTable tbody tr");
    FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true);
    for(Element tr : trElements){
        Elements tdElements = tr.select("td");
        for (Element td : tdElements){
        sb.append(td.text());
        sb.append(";");
        }
        sb.append("\n");
    }
}



回答3:


This could be achieved by simply surrounding your data with double quotes, so month day, year would become "month day, year". Here's modified code that does the job for you:

package com.mufapscraping;

//import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
//import java.util.Collections;
import java.util.Iterator;
//import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ComMufapScraping {

    boolean writeCSVToConsole = true;
    boolean writeCSVToFile = true;
    //String destinationCSVFile = "C:\\convertedCSV.csv";
    boolean sortTheList = true;
    boolean writeToConsole;
    boolean writeToFile;
    public static Document doc = null;
    public static Elements tbodyElements = null;
    public static Elements elements = null;
    public static Elements tdElements = null;
    public static Elements trElement2 = null;
    public static String Dcomma = ", 2";
    public static ArrayList<Elements> sampleList = new ArrayList<Elements>();

    public static void createConnection() throws IOException {
        System.setProperty("http.proxyHost", "191.1.1.123");
        System.setProperty("http.proxyPort", "8080");
        String tempUrl = "http://www.mufap.com.pk/nav_returns_performance.php?tab=01";
        doc = Jsoup.connect(tempUrl).get();
    }

    public static void parsingHTML() throws Exception {
        for (int i = 1; i <= 1; i++) {

            tbodyElements = doc.getElementsByTag("tbody");
            //Element table = doc.getElementById("dataTable");

            if (tbodyElements.isEmpty()) {
                throw new Exception("Table is not found");
            }
            elements = tbodyElements.get(0).getElementsByTag("tr");

            for (Element trElement : elements) {
                trElement2 = trElement.getElementsByTag("tr");
                tdElements = trElement.getElementsByTag("td");
                FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true);
                for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) {
                    if (it.hasNext()) {
                        sb.append("  \n  ");
                    }
                    for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) {
                        Element tdElement = it.next();
                        sb.append('\"'); // surround your data
                        sb.append(tdElement.text());
                        sb.append('\"'); // with double quotes
                        if (it2.hasNext()) {

                            sb.append("   ,   ");
                        }

                    }

                    System.out.println(sb.toString());
                    sb.flush();
                    sb.close();
                }

                System.out.println(sampleList.add(tdElements));
                /* for (Elements elements2 : zakazky) {
                System.out.println(elements2);
            }*/

            }
        }
    }

    public static void main(String[] args) throws IOException, Exception {
        createConnection();
        parsingHTML();

    }

}



回答4:


Then you do want to split it. ok, then modify the first line by adding "year," column:

Element tdElement = it.next();
final String content = tdElement.text()
sb.append(content);
if (it2.hasNext()) {
    sb.append("   ,   ");
if (content.equals("Validity Date"))
    sb.append("Validity Year,");

you probably want to break after the for? or you'll overwrite the file elements.size()-1 times...

FileWriter sb = new FileWriter("C:\\convertedCSV2.csv", true);
for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) { ... }
break;


来源:https://stackoverflow.com/questions/36691942/date-format-getting-disturb-when-creating-csv-file-in-java

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!