I am building a Google App Maker application that takes a user-uploaded Excel CSV spreadsheet file as input. I have thought of multiple possible solutions to reading the data from this file.
You want to split a large CSV file and convert it into several Spreadsheets. If my understanding is correct, how about this workaround?
Such a large CSV file cannot be converted directly to a Spreadsheet, because of both the limit on the total number of cells and the limit on the file size. The file also cannot be split by reading it into a blob, because a blob used at GAS must be smaller than 50 MB (52,428,800 bytes). The script below therefore downloads the file in pieces, using byte-range requests, and converts each chunk to a Spreadsheet.
In my environment, when this sample script was run on a CSV file with the size of 100 MB, split into 10 MB chunks, about 65 seconds was required to convert one chunk to a Spreadsheet. Converting all 10 chunks in a single run would take roughly 650 seconds, which is over the limitation time (6 min) for executing GAS. So the script converts only a few chunks per run and returns the position at which the next run should resume.
In order to use this sample script, please enable the Drive API in Advanced Google Services and in the API console.
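As a reference, the key point of the script below is that it never reads the whole file at once; it fetches only a byte range of the file through the Drive API, so no single response exceeds the blob limit. The following is a minimal sketch of just that mechanism; the function name fetchFirstMegabyte and the fixed range of the first 1 MB are only illustrative, while the actual script computes the range from obj.start and obj.chunk.

// A minimal sketch of the byte-range download used by the script below.
// The function name and the fixed 1 MB range are only illustrative.
function fetchFirstMegabyte(fileId) {
  var params = {
    method: "get",
    headers: {
      Authorization: "Bearer " + ScriptApp.getOAuthToken(),
      Range: "bytes=0-1048575", // Request only the first 1 MB of the file.
    },
  };
  var url = "https://www.googleapis.com/drive/v3/files/" + fileId + "?alt=media";
  return UrlFetchApp.fetch(url, params).getContentText();
}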
function createSplitSpreadsheet(obj) {
  var accessToken = ScriptApp.getOAuthToken();
  var baseUrl = "https://www.googleapis.com/drive/v3/files/";

  // Retrieve the file size.
  var url1 = baseUrl + obj.fileId + "?fields=size";
  var params1 = {
    method: "get",
    headers: {Authorization: "Bearer " + accessToken},
  };
  var fileSize = Number(JSON.parse(UrlFetchApp.fetch(url1, params1).getContentText()).size);

  // Calculate the number of output files.
  if (obj.files == null) {
    obj.number = 1;
    obj.start = 0;
  }
  var start = obj.start;
  var end = start + obj.chunk;
  var useFileSize = fileSize - start;
  var f = Math.floor(useFileSize / obj.chunk);
  f = useFileSize % obj.chunk > 0 ? f + 1 : f;
  if (f < obj.files || obj.files == null) {
    obj.files = f;
  }

  // Split the large file by chunk size (bytes) and convert each chunk.
  var url2 = baseUrl + obj.fileId + "?alt=media";
  var i;
  for (i = 0; i < obj.files; i++) {
    var params = {
      method: "get",
      headers: {
        Authorization: "Bearer " + accessToken,
        Range: "bytes=" + start + "-" + end,
      },
    };
    var res = UrlFetchApp.fetch(url2, params).getContentText();
    // Cut the chunk at the last line break so that no CSV row is split
    // across two Spreadsheets; the remainder is carried into the next chunk.
    var e = res.lastIndexOf("\n");
    start += e + 1;
    end = start + obj.chunk;
    Drive.Files.insert(
      {mimeType: MimeType.GOOGLE_SHEETS, title: obj.fileName + (i + obj.number)},
      Utilities.newBlob(res.substr(0, e), MimeType.CSV)
    );
  }

  // Return the next start position if there is another chunk, for resuming.
  if (start < fileSize) {
    return {nextStart: start, nextNumber: i + obj.number};
  } else {
    return null;
  }
}
// Please run this function.
function main() {
  var obj = {
    fileId: "#####",   // File ID of the large CSV file.
    chunk: 10485760,   // Chunk size of 10 MB. Please modify this for your situation.
    files: 3,          // The number of files you want to convert in one run.
    start: 0,          // Byte position at which to start. Use 0 for the first run.
    fileName: "sample",
    number: 1,         // Counter of output files. For a resumed run, input the next number.
  };
  var nextStart = createSplitSpreadsheet(obj);
  Logger.log(nextStart);
}
When you use this, please modify obj in main() for your situation, and run main(). A sample case is as follows. It supposes that a CSV file with the size of 100 MB is converted with a chunk size of 10 MB, converting 3 files per run. In this sample case, each obj is as follows. Please input each obj at each run.
1st run:
var obj = {fileId: "#####", chunk: 10485760, files: 3, start: 0, fileName: "sample", number: 1}
{"nextStart": ### nextStart2 ###, "nextNumber": 4} is returned from createSplitSpreadsheet().

2nd run:
var obj = {fileId: "#####", chunk: 10485760, files: 3, start: ### nextStart2 ###, fileName: "sample", number: 4}
{"nextStart": ### nextStart3 ###, "nextNumber": 7} is returned from createSplitSpreadsheet().

3rd run:
var obj = {fileId: "#####", chunk: 10485760, files: 3, start: ### nextStart3 ###, fileName: "sample", number: 7}
{"nextStart": ### nextStart4 ###, "nextNumber": 10} is returned from createSplitSpreadsheet().

4th run:
var obj = {fileId: "#####", chunk: 10485760, files: 3, start: ### nextStart4 ###, fileName: "sample", number: 10}
null is returned from createSplitSpreadsheet().

By this flow, 10 spreadsheets (sample1 through sample10) are created from the CSV file with the size of 100 MB.
If null is used for files in obj, files is calculated automatically. But in this case, the run may go over the limitation time (6 min) for executing GAS. Please be careful of this.
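If you do not want to run main() by hand for every chunk, the resume value returned by createSplitSpreadsheet() can also be saved and the next run scheduled with a time-driven trigger. The following is only a minimal sketch of that pattern, not part of the script above; the property key SPLIT_STATE, the function name autoResume, and the 1-minute delay between runs are assumptions for this example.

// A minimal sketch, not part of the script above: save the resume point
// in the script properties and reschedule itself with a time-driven trigger.
// The property key "SPLIT_STATE", the function name autoResume, and the
// 1-minute delay are assumptions for this example.
function autoResume() {
  var props = PropertiesService.getScriptProperties();
  var saved = props.getProperty("SPLIT_STATE");
  var state = saved ? JSON.parse(saved) : {start: 0, number: 1};
  var obj = {
    fileId: "#####",  // File ID of the large CSV file.
    chunk: 10485760,  // Chunk size of 10 MB.
    files: 3,         // Few enough files per run to stay under the 6 min limit.
    start: state.start,
    fileName: "sample",
    number: state.number,
  };
  var next = createSplitSpreadsheet(obj);
  // Remove this function's old triggers before deciding whether to continue.
  ScriptApp.getProjectTriggers().forEach(function(t) {
    if (t.getHandlerFunction() === "autoResume") ScriptApp.deleteTrigger(t);
  });
  if (next) {
    // Save the resume point and schedule the next run in about 1 minute.
    props.setProperty("SPLIT_STATE", JSON.stringify({start: next.nextStart, number: next.nextNumber}));
    ScriptApp.newTrigger("autoResume").timeBased().after(60 * 1000).create();
  } else {
    props.deleteProperty("SPLIT_STATE");
  }
}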
If this was not what you want, I'm sorry.