I am trying to use a google apps script to dump emails that I have under a specific label to a Google Docs Spreadsheet. I want to list each email message body in a thread a
This is a case of "garbage-in, garbage-out". When you're using the gmail app in thread view, Google's servers are parsing the body of emails and cleverly hiding the bodies of old messages. This makes it appear that the latest message in a thread consists of only the new lines of that message, and that you have a "chain" of smaller messages.
This is an illusion. The last message in a thread typically contains the new content first, followed by the content of all previous message bodies, as a single message body. Different email services and clients use different patterns for this.
You would need to be able to identify most or all of the ways that the content from previous messages in a thread are represented in the current message body, and use that to extract only the new content.
I was able to pull out only the body of the emails by identifying where the 'previous conversation' started.
var sheet = SpreadsheetApp.getActiveSheet();
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var LabelWithEmails = sheet.getRange(3, 2).getValue();
function getEmails() {
clearCanvas();
var label = GmailApp.getUserLabelByName(LabelWithEmails);
var threads = label.getThreads();
var row = getFirstRow() + 1;
var firstmessageId = getfirstmsgid();
UserProperties.setProperty("firstmsgid", firstmessageId);
spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);
var messages = GmailApp.getMessagesForThreads(threads); //gets messages in 2D array
for (var i = 0; i < 2;/*threads.length;*/ i++) {
try {
var messages = threads[i].getMessages();
for (var m = 0; m < messages.length; m++) {
var msg = messages[m];
var isForward = msg.getBody().search(/---------- Forwarded message/i) != -1;
if(!isValidMessage) continue;
sheet.getRange(row, 1).setValue(msg.getFrom());
sheet.getRange(row, 2).setValue(msg.getTo() + ";" + msg.getCc() + ";" + msg.getBcc());
sheet.getRange(row, 3).setValue(msg.getSubject());
sheet.getRange(row, 4).setValue(msg.getDate());
if(!isForward) {
// Get only this messages body, ignore the previous chain
var body = msg.getBody();
var firstIndexOfThread = body.search(/gmail_quote/i);
body = (firstIndexOfThread == -1) ? body : body.substring(0, firstIndexOfThread);
sheet.getRange(row, 5).setValue(getTextFromHtml(body));
} else {
// Use the whole body if its a forward
sheet.getRange(row, 5).setValue(getTextFromHtml(msg.getBody()));
sheet.getRange(row, 6).setValue("***");
}
row++;
}
} catch (error) {
Logger.log(error);
spreadsheet.toast("Error Occured - please see the logs.", "Status", -1);
}
if (i == threads.length - 1) {
spreadsheet.toast("Successfully loaded emails.", "Status", -1);
}
}
}