Google apps script Gmail get message without previous conversation - google-apps-script

I am trying to use a google apps script to dump emails that I have under a specific label to a Google Docs Spreadsheet. I want to list each email message body in a thread as a separate row, such that if a thread has a chain of 9 messages, each one is listed separately (without the chain) in a row.
I have managed to get it to the point where each message body plus its entire previous thread is stored in one cell, and I can also get the entire thread in a single cell. But this is not what I want.
This code will put the entire body of the thread in a row.
/**
 * Dumps the most recent message of each labelled thread into the sheet,
 * one row per thread (columns: from, subject, date, body text).
 *
 * Only the last message of a thread is read; as the surrounding question
 * notes, its body also carries the earlier messages of the conversation.
 *
 * Relies on names defined elsewhere in the script: clearCanvas, getFirstRow,
 * getfirstmsgid, getTextFromHtml, LabelWithEmails, sheet, spreadsheet.
 */
function getEmails() {
  // Cap carried over from the original listing, which looped `i < 5`.
  var MAX_THREADS = 5;

  clearCanvas();
  var label = GmailApp.getUserLabelByName(LabelWithEmails);
  var threads = label.getThreads();
  // var threads = GmailApp.getInboxThreads(0, 50);
  var row = getFirstRow() + 1;
  var firstmessageId = getfirstmsgid();
  UserProperties.setProperty("firstmsgid", firstmessageId);
  spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);
  var messages = GmailApp.getMessagesForThreads(threads); // 2D array: [thread][message]

  // Never index past the threads that actually exist; the original looped a
  // fixed 5 times and raised an error toast for every missing thread.
  var threadCount = Math.min(MAX_THREADS, messages.length);
  for (var i = 0; i < threadCount; ++i) {
    try {
      // Last element = most recent message of the thread.
      var latest = messages[i][messages[i].length - 1];
      var messageBody = latest.getBody(); // HTML body
      var messageSubject = latest.getSubject();
      var messageDate = latest.getDate();
      var messageFrom = latest.getFrom();
      Logger.log("Message Subject:" + messageSubject);
      Logger.log("Message Date:" + messageDate);
      Logger.log("Message From:" + messageFrom);
      sheet.getRange(row, 1).setValue(messageFrom);
      sheet.getRange(row, 2).setValue(messageSubject);
      sheet.getRange(row, 3).setValue(messageDate);
      sheet.getRange(row, 4).setValue(getTextFromHtml(messageBody));
      row++;
    } catch (error) {
      // Keep the cause in the execution log; the toast alone hides it.
      Logger.log(error);
      spreadsheet.toast("Error Occured. Report it # http://techawakening.org/", "Status", -1);
    }
  }

  // Completion notice once the capped batch is done (the original only
  // showed it when the label held five threads or fewer).
  if (threadCount > 0) {
    spreadsheet.toast("Successfully loaded emails.", "Status", -1);
    spreadsheet.toast("Now mark emails to be forwarded by changing the background color of the cells to green. Then select Forward->Forward selected emails", "Status", -1);
  }
}
This will put each message body, including its previous thread/message chain, in a row.
/**
 * Dumps every message of every labelled thread into the sheet, one row per
 * message (columns: from, subject, date, body text).
 *
 * Each body is written as returned by getBody(), so it still contains the
 * quoted earlier messages of the conversation.
 *
 * Relies on names defined elsewhere in the script: clearCanvas, getFirstRow,
 * getfirstmsgid, getTextFromHtml, LabelWithEmails, sheet, spreadsheet.
 */
function getEmails() {
  clearCanvas();
  var label = GmailApp.getUserLabelByName(LabelWithEmails);
  var threads = label.getThreads();
  // var threads = GmailApp.getInboxThreads(0, 50);
  var row = getFirstRow() + 1;
  var firstmessageId = getfirstmsgid();
  UserProperties.setProperty("firstmsgid", firstmessageId);
  spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);

  // The original also called GmailApp.getMessagesForThreads(threads) here and
  // then shadowed the result inside the loop; that extra Gmail round trip was
  // never used, so it has been dropped.
  // jknipp - working except it keeps the thread chain
  for (var i = 0; i < threads.length; i++) {
    try {
      var messages = threads[i].getMessages();
      for (var m = 0; m < messages.length; m++) {
        var message = messages[m];
        sheet.getRange(row, 1).setValue(message.getFrom());
        sheet.getRange(row, 2).setValue(message.getSubject());
        sheet.getRange(row, 3).setValue(message.getDate());
        sheet.getRange(row, 4).setValue(getTextFromHtml(message.getBody()));
        row++;
      }
    } catch (error) {
      // Keep the cause in the execution log; the toast alone hides it.
      Logger.log(error);
      spreadsheet.toast("Error Occured. Report it # http://techawakening.org/", "Status", -1);
    }
  }

  // Same condition as the original in-loop `i == threads.length - 1` check:
  // shown once, after the last thread, when there was at least one thread.
  if (threads.length > 0) {
    spreadsheet.toast("Successfully loaded emails.", "Status", -1);
    spreadsheet.toast("Now mark emails to be forwarded by changing the background color of the cells to green. Then select Forward->Forward selected emails", "Status", -1);
  }
}
References
https://stackoverflow.com/a/11034461/39803

I was able to pull out only the body of the emails by identifying where the 'previous conversation' started.
// Script-wide handles, resolved once when the script is loaded.
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet(); // used for toast notifications
var sheet = SpreadsheetApp.getActiveSheet(); // sheet the e-mails are written to
// The Gmail label name to dump is read from row 3, column 2 of the active sheet.
var LabelWithEmails = sheet.getRange(3, 2).getValue();
/**
 * Dumps every message of every labelled thread into the sheet, one row per
 * message, keeping only the text each message added to the conversation.
 *
 * Columns: 1 from, 2 "to;cc;bcc", 3 subject, 4 date, 5 body text,
 * 6 "***" marker for forwarded messages.
 *
 * For ordinary messages the HTML body is cut at the first occurrence of
 * "gmail_quote" (presumably the marker Gmail puts on the quoted history;
 * other mail clients may mark quotes differently). Forwarded messages are
 * kept whole, because the forwarded content is the message.
 *
 * Relies on names defined elsewhere in the script: clearCanvas, getFirstRow,
 * getfirstmsgid, getTextFromHtml, LabelWithEmails, sheet, spreadsheet.
 */
function getEmails() {
  clearCanvas();
  var label = GmailApp.getUserLabelByName(LabelWithEmails);
  var threads = label.getThreads();
  var row = getFirstRow() + 1;
  var firstmessageId = getfirstmsgid();
  UserProperties.setProperty("firstmsgid", firstmessageId);
  spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);

  // Walk all threads (the posted listing was still capped at 2 for debugging).
  for (var i = 0; i < threads.length; i++) {
    try {
      var messages = threads[i].getMessages();
      for (var m = 0; m < messages.length; m++) {
        var msg = messages[m];
        // Fetch the HTML body once and reuse it below.
        var body = msg.getBody();
        var isForward = body.search(/---------- Forwarded message/i) != -1;
        // The posted listing tested an undefined `isValidMessage` here, which
        // threw on every message; that check has been removed.
        sheet.getRange(row, 1).setValue(msg.getFrom());
        sheet.getRange(row, 2).setValue(msg.getTo() + ";" + msg.getCc() + ";" + msg.getBcc());
        sheet.getRange(row, 3).setValue(msg.getSubject());
        sheet.getRange(row, 4).setValue(msg.getDate());
        if (!isForward) {
          // Get only this message's body, ignore the previous chain.
          var firstIndexOfThread = body.search(/gmail_quote/i);
          if (firstIndexOfThread != -1) {
            body = body.substring(0, firstIndexOfThread);
          }
          sheet.getRange(row, 5).setValue(getTextFromHtml(body));
        } else {
          // Use the whole body if it is a forward, and flag the row.
          sheet.getRange(row, 5).setValue(getTextFromHtml(body));
          sheet.getRange(row, 6).setValue("***");
        }
        row++;
      }
    } catch (error) {
      Logger.log(error);
      spreadsheet.toast("Error Occured - please see the logs.", "Status", -1);
    }
  }

  // Shown once, after the last thread, when there was at least one thread.
  if (threads.length > 0) {
    spreadsheet.toast("Successfully loaded emails.", "Status", -1);
  }
}

This is a case of "garbage-in, garbage-out". When you're using the gmail app in thread view, Google's servers are parsing the body of emails and cleverly hiding the bodies of old messages. This makes it appear that the latest message in a thread consists of only the new lines of that message, and that you have a "chain" of smaller messages.
This is an illusion. The last message in a thread typically contains the new content first, followed by the content of all previous message bodies, as a single message body. Different email services and clients use different patterns for this.
You would need to be able to identify most or all of the ways that the content from previous messages in a thread are represented in the current message body, and use that to extract only the new content.

Related

Exceeded maximum execution time google script while extract email addresses from my Gmail

I have a Google Sheets script that extracts email addresses from Gmail labels. It works very well on a small number of emails, but when the number of emails is large it fails with the timeout error "Exceeded maximum execution time". Can anyone help me solve this problem? Below is the code, which works fine with a small number of emails but not with a large number.
I copied this script from here.
/**
 * Lists the distinct senders of all mail under a Gmail label.
 *
 * The label name is read from cell B2 of the first sheet. Results go to a
 * sheet named "Label: <label>" (created if missing, cleared otherwise) as
 * rows of [name, email, date]. Each distinct From header appears once, with
 * the data of the last message seen for it, in order of that last sighting.
 */
function GetAddresses ()
{
  // Get the active spreadsheet
  var ss = SpreadsheetApp.getActiveSpreadsheet();

  // Label to search
  var userInputSheet = ss.getSheets()[0];
  var labelName = userInputSheet.getRange("B2").getValue();

  // Create / empty the target sheet
  var sheetName = "Label: " + labelName;
  var sheet = ss.getSheetByName (sheetName) || ss.insertSheet (sheetName, ss.getSheets().length);
  sheet.clear();

  // Rows collected so far; a superseded row is set to null instead of being
  // spliced out, so de-duplication stays linear in the number of messages.
  var messageData = [];
  // From header -> index of its current row in messageData. Created without
  // a prototype so any header text is safe to use as a key.
  var rowIndexByAddress = Object.create(null);

  var startIndex = 0;
  var pageSize = 100;
  while (true)
  {
    // Search in pages of 100
    var threads = GmailApp.search ("label:" + labelName, startIndex, pageSize);
    if (threads.length == 0)
      break;
    startIndex += pageSize;

    // Get all messages for the current batch of threads
    var messages = GmailApp.getMessagesForThreads (threads);

    // Loop over all threads of the batch
    for (var i = 0; i < messages.length; i++)
    {
      // Loop over all messages in this thread
      for (var j = 0; j < messages[i].length; j++)
      {
        var mailFrom = messages[i][j].getFrom ();
        var mailDate = messages[i][j].getDate ();

        // mailFrom format may be either one of these:
        // name@domain.com
        // any text <name@domain.com>
        // "any text" <name@domain.com>
        var name = "";
        var email = "";
        var matches = mailFrom.match (/\s*"?([^"]*)"?\s+<(.+)>/);
        if (matches)
        {
          name = matches[1];
          email = matches[2];
        }
        else
        {
          email = mailFrom;
        }

        // Already seen this sender: drop the older row so the newest
        // sighting determines both the data and the position.
        if (mailFrom in rowIndexByAddress)
        {
          messageData[rowIndexByAddress[mailFrom]] = null;
        }

        // Add the data
        rowIndexByAddress[mailFrom] = messageData.length;
        messageData.push ([name, email, mailDate]);
      }
    }
  }

  var rows = messageData.filter (function (entry) { return entry !== null; });

  // Add data to corresponding sheet. A range needs at least one row, so an
  // empty label leaves the cleared sheet as it is instead of throwing.
  if (rows.length > 0)
  {
    sheet.getRange (1, 1, rows.length, 3).setValues (rows);
  }
}
//
// Spreadsheet open hook: registers a "Start Extracting" menu whose single
// entry runs GetAddresses.
//
function onOpen ()
{
  var menuEntries = [];
  menuEntries.push ({name: "Extract email addresses", functionName: "GetAddresses"});
  SpreadsheetApp.getActiveSpreadsheet ().addMenu ("Start Extracting", menuEntries);
}
You are receiving this error message because your code exceeds the maximum execution time for Apps Script. Currently the limit is 6 minutes. You can verify this in the "Current limitations" section of the documentation.
You will notice that this is the information displayed for the runtime quota:

Permanently delete gmail message after some days

I'm using this script for deleting old messages from gmail every X days.
It functions correctly, however messages are sent to trash.
I want to delete the messages permanently without sending them to trash.
Can someone modify this script?
// Gmail label whose old messages are purged automatically.
var GMAIL_LABEL = "mylabel";
// Age, in days, after which a message is purged.
var PURGE_AFTER = "21";

/**
 * Moves to the trash every message under GMAIL_LABEL that is older than
 * PURGE_AFTER days. At most 100 threads are handled per run.
 */
function purgeGmail() {
  var cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - PURGE_AFTER);
  var cutoffDay = Utilities.formatDate(cutoff, Session.getTimeZone(), "yyyy-MM-dd");
  // Builds a plain Gmail search query such as
  // label:Newsletters before:2012-10-12
  var query = "label:" + GMAIL_LABEL + " before:" + cutoffDay;
  try {
    // Threads are processed 100 at a time to stay clear of the
    // "Exceeded maximum execution time" error.
    // For large batches, create another time-based trigger that will
    // activate the auto-purge process after 'n' minutes.
    // if (threads.length == 100) {
    // ScriptApp.newTrigger("purgeGmail")
    // .timeBased()
    // .at(new Date((new Date()).getTime() + 1000*60*10))
    // .create();
    // }
    GmailApp.search(query, 0, 100).forEach(function (thread) {
      // Messages of one thread can have different timestamps, so each
      // message is checked against the cutoff individually.
      GmailApp.getMessagesForThread(thread).forEach(function (email) {
        if (email.getDate() < cutoff) {
          email.moveToTrash();
        }
      });
    });
  } catch (e) {
    // Deliberately ignored: a failed run just leaves the purge to the
    // next scheduled run.
  }
}
thanks
It's pretty simple: all you have to do is collect all of your message IDs in an array and then use the following two lines.
// Request body for the batch call: the ids of the messages to delete.
// messageIdArray is expected to be built beforehand by the caller.
var request={"ids":messageIdArray};
// Delete the listed messages for the current user ("me") through the
// advanced Gmail service, which must be enabled for the script.
Gmail.Users.Messages.batchDelete(request, "me");
You will have to enable the Advanced Gmail API
batchDelete
So if the rest of your code actually works then this should do it:
// Gmail label whose old messages are purged automatically.
var GMAIL_LABEL = "mylabel";
// Age, in days, after which a message is purged.
var PURGE_AFTER = "21";

/**
 * Permanently deletes (bypassing the trash) every message under GMAIL_LABEL
 * that is older than PURGE_AFTER days. At most 100 threads are handled per
 * run.
 *
 * Requires the advanced Gmail service (`Gmail`) to be enabled for the script.
 */
function purgeGmail() {
  var age = new Date();
  age.setDate(age.getDate() - PURGE_AFTER);
  var purge = Utilities.formatDate(age, Session.getTimeZone(), "yyyy-MM-dd");
  var search = "label:" + GMAIL_LABEL + " before:" + purge;
  try {
    // The posted listing never assigned `threads`; the search is restored.
    var threads = GmailApp.search(search, 0, 100);

    // Collect the ids of all messages older than the cutoff. Messages in a
    // thread can have different timestamps, so each one is checked.
    var msgA = [];
    for (var i = 0; i < threads.length; i++) {
      var messages = GmailApp.getMessagesForThread(threads[i]);
      for (var j = 0; j < messages.length; j++) {
        var email = messages[j];
        if (email.getDate() < age) {
          msgA.push(email.getId());
        }
      }
    }

    // The posted listing collected the ids but never issued the delete.
    // Skip the call when there is nothing to delete.
    if (msgA.length > 0) {
      Gmail.Users.Messages.batchDelete({"ids": msgA}, "me");
    }
  } catch (e) {
    // Best effort: a failed run leaves the purge to the next scheduled run,
    // but the cause is logged rather than lost.
    Logger.log("purgeGmail failed: " + e);
  }
}

Count all eMails from a Label

I have created this script:
/**
 * Counts the messages in all threads carrying the "Label1" label and writes
 * the total to cell (1, 1) of the active sheet.
 *
 * Every message of a labelled thread is counted, via getMessages() on each
 * thread.
 */
function myTest() {
  var targetSheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  var labelledThreads = GmailApp.getUserLabelByName("Label1").getThreads();
  // Sum the message counts of all threads.
  var total = labelledThreads.reduce(function (sum, thread) {
    return sum + thread.getMessages().length;
  }, 0);
  targetSheet.getRange(1, 1).setValue(total);
}
It runs nearly perfectly. However, it returns all emails that belong to these threads (whether they are marked with Label1 or not).
Can anyone share a simple script that counts only the emails that are "really" marked with the label?
Thanks
Try this:
/**
 * Lists messages that carry the "Label1" label, using the advanced Gmail
 * service (which must be enabled for the script).
 *
 * Only the first page of results is fetched, capped at 200 messages by
 * maxResults.
 *
 * @return {Array} The message entries from the list response; empty when the
 *     label does not exist or the response carries no messages.
 */
function labelMsgs(){
  var labels = Gmail.Users.Labels.list("me").labels || [];

  // Resolve the label's display name to its id.
  var lblId;
  for (var i = 0; i < labels.length; i ++){
    if (labels[i].name == "Label1"){
      lblId = labels[i].id;
      break;
    }
  }

  // Unknown label: return nothing instead of listing unfiltered mail, which
  // is what passing an undefined labelIds would do.
  if (lblId === undefined){
    return [];
  }

  var optionalArgs = {
    "labelIds" : lblId,
    "maxResults" : 200
  };
  var response = Gmail.Users.Messages.list("me", optionalArgs);

  // Fall back to an empty list when the response has no `messages` field.
  return response.messages || [];
}
This method uses the Gmail API directly, for this you'll need to enable Google Advanced Services, it will return a list of all messages tagged with the "Label1" label. You can play around with maxResults and with the pageToken to see more results, but this is the general approach.

Need to split the row based on Error and Warning and remove particular content

The provided script works when I run it, but it needs to be modified. The screenshot shows what I need; I am trying to change the script myself, so please help me if you can.
// As-posted attempt: append [day, body, action] for unread mail under the
// "Sanmina EDI Failed Concurrent Jobs Alert" label to the active sheet.
// NOTE(review): this listing does not run as written; the issues visible in
// the code are flagged below.
function autocopy() {
var label = GmailApp.getUserLabelByName("Sanmina EDI Failed Concurrent Jobs Alert");
var threads = label.getThreads();
// NOTE(review): getMessages() and isUnread() are called on the value returned
// by getThreads() as a whole, not on an individual thread.
var read = threads.getMessages();
var uread = threads.isUnread();
// NOTE(review): `uread(i)` calls uread as a function rather than indexing it,
// and `<=` runs one step past uread.length.
for(var i = 0; i <= uread.length; i++) {
var message=uread(i);
}
// NOTE(review): only the last `message` assigned by the loop is used from here on.
var message1 = new message.Date();
var day = message1.getUTCDate();
// NOTE(review): the body is stored in `bodycontent`, but the searches below
// read `bodyContents`, which is not declared in this function.
var bodycontent = message.getbody();
var action = bodyContents.search("Invoice")
var action1 = bodyContents.search("Error")
var action2 = bodyContents.search("Terminated")
// search() yields the match position, so `> 0` is false for a match at
// position 0; the first two branches produce the same text.
if (action > 0) {
var out ="Need to create SR"
} else if (action1 > 0 || action2 > 2) {
var out ="Need to create SR"
} else {
var out ="Printing output file"
}
// Append one row: day, body, derived action text.
var activeSheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
activeSheet.appendRow([day, bodycontent, out]);
}
I want to extract data from emails into a spreadsheet: read the unread thread IDs, use a for loop to go from the read threads to the unread ones, and print the body content and date of each unread email.
I reworked your code a bit. Put some comments in it so that you can see clearly what is happening in each line. Your issue was that you were trying to use methods on the wrong objects (for example, an array of threads does not have a getMessages method, so you have to loop through each thread and get the messages for each specific thread).
/**
 * Appends one row per unread message in unread threads under the
 * "Sanmina EDI Failed Concurrent Jobs Alert" label to the active sheet.
 *
 * Row layout: [message date, HTML body, action text]. The action text is
 * "Need to create SR" when the body contains "Invoice", "Error" or
 * "Terminated", and "Printing output file" otherwise.
 */
function autocopy() {
  var label = GmailApp.getUserLabelByName("Sanmina EDI Failed Concurrent Jobs Alert");
  // Get all threads belonging to this label
  var threads = label.getThreads();
  // Looked up once instead of once per appended row.
  var activeSheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();

  // Loop through each thread
  for (var i = 0; i < threads.length; i++) {
    // Only unread threads are of interest
    if (!threads[i].isUnread()) {
      continue;
    }
    // Get all messages for each unread thread
    var messages = threads[i].getMessages();
    for (var j = 0; j < messages.length; j++) {
      // Skip messages already read (delete this check if you want all
      // messages in an unread thread to be printed, read or unread)
      if (!messages[j].isUnread()) {
        continue;
      }
      var day = messages[j].getDate();
      var bodyContent = messages[j].getBody(); // Use getPlainBody() instead if you don't want html tags

      // A keyword counts wherever it occurs, including at position 0; the
      // earlier `> 0` / `> 2` comparisons skipped matches at the very start.
      var needsServiceRequest =
          bodyContent.indexOf("Invoice") !== -1 ||
          bodyContent.indexOf("Error") !== -1 ||
          bodyContent.indexOf("Terminated") !== -1;
      var out = needsServiceRequest ? "Need to create SR" : "Printing output file";

      activeSheet.appendRow([day, bodyContent, out]);
    }
  }
}
I hope this works for you!

Google Apps script stops randomly or refuses to launch

I have developed a Google Apps script in a Google Drive spreadsheet that processes e-mails with a certain label (download notifications) and adds these to the spreadsheet. I'm running this through the script editor of the spreadsheet.
My initial solution was quite inefficient - the whole analysis was repeated each time on all e-mails, which caused the runtime to increase for each day. A few days ago I got an error message "runtime exceeded", which is not strange.
My problem is that when trying to update the script to be more efficient, I get some weird problems. The script either stops randomly after processing a few e-mails, or simply refuses to start. Especially the script debugger, it begins to load but never starts.
I have tried several times over the last few days, and even created a new spreadsheet with the same code (in the same account), but still having these problems.
Sometimes when the script manages to run for a while, I have noted that the script output log does not match recent changes in the logging. I have of course saved the script before running it. It feels like there is some lock preventing my script from running/updating/refreshing?
Is there anyone here that recognize these problems?
The code is attached below.
The two relevant entry points are:
processInbox: Updates the spreadsheet based on new (starred) e-mails with a specific label. The label and star is set by an e-mail filter on reception. The star (indicating "new") is removed from each message after processing.
resetAllMsgs: Clears the spreadsheet and stars all relevant messages, causing processInbox to process all messages received with the relevant label.
/**
 * Resets the bookkeeping: clears both sheets, rewrites their header rows and
 * stars every message under the "Download Notification" label so that
 * processInbox treats all of them as unprocessed.
 */
function resetAllMsgs() {
  Logger.log("Starting ResetAll");
  var book = SpreadsheetApp.getActive();
  var dl_sheet = book.getSheetByName("Download List");
  var dlperday_sheet = book.getSheetByName("DownloadsPerDay");
  dl_sheet.clear();
  dlperday_sheet.clear();
  Logger.log("Clearing spreadsheet");

  // Header block of the "Download List" sheet, written top to bottom.
  [
    ["", "", "Downloads", ""],
    ["", "", "Downloaders", ""],
    ["Last Download Date", "First Download Date", "Email" , "Product", "Download Count"]
  ].forEach(function (headerRow) {
    dl_sheet.appendRow(headerRow);
  });
  dlperday_sheet.appendRow(["Date", "Download Count"]);

  // Walk all threads of the label and star each of their messages.
  var labelThreads = GmailApp.getUserLabelByName("Download Notification").getThreads();
  labelThreads.forEach(function (thread, threadNo) {
    var threadMessages = thread.getMessages();
    Logger.log("Starring thread " + threadNo);
    threadMessages.forEach(function (message, messageNo) {
      Logger.log(" Starring message " + messageNo);
      // Only starred messages are processed by processInbox
      message.star();
      // 100 ms pause after each star call.
      Utilities.sleep(100);
    });
  });
};
/**
 * Folds every starred "Download Notification" message into the two sheets
 * and then unstars it, so that it is not processed again.
 *
 * - "DownloadsPerDay": one row per date with that date's download count.
 * - "Download List": one row per downloader e-mail with last/first download
 *   date, product and download count; cell (2, 4) is incremented each time a
 *   new downloader row is added.
 *
 * If the "Download List" sheet has at most one row, resetAllMsgs() runs
 * first to rebuild the headers and star all messages.
 *
 * Relies on the sibling helpers find, getDownloader, getDownloadDate,
 * getProductName, formatDate and resetAllMsgs.
 */
function processInbox() {
  var d = SpreadsheetApp.getActive();
  var dl_sheet = d.getSheetByName("Download List");
  var dlperday_sheet = d.getSheetByName("DownloadsPerDay");

  // If empty spreadsheet, reset the status of all relevant e-mails and add
  // the spreadsheet headers. (The posted code called a non-existent
  // resetAll(); the reset routine is named resetAllMsgs.)
  if (dl_sheet.getLastRow() <= 1) {
    resetAllMsgs();
  }

  var label = GmailApp.getUserLabelByName("Download Notification");
  // get all threads of the label
  var threads = label.getThreads();
  for (var i = 0; i < threads.length; i++) {
    if (!threads[i].hasStarredMessages()) {
      continue;
    }
    // get all messages in a given thread
    var messages = threads[i].getMessages();
    for (var j = 0; j < messages.length; j++) {
      // Starred messages have not been processed yet
      if (!messages[j].isStarred()) {
        continue;
      }

      // Body flattened to one line before the helpers search it.
      var msg = messages[j].getBody();
      msg = msg.replace(/\r?\n/g, "");

      var email = getDownloader(msg);
      if (email == "") {
        Logger.log("Found no downloader info: " + messages[j].getSubject() + " " + messages[j].getDate());
      }
      var date = formatDate(getDownloadDate(msg));

      // Check if a new date
      var dateCell = find(date, dlperday_sheet.getDataRange(), 0);
      if (dateCell == null) {
        // If new date, append row in "downloads per day" and re-sort the
        // data rows of that same sheet (the posted code sized this range
        // from the "Download List" sheet).
        dlperday_sheet.appendRow([date, 1]);
        dlperday_sheet.getRange(2, 1, dlperday_sheet.getLastRow()-1, 2).sort([1]);
      }
      else
      {
        // If existing date, increment its count in "downloads per day"
        // (the posted code read the count but never wrote it back).
        var countCell = dlperday_sheet.getRange(dateCell.getRow(), dateCell.getColumn()+1);
        countCell.setValue(countCell.getValue() + 1);
      }

      var productname = getProductName(msg);

      // Check if e-mail (user) already exists in the downloads list
      var matchingCell = find(email, dl_sheet.getDataRange(), 0);
      if ( matchingCell != null ) {
        // If user e-mail exists, update this row: the cell left of the
        // match holds the last download date, the cell two to the right
        // holds the download count.
        var lastDownloadDate = dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()-1).getValue();
        var lastDownloadCount = dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()+2).getValue();
        if (lastDownloadDate != date) {
          dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()-1).setValue(date);
        }
        dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()+2).setValue(lastDownloadCount+1);
      }
      else // If new user e-mail, add new download row
      {
        dl_sheet.appendRow([date, date, email, productname, 1]);
        // Bump the counter kept in cell (2, 4), then re-sort the data rows
        // that start at row 4.
        dl_sheet.getRange(2, 4).setValue(dl_sheet.getRange(2, 4).getValue() + 1);
        dl_sheet.getRange(4, 1, dl_sheet.getLastRow()-3, 5).sort([1]);
      }

      // Mark message as processed, to avoid processing it on the next run
      messages[j].unstar();
    }
  }
};
/**
 * Scans a range row by row, left to right, for the first cell whose value
 * loosely equals (==) the given value.
 * @param value The value to look for.
 * @param range The range to scan.
 * @param log Pass 1 to log every comparison; any other value disables logging.
 * @return A range pointing to the first matching cell,
 * or null when no cell matches.
 */
function find(value, range, log) {
  var grid = range.getValues();
  var verbose = (log == 1);
  for (var r = 0; r < grid.length; r++) {
    var cells = grid[r];
    for (var c = 0; c < cells.length; c++) {
      var candidate = cells[c];
      if (verbose) {
        Logger.log("Comparing " + candidate + " and " + value);
      }
      if (candidate == value) {
        // getValues() is 0-based, getCell() is 1-based.
        return range.getCell(r + 1, c + 1);
      }
    }
  }
  return null;
};
/**
 * Extracts the downloader's e-mail address from a notification body.
 *
 * Looks for "Buyer Info" starting at offset 1000, then for the next
 * "mailto:" link, and returns the text between "mailto:" and the following
 * double quote. An address ending before offset 1000 is treated as
 * "not found".
 *
 * Side effect: updates the script-wide `latestIndex` cursor that
 * getDownloadDate and getProductName continue searching from. On success it
 * is set to the end of the address; on failure it is reset to 0, so those
 * helpers search the whole body instead of reusing a stale (or not yet
 * defined) position from an earlier message.
 *
 * @param bodystring The message body to search.
 * @param log Optional. Pass 1 to log the extracted address and the indexes
 *     used; omitted by existing callers, which keeps logging off. (The
 *     posted code read an undefined `log` variable here and threw.)
 * @return The e-mail address, or "" when none was found.
 */
function getDownloader(bodystring, log) {
  var marker = "Buyer Info";
  var marker_begin_index = bodystring.indexOf(marker, 1000);
  // 7 = length of "mailto:"
  var email_begin_index = bodystring.indexOf("mailto:", marker_begin_index) + 7;
  var email_end_index = bodystring.indexOf("\"", email_begin_index);
  if (email_end_index < 1000)
  {
    latestIndex = 0;
    return "";
  }
  var email = bodystring.substring(email_begin_index, email_end_index);
  if (log == 1)
  {
    Logger.log("Found e-mail address: " + email + "");
    Logger.log(" marker_begin_index: " + marker_begin_index);
    Logger.log(" email_begin_index: " + email_begin_index);
    Logger.log(" email_end_index: " + email_end_index);
  }
  latestIndex = email_end_index;
  return email;
};
/**
 * Converts a "month/day/year" value into "year-month-day", left-padding
 * one-character day and month parts with a zero. The year part is used
 * as-is.
 */
function formatDate(mydate)
{
  // Prefix a "0" to one-character parts only; anything else is kept as-is.
  function pad2(part)
  {
    return part.length == 1 ? "0" + part : part;
  }

  var pieces = ("" + mydate).split("/");
  var day = pad2(pieces[1]);
  var month = pad2(pieces[0]);
  return [pieces[2], month, day].join("-");
};
/**
 * Returns the trimmed text between the "Download Date:</strong>" marker and
 * the next "<br>", searching from the script-wide `latestIndex` cursor.
 * Side effect: moves `latestIndex` to the position of that "<br>".
 */
function getDownloadDate(bodystring) {
  var label = "Download Date:</strong>";
  var valueStart = bodystring.indexOf(label, latestIndex) + label.length;
  var valueEnd = bodystring.indexOf("<br>", valueStart);
  var rawDate = bodystring.substring(valueStart, valueEnd);
  // Later field lookups continue from the end of this value.
  latestIndex = valueEnd;
  return rawDate.trim();
};
/**
 * Returns the trimmed text between the "Item:</strong>" marker and the next
 * "</td>", searching from the script-wide `latestIndex` cursor.
 * Side effect: moves `latestIndex` to the position of that "</td>".
 */
function getProductName(bodystring) {
  var label = "Item:</strong>";
  var nameStart = bodystring.indexOf(label, latestIndex) + label.length;
  var nameEnd = bodystring.indexOf("</td>", nameStart);
  var rawName = bodystring.substring(nameStart, nameEnd);
  // Later field lookups continue from the end of this value.
  latestIndex = nameEnd;
  return rawName.trim();
};
Update: Any script I run stops after about 5 seconds, even if it does not call any services.
I tried the following code:
/**
 * Minimal reproduction: logs a line, sleeps for five seconds, logs again.
 */
function test() {
  var pauseMs = 5000;
  Logger.log("Test begins");
  Utilities.sleep(pauseMs);
  Logger.log("Test ends");
}
The script terminates after about 5 sec, but the last line is not printed. If decreasing the delay to 3 seconds it behaves as expected.
Moreover, to make the script update properly after modifying it, I need to save it, start it, click cancel, and then start it again; otherwise the log output seems to come from the old version. (I'm running this through the script editor.)
Also, the debugger refuses to start, even for the small script above. Seems to be some problem with my account (johan.kraft#percepio.se). Are there any google employee out there that can check this?