Extract Inline images from Gmail Body - google-apps-script

I am trying to extract all the images stored inline in the body of an email and save them to a Drive folder.
I am trying to use this code:
/**
 * Passes every message filed under the 'WHOLESALE REP' Gmail label to
 * fetchInlineImage.
 *
 * Threads are visited from the last one returned by the label to the first;
 * the messages inside each thread are visited in their original order.
 */
function GETGMEmails() {
  const wholesaleThreads = GmailApp.getUserLabelByName('WHOLESALE REP').getThreads();
  let threadIndex = wholesaleThreads.length;
  // Count down so the traversal runs last thread -> first thread.
  while (threadIndex-- > 0) {
    wholesaleThreads[threadIndex].getMessages().forEach(function (mail) {
      fetchInlineImage(mail);
    });
  }
}
/**
 * Downloads the first image referenced by an <img> tag in a Gmail message
 * body and stores it in Drive as 'ss.jpeg'.
 *
 * Absolute http(s) URLs are fetched as-is. Any other src value is treated as
 * a Gmail-relative path and is prefixed with the Gmail web URL before being
 * fetched.
 * NOTE(review): the relative-URL fetch is the legacy workaround this snippet
 * was built on; confirm it still works for the mailbox in question.
 *
 * Does nothing (apart from logging) when the body contains no <img> tag.
 *
 * @param {GmailMessage} message Message whose HTML body is scanned.
 */
function fetchInlineImage(message) {
  // `[^>]*?` keeps the match inside a single tag, so the captured src belongs
  // to the first <img> rather than to the last one on the same line.
  const imgPattern = /<img\b[^>]*?\ssrc="([^"]*)"[^>]*>/;
  const matches = imgPattern.exec(message.getBody());
  if (!matches) {
    // exec() returns null when nothing matches; bail out before touching it.
    Logger.log("No <img> tag found in message body");
    return;
  }

  let url = matches[1];
  if (!/^https?:\/\//.test(url)) {
    // Not an absolute URL: assume the image lives on Gmail's servers and
    // resolve it against the Gmail web app (subject to change).
    url = "https://mail.google.com/mail/u/0/" + url;
  }
  // TODO - embedded images used by newsletters are still not covered.

  Logger.log("Fetching image from URL: " + url);
  const response = UrlFetchApp.fetch(url);
  Logger.log("Response headers: " + JSON.stringify(response.getHeaders()));
  const blob = response.getBlob();
  Logger.log("Response blob: " + blob.getBytes().length);
  // `Drivefolder` is expected to be defined elsewhere in the project.
  Drivefolder.createFile(blob).setName('ss.jpeg');
}
The email looks like this, with a lot of images here and there, and I want to extract each one of them:
enter image description here

You are trying to access inline images from a GMail message.
When GMail was first introduced there was no ability to access inline images.
In 2012 an Issue:Access to inline images was raised and the script shown in the question was proposed as a workaround. A question was also asked on StackOverflow Parsing inlineImages from Gmail raw content.
These (and some variants) worked for a few years until 2014 when a second Issue:GmailApp.getAttachments Issue was raised.
Workarounds were patchy until 2017 when it was announced that the Issue had been resolved.
In 2018, a new answer was added to the StackOverflow question, and in 2022 a new StackOverflow question, How can I extract inline images from a Gmail email? (all available workarounds do not work anymore), was asked and answered.
In short, the script that you are using is redundant. However, the process to access inline images is simple and straightforward. The key is to examine the body by Regex.
The following script provides a basis for you to identify and log inline images.
Put headers in row 1 of sheet="Images".
A1="ID", B1="Subject", C1="Image"
/**
 * Finds <img src="..."> tags in every message labelled 'WHOLESALE REP',
 * logs each one, and records them in the sheet named "Images".
 *
 * For each message containing at least one tag, starting at the first row
 * after the sheet's last used row: the message ID goes in column A, the
 * subject in column B, and one tag per row in column C.
 *
 * Logs and returns without doing anything else if the label does not exist.
 */
function so75327302() {
  const label = GmailApp.getUserLabelByName('WHOLESALE REP');
  if (!label) {
    Logger.log("Label 'WHOLESALE REP' not found");
    return;
  }

  for (const thread of label.getThreads()) {
    for (const message of thread.getMessages()) {
      // Logger.log("DEBUG: subject:"+message.getSubject()+", message ID:"+message.getId())
      const body = message.getBody();

      // One single-cell row per tag so the array can be passed to setValues().
      const images = [];
      for (const match of body.matchAll(/<img src="([^"]*)"[^>]*>/g)) {
        Logger.log("message ID:" + message.getId() + ", Subject: " + message.getSubject() + " contains inline images." + `Found ${match[0]}`);
        images.push([match[0]]);
      }
      if (images.length === 0) {
        continue;
      }

      const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Images");
      const firstFreeRow = sheet.getLastRow() + 1;
      sheet.getRange(firstFreeRow, 1).setValue(message.getId());
      sheet.getRange(firstFreeRow, 2).setValue(message.getSubject());
      sheet.getRange(firstFreeRow, 3, images.length).setValues(images);
    }
  }
}

Related

Sheets to Docs to Email

I have created a spreadsheet that contains quite a large amount of data.
The plan is to consolidate this data into a readable email to be sent out weekly, each specific row of data is its own email.
I tried going directly from sheets to email, but frankly it never quite looked right, plus the idea was to have a document template, where we could easily update the body without messing with code.
So I decided to write a email template in DOCS, set out a table, then have a script that copied the email template and updated the table with the row of data the script was looking at, then send it via email.
The code works great, but there is one little snag, the table never quite copies over to the email properly.
Below are images of how the table is formatted in the email compared to the format in the template.
I just can not figure out how or why the format does not carry over.
I have also listed my code below, any help or advice on how I achieve the correct formatting would be appreciated.
UPDATE;
I have updated the question to show the code where we find the url of the document and convert to HTML,
// Fragment: exports the Google Doc identified by `newID` as HTML, rewrites the
// `.c<N>` class rules found in its <head> as inline style attributes, and
// mails the resulting markup.
// NOTE(review): `row`, `newID` and the enclosing function/loop are outside
// this excerpt; row[30] is used as the subject and row[31] as the recipient.
var classArray=[];
//get html from Doc
var subject= row[30];
var forDriveScope = DriveApp.getStorageUsed(); //needed to get Drive Scope requested
var url = "https://docs.google.com/feeds/download/documents/export/Export?id="+newID+"&exportFormat=html";
var param = {
method : "get",
headers : {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
muteHttpExceptions:true,
};
var html = UrlFetchApp.fetch(url,param).getContentText();
//docs uses css in the head, but gmail only takes it inline. need to move css inline.
//DOES NOT HANDLE HEADER CLASSES (eg h1, h2).
var headEnd = html.indexOf("</head>");
//get everything between <head> and </head>, remove quotes
var head = html.substring(html.indexOf("<head>")+6,headEnd).replace(/"/g,"");
//split on .c# with any positive integer amount of #s
var regex = /\.c\d{1,}/;
var classes = head.split(regex);
//get class info and put in an array index by class num. EG c4{size:small} will put "size:small" in classArray[4]
//totalLength tracks the offset in `head` of the current ".c#" token, because split() discards the matched separators
var totalLength = 0;
for(var i = 1; i < classes.length; i++){
//assume the first string (classes[0]) isn't a class definition
totalLength = totalLength + classes[i-1].length;
var cNum = head.substring(totalLength+2,head.indexOf("{",totalLength)); //totallength+2 chops off .c, so get what's between .c and {
totalLength = totalLength + 2 + cNum.length //add .c and the number of digits in the num
classArray[cNum] = classes[i].substring(1,classes[i].indexOf("}")); //put what's between .c#{ and } in classArray[#]
}
//now we have the class definitions, let's put it in the html
html = html.substring(headEnd+7,html.indexOf("</html>")); //keep everything after </head> up to (not including) </html>
var classMatch = /class=\"(c\d{1,} ){0,}(c\d{1,})\"/g
//matches class="c# c#..." where c#[space] occurs any number of times, even zero times, and c#[no space] occurs after it, exactly once
html = html.replace(classMatch,replacer); //replace class="c# c#..." with the definitions in classArray[#]
//make the e-mail!
//the third argument is the plain-text fallback body; the converted markup is sent as htmlBody
GmailApp.sendEmail(row[31], subject, "HTML is not enabled in your email client. Sad face!", {
htmlBody: html,
});
/**
 * String.prototype.replace callback that converts one `class="c1 c2 ..."`
 * attribute into an inline `style="..."` attribute.
 *
 * Each class number is looked up in the outer `classArray`, which holds the
 * text found between `{` and `}` of the matching `.c#` rule. Declarations
 * from different classes are joined with `;` so the last declaration of one
 * class does not run into the first declaration of the next. Classes with no
 * entry in `classArray` are skipped instead of contributing "undefined".
 *
 * @param {string} match The full matched attribute, e.g. `class="c1 c4"`.
 * @returns {string} The replacement `style="..."` attribute.
 */
function replacer(match) {
  // `class="` is 7 characters long; also drop the closing quote.
  const classNames = match.substring(7, match.length - 1).split(" ");
  const declarations = [];
  for (const className of classNames) {
    // Strip the leading "c" to get the index used in classArray.
    const css = classArray[className.substring(1)];
    if (!css) {
      continue;
    }
    // Remove trailing semicolons so join(";") never produces ";;".
    declarations.push(css.replace(/[;\s]+$/, ""));
  }
  return "style=\"" + declarations.join(";") + "\"";
}
})
}
The comments in the code say that Gmail can only use inline styling. That was true several years ago, but Gmail currently allows a style tag inside the head tag. Considering this, the script could be much simpler than the one included in the question.
Below there is a script showing a sample that sends a Google Document content as the HTML body of an email message.
/**
 * Fetch a Google Document through the Docs HTML export endpoint.
 * Adapted from https://stackoverflow.com/a/28503601/1595451
 *
 * @param {string} id ID of the Google Document to export.
 * @returns {string} Text of the export response. HTTP exceptions are muted,
 *     so an error response is returned as text as well.
 */
function getGoogleDocumentAsHTML(id) {
  // Touching DriveApp is what gets the Drive scope requested for the script.
  DriveApp.getStorageUsed();

  const exportUrl = "https://docs.google.com/feeds/download/documents/export/Export?id=" + id + "&exportFormat=html";
  const authorization = "Bearer " + ScriptApp.getOAuthToken();
  const response = UrlFetchApp.fetch(exportUrl, {
    method: "get",
    headers: { "Authorization": authorization },
    muteHttpExceptions: true,
  });
  return response.getContentText();
}
/**
* Send the content of a Google Document as the HTML body of a email message
*
* This is a template: the document URL, recipient and subject are left blank
* and must be filled in before the function can run.
* The exported <head> (minus its <meta> tags) and <body> are sent together so
* the document's stylesheet travels with the markup instead of being inlined.
*/
function sendEmail(){
const url = /* add here the URL of your Google Document */;
// Takes the first run of 44 characters containing no "/" as the document ID.
// NOTE(review): assumes the ID in the URL is exactly 44 characters long - confirm.
const id = url.match(/[^\/]{44}/)[0];
const doc = getGoogleDocumentAsHTML(id);
const head = doc
.replace(/<meta[^>]+?>/g,'') // get rid of the meta tags
.match(/<head.+?<\/head>/)[0];
// NOTE(review): "." does not match line breaks, so both matches rely on the
// exported HTML having no newline inside <head> / <body> - confirm.
const body = doc.match(/<body[^>]+?>.+<\/body>/)[0];
const htmlBody = [head,body].join('\n');
MailApp.sendEmail({
to: /*add here the recipient email address */,
subject: /*add here the email subject */,
htmlBody: htmlBody
})
}
NOTE: You might want to clear the class of the body tag to avoid the margins set for it.

Gmail apps script edit body before forwarding

I'm using Gmail apps script to forward emails with a certain label ... But I'd like to edit the body of the message prior to forwarding.
/**
 * Forwards every message of every thread labelled 'fb' to the fixed
 * recipient, trashing each thread once its messages have been forwarded.
 */
function fwdFB() {
  const fbThreads = GmailApp.getUserLabelByName('fb').getThreads();
  fbThreads.forEach(function (thread) {
    thread.getMessages().forEach(function (message) {
      message.forward('w#yahoo.com', { from: 'webmaster#gmail.com' });
    });
    thread.moveToTrash();
  });
}
I'm unclear as to how to insert the getBody() method to edit the message. (Specifically, I want to delete a certain HTML chunk from the message. It's the same chunk in each message.)
The options of forward(recipient, options) do not offer an endpoint to modify the message body. You'll have to do it instead manually, by sending a message after modifying the body as desired.
Sample:
/**
 * "Forwards" every message labelled 'fb' by sending a new email whose body
 * is a modified copy of the original, then trashes each thread.
 *
 * forward() offers no way to change the body, so a fresh message is sent.
 * The modification below is only a placeholder: replace the expression that
 * builds `newBody` with the real edit (for example, removing the unwanted
 * HTML chunk).
 *
 * getBody() returns HTML, so the modified body is passed as `htmlBody` too;
 * the positional body argument only serves clients that cannot render HTML.
 */
function fwdFB() {
  const label = GmailApp.getUserLabelByName('fb');
  for (const thread of label.getThreads()) {
    for (const message of thread.getMessages()) {
      const oldBody = message.getBody();
      // Placeholder edit - substitute the real body transformation here.
      const newBody = oldBody.substring(1, 10) + " PS: This is a modification.";
      const newSubject = "Fwd: " + message.getSubject();
      GmailApp.sendEmail('w#yahoo.com', newSubject, newBody, {
        from: 'webmaster#gmail.com',
        htmlBody: newBody,
      });
    }
    thread.moveToTrash();
  }
}
UPDATE
If it is important for you to preserve the message history, you need to perform additional steps to thread the message.
#tehhowch provides a good example of how to do so.

GmailApp (Google Apps Script) Displays Inline Images as Attachments

Hello friendly StackOverflow folks,
I am having the most difficult time getting the GmailApp sendEmail() method to successfully use an existing email (e.g. a draft) that contains images inline as a template for new messages.
It seems like this is a problem, as I have found devs having this problem here and here and proposed solutions here, here, and here. Each of these solutions is 4+ years old, so perhaps they're out of date. I have been unable to use any of these solutions to replicate a success.
Currently, I'm running this code from my Google Scripts backend:
/**
 * Sends a new email that reuses an existing message as its template, trying
 * to keep the template's inline images inline (approach via Romain Vialard).
 *
 * For every <img> tag in the template body that mentions the template's
 * message ID and carries a `realattid=` parameter:
 *   1. the image's file name is read from the `name="..."` of the last
 *      Content-Type header that follows the ID's first occurrence in the
 *      raw message;
 *   2. the first attachment with that name is moved from the attachment list
 *      into `inlineImages`, keyed by the realattid value;
 *   3. the tag's src is rewritten to `cid:<realattid>`.
 * Tags for which any step fails are left untouched, and their attachment (if
 * any) stays a regular attachment.
 *
 * NOTE(review): this depends on Gmail emitting `realattid=` in image URLs and
 * on getAttachments() returning the inline images; confirm both against the
 * current Gmail output.
 */
function generateMessageFromTemplate() {
  const selectedTemplate = GmailApp.getMessageById('MESSAGE_ID');
  const messageId = selectedTemplate.getId();
  let emailTemplate = selectedTemplate.getBody();
  const rawContent = selectedTemplate.getRawContent();
  const attachments = selectedTemplate.getAttachments();
  // Always an object, so sendEmail never receives `inlineImages: undefined`.
  const inlineImages = {};

  // Snapshot the tags up front; emailTemplate is rewritten inside the loop.
  const imgTags = emailTemplate.match(/<img[^>]+>/g) || [];
  for (const imgTag of imgTags) {
    if (!imgTag.includes(messageId)) {
      continue;
    }
    const idMatch = imgTag.match(/realattid=([^&]+)&/);
    if (idMatch == null) {
      continue;
    }
    const contentId = idMatch[1];

    // Text following the ID's first occurrence in the raw message.
    const afterId = rawContent.split(contentId)[1];
    if (afterId === undefined) {
      continue;
    }
    const headerStart = afterId.lastIndexOf('Content-Type');
    if (headerStart === -1) {
      continue;
    }
    const nameMatch = afterId.slice(headerStart).match(/name="([^"]+)"/);
    if (nameMatch == null) {
      continue;
    }

    // First attachment with that file name; findIndex avoids splicing inside
    // an index loop, which skipped the element after every removal.
    const attachmentIndex = attachments.findIndex((attachment) => attachment.getName() === nameMatch[1]);
    if (attachmentIndex === -1) {
      continue;
    }
    inlineImages[contentId] = attachments[attachmentIndex].copyBlob();
    attachments.splice(attachmentIndex, 1);

    // Function replacers keep any "$" in the replacement text literal.
    const newImg = imgTag.replace(/src="[^\"]+\"/, () => "src=\"cid:" + contentId + "\"");
    emailTemplate = emailTemplate.replace(imgTag, () => newImg);
  }

  GmailApp.sendEmail('test#email.com', selectedTemplate.getSubject(), '', {
    attachments: attachments,
    htmlBody: emailTemplate,
    inlineImages: inlineImages,
  });
}
The Google Scripts documentation on the sendEmail() method is here.
This is the Output of this Function as Is
When I send emails from Apps Script as is, I get emails that look like this:
screenshot
I've replicated the test with an old yahoo.com email account and had the exact same results as a Gmail account.
Again, this dev also has this same issue.
If you can help, I would be extremely grateful!

How can I get more details of attachment using Google Script?

I am trying to write a small Google Script to extract the details of my emails especially the attachments. In this example I am trying to get the 'user' and 'attachment' details in the Log window.
After running the code, I can see the 'attachment details' as :-
1) GmailAttachment - wherever attachment is present
2) Undefined - wherever attachment isn't present
I would like to check that how can I get more details of Attachment like Name of Attachment, Url of Attachment, type etc. is that possible through Google Scripting?
/**
 * Logs the active user's email address, then logs the first attachment of
 * every message in every inbox thread. A message without attachments logs
 * `undefined`.
 */
function testing1() {
  Logger.log(Session.getActiveUser().getEmail());
  GmailApp.getInboxThreads().forEach(function (thread) {
    thread.getMessages().forEach(function (message) {
      // The body is fetched but not used, exactly as before.
      message.getBody();
      const firstAttachment = message.getAttachments()[0];
      Logger.log(firstAttachment);
    });
  });
}
Thanks in advance for your guidance.
Regards,
Alok
You can loop through attachments (if available) and get the file name and size from the GmailAttachment class.
// Log the file name and size of every attachment on every message in `msg`
// (the array of messages of the current thread, defined by the caller).
for (var i = 0; i < msg.length; i++) {
  var att = msg[i].getAttachments();
  for (var a = 0; a < att.length; a++) {
    // Index with the loop counter: a fixed index would log the same
    // attachment repeatedly and fail when there is only one.
    Logger.log(att[a].getName());
    Logger.log(att[a].getSize());
  }
}

Error in Google Sheets Script when parsing XML

I have this function running in a Google Sheets script that pulls HTML from subreddits and returns them to a spreadsheet. It works for me some/most of the time, but other times I get an error "Could not parse text. (line 13)" which is the line with var doc = Xml.parse(page, true);. Any idea why this is happening or is this just a bug with Google Scripts? Here's the code that works...sometimes.
/**
 * Scrapes three subreddit front pages and returns their posts sorted by
 * likes, highest first.
 *
 * Each page is fetched, parsed leniently with Xml.parse (the call the
 * question reports as failing with "Could not parse text"), and its body is
 * re-parsed with XmlService. Elements are located with the project's
 * getElementsByClassName helper, which is defined outside this block.
 *
 * @returns {Array<Array>} One [rank, likes, title, link] row per post.
 */
function getRedditHTML() {
  const subredditUrls = [
    'https://www.reddit.com/r/news/',
    'https://www.reddit.com/r/funny/',
    'https://www.reddit.com/r/science/',
  ];
  const rows = [];

  subredditUrls.forEach(function (subredditUrl) {
    const page = UrlFetchApp.fetch(subredditUrl);
    // Lenient parse of the raw page - this is the step that intermittently fails.
    const lenientDoc = Xml.parse(page, true);
    const bodyXml = lenientDoc.html.body.toXmlString();
    const root = XmlService.parse(bodyXml).getRootElement();
    const things = getElementsByClassName(root, 'thing');

    for (let index = 0, count = things.length; index < count; index += 1) {
      const thing = things[index];
      // The second 'title' element is serialized and stripped of its tags.
      const titleNodes = getElementsByClassName(thing, 'title');
      const title = XmlService.getRawFormat().format(titleNodes[1]).replace(/<[^>]*>/g, "");
      const link = getElementsByClassName(thing, 'comments')[0].getAttribute('href').getValue();
      const rank = getElementsByClassName(thing, 'rank')[0].getValue();
      const likes = getElementsByClassName(thing, 'likes')[0].getValue();
      rows.push([rank, likes, title, link]);
    }
  });

  // Descending by the likes column.
  rows.sort((left, right) => right[1] - left[1]);
  return rows;
}
Here is what I found while playing with importXML (my usual way of doing this): for some reason I cannot narrow down, it does appear to randomly stall and return null for a few minutes. So I'm guessing the issue is not your code, but that the site or Google temporarily blocks the request or won't return the data.
however I found the JSON endpoint to the piece you want - and I noticed that when XML went down - the JSON didnt.
You can take that and fix it to push your own array of topics/urls - I just left it for one link for now to show you how the URL breaks down and where it should be modified:
The URL is 'https://www.reddit.com/r/news/hot.json?raw_json=1&subredditName=news&sort=top&t=day&feature=link_preview&sr_detail=true&app=mweb-client'
News is mentioned in 2 places so just modify all your URLs to follow that method - you can easily load that javascript in a browser to see all the fields available
Also the portion hot.json is where you can change whether you want the ranked list (called hot), or new,top,promoted, etc. you just change that keyword.
Score is the same as the upvotes/likes
/**
 * Appends the current "hot" posts of r/news to the active sheet.
 *
 * Reads the subreddit's JSON listing and appends one row per post, for at
 * most the first 25 posts: [timestamp, position (1-based), score, title,
 * article URL]. All rows of one run share the same timestamp. Listings with
 * fewer than 25 posts simply produce fewer rows.
 */
function getSubReddit() {
  const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();

  // The subreddit name appears twice in the endpoint URL.
  const subject = 'news';
  const url = 'https://www.reddit.com/r/' + subject + '/hot.json?raw_json=1&subredditName=' + subject + '&sort=top&t=day&feature=link_preview&sr_detail=true&app=mweb-client';

  const response = UrlFetchApp.fetch(url);
  const redditData = JSON.parse(response.getContentText());
  Logger.log(redditData); // log data to logger to check

  const date = new Date(); // one timestamp for the whole batch
  const posts = redditData.data.children.slice(0, 25);
  posts.forEach((post, index) => {
    sheet.appendRow([
      date,
      index + 1,
      post.data.score, // same as upvotes/likes
      post.data.title,
      post.data.url, // article url
      // 'http://www.reddit.com' + post.data.permalink would be the reddit permalink
    ]);
  });
}