Extract text from several Google docs into Google Spreadsheet - google-apps-script

I am trying to extract the text from each Google document in a folder in Drive and paste the text into the first column of a Google spreadsheet so that the contents of file 1 are in A1, the contents of file 2 in A2 etc. Ultimately I am trying to recreate a database of the information stored in all these files, so if the text can be split by field so much the better, but I think this should be trivial in Excel using Text to Columns.
I have used a few snippets online to have a stab at it but I'm now stumped.
Here is my script as it stands:
//Function to extract the body from each document in a folder and copy it to a spreadsheet
/**
 * Lists every file of the "Communication Passports" folder in the destination
 * spreadsheet (one row of metadata per file) and opens each file as a Google Doc.
 *
 * As written, the document body is fetched but never written to the sheet; only
 * the metadata row is appended for each file.
 *
 * NOTE(review): DocsList is a legacy Apps Script service - confirm it is still
 * available in the target environment before relying on this function.
 */
function extract() {
//Define the folder we're working with ("Communication Passports") and get the file list
var folder = DocsList.getFolder("Communication Passports");
var contents = folder.getFiles();
//Define the destination spreadsheet file (CP) and set up the sheet to receive the data
var ss = SpreadsheetApp.openById("0AicdFGdf-Cx5dHFTX1R3Wm1RTEFTZ2d5ZmxuSjJSOHc");
SpreadsheetApp.setActiveSpreadsheet(ss);
Logger.log('File name: ' + ss.getName());
var sheet = SpreadsheetApp.getActiveSheet();
//Wipe the sheet, then write a fresh header row
sheet.clear();
sheet.appendRow(["Name", "Date", "Contents", "URL", "Download", "Description"]);
//Set up other variables
var file;
var data;
//Loop through and collect the data (I don't actually need this - just borrowed the code from a snippet online - but it is SO CLOSE!)
//Sadly, getBody doesn't work on files, only on documents
//NOTE(review): the loop indexes `contents` and reads its .length, so it assumes getFiles() returned an array
for (var i = 0; i < contents.length; i++) {
file = contents[i];
data = [
file.getName(),
file.getDateCreated(),
//The third value lands under the "Contents" header but holds the result of getViewers(), not document text
file.getViewers(),
file.getUrl(),
"https://docs.google.com/uc?export=download&confirm=no_antivirus&id=" + file.getId(),
file.getDescription()
];
sheet.appendRow(data);
//Extract the text from the file (this doesn't work at present, but is what I actually need)
var doc = DocumentApp.openById(file.getId());
//`body` receives whatever getBody() returns; nothing after this line reads it
var body = doc.getBody();
//Find a way to paste the extracted body text to the spreadsheet
}
};
Any help would be very gratefully received - I'm not a programmer, I'm a teacher and the information is about children's learning needs at our school (someone deleted the database over summer and our backups only go back a month!).
Thanks,
Simon

Try to add:
// Open the Drive file as a Google Doc so that its content can be read.
var doc = DocumentApp.openById(file.getId());
// getText() turns the document body into a plain string. Declared with `var`
// so the snippet never creates an implicit global when pasted on its own.
var body = doc.getBody().getText();
to return the actual contents of the document.
I wrote another function to parse the content into more usable chunks and then pass back to an entry in the data table and it worked fine.

Related

Appscript - Multiple excel files conversion to gsheets and updating data without creating a new gsheet

sorry for bad english.
I have a folder with multiple excel files that I need in Gsheets format. I also need to update the data (converting the same file again and again), but the output must remain the same. (the GSHEET id for example).
I actually have this code but this works for 1 xlsx file.
/**
 * Converts the Excel file "QM12.XLSX" into a temporary Google Sheet, copies the
 * contents of its "Sheet1" tab into the "QM12 CGH" tab of the fixed destination
 * spreadsheet, and finally removes the temporary conversion.
 */
function importData() {
  const sourceFileName = "QM12.XLSX"; // change to the Excel file that should be imported
  const tempSpreadsheetId = convert(sourceFileName).toString();

  // Source: the freshly converted copy of the Excel workbook.
  const sourceSheet = SpreadsheetApp.openById(tempSpreadsheetId).getSheetByName("Sheet1");

  // Destination: the spreadsheet whose ID must stay the same between imports.
  const targetSpreadsheet = SpreadsheetApp.openById("15GvvEYH8zuHI6cmhNxHqBWywrqQU32t6kaAbAJcclBk");
  const targetSheet = targetSpreadsheet.getSheetByName("QM12 CGH");

  // Size the destination range from the source sheet's last used column and row.
  const columnCount = sourceSheet.getLastColumn();
  const rowCount = sourceSheet.getLastRow();
  const targetRange = targetSheet.getRange(1, 1, rowCount, columnCount);

  // Copy every value of the converted sheet into the destination range.
  const sourceValues = sourceSheet.getDataRange().getValues();
  targetRange.setValues(sourceValues);

  // Remove the temporary converted file through the advanced Drive service.
  Drive.Files.remove(tempSpreadsheetId);
}
/**
 * Converts an Excel file stored in Drive into a new Google Sheets file placed
 * in the same parent folder as the Excel file.
 *
 * @param {string} xlsxName - Exact name of the Excel file to look up in Drive.
 * @returns {string} ID of the newly created (converted) spreadsheet.
 * @throws {Error} If no Drive file with the given name exists.
 */
function convert(xlsxName) {
  // Search by the value that was passed in, not by the literal text "xlsxName".
  const files = DriveApp.getFilesByName(xlsxName);
  if (!files.hasNext()) {
    throw new Error(`convert: no Drive file named "${xlsxName}" was found`);
  }
  const excelFile = files.next(); // first match wins when several files share the name
  const blob = excelFile.getBlob();
  const config = {
    title: "[Converted File] " + excelFile.getName(), // title of the converted file
    parents: [{ id: excelFile.getParents().next().getId() }], // first parent folder of the Excel file
    mimeType: MimeType.GOOGLE_SHEETS, // requests conversion to a Google Sheet
  };
  const spreadsheet = Drive.Files.insert(config, blob);
  return spreadsheet.id;
}
I would like to use this for multiple xlsx.
Basically I need to:
-> FolderA with xlsx files. FolderB as the destination for the gsheets converted files.
-> If there is no gsheet with the same name as the xlsx file, it should create a new gsheet; but if there is already a gsheet with the same name, it should just update the data in it.
I've been searching but couldn't find anything close to this.
Anyway, thank you in advance.

Using Class CellImageBuilder to import Image into Google Sheets cell

On the 19th January 2022, the CellImageBuilder class was added to the Google Sheets Spreadsheet service.
This class now allows you to import an image into a cell in Google Sheets, previously you could only add an image above the cell.
I've been trying to use this new class to take a URL link from a Google Sheet and then create an image in the cell next to the URL. This works perfectly fine if I hard-code the URL into the script (example below):
/**
 * Places a single in-cell image, loaded from a hard-coded URL, into cell C2 of
 * the active sheet.
 */
function insertImageIntoCell() {
  const sheet = SpreadsheetApp.getActiveSheet();
  const url = 'Google_Docs_Image_URL';
  // build() on the builder already produces the value that goes into the cell,
  // so no toBuilder() round trip is needed.
  const image = SpreadsheetApp.newCellImage()
    .setSourceUrl(url)
    .setAltTextDescription('TestImage')
    .build();
  sheet.getRange('C2').setValue(image);
}
The problem I am having appears once I create an array to iterate through the column. The script below creates a valid array and posts it into the correct column and rows, but when it posts it back into the spreadsheet it only returns the URL and does not convert it into an image in a cell:
// Attempt to turn the URLs in B2:B10 into in-cell images in C2:C10.
// NOTE(review): the ** around the signature below is Markdown bold markup from
// the post, not JavaScript; it has to be removed before the script can run.
**function insertImageIntoCell()**
{
var sheet = SpreadsheetApp.getActiveSheet();
// First assignment holds the Range object; the next line redeclares the same
// name and replaces it with the range's values.
var myStringArray = sheet.getRange('B2:B10');
var myStringArray = sheet.getRange('B2:B10').getValues();
//Logger.log(myStringArray)
// The whole array of values is handed to setSourceUrl() in a single call, and
// the resulting `image` is not used by any later statement.
let image = SpreadsheetApp.newCellImage().setSourceUrl(myStringArray).setAltTextDescription('test').toBuilder().build();
// What gets written to C2:C10 is `myStringArray` (the values read from B2:B10),
// not `image`.
SpreadsheetApp.getActive().getActiveSheet().getRange('C2:C10').setValues(myStringArray);
}
I'm using the following code to create the initial table of data. It pulls the file name and download URL from a Google Drive location and then saves them into a sheet:
/* modified from #hubgit and http://stackoverflow.com/questions/30328636/google-apps-script-count-files-in-folder
for this stackexchange question http://webapps.stackexchange.com/questions/86081/insert-image-from-google-drive-into-google-sheets by #twoodwar
*/
/**
 * Appends a header row to the active sheet, followed by one row (file name and
 * download URL) for every file in the configured Drive folder.
 *
 * @param {string} [folderName] - Not used: the folder is selected by the
 *     hard-coded ID below. Kept so existing callers keep working.
 */
function listFilesInFolder(folderName) {
  const sheet = SpreadsheetApp.getActiveSheet();
  sheet.appendRow(["Name","URL","Image"]);
  //change the folder ID below to reflect your folder's ID (look in the URL when you're in your folder)
  const folder = DriveApp.getFolderById("Google_Drive_Folder");
  const contents = folder.getFiles();
  while (contents.hasNext()) {
    const file = contents.next();
    // Keep the row local to the loop so nothing leaks into the global scope.
    const data = [file.getName(), file.getDownloadUrl()];
    sheet.appendRow(data);
  }
}
I am looking for the script to refresh the file information from Google Drive into Sheets and then save the image into a cell. It now appears that this functionality exists, but I'm not able to get it to take an array of URLs.
Suggestion
Perhaps you can try this sample implementation below.
Sample Tweaked Script
/**
 * Appends a header row to the active sheet, then one row (file name and
 * download URL) per file in the configured Drive folder, and finally calls
 * insertImageIntoCell() to fill column C.
 *
 * @param {string} [folderName] - Not used: the folder is selected by the
 *     hard-coded ID below. Kept so existing callers keep working.
 */
function listFilesInFolder(folderName){
  const sheet = SpreadsheetApp.getActiveSheet();
  sheet.appendRow(["Name","URL","Image"]);
  //change the folder ID below to reflect your folder's ID (look in the URL when you're in your folder)
  const folder = DriveApp.getFolderById("DRIVE_FOLDER_ID");
  const contents = folder.getFiles();
  while (contents.hasNext()) {
    const file = contents.next();
    // Keep the row local to the loop so nothing leaks into the global scope.
    const data = [file.getName(), file.getDownloadUrl()];
    sheet.appendRow(data);
  }
  insertImageIntoCell(); //Insert the images on column C
}
/**
 * Walks every row of the active sheet's data range and, for each row whose
 * second column is not the "URL" header text, builds an in-cell image from
 * that column's value and writes it into column C of the same row.
 */
function insertImageIntoCell(){
  const sheet = SpreadsheetApp.getActiveSheet();
  const records = sheet.getDataRange().getValues();
  for (let index = 0; index < records.length; index += 1) {
    const link = records[index][1];
    // The header row carries the column title instead of a link; leave it alone.
    if (link == "URL") {
      continue;
    }
    const picture = SpreadsheetApp.newCellImage()
      .setSourceUrl(link)
      .setAltTextDescription('TestImage')
      .toBuilder()
      .build();
    // Sheet rows are 1-based while the values array is 0-based.
    const targetCell = 'C' + (index + 1);
    SpreadsheetApp.getActive().getActiveSheet().getRange(targetCell).setValue(picture);
  }
}
Sample Drive Folder with sample images
Sample Result:
After running the function listFilesInFolder:
Update:
This issue is filed. Add a star to this issue for Google developers to prioritize fixing this.
Issue:
setValues() is NOT working with CellImage, while setValue() does.
If/when it starts working, You need to convert each value to cellImage using map :
/**
 * Reads the values in B2:B10 of the active sheet, builds one in-cell image per
 * row from the value in that row, and writes the images back to the same range
 * in a single setValues() call.
 */
function insertImagesIntoCell() {
  const urlRange = SpreadsheetApp.getActiveSheet().getRange('B2:B10');
  const imageRows = [];
  for (const [link] of urlRange.getValues()) {
    const cellImage = SpreadsheetApp.newCellImage().setSourceUrl(link).build();
    // setValues() expects a 2D array, so every image is wrapped in its own row.
    imageRows.push([cellImage]);
  }
  urlRange.setValues(imageRows);
}
For anyone struggling with importing images from Google Drive you should know that you have to set the "Sharing" setting on every individual file for CellImageBuilder to work properly.
Like this:
// Take the first file in the folder that carries the requested name.
const imageFile = imageFolder.getFilesByName(filename).next();
// Open the file up to anyone with the link (view only) before asking for its download URL.
imageFile.setSharing(DriveApp.Access.ANYONE_WITH_LINK, DriveApp.Permission.VIEW);
const imageFileUrl = imageFile.getDownloadUrl();
// Build the in-cell image from that download URL.
const cellImageBuilder = SpreadsheetApp.newCellImage().setSourceUrl(imageFileUrl);
const cellImage = cellImageBuilder.build();
Additionally, there appears to be a rate limit on the drive download URLs, causing the '.build()' function to fail randomly on a valid URL. Retries might be necessary.
Also, the .toBuilder() call on CellImageBuilder is completely redundant.

File not found when using Drive.Files.get(path)

I am trying to automate attaching and sending emails using Gscripts. Each recipient has his own unique attachment so I have a Google sheet containing the details:
Column A contains the email addresses.
Column B contains the message.
Column D contains the filename of the attachment along with the file extension.
Here's my code:
/**
 * Intended to send one email per sheet row, each with its own Drive attachment.
 * Addresses are read from column 1, messages from column 2 and attachment file
 * names from column 4 of the active sheet.
 *
 * The review notes below mark the spots where the code does not do that yet.
 */
function sendEmails() {
var sheet = SpreadsheetApp.getActiveSheet();
// Three ranges, each requested as getRange(1, <column>, 3) for columns 1, 2 and 4.
var rowEmails = sheet.getRange(1,1,3);
var rowMessage = sheet.getRange(1,2,3);
var rowAttachments = sheet.getRange(1,4,3);
var vEmails = rowEmails.getValues();
var vMessage = rowMessage.getValues();
var vAttachments = rowAttachments.getValues()
// NOTE(review): `i` is never declared, so the loop assigns to an implicit global.
for (i in vEmails) {
// NOTE(review): index 1 is hard-coded here and on the next line, so every
// iteration reads the same entry instead of the one at position i.
var emailAddress = vEmails[1] + '';
var message = vMessage[1];
var subject = "Test"
// Builds the string 'certs\' followed by the attachment entry for this iteration.
// NOTE(review): presumably Drive.Files.get expects a file ID rather than a
// path-like string - confirm against the Drive API documentation.
var path = 'certs\\' + vAttachments[i]
var file = Drive.Files.get(path);
// NOTE(review): `files` is not defined anywhere in this function (the variable
// assigned above is `file`), and `message` is bypassed in favour of vMessage[1].
MailApp.sendEmail(emailAddress, subject, vMessage[1], {attachments: files});
}
}
Everything works well but when it comes to var file = Drive.Files.get(path), I get an error saying no file is found. I checked my drive and I'm sure it is there. I've also checked the Drive API. I don't know what is wrong.
Something like this might be a little closer to working:
/**
 * Sends one "Test" email per row for the first three rows of the active sheet.
 * Column 1 supplies the recipient, column 2 the message body and column 3 the
 * Drive file ID of the attachment.
 */
function sendEmails() {
  const sheet = SpreadsheetApp.getActiveSheet();
  // Every column is read as a 3-row block starting at row 1.
  const readColumn = (column) => sheet.getRange(1, column, 3).getValues();
  const recipients = readColumn(1);
  const bodies = readColumn(2);
  const attachmentIds = readColumn(3); // you need to add file ids

  for (let row = 0; row < recipients.length; row += 1) {
    // getValues() returns rows of cells, hence the extra [0].
    const attachment = DriveApp.getFileById(attachmentIds[row][0]);
    MailApp.sendEmail(recipients[row][0], "Test", bodies[row][0], { attachments: [attachment] });
  }
}
The code in the question has several flaws:
path : Google Drive doesn't use "path"
for in / vAttachments[i]: a different property name is assigned to the variable on each iteration but vAttachments hasn't named properties, so vAttachments[i] will return null on each iteration.
The attachments property on the options should be an Array.
How to "fix" the script
The way to directly get files in Google Drive is by using file ids.
If you want to get files by "path", first your script should get the folder, then look for the file inside that folder but bear in mind that getting a folder/file by name returns a folder/file iterator
Instead of for in, use for of or a plain for loop (most of the scripts I have seen use for).

Getting Google Sheet URL from Name

I have a google sheet file that needs to find the URL of another google sheet where only its name is known (I need its URL so I can retrieve the data externally through other apps).
To simplify this, I save both g-sheets files under the same folder in Google Drive. I understand I can do this within the same sheet as below, how can I do this when the "ss" refers to another google sheet file and I only know its name.
// Handle to the currently active spreadsheet.
var ss = SpreadsheetApp.getActiveSpreadsheet();
// Write that spreadsheet's URL to the script log.
var activeUrl = ss.getUrl();
Logger.log(activeUrl);
Ideally I want to do it through a cell formula but will be ok if I need to add a function to "script editor".
I got it working till here where it runs ok from the Script editor:
/**
 * Lists the Google Sheets files of a fixed Drive folder in the active sheet:
 * one header row, then one row per file with its name, creation date, size,
 * URL and ID.
 *
 * @returns {Array} The file objects that were listed, in listing order.
 */
function listFiles() {
  const baseFolderObject = DriveApp.getFolderById('0B38VWNq67cFHZVdWcE5KREZXS1U');
  const sheet = SpreadsheetApp.getActiveSheet();
  const types = [MimeType.GOOGLE_SHEETS]; //, MimeType.GOOGLE_DOCS];
  const results = [];

  // One header for the whole listing; its labels match the five values written per file.
  sheet.appendRow(["Name", "Date", "Size", "URL", "ID"]);

  // for...of walks the array's values; no index bookkeeping needed.
  for (const type of types) {
    const files = baseFolderObject.getFilesByType(type);
    while (files.hasNext()) {
      const file = files.next();
      results.push(file);
      sheet.appendRow([
        file.getName(),
        file.getDateCreated(),
        file.getSize(),
        file.getUrl(),
        file.getId(),
      ]);
    }
  }
  return results;
}
If you have the ID of the folder both spreadsheets are in, why not run a query on the folder and filter out the file you are looking for by name? Once you have that file, grab its ID. With the ID you can then build the Google Spreadsheet URL. Hope this helps.

Import zip data into google spreadsheet

I am trying to fetch a zip from a URL and import it automatically into a Google Spreadsheet.
The zip contains one file of CSV data.
I know I can import the CSV data into a Google Spreadsheet but I would like to be able to cut out the step of me having to download the zip and extract the file first before uploading it into a Google Spreadsheet.
So my queries, is it possible to import a zipped CSV file from a URL directly into a Google Spreadsheet? If not, how would this be done with a Google Apps Script?
To answer your question, no, you cannot directly import a Zip file containing a CSV directly into a spreadsheet. To answer your second question:
This question is kind of broad and needs to be broken down into three bits.
Retrieving the Zip from the URL
Extracting the CSV from the Zip
Inserting the CSV into your sheet
I'll briefly cover each of these three areas to get you going in the right direction, I am not going to write out a full fledged solution for you. I provide some code examples to make it easier for you to get going, this is not meant to be an end-to-end solution.
Retrieving the Zip from the URL
You can do this with the URLFetchApp Service.
Something like:
// Download the archive and take the response body as a blob.
var urlData = UrlFetchApp.fetch(link);
var zipBlob = urlData.getBlob();
// Unzip the blob that was just fetched (zipBlob is the only blob defined here).
var files = Utilities.unzip(zipBlob);
Extracting the CSV from the Zip
You need to get the contents of the ZIP file, find the CSV file in the ZIP, then parse it as a CSV into an array . In my example I use regex to escape the CSV as the Apps Script CSV parser is buggy.
/**
 * Unzips the given blob, picks the entry whose name matches 'myPartialName',
 * and parses that entry's text as CSV.
 *
 * @param {Blob} zipBlob - Zip archive to read.
 * @returns {?Array} The parsed CSV as returned by Utilities.parseCsv, or null
 *     when no entry in the archive matches the name.
 */
function GetCSVFromZip(zipBlob){
  const files = Utilities.unzip(zipBlob);
  const csvAttachment = FindBlobByName(files, 'myPartialName');
  // FindBlobByName reports "no match" with -1.
  if (csvAttachment === -1) {
    return null;
  }
  const dataString = csvAttachment.getDataAsString();
  // NOTE(review): every match of this pattern is replaced with '\r\n' before
  // parsing - confirm this is the intended sanitising step from the linked answer.
  const escapedString = dataString.replace(/(?=["'])(?:"[^"\\]*(?:\\[\s\S][^"\\]*)*"|'[^'\\]\r\n(?:\\[\s\S][^'\\]\r\n)*')/g, '\r\n'); //http://stackoverflow.com/a/29452781/3547347
  // Hand the parsed rows back to the caller instead of discarding them.
  return Utilities.parseCsv(escapedString);
}
//Finds a blob by a partial name match, assumes no multiple matches
/**
 * Returns the first blob whose name matches the given pattern, ignoring case.
 *
 * @param {Array} blob - List of blob-like objects exposing getName().
 * @param {string} name - Text handed to RegExp as-is, so regex syntax in it is
 *     interpreted as a pattern.
 * @returns {Object|number} The first matching blob, or -1 when none matches.
 */
function FindBlobByName(blob, name){
  // The pattern is the same for every entry, so compile it once up front.
  const regex = new RegExp(name, 'i');
  for (const candidate of blob) {
    const blobName = candidate.getName();
    // An entry without a name cannot match; skip it rather than fail on it.
    if (blobName != null && regex.test(blobName)) {
      return candidate;
    }
  }
  return -1;
}
Inserting the CSV into your sheet
You need to use the SpreadsheetApp Service for this. Get your spreadsheet, get a data range, and set its values to your CSV array. Something along these lines:
// Open the destination spreadsheet and pick the tab that receives the data.
var spreadsheet = SpreadsheetApp.openById(id);
var sheet = spreadsheet.getSheetByName(name);
// Size the range from the parsed CSV: one row per record, columns taken from the first record.
var rowCount = csv.length;
var columnCount = csv[0].length;
var range = sheet.getRange(1, 1, rowCount, columnCount);
// Write all rows in one call.
range.setValues(csv);
Using Douglas's answer, I managed to simplify it and read the zip from the link directly to spreadsheets. Here is my code:
/**
 * Downloads a zip archive from a URL, parses the first entry of the archive as
 * CSV, and writes the parsed rows to the 'Sheet1' tab of the active
 * spreadsheet, starting at A1.
 */
function testzip(){
  const sourceUrl = "url goes here";
  const archive = UrlFetchApp.fetch(sourceUrl).getBlob();
  const entries = Utilities.unzip(archive);
  // Only the first entry of the archive is read.
  const csvText = entries[0].getDataAsString();
  const rows = Utilities.parseCsv(csvText);

  const targetSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('Sheet1');
  // The range is sized from the parsed data: row count by width of the first row.
  const targetRange = targetSheet.getRange(1, 1, rows.length, rows[0].length);
  targetRange.setValues(rows);
}
Heading