Import new (latest) .csv file into existing Google spreadsheet automatically - google-apps-script

I receive a new CSV file every hour in my Google Drive.
I need my spreadsheet updated with the data in the latest CSV file after it has been received in the Google Drive folder.
The files coming into the folder has a unique name for each new one according to date and time.
For example: FileName_date_24hourtime.csv
FileName_20190524_1800.csv then FileName_20190524_1900.csv etc.
Firstly I'm not sure what the best approach is:
simply with a formula (probably not possible with not knowing the exact filename?) like =IMPORTDATA
a google script to find latest .csv file and automatically import as soon as file was added to Google Drive folder
Any assistance will be great!
The .csv file:
.csv file contains 28 rows and data should be split by ;
.csv file looks like this:
NAME;-63.06;-58.08;50.62;-66.67;-80.00
NAME;-61.82;-56.83;-50.55;-77.78;-70.00
NAME;-57.77;-50.21;52.88;-77.78;-70.00
NAME1;-57.69;-61.48;-55.59;-55.56;-60.00
NAME2;-61.62;-53.79;50.34;-66.67;-70.00
NAME3;-54.62;-54.57;-52.22;55.56;-60.00
... with total of 28 rows
Data should go to "Import_Stats" sheet.

The best approach here would be a script with a trigger that runs a function that performs data import to a spreadsheet.
Create a time-based trigger with 1-hour offset:
function trigger() {
var trg = ScriptApp.newTrigger('checkFiles');
trg.timeBased().everyHours(1).create();
}
Create function that checks files in a folder (e.g. "checkFiles").
function checkFiles(alreadyWaited) {
//get spreadsheet and sheet;
var id = 'yourSpreadsheetId';
var ss = SpreadsheetApp.openById(id);
var sh = ss.getSheetByName('Import_Stats');
var folderId = 'yourIdHere'; //folder by id is the simplest way;
//get folder and files in it;
var folder = DriveApp.getFolderById(folderId);
var files = folder.getFilesByType('text/csv');
var filesImport = folder.getFilesByType('text/csv'); //fetch files again;
//try to fetch number of files;
var scriptProps = PropertiesService.getScriptProperties();
var numFiles = scriptProps.getProperty('numFiles');
//count current number of files;
var numCurr = 0;
while(files.hasNext()) {
var f = files.next();
numCurr++;
}
//if this is the first time, save current number;
if(numFiles===null) {
scriptProps.setProperty('numFiles',numCurr);
}else {
numFiles = parseInt(numFiles);
}
if(numFiles===null||numFiles===(numCurr-1)) {
//get today and reset everything except hours;
var today = new Date();
today.setMinutes(0);
today.setSeconds(0);
today.setMilliseconds(0);
//iterate over files;
while(files.hasNext()) {
var file = files.next();
//get file creation date and reset;
var created = file.getDateCreated();
created.setMinutes(0);
created.setSeconds(0);
created.setMilliseconds(0);
//calculate offset, equals 0 for each file created this hour;
var offset = today.valueOf()-created.valueOf();
if(offset===0) {
//perform data import here;
var data = file.getBlob().getDataAsString();
//ignore empty files;
if(data!=='') {
//split data in rows;
var arr = data.split('\r\n');
//resplit array if only one row;
if(arr.length===1) {
arr = data.split('\n');
}
//append rows with data to sheet;
arr.forEach(function(el){
el = el.split(';');
sh.appendRow(el);
});
}
}
}
}else {
//if never waited, set minute to wait, else add minute;
if(!alreadyWaited) {
alreadyWaited = 60000;
}else {
alreadyWaited += alreadyWaited;
}
//if waited for 10 minutes -> end recursion;
if(alreadyWaited===600000) {
Logger.log('Waited 10 minutes but recieved no files!');
return;
}
//wait a minute and recheck;
Utilities.sleep(60000);
return checkFiles(alreadyWaited);
}
}
And this is what should happen:

Related

How can i remove the old converted file and replace with a new file using app script?

I am trying to replace the old converted files with the new file. This is because every time i run the script it keeps on duplicating and multiplying in the same folder.
Here is the code:
function ConvertFiles() {
var sheet =
SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet1");
var r= 2;
for(r= 2;r < sheet.getLastRow(); r++){
var fileId= sheet.getRange(r,1).getValues();
var folderID = sheet.getRange(r,8).getValues();
Logger.log(fileId);
var files = DriveApp.getFileById(fileId);
var name = files.getName().split('.')[0];
var blob = files.getBlob();
var destinationFolderId = DriveApp.getFolderById(folderID);
Logger.log(folderID);
var newFile = {
title : name + '_converted', parents: [{id:
destinationFolderId.getId()}]};
Logger.log(newFile);
}
}
My goal is:
To replace/update the old converted file into the latest one everytime the script runs (if it has the same filename)
I would like to push back the converted fileId into the google sheet to be displayed.
How can i solve this issue?
I have added comments to show each part of the code. Kindly check the whole script below:
Script:
function ConvertFiles() {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet1");
var r= 2;
for(r= 2;r < sheet.getLastRow(); r++){
// Use getValue instead of getValues
var fileId = sheet.getRange(r,1).getValue();
var folderID = sheet.getRange(r,8).getValue();
var files = DriveApp.getFileById(fileId);
var name = files.getName().split('.')[0];
var blob = files.getBlob();
var newFile = {
// Remove '_converted' from name if existing to avoid duplication of the string before adding '_converted'
// This will allow to have newly converted file "replace" the old converted file properly
title: name.replace('_converted','') + '_converted',
parents: [{
id: folderID
}]
};
var destinationFolderId = DriveApp.getFolderById(folderID);
var existingFiles = destinationFolderId.getFilesByName(newFile.title);
// GOAL #1: To replace/update the old converted file into the latest one everytime the script runs (if it has the same filename)
// Find the file with same name of the file to be converted
while(existingFiles.hasNext()) {
// ID of the file with same converted name
var oldConvertedFileWithSameNameID = existingFiles.next().getId();
// Delete before writing
Drive.Files.remove(oldConvertedFileWithSameNameID);
}
// Create new converted file then get ID
var newFileID = Drive.Files.insert(newFile, blob, {
convert: true
}).id;
// Goal #2: I would like to push back the converted fileId into the google sheet to be displayed.
// Add the ID of the converted file
sheet.getRange(r,9).setValue(newFileID);
}
}
Sample sheet:
Sample files:
First run (files):
First run (sheet):
Updated original files:
Run after updating original file (files):
Run after updating original file (sheet):
Note:
This will retain the original file, but will replace the existing converted file of it everytime the script is run.
I have used Drive services.

Copy files from one folder to another on Drive with Apps Script

Team members upload content (regardless of file type) into a folder on Drive. I need to copy this content into another folder automatically with a trigger, and be able to move it around from there.
I cannot use a "MoveFile" function as I am not the owner of the original content.
I have already tried to copy files automatically into the destination folder, and this works, using the code below:
function CopyFiles() {
var srcFldr = DriveApp.getFolderById("***ID***");
var srcFiles = srcFldr.getFiles();
var desFldr = DriveApp.getFolderById("***ID***");
var desFiles = desFldr.getFiles();
var dfnA = [];
while (desFiles.hasNext()) {
var df = desFiles.next();
dfnA.push(df.getName());
}
while (srcFiles.hasNext()) {
var sf = srcFiles.next();
if (dfnA.indexOf(sf.getName()) == -1) {
sf.makeCopy(sf.getName(), desFldr);
}
}
}
However, I need to move this copied content into other files throughout the day, yet every time I do, the same file gets copied back into the destination folder above with the new trigger, creating a permanent loop.
Is there a way of either:
moving the files from the original source folder despite not being the owner of those files?
copying contents only once, upon upload or modification?
Or 3) another, better, smarter way of doing this?
Thanks for your help!
I'd suggest the following workflow:
For every file that is copied to the destination folder, store the fileId. You could use Properties Service for this.
When copying files from one folder to the other, check the fileId has not been stored before.
Code snippet:
function CopyFiles() {
var srcFldr = DriveApp.getFolderById("***ID***");
var srcFiles = srcFldr.getFiles();
var desFldr = DriveApp.getFolderById("***ID***");
var desFiles = desFldr.getFiles();
var dfnA = [];
var key = "fileIDs";
var scriptProperties = PropertiesService.getScriptProperties();
var property = scriptProperties.getProperty(key); // Retrieve fileIDs property
// Get array of fileId, or empty array if no file has been copied before:
var arrayIDs = property ? JSON.parse(property) : [];
while (desFiles.hasNext()) {
var df = desFiles.next();
dfnA.push(df.getName());
}
while (srcFiles.hasNext()) {
var sf = srcFiles.next();
// Check not only file name, but also whether fileId has been stored before:
if (dfnA.indexOf(sf.getName()) == -1 && arrayIDs.indexOf(sf.getId()) == - 1) {
sf.makeCopy(sf.getName(), desFldr);
arrayIDs.push(sf.getId()); // Add fileId to array of IDs
}
}
scriptProperties.setProperty(key, JSON.stringify(arrayIDs)); // Store updated array
}
Reference:
Properties.getProperty(key)
Properties.setProperty(key, value)

App script for pdf converter exceeded maximum execution time

This is my app script code that converts google docs in a folder into pdf format. The script stops after converting around 60 documents with maximum execution time error. I am converting around hundreds of files in a run. What can I do to avoid this error?
//Module to convert doc to pdf
function gdocToPDF() {
var documentRootfolder = DriveApp.getFolderById("xx") // replace this with the ID of the folder that contains the documents you want to convert
var pdfFolder = DriveApp.getFolderById("xx"); // replace this with the ID of the folder that the PDFs should be put in.
var documentRootFiles = documentRootfolder.getFiles()
while(documentRootFiles.hasNext()) {
createPDF(documentRootFiles.next().getId(), pdfFolder.getId(), function (fileID, folderID) {
if (fileID) createPDFfile(fileID, folderID);
})
}
}
function createPDF(fileID, folderID, callback) {
var templateFile = DriveApp.getFileById(fileID);
var templateName = templateFile.getName();
var existingPDFs = DriveApp.getFolderById(folderID).getFiles();
//in case no files exist
if (!existingPDFs.hasNext()) {
return callback(fileID, folderID);
}
for (; existingPDFs.hasNext();) {
var existingPDFfile = existingPDFs.next();
var existingPDFfileName = existingPDFfile.getName();
if (existingPDFfileName == templateName + ".pdf") {
Logger.log("PDF exists already. No PDF created")
return callback();
}
if (!existingPDFs.hasNext()) {
Logger.log("PDF is created")
return callback(fileID, folderID)
}
}
}
function createPDFfile(fileID, folderID) {
var templateFile = DriveApp.getFileById(fileID);
var folder = DriveApp.getFolderById(folderID);
var theBlob = templateFile.getBlob().getAs('application/pdf');
var newPDFFile = folder.createFile(theBlob);
var fileName = templateFile.getName().replace(".", ""); //otherwise filename will be shortened after full stop
newPDFFile.setName(fileName + ".pdf");
}
I generate a lot of files. What I do to avoid this error is open a dialog and run a function that creates one file at a time. I write the number of remaining files in a cell in the sheet, update it with every run. I use the success and failure handlers to decide whether to continue with file generation or close the dialog. You can generate files for hours in this way.

Delete file if its name starts with a date older than the current one (Google Drive)

With help from the Stackoverflow community, I was able to create this script that deletes files that are over 3 hours old:
function getOldFileIDs() {
// Old date is 3 Hours
var oldDate = new Date().getTime() - 3600*1000*3;
var cutOffDate = new Date(oldDate).toISOString();
// Get folderID using the URL on google drive
var folder = DriveApp.getFolderById('XXXXXXXXXXXXXXXXXXXXX');
var files = folder.searchFiles('modifiedDate < "' + cutOffDate + '"');
var obj = [];
while (files.hasNext()) {
var file = files.next();
obj.push({id: file.getId(), date: file.getDateCreated(), owner: file.getOwner().getEmail()}); // Modified
}
obj.sort(function(a, b) {
var a= new Date(a.date).valueOf();
var b= new Date(b.date).valueOf();
return b-a;
});
obj.shift();
return obj; // Modified
};
function deleteFiles() {
var email = "XXXXXXXXXXXXXXXXX#gmail.com"; // Added
var obj = getOldFileIDs(); // Modified
obj.forEach(function(e) { // Modified
if (e.owner == email) { // Added
Drive.Files.remove(e.id); // Modified
}
});
};
File names in this folder always start with a date, such as:
2019/10/05 SerieA Vasco da Gama x Flamengo.pdf
I would like a help to know what I need to modify in the script and what it would look like so that instead of deleting when it has more than 3 hours of creation, it from it files that have a date earlier than the current.
I tried to modify .getdate but it was totally unsuccessful, I couldn't make the script parse the beginning of the file name, I could only find the file name and its id.
There are the files with the filename like 2019/10/05 SerieA Vasco da Gama x Flamengo.pdf.
The date string and the format are always 10 characters from the top of the filename and yyyy/MM/dd, respectively.
You want to delete the files which has old date string in the filename before the today.
In this case, more than 3 hours of creation is ignored. So modifiedDate is also ignored.
You want to achieve this using Google Apps Script.
If my understanding is correct, how about this modification? Please think of this as just one of several answers. The flow of this modified script is as follows.
Flow:
Retrieve all files in the folder.
For each file, retrieve the filename and the date string.
Convert the date string in the filename to the date object. And it is compared with today. When the date in the filename is older than today, the file information is added to the array for deleting.
Delete the files from the retrieved file information
Modified script:
When your script is modified, please modify the function of getOldFileIDs() as follows.
function getOldFileIDs() {
var folder = DriveApp.getFolderById('XXXXXXXXXXXXXXXXXXXXX');
var files = folder.getFiles();
var today = new Date(new Date().toDateString()).getTime();
var obj = [];
while (files.hasNext()) {
var file = files.next();
var dateFromFilename = file.getName().substr(0, 10);
if (/\d{4}\/\d{2}\/\d{2}/.test(dateFromFilename)) { // Here, the format of date string is checked.
var date = new Date(dateFromFilename).getTime();
if (date < today) {
obj.push({id: file.getId(), date: file.getDateCreated(), owner: file.getOwner().getEmail()});
}
}
}
return obj;
};
References:
toDateString()
test()
If I misunderstood your question and this was not the result you want, I apologize.

List all files and folder in google drive

I've been trying to figure this out for a while now. I hope I can get some guidance on this. The purpose of the following script is to get a full list of folders and files with subfolders and their files included.
Here is what I currently have:
var counter = 0
var files = folder.getFiles();
var subfolders = folder.getFolders();
var folderPath = folder.getName();
while (subfolders.hasNext()){
subfolder = subfolders.next();
var row = [];
//row.push(subfolder.getName(),'',subfolder.getId(),subfolder.getUrl(),subfolder.getSize(),subfolder.getDateCreated(),subfolder.getLastUpdated());
//list.push(row);
if(counter > 0){
var files = subfolder.getFiles();
}
while (files.hasNext()){
file = files.next();
var vals = file.getUrl();
var row = [];
if(counter == 0){
row.push(folder.getName(),file.getName(),file.getId(),file.getUrl(),file.getSize(),file.getDateCreated(),file.getLastUpdated())
}else{
row.push(folderPath + '/' + subfolder.getName(),file.getName(),file.getId(),file.getUrl(),file.getSize(),file.getDateCreated(),file.getLastUpdated())
}
list.push(row);
}
counter = counter + 1
}
It currently gets the folder names and file names for the current folder and it's subfolder. It doesn't go any further than that. I'm stuck trying to figure out how to get a loop going to continue until there are no more sub-folders.
It isn't a very big drive. There are less than 10 levels but would like the flexibility to go further if needed.
Recursion is beneficial in this case. The code below calls the recursive method recurseFolder() which takes a Folder and Array as a parameter. It adds all the files in the folder to a list, then calls itself on any subfolders it finds.
function test(){
var root = DriveApp.getRootFolder();
var list = [];
var list = recurseFolder(root, list);
Logger.log(JSON.stringify(list));
//This is just how I am testing the outputed list. You can do what you need.
var sheet = SpreadsheetApp.getActiveSheet();
list.forEach(function (row){
sheet.appendRow(row);
});
}
function recurseFolder(folder, list){
var files = folder.getFiles();
var subfolders = folder.getFolders();
while (files.hasNext()){ //add all the files to our list first.
var file = files.next();
var row = [];
Logger.log("File: " + folder.getName());
row.push(folder.getName(),file.getName(),file.getId(),file.getUrl(),file.getSize(),file.getDateCreated(),file.getLastUpdated())
list.push(row);
}
while (subfolders.hasNext()){ //Recurse through child folders.
subfolder = subfolders.next();
Logger.log("Folder: " + subfolder.getName());
list = recurseFolder(subfolder, list); //Past the original list in so it stays a 2D Array suitible for inserting into a range.
}
return list;
}
I'm not sure if the output is formatted how you intended so you might need to play with it a little. Note: It will easily time out if run on a larger Drive.
You need a function that will navigate the folder structure recursively, meaning that if it runs into a subfolder within a folder, it will call itself again passing that folder as a new parent.
function listFolders(parentFolderId) {
var sourceFolder = DriveApp.getFolderById(parentFolderId) || DriveApp.getRootFolder();
var folders = sourceFolder.getFolders();
var files = sourceFolder.getFiles();
while (files.hasNext()) {
var file = files.next();
//Do something
}
while (folders.hasNext()) {
var folder = folders.next();
listFolders(folder.getId());
}
}
Note that this function will still time out if you have lots of files in your Drive, in which case you need to store the state of your app using PropertiesService and schedule the function to run again using triggers via the ScriptApp. You can achieve this by saving the continuation token for your Files Iterator between script executions
More on ContinuationToken