Gulp remove duplicates if exists - gulp

Is it possible to remove files with same name from source? For example, let's say I have the following folder structure
a
---file1.txt
---file2.txt
---file3.txt
b
---file1.txt
When I select both folders as the source, I want the destination folder to contain only the files that aren't duplicates. In the example above, the result would be
result
---file2.txt
---file3.txt
Optionally, it would be great if I could somehow filter out the duplicates and write them to a separate folder.
By duplicates, I mean explicitly duplicates by name, file content is not important.

It took me awhile to get there but try this:
var gulp = require('gulp');
var fs = require('fs');
var path = require('path');
var flatten = require('gulp-flatten');
var filter =  require('gulp-filter');
var folders = ['a', 'b', 'c']; // I just hard-coded your folders here
/**
 * Predicate used by the filter: tells whether the given file's name is unique
 * across the source folders.
 *
 * A file is a duplicate when a file with the same base name exists directly
 * inside at least two of the directories listed in `folders` (looked up
 * relative to the current working directory). A duplicate is also deleted
 * from disk, at the path stored in `file.history[0]`.
 *
 * @param {{history: string[]}} file - file object handed over by the filter;
 *   `history[0]` is read as the file's path.
 * @returns {boolean} `true` when the name is unique, `false` for a duplicate.
 */
function isUnique(file) {
  const originalPath = file.history[0];
  console.dir(originalPath); // just for fun

  const baseName = path.basename(originalPath);

  // A plain loop (unlike forEach) can stop as soon as the second folder
  // containing a same-named file has been found.
  let count = 0;
  for (const folder of folders) {
    if (fs.existsSync(path.join('.', folder, baseName))) {
      count++;
      if (count >= 2) break;
    }
  }

  if (count >= 2) { // the file is a duplicate
    fs.unlinkSync(originalPath); // remove from the result directory
    return false;
  }
  return true;
}
// Default task: depends on 'clump', then splits the clumped files by name uniqueness.
gulp.task('default', ['clump'], () => {
  // Files for which isUnique() answers true stay on the main stream;
  // the rejected ones remain reachable through `restore`.
  const uniqueOnly = filter((file) => isUnique(file), { restore: true, passthrough: false });

  // The filtering itself happens when the source is piped through the filter.
  const filtered = gulp.src('./result/*.txt').pipe(uniqueOnly);

  // Separate stream carrying the removed duplicates.
  uniqueOnly.restore.pipe(gulp.dest('duplicates'));

  return filtered;
});
// 'clump' runs first: it gathers all .txt files into the flat 'result' directory.
// node_modules and the directories these tasks write to are excluded, so that
// dependency files and the output of an earlier run are not collected again.
gulp.task('clump', function () {
  return gulp.src([
    './**/*.txt',
    '!./node_modules/**',
    '!./result/**',
    '!./duplicates/**'
  ])
    .pipe(flatten()) // because the original folder structure is not wanted
    .pipe(gulp.dest('result'));
});
Run it with 'gulp'. The default task will trigger the 'clump' task first.
Since your question didn't require that any particular version of a duplicated file be kept (the newest, for example), I haven't worried about that here. If you want the 'result' folder to hold each version of a duplicated file, such as file1.txt from one folder and file1.txt from another, they would obviously have to be renamed; that could be done in the 'clump' task.
Let me know if this works for you.

Related

Is it possible to get the names of all the folders and create from each folder a custom script file for that folder

I made a code that renames multiple files in Google Drive (taken from Here) and here is the content
/**
 * Renames the JPEG files that sit directly inside one Google Drive folder.
 *
 * The files are expected to be named like "AA123_y.jpg", where y is a single
 * or multi-digit sequence number. In each name the first occurrence of the
 * literal text "123_" is replaced by a single underscore,
 * e.g. "AA123_1.jpg" -> "AA_1.jpg".
 *
 * Only the files returned by the folder's getFiles() are processed;
 * sub-folders are not visited.
 *
 * @returns {boolean} Always false.
 */
function renamejpgs() {
  // MIME type of the files to be renamed
  const mimetype = 'image/jpeg';
  // ID of the folder to process; note the id is a string
  const folderid = "<<insert your folder ID>>";

  const folder = DriveApp.getFolderById(folderid);
  // myfiles is a file iterator over the folder's files
  const myfiles = folder.getFiles();

  while (myfiles.hasNext()) {
    const myfile = myfiles.next();

    // skip every file that is not of the wanted MIME type
    if (!myfile.getMimeType().includes(mimetype)) {
      continue;
    }

    const myname = myfile.getName();
    const fname = myname.replace("123_", "_");

    // Only write the name back when it actually changes: renaming a file to
    // the name it already has costs a Drive call for nothing.
    if (fname !== myname) {
      myfile.setName(fname);
    }
  }
  return false;
}
My problem is that it only works on the root folder and not on subfolders.
I asked elsewhere whether subfolders could be included, and I was helped to rename the files there as well. But then the script ran for longer than Google's execution time limit.
My question is: is it possible to get the names of all the folders and create, for each folder, a custom script file for that folder in Google Drive?
/**
 * Walks the files of one Google Drive folder and renames the JPEG ones,
 * pausing 10 ms after inspecting each file.
 *
 * File names are expected to look like "AA123_y.jpg" (y = sequence number).
 * The first "123_" found in a JPEG's name is replaced by "_",
 * e.g. "AA123_1.jpg" -> "AA_1.jpg".
 *
 * @returns {boolean} Always false.
 */
function renamejpgs() {
  // MIME type that selects the files to rename
  const targetMime = 'image/jpeg';
  // the folder ID is a string
  const sourceFolderId = "<<insert your folder ID>>";

  // file iterator over the folder's files
  const iterator = DriveApp.getFolderById(sourceFolderId).getFiles();

  for (; iterator.hasNext();) {
    const entry = iterator.next();
    const currentName = entry.getName();
    const isWantedType = entry.getMimeType().indexOf(targetMime) !== -1;

    // the pause happens for every file, whether or not it gets renamed
    Utilities.sleep(10);

    if (!isWantedType) {
      continue;
    }
    // the file is an image: write the edited name back
    entry.setName(currentName.replace("123_", "_"));
  }
  return false;
}
I believe this will help stop the code from timing out.
I added a sleep function to pause it and stop it from exhaustion.
This is my basis of research for using the sleep function: https://webapps.stackexchange.com/questions/116041/how-to-set-up-delays-in-google-apps-script
In this implementation, I've decided to use Drive API service on Apps Scripts in order to use the filter functionality (q parameter) of Drive API queries.
Given that, make sure to add Drive API on the service list on your Apps Scripts project.
The code below will "scan" for a file name that includes MATCH_PATTERN set on a given starting folder ID including sub-folders.
Make sure to customize the constants in the beginning to adapt to your use case.
The function main() is a sample start point for this script.
Sample Code:
const MATCH_PATTERN = "[replace]"; // text searched for in each file name; only names containing it are renamed
const REPLACE_PATTERN = "[123456789]"; // text put in place of MATCH_PATTERN in the file name
const MIMETYPE_TARGET_FILES = "image/jpeg"; // default MIME type used when listing the files to rename
/**
 * Collects the IDs of the non-trashed folders whose parent is `parentId`.
 * Slightly modified script from https://developers.google.com/apps-script/advanced/drive#listing_folders
 *
 * @param {string} [parentId="root"] ID of the folder to look into.
 * @returns {string[]} IDs of the sub-folders, in the order the API returned them.
 */
function listFoldersIn(parentId = "root") {
  const searchQuery = '"' + parentId + '" in parents and trashed = false and ' +
    'mimeType = "application/vnd.google-apps.folder"';
  const collectedIds = [];
  let nextToken;

  // Fetch page after page until the response carries no continuation token.
  for (;;) {
    const page = Drive.Files.list({
      q: searchQuery,
      maxResults: 100,
      pageToken: nextToken
    });
    for (const entry of page.items || []) {
      collectedIds.push(entry.id);
    }
    nextToken = page.nextPageToken;
    if (!nextToken) {
      break;
    }
  }
  return collectedIds;
}
/**
 * Collects title and ID of the non-trashed files of a given MIME type whose
 * parent is `parentId`.
 * Slightly modified script from https://developers.google.com/apps-script/advanced/drive#listing_folders
 *
 * @param {string} [parentId="root"] ID of the folder to look into.
 * @param {string} [mime=MIMETYPE_TARGET_FILES] MIME type the files must have.
 * @returns {{title: string, id: string}[]} One entry per matching file.
 */
function listTargetFilesForParent(parentId = "root", mime = MIMETYPE_TARGET_FILES) {
  const searchQuery = '"' + parentId + '" in parents and trashed = false and ' +
    'mimeType = "' + mime + '"';
  const matches = [];
  let nextToken;

  // Fetch page after page until the response carries no continuation token.
  for (;;) {
    const page = Drive.Files.list({
      q: searchQuery,
      maxResults: 100,
      pageToken: nextToken
    });
    for (const entry of page.items || []) {
      matches.push({ title: entry.title, id: entry.id });
    }
    nextToken = page.nextPageToken;
    if (!nextToken) {
      break;
    }
  }
  return matches;
}
/**
 * Renames the target files of every folder in `folderList` and then, by
 * calling itself on each folder's sub-folders, of everything below them.
 *
 * @param {string[]} [folderList=["root"]] Drive folder IDs to start from.
 *   When omitted, the scan starts at the folder with ID "root".
 * @returns {void}
 */
function renameFilesInFolders(folderList = ["root"]) {
  // `const` keeps the loop variable local to this call; an undeclared loop
  // variable would be a single global shared by every level of the recursion.
  for (const folder of folderList) {
    // Rename the applicable files of the current folder.
    renameTargetFiles(listTargetFilesForParent(folder));

    // Descend into the sub-folders, if there are any.
    const subFolders = listFoldersIn(folder);
    if (subFolders.length > 0) {
      renameFilesInFolders(subFolders);
    }
  }
}
/**
 * Renames every listed file whose title contains MATCH_PATTERN, replacing the
 * first occurrence of MATCH_PATTERN with REPLACE_PATTERN.
 *
 * @param {{title: string, id: string}[]} filesList Files to inspect.
 * @returns {void}
 */
function renameTargetFiles(filesList) {
  // `const` declares the loop variable locally instead of leaking a global.
  for (const file of filesList) {
    // Files whose name does not contain the pattern are left untouched.
    if (!file.title.includes(MATCH_PATTERN)) {
      continue;
    }
    const newName = file.title.replace(MATCH_PATTERN, REPLACE_PATTERN);
    Drive.Files.patch({ title: newName }, file.id); // Drive API call that changes the file name
  }
}
/**
 * Sample entry point for this script.
 *
 * Set up the constants at the beginning of this file for your use case first.
 * The array passed to renameFilesInFolders() holds a single Drive folder ID,
 * which is the level the scan starts from.
 */
function main() {
  const startingFolders = ["<STARTING_FOLDER_ID>"];
  renameFilesInFolders(startingFolders);
}

Google Apps - How to get all files name from current directory?

How I can get all files name from current directory?
I have a code
/**
 * Shows the name of every file supplied by `parentFolder` in an alert box.
 * `parentFolder` is not declared inside this function; it is read from the
 * enclosing scope and used as a file iterator.
 */
function showAllFollderFronRoot() {
  var iterator = parentFolder;
  for (; iterator.hasNext();) {
    var entry = iterator.next();
    // Logger.log(entry.getName());
    DocumentApp.getUi().alert(entry.getName());
  }
}
But it work only with ROOT dir.
How I can get all file names in array in current dir?
UPDATE:
I have a file structure: MT/MT.100-107/MT.100-1007.1001.doc
I need code such that, when somebody opens a new document from a template in Docs, the script automatically saves the file with the correct structure, using the next file number (+1); for example MT.100-1007.1002.doc, then MT.100-1007.1003.doc for the next new file from the template, and so on.
The script needs to find all file names => determine the highest existing number (1002.doc) => add 1 => save this file under the new name MT.100-1007.1003.doc
My code works, but it creates a temporary file in the root folder and does not work correctly, because it does not determine the highest number in the current folder: if I delete a file, for example MT.100-1007.1003.doc in folder MT, and create a new file in folder UA, the number becomes MT.100-1007.1004.doc no matter what the file names in folder UA are.
This script has mistakes; how can I fix it?
/**
* #OnlyCurrentDoc
*/
// Renames the active document to "<parent folder name>.<number>", unless its
// name already matches the parent folder's name. The number is taken from the
// text content of a helper ("init") file, incremented by one.
function saveFilename() {
// Get the active document, its ID and its current name
// (ui is only referenced by the commented-out alert below)
const ui = DocumentApp.getUi(),
doc = DocumentApp.getActiveDocument(), //Added
thisFileId = doc.getId(),
thisFileName = doc.getName();
const thisFile = DriveApp.getFileById(thisFileId);//Modified from getFolderById
// getParents() is consumed as an iterator; only its first folder is used
const parentFolder = thisFile.getParents();
const currentFolder = parentFolder.next();//Modified from currentFolderName
const currentFolderName = currentFolder.getName();//Added
//ui.alert(currentFolderName);
/*Store a init file in root to getLatestFileNumber*/
// NOTE(review): the lookup below uses the suffix 'init00' while the file is
// created with the suffix 'init000' — looks like a mismatch; confirm which
// name is intended.
var initIter = DriveApp.getFilesByName(currentFolderName + 'init00'),
initBool = initIter.hasNext(),
init;
if (!initBool) {
// no helper file found: create one whose content is '0'
init = DriveApp.createFile(currentFolderName + 'init000', '0');
} else {
init = initIter.next();
}
/*Get current Number and format it*/
// "* 1" converts the file's text content to a number before 1 is added.
// NOTE(review): substr(-3) keeps the last three characters of the padded
// value, not four — confirm the intended width.
var currentNum = init.getBlob().getDataAsString() * 1 + 1,
formatNum = ('0000' + currentNum).substr(-3);
/*If filename already contains folderName, do nothing*/
// search() yields -1 when there is no match, so "+ 1" turns "not found" into 0
if (!(thisFileName.search(currentFolderName) + 1)) {
doc.setName(currentFolderName +'.' + formatNum).saveAndClose();
init.setContent(currentNum);
}
// delete TMP file from ROOT dir
// NOTE(review): the helper file is trashed on every run, right after its
// content may have been updated — presumably the counter is then not carried
// over to the next run; verify.
DriveApp.getFileById(init.getId()).setTrashed(true)
}
You want to retrieve filenames of all files in the parent folder of the active document.
If my understanding is correct, how about this answer?
Flow:
The flow of this script is as follows.
Retrieve file ID of the active document.
Retrieve parent folder ID of the active document.
Retrieve files in the parent folder ID.
Retrieve filenames of files.
Modified script:
/**
 * Logs the name of every file located in the first parent folder of the
 * active document.
 */
function showAllFollderFronRoot() {
  const activeDocId = DocumentApp.getActiveDocument().getId();

  // Only the first parent reported for the document is considered.
  const parents = DriveApp.getFileById(activeDocId).getParents();
  const folderId = parents.next().getId();

  const iterator = DriveApp.getFolderById(folderId).getFiles();
  for (; iterator.hasNext();) {
    const entry = iterator.next();
    Logger.log(entry.getName());
  }
}
Note:
This sample script supposes as follows.
The parent of the active document is only one.
All files in the parent folder of the active document are retrieved. But the folders are not retrieved.
References:
getId()
getParents()
getFolderById(id)
getFiles()
If I misunderstand your question, please tell me. I would like to modify it.
I fixed this problem, but I don't know how to parse the last 4 digits of each file name and find the maximum among them. Do you have any idea? The slice(4) method is not working in Apps Script :(
/**
 * Logs the name of every file in the folder that contains the active document.
 *
 * Only the first parent folder reported for the document is used. The
 * function no longer asks for the document UI, which it never used.
 */
function currFiles() {
  const doc = DocumentApp.getActiveDocument();
  const thisFile = DriveApp.getFileById(doc.getId());
  const currentFolder = thisFile.getParents().next();

  // The folder object is already at hand, so its files are listed directly
  // instead of looking the same folder up again by ID.
  const files = currentFolder.getFiles();
  while (files.hasNext()) {
    const file = files.next();
    Logger.log(file.getName());
  }
}

Gulp generate manifest files for existing files

I'm trying to automatically generate manifest files with gulp, but can't find how to get filename, modify it and send it forward through pipe.
// Sketch of the idea only — not runnable as written: '???' below is a
// placeholder for the manifest path ("<original file name>.manifest").
var fs = require('fs');
var content = 'test';
gulp.src('./wwwroot/**/*.file')
// NOTE(review): fs.writeFileSync(...) is evaluated right here and its return
// value is what gets handed to .pipe(); presumably .pipe() needs a stream
// instead — confirm.
.pipe(fs.writeFileSync(??? , content))
.pipe(gulp.dest('./wwwroot/')); // should be same as original file
Where ??? on 4th line is, I'd like to have filename.file.manifest.
Code above is more of an idea, since gulp.dest and fs both write file.
Not with pipes, but nonetheless a nice solution using node glob
var content = 'test';
// Write a "<name>.manifest" file next to every *.file found under ./wwwroot.
glob("./wwwroot/**/*.file", null, function (er, files) {
  if (er) {
    // Report the failure instead of silently generating nothing.
    console.error("Could not list ./wwwroot/**/*.file:", er);
    return;
  }
  files.forEach(function (element) {
    var manifestFile = element + '.manifest';
    fs.writeFileSync(manifestFile, content);
    console.log("Generated: " + element);
  });
});

Iterate Through Folders/Subfolders/Files in Google Drive

I have a Google Drive structure setup like this:
Client A
--Project 1
----Drafts
----Presentations
----Minutes
--Project 2
----Drafts
----Presentations
----Minutes
Client B
<and so on>
I want to write a script where I can pass in the ID of Project 1, and it will loop through each of the three subfolders and count the total number of files (ultimately I want to count the number of files updated over a date range, but baby steps!).
I've tried using the Class FolderIterator code, but that wants you to assign a group of folders to a variable "folders" (it showed var folders = DriveApp.getFolders(); and I tried var folders = DriveApp.getFolderById("<id of Project 1 here>"); but in my version that's just one folder, and it really needs the collection of folders). Is there a way to assign those three subfolders to that folders variable if I have the ID of the parent folder? Or is there a better way to loop through the subfolders?
As a complement to Bryan's answer (thx, I upvoted) I wrote a test function to see results in the logger and to run the function with appropriate parameters to get the count you wanted.
(This answer should not be selected as the right answer since the main part is from Bryan P)
here is how it goes :
/**
 * Test driver: runs traverseFolder() on the first folder named 'Client A'
 * and logs the total it returns.
 */
function testTraverse() {
  // use the folder name you want here
  const matches = DriveApp.getFoldersByName('Client A');
  const startFolder = matches.next();

  const grandTotal = traverseFolder(startFolder, 0);

  // Note: a file that sits in more than one sub-folder is counted once per
  // occurrence, so the total may exceed the number of distinct files.
  Logger.log('total files = ' + grandTotal);
}
/**
 * Counts the files in `folder` and in all folders below it, logging every
 * file name and a per-folder summary along the way.
 *
 * @param {Object} folder Folder to start from; must offer getName(),
 *   getFiles() and getFolders().
 * @param {number} [total=0] Running total to which this folder's subtree
 *   count is added.
 * @returns {number} `total` plus the number of files found in `folder` and
 *   its descendants.
 */
function traverseFolder(folder, total = 0) {
  const name = folder.getName();

  // Files directly inside this folder.
  let count = 0;
  const files = folder.getFiles();
  while (files.hasNext()) {
    count++;
    Logger.log('fileName (' + count + ') in ' + name + ' is : ' + files.next().getName());
  }
  Logger.log('\n' + name + ' has ' + count + ' files\n----------------------\n');

  // Each sub-folder reports its own subtree count (starting from 0), so this
  // folder's files and every descendant's files are each added exactly once.
  let subtreeCount = count;
  const subs = folder.getFolders();
  while (subs.hasNext()) {
    subtreeCount += traverseFolder(subs.next(), 0);
  }
  return total + subtreeCount;
}
Note that this code will work for a "reasonable" number of files, if you have many files and folders it might exceed the 5 minutes execution time limit. In this case you'll need to modify it in order to proceed by smaller bunches of files using the token provided by the method.
It may look some thing like this with DriveApp...
/**
 * Walks `folder` and everything below it; for each folder named exactly
 * "Drafts", "Presentations" or "Minutes" it logs "<name> : <file count>".
 *
 * @param {Object} folder Folder to start from; must offer getName(),
 *   getFiles() and getFolders().
 * @returns {void}
 */
function traverseFolder(folder) {
  const name = folder.getName();
  if (/^(Drafts|Presentations|Minutes)$/.test(name)) {
    let count = 0;
    const files = folder.getFiles();
    while (files.hasNext()) {
      // The iterator has to be advanced; otherwise hasNext() stays true and
      // the loop never ends.
      files.next();
      count++;
    }
    Logger.log(name + ' : ' + count);
  }
  const subs = folder.getFolders();
  while (subs.hasNext()) {
    traverseFolder(subs.next());
  }
}
We now have access to the Advanced Drive Service too. Maybe I'll look at an example with that if some one doesn't post one soon.

How do I locate the path of the folder in which the current script file is located?

How do I locate the path of the current folder? I just want to be able to get the path of the folder so that I can manipulate the files in the folder without typing the path into the scripts.
For a Spreadsheet I found this to work:
// Look up the name of the folder that contains the active spreadsheet.
var thisFileId = SpreadsheetApp.getActive().getId();
var thisFile = DriveApp.getFileById(thisFileId);
// getParents() on a DriveApp file gives an iterator rather than an array, so
// the first parent is obtained with next().
var parentFolder = thisFile.getParents().next().getName();
Thanks to Corey's answer and to Thomas'one, here is a "full featured" version that shows the folder tree in the logger and every parents id as well... just for fun ;-)
/**
 * Logs the folder path of this script (as "Root/<folder>/<folder>...") and
 * then the IDs of the folders on that path, starting with the root folder ID.
 *
 * At every level only the first parent folder is followed. The walk stops
 * at the Drive root folder, or when a folder has no parent.
 */
function getScriptFolderTree() {
  const thisScript = getThisScriptInDrive();
  const rootId = DriveApp.getRootFolder().getId();
  const names = [];
  const ids = [];

  // getParents() is consumed as an iterator; the root is recognised by its ID
  // rather than by a display name.
  let parents = thisScript.getParents();
  while (parents.hasNext()) {
    const folder = parents.next();
    if (folder.getId() === rootId) {
      break;
    }
    names.unshift(folder.getName());
    ids.unshift(folder.getId());
    parents = folder.getParents();
  }

  // Joining with '/' directly keeps folder names that contain commas intact.
  Logger.log('Root/' + names.join('/'));
  ids.unshift(rootId);
  Logger.log(ids);
}
/**
 * Fetches the Drive file of this script by its hard-coded file ID.
 *
 * @returns {Object} Whatever DriveApp.getFileById() returns for that ID.
 */
function getThisScriptInDrive() {
  const scriptFileId = "poiuytrezazertyujhgfdsdcvcxyydryfhchfh";
  return DriveApp.getFileById(scriptFileId);
}
(ID's are truncated intentionally)
Note that this script is working nicely but it strangely stops working if the 'random string' is modified... I imagine that the search engine in drive doesn't like the change but I have no serious explanation (comments welcome) but after a few minutes it works again ;-)
Add a function like this to your script
/**
 * Finds this script by searching for a string that only this very
 * source file contains, and returns the first hit.
 *
 * NOTE(review): confirm that DriveApp really offers find() — verify against
 * the Apps Script reference before relying on this.
 */
function getThisScriptInDrive() {
  const matches = DriveApp.find("some unique string that wont be anywhere else");
  return matches[0];
}
This will search Drive and find this script itself because it contains that string - right there in the function call! - no need to declare it anywhere else. As long as you use an obscure enough string - i'd recommend mashing a few hundred chars on your keyboard - it will be unique across drive and therefore just work.
Once you have a File for this script, you can call getParents() etc.
I tweaked Serge's code to write a generic function
/**
 * Builds the folder path of the active spreadsheet's file by following the
 * first parent folder at every level until a folder without parent is reached.
 *
 * To use it from a Doc, obtain the ID with
 * DocumentApp.getActiveDocument().getId() instead.
 *
 * @returns {{name: string, folder: {name: string, id: string}[]}} `folder`
 *   lists the ancestors from the topmost one down to the direct parent;
 *   `name` is the same list written as "/<name>/<name>...". The path is also
 *   logged.
 */
function getThisFilePath() {
  const thisFile = DriveApp.getFileById(SpreadsheetApp.getActiveSpreadsheet().getId()); // for a Sheet
  const path = { name: '', folder: [] };

  // getParents() is requested once per level and the iterator it returns is
  // reused for both the hasNext() check and the next() call.
  let parents = thisFile.getParents();
  while (parents.hasNext()) {
    const folder = parents.next();
    path.folder.unshift({ name: folder.getName(), id: folder.getId() });
    parents = folder.getParents();
  }

  path.name = path.folder.map((entry) => '/' + entry.name).join('');
  Logger.log(path.name);
  return path;
}
Logging output
11:02:57 AM Info /My Drive/Technology/Development/2022
You could do this:
/**
 * Starting from the first parent folder of this script, repeatedly moves to
 * the parents of the current folder and logs their names, until the current
 * folder is called "Root". getParents() is used as an array here.
 */
function myFunction() {
  let current = getThisScriptInDrive().getParents()[0];
  while (current.getName() != "Root") {
    // Every parent is logged; the last one becomes the folder to continue from.
    for (const ancestor of current.getParents()) {
      current = ancestor;
      Logger.log(current.getName());
    }
  }
}
/**
 * Finds this script by searching for a random string that only this very
 * source file contains, and returns the first hit.
 */
function getThisScriptInDrive() {
  const matches = DocsList.find("`<jj!?=(<DW+.W/m7SBF:sgu/#B(&Cs3:{ajA~ys#KmN4&]ujhpZ~z[Tv?+dk}MpK,8pY=w&dny8N'74:.9H:~uCgY=7pRt4[Tn5");
  return matches[0];
}
This only works good if the folder has only 1 parent because it only takes 1 path.
edit: Thanks to Corey G
I was able to get the folder containing the script that was running:
/**
 * PrintScriptFolder -- prints the ID of the running script and the name of
 * the first folder reported as its parent. Nothing more is printed when the
 * script file has no parent folder.
 */
function PrintScriptFolder() {
  const scriptId = ScriptApp.getScriptId();
  console.info('scriptId = ' + scriptId);

  const parents = DriveApp.getFileById(scriptId).getParents();
  if (!parents.hasNext()) {
    return;
  }

  const folderName = parents.next().getName();
  console.info('script folder name = ' + folderName);
}
The trick is using ScriptApp to get the ID of the running script. From there it's pretty straightforward: use DriveApp to get the file ID, getParents() to get a list of (one) parent folder, next() to get it, and getName() to get its name.
First answer, long time lurker (be kind ;-)
Looking for a simple getPath, I went for a more generic approach, with limitations:
// Simple test stub: prints the path computed for the running script's own file.
function test_getFilePath() {
  console.log(getFilePath(ScriptApp.getScriptId()));
}
/**
 * getFilePath
 *
 * Builds the folder path of a Drive file by walking up its parent folders.
 *
 * NB: Google allows for multiple parents, which is a PITA, so only the first
 * parent found at each level is followed.
 *
 * @param {string} fileId ID of the file to find the path for.
 * @returns {string} "<root folder name>/<folder>/<folder>..." when the walk
 *   reaches the Drive root folder (a file directly in the root yields
 *   "<root folder name>/"). When the parent chain ends before the root is
 *   reached, only the folder names that were found, joined by "/".
 */
function getFilePath(fileId) {
  const file = DriveApp.getFileById(fileId);
  const rootFolder = DriveApp.getRootFolder();
  const rootFolderId = rootFolder.getId();

  const folderNames = [];
  let reachedRoot = false;

  // hasNext() is checked before every next(), so a chain that ends without
  // reaching the root folder stops the walk instead of raising an error.
  let parents = file.getParents();
  while (parents.hasNext()) {
    const folder = parents.next();
    if (folder.getId() === rootFolderId) {
      reachedRoot = true;
      break;
    }
    folderNames.unshift(folder.getName());
    parents = folder.getParents();
  }

  // Joining with '/' directly keeps folder names that contain commas intact.
  const relativePath = folderNames.join('/');
  return reachedRoot ? rootFolder.getName() + '/' + relativePath : relativePath;
}
console:
11:36:40 AM Info My Drive/GAS
Had I more time/inclination, I'd have written a recursive function to capture and return an array of possible parent paths, but there's no use case for me and who the hell wants to keep the same file in multiple folders anyway??
(I'm sure there are use cases, e.g., equivalent to using symlinks, but it still makes me feel dirty ;-)