How to split a stream in gulp

Basically I have a set of files that I process using markdown and whatnot. After doing this initial processing, I'd like to split the stream into two:
First, a 1:1 mapping with additional processing, like applying a layout
Second, mapping all the files into one, like an index, without the layouts applied above
Is it OK to save the stream into a variable and just keep piping? Here's my current task:
gulp.task('default', function() {
    var entries = gulp.src('./log/*.md')
        .pipe(frontMatter())
        .pipe(markdown());

    var templated = entries
        .pipe(applyTemplate())
        .pipe(gulp.dest('./build/log'));

    var indexed = entries
        .pipe(index())
        .pipe(applyIndexTemplate())
        .pipe(gulp.dest('./build'));

    return merge(templated, indexed);
});
I could use lazypipe and/or just construct the pipe multiple times, but is there another way?
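For reference, the lazypipe route mentioned here could look roughly like the sketch below. It is only a sketch that reuses the plugin names from the task above (frontMatter, markdown, applyTemplate, index, applyIndexTemplate are assumed to be the same functions), and each branch re-reads and re-processes the source files, which is exactly the duplication the question is trying to avoid:

var lazypipe = require('lazypipe');
var merge = require('merge-stream'); // or event-stream's merge

// shared pre-processing, declared once and instantiated per branch
// (lazypipe takes the plugin function itself, not a call to it)
var preprocess = lazypipe()
    .pipe(frontMatter)
    .pipe(markdown);

gulp.task('default', function() {
    var templated = gulp.src('./log/*.md')
        .pipe(preprocess())
        .pipe(applyTemplate())
        .pipe(gulp.dest('./build/log'));

    var indexed = gulp.src('./log/*.md')
        .pipe(preprocess())
        .pipe(index())
        .pipe(applyIndexTemplate())
        .pipe(gulp.dest('./build'));

    return merge(templated, indexed);
});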

According to the Node.js docs, "multiple destinations can be piped to safely" and the original example is correct:
var entries = gulp.src('./log/*.md')
    .pipe(frontMatter())
    .pipe(markdown());

var templated = entries
    .pipe(applyTemplate())
    .pipe(gulp.dest('./build/log'));

var indexed = entries
    .pipe(index())
    .pipe(applyIndexTemplate())
    .pipe(gulp.dest('./build'));

return merge(templated, indexed);
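The merge used here isn't shown being required; it can come from a package such as merge-stream, or from event-stream's merge as in the next snippet. For example:

var merge = require('merge-stream'); // merge(streamA, streamB) returns a combined stream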

Another option is to clone the files into each branch with gulp-clone, so the two pipelines don't mutate the same vinyl file objects, and then merge the branches with event-stream:

var gulpClone = require("gulp-clone");
var eventStream = require('event-stream');

var entries = gulp.src('./log/*.md')
    .pipe(frontMatter())
    .pipe(markdown());

var templated = entries
    .pipe(gulpClone())
    .pipe(applyTemplate())
    .pipe(gulp.dest('./build/log'));

var indexed = entries
    .pipe(gulpClone())
    .pipe(index())
    .pipe(applyIndexTemplate())
    .pipe(gulp.dest('./build'));

return eventStream.merge(templated, indexed);

Related

XmlService.parse() not able to handle HTML tables

I am looking for help from this community regarding the below issue.
// I am searching my Gmail inbox for a specific email
function getWeeklyEmail() {
    var emailFilter = 'newer_than:7d AND label:inbox AND "Report: Launchpad filter"';
    var threads = GmailApp.search(emailFilter, 0, 5);
    var messages = [];
    threads.forEach(function(threads) {
        messages.push(threads.getMessages()[0]);
    });
    return messages;
}

// Trying to parse the HTML table contained within the email
function getParsedMsg() {
    var messages = getWeeklyEmail();
    var msgbody = messages[0].getBody();
    var doc = XmlService.parse(msgbody);
    var html = doc.getRootElement();
    var tables = doc.getDescendants();
    var templ = HtmlService.createTemplateFromFile('Messages1');
    templ.tables = [];
    return templ.evaluate();
}
The debugger crashes when I try to step over the XmlService.parse function. The msgbody of the email contains both text and an HTML-formatted table. I am getting the following error: TypeError: Cannot read property 'getBody' of undefined (line 19, file "Code")
If I remove the getParsedMsg function and instead just display the content of the email, I get the email body along with the element tags, etc., in HTML format.
Workaround
Hi! The issue you are experiencing is due to (as you previously mentioned) XmlService only recognising canonical XML, not HTML. One possible workaround is to search the string you obtain from getBody() for the tags you need.
In your case the main issue is var doc = XmlService.parse(msgbody);. To work around it, you can scan the whole string for the table tags you need using JavaScript's search method. Here is an example that retrieves an email containing a single table:
function getWeeklyEmail() {
    var emailFilter = 'newer_than:7d AND label:inbox AND "Report: Launchpad filter"';
    var threads = GmailApp.search(emailFilter, 0, 5);
    var messages = [];
    threads.forEach(function(threads) {
        messages.push(threads.getMessages()[0]);
    });
    return messages;
}

// Extract the HTML table contained within the email
function getParsedMsg() {
    var messages = getWeeklyEmail();
    var msgbody = messages[0].getBody();
    var indexOrigin = msgbody.search('<table');
    var indexEnd = msgbody.search('</table');
    // Take the substring between those two indexes.
    // Add 8 because indexEnd is the position of the '<' in '</table',
    // and the full closing tag '</table>' is 8 characters long.
    var table = msgbody.substring(indexOrigin, indexEnd + 8);
    Logger.log(table);
}
If you are looking for more than one table in your message, you can change getParsedMsg to the following:
function getParsedMsg() {
    // If you are not sure how many tables to expect, use an approximate upper bound
    var totalTables = 2;
    var messages = getWeeklyEmail();
    var msgbody = messages[0].getBody();
    var tables = [];
    var searchFrom = 0;
    for (var i = 0; i < totalTables; i++) {
        // find the next table, starting the search after the previous one
        var start = msgbody.indexOf('<table', searchFrom);
        var end = msgbody.indexOf('</table', start);
        if (start === -1 || end === -1) break; // no more tables in the message
        // add 8 to include the full closing tag '</table>'
        tables.push(msgbody.substring(start, end + 8));
        searchFrom = end + 8;
    }
    Logger.log(tables);
}
This lets you store each table in an element of an array. If you want to use these, you just need to retrieve the elements of the array and use them accordingly (for example, as HTML tables).
I hope this has helped you. Let me know if you need anything else or if something is unclear. :)
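To tie this back to the original getParsedMsg, here is a rough sketch (assuming the 'Messages1' template from the question prints the value with a <?!= ... ?> scriptlet) of passing the extracted markup to the template instead of an empty array:

function getParsedMsgAsHtml() {
    var messages = getWeeklyEmail();
    var msgbody = messages[0].getBody();
    var start = msgbody.indexOf('<table');
    var end = msgbody.indexOf('</table');
    var table = msgbody.substring(start, end + 8); // raw <table>...</table> markup
    var templ = HtmlService.createTemplateFromFile('Messages1');
    templ.tables = table; // rendered in the template with <?!= tables ?>
    return templ.evaluate();
}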

Downloading a SQL database as a CSV instead of select rows

I used a function to download some selections from a Lists object as a CSV. I'd like to do the same for the entire datasource connected to the Lists object, but am unsure of how to scale this to work with, as an example, 100,000 rows. On the button to download the List data as a CSV I have:
var rows = widget.root.descendants.MainTableBody.children._values;
var csvdata = [];
csvdata.push([["Email"], ["Last Login"], ["Sku ID"], ["Sku Name"], ["Docs Added Recently"]]);
for (var i in rows) {
    var t = [];
    t.push(rows[i].children.EmailField.text);
    t.push(rows[i].children.LastLoginField.text);
    t.push(rows[i].children.SkuIdField.text);
    t.push(rows[i].children.SkuNameField.text);
    t.push(rows[i].children.DocsAddedField.text);
    csvdata.push(t);
}
console.log(csvdata);
exportToCsv("LMexport", csvdata);
The export function is taken from this answer. I basically need the rows var to cover the entire table, but that's a lot of data.
(The original post included screenshots of the datasource schema, the calculation used, and the table as it appears in the UI.)
Records retrieved from a query are an array of objects.
You can use .reduce to convert them to CSV.
Snippet:
function exportCsv() {
    var query = app.models.Employees.newQuery();
    // query.filters...: apply the same filters used to populate the datasource
    var records = query.run(); // [{}, {}, {}, ...]
    var csv = records.reduce(function(str, rec) {
        var email = rec.Email;
        var login = rec.LastLogin;
        var skuId = rec.SkuId;
        return str + '"' + [email, login, skuId].join('","') + '"\n'; // result: "email","login","skuId"\n
    }, '');
    return csv;
}
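One caveat with the join above: a field value that itself contains a double quote (or a newline) will produce a malformed CSV row. A small sketch of the same reduce with basic quote escaping (doubling any embedded double quotes):

var csv = records.reduce(function(str, rec) {
    var fields = [rec.Email, rec.LastLogin, rec.SkuId].map(function(value) {
        // wrap each field in quotes and double any embedded quotes
        return '"' + String(value).replace(/"/g, '""') + '"';
    });
    return str + fields.join(',') + '\n';
}, '');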

Gulp remove duplicates if exists

Is it possible to remove files with the same name from the source? For example, let's say I have the following folder structure
a
---file1.txt
---file2.txt
---file3.txt
b
---file1.txt
When I select both folders in the source, I want the destination folder to contain only the files that aren't duplicates. In the example above the result would be
result
---file2.txt
---file3.txt
Optionally, it would be great if I could somehow filter out the duplicates and write them to a separate folder.
By duplicates I mean duplicates by name; the file content is not important.
It took me a while to get there, but try this:
var gulp = require('gulp');
var fs = require('fs');
var path = require('path');
var flatten = require('gulp-flatten');
var filter = require('gulp-filter');

var folders = ['a', 'b', 'c']; // I just hard-coded your folders here

// This function is called by filter for each file in the above folders.
// It should return false if the file is a duplicate, i.e., occurs
// in at least two folders.
function isUnique(file) {
    console.dir(file.history[0]); // just for fun
    var baseName = file.history[0].split(path.sep);
    baseName = baseName[baseName.length - 1];
    // var fileParents = '././';
    var fileParents = '.' + path.sep + '.' + path.sep;
    var count = 0;
    folders.forEach(function (folder) {
        if (fs.existsSync(fileParents + folder + path.sep + baseName)) count++;
        // could quit when count >= 2 if there were a lot of folders/files,
        // but there is no way to break out of a forEach
    });
    if (count >= 2) { // the file is a duplicate
        fs.unlinkSync(file.history[0]); // remove it from the 'result' directory
        return false;
    }
    else return true;
}

gulp.task('default', ['clump'], function () {
    // create a filter to remove duplicates
    const f = filter(function (file) { return isUnique(file); }, {restore: true, passthrough: false});
    const stream = gulp.src('./result/*.txt')
        .pipe(f); // actually do the filtering here
    f.restore.pipe(gulp.dest('duplicates')); // new stream with the removed duplicates
    return stream;
});

// 'clump' runs first; it gathers all files into the result directory
gulp.task('clump', function () {
    return gulp.src('./**/*.txt')
        .pipe(flatten()) // because the original folder structure is not wanted
        .pipe(gulp.dest('result'));
});
Run it with 'gulp'. The default task will trigger the 'clump' task first.
Since your post didn't require that any particular version of a duplicated file be kept (the newest, or whatever), I haven't worried about that here. If you want the 'result' folder to keep each version of a duplicated file, such as file1.txt from one folder and file1.txt from another (obviously renamed to something unique), that could be done in the 'clump' task.
Let me know if this works for you.
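If you would rather not hard-code the folders array, here is a small sketch (assuming the source folders sit next to the gulpfile) that builds it with fs, skipping the generated directories:

var folders = fs.readdirSync('.').filter(function (name) {
    // keep only directories, and skip the ones this gulpfile creates
    return fs.statSync(name).isDirectory() &&
        name !== 'result' &&
        name !== 'duplicates' &&
        name !== 'node_modules';
});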

What alternative to ScriptDB could I use to store a big array of arrays? (without using an external DB)

I was a user of the deprecated ScriptDB. My use of ScriptDB was fairly simple: storing a certain amount of information that backs an options panel, this way:
var db = ScriptDb.getMyDb();

function showList(folderID) {
    var folder = DocsList.getFolderById(folderID);
    var files = folder.getFiles();
    var arrayList = [];
    for (var file in files) {
        file = files[file];
        var thesesName = file.getName();
        var thesesId = file.getId();
        var thesesDoc = DocumentApp.openById(thesesId);
        for (var child = 0; child < thesesDoc.getNumChildren(); child++) {
            var thesesFirstParagraph = thesesDoc.getChild(child);
            var thesesType = thesesFirstParagraph.getText();
            if (thesesType != '') {
                var newArray = [thesesName, thesesType, thesesId];
                arrayList.push(newArray);
                break;
            }
        }
    }
    arrayList.sort();
    var result = db.query({arrayName: 'savedArray'});
    if (result.hasNext()) {
        var savedArray = result.next();
        savedArray.arrayValue = arrayList;
        db.save(savedArray);
    }
    else {
        var record = db.save({arrayName: "savedArray", arrayValue: arrayList});
    }
    var mydoc = SpreadsheetApp.getActiveSpreadsheet();
    var app = UiApp.createApplication().setWidth(550).setHeight(450);
    var panel = app.createVerticalPanel()
        .setId('panel');
    var label = app.createLabel("Choose the options").setStyleAttribute("fontSize", 18);
    app.add(label);
    panel.add(app.createHidden('checkbox_total', arrayList.length));
    for (var i = 0; i < arrayList.length; i++) {
        var checkbox = app.createCheckBox().setName('checkbox_isChecked_' + i).setText(arrayList[i][0]);
        panel.add(checkbox);
    }
    var handler = app.createServerHandler('submit').addCallbackElement(panel);
    panel.add(app.createButton('Submit', handler));
    var scroll = app.createScrollPanel().setPixelSize(500, 400);
    scroll.add(panel);
    app.add(scroll);
    mydoc.show(app);
}

function include(arr, obj) {
    for (var i = 0; i < arr.length; i++) {
        if (arr[i] == obj) // if we find a match, return true
            return true;
    }
    return false; // if we got here, there was no match, so return false
}

function submit(e) {
    var scriptDbObject = db.query({arrayName: "savedArray"});
    var result = scriptDbObject.next();
    var arrayList = result.arrayValue;
    db.remove(result);
    // continues...
}
I thought I could simply replace ScriptDB with userProperties (using JSON to turn the array into a string). However, an error warns me that the piece of information is too large to be stored in userProperties.
I did not want to use external databases (Parse or MongoDB), because I don't think that is necessary for my (simple) purpose.
So, what solution could I use as a replacement for ScriptDB?
You could store a string using the HtmlOutput Class.
var output = HtmlService.createHtmlOutput('<b>Hello, world!</b>');
output.append('<p>Hello again, world.</p>');
Logger.log(output.getContent());
Google Documentation - HtmlOutput
There are methods to append, clear and get the content out of the HtmlOutput object.
OR
Maybe create a Blob:
Google Documentation - Utilities Class - newBlob Method
Then you can get the data out of the blob as a string with getDataAsString.
If you need to, you can then convert the string back to an object, provided it's in valid JSON format.
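A minimal sketch of that Blob round trip, using the arrayList from the question (note the Blob itself only lives in memory for the current execution, so it would still need to be written somewhere, e.g. to Drive, in order to persist):

var blob = Utilities.newBlob(JSON.stringify(arrayList), 'application/json');
var asString = blob.getDataAsString(); // get the data back out as a string
var restored = JSON.parse(asString);   // and back to an array of arrays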
Firstly, if you're hitting the limits of the Properties service, I would recommend you look at an alternative external store, as you're manipulating a large amount of data, and any workaround given here is likely to be slower and less efficient than simply using a dedicated service.
Alternatively, of course, you could look at bringing your data under the limits of the Properties service by splitting it up across multiple properties.
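A rough sketch of that splitting idea, using PropertiesService and a conservative chunk size (the per-value limit is around 9 KB, so the exact size here is an assumption to adjust):

var CHUNK_SIZE = 8000; // stay under the per-value limit

function saveChunked(key, obj) {
    var props = PropertiesService.getUserProperties();
    var json = JSON.stringify(obj);
    var count = Math.ceil(json.length / CHUNK_SIZE);
    for (var i = 0; i < count; i++) {
        props.setProperty(key + '_' + i, json.slice(i * CHUNK_SIZE, (i + 1) * CHUNK_SIZE));
    }
    props.setProperty(key + '_count', String(count));
}

function loadChunked(key) {
    var props = PropertiesService.getUserProperties();
    var count = Number(props.getProperty(key + '_count'));
    var json = '';
    for (var i = 0; i < count; i++) {
        json += props.getProperty(key + '_' + i);
    }
    return JSON.parse(json);
}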
One other alternative would be to use a Google Doc or Sheet to store the string. When you're required to pull the data again, you can simply access the sheet and get the string, but this might be slow depending on the size of the string. At a glance it looks like you're just pulling Data on the folders in your drive, so you could consider writing it to a sheet, which would allow you to even display the information in a user friendly way. Given your use of arrays already, you can write them to a sheet easily using .setValues() if you convert them to a 2D array.
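A minimal sketch of the Sheet-based idea (it assumes a sheet named 'store' exists in the active spreadsheet; arrayList from the question is already a 2D array, so it can be written directly):

function saveArrayToSheet(arrayList) {
    var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('store');
    sheet.clearContents();
    if (arrayList.length > 0) {
        sheet.getRange(1, 1, arrayList.length, arrayList[0].length).setValues(arrayList);
    }
}

function readArrayFromSheet() {
    var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('store');
    return sheet.getDataRange().getValues(); // [[name, type, id], ...]
}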
Bruce McPherson has done a lot of work on abstracting databases. Take a look at his cDbAbstraction library; then you could easily chop and change which DB you use and compare performance. Maybe even create a cDbAbstraction implementation that uses HtmlOutput (I like that idea, Sandy; Bruce does some funky stuff with parallel processes via HtmlService).

How do you query ScriptDb for partial matches?

I tried using RegEx and it did not return any results:
function findRecord() {
    var db = ScriptDb.getMyDb();
    var toFind = /Quality/i;
    var results = db.query({companyName: toFind});
    while (results.hasNext()) {
        var result = results.next();
        Logger.log(Utilities.jsonStringify(result));
    }
}
From what I can see, ScriptDb's query() will only return exact matches for strings.
The only way I can see is to return the entire database and then iterate through it. I really hope there is a way to query partial matches.
Try iterating over the results using the match method
function testQuery() {
    var db = ScriptDb.getMyDb();
    var results = db.query({});
    var start = new Date();
    while (results.hasNext()) {
        var result = results.next();
        if (result.companyName.match(/qual.*/i)) {
            Logger.log(Utilities.jsonStringify(result));
        }
    }
    var endTime = new Date();
    Logger.log("time is " + (endTime.getTime() - start.getTime()) + "ms");
}
ScriptDb currently doesn't support partial matches in strings. Depending on the data you may be able to use the anyOf method:
var results = db.query({
    companyName: db.anyOf(['Quality', 'quality'])
});
I don't think that is possible. You may open an "enhancement request" on the issue tracker.
But depending on your usage, it may be possible to achieve your goal by structuring your database differently, for example by creating some kind of "tag" properties for your objects that you set beforehand (i.e., when adding the object to the database), so you can query on them later.
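For instance, a sketch of that tag idea (the property names here are purely illustrative): set boolean keyword properties when saving, then query them exactly later:

// when saving, also store keyword "tags" you will want to query on later
db.save({
    companyName: 'Quality Widgets Inc.',
    tagQuality: true,
    tagWidgets: true
});

// later, an exact-match query on the tag instead of a partial match on the name
var results = db.query({tagQuality: true});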