How do you query ScriptDb for partial matches? - google-apps-script

I tried using RegEx and it did not return any results:
/**
 * Attempts a partial-match lookup by passing a regular expression as the
 * query value for `companyName`, then logs every record that comes back.
 * As posted, this attempt did not return any results.
 */
function findRecord() {
  var pattern = /Quality/i;
  var matches = ScriptDb.getMyDb().query({companyName: pattern});
  for (; matches.hasNext();) {
    Logger.log(Utilities.jsonStringify(matches.next()));
  }
}
From what I can see, ScriptDb's query() will only return exact matches for strings.
The only way I can see is to return the entire database and then iterate through it. I really hope there is a way to query partial matches.

Try iterating over the results using the match method
/**
 * Scans every ScriptDb record and logs those whose `companyName` contains
 * "qual" (case-insensitive), then logs how long the scan took.
 *
 * The filtering happens in script code: an empty query fetches all records
 * and each one is tested against the pattern.
 */
function testQuery() {
  var db = ScriptDb.getMyDb();
  // /qual/ matches exactly the same strings as /qual.*/ when only testing.
  var partialName = /qual/i;
  var results = db.query({});
  var start = new Date();
  while (results.hasNext()) {
    var result = results.next();
    // An empty query returns every stored object, including ones that have
    // no companyName (or a non-string one); skip those instead of throwing.
    if (typeof result.companyName === 'string' && partialName.test(result.companyName)) {
      Logger.log(Utilities.jsonStringify(result));
    }
  }
  var endTime = new Date();
  Logger.log("time is " + (endTime.getTime() - start.getTime()) + "ms");
}

ScriptDb currently doesn't support partial matches in strings. Depending on the data you may be able to use the anyOf method:
// Exact-match query: a record qualifies when companyName equals any one of the listed spellings.
var results = db.query({companyName: db.anyOf(['Quality', 'quality'])});

I don't think that is possible. You may open an "enhancement request" on the issue tracker.
But depending on your usage, it may be possible to achieve your goal if you structured your database differently, probably creating some kind of "tag" category properties for your objects, that you set beforehand, i.e. when adding the object to the database, so you can query on it later.

Related

XmlService.parse() not able to handle HTML tables

I am looking for help from this community regarding the below issue.
/**
 * Searches Gmail for inbox threads from the last seven days that contain
 * "Report: Launchpad filter" (at most five threads, starting at offset 0).
 *
 * @return {Array} The first message of each matching thread.
 */
function getWeeklyEmail() {
  var query = 'newer_than:7d AND label:inbox AND "Report: Launchpad filter"';
  return GmailApp.search(query, 0, 5).map(function (thread) {
    return thread.getMessages()[0];
  });
}
/**
 * Tries to parse the body of the first message returned by getWeeklyEmail()
 * with XmlService, then evaluates the 'Messages1' HTML template.
 *
 * @return The result of evaluating the template, whose `tables` property is
 *     set to an empty array.
 */
function getParsedMsg() {
  var body = getWeeklyEmail()[0].getBody();
  var parsed = XmlService.parse(body);
  // Both lookups are performed, but their results are not used.
  parsed.getRootElement();
  parsed.getDescendants();
  var template = HtmlService.createTemplateFromFile('Messages1');
  template.tables = [];
  return template.evaluate();
}
The debugger crashes when I try to step over the XmlService.parse function. The msgbody of the email contains both text and HTML formatted table. I am getting the following error: TypeError: Cannot read property 'getBody' of undefined (line 19, file "Code")
If I remove the getParsedMsg function and instead just display the content of the email, I get the email body along with the element tags etc in html format.
Workaround
Hi ! The issue you are experiencing is due to (as you previously mentioned) XmlService only recognising canonical XML rather than HTML. One possible workaround to solve this issue is to search in the string you are obtaining with getBody() for your desired tags.
In your case the main issue is the line var doc = XmlService.parse(msgbody);. To work around it, you can scan the whole string for the table tags you need using JavaScript's string search method. Here is an example piece of code that retrieves an email containing a single table:
/**
 * Looks up report emails: inbox threads newer than seven days containing
 * "Report: Launchpad filter", limited to five threads from offset 0.
 *
 * @return {Array} For each thread found, its first message.
 */
function getWeeklyEmail() {
  var emailFilter = 'newer_than:7d AND label:inbox AND "Report: Launchpad filter"';
  var found = GmailApp.search(emailFilter, 0, 5);
  var firstMessages = [];
  for (var t = 0; t < found.length; t++) {
    firstMessages.push(found[t].getMessages()[0]);
  }
  return firstMessages;
}
/**
 * Extracts the first HTML table from the first message returned by
 * getWeeklyEmail() and logs it.
 *
 * The body is scanned as a plain string instead of being handed to
 * XmlService.parse(). Opening and closing table tags are matched
 * case-insensitively and counted, so a table nested inside the first table
 * does not cut the result short.
 *
 * @return {?string} The markup from the opening '<table' through the
 *     matching closing tag, or null when there is no message or no complete
 *     table in the body.
 */
function getParsedMsg() {
  var messages = getWeeklyEmail();
  // The search may match nothing; messages[0] would then be undefined.
  if (!messages || messages.length === 0) {
    Logger.log('No matching email found.');
    return null;
  }
  var msgbody = messages[0].getBody();
  var tagPattern = /<(\/?)table\b/gi;
  var depth = 0;
  var indexOrigin = -1;
  var tag;
  while ((tag = tagPattern.exec(msgbody)) !== null) {
    if (tag[1] === '') {
      // Opening tag: remember where the outermost table starts.
      if (depth === 0) indexOrigin = tag.index;
      depth++;
    } else if (depth > 0 && --depth === 0) {
      // Closing tag of the outermost table: include everything up to its '>'.
      var indexEnd = msgbody.indexOf('>', tag.index);
      if (indexEnd === -1) break;
      var Table = msgbody.substring(indexOrigin, indexEnd + 1);
      Logger.log(Table);
      return Table;
    }
  }
  Logger.log('No complete table found in the email body.');
  return null;
}
If you are looking for more than one table in your message, you can change getParsedMsg to the following:
/**
 * Collects every HTML table found in the first message returned by
 * getWeeklyEmail() and logs the resulting array.
 *
 * No table count needs to be supplied: the body is scanned until it runs
 * out of tables.
 *
 * @return {string[]} One string per top-level table, in document order;
 *     empty when there is no message or the body contains no complete table.
 */
function getParsedMsg() {
  var messages = getWeeklyEmail();
  // The search may match nothing; messages[0] would then be undefined.
  if (!messages || messages.length === 0) {
    Logger.log('No matching email found.');
    return [];
  }
  var Table = extractTables_(messages[0].getBody());
  Logger.log(Table);
  return Table;
}

/**
 * Returns the markup of each top-level <table>...</table> in `html`.
 * Tags are matched case-insensitively; nested tables stay inside the string
 * of the table that contains them.
 */
function extractTables_(html) {
  var tables = [];
  var tagPattern = /<(\/?)table\b/gi;
  var depth = 0;
  var start = -1;
  var tag;
  while ((tag = tagPattern.exec(html)) !== null) {
    if (tag[1] === '') {
      // Opening tag: only the outermost one marks the start of a result.
      if (depth === 0) start = tag.index;
      depth++;
    } else if (depth > 0) {
      depth--;
      if (depth === 0) {
        var close = html.indexOf('>', tag.index);
        if (close === -1) break; // truncated closing tag: nothing more to extract
        tables.push(html.substring(start, close + 1));
      }
    }
  }
  return tables;
}
This will let you store each table in an element of an array. If you want to use them, you just need to retrieve the elements of this array and use them accordingly (for example, as HTML tables).
I hope this has helped you. Let me know if you need anything else or if you did not understand something. :)

Pulling PubMed data into Google Sheets

I'm looking for some help. I am trying to grab an author's publications from PubMed and populate the data into Google Sheets using Apps Script. I've gotten as far as the code below and am now stuck.
Basically, what I have done was first pull all the Pubmed IDs from a particular author whose name comes from the name of the sheet. Then I have tried creating a loop to go through each Pubmed ID JSON summary and pull each field I want. I have been able to pull the pub date. I had set it up with the idea that I would do a loop for each field of that PMID I want, store it in an array, and then return it to my sheet. However, I'm now stuck trying to get the second field - title - and all the subsequent fields (e.g. authors, last author, first author, etc.)
Any help would be greatly appreciated.
/**
 * Looks up PubMed IDs for the author named by the first sheet's name and
 * fetches the publication date of each one.
 *
 * One esearch request is made for the author, then one esummary request per
 * PubMed ID. Only the "pubdate" field of each summary is kept.
 *
 * @return {Array} One row per PubMed ID: [index as a string, PubMed ID,
 *     publication date].
 */
function IMPORTPMID() {
  var searchBase = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=";
  var summaryBase = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&rettype=abstract&id=";

  // The sheet's name doubles as the author search term.
  var authorName = SpreadsheetApp.getActiveSpreadsheet().getSheets()[0].getSheetName();
  var searchUrl = searchBase + authorName + "[author]&retmode=json&retmax=1000";
  var searchJson = JSON.parse(UrlFetchApp.fetch(searchUrl).getContentText());
  var pmids = searchJson.esearchresult.idlist;

  var pubDates = pmids.map(function (pmid) {
    var summaryJson = JSON.parse(UrlFetchApp.fetch(summaryBase + pmid).getContentText());
    return summaryJson.result[pmid].pubdate;
  });

  return pmids.map(function (pmid, position) {
    return [String(position), pmid, pubDates[position]];
  });
}
From a PubMed JSON response for a given PubMed ID, you should be able to determine the fieldnames (and paths to them) that you want to include in your summary report. Reading them all is simpler to implement if they are all at the same level, but if some are properties of a sub-field, you can still access them if you give the right path in your setup.
Consider the "source JSON":
[
{ "pubMedId": "1234",
"name": "Jay Sahn",
"publications": [
{ "pubId": "abcd",
"issn": "A1B2C3",
"title": "Dynamic JSON Parsing: A Journey into Madness",
"authors": [
{ "pubMedId": "1234" },
{ "pubMedId": "2345" }
]
},
{ "pubId": "efgh",
...
},
...
],
...
},
...
]
The pubId and issn fields would be at the same level, while the publications and authors would not.
You can retrieve both the pubMedId and publications fields (and others you desire) in the same loop by either 1) hard-coding the field access, or 2) writing code that parses a field path and supplying field paths.
Option 1 is likely to be faster, but much less flexible if you suddenly want to get a new field, since you have to remember how to write the code to access that field, along with where to insert it, etc. God save you if the API changes.
Option 2 is harder to get right, but once right, will (should) work for any field you (properly) specify. Getting a new field is as easy as writing the path to it in the relevant config variable. There are possibly libraries that will do this for you.
To convert the above into spreadsheet rows (one per pubMedId in the outer array, e.g. the IDs you queried their API for), consider this example code:
/**
 * Example: turns an array of "author" objects parsed from a JSON response
 * into spreadsheet rows, one row per author and one column per configured
 * field path, and writes them to a sheet.
 * The sheet reference and the fetch arguments are placeholders to fill in.
 */
function foo() {
const sheet = /* get a sheet reference somehow */;
const resp = UrlFetchApp.fetch(...).getContentText();
const data = JSON.parse(resp);
// paths relative to the outermost field, which for the imaginary source is an array of "author" objects
const fields = ['pubMedId', 'name', 'publications/pubId', 'publications/title', 'publications/authors/pubMedId'];
// One row per author; each cell holds the JSON text of whatever the path resolved to.
const output = data.map(function (author) {
var row = fields.map(function (f) {
// Walk the '/'-separated path one segment at a time, starting from the author object.
var desiredField = f.split('/').reduce(delve_, author);
return JSON.stringify(desiredField);
});
return row;
});
// Write all rows in one call, starting at row 1, column 1.
// NOTE(review): output[0] is undefined when data is an empty array, so this line would throw.
sheet.getRange(1, 1, output.length, output[0].length).setValues(output);
}
/**
 * Reducer callback for `pathArray.reduce(delve_, rootObject)`: resolves one
 * path segment against the value produced by the previous segment.
 *
 * When the current value is an array and the segment is not a direct
 * property of it, the rest of the path is applied to every element via
 * collate_() and the collected array becomes the final result.
 *
 * @param {*} parentObj Value resolved so far.
 * @param {string} property Path segment to resolve on parentObj.
 * @param {number} i Index of `property` within fullPath.
 * @param {string[]} fullPath The path array being reduced (mutated, see below).
 * @return {*} The resolved value, or undefined when the segment is missing.
 */
function delve_(parentObj, property, i, fullPath) {
  // An earlier segment was missing or resolved to null: nothing to dive into.
  if (parentObj === undefined || parentObj === null)
    return;
  const child = parentObj[property];
  // Compare against undefined rather than truthiness so that legitimate
  // falsy values (0, '', false, null) are returned instead of being treated
  // as missing.
  if (child !== undefined)
    return child;
  // Not a direct property / index, so perhaps a property on an object in an Array.
  if (Array.isArray(parentObj))
    // splice() removes the remaining segments from the array being reduced,
    // which ends the outer reduce after this call; collate_ receives them.
    return collate_(parentObj, fullPath.splice(i));
  console.warn({message: "Unhandled case / missing property",
                args: {parent: parentObj, prop: property, index: i, pathArray: fullPath}});
  return; // property didn't exist, user error.
}
/**
 * Resolves the same property path on every element of an array.
 *
 * @param {Array} arr Elements to read from.
 * @param {string[]} fields Path segments to resolve on each element.
 * @return {Array} The resolved value for each element, in order.
 */
function collate_(arr, fields) {
  return arr.map(function (item) {
    // Reduce over a copy of the path for each element, leaving `fields` itself untouched.
    const pathCopy = fields.concat();
    return pathCopy.reduce(delve_, item);
  });
}
Executing this yields the following output in Stackdriver:
Obviously you probably want some different (aka real) fields, and probably have other ideas for how to report them, so I leave that portion up to the reader.
Anyone with improvements to the above is welcome to submit a PR.
Recommended Reading:
Array#reduce
Array#map
Array#splice
Array#slice
Internet references on parsing nested JSON. There are a lot.

What alternative to ScriptDB I could use to store a big array of arrays? (without using external DB)

I was a user of the deprecated ScriptDB. The use I made of ScriptDB was fairly simple: to store a certain amount of information contained on a panel options, this way:
// Script-wide ScriptDb handle; showList() and submit() below both read and write through it.
var db = ScriptDb.getMyDb();
/**
 * Builds a [file name, first non-empty text, file id] triple for each file in
 * the given folder, stores the sorted list in ScriptDb under arrayName
 * 'savedArray', and shows a dialog with one checkbox per entry plus a Submit
 * button wired to the 'submit' server handler.
 *
 * @param folderID Identifier handed to DocsList.getFolderById().
 */
function showList(folderID) {
var folder = DocsList.getFolderById(folderID);
var files = folder.getFiles();
var arrayList = [];
// `file` starts out as the key yielded by for...in and is immediately replaced by the file object.
for (var file in files) {
file = files[file];
var thesesName = file.getName();
var thesesId = file.getId();
var thesesDoc = DocumentApp.openById(thesesId);
// Walk the document's children and keep only the first one whose text is non-empty.
for (var child = 0; child < thesesDoc.getNumChildren(); child++){
var thesesFirstParagraph = thesesDoc.getChild(child);
var thesesType = thesesFirstParagraph.getText();
if (thesesType != ''){
var newArray = [thesesName, thesesType, thesesId];
arrayList.push(newArray);
break;
}
}
}
// Default sort: rows are compared by their string form, which begins with the file name.
arrayList.sort();
// Upsert: overwrite the existing 'savedArray' record when there is one, otherwise create it.
var result = db.query({arrayName: 'savedArray'});
if (result.hasNext()) {
var savedArray = result.next();
savedArray.arrayValue = arrayList;
db.save(savedArray);
}
else {
var record = db.save({arrayName: "savedArray", arrayValue:arrayList});
}
var mydoc = SpreadsheetApp.getActiveSpreadsheet();
var app = UiApp.createApplication().setWidth(550).setHeight(450);
var panel = app.createVerticalPanel()
.setId('panel');
var label = app.createLabel("Choose the options").setStyleAttribute("fontSize", 18);
app.add(label);
// Hidden field carrying the number of checkboxes, added to the panel that is sent to the handler.
panel.add(app.createHidden('checkbox_total', arrayList.length));
// One checkbox per entry, named by its index in arrayList and labelled with the file name.
for(var i = 0; i < arrayList.length; i++){
var checkbox = app.createCheckBox().setName('checkbox_isChecked_'+i).setText(arrayList[i][0]);
panel.add(checkbox);
}
// The panel is the callback element for the 'submit' handler.
var handler = app.createServerHandler('submit').addCallbackElement(panel);
panel.add(app.createButton('Submit', handler));
var scroll = app.createScrollPanel().setPixelSize(500, 400);
scroll.add(panel);
app.add(scroll);
mydoc.show(app);
}
/**
 * Reports whether `obj` occurs in `arr`.
 *
 * @param arr Indexable collection with a `length`.
 * @param obj Value to look for.
 * @return {boolean} true as soon as an element compares equal, else false.
 */
function include(arr, obj) {
  var position = 0;
  while (position < arr.length) {
    // Loose (==) comparison, so values that coerce to equal count as a match.
    if (arr[position] == obj) {
      return true;
    }
    position += 1;
  }
  return false;
}
/**
 * Server handler registered by showList() under the name 'submit': loads the
 * 'savedArray' record from ScriptDb, keeps its arrayValue, and removes the
 * record. The rest of the handler is elided in the original post.
 *
 * @param e Event object passed to the handler (not used in the visible part).
 */
function submit(e){
var scriptDbObject = db.query({arrayName: "savedArray"});
// No hasNext() check here: this relies on the record having been saved beforehand.
var result = scriptDbObject.next();
var arrayList = result.arrayValue;
db.remove(result);
// continues...
}
I thought I could simply replace the ScriptDB by userProperties (using JSON to turn the array into string). However, an error warns me that my piece of information is too large to be stored in userProperties.
I did not want to use external databases (parse or MongoDB), because I think it isn't necessary for my (simple) purpose.
So, what solution could I use as a replacement for ScriptDB?
You could store a string using the HtmlOutput Class.
// Create the output with some initial content, append more to it, then read the combined string back.
var output = HtmlService.createHtmlOutput('<b>Hello, world!</b>')
    .append('<p>Hello again, world.</p>');
Logger.log(output.getContent());
Google Documentation - HtmlOutput
There are methods to append, clear and get the content out of the HtmlOutput object.
OR
Maybe create a Blob:
Google Documentation - Utilities Class - newBlob Method
Then you can get the data out of the blob as a string.
getDataAsString
Then if you need to you can convert the string to an object if it's in the right JSON format.
Firstly, if you're hitting the limits on the Properties service, I would recommend you look at an alternative external store, as you're manipulating a large amount of data, and any workaround given here is possibly going to be slower and less efficient than simply using a dedicated service.
Alternatively of course, you could look at making your data come under the limits for the properties service by splitting it up and using multiple properties etc.
One other alternative would be to use a Google Doc or Sheet to store the string. When you're required to pull the data again, you can simply access the sheet and get the string, but this might be slow depending on the size of the string. At a glance it looks like you're just pulling Data on the folders in your drive, so you could consider writing it to a sheet, which would allow you to even display the information in a user friendly way. Given your use of arrays already, you can write them to a sheet easily using .setValues() if you convert them to a 2D array.
Bruce McPherson has done a lot of work on abstracting databases. Take a look at his cDbAbstraction library then you could easily chop and change which DB you use and compare performance. Maybe even create a cDbAbstraction library to use HTMLOutput (I like that idea Sandy, Bruce does some funky stuff with parallel processes via HTMLService)

Error when trying to store an array in ScriptDb

I have an array of objects that is created by my script and I am trying to copy that array into a new array and then store it in scriptDb using the following function:
/**
 * Copies the values of `currentArray` into a fresh array and passes that
 * array to ScriptDb's save().
 *
 * @param currentArray Collection whose enumerable values are copied.
 * @return Whatever db.save() returns for the copied array.
 */
function copyAndStore (currentArray) {
  var store = ScriptDb.getMyDb();
  var duplicate = [];
  var key;
  for (key in currentArray) {
    duplicate[duplicate.length] = currentArray[key];
  }
  return store.save(duplicate);
}
It copies everything properly but when it gets to var id = db.save(copyArray); I get the error: Invalid argument. Expected a javascript map object.
Does ScriptDb have issues with storing arrays? Thanks in advance for the help.
As #Thomas said, you can save an array in a map object.
You don't need to perform a copy operation before putting an object into the ScriptDB, either. You could save your array by simply db.save({myArray}), and remember the ID.
Here's some minimalist code to demonstrate. I'm showing two ways to retrieve your saved array - one by ID, which seems to be the way you were planning to, but also a second way using a "key" value for a query. If you expect to retrieve the contents of ScriptDB in a later run of your code, this approach eliminates the need to somehow remember the ID of the stored array.
/**
 * Stores an array in ScriptDb wrapped in a map object tagged with
 * type "savedArray".
 *
 * @param {Array} currentArray The array to store under the `data` key.
 * @return The id reported by the saved record's getId().
 */
function saveArray (currentArray) {
  var store = ScriptDb.getMyDb();
  var wrapper = {type: "savedArray", data: currentArray};
  var stored = store.save(wrapper);
  return stored.getId();
}
/**
 * Loads a previously saved array from ScriptDb by record id.
 *
 * @param id Id of the record, as returned by saveArray().
 * @return {Array} The record's `data` value, or an empty array when no
 *     record is loaded for that id (the same fallback loadArrayByType()
 *     uses when its query finds nothing).
 */
function loadArrayById (id) {
  var db = ScriptDb.getMyDb();
  var record = db.load(id);
  // Guard against a missing record so a stale id does not throw on `.data`.
  if (record === null || record === undefined) {
    return [];
  }
  return record.data;
}
/**
 * Loads a saved array by querying ScriptDb for a record of type "savedArray".
 *
 * @return {Array} The `data` value of the first matching record, or an empty
 *     array when the query has no results.
 */
function loadArrayByType () {
  var matches = ScriptDb.getMyDb().query({type: "savedArray"});
  return matches.hasNext() ? matches.next().data : [];
}
/**
 * Round-trip demo: saves a sample array, loads it back both by id and by
 * type query, then pauses so the locals (arr, loaded1, loaded2) can be
 * compared in the debugger.
 */
function test() {
var arr = ['this','is','a','test'];
var savedId = saveArray( arr );
var loaded1 = loadArrayById( savedId );
var loaded2 = loadArrayByType();
debugger; // pause if running debugger
}
Here's what you'll see at the debugger pause:
Note that by using the map tag data to pull the array from the saved object, both loaded1 and loaded2 are identical to the source array arr.
ScriptDb only stores map objects. You could however store a map that contains an array!
You can use arrays to save several objects in a single call using db.saveBatch.

Query variable arrays

How do I use an array variable as a query argument instead of the literal array itself?
For example, the documentation mentions the following:
// Quoted as posted: the closing parenthesis of anyOf( is missing here.
var result = db.query({name: db.anyOf(['fred', 'barney', 'mark']});
But instead, I wish to do this:
// Same query, but the list of names is held in a variable and passed to anyOf() by reference.
var myTeam = ["fred","barney","mark"];
var result = db.query({name: db.anyOf(myTeam)});
So far, I have not been successful.
What am I missing?
Nothing. Your code works fine for me (besides the missing parenthesis typo).
/**
 * Shows that anyOf() accepts an array held in a variable: saves one record
 * named 'fred', queries for any of three names, and logs each hit as JSON.
 */
function scriptdbTest() {
  var store = ScriptDb.getMyDb();
  // Seed one record so the query below has something to return.
  store.save({name:'fred', age:40});
  var teamMembers = ["fred","barney","mark"];
  var matches = store.query({name: store.anyOf(teamMembers)});
  for (; matches.hasNext();) {
    Logger.log(matches.next().toJson());
  }
}