Get the size of Document Properties Storage Object - google-apps-script

Here is the code snippet that successfully saves 35KB of text.
PropertiesService.getDocumentProperties().setProperty("...","...")
The defined limit was just 9KB, I wonder what might have happened? Did Google increase the limit? I could store above 9 KB
I see the limit is 9KB
https://developers.google.com/apps-script/guides/services/quotas#:~:text=Properties%20value%20size
What is the actual size allowed limit now?
// Alphabet the random payload is drawn from: A-Z, a-z and 0-9.
const characters ='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';

/**
 * Build a random alphanumeric string.
 *
 * @param {number} length - number of characters to generate.
 * @returns {string} exactly `length` characters, each picked from `characters`.
 */
function generateString(length) {
  // Start from an empty string: seeding with ' ' made every result one
  // character longer than requested, which skews any size measurement.
  let result = '';
  const charactersLength = characters.length;
  for (let i = 0; i < length; i++) {
    result += characters.charAt(Math.floor(Math.random() * charactersLength));
  }
  return result;
}
/**
 * Stores a large random value under the "APP_DATA" document property
 * to probe the property value size limit.
 */
function myFunction() {
  //throw PropertiesService.getDocumentProperties().getProperty("APP_DATA")
  const documentProperties = PropertiesService.getDocumentProperties();
  // string + number concatenates, so the payload is the random string followed by "200"
  const payload = generateString(9 * 1024) + 200; // added a random string for testing
  documentProperties.setProperty("APP_DATA", `${payload}`);
}

Related

Cannot get variable from sql query in Node

Sorry, I read about handling async functions in order to get variable names from them but I am not sure what I am doing wrong and how to handle it.
// Runs one query per entry of `ga` and appends one summary row per query to `gs4`.
// The callbacks run later, after this loop has already finished, so `gs4` is only
// complete once every callback has fired.
for (let j = 0; j < ga.length; j++) {
  // Placeholder instead of string concatenation, so the driver escapes the value.
  // NOTE(review): assumes db.query(sql, values, callback) as in the mysql/mysql2 drivers - confirm.
  const sql = "SELECT * FROM matches WHERE clh = ?";
  db.query(sql, [ga[j]], function(err, result) {
    if (err) {
      // `result` is not usable after a failed query, so stop instead of reading it
      console.log(err);
      return;
    }
    let gs1 = 0;
    let gs2 = 0;
    let w1 = 0;
    let d1 = 0;
    let l1 = 0;
    // stays undefined when the query returned no rows (same as before)
    let r1;
    for (let i = 0; i < result.length; i++) {
      gs1 += result[i].gsh;
      gs2 += result[i].gsa;
      const r = mgs1(result[i].gsh, result[i].gsa);
      if (r == 3) w1 += 1;
      if (r == 1) d1 += 1;
      if (r == 0) l1 += 1;
      const gd1 = gs1 - gs2;
      r1 = [result[i].clh, result.length, w1, d1, l1, gs1, gs2, gd1];
    }
    gs4.push(r1);
    if (gs4.length === 6) {
      // this value is handed back to db.query(), not to the code that started the loop
      return gs4;
    }
  });
}
}
This function returns the array that I want but I am not sure how to access it outside the db.query block. I read posts about handling variables from async functions but I just can't seem to do it in this example. Thanks a lot in advance
I guess you have defined const gs4 = [] somewhere in code you did not show us. That's part of the answer to your question: it will be populated after your callback from db.query() completes.
The rest of the answer: it is not populated until after the callback completes. Also, the return from inside your callback is meaningless; it just returns to db.query() .
Also, db.query() returns to its caller instantly, long before it calls its callback. So your loop tries to run multiple queries concurrently. I guess the result in gs4 will accumulate the results from all the queries.
With respect, I believe a quick jump up the learning curve for Promises or async / await lies in your near future.
This may help : node.js mysql query in a for loop
If you would like to query the database the correct way, you should use the embedded functions that comes with database driver. For string interpolation and returning data for your functions.
exports.lookupLogin = (req, res, next) => {
let sql = 'SELECT e.employee_id, e.login, e.password FROM employee e WHERE e.login=?';
postgres.client.query(sql, [req.body.login], (error, result, fields) => {
if (err) {
return res.status(500).json({ errors: ['Could not do login'] });
}
res.status(200).json(result.rows);
});
};
For more information you can check the mysql documentation to use with nodejs.

How to select all underlined text in a paragraph

I'm trying to create a google apps script that will format certain parts of a paragraph. For example, text that is underlined will become bolded/italicized as well.
One docs add-on I have tried has a similar feature: https://imgur.com/a/5Cw6Irn (this is exactly what I'm trying to achieve)
How can I write a function that will select a certain type of text and format it?
**I managed to write a script that iterates through every single letter in a paragraph and checks if it's underlined, but it becomes extremely slow as the paragraph gets longer, so I'm looking for a faster solution.
/**
 * Formats the current selection one character at a time: underlined characters
 * become bold with a yellow background, all other characters get font size 8.
 * Range elements without an editAsText method are skipped.
 */
function textUnderline() {
  const selection = DocumentApp.getActiveDocument().getSelection();
  if (!selection) {
    return;
  }
  const rangeElements = selection.getRangeElements();
  for (const rangeElement of rangeElements) {
    if (!rangeElement.getElement().editAsText) {
      continue;
    }
    const text = rangeElement.getElement().editAsText();
    const charCount = text.getText().length;
    // Every single character is inspected and formatted on its own
    for (let pos = 0; pos < charCount; pos += 1) {
      if (!text.isUnderline(pos)) {
        text.setFontSize(pos, pos, 8);
        continue;
      }
      text.setBold(pos, pos, true);
      text.setBackgroundColor(pos, pos, '#ffff00');
    }
  }
}
Use getTextAttributeIndices:
There is no need to check each character in the selection. You can use getTextAttributeIndices() to get the indices in which the text formatting changes. This method:
Retrieves the set of text indices that correspond to the start of distinct text formatting runs.
You just need to iterate through these indices (that is, check the indices in which text formatting changes), which are a small fraction of all character indices. This will greatly increase efficiency.
Code sample:
/**
 * Formats the current selection run by run instead of character by character.
 * getTextAttributeIndices() supplies the start offset of each formatting run;
 * a run ends just before the next run starts, or at the last character.
 * Underlined runs become bold with a yellow background, other runs get font size 8.
 */
function textUnderline() {
  const selection = DocumentApp.getActiveDocument().getSelection();
  if (!selection) {
    return;
  }
  selection.getRangeElements().forEach((rangeElement) => {
    if (!rangeElement.getElement().editAsText) {
      return;
    }
    const text = rangeElement.getElement().editAsText();
    const runStarts = text.getTextAttributeIndices();
    const lastOffset = text.getText().length - 1;
    runStarts.forEach((runStart, runNumber) => {
      const hasNextRun = runNumber + 1 < runStarts.length;
      const runEnd = hasNextRun ? runStarts[runNumber + 1] - 1 : lastOffset;
      // The first character of a run decides how the whole run is treated
      if (!text.isUnderline(runStart)) {
        text.setFontSize(runStart, runEnd, 8);
        return;
      }
      text.setBold(runStart, runEnd, true);
      text.setBackgroundColor(runStart, runEnd, '#ffff00');
    });
  });
}
Reference:
getTextAttributeIndices()
Based on the example shown in the animated gif, it seems your procedure needs to
handle a selection
set properties if the selected region is of some format (e.g. underlined)
set properties if the selected region is NOT of some format (e.g. not underlined)
finish as fast as possible
and your example code achieves all these goals except the last one.
The problem is that you are calling the text.set...() functions at each index position. Each call is synchronous and blocks the code until the document is updated, thus your run time grows linearly with each character in the selection.
My suggestion is to build up a collection of subranges from the selection range and then for each subrange use text.set...(subrange.start, subrange.end) to apply the formatting. Now the run time will be dependent on chunks of characters, rather than single characters. i.e., you will only update when the formatting switches back and forth from, in your example, underlined to not underlined.
Here is some example code that implements this subrange idea. I separated the specific predicate function (text.isUnderline) and specific formatting effects into their own functions so as to separate the general idea from the specific implementation.
// run this function with selection
// Entry point for the example: hands transformSelection the name of the Text
// method used as the style test ("isUnderline") and the function that applies
// the formatting (boldYellowOrSmall).
function transformUnderlinedToBoldAndYellow() {
transformSelection("isUnderline", boldYellowOrSmall);
}
/**
 * Splits every range element of the current selection into styled / not-styled
 * subranges and passes each resulting range to `stylingFunction`.
 * Does nothing when there is no selection.
 *
 * @param {string} stylePredicateKey - name of the Text method used as the style test.
 * @param {Function} stylingFunction - called once per collected range.
 */
function transformSelection(stylePredicateKey, stylingFunction) {
  const selection = DocumentApp.getActiveDocument().getSelection();
  if (selection) {
    const collectSubRanges = makeStyledSubRangeReducer(stylePredicateKey);
    const rangeElements = selection.getRangeElements();
    const ranges = rangeElements.reduce(collectSubRanges, []);
    ranges.forEach(stylingFunction);
  }
}
/**
 * Creates a reducer for Array.prototype.reduce that turns range elements into
 * range objects of the form {text, start, end, styled, notStyled}.
 * `styled` and `notStyled` hold {start, end} subranges (inclusive offsets,
 * most recent first) for which the predicate was truthy / falsy.
 *
 * @param {string} stylePredicateKey - name of a method on the text object that
 *   takes an offset and reports whether the character there has the style.
 * @returns {Function} reducer (ranges, rangeElement) => ranges
 */
function makeStyledSubRangeReducer(stylePredicateKey) {
  const getKey = (isStyled) => isStyled ? "styled" : "notStyled";
  return function(ranges, rangeElement) {
    const {text, start, end} = unwrapRangeElement(rangeElement);
    // `end` is inclusive, so start === end is a one-character selection and must
    // be kept; only start > end means there is nothing selected
    if (start > end) return ranges;
    const range = {
      text, start, end,
      styled: [], notStyled: [] // we will extend our range with subranges
    };
    let currentKey = getKey(text[stylePredicateKey](start));
    range[currentKey].unshift({start: start});
    for (let index = start + 1; index <= end; ++index) {
      const isStyled = text[stylePredicateKey](index);
      if (getKey(isStyled) !== currentKey) { // we are switching styles
        range[currentKey][0].end = index - 1; // note end of this style
        currentKey = getKey(isStyled);
        range[currentKey].unshift({start: index}); // start new style range
      }
    }
    // close the last subrange explicitly so consumers never see a missing `end`
    range[currentKey][0].end = end;
    ranges.push(range);
    return ranges;
  };
}
/**
 * Unwraps a range element into {text, start, end}.
 * For a partial range element the offsets come from the element itself;
 * otherwise they span the whole text (0 .. length - 1).
 *
 * @param {object} rangeElement - element taken from a selection's range elements.
 * @returns {{text: object, start: number, end: number}} text plus inclusive offsets.
 */
function unwrapRangeElement(rangeElement) {
  const partial = rangeElement.isPartial();
  const text = rangeElement.getElement().asText();
  if (partial) {
    return {
      text,
      start: rangeElement.getStartOffset(),
      end: rangeElement.getEndOffsetInclusive(),
    };
  }
  return {
    text,
    start: 0,
    end: text.getText().length - 1,
  };
}
/**
 * Applies the formatting of the example to one collected range:
 * styled subranges become bold with a yellow background, not-styled
 * subranges get font size 8.
 *
 * @param {object} range - {text, start, end, styled, notStyled}; a subrange
 *   without its own `end` extends to the end of the range.
 */
function boldYellowOrSmall(range) {
  const {text, end, styled, notStyled} = range;
  // A subrange can legitimately end at offset 0. `subRange.end || end` treated
  // that 0 as "missing" and formatted the whole range, so test for undefined.
  const lastOffsetOf = (subRange) => subRange.end === undefined ? end : subRange.end;
  styled.forEach(function setTextBoldAndYellow(subRange) {
    text.setBold(subRange.start, lastOffsetOf(subRange), true);
    text.setBackgroundColor(subRange.start, lastOffsetOf(subRange), '#ffff00');
  });
  notStyled.forEach(function setTextSmall(subRange) {
    text.setFontSize(subRange.start, lastOffsetOf(subRange), 8);
  });
}

Collision probability using this custom id generation code (Node.js)

Am I running an unnecessary risk of creating an id that is not unique? I'm trying to generate a unique, random id of alphanumeric characters. This ID will be used in the primary key for the database record.
// Seed text: 16 random bytes rendered as base64, followed by the current
// Date.now() value.
const randomPart: string = crypto.randomBytes(16).toString('base64');
const idSeed: string = randomPart + '' + Date.now();
// The id is the base64 encoding of the seed text with every '/', '+' and '='
// character removed.
const encodedSeed: string = Buffer.from(idSeed).toString('base64');
const orderId: string = encodedSeed.replace(/[\/\+\=]/g, '');
First off, I recommend that you get rid of the .replace(/[\/\+\=]/g, '') as that is losing randomness and, in fact, mapping some unique orderIds that differ only in those characters to be the same.
My recommendation would be to use a base58 encoder base-x that will directly encode to what you want. This encoder library lets you pass in the exact character set you want to use for encoding and it just uses that.
Here's my suggested code you can insert:
// Encoder built by base-x from a 58-character alphabet: the digits 1-9 plus the
// upper- and lower-case letters without I, O and l.
const base58Encode = require('base-x')('123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz').encode;
And, then where you create the orderID, change to this:
// The seed is just the 16 bytes from crypto.randomBytes (no timestamp is appended here)
const idSeed = crypto.randomBytes(16)
// Encode the seed bytes with base58Encode to get the id
const orderId = base58Encode(idSeed);
I don't know about the probability of a dup (you'd need a crypto/statistician for that), but I ran 10,000,000 orderId values without a dup and I repeated that 10 times and still didn't get a dup. Obviously, that doesn't mean it can't happen, but I'm doing this rapid fire too where Date.now() might not even be much different. I couldn't run it more than 10,000,000 times because I run out of memory trying to store all the prior orderId values in a Set object to check for dups. You could increase memory for nodejs and run it with even higher values or put it in a shell script and run it over and over and over again.
Here's my dup checker program if you want to run it yourself over and over:
const crypto = require('crypto');
/**
 * Formats a number (or numeric string) with a comma between every group of
 * three digits in the integer part, e.g. 1234567.89 -> "1,234,567.89".
 *
 * @param {number|string} str - value to format; it is converted to a string first.
 * @returns {string} the value with thousands separators; text after the first "."
 *   is appended unchanged.
 */
function addCommas(str) {
  const parts = (str + "").split(".");
  const main = parts[0];
  // Keep a leading minus sign out of the grouping, otherwise -123 became "-,123"
  const sign = main.charAt(0) === "-" ? "-" : "";
  const digits = sign ? main.slice(1) : main;
  const len = digits.length;
  let output = "";
  for (let i = len - 1; i >= 0; i--) {
    output = digits.charAt(i) + output;
    // insert a separator after every third digit, but never in front of the first one
    if ((len - i) % 3 === 0 && i > 0) {
      output = "," + output;
    }
  }
  output = sign + output;
  // put decimal part back
  if (parts.length > 1) {
    output += "." + parts[1];
  }
  return output;
}
// In-memory duplicate check: generate numToTry ids the same way the question
// does, remember each one in a Set and stop with exit code 1 on the first repeat.
let set = new Set();
const numToTry = 10_000_000;
const debugMultiple = 100_000;
let attempt = 0;
while (attempt < numToTry) {
  // progress line every debugMultiple attempts (not for attempt 0)
  const atProgressMark = attempt !== 0 && attempt % debugMultiple === 0;
  if (atProgressMark) {
    console.log(`Attempt #${addCommas(attempt)}`);
  }
  const randomPart = crypto.randomBytes(16).toString('base64');
  const idSeed = randomPart + '' + Date.now();
  const encodedSeed = Buffer.from(idSeed).toString('base64');
  const orderId = encodedSeed.replace(/[\/\+\=]/g, '');
  //console.log(orderId);
  if (set.has(orderId)) {
    console.log(`Found conflict after ${addCommas(attempt)} attempts`);
    console.log(`Conflicting orderId = ${orderId}`);
    process.exit(1);
  }
  set.add(orderId);
  attempt += 1;
}
console.log(`No dups found after ${addCommas(numToTry)} attempts`);
Before spending a lot of time on this, I would investigate your database to see if it will generate a unique key for you that could work as the orderId. This is a common database problem.
Here's a newer version that I was able to run up to 1,000,000,000 ids through. Still no conflicts. Because there's no way I could have a giant Set object of 1,000,000,000 ids in memory, I brainstormed about a number of ways to do it. I thought about using a redis server and storing the ids in there since it can use a lot more memory. But, then I came up with a disk-based solution that can scale as high as you want. Here's the basic idea:
One of your orderId values looks like this:
zz6h6q6oRELJXmh4By4NUw1587006335064
When I generate a new orderId, if I can separate it out into a disk-based "bucket" that contains only ids with the same beginning characters, then I can split all the ids among many different files.
The idea is that if each id that starts with the same two characters is stored in the same file, then no other id in any other file could possibly match the ids in that file.
You can then do your work in two passes. The first pass generates 1,000,000,000 ids and as they are generated, they are written out to an appropriate bucket file based on the characters the id starts with.
After all the ids are generated and written to their appropriate bucket files, the second pass is to iterate through each of the bucket files one at a time, load all the ids into a Set object and see if any conflict. If none match, clear that Set and go onto the next file. This lets you do the memory constrained part (dealing with a Set object) in pieces to use less memory for big numbers of ids.
So, then the question is how to divide the ids up into bucket files? Since each byte in the base64 id value represents up to 64 possible values, if you use just the first two characters of the id to determine the bucket, you will get up to 64*64=4096 buckets. For some reason (which must have to do with how crypto.randomBytes(16) works), I only found ~3800 buckets actually occurred in the actual orderId values.
But, if you split 1,000,000,000 values into 3800 buckets, you get about 263,000 ids per bucket. We already showed that we could easily process 15,000,000 ids in memory before, so this should be more than enough buckets to be able to process each bucket in memory one at a time. In fact, if I were patient enough, we could probably go to 10,000,000,000 with buckets based on just the first two characters.
If you wanted more buckets, they could be based on the first three characters, though then you start getting too many files for a single directory and you have to start splitting files among directories which can be done, but complicates things.
So, I need to create a bucket filename that's based on the first two characters of the id. The ids are case sensitive (base64 uses upper and lower case to represent different values). My Windows file system is case insensitive so I can't just directly use the first two letters as the filename. So, I created a simple algorithm, that takes a two character mixed case prefix and makes it into a four character lowercase name. It maps a lowercase "a" to "a_" and a non-lowercase character like "B" to "bb". So, a lowercase value is followed by a _ and an uppercase value is followed by a second copy of itself. So, you'd have id mappings like this:
"ab" => "a_b_"
"AB" => "aabb"
"aB" => "a_bb"
"Ab" => "aab_"
Non-alpha characters (like numbers) just map to a doubling of themselves just like any non-lowercase characters. So, with this, I can get an id value, grab the first two characters, see what filename it belongs to and append it to that file.
For performance reasons, I created a Bucket class which maintains a cache of ids waiting to be written in memory. When the cache inside a particular bucket gets to a certain length (which I now have set to 3000), I append them all out to the file at once and clear the bucket cache. When I'm done generating all the ids, I iterate through all the buckets and flush out any remaining ids. With this kind of write caching, the generation of ids is mostly CPU bound, not disk bound. Disk utilization runs around 30%. One core of the CPU is pegged during id generation. This could probably be sped up with some WorkerThreads.
So, once all the ids are written to bucket files and nothing is in memory at all, it's time to read through each of the bucket files one at a time, load all their ids into a Set and see if there are any conflicts. Each bucket file is a line separated list of ids that all start with same prefix like this:
zzoexm2FE8DIrHnXpp8qw1587003338798
zzuP6LpusKIMeYrfl0WJnQ1587003338885
zz1itmTqA3yaFNo1KFUhg1587003338897
zz3TEFeqH965OTFCrFTjJQ1587003338904
zz8XQKvq11fCqn9kB4O2A1587003338904
zzaKMTFPct5ls7WW3YmcQ1587003338927
zzyX3htzIqi4zOq4Cxdg1587003338928
zzoHu6vIHMEgNMVY46Qw1587003338962
So, I just read a given bucket file, line by line, check each id against a Set for that bucket file. If it's already in the set, there's a conflict. Output that conflict and abort. If it's not the Set, add it to the Set and continue with the rest of the ids in that bucket file. Since this bucket file contains all the ids that start with the same two characters, no other id in any other bucket file can conflict with these so you can just compare all these ids vs each other.
The reading of the bucket files is heavily disk bound. When running 1,000,000,000 ids into the 3844 bucket files, each bucket file is about 5MB which is 22GB of data. Each file has to be read and parsed into lines and then each id added to the Set.
I tried a couple different mechanisms for reading the files line by line and found them quite slow. I started with the readLine interface which lets you iterate through line by line via a readStream. It was sloooow. Then, I just read the whole file into memory with fs.readFile() into a giant string and then called .split("\n") on it to break it into lines. This was actually better than readLine, but still slow. I theorized that there were just too many copies of the data which meant the garbage collector was having to work a lot.
So, finally I wrote my own version of readFile that reads the entire file into a reusable Buffer and splits it into lines by parsing the binary buffer directly. This saved at least a couple copies of the data along the way and saved a lot of GC work. It wasn't fast, but it was faster. Reusing the buffer also saved me a lot of separate 5MB allocations.
The first pass (generating the ids) is CPU bound. I've theorized I could speed that up quite a bit by starting up a number of Worker Threads (probably like 6 since I have an 8-core CPU) and letting them crunch on generating the ids. I would dole out 1/6 of the quantity to each Worker Thread and when they accumulated 1000 or so, they'd message those 1000 back to the main thread which would insert them in the right buckets. But, before I adventure into using WorkerThreads, I need to do some benchmarking to see how much of the total time of the first pass is in the crypto.randomBytes() function vs. elsewhere to make sure it would be worth it.
The second pass is totally disk bound, but the actual disk throughput is horrible (like 60MB/s). Either my disk really sucks, nodejs isn't very good at this type of file I/O or there's just a lot of overhead in handling 3800 large files (read directory entry, seek to disk for first sector, read as many sequential sectors as you can, seek again, etc...). I could try it on my fastest SSD, but I don't really want to go writing 20GB to my SSD everytime I play with this.
I played with increasing the UV_THREADPOOL_SIZE thinking that maybe nodejs was queuing too many reads/writes. But, performance actually got worse when I increased the thread pool size. I guess its default of 4 is more than enough to keep one disk controller plenty busy. Anything more than that and you're just asking the disk head to jump around between different files when it would be more efficient to read all of one file, then go to the next file and so on.
While the second pass is mostly disk bound, there's still about 30% of the time spent in non-disk related stuff (based on some high-res timers I inserted). So, if it didn't cause too much harm with disk contention, it's possible you could spread the processing of the different bucket files out among a group of WorkerThreads. You would at least get parallelism on the CPU part of that process. You would likely get more disk contention though so I'm not sure if it would help.
Lastly, bucket files could be split among drives and, even ideally among separate SATA controllers. I have plenty of drives and a couple SATA controllers to try that, but then it gets pretty specific to my system.
Here's the code for the bucket system.
// unique-test.js
const crypto = require('crypto');
const readline = require('readline');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');
const {fastReadFileLines} = require('./fast-read-file.js');
/**
 * Promise-based timer.
 *
 * @param {number} t - delay in milliseconds, handed to setTimeout.
 * @param {*} [v] - value the promise resolves with.
 * @returns {Promise<*>} resolves with `v` once the timer fires.
 */
function delay(t, v) {
  return new Promise((resolve) => {
    setTimeout(() => resolve(v), t);
  });
}
/**
 * Formats a number (or numeric string) with a comma between every group of
 * three digits in the integer part, e.g. 1234567.89 -> "1,234,567.89".
 *
 * @param {number|string} str - value to format; it is converted to a string first.
 * @returns {string} the value with thousands separators; text after the first "."
 *   is appended unchanged.
 */
function addCommas(str) {
  const parts = (str + "").split(".");
  const main = parts[0];
  // Keep a leading minus sign out of the grouping, otherwise -123 became "-,123"
  const sign = main.charAt(0) === "-" ? "-" : "";
  const digits = sign ? main.slice(1) : main;
  const len = digits.length;
  let output = "";
  for (let i = len - 1; i >= 0; i--) {
    output = digits.charAt(i) + output;
    // insert a separator after every third digit, but never in front of the first one
    if ((len - i) % 3 === 0 && i > 0) {
      output = "," + output;
    }
  }
  output = sign + output;
  // put decimal part back
  if (parts.length > 1) {
    output += "." + parts[1];
  }
  return output;
}
/**
 * Derives the bucket filename for an id from the id's first two characters.
 * Ids are case sensitive but bucket filenames must not depend on case, so
 * each character is expanded to two characters before lower-casing:
 * a lowercase letter a-z is followed by "_", any other character is doubled.
 *
 * @param {string} str - the id.
 * @returns {string} lower-case key, e.g. "aB" -> "a_bb".
 */
function makeBucketKey(str) {
  const prefix = str.slice(0, 2);
  const expanded = Array.from(prefix, (ch) => {
    const isLowerCaseLetter = ch >= 'a' && ch <= 'z';
    return ch + (isLowerCaseLetter ? "_" : ch);
  });
  return expanded.join("").toLowerCase();
}
// this value times the number of total buckets has to fit in memory
const bucketCacheMax = 3000;

/**
 * One bucket: an in-memory cache of ids that is appended to `filename`
 * whenever the cache grows past its threshold, plus a running count of
 * every id ever added.
 */
class Bucket {
  /**
   * @param {string} filename - file the cached ids are appended to.
   * @param {boolean} [writeToDisk=true] - when false, flushing only empties the cache.
   */
  constructor(filename, writeToDisk = true) {
    this.items = [];
    this.filename = filename;
    this.cnt = 0;
    this.writeToDisk = writeToDisk;
    this.flushPending = false;
    // We dither the bucketCacheMax so that buckets aren't all trying to write at the same time
    // After they write once (and are thus spread out in time), then they will reset to full cache size
    let dither = Math.floor(Math.random() * bucketCacheMax) + 10;
    if (Math.random() > 0.5) {
      dither = -dither;
    }
    this.bucketCacheMax = bucketCacheMax + dither;
  }

  /**
   * Add an item to the cache and flush to disk if the cache is over its threshold.
   * @param {string} item - id to cache.
   * @returns {Promise<void>} settles after any flush that was triggered.
   */
  async add(item) {
    ++this.cnt;
    this.items.push(item);
    if (this.items.length > this.bucketCacheMax) {
      // the dithered cache size is only used on the first write
      // to spread out the writes. After that, we want a full cache size
      this.bucketCacheMax = bucketCacheMax;
      await this.flush();
    }
  }

  /**
   * Write any cached items to disk (one id per line) and empty the cache.
   * A failed append is retried with a growing delay; the last error is thrown
   * once all attempts have failed.
   * @returns {Promise<void>}
   * @throws {Error} when called while an append is still in progress.
   */
  async flush() {
    if (!this.writeToDisk || !this.items.length) {
      this.items.length = 0;
      return;
    }
    // Check before detaching the cached items: throwing after the cache had
    // already been emptied silently dropped those ids.
    if (this.flushPending) {
      throw new Error("Can't call flush() when flush is already in progress");
    }
    const data = this.items.join("\n") + "\n";
    this.items.length = 0;

    // we write to disk with retry because we once got EBUSY (perhaps from a backup program)
    const retryMax = 10;
    const retryDelay = 200;
    const retryBackoff = 200;
    let lastErr;
    for (let retryCntr = 0; retryCntr <= retryMax; retryCntr++) {
      this.flushPending = true;
      try {
        await fsp.appendFile(this.filename, data);
        return;
      } catch (err) {
        lastErr = err;
        console.log("flushNow error, retrying...", err);
      } finally {
        // the flag only covers the append itself, not the wait between attempts
        this.flushPending = false;
      }
      await delay(retryDelay + (retryCntr * retryBackoff));
    }
    throw lastErr;
  }

  /**
   * Remove this bucket's file.
   * @returns {Promise<void>}
   */
  delete() {
    return fsp.unlink(this.filename);
  }

  /** Total number of items ever added (not the current cache length). */
  get size() {
    return this.cnt;
  }
}
/**
 * A set of Bucket objects keyed by bucket id; buckets are created on first use
 * with a file inside `dir`.
 */
class BucketCollection {
  /**
   * @param {string} dir - directory the bucket files live in.
   * @param {boolean} [writeToDisk=true] - passed on to every Bucket created here.
   */
  constructor(dir, writeToDisk = true) {
    // map key is bucketID, value is bucket object for that key
    this.buckets = new Map();
    this.dir = dir;
    // Remember the option: add() used to read a module-level variable of the
    // same name, so the constructor argument had no effect.
    this.writeToDisk = writeToDisk;
  }

  /**
   * Add `data` to the bucket for `key`, creating the bucket if needed.
   * @param {string} key - bucket id, also used as the file name inside `dir`.
   * @param {string} data - item to store.
   * @returns {Promise<void>} whatever the bucket's add() returns.
   */
  add(key, data) {
    let bucket = this.buckets.get(key);
    if (!bucket) {
      const filename = path.join(this.dir, key);
      bucket = new Bucket(filename, this.writeToDisk);
      this.buckets.set(key, bucket);
    }
    return bucket.add(data);
  }

  /** Flush every bucket, one after the other. */
  async flush() {
    // this could perhaps be sped up by doing 4 at a time instead of serially
    for (const bucket of this.buckets.values()) {
      await bucket.flush();
    }
  }

  /** Delete all the files associated with the buckets, one after the other. */
  async delete() {
    for (const bucket of this.buckets.values()) {
      await bucket.delete();
    }
  }

  /** Number of buckets created so far. */
  get size() {
    return this.buckets.size;
  }

  /**
   * @returns {number} the largest `size` reported by any bucket (0 when there are none).
   */
  getMaxBucketSize() {
    let max = 0;
    for (const bucket of this.buckets.values()) {
      max = Math.max(max, bucket.size);
    }
    return max;
  }
}
// program options
let numToTry = 100_000;
let writeToDisk = true;
let cleanupBucketFiles = true;
let skipAnalyze = false;
let analyzeOnly = false;
// Command line switches (compared in lower case):
// -nodisk       don't write to disk
// -nocleanup    keep the bucket files when done
// -skipanalyze  generate the bucket files but don't analyze them
// -analyzeonly  analyze files in bucket directory only
// Any other argument consisting only of digits and commas sets numToTry;
// anything else is reported and ends the process with exit code 1.
for (const rawArg of process.argv.slice(2)) {
  const arg = rawArg.toLowerCase();
  if (arg === "-nodisk") {
    writeToDisk = false;
  } else if (arg === "-nocleanup") {
    cleanupBucketFiles = false;
  } else if (arg === "-skipanalyze") {
    skipAnalyze = true;
  } else if (arg === "-analyzeonly") {
    analyzeOnly = true;
  } else if (/[^\d,]/.test(arg)) {
    console.log(`Unknown argument ${arg}`);
    process.exit(1);
  } else {
    numToTry = parseInt(arg.replace(/,/g, ""), 10);
  }
}
// Bucket files are kept in a "buckets" directory next to this script
let bucketDir = path.join(__dirname, "buckets");
// The collection receives the bucket directory and the writeToDisk option
let collection = new BucketCollection(bucketDir, writeToDisk);
console.log(`Running ${addCommas(numToTry)} random ids`);
// makeRandoms() logs a progress line every debugMultiple attempts
const debugMultiple = 100_000;
/**
 * Second pass: reads the bucket files one at a time and reports any id that
 * occurs more than once within a file.
 * With `analyzeOnly` set, every file found in `bucketDir` is checked; otherwise
 * the files of the buckets in `collection` are checked.
 *
 * @returns {Promise<void>}
 */
async function analyze() {
  let cntr = 0;
  const cntrProgress = 10;
  const cntrProgressN = 10n;
  // Shared by all processFile() calls so the read buffer is reused from file to
  // file. (An inner `let buffer` used to shadow this one, so every file got a
  // freshly allocated buffer.)
  let buffer = null;
  const times = [];

  // Check one bucket file for duplicate lines and record how long the read took.
  async function processFile(file) {
    if (cntr !== 0 && cntr % cntrProgress === 0) {
      let sum = 0n;
      for (let i = 0; i < cntrProgress; i++) {
        sum += times[i];
      }
      console.log(`Checking bucket #${cntr}, Average readFileTime = ${sum / cntrProgressN}`);
      times.length = 0;
    }
    ++cntr;
    const set = new Set();
    const startT = process.hrtime.bigint();
    const result = await fastReadFileLines(file, buffer);
    const data = result.lines;
    // keep reusing buffer which may have been made larger since last time
    buffer = result.buffer;
    const afterReadFileT = process.hrtime.bigint();
    for (const lineData of data) {
      const line = lineData.trim();
      if (line) {
        if (set.has(line)) {
          // report the duplicated id itself, not the whole array of lines
          console.log(`Found conflict on ${line}`);
        } else {
          set.add(line);
        }
      }
    }
    // hrtime.bigint() differences are divided by 1000 before being averaged
    const divisor = 1000n;
    const readFileTime = (afterReadFileT - startT) / divisor;
    times.push(readFileTime);
  }

  if (analyzeOnly) {
    const files = await fsp.readdir(bucketDir);
    for (const file of files) {
      const fullPath = path.join(bucketDir, file);
      await processFile(fullPath);
    }
  } else {
    for (const bucket of collection.buckets.values()) {
      await processFile(bucket.filename);
    }
  }
}
/**
 * Main routine. With `analyzeOnly` set it only runs analyze(). Otherwise it
 * generates `numToTry` ids, files each one into its bucket, flushes the
 * buckets, then analyzes them (unless `skipAnalyze`) and deletes the bucket
 * files (if `cleanupBucketFiles`).
 *
 * @returns {Promise<void>}
 */
async function makeRandoms() {
  const start = Date.now();
  if (analyzeOnly) {
    return analyze();
  }
  for (let i = 0; i < numToTry; i++) {
    if (i !== 0 && i % debugMultiple === 0) {
      console.log(`Attempt #${addCommas(i)}`);
    }
    const idSeed = crypto.randomBytes(16).toString('base64') + '' + Date.now();
    // idSeed is already a string, so the former `.toString('base64')` call on it
    // changed nothing and has been dropped; the generated ids are the same.
    const orderId = idSeed.replace(/[\/\+\=]/g, '');
    //console.log(orderId);
    const bucketKey = makeBucketKey(orderId);
    await collection.add(bucketKey, orderId);
  }
  console.log(`Total buckets: ${collection.size}, Max bucket size: ${collection.getMaxBucketSize()}`);
  //console.log(`No dups found after ${addCommas(numToTry)} attempts`);
  await collection.flush();
  const delta = Date.now() - start;
  console.log(`Run time for creating buckets: ${addCommas(delta)}ms, ${addCommas((delta / numToTry) * 1000)}ms per thousand`);
  if (!skipAnalyze) {
    console.log("Analyzing buckets...")
    await analyze();
  }
  if (cleanupBucketFiles) {
    console.log("Cleaning up buckets...")
    await collection.delete();
  }
}
// Report a failure of the run instead of leaving the rejection unhandled
makeRandoms().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
And, here's a dependent file (goes in the same directory) for my faster readfile function:
// fast-read-file.js
const fsp = require('fs').promises;
/**
 * Reads a whole file into a Buffer, reusing the caller's buffer when it is
 * large enough.
 *
 * @param {string} filename - file to read.
 * @param {Buffer|null} [buffer=null] - buffer to reuse; a new one of the file's
 *   size is allocated when it is missing or too small.
 * @returns {Promise<{buffer: Buffer, bytesRead: number}>} the buffer that was
 *   filled and the number of bytes read into it.
 * @throws {Error} when fewer bytes than the file size were read.
 */
async function fastReadFile(filename, buffer = null) {
  const handle = await fsp.open(filename, "r");
  let bytesRead;
  try {
    const { size } = await handle.stat();
    const needsNewBuffer = !buffer || buffer.length < size;
    if (needsNewBuffer) {
      buffer = Buffer.allocUnsafe(size);
    }
    // zero whatever lies beyond the file's size so nothing from a previous
    // file leaks out through the shared buffer
    if (buffer.length > size) {
      buffer.fill(0, size);
    }
    ({ bytesRead } = await handle.read(buffer, 0, size, 0));
    if (bytesRead !== size) {
      // no data leaking out
      buffer.fill(0);
      throw new Error("bytesRead not full file size")
    }
  } finally {
    // closing is not awaited; a close error is only logged
    handle.close().catch((closeErr) => {
      console.log(closeErr);
    });
  }
  return {buffer, bytesRead};
}
/**
 * Reads a file with fastReadFile() and splits its bytes into lines.
 * Lines end at LF; a CR directly in front of the LF is not part of the line.
 * Data after the last LF is returned as a final line.
 *
 * @param {string} filename - file to read.
 * @param {Buffer|null} [buf=null] - buffer to reuse, passed on to fastReadFile().
 * @returns {Promise<{buffer: Buffer, lines: string[]}>} the buffer that was used
 *   and the decoded lines.
 */
async function fastReadFileLines(filename, buf = null) {
  const {bytesRead, buffer} = await fastReadFile(filename, buf);
  const LF = 10;
  const CR = 13;
  const lines = [];
  let lineStart = 0;
  // the buffer may be larger than the actual file data, so only offsets
  // below bytesRead are treated as file content
  while (lineStart < bytesRead) {
    const lfIndex = buffer.indexOf(LF, lineStart);
    if (lfIndex === -1) {
      break;
    }
    // look for CR before LF
    const lineEnd = buffer[lfIndex - 1] === CR ? lfIndex - 1 : lfIndex;
    lines.push(buffer.toString('utf8', lineStart, lineEnd));
    lineStart = lfIndex + 1;
  }
  // check for data at end of file that doesn't end in LF
  if (lineStart < bytesRead) {
    lines.push(buffer.toString('utf8', lineStart, bytesRead));
  }
  return {buffer, lines};
}
module.exports = {fastReadFile, fastReadFileLines};
// Self-test, only when this file is run directly from the command line:
// reads a file named "zzzz" and prints its first three and last three lines.
// A file of ids named "zzzz" must exist in this directory
if (require.main === module) {
  // pre-filled buffer that is handed to fastReadFileLines for reuse
  const buffer = Buffer.alloc(1024 * 1024 * 10, "abc\n", "utf8");
  fastReadFileLines("zzzz", buffer).then((result) => {
    const lines = result.lines;
    [0, 1, 2].forEach((fromStart) => {
      console.log(lines[fromStart]);
    });
    console.log("...");
    [3, 2, 1].forEach((fromEnd) => {
      console.log(lines[lines.length - fromEnd]);
    });
  }).catch((err) => {
    console.log(err);
  });
}
You first create a sub-directory named "buckets" under where you are running this. Then, you run this from the command line:
node unique-test.js 1,000,000,000
There are some supported command lines options (mostly used during debugging):
-nodisk Don't write to disk
-nocleanup Don't cleanup generated disk files when done
-skipAnalyze Just generate bucket files, don't analyze them
-analyzeOnly Use previously generated bucket files and analyze them
The number you pass on the command line is how many ids to generate. If you pass nothing, it defaults to 100,000. For readability, it handles commas.
That's a really superb answer by #jfriend, I'd just like to add that you can calculate the result analytically, or rather an approximation. I believe using both approaches can be the best route to go.
This is an example of the Birthday Problem.
The TLDR on this is that the approximate probability of collision can be determined using the formula:
1 − exp(−n²/(2x))
Where x is the number of possible values and n is the number of generated values, as long as n is small compared to x (It will be!)
Now, you have approximately 16 bytes of entropy in the generated ids this gives 2^128 or 3.4 x 10^38 possible ids. Since two characters are being dropped (+/), the number of possible values is more like (62^21) = 4.37 x 10^37.
As #jfriend00 has pointed out, the addition of the date means you'd have to generate the number of ids in the table below every millisecond to have the corresponding probability of collision.
This table should give an approximation of the collision probabilities.
|----------------------------|----------------------------|
| Number of Ids | Collision Probability |
|----------------------------|----------------------------|
| 10^6 (1 million) | 1.14 × 10^-26 |
|----------------------------|----------------------------|
| 10^9 (1 billion) | 1.14 × 10^-20 |
|----------------------------|----------------------------|
| 10^12 (1 trillion) | 1.14 × 10^-14 |
|----------------------------|----------------------------|
| 10^15 (1 quadrillion) | 1.14 × 10^-8 |
|----------------------------|----------------------------|
I've used the very handy Wolfram Alpha to calculate these results.

find the length of a string in google script

I'm trying to make a script for google sheet, who can count a letter in a text. But it seems that .length doesn't work. Anyone who can give directions on where to find the the solution.
// Intended to report how often the character `bogstav` occurs in `tekst`,
// as a percentage of the text's length.
// NOTE(review): this is the version as posted in the question; the review
// notes below mark the spots that keep it from working.
function Tjekkode(tekst , bogstav){
var test = "";
// find the length of tekst
var laengdeTekst = tekst.length;
var t = 0;
// through the text character by character
// NOTE(review): the index starts at 1, so the character at index 0 is never examined
for ( var i = 1; i<laengdeTekst ; i++) {
var test = tekst.substr(i,1);
if (test == bogstav) {
// if the letter is found, it is counted up
// REMEMBER == means compare
var t = t + 1;
}
}
// returns percent appearance of the letter
// NOTE(review): `Return = ...` is an assignment to a variable named Return, not a
// `return` statement, and `længdeTekst` is spelled differently from the
// `laengdeTekst` declared above
Return = t / længdeTekst * 100
}
Thanks in advance
length is ok in your code. To test it, run this script:
/**
 * Sanity check for String.length: writes the length of a fixed
 * six-character sample string to the Apps Script log.
 */
function test() {
  var sample = "123456";
  // find the length of the text and log it
  Logger.log(sample.length);
}
After you run it, check Log, press [Ctrl + Enter]
The correct code in your case:
/**
 * Returns how often a character occurs in a text, as a percentage of the
 * text's length.
 *
 * @param {*} tekst - Text to scan. Non-string values (for example a numeric
 *     cell value) are converted to a string; null and undefined count as
 *     empty text.
 * @param {*} bogstav - The single character to count; compared as a string.
 * @return {number} Percentage (0-100) of characters equal to bogstav, or 0
 *     when the text is empty.
 */
function Tjekkode(tekst, bogstav) {
  var indhold = tekst == null ? "" : String(tekst);
  var laengdeTekst = indhold.length;
  // An empty text would otherwise produce 0 / 0 = NaN.
  if (laengdeTekst === 0) {
    return 0;
  }
  var soegt = String(bogstav);
  var t = 0;
  // start looping from zero!
  for (var i = 0; i < laengdeTekst; i++) {
    if (indhold.charAt(i) === soegt) {
      t = t + 1;
    }
  }
  // JavaScript is case sensitive: 'return != Return'
  return t / laengdeTekst * 100;
}
Please, look at this tutorial for more info
thanks
I'll guess that I might get the one with the R instead of r at the end, but the script didn't run that line, because it kinda stopped at the .length line :/
The comments in Danish are for my pupils (I'm a teacher in elementary school).
I'll see if google wants to cooperate today :|
This is the google script that worked for me. Note the 24 - that's the length of an empty message that has markup like <div>...</div>
/**
 * Moves every Gmail draft whose body is effectively empty to the trash.
 *
 * A draft is treated as empty when its body is at most 24 characters long,
 * the length the author observed for an empty message that contains only
 * wrapper markup such as <div>...</div>.
 */
function TrashEmptyDrafts() {
  // Longest body length that still counts as an empty draft.
  var EMPTY_BODY_MAX_LENGTH = 24;
  var drafts = GmailApp.getDraftMessages();
  for (var i = 0; i < drafts.length; i++) {
    // Declared locally so the body does not leak into an implicit global.
    var body = drafts[i].getBody();
    if (body.length <= EMPTY_BODY_MAX_LENGTH) {
      drafts[i].moveToTrash();
    }
  }
}

ScriptDB object size calculation

I'm trying to estimate the limits of my current GAS project. I use ScriptDB to chunk out processing to get around the 6 min execution limit. If I have an object like
// Schematic of the stored record: each property name is followed by a comment
// describing the value it holds (illustrative only, not runnable as written).
var userObj{
id: //user email address
count: //integer 1-1000
trigger: //trigger ID
label: //string ~30 char or less
folder: //Google Drive folder ID
sendto: //'true' or 'false'
shareto: //'true' or 'false'
}
How would I calculate the size that this object takes up in the DB? I would like to project how many of these objects can exist concurrently before I reach the 200MB limit for our domain.
Whenever you've got a question about google-apps-script that isn't about the API, try searching for javascript questions first. In this case, I found JavaScript object size, and tried out the accepted answer in apps-script. (Actually, the "improved" accepted answer.) I've made no changes at all, but have reproduced it here with a test function so you can just cut & paste to try it out.
Here's what I got with the test stud object, in the debugger.
Now, it's not perfect - for instance, it doesn't factor in the size of the keys you'll use in ScriptDB. Another answer took a stab at that. But since your object contains some potentially huge values, such as an email address which can be 256 characters long, the key lengths may be of little concern.
// https://stackoverflow.com/questions/1248302/javascript-object-size/11900218#11900218
/**
 * Roughly estimates the number of bytes taken up by a value and everything
 * reachable from it.
 *
 * Counts 4 bytes per boolean, 2 bytes per string character and 8 bytes per
 * number. Property names are not counted, and values of any other type
 * (undefined, null, functions) contribute nothing. Each object is visited
 * only once, so cyclic references terminate.
 *
 * @param {*} object - The value to measure.
 * @return {number} The estimated size in bytes.
 */
function roughSizeOfObject( object ) {
  var objectList = [];
  var stack = [ object ];
  var bytes = 0;

  while ( stack.length ) {
    var value = stack.pop();
    var kind = typeof value;

    if ( kind === 'boolean' ) {
      bytes += 4;
    }
    else if ( kind === 'string' ) {
      bytes += value.length * 2;
    }
    else if ( kind === 'number' ) {
      bytes += 8;
    }
    else if (
      kind === 'object'
      && value !== null
      && objectList.indexOf( value ) === -1
    ) {
      // Remember the object so it is not counted again.
      objectList.push( value );
      // `key` is declared locally so no implicit global is created.
      for ( var key in value ) {
        stack.push( value[ key ] );
      }
    }
  }
  return bytes;
}
/**
 * Constructor for a marks record; every instance starts with a
 * maxMarks of 100.
 */
function Marks() {
  var fullScore = 100;
  this.maxMarks = fullScore;
}
/**
 * Constructor for a sample student record with placeholder names and
 * a fresh Marks instance.
 */
function Student() {
  var scoreCard = new Marks();
  this.firstName = "firstName";
  this.lastName = "lastName";
  this.marks = scoreCard;
}
/**
 * Demo driver: measures a freshly built Student with roughSizeOfObject
 * and then pauses in the debugger so studSize can be inspected.
 */
function test () {
  var studSize = roughSizeOfObject(new Student());
  debugger;
}