reading/writing large files with PapaParse/BabyParse - json

I have a large CSV file (~500 MB) that I want to convert to JSON using BabyParse (the Node version of PapaParse). With smaller files I can read the CSV into a string and then pass the string to parse. However, a 500 MB file is too big to be read into a string in this way.
I have a workaround that reads the CSV file as a stream line-by-line, but it's horrendously slow (see below). Can someone tell me a faster way to work with large CSV files in Papa/Baby parse?
// Convert a CSV file to JSON one line at a time, so the whole file is never
// held in memory as a single string.
var Baby = require('babyparse');
var fs = require('fs');
var readline = require('readline');
var stream = require('stream');
var file = '500mbbigtest.csv';
//var content = fs.readFileSync(file, { encoding: 'binary' }); DOESN'T WORK
// Stream the same file named in `file` instead of repeating the literal.
var instream = fs.createReadStream(file);
var outstream = new stream;
var rl = readline.createInterface(instream, outstream);
rl.on('line', function(line) {
  // Keep these local: assigning without a declaration would create implicit
  // globals (and throws a ReferenceError in strict mode).
  var parsed = Baby.parse(line, {fastMode: false});
  var rows = JSON.stringify(parsed.data);
  // One synchronous append per input line.
  fs.appendFileSync("blahblahblah.json", rows);
});

Related

Faster digest(SHA256) calculation with Google App Script

I'm looking for a faster SHA256 hash calculation.
My current implementation looks like this where url points to the uploaded file on telegram server
var response = UrlFetchApp.fetch(url);
var fileText = response.getContent();
var bytes = Utilities.computeDigest(Utilities.DigestAlgorithm.SHA_256, fileText);
var hexstr = bytes.map(byte => ('0' + (byte & 0xFF).toString(16)).slice(-2)).join('');
return hexstr;
which takes 4-5 seconds for a 2MB file.
For the performance comparison, I took #filehashing_bot telegram bot, and it calculates hashes way faster than my implementation.
Kindly suggest a better and faster solution. How can I improve my implementation? What could be the possible ways?
I was reading about crypto.subtle.digest() and implemented something for local files and it's working way faster
/**
 * Hash the file chosen in the `fileinput` element with SHA-256 and show the
 * lowercase hex digest in the `hash` element.
 *
 * Does nothing when no file has been selected.
 *
 * @returns {Promise<void>} settles once the digest has been written to the page
 */
async function myFunction() {
  const finput = document.getElementById('fileinput');
  const file = finput.files[0];
  if (!file) {
    // Nothing selected yet; avoid calling arrayBuffer() on undefined.
    return;
  }
  const arrayBuffer = await file.arrayBuffer();
  const hashBuffer = await crypto.subtle.digest('SHA-256', arrayBuffer);
  // Two hex digits per digest byte.
  const hashHex = Array.from(new Uint8Array(hashBuffer), (b) => b.toString(16).padStart(2, '0')).join('');
  // The digest is plain text, so write it as text rather than as HTML.
  document.getElementById('hash').textContent = hashHex;
}
Can it be used in app script?

How to export JSON-like data in spreadsheet column to json file?

I have a Google sheet with JSON-like data in a column and would like to export this column as a JSON file. I have tried using javascript along with xlsx package to convert the sheet to json file but it adds backslashes to the column and cannot be parsed (throws syntax error) using JSON.parse() as it does not recognise it as valid json. Any help is appreciated!
const xlsx = require("xlsx");
const path = require("path");
const fs = require("fs");

// Load the workbook that sits next to this script.
const inputFilePath = path.join(__dirname, './Sample.xlsx');
const workbook = xlsx.readFile(inputFilePath);
// Convert worksheet 'Sheet1' to JSON rows and take the "content" field of the first one.
const rows = xlsx.utils.sheet_to_json(workbook.Sheets['Sheet1']);
const firstContent = rows[0]["content"];
console.log(JSON.parse(firstContent)); //throws error
Here is an example that will write the data without backslash (do not use JSON.stringify in this case). The file will be in 'test' folder here, that you have to create or change in the script.
// you need to activate the Advanced Drive Service (Drive Activity API).
/**
 * Read cell A1 of the active sheet and pass it to saveData together with the
 * file name 'myJSON.json' and the first folder that
 * DriveApp.getFoldersByName("test") yields.
 * Does nothing when no folder with that name is found.
 */
function test() {
  var cellValue = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet().getRange('A1').getValue();
  var matches = DriveApp.getFoldersByName("test");
  if (!matches.hasNext()) {
    return;
  }
  saveData(matches.next(), 'myJSON.json', cellValue);
}
/**
 * Write `content` to the file called `fileName` inside `folder`.
 * When the folder already has a file with that name, the first match is
 * overwritten; otherwise a new file is created.
 *
 * @param folder   object exposing getFilesByName and createFile
 * @param fileName name of the file to write
 * @param content  data to store in the file
 */
function saveData(folder, fileName, content) {
  var existing = folder.getFilesByName(fileName);
  if (existing.hasNext()) {
    existing.next().setContent(content);
    return;
  }
  folder.createFile(fileName, content);
}
https://docs.google.com/spreadsheets/d/1PWzdlaZi2m0a1xDiqLp2eJXIvx-AyvZ16CQW362q-Nw/edit?usp=sharing
Of course, replace A1 by B2 for your file.

How to set utf-8 in csvtojson?

I want to extract the data from a CSV file, and for that I am using the csvtojson npm package. It works fine, but if the CSV file contains any foreign letters, it returns "��" in my JSON. Can anyone guide me here?
var converter = require("csvtojson");
function(req, data){
var array = [];
var json = await convert().fromFile(filepath.csv);
array.push(json)
// continuation of my code here
}
try this
var converter = require("csvtojson");
// Call the factory through the name it was required under (`converter`) and
// pass { encoding: 'binary' } as the second argument to fromFile.
var json = await converter().fromFile(filepath.csv, { encoding: 'binary' });

Access JSON position in Node.js

I have a JSON string in this format:
[
{
"Origin":{
"FtpHost":"info",
"FtpFolder":"info",
"FtpUser":"info",
"FtpPassword":"info",
"FtpInsideFolder":"info",
"Pattern":"info"
},
"Destination":{
"FtpHost":"info",
"FtpFolder":"info",
"FtpUser":"info",
"FtpPassword":"info",
"FtpInsideFolder":"info"
},
"CustomFolderName":"Conad",
"OperationTraverseType":"RootOnly"
}
]
To pick up the JSON I wrote this in Node.js:
const fs = require('fs');
// Because an encoding is given, `obj` holds the file's text as a string here.
const obj = fs.readFileSync('Operations.json', 'utf8');
I'm wondering, how I can access for example : "Destination" fields?
You must parse it with JSON.parse, because fs.readFileSync (called with an encoding) returns a string:
var fs = require('fs');
// The file is read as text, so parse it before indexing into it.
var obj = JSON.parse(fs.readFileSync('Operations.json', 'utf8'));
// The parsed value is indexed as an array; "Destination" lives on its first element.
var Destination = obj[0].Destination;
// or
var Destination = obj[0]["Destination"];
Edit (as said Diego)
You can also directly require json file
// require() parses a .json file for you. The leading "./" makes Node resolve
// the path relative to this file instead of looking for an installed package.
var obj = require('./somejsonfile.json');
var Destination = obj[0].Destination;
Just need to simply parse the read data. Something like this:
const fs = require('fs');
// Read the file as UTF-8 text, then turn that text into a JavaScript value.
const rawText = fs.readFileSync('Operations.json', 'utf8');
const obj = JSON.parse(rawText);
console.log(obj[0].Destination);
you can do like var myjson = JSON.parse(obj) or obj = JSON.parse(fs.readFileSync('Operations.json', 'utf8')) and then access it like obj[0]["Destination"]["FIELD"] where FIELD - represents the "Destination" object field you want

How to get the names of all png files in a folder using GetFilesAsync - WP8

I have a folder called AllPage. It contains a number of png files. I'm trying to retrieve the names of all the files in the folder using GetFilesAsync, but apparently it retrieves nothing! :(
This is how I tried
// Get the "AllPage" folder under `local`; OpenIfExists asks for the existing folder to be opened rather than failing on a name collision.
IStorageFolder dataFolder = await local.CreateFolderAsync("AllPage", CreationCollisionOption.OpenIfExists);
EDIT
// List the files in dataFolder and look at the first two entries.
var obj = await dataFolder.GetFilesAsync();
// NOTE(review): indexing assumes the folder holds at least two files — confirm.
var temp = obj[0];
var temp1 = obj[1];
// Always reports success; temp and temp1 are not used further in this excerpt.
return true;
obj gives me COM component. :( I'm so confused.
This is how I retrieved the file names :)
// Enumerate the files in the App.ALL_PAGE_FOLDER folder and pick out the PNG images.
IStorageFolder dataFolder = await local.GetFolderAsync(App.ALL_PAGE_FOLDER);
IEnumerable<IStorageFile> files = await dataFolder.GetFilesAsync();
foreach (IStorageFile anyfile in files)
{
    // Ordinal, case-insensitive comparison: ".PNG" is accepted too and the
    // result does not depend on the current culture.
    if (anyfile.Path.EndsWith(".png", System.StringComparison.OrdinalIgnoreCase))
    {
        var name = anyfile.Name;
    }
}