NodeJs Store Json from multiple requests to MySQL DB

I am working on a crawler that stores the retrieved and parsed data into my MySQL DB.
Storing the results works pretty well, but I am stuck when it comes to ending the connection.
I am using request, cheerio, async and mysql:
// DB params
var con = mysql.createConnection({
host: "localhost",
user: "user",
password: "password",
database: "mydatabase",
});
// Open connection to DB
con.connect(function(err) {
if (err) {
console.log('Error connecting to Db');
return;
}
console.log('Connection established');
});
// Array of Cities to crawl
var cities = ["olten", "zurich"];
// Todays Date
var today = new Date();
...
today = dd + '.' + mm + '.' + yyyy;
// Search every city
async.each(cities, function(item, callback){
// The Page to crawl
var page = "https://www.thepageto.com/search_xhr?fn=" + /*cities[i]*/ item + "&tn=&db=" + today + "&sort=trip_date&order";
console.log("Visiting page " + page);
request(page, function(error, response, body) {
if (error) {console.log("Error: " + error);}
// Check status code (200 is HTTP OK)
console.log("Status code: " + response.statusCode);
if (response.statusCode === 200) {
// Parse JSON
var data = JSON.parse(body);
// Use only result > html code
var content = data.html.results;
// Parse html
var $ = cheerio.load(content);
// Iterate through offers & Extract information & Store in a Object
$('.trip.relative').each(function() {
// Empty Object, has the same structure as the Db table
var json = {id: "", driver: "", rating: "", date: "", time: "", start: "", stops: "", end: "", price: "", url: "", query: ""};
// Assigning values
json.driver = $(this).find('.username').text();
...
// Save filled object in Db
save(json, callback);
});
}
});
}, function(err){
if (err) {
console.log('could not save');
} else {
console.log('lets end connection');
con.end(function(err) {if (err) throw err;});
}
}
);
function save(item, callback) {
con.query('INSERT INTO offers SET ?', item, callback);
}
The problem is that the DB connection is ended before anything can be stored in the DB. The con.end() always fires before the actual query, or before the query completes. I have the impression that the save function has to return something when it's done. If I run the script without con.end(), the data is stored in the DB perfectly.
I am new to Node so I am still learning and hopefully you can help me with this.
Edit:
I now get an:
ReferenceError: callback is not defined
at save (/home/juki/crawler/crawler.js:143:46)
As well as an:
Error: Callback was already called.
at Query._callback (/home/juki/crawler/node_modules/async/dist/async.js:839: 36)

It looks like you do not call callback from your crawler.
You also do not seem to wait for save to finish. Try to rework the save function so that it takes a callback too. Something like:
function save(item, callback) {
con.query('INSERT INTO offers SET ?', item, callback);
}
And then call it like:
save(json, callback);
where callback is the one you get from async.each.
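Note that the "Callback was already called" error from the edit comes from invoking that per-city callback once per offer. A minimal sketch of one way around it, assuming the rest of the crawler stays as posted: collect the row objects first, then let an inner async.each drive the inserts and call the city-level callback exactly once, when all of them have finished.
// Sketch only: same $, con and async variables as in the question.
var rows = [];
$('.trip.relative').each(function() {
  var json = {id: "", driver: "", rating: "", date: "", time: "", start: "", stops: "", end: "", price: "", url: "", query: ""};
  json.driver = $(this).find('.username').text();
  // ... fill the remaining fields as before ...
  rows.push(json);
});
// Insert every collected row, then report back to the outer loop exactly once.
async.each(rows, function(row, cb) {
  con.query('INSERT INTO offers SET ?', row, cb);
}, callback); // "callback" is the per-city callback from the outer async.each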


Save Query result into Variable Alexa Skills Json

I needed a DB for an Alexa app, so I set one up and it INSERTs nicely, but when I try to SELECT and save the result to a variable, the value saved to the variable is [Object Object] instead of the wanted value. I know it could be an async problem or a parsing problem, but I just can't fix the code; some help would be cool.
canHandle(handlerInput) {
return Alexa.getRequestType(handlerInput.requestEnvelope) === 'IntentRequest'
&& Alexa.getIntentName(handlerInput.requestEnvelope) === 'buscaIntent';
},
handle(handlerInput) {
const mysql = require('mysql');
const connection = mysql.createConnection
({
host: 'remotemysql.com',
user: 'RBb34534sd',
password: 'xxxxxxxxx',
database: 'RBsdfewrg'
});
var stat = connection.query('SELECT `spe` FROM `prueba` WHERE `nombre` LIKE "raichu" limit 1', function (err, result, fields) {
if (err) throw err;
console.log(result);
return result[0];
});
connection.end();
return handlerInput.responseBuilder
.speak("Busc " + stat)
.reprompt("reprompt buscar")
.getResponse();
}
};
The issue is that you're not waiting for your database query to complete before sending your response to the Alexa service. Requests in node.js are non-blocking, meaning you either need to nest the request with a callback, or leverage Promises / async-await patterns so that the SQL query is processed before the function is fully executed.
You can read more on converting the built-in library for SQL connections to support Promises here, or use a library like this that already has a wrapper in place.
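For reference, a minimal sketch of what promisifying the stock mysql driver can look like with Node's built-in util.promisify; the query text is the one from the question, and the mysql2/promise refactor below avoids even this step:
const util = require('util');
const mysql = require('mysql');

const connection = mysql.createConnection({ /* same credentials as above */ });
// promisify resolves with the first success argument of the callback, i.e. the rows
const query = util.promisify(connection.query).bind(connection);

async function getSpe() {
  const rows = await query('SELECT `spe` FROM `prueba` WHERE `nombre` LIKE "raichu" LIMIT 1');
  return rows[0] && rows[0].spe;
}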
In either scenario, the end result would be refactored to something like this:
canHandle(handlerInput) {
return Alexa.getRequestType(handlerInput.requestEnvelope) === 'IntentRequest'
&& Alexa.getIntentName(handlerInput.requestEnvelope) === 'buscaIntent';
},
async handle(handlerInput) {
const mysql = require('mysql2/promise');
const connection = await mysql.createConnection
({
host: 'remotemysql.com',
user: 'RBb34534sd',
password: 'xxxxxxxxx',
database: 'RBsdfewrg'
});
// execute() on mysql2/promise returns a promise resolving to [rows, fields]
const [rows] = await connection.execute('SELECT `spe` FROM `prueba` WHERE `nombre` LIKE "raichu" LIMIT 1');
console.log(rows);
const stat = rows[0] && rows[0].spe;
await connection.end();
return handlerInput.responseBuilder
.speak("Busc " + stat)
.reprompt("reprompt buscar")
.getResponse();
}
Another article describing async calls for Alexa requests here.
I think the query is returning an object, and you can't put an object directly into the speech output. Check what's inside the object, and if there is a field you want inside that object, access it with stat.YourField.

Write JSON to mysql database with node.js

I'm trying to write a JSON object (or string, unsure) to my mysql database using node.js. I first retrieved the JSON via an xml url using xml2js. I am able to log the json string result in my console via JSON.stringify, but I am unsure how to proceed from here.
Here is the url I took the xml from: https://water.weather.gov/ahps2/hydrograph_to_xml.php?gage=deld1&output=xml
I would like to write each instance from the JSON string to a row, with the columns as the name of the data. It would look something like this:
Here is my code in index.js, which I enact with node index.js on the console:
var parseString = require('xml2js').parseString;
var http = require('http');
var https = require('https');
var mysql = require('mysql');
var con = mysql.createConnection({
host: "localhost",
user: "root",
password: "password",
database: "mydb"
});
function xmlToJson(url, callback) {
var req = https.get(url, function(res) {
var xml = '';
res.on('data', function(chunk) {
xml += chunk;
});
res.on('error', function(e) {
callback(e, null);
});
res.on('timeout', function(e) {
callback(e, null);
});
res.on('end', function() {
parseString(xml, function(err, result) {
callback(null, result);
});
});
});
}
var url = "https://water.weather.gov/ahps2/hydrograph_to_xml.php?gage=deld1&output=xml"
xmlToJson(url, function(err, data) {
if (err) {
return console.error(err);
}
strungout = JSON.stringify(data, null, 1);
console.log(strungout);
//strungout contains my json string
})
con.connect(function(err) {
if (err) throw err;
//below is where I might make an insert statement to insert my values into a mysql table
var sql = someinsertstatement
con.query(sql, function (err, result) {
if (err) throw err;
console.log("records inserted");
res.end();
});
});
As mentioned, when I run the above code in my console, the console returns the JSON, though I am unsure how to assign this to a variable that I can then write into my mysql database.
Alternatively, if there is an easier way to write xml from a website directly to my mysql database, I would certainly appreciate any pointers. I feel like it should be easier than this, but I am new to pretty much all of it.
EDIT:
Adding the JSON. I removed the line breaks to consolidate it. Trying to assign the result '4.68' to a variable.
data = {"site": {"observed": [{"datum": [{"valid": [{"_": "2019-02-21T19:42:00-00:00","$": {"timezone": "UTC"}}],"primary": [{"_": "4.68","$": {"name": "Stage","units": "ft"}}]}]}]}};
Thank you.
This worked on my end. I found that the main data you seek is under site.observed.datum.
const parser = require('xml2json');
const request = require("request");
var mysql = require('mysql');
var con = mysql.createConnection({
host: "localhost",
user: "root",
password: "password",
database: "mydb"
});
var api_url = 'https://water.weather.gov/ahps2/hydrograph_to_xml.php?gage=deld1&output=xml';
function xmlToJson(url, callback){
return request({
method: 'GET',
url: api_url,
}, function (error, response, body) {
if (error) {
return callback({
errorResponse: error,
rowsToInsert: false
});
}else{
let jsonRes = JSON.parse(parser.toJson(body));
let datumResult = jsonRes.site.observed.datum; // I had to log Object.keys multiple times to find this path
const readyForDB = datumResult.map(x => {
let timeOfReading = x.valid.$t;
let stage = x.primary.$t;
let flow = x.secondary.$t;
return [
timeOfReading, stage, flow
]
});
return callback({
errorResponse: false,
rowsToInsert: readyForDB
});
}
})
}
return xmlToJson(api_url, ({errorResponse, rowsToInsert}) => {
if(errorResponse){
throw errorResponse;
}
return con.connect(function(err) {
if (err) throw err;
//below is where I might make an insert statement to insert my values into a mysql table
var sql = "INSERT INTO forecast (timeOfReading, stage, flow) VALUES ?"
con.query(sql, [rowsToInsert], function (err, result) {
if (err) throw err;
console.log(result.affectedRows + " rows inserted");
});
});
});
Sounds like you have the JSON you want but are unsure how to access data within it. Correct me if I'm wrong.
Let's say you have this JSON object called "test":
{
  a: 1,
  b: {
    x: 2
  }
}
You can access the value of 1 by calling test.a, and similarly access the value of 2 by calling test.b.x
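Applied to the JSON from the question's edit: xml2js wraps repeated elements in arrays and puts the text content under the "_" key, so (assuming the structure shown above) the "4.68" reading can be pulled out like this:
// "data" is the parsed object from the question's edit.
var stage = data.site.observed[0].datum[0].primary[0]._;     // "4.68"
var readingTime = data.site.observed[0].datum[0].valid[0]._; // "2019-02-21T19:42:00-00:00"
console.log(stage, readingTime);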

Nodejs, Cloud Firestore Upload Tasks - Auth error:Error: socket hang up

I'm coding a function that runs API calls and requests JSON from a huge database in sequence via offsets. The JSON response is parsed and then the subsequent data within is uploaded to our Cloud Firestore server.
Nodejs (Node 6.11.3) & Latest Firebase Admin SDK
The information is parsed as expected, and prints to the console perfectly. When the data attempts to upload to our Firestore database however, the console is spammed with the error message:
Auth error:Error: socket hang up
(node:846) UnhandledPromiseRejectionWarning: Unhandled promise rejection
(rejection id: -Number-): Error: Getting metadata from plugin failed with
error: socket hang up
and occasionally:
Auth error:Error: read ECONNRESET
The forEach function collects the items from the downloaded JSON and processes the data before uploading to the Firestore database. Each JSON has up to 1000 items of data (1000 documents worth) to pass through the forEach function. I understand that this might be a problem if the function repeats before the upload set finishes?
I'm a coding newbie and understand that the control flow of this function isn't the best. However, I can't find any information on the error that the console prints. I can find plenty of information on socket hang ups, but none on the Auth error section.
I'm using a generated service account JSON as a credential to access our database, which uses the firebase-adminsdk account. Our read/write rules for the database are currently open to allow any access (as we're in development with no real users).
Here's my function:
Firebase initialisation & offset zero-ing
const admin = require('firebase-admin');
var serviceAccount = require("JSON");
admin.initializeApp({
credential: admin.credential.cert(serviceAccount),
databaseURL: "URL"
});
var db = admin.firestore();
var offset = 0;
var failed = false;
Running the function & setting HTTP Headers
var runFunction = function runFunction() {
var https = require('https');
var options = {
host: 'website.com',
path: (path including an offset and 1000 row specifier),
method: 'GET',
json: true,
headers: {
'content-type': 'application/json',
'Authorization': 'Basic ' + new Buffer('username' + ':' + 'password').toString('base64')
}
};
Running the HTTP Request & Re-running the function if we haven't reached the end of the response from the API
if (failed === false) {
var req = https.request(options, function (res) {
var body = '';
res.setEncoding('utf8');
res.on('data', function (chunk) {
body += chunk;
});
res.on('end', () => {
console.log('Successfully processed HTTPS response');
body = JSON.parse(body);
if (body.hasOwnProperty('errors')) {
console.log('Body ->' + body)
console.log('API Call failed due to server error')
console.log('Function failed at ' + offset)
req.end();
return
} else {
if (body.hasOwnProperty('result')) {
let result = body.result;
if (Object.keys(result).length === 0) {
console.log('Function has completed');
failed = true;
return;
} else {
result.forEach(function (item) {
var docRef = db.collection('collection').doc(name);
console.log(name);
var upload = docRef.set({
thing: data,
thing2: data,
})
});
console.log('Finished offset ' + offset)
offset = offset + 1000;
failed = false;
}
if (failed === false) {
console.log('Function will repeat with new offset');
console.log('offset = ' + offset);
req.end();
runFunction();
} else {
console.log('Function will terminate');
}
}
}
});
});
req.on('error', (err) => {
console.log('Error -> ' + err)
console.log('Function failed at ' + offset)
console.log('Repeat from the given offset value or diagnose further')
req.end();
});
req.end();
} else {
req.end();
}
};
runFunction();
Any help would be greatly appreciated!
UPDATE
I've just tried changing the rows of JSON that I pull at a time and subsequently upload at a time using the function - from 1000 down to 100. The socket hang up errors are less frequent so it is definitely due to overloading the database.
Ideally, each forEach iteration would wait for the previous iteration to complete before starting.
UPDATE #2
I've installed the async module and I'm currently using the async.eachSeries function to perform one document upload at a time. All errors mid-upload disappear - however the function will take an insane amount of time to finish (roughly 9 hours for 158,000 documents). My updated loop code is this, with a counter implemented:
async.eachSeries(result, function (item, callback) {
// result.forEach(function (item) {
var docRef = db.collection('collection').doc(name);
console.log(name);
var upload = docRef.set({
thing: data,
thing2: data,
}, { merge: true }).then(ref => {
counter = counter + 1
if (counter == result.length) {
console.log('Finished offset ' + offset)
offset = offset + 1000;
console.log('Function will repeat with new offset')
console.log('offset = ' + offset);
failed = false;
counter = 0
req.end();
runFunction();
}
callback()
});
});
Also, after a period of time the database returns this error:
(node:16168) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: -Number-): Error: The datastore operation timed out, or the data was temporarily unavailable.
It seems as if now my function is taking too long... instead of not long enough. Does anyone have any advice on how to make this run faster without stated errors?
The write requests as part of this loop were simply exceeding Firestore's quota - thus the server was rejecting the majority of them.
To solve this issue I converted my requests to upload in chunks of 50 or so items at a time, with Promises confirming when to move onto the next chunk upload.
The answer was posted here -> Iterate through an array in blocks of 50 items at a time in node.js, and the template for my working code is as below:
async function uploadData(dataArray) {
try {
const chunks = chunkArray(dataArray, 50);
for (const [index, chunk] of chunks.entries()) {
console.log(` --- Uploading ${index + 1} chunk started ---`);
await uploadDataChunk(chunk);
console.log(`---Uploading ${index + 1} chunk finished ---`);
}
} catch (error) {
console.log(error)
// Catch an error here
}
}
function uploadDataChunk(chunk) {
return Promise.all(
chunk.map((item) => new Promise((resolve, reject) => {
setTimeout(
() => {
console.log(`Chunk item ${item} uploaded`);
resolve();
},
Math.floor(Math.random() * 500)
);
}))
);
}
function chunkArray(array, chunkSize) {
return Array.from(
{ length: Math.ceil(array.length / chunkSize) },
(_, index) => array.slice(index * chunkSize, (index + 1) * chunkSize)
);
}
Pass the data array to uploadData using uploadData(data); and put your per-item upload code inside the setTimeout block (before the resolve() line) within the chunk.map function of uploadDataChunk.
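For illustration, here is a sketch of uploadDataChunk with a real write in place of the setTimeout placeholder; db, the 'collection' name and the field names are assumptions carried over from the question's code, not a definitive implementation:
// Sketch only: the document id and field names are hypothetical.
function uploadDataChunk(chunk) {
  return Promise.all(
    chunk.map((item) =>
      db.collection('collection')
        .doc(item.name) // hypothetical: derive the document id from the item
        .set({
          thing: item.thing,   // hypothetical field names
          thing2: item.thing2,
        })
        .then(() => console.log(`Chunk item ${item.name} uploaded`))
    )
  );
}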
I got around this by chaining the promises in the loop with a wait of 50 milliseconds in between each.
function Wait() {
return new Promise(r => setTimeout(r, 50))
}
function writeDataToFirestoreParentPhones(data) {
let chain = Promise.resolve();
for (let i = 0; i < data.length; ++i) {
var docRef = db.collection('parent_phones').doc(data[i].kp_ID_for_Realm);
chain = chain.then(()=> {
var setAda = docRef.set({
parent_id: data[i].kf_ParentID,
contact_number: data[i].contact_number,
contact_type: data[i].contact_type
}).then(ref => {
console.log(i + ' - Added parent_phones with ID: ', data[i].kp_ID_for_Realm);
}).catch(function(error) {
console.error("Error writing document: ", error);
});
})
.then(Wait)
}
}
For me this turned out to be a network issue.
Uploading 180,000 documents in batches of 10,000 had been no trouble for me before, but today, using a public, slower Wi-Fi connection, I received that error.
Switching back to my 4G mobile connection sorted the problem for me. Not sure whether it's a speed issue - could have been a security issue - but I'll go with that assumption.

Large JSON Data not shown in console

I am using node.js and created a module to fetch data from the SQL Server database. The data is retrieved as JSON.
Here is the code
b.js
var mssql = require('mssql');
var config = {
user: 'sa',
password: 'scott',
server: 'MSSQL2008',
database: 'AdventureWorks',
stream: false
};
var msconnection = new mssql.Connection(config, function (err) {
if (err) console.log(err);
});
module.exports.getCustomersDetails = function (callback, id) {
var request = new mssql.Request(msconnection);
//Add Parameters to the SP
if (id != null) {
request.input('ID', id);
}
request.execute('CUSTOMER_DETAILS_GET', function (err, recordsets, returnValue) { // get data
if (err) console.log(err);
responseContent = {
recordDataKey: 'data',
data: recordsets[0]
};
callback(responseContent);
});
};
a.js
var c = require('./b.js');
c.getCustomersDetails(function (responsecontent) {
console.log(responsecontent);
}, '101,202,303,505,808, 100, 200, 300, 400');
When I run a.js the console shows nothing, and that's because there are a lot of records in the table.
Can anybody suggest how can I get all the records? Any help would be much appreciated.
Edit: I have 25 columns in one row, so it brings me the top 140 records fine, but when I change the stored procedure to return the top 150 records or all of them, it does not show anything in the console.
Update:
Can anybody help me and suggest how we can do streaming in this case?
Asynchronicity is back to bite you!
var c = require('./b.js');
// start loading the db
// it's not ready yet when you call the function!
c.getCustomersDetails(function (responsecontent) {
console.log(responsecontent);
}, '101,202,303,505,808, 100, 200, 300, 400');
You'll need to rewrite your module to use an initialization function and callback. I would do something like:
c.init(function(){
c.getCustomersDetails(); // ...
});
and
var msconnection;
exports.init = function(callback){
msconnection = new mssql.Connection(config, function (err) {
if (err) console.log(err);
callback();
});
};
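With that in place, a.js might look roughly like this (a sketch reusing the call from the question):
var c = require('./b.js');

c.init(function () {
  // the connection is only guaranteed to be ready inside this callback
  c.getCustomersDetails(function (responsecontent) {
    // stringify so deeply nested rows are printed in full rather than truncated
    console.log(JSON.stringify(responsecontent, null, 2));
  }, '101,202,303,505,808, 100, 200, 300, 400');
});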

Auto-create mysql table with StrongLoop

I am trying to use Strongloop with MySql but cannot figure out how to migrate or automatically create tables into a MySql database.
Is there at least a way to export the models into MySql schemas or do I have to manually create the tables?
I've been trying with the mysql demo app, and going over the docs for a while but no luck - http://docs.strongloop.com/display/DOC/MySQL+connector
Thanks!
I created /server/boot/autoupdate.js. It runs when the app boots. It loads "model-config" and "datasources" JSON and migrates or updates all models to the datasources defined for them.
// /server/boot/autoupdate.js
module.exports = function(app) {
var path = require('path');
var models = require(path.resolve(__dirname, '../model-config.json'));
var datasources = require(path.resolve(__dirname, '../datasources.json'));
function autoUpdateAll(){
Object.keys(models).forEach(function(key) {
if (typeof models[key].dataSource != 'undefined') {
if (typeof datasources[models[key].dataSource] != 'undefined') {
app.dataSources[models[key].dataSource].autoupdate(key, function (err) {
if (err) throw err;
console.log('Model ' + key + ' updated');
});
}
}
});
}
function autoMigrateAll(){
Object.keys(models).forEach(function(key) {
if (typeof models[key].dataSource != 'undefined') {
if (typeof datasources[models[key].dataSource] != 'undefined') {
app.dataSources[models[key].dataSource].automigrate(key, function (err) {
if (err) throw err;
console.log('Model ' + key + ' migrated');
});
}
}
});
}
//TODO: change to autoUpdateAll when ready for CI deployment to production
autoMigrateAll();
//autoUpdateAll();
};
You can simply migrate models by adding the following lines to your server.js file before the app.start method:
app.datasources['mySqlConnection'].automigrate(['orders','customers', 'User', 'ACL'], function(err) {
console.log(err);
});
Add models to the array as per your need.
Run the application by slc run.
Note: mySqlConnection is the connection name, replace it by your own connection name.
To update and/or create all mysql tables for your models (here cb is the callback of the surrounding function):
var dataSource = app.dataSources.mysql;
dataSource.autoupdate(null, function (err) {
if(err) return cb(err);
return cb();
});
LoopBack calls it auto-migration. Check these links and search for that term:
Recipes for LoopBack Models, part 5 of 5: Model Synchronization with Relational Databases
Data sources and connectors
In my case, I manually created the MySQL tables and then created the models. For existing MySQL tables, I create the models with property names that match the MySQL field names.
So here are my steps in using StrongLoop LoopBack with MySQL Database:
Create MySQL Database and Tables (or use existing database).
Install MySQL connector using npm install loopback-connector-mysql --save
Add your MySQL Database details on datasources.json file.
Create a model for each table using slc lb model tablename -i OR edit models.json file and add the properties manually. (document: http://docs.strongloop.com/display/DOC/Creating+a+LoopBack+application#CreatingaLoopBackapplication-Creatingmodels)
Property names should be the same as the MySQL field names (more information on mapping MySQL to JSON data types: http://docs.strongloop.com/display/DOC/MySQL+connector#MySQLconnector-MySQLtoJSONtypes)
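For illustration only, a model definition for an existing customer table might look roughly like this; the exact file layout depends on your LoopBack version, and the table and column names here are hypothetical:
{
  "name": "Customer",
  "base": "PersistedModel",
  "options": {
    "mysql": { "table": "customer" }
  },
  "properties": {
    "id": { "type": "number", "id": true },
    "firstName": { "type": "string", "mysql": { "columnName": "first_name" } }
  }
}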
On a related note, if you need to automatically create the database itself, you can use the createDatabase option in your dataSource JSON file.
"mysql": {
"host": "localhost",
"port": 0,
"database": "db",
"username": "root",
"password": "",
"name": "mysql",
"connector": "mysql",
"debug": false,
"createDatabase": true
}
So you don't need to write the queries to create the database yourself.
Hope it helps.
jduhls' answer is beautiful, but I needed to tweak it slightly to add some static data into the tables. Here's my tweaked version, along with an example of loading data into a simple SystemSettings table (id, settingName, settingValue):
var async = require('async');
var SYSTEM_SETTINGS = [
{
"settingName": "mustPayInAdvance",
"settingValue": "false",
}
];
module.exports = function(app) {
var path = require('path');
var models = require(path.resolve(__dirname, '../model-config.json'));
var datasources = require(path.resolve(__dirname, '../datasources.json'));
var modelUpdates = [];
function buildModelListForOperation(){
Object.keys(models).forEach(function(key) {
if (typeof models[key].dataSource != 'undefined') {
if (typeof datasources[models[key].dataSource] != 'undefined') {
modelUpdates.push({operation: app.dataSources[models[key].dataSource], key: key});
}
}
});
}
function createStaticData() {
app.models.SystemSettings.create(SYSTEM_SETTINGS, function(err, created) {
if (err)
throw err;
else
console.log('Sample data was imported.');
});
}
function processModelsAndData(operationType) {
buildModelListForOperation();
// Create all models
async.each(modelUpdates, function(item, callback) {
item.operation[operationType](item.key, function (err) {
if (err) throw err;
console.log('Model ' + item.key + ' migrated');
callback();
});
}, function (err) {
if (err) throw err;
createStaticData();
});
}
//TODO: change to 'autoupdate' when ready for CI deployment to production
processModelsAndData('automigrate');
};
I discovered an easy way to accomplish this task. The reference link is: Click here
You can use it with a prototype or not; in my case, I did not.
According to the documentation, you should use:
ds.autoupdate(models, function (error) {
  if (!error) {
    console.log("Updated models.");
  } else {
    console.log("An error has occurred: " + error);
  }
  ds.disconnect();
});
Where:
var path = require('path');
var app = require(path.resolve(__dirname, '../server/server'));
var ds = app.datasources.x;
and x is the datasource attribute name, as in this example of /server/datasources.json:
{
  "x": {
    "host": "localhost",
    "port": 3306,
    "database": "loopapp",
    "password": "",
    "name": "x",
    "user": "root",
    "connector": "mysql"
  }
}
Note (1): models can be a single model name (string) or an array of model names.
Note (2): If you leave models out, every model whose base attribute equals "PersistedModel" will be updated.
With that, I used it like this:
function autoupdate() {
  ds.autoupdate(function (error) {
    if (!error) {
      console.log("Updated all models");
    } else {
      console.log("An error has occurred: " + error);
    }
    ds.disconnect();
  });
}
and then I called autoupdate();
You can put this code in a file.js and run it from the command line with node file.js.
If you want this file to run every time you start the program, put it in the /server/boot/ path.
Obviously, if you want to use automigrate instead, just replace autoupdate with automigrate in the code above.