I am trying to push data from the Salesforce API to BigQuery using Google Apps Script. For this example, I am trying to push Opportunities to BigQuery.
I have a variable called opps, which is prepared by calling the Salesforce API and keeping only the data that I care about. It looks like this:
var opps = [];
for (var i in arrOpportunities.records) {
let data = arrOpportunities.records[i];
let createDate = Utilities.formatDate(new Date(data.CreatedDate), "GMT", "dd-MM-YYYY");
let modDate = Utilities.formatDate(new Date(data.LastModifiedDate), "GMT", "dd-MM-YYYY");
let a1 = 'C' + (parseInt(i, 10) + 2);
let companyFormula = '=IFERROR(INDEX(Accounts,MATCH(' + a1 + ',Accounts!$B2:$B,0),1),"")';
opps.push([data.Name, data.Id, data.AccountId, companyFormula, data.StageName, data.IsClosed, data.IsWon, createDate, modDate, data.ContactId, data['Region_by_manager__c'], data['Industry__c'], data['Customer_type__c'], data['acc__c'], data['Reason_for_lost_deal__c'], data['hs_deal_id__c'], data['Solution__c'], data['Solution_Elements__c']]);
}
Afterwards I check whether there is already a table in BigQuery by calling guideInsertData('Opportunities', 'Opportunities', opps); after the for loop above. It doesn't do the check itself; it calls another function for that, which is irrelevant for this discussion. This function just guides the process.
The function here looks like this:
function guideInsertData(tableName, headers, data) {
const tableExists = checkTables(tableName);
if (tableExists == false) {
let createTable = prepareSchema(headers, tableName);
if (createTable == false) {
throw 'Unable to create table';
}
}
insertData(tableName,data);
}
And then finally we get to the actual function for inserting the data:
function insertData(tableName, arrData) {
const projectId = 'lateral-scion-352013', datasetId = 'CRM_Data', tableId = tableName;
const job = {
configuration: {
load: {
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
},
skipLeadingRows: 1
}
}
};
const strData = arrData.join("\n");
const data = Utilities.newBlob(strData,"application/octet-stream");
Logger.log(strData);
Logger.log(data.getDataAsString());
try {
BigQuery.Jobs.insert(job, projectId, data);
let success = 'Load job started. Check on the status of it here: ' +
'https://console.cloud.google.com/home/activity?project='+projectId;
Logger.log(success);
return success;
} catch (err) {
Logger.log(err);
Logger.log('unable to insert job');
return 'unable to insert data';
}
}
This is based on https://developers.google.com/apps-script/advanced/bigquery#load_csv_data.
I convert everything into a blob as required. I can't see anything wrong in the log, and I even get the message that the load job has started.
However, when I check the activity status in BigQuery, it shows Failed: Complete BigQuery job, with these error messages:
Invalid argument (HTTP 400): Error while reading data, error message: Too many values in row starting at position: 234. Found 23 column(s) while expected 18.
Error while reading data, error message: CSV processing encountered too many errors, giving up. Rows: 0; errors: 1; max bad: 0; error percent: 0
You are loading data without specifying data format, data will be treated as CSV format by default. If this is not what you mean, please specify data format by --source_format.
How do I fix this error?
OK, so it looks like there was an issue in how the data was stringified. I had to stringify each value in the nested arrays first and then join the whole thing into CSV.
So in the end, the code looks like this:
const strData = arrData.map(values => values.map(value => JSON.stringify(value).replace(/\\"/g, '""'))); // quote every value, converting JSON's \" escapes to CSV's "" escapes
const csvData = strData.map(values => values.join(',')).join('\n'); // join values into rows and rows into one CSV string
const data = Utilities.newBlob(csvData, "application/octet-stream");
Also, all date values need to be in yyyy-MM-dd format, not dd-MM-yyyy.
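So the two formatDate calls from the loop above end up like this (same fields, just the date pattern changed):
let createDate = Utilities.formatDate(new Date(data.CreatedDate), "GMT", "yyyy-MM-dd");
let modDate = Utilities.formatDate(new Date(data.LastModifiedDate), "GMT", "yyyy-MM-dd");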
Sorry in advance for the long question. I am trying to create a Google Sheet that tells me how many hours each of my contractors has logged on Clockify each month. (Full code at the bottom.)
In short, my problem is building the JSON for the UrlFetchApp.fetch() request to the Clockify API using input from the Google Sheet.
I want the JSON to look like this:
var newJSON = {
"dateRangeStart": "2022-01-01T00:00:00.000",
"dateRangeEnd": "2022-01-31T23:59:59.000",
"summaryFilter": {
"groups": ["USER"],
"sortColumn": "GROUP"
}
}
var payload = JSON.stringify (newJSON);
And when I use this code, it works perfectly. However, the start and end dates are variables that I compute in the Google Sheet, as I need these dates to change each month. I wrote a function that gives me the correct outputs ("2022-01-01T00:00:00.000", "2022-01-31T23:59:59.000"), but when I reference the cells in Google Sheets, I get a 400 error saying that the API was not able to parse the JSON.
Function in Script:
function GetHours(userName, startDate, endDate) {
var newJSON = {
"dateRangeStart": startDate,
"dateRangeEnd": endDate,
"summaryFilter": {
"groups": ["USER"],
"sortColumn": "GROUP"
}
}
var payload = JSON.stringify (newJSON);
...}
Calling the function in sheets:
=GetHours(C3,$D$45,$D$46)
Output error message:
Exception: Request failed for https://reports.api.clockify.me returned code 400. Truncated server response: {"code":400,"message":"generateSummaryReport.arg1.dateRangeEnd: Field dateRangeEnd is required, generateSummaryReport.arg1.dateRangeStart: Field da... (use muteHttpExceptions option to examine full response)
A weird thing happens when I use Logger.log(payload), which may be the root of the problem. It appears that the code runs twice; the first time the payload JSON is correct, but the second time it is incorrect.
First time:
{"dateRangeStart":"2022-01-01T00:00:00.000","dateRangeEnd":"2022-01-31T23:59:59.000","summaryFilter":{"groups":["USER"],"sortColumn":"GROUP"}}
Second time:
{"summaryFilter":{"groups":["USER"],"sortColumn":"GROUP"}}
I have tried a bunch of solutions, but really it boils down to referencing the Google Sheet in the JSON. When I copy and paste the output of my date calculation into the JSON, it works. When I create a variable in Apps Script with the date calculation output, it works. When I return startDate, it gives me "2022-01-01T00:00:00.000", which is correct. I just don't understand what is going wrong. Thanks for your help!
Full code:
const APIbase = "https://api.clockify.me/api/v1"
const APIreportsbase = "https://reports.api.clockify.me/v1"
const myAPIkey = "[HIDDEN FOR PRIVACY]"
const myWorkspaceID = "[HIDDEN FOR PRIVACY]"
function GetHours(userName, startDate, endDate) {
var newJSON = {
"dateRangeStart": startDate,
"dateRangeEnd": endDate,
"summaryFilter": {
"groups": [
"USER"
],
"sortColumn": "GROUP"
}
}
var payload = JSON.stringify (newJSON);
var headers = {"X-Api-Key" : myAPIkey, "content-type" : "application/json"};
var url = APIreportsbase + '/workspaces/' + myWorkspaceID + '/reports/summary'
var options = {
"method": "post",
"contentType": "application/json",
"headers": headers,
"payload": payload,
"muteHttpExceptions" : false
};
var response = UrlFetchApp.fetch(url, options)
var json = response.getContentText();
var data = JSON.parse(json);
var people = data.groupOne;
for (let i = 0; i < people.length; i++) {
if (people[i].name == userName) {
if (people[i].duration == 0) {
return 0;
} else {
return people[i].duration/3600;
}
}
}
}
GetHours();
I got the program working by adding a filter so that the second time the program ran, it didn't affect the return value.
if (startDate != null) {
var response = UrlFetchApp.fetch(url, options)
var json = response.getContentText();
.....
}
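For reference, a sketch of how that guard can wrap the whole fetch-and-filter part of GetHours (assuming the same url, options and userName variables as in the full code above):
if (startDate != null && endDate != null) {
  var response = UrlFetchApp.fetch(url, options);
  var data = JSON.parse(response.getContentText());
  var people = data.groupOne;
  for (let i = 0; i < people.length; i++) {
    if (people[i].name == userName) {
      return people[i].duration / 3600; // hours; evaluates to 0 if no time was logged
    }
  }
}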
I have written an Apps Script for Google Sheets which updates a table in BQ. The script should return, among other information, the total rows of the table. The script returns the job status and totalBytesProcessed but does not return totalRows, and I can't see what distinguishes these fields in the reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults
What do I have to change in order to also get totalRows?
// Need to provoke a drive dialog
// DriveApp.getFiles()
// Replace this value with your project ID and the name of the sheet to update.
var projectId = 'my-project';
var sheetName = 'my-sheet';
// Use standard SQL to query BigQuery
var request = {
query: 'DROP TABLE `my-project.xyz.targetgroup_5_tbl`; CREATE TABLE `my-project.xyz.targetgroup_5_tbl` AS SELECT * FROM `waipu-app-prod.Access.targetgroup_5_std`;',
useLegacySql: false
};
var queryResults = BigQuery.Jobs.query(request, projectId);
var jobId = queryResults.jobReference.jobId;
// Check on status of the Query Job.
var sleepTimeMs = 500;
while (!queryResults.jobComplete) {
Utilities.sleep(sleepTimeMs);
sleepTimeMs *= 2;
queryResults = BigQuery.Jobs.getQueryResults(projectId, jobId);
}
if (queryResults.jobComplete=true) {
// Append the results.
var status= queryResults.jobComplete;
var dayc = new Date();
var totalbytes = queryResults.totalBytesProcessed;
var totalRows = queryResults.totalRows;
var rows = [
['target group',status,dayc,totalbytes,totalRows],
]
var status = queryResults.jobComplete;
var ss = SpreadsheetApp.getActiveSpreadsheet();
var currentSheet = ss.getSheetByName(sheetName);
currentSheet.getRange(23,1, 1, 5).setValues(rows);
console.info('%d rows inserted.', queryResults.totalRows);
} else {
console.info('No results found in BigQuery');
}
}
The code just has a small error. In one of the comparisons, you used:
if (queryResults.jobComplete=true) {
which will always evaluate to true because this is an assignment, not a comparison. To check whether jobComplete is true, you should use '==' instead, i.e.,
if (queryResults.jobComplete==true) {
Or even better, since jobComplete is already a boolean variable, you can simply do:
if (queryResults.jobComplete) {
Now, the reason why you're not getting totalRows is that you are executing a SQL script, which doesn't return any rows. Even if you consider only the last part of the script, you are executing a CREATE TABLE AS statement, which also doesn't return any rows.
I understand that you want to know how many rows were inserted into this table. There are several ways to do that, including getting the table details or running a SELECT COUNT query; however, given what your SQL script does (deleting and re-creating a table), I recommend a different approach: use the API method Jobs.insert to specify a destination table, so you can run your query as a plain SELECT statement. Then use writeDisposition: 'WRITE_TRUNCATE' to delete the previous data, as in your script.
Here's some code that you can use as a reference:
var projectId = 'my-project';
// The request object changed to adapt to a Jobs.insert request
var request = {
configuration: {
query: {
destinationTable: {
projectId: projectId,
datasetId: 'my-dataset',
tableId: 'my-table'
},
query: 'SELECT * FROM ...', // Here goes the query used to create the table
useLegacySql: false,
writeDisposition: 'WRITE_TRUNCATE' // Truncate the data before writing it again
}
}
};
var queryResults = BigQuery.Jobs.insert(request, projectId); // Use Jobs.insert instead of Jobs.query
var jobId = queryResults.jobReference.jobId;
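To then read totalRows, a sketch of a follow-up could poll Jobs.getQueryResults with the returned jobId, the same way your original loop does:
var results = BigQuery.Jobs.getQueryResults(projectId, jobId);
var sleepTimeMs = 500;
while (!results.jobComplete) {
  Utilities.sleep(sleepTimeMs);
  sleepTimeMs *= 2;
  results = BigQuery.Jobs.getQueryResults(projectId, jobId);
}
// For a SELECT query, totalRows is the number of rows written to the destination table.
Logger.log('Rows in destination table: ' + results.totalRows);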
Good day all,
Background story:
I have a report in a CSV file emailed to me daily. I'm trying to create a script in GAS to download the CSV file to Google Drive and then upload it into a BigQuery table. GAS is the only scripting tool we have enabled, so I'm stuck with it. I'm new both to JavaScript in general and to the BigQuery API.
code:
function testFunction() {
var file = DriveApp.getFilesByName("my_csv_file.csv")
var csv = file.next().getBlob().setContentType('application/octet-stream').getDataAsString();
var csvData = Utilities.parseCsv(csv);
// I need to do the following to get the yyyy-MM-dd format
for (var i = 1; i < csvData.length; i++) {
var csvdate = csvData[i][1];
csvData[i][1] = csvdate.substring(6, 10) + "-" + csvdate.substring(3, 5) + "-" + csvdate.substring(0, 2);
}
var csvString = csvData.join("\n");
var blob = Utilities.newBlob(csvString, "text/csv")
var data = blob.setContentType('application/octet-stream');
var projectId = 'my_project_id';
var datasetId = 'my_dataset';
var tableId = 'bigquery_table';
var job = {
configuration: {
load: {
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
},
source_format: 'CSV',
skipLeadingRows: 1,
allowJaggedRows: 'TRUE',
allow_quoted_newlines: 'TRUE',
}
}
};
job = BigQuery.Jobs.insert(job, projectId, data);
}
And the job error I'm getting:
Error encountered during job execution:
Error while reading data, error message: CSV table encountered too many errors, giving up. Rows: 1290; errors: 1. Please look into the errors[] collection for more details.
Failure details:
- Error while reading data, error message: Too many values in row
starting at position: 239165.
- You are loading data without specifying data format, data will be
treated as CSV format by default. If this is not what you mean,
please specify data format by --source_format.
The thing I don't get:
I am specifying the source_format - am I doing it wrong?
Where I found the problem:
There are too many values in some rows because there are too many columns. There are too many columns because some of the product descriptions have stray commas in them. The original CSV file has all cells encapsulated in quotation marks - a nice way to get around the problem, I guess. The thing is, I need to change the format of the date column so that BigQuery accepts it as a date, and by doing so I seem to erase all the quotation marks...
Any pointers on how I could fix it, please?
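Worth noting: the JobConfigurationLoad reference uses camelCase keys (sourceFormat, allowJaggedRows, allowQuotedNewlines), so the snake_case keys in the job above are presumably ignored, which would also explain the "data will be treated as CSV format by default" warning. A hedged version of that part of the load config might look like this:
load: {
  destinationTable: {
    projectId: projectId,
    datasetId: datasetId,
    tableId: tableId
  },
  sourceFormat: 'CSV',       // camelCase, per the JobConfigurationLoad reference
  skipLeadingRows: 1,
  allowJaggedRows: true,     // booleans rather than 'TRUE' strings
  allowQuotedNewlines: true
}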
It seems that I've found a solution. The additional commas appear only in one column, so I used this loop:
for (var i = 1; i < csvData.length; i++) {
var csvdate = csvData[i][1];
csvData[i][1] = csvdate.substring(6, 10) + "-" + csvdate.substring(3, 5) + "-" + csvdate.substring(0, 2);
}
and extended it with an additional step:
for (var i = 1; i < csvData.length; i++) {
var csvdate = csvData[i][1];
csvData[i][1] = csvdate.substring(6, 10) + "-" + csvdate.substring(3, 5) + "-" + csvdate.substring(0, 2);
var csvdesc = csvData[i][4];
csvData[i][4] = csvdesc.replace(/([,])+/g, "") ;
}
which removes all commas from the column. Phew!
The original CSV file has all cells encapsulated in quotation marks
Then, do the same.
var csvString = csvData.map(function (row){
return '"' + row.join('","') + '"';
}).join('\n')
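If any cell can itself contain a double quote, a slightly extended sketch that also doubles embedded quotes (the usual CSV escaping) would be:
var csvString = csvData.map(function (row) {
  return row.map(function (cell) {
    return '"' + String(cell).replace(/"/g, '""') + '"'; // wrap in quotes, escape embedded quotes
  }).join(',');
}).join('\n');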
I'm trying to use Google Apps Script to append data to a BigQuery table using the BigQuery API. The data to append is currently in CSV format. So far I've found that you can stream data into BigQuery using tabledata().insertAll(), but it looks like that requires JSON format, and I'm not even convinced that it would do what I need. Is there a straightforward solution to this that I'm missing? Because I know BigQuery supports appending, and yet everything I'm finding is really focused on loading data into new tables.
EDIT:
Sounds like tabledata().insertAll() is indeed the right method to use (hopefully). So I converted my file to JSON instead, but now I'm stuck on how to actually use it. I'm trying to base what I'm doing on the reference page for it, but it's still really confusing for me. Currently I am getting a 404 error when I run my code and it hits the fetch call. I'm trying to do a URL fetch; maybe that's not how I'm supposed to be doing things? I'm really new to APIs and I'm still figuring out how they work. Here's the code I currently have that's causing this:
var tableId = 'users';
var file = DriveApp.getFileById(jsonId);
//I don't know if a blob is the type that I want or not, but I'm trying it
var data = file.getBlob();
var url = 'https://www.googleapis.com/bigquery/v2/projects/PROJECT_ID/datasets/DATASET_ID/tables/tableId/insertAll'
.replace("PROJECT_ID", params.PROJECT_ID)
.replace("DATASET_ID", params.DATASET_ID)
.replace("tableId", tableId);
var response = UrlFetchApp.fetch(url, {
"kind": "bigquery#tableDataInsertAllRequest",
"skipInvalidRows": 0,
"ignoreUnknownValues": 0,
"rows": [
{
"json": data
}
],
headers: {
Authorization: 'Bearer ' + service.getAccessToken()
}
});
var result = JSON.parse(response.getContentText());
Logger.log(JSON.stringify(result, null, 2));
This is not the most direct route from CSV to BigQuery JSON, but it's some code that I'm using that should help you on the BigQuery side.
var PROJECT_ID = "xxx";
var DATASET_ID = "yyy";
function convertValuesToRows(data) {
var rows = [];
var headers = data[0];
for (var i = 1, numRows = data.length; i < numRows; i++) { // start at 1 to skip the header row
var row = BigQuery.newTableDataInsertAllRequestRows();
row.json = data[i].reduce(function(obj, value, index) {
obj[headers[index]] = value;
return obj
}, {});
rows.push(row);
};
return rows;
}
function bigqueryInsertData(data, tableName) {
var insertAllRequest = BigQuery.newTableDataInsertAllRequest();
insertAllRequest.rows = convertValuesToRows(data);
var response = BigQuery.Tabledata.insertAll(insertAllRequest, PROJECT_ID, DATASET_ID, tableName);
if (response.insertErrors) {
Logger.log(response.insertErrors);
}
}
This allows you to supply any GAS style value matrix (from getValues or indeed Utilities.parseCsv)
convertValuesToRows will take a 2d array of strings (with headers) and encode it in the format BigQuery needs, e.g.
[["H1", "H2", "H3"],
[1 , 2 , 3 ],
[4 , 5 , 6 ]];
will be added to the insertAll request in the form of key-value pairs, i.e.
[{H1: 1, H2: 2, H3: 3},
{H1: 4, H2: 5, H3: 6}]
You only need to worry about the first representation, as that is what you pass into bigqueryInsertData together with the name of the table you want to feed the data into (the schema of the table needs to match what you are sending); the converter function is called from within.
Utilities.parseCsv already returns a 2D array of strings, so you can basically call bigqueryInsertData(Utilities.parseCsv(data.getDataAsString()), "myTable").
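For example, an end-to-end call might look like this (the file name and table name are placeholders):
var file = DriveApp.getFilesByName("my_csv_file.csv").next(); // placeholder file name
var values = Utilities.parseCsv(file.getBlob().getDataAsString());
bigqueryInsertData(values, "myTable"); // the table schema must match the CSV headers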
I have a table and need to add more records based on new data from Google Sheets.
I see how I can do it with union, meaning running
Select * from (SELECT * from table),(select * from temp_table_from_sheets)
I.e., query the old table and the new table, delete the old table, and save the result of the query as the old table.
But it must surely be possible to append, rather than only replacing the whole table with BigQuery.Jobs.insert.
Can you help me please?
EDIT - solution
After getting the answer below, I googled a lot and eventually came up with the following solution in Apps Script:
var sql = 'select ...'
var projectId = '...'
var datasetId = '...'
var tableId = '...'
var job = {
configuration: {
query: {
query: sql,
writeDisposition:'WRITE_APPEND',
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
}
}
}
};
var queryResults = BigQuery.Jobs.insert(job, projectId)
From BigQuery API Basics - Managing Tables:
Appending data
You can load additional data into a table either from source files or
by appending query results. Note that the schema of the loaded data
must match the schema of the existing table, but you can update the
schema before appending.
...
To append data from a query result:
Run an asynchronous query, pass in the name of your existing table,
and set writeDisposition=WRITE_APPEND.
Pushing Google Sheets content to BigQuery
I found it here
The tricky part is converting the sheet data to CSV.
var file = SpreadsheetApp.openByUrl(url).getSheetByName(sheetName);
// This represents ALL the data
var rows = file.getDataRange().getValues();
var rowsCSV = rows.join("\n");
var blob = Utilities.newBlob(rowsCSV, "text/csv");
var data = blob.setContentType('application/octet-stream');
// Create the data upload job.
var job = {
configuration: {
load: {
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
},
skipLeadingRows: 1,
writeDisposition: writeDispositionSetting
}
}
};
// send the job to BigQuery so it will run your query
var runJob = BigQuery.Jobs.insert(job, projectId, data);
Logger.log(runJob.status);
var jobId = runJob.jobReference.jobId
Logger.log('jobId: ' + jobId);
var status = BigQuery.Jobs.get(projectId, jobId);
// wait for the query to finish running before you move on
while (status.status.state === 'RUNNING') {
Utilities.sleep(500);
status = BigQuery.Jobs.get(projectId, jobId);
Logger.log('Status: ' + status);
}
Logger.log('FINISHED!');
}
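For the snippet to run as-is, the variables it references but never defines would need values along these lines (url, sheetName, projectId, datasetId, tableId and writeDispositionSetting are all placeholders here):
var url = 'https://docs.google.com/spreadsheets/d/SPREADSHEET_ID/edit'; // placeholder spreadsheet URL
var sheetName = 'Sheet1';                      // placeholder tab name
var projectId = 'my-project';                  // placeholder GCP project
var datasetId = 'my_dataset';                  // placeholder dataset
var tableId = 'my_table';                      // placeholder table
var writeDispositionSetting = 'WRITE_APPEND';  // or 'WRITE_TRUNCATE' to replace existing data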