HTML file saved in Google Drive to be parsed into a Spreadsheet - google-apps-script

I have a file that comes from the bank in an HTML format. They send this file as a transaction happens and I have a appscript to save the attachment to a Drive folder. I want to parse the HTML and copy the table to a spreadsheet appending it at the bottom.
I have created this piece of code, but XML parsing of HTML is a mystery for me, so It doens't work
function parseTablesFromHTML() {
var folderId = "1NrgsTgB3q573wav3cQsse4sAT8poeI77";
var folder = DriveApp.getFolderById(folderId);
var htmls = folder.getFilesByType(MimeType.HTML);
var sheetId = "https://docs.google.com/spreadsheets/d/1B-mjEUqvy49Wvct13XrWi6TU1dw1VPwesfYJRKJ5T6s/edit#gid=0";
//Set up spreadsheet
var ss = SpreadsheetApp.openByUrl(sheetId);
SpreadsheetApp.setActiveSpreadsheet(ss);
Logger.log('File name: ' + ss.getName());
var sheet = ss.getSheetByName("test");
var range = sheet.getRange(sheet.getLastRow(),sheet.getLastColumn());
while (htmls.hasNext()) {
var html = htmls.next().getId();
var response = DriveApp.getFileById(html).getBlob().getDataAsString();
var xmlDoc = XmlService.parse(response);
var b = xmlDoc.getElement().getElement("body");
var table = b.getElement("div").getElement("div").getElement("div").getElements("div")[1].getElement("table");
var rows = [];
var trs = table.getElements("tr");
for (var r=0,rlength=trs.length; r<rlength; r++) {
var tds = trs[r].getElements("td");
var row = [];
for (var c=0,clength=tds.length; c<clength; c++) {
row.push(tds[c].getText());
}
rows.push(row);
}
Logger.log(Utilities.jsonStringify(rows));
}
}
the html code looks like this
<table bordercolor="black" cellspacing="0" cellpadding="4" width="100%" class="style0" BORDER="1" FRAME="BOX" RULES="NONE">
<tr>
<td class="td-header" align="center" width="15%">Process date</td>
<td class="td-header" align="center" width="5%">Reference</td>
<td class="td-header" align="center" width="10%">Value date</td>
<td class="td-header" align="right" width="5%">Amount</td>
<td class="td-header" align="center" width="5%">Type</td>
<td class="td-header" align="left" width="40%">Description</td>
<td class="td-header" align="center" width="25%">details BISERA</td>
</tr>
<tr>
<td colspan="7">
<hr size="1" />
</td>
</tr>
<tr>
<td nowrap="nowrap" align="center">02.10.2020 16:13:22</td>
<td nowrap="nowrap" align="right">
<font color="blue">286B2P12027600HM</font>
</td>
<td nowrap="nowrap" align="center">02.10.2020</td>
<td nowrap="nowrap" align="right">317.63</td>
<td nowrap="nowrap" align="center">CT</td>
<td nowrap="nowrap" align="left">Получен междубанков превод<br /><br />PO F 303<br />.</td>
<td align="center">
<table>
<tr>
<td align="right" nowrap="nowrap">test iban</td>
</tr>
<tr>
<td align="right" nowrap="nowrap">test</td>
</tr>
</table>

Considerations
.getElement() is not a function. I consider this the root cause of your problems since you won't be able to call an nonexistent method.
Here is the XmlService's Document Class documentation please refer to it when adjusting the example in this answer to your solution.
Code snippet
This is how to get the first row's cells text: (table's headers)
var html = 'the html code you posted in the question';
var parsed = XmlService.parse(html);
Logger.log(
parsed.getRootElement()
.getChildren('tr')[0] // Gets the first row (headers)
.getChildren('td')
.map(cell => cell.getText())
); //[Process date, Reference, Value date, Amount, Type, Description, details BISERA]
Reference
XmlService Document
XmlService

Related

how to display data for a specific month in a table

is it possible to display the data on which column month of the table?
below is the sample dashboard to achieve
and this is my DB
currently this is my dashboard. I want to align the data base on the period in the DB.
what should be my tboby look like to achieve the desired dashboard
this is my index html
<div class="w3-container w3-padding-64 w3-theme-l5">
<div class="w3-padding-16"><span class="w3-xlarge w3-border-purple w3-bottombar">COUNT PER BUSINESS UNIT</span></div>
<div>
<table id="businessUnit-tbl" class="table table-bordered display nowrap">
<thead>
<tr style="/*color:white;*/ align-content:center">
<td>MONTH</td>
</tr>
<tr #*style="color:white"*#>
<td>Business Unit</td>
<td id="jan">January</td>
<td id="feb">February</td>
<td id="mar">March</td>
<td id="apr">April</td>
<td id="may">May</td>
<td id="june">June</td>
<td id="july">July</td>
<td id="aug">August</td>
<td id="sept">September</td>
<td id="oct">October</td>
<td id="nov">November</td>
<td id="dec">December</td>
<td id="total">Total</td>
</tr>
</thead>
<tbody></tbody>
</table>
</div>
According to my understanding of the topic, I suggest leaving tbody blank and trying to fill it with javascript
// append column to the HTML table
function appendColumn() {
var tbl = document.getElementById('my-table'), // table reference
i;
// open loop for each row and append cell
for (i = 0; i < tbl.rows.length; i++) {
createCell(tbl.rows[i].insertCell(tbl.rows[i].cells.length), i, 'col');
}
}

How to calculate difference of two column into third column at runtime

I have created a web application which fetch data from RTC tool.
So In that web application I have to calculate difference at runtime(for third column) basis on two column values.
First column value will be taken from RTC tool programmatically and for second column user will enter value in text boxes and for third column it will calculate difference automatically.
Let me know if we can calculate difference for this third column automatically and how?
Thanks
Hard to tell without rendered HTML. If your columns are in a table, you can do
const makeNum = str => isNaN(str) || str.trim() === ""? 0:+str;
document.getElementById("table").addEventListener("input", function(e) {
const tgt = e.target;
if (tgt.classList.contains("userInput")) { // <input class="userInput"
const parent = tgt.closest("tr");
const rtc = makeNum(parent.querySelector(".rtc").textContent); // <td class="rtc>value</td>
const val = makeNum(tgt.value);
parent.querySelector(".diff").textContent = rtc - val; // <td class="diff"></td>
}
})
<table>
<thead></thead>
<tbody id="table">
<tr>
<td class="rtc">1000</td>
<td><input class="userInput"></td>
<td class="diff"></td>
</tr>
<tr>
<td class="rtc">2000</td>
<td><input class="userInput"></td>
<td class="diff"></td>
</tr>
<tr>
<td class="rtc">3000</td>
<td><input class="userInput"></td>
<td class="diff"></td>
</tr>
<tr>
<td class="rtc">4000</td>
<td><input class="userInput"></td>
<td class="diff"></td>
</tr>
<tr>
<td class="rtc">5000</td>
<td><input class="userInput"></td>
<td class="diff"></td>
</tr>
</table>
If you are using jQuery in you project they you can also try this:
Check the working here
JS:
$('.user-input').on('input', function() {
var static_val = $(this).parent().prev().text();
var user_val = $(this).val();
var diff = parseInt(static_val) - parseInt(user_val);
if(!isNaN(diff)){
$(this).parent().next().text(diff);
}else{
$(this).parent().next().text('');
}
});

How to parse tables without id on HTML using HtmlAgilityPack

I have a problem getting the values of a table in HTML cause it doesn't have a ids. I need to get all the values on the second column and keep them into an array. I am using HtmlAgilityPack and my problems comes when selecting nodes:
Dim doc As HtmlDocument
Dim web As New HtmlWeb()
Dim str As String
doc = Web.Load("http://www.dietas.net/tablas-y-calculadoras/tabla-de-composicion-nutricional-de-los-alimentos/carnes-y-derivados/aves/pechuga-de-pollo.html#")
Dim nodes_filas As HtmlNode() = doc.DocumentNode.SelectNodes("//table[#id='']//tr").ToArray
Dim nodes_columnas As HtmlNode() = doc.DocumentNode.SelectNodes("//td").ToArray
For Each row As HtmlNode In nodes_filas
For Each column As HtmlNode In nodes_columnas
str = column.InnerHtml & vbCrLf
Next
Next
This is the table:
<table cellspacing="1" cellpadding="3" width="100%" border="0">
<tr>
<td colspan="2" style="font-size:13px;color:#55711C;padding-bottom:5px;">Aporte por ración</td>
</tr>
<tr style="background-color:#EBEBEB">
<td width="125">Energía [Kcal]</td>
<td class="td_right">145,00</td>
</tr>
<tr>
<td>Proteína [g]</td>
<td class="td_right">22,20</td>
</tr>
<tr style="background-color:#EBEBEB">
<td>Hidratos carbono [g]</td>
<td class="td_right">0,00</td>
</tr>
<tr>
<td>Fibra [g]</td>
<td class="td_right">0,00</td>
</tr>
<tr style="background-color:#EBEBEB">
<td>Grasa total [g]</td>
<td class="td_right">6,20</td>
</tr>
<tr>
<td>AGS [g]</td>
<td class="td_right">1,91</td>
</tr>
<tr style="background-color:#EBEBEB">
<td>AGM [g]</td>
<td class="td_right">1,92</td>
</tr>
<tr>
<td>AGP [g]</td>
<td class="td_right">1,52</td>
</tr>
<tr style="background-color:#EBEBEB">
<td>AGP /AGS</td>
<td class="td_right">0,79</td>
</tr>
<tr>
<td>(AGP + AGM) / AGS</td>
<td class="td_right"> 1,80</td>
</tr>
<tr style="background-color:#EBEBEB">
<td>Colesterol [mg]</td>
<td class="td_right">62,00</td>
</tr>
<tr>
<td>Alcohol [g]</td>
<td class="td_right">0,00</td>
</tr>
<tr style="background-color:#EBEBEB">
<td>Agua [g]</td>
<td class="td_right">71,60</td>
</tr>
</table>
Sorry I don't have VB installed but C# version should be enough to give you an idea. You have td_right class, you can use either lambda or xpath to query it.
I like lambda/linq version more because I am familiar with linq, and I don't need to remember XPATH syntax.
Lambda:
public static bool HasClass(this HtmlNode node, params string[] classValueArray)
{
var classValue = node.GetAttributeValue("class", "");
var classValues = classValue.Split(' ');
return classValueArray.All(c => classValues.Contains(c));
}
var url = "http://www.dietas.net/tablas-y-calculadoras/tabla-de-composicion-nutricional-de-los-alimentos/carnes-y-derivados/aves/pechuga-de-pollo.html#";
var htmlWeb = new HtmlWeb();
var htmlDoc = htmlWeb.Load(url);
var nodes = htmlDoc.DocumentNode.Descendants("td").Where(_ => _.HasClass("td_right")).Select(_ => _.InnerText);
XPATH:
var nodes2 = htmlDoc.DocumentNode.SelectNodes("//td[#class='td_right']");

Generate a pdf from html page by using jspdf in angularjs

I am trying to generate pdf from HTML table using jspdf.In this case the pdf is generated but the format is not suitable to original.
This is my code.
html code is
<div class="invoice" id="customers">
<table ng-repeat="aim in input" id="example">
<tr>
<th class="inv-left"><div align="left"><img src="./images/logo.png" alt=""></div></th>
<th class="inv-right"><div align="right"><br>
101 Convention Center<br>
dr #700, Las Vegas, <br>
NV - 89019
</div></th>
</tr>
<tr >
<th><div cg-busy="{promise:viewPromise}" align="left">
<b>Invoiced to</b><br>
{{aim.user.username}}<br>
{{aim.vendor.address}}
</div></th>
<th class="inv-right">
<div align="right"><b>INVOICE</b><br>
Invoice ID: {{aim.invoiceId}}<br>
Invoice Date: {{aim.invoiceDate.date| dateFormat | date:'MM-dd-yyyy'}}<br>
Due Date: {{aim.dueDate.date| dateFormat | date:'MM-dd-yyyy'}}
</div></th>
</tr>
<div class="invoice-content clearfix" cg-busy="{promise:viewPromise}" >
<tr>
<td class="inv-thours">Total Hours</td>
<td align="center">{{aim.totalHours}}</td>
</tr>
<tr>
<td class="inv-rate">Rate</td>
<td align="center">{{aim.billRate}}</td>
</tr>
<tr>
<td class="inv-rate">Amount</td>
<td align="center">{{(aim.totalHours) * (aim.billRate)}}</td>
</tr>
<tr>
<td class="inv-thours">totalExpenses</td>
<td align="center">{{aim.totalExpenses}}</td>
</tr>
<tr>
<td class="inv-thours">Total Amount</td>
<td align="center">{{aim.amount}}</td>
</tr>
<tr>
<td>
</td>
<td ng-if="aim.status === 'UNCONFIRMED'">
<div align="right" style="margin-right:10px;"><input type="submit" value="Confirm" data-ng-click="confirmStatus(aim)"> |
<button onclick="goBack()">Cancel</button></div>
</td>
<td ng-if="aim.status === 'CONFIRMED'">
<div align="right" style="margin-right:10px;">
<button onclick="goBack()">BACK</button></div>
</td>
<td ng-if="!(aim.status === 'UNCONFIRMED') && !(aim.status === 'CONFIRMED')">
<button onclick="javascript:demoFromHTML();">PDF</button>
</td>
</tr>
</table>
<script type="text/javascript" src="http://mrrio.github.io/jsPDF/dist/jspdf.debug.js"></script>
<script>
function demoFromHTML() {
var pdf = new jsPDF('p', 'pt', 'letter');
var imgData = '.............';
pdf.setFontSize(40);
pdf.addImage(imgData, 'PNG', 12, 30, 130, 40);
pdf.cellInitialize();
pdf.setFontSize(10);
$.each($('#customers tr'), function (i, row) {
$.each($(row).find("th"), function (j, cell) {
var txt = $(cell).text();
var width = (j == 4) ? 300 : 300; //make with column smaller
pdf.cell(10, 30, width, 70, txt, i);
});
$.each($(row).find("td"), function (j, cell) {
var txt = $(cell).text().trim() || " ";
var width = (j == 4) ? 200 : 300; //make with column smaller
pdf.cell(10, 50, width, 30, txt, i);
});
});
pdf.save('sample-file.pdf');
}
I whant to generate pdf to this formate
http://i.stack.imgur.com/nrR7l.png
but generate pdf formate is
http://i.stack.imgur.com/DGSxE.png
please help me to this problem.
Thank you.
I think CSS is missing in your generated PDF, and found this,
github issue link
diegocr commented on 25 Sep 2014
I'm afraid the fromHTML plugin is kinda limited when it comes to support css styles. Also, we have an addSVG plugin to deal with SVG elements, but the fromHTML does not uses it. So, no, the issue isn't Angular, you may could use the new addHTML (#270) but i dunno if that will deal with SVG. (html2canvas, that is)

HTML Sum of Column Needed

I have the following html code for a table I am generating information from but I also need it to calculate the sum of the "TranAmt" column. Can someone help me with this?
<p>Hi Marly,</p>
<p>The following customer invoices were posted today:</p>
<table style="width: 1300px;" border="1" cellspacing="1.5" cellpadding="1.5" align="left">
<thead>
<tr style="background-color: #81BEF7;" align="center" valign="middle">
<td style="text-align: center;"><strong>Customer ID</strong></td>
<td style="text-align: center;"><strong>Customer Name</strong></td>
<td style="text-align: center;"><strong>Customer PO Number</strong></td>
<td style="text-align: center;"><strong>Invoice Number</strong></td>
<td style="text-align: center;"><strong>Invoice Date</strong></td>
<td style="text-align: center;"><strong>Post Date</strong></td>
<td style="text-align: center;"><strong>Invoice Amount</strong></td>
<td style="text-align: center;"><strong>Invoice Sales Tax Amount</strong></td>
<td style="text-align: center;"><strong>Create User</strong></td>
</tr>
</thead>
{BEGIN*REPEAT}
<tbody>
<tr>
<td>{CustID}</td>
<td>{CustName}</td>
<td>{CustPONo}</td>
<td>{TranID}</td>
<td>{TranDate}</td>
<td>{PostDate}</td>
<td>{TranAmt}</td>
<td>{STaxAmt}</td>
<td>{CreateUserID}</td>
</tr>
{END*REPEAT}
</tbody>
</table>
If you are using JQuery you can use the n-th child selector to add all the items of a column within a table.
http://api.jquery.com/nth-child-selector/
Here's an example
var rows = $("#table_id tr:gt(0)");
rows.children("td:nth-child(7)").each(function() {
the_sum += parseInt($(this).text());
});
This function will help you for getting the total of any column whose column header name you supplied to it. But I want to suggest one thing instead of tag names thead and tbody plz use some id, as it will create problem if you have multiple tables in the same page.
function someFunction() {
var invoiceTotalAmount = getTotal(("invoice amount").toUpperCase());
}
function getTotal(myString) {
var CellListing = $("thead > tr > td");
var selIndex=-1;
var sum = 0;
CellListing.each(function(index){
if($(this).children("strong").html().toUpperCase()===myString)
selndex= index+1;
});
var rows = $("tbody >tr");
rows.children("td:nth-child("+selIndex+")").each(function() {
sum += parseInt($(this).text());
});
return sum;
}
running example html sum of column needed
I don't know how to define variable in {FinalTranAmt} so i'm explain you
Tack a variable with initially 0 as value
For every repeat add {TranAmt} in {FinalTranAmt} & finally print it.
I don't know my structure is right or not.
{FinalTranAmt} = 0;
{BEGIN*REPEAT}
<tbody>
<tr>
<td>{CustID}</td>
<td>{CustName}</td>
<td>{CustPONo}</td>
<td>{TranID}</td>
<td>{TranDate}</td>
<td>{PostDate}</td>
<td>{TranAmt}</td>
<td>{STaxAmt}</td>
<td>{CreateUserID}</td>
</tr>
{FinalTranAmt} += {TranAmt} ;
{END*REPEAT}