Google App Script parse table from messed html - html

I want to create a script which download html, parse a table and save it to a SpreadSheet. I am stuck on downloading and parsing.
Xpath to table is:
/html/body/table/tbody/tr[5]/td/table/tbody/tr/td[2]/table
Currently I am stuck at parsing Xpath.
function fetchIt() {
var fetchString="http://www.zbranebrymova.com/index.php?s_lev=22&type=nabku*signa"
var response = UrlFetchApp.fetch(fetchString);
var xmlDoc = Xml.parse(response.getBlob().getDataAsString(),true);
var b = xmlDoc.getElement().getElement("body").getElement("table") ;
Logger.log(b);
}

I don't know if this will be helpful, here is a snippet of my table parsing code:
html file FOO.HTM:
<html>
<head> </head>
<body style="margin-left:10px">
<table title="">
<tbody>
<tr>
<th align="center" abbr="Sunday">Sun</th>
<th align="center" abbr="Monday">Mon</th>
</tr>
<tr>
<td align="left"><a title="January 01">1</a>
<div>Joe,Doe</div>
<div>Murphy,Jack</div>
</td>
<td align="left"><a title="January 02">2</a>
<div>Carlson,Carl</div>
<div>Guy,Girl</div>
<div>Lenin,Vladimir</div>
</td>
</tr>
</tbody>
</table>
</body>
<html>
and this is how I parse it:
function foo() {
var page = UrlFetchApp.fetch('foo.htm');
var rows = Xml.parse(page,true).getElement()
.getElement("html")
.getElement("body")
.getElement("table")
.getElement("tbody")
.getElements("tr");
for (var ii = 0; ii < rows.length; ii++) {
var cols = rows[ii].getElements("td");
for (var jj = 0; jj < cols.length; jj++) {
var divs = cols[jj].getElements("div");
for (var kk = 0; kk < divs.length; kk++) {
var div = divs[kk];
}
}
}
}
cheers, sean

Related

Download html table when clicked on button

I am trying to download the contents in the html table. Can anyone help me download its contents when clicked on button. I tried with following code but not working.
function exportTableToCSV(filename) {
alert("I am an alert box!");
var csv = [];
var rows = document.querySelectorAll("table tr");
for (var i = 0; i < rows.length; i++) {
var row = [],
cols = rows[i].querySelectorAll("td, th");
for (var j = 0; j < cols.length; j++)
row.push(cols[j].innerText);
csv.push(row.join(","));
}
// Download CSV file
alert('Hey')
downloadCSV(csv.join("\n"), filename);
}
function downloadCSV(csv, filename) {
var csvFile;
var downloadLink;
// CSV file
csvFile = new Blob([csv], {
type: "text/csv"
});
// Download link
downloadLink = document.createElement("a");
// File name
downloadLink.download = filename;
// Create a link to the file
downloadLink.href = window.URL.createObjectURL(csvFile);
// Hide download link
downloadLink.style.display = "none";
// Add the link to DOM
document.body.appendChild(downloadLink);
// Click download link
downloadLink.click();
}
<table border=1><button onclick="exportTableToCSV(\'members.csv\');">Export HTML Table To CSV File</button>
<tr>
<th></th>
<th> sub_domain</th>
<th> nReviews</th>
<th> Ratings_e</th>
<th> Text_e</th>
<th> rn</th>
</tr>
<tr>
<td align=\ "right\"> 1</td>
<td> a1</td>
<td align=\ "right\"> 2</td>
<td> 1, 2</td>
<td> asd, dfdsf</td>
<td align="left"> 1</td>
</tr>
</table>
The filename given format is the issue.
You can replace the button code as follows.
<button onclick="exportTableToCSV('members.csv');">Export HTML Table To CSV File</button>
function exportTableToCSV(filename) {
alert("I am an alert box!");
var csv = [];
var rows = document.querySelectorAll("table tr");
for (var i = 0; i < rows.length; i++) {
var row = [],
cols = rows[i].querySelectorAll("td, th");
for (var j = 0; j < cols.length; j++)
row.push(cols[j].innerText);
csv.push(row.join(","));
}
// Download CSV file
alert('Hey')
downloadCSV(csv.join("\n"), filename);
}
function downloadCSV(csv, filename) {
var csvFile;
var downloadLink;
// CSV file
csvFile = new Blob([csv], {
type: "text/csv"
});
// Download link
downloadLink = document.createElement("a");
// File name
downloadLink.download = filename;
// Create a link to the file
downloadLink.href = window.URL.createObjectURL(csvFile);
// Hide download link
downloadLink.style.display = "none";
// Add the link to DOM
document.body.appendChild(downloadLink);
// Click download link
downloadLink.click();
}
<body>
<table border=1>
<button onclick="exportTableToCSV('members.csv');">Export HTML Table To CSV File</button>
<tr>
<th></th>
<th> sub_domain</th>
<th> nReviews</th>
<th> Ratings_e</th>
<th> Text_e</th>
<th> rn</th>
</tr>
<tr>
<td align=\ "right\"> 1</td>
<td> a1</td>
<td align=\ "right\"> 2</td>
<td> 1, 2</td>
<td> asd, dfdsf</td>
<td align="left"> 1</td>
</tr>
</table>
</body>

How to convert HTML table into jQuery key-value pair array?

I have some HTML table (without tbody tag) where I want to convert the HTML data into key-value pair(header-data) in jQuery array.
When you run code snippet, the output I get is only for last row.
************Actual Output************
[
"Company:Swiggy",
"Contact:Gotya",
"Country:Japan"
]
************Needed Output************
[
"Company:Creamy Cola",
"Contact:Atharva",
"Country:Switzerland"
],
[
"Company:Tuty Fruity",
"Contact:Suyog",
"Country:UK"
],
[
"Company:Milky Way",
"Contact:Santosh",
"Country:US"
],
[
"Company:Swiggy",
"Contact:Gotya",
"Country:Japan"
]
I checked below references too, but couldn't find logic.
Convert HTML Rows into Name Value Pair in JQuery
get values from table as key value pairs with jquery
Thanks in advance for your help.This is what I have tried so far, please run snippet to get answer in console.
/******JQUERY SCRIPT******/
/*Get Header*/
var xKey = [];
$("#xxx th").each(function() {
var tempColName = $(this).text(); {
xKey.push(tempColName);
}
});
//console.log("Key");
//console.log(xKey);
/*Get Data*/
var xValue = [];
$("#xxx tr").each(function() {
var arrayOfThisRow = [];
var tableData = $(this).find('td');
if (tableData.length > 0) {
tableData.each(function() {
arrayOfThisRow.push($(this).text());
});
xValue.push(arrayOfThisRow);
}
});
//console.log("Value");
//console.log(xValue);
//My logic below
var xFinalArr = [];
for (var i = 0; i < xKey.length; i++) {
var su = "";
for (var j = 0; j < xValue.length; j++) {
su = xKey[i] + ":" + xValue[j][i];
}
xFinalArr.push(su);
}
console.log("Output");
console.log(xFinalArr);
/******CSS STYLE******/
table {
font-family: arial, sans-serif;
border-collapse: collapse;
width: 100%;
}
td,
th {
border: 1px solid #dddddd;
text-align: left;
padding: 8px;
}
tr:nth-child(even) {
background-color: #dddddd;
}
<!--HTML-->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<!DOCTYPE html>
<html>
<body>
<table id="xxx">
<tr>
<th>Company</th>
<th>Contact</th>
<th>Country</th>
</tr>
<tr>
<td>Creamy Cola</td>
<td>Atharva</td>
<td>Switzerland</td>
</tr>
<tr>
<td>Tuty Fruity</td>
<td>Suyog</td>
<td>UK</td>
</tr>
<tr>
<td>Milky Way</td>
<td>Santosh</td>
<td>US</td>
</tr>
<tr>
<td>Swiggy</td>
<td>Gotya</td>
<td>Japan</td>
</tr>
</table>
</body>
</html>
It looks like what you are asking for is a 2-D array of results, though you are only currently storing results in a single-dimensional array. Perhaps you should try something like this:
var xFinalArr = [];
for (var i = 0; i < xValue.length; i++) {
var xRowArr = [];
for (var j = 0; j < xKey.length; j++) {
xRowArr.push(xKey[j] + ":" + xValue[i][j]);
}
xFinalArr.push(xRowArr);
}
I suspect, however, that what you actually want is an array of objects, which is only slightly different code:
var xFinalArr = [];
for (var i = 0; i < xValue.length; i++) {
var xRowObj = {};
for (var j = 0; j < xKey.length; j++) {
xRowObj[xKey[j]] = xValue[i][j];
}
xFinalArr.push(xRowObj);
}
This is, of course, predicated on the assumption that no cells span multiple rows or columns, that each row has an identical number of columns, and that the header values translate into reasonable property names (though technically the code will still work if they do not).

How to export multiple HTML tables into a single excel file, along with the html table's css/style?

I know there are a lot of this type of question in this site, but i cant find any working solution for this that suit my need. What i need is
1. export multiple html table to excel
2. the table in the excel look exactly like the html table (the css, styling are the same. e.g background-color)
Is there any way to do this using javascript,etc.?
I found one question for this problem here, but the solution shown there did not solve the css problem. Means that the excel table not have the css like the html table (e.g The excel table doesnt have yellow background eventhough the html table have it)
So basically the main problem is I need the format for the excel file to be same as the html table (css,etc.). Sorry I am new to programming..
Here you can access the jsfiddle. Or if the link doesnt work, you can just try it below (The codes are taken from the original post , but with an addition of a simple yellow-colored background css in it's tr)Thanks in advance.
var tablesToExcel = (function() {
var uri = 'data:application/vnd.ms-excel;base64,'
, tmplWorkbookXML = '<?xml version="1.0"?><?mso-application progid="Excel.Sheet"?><Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet">'
+ '<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office"><Author>Axel Richter</Author><Created>{created}</Created></DocumentProperties>'
+ '<Styles>'
+ '<Style ss:ID="Currency"><NumberFormat ss:Format="Currency"></NumberFormat></Style>'
+ '<Style ss:ID="Date"><NumberFormat ss:Format="Medium Date"></NumberFormat></Style>'
+ '</Styles>'
+ '{worksheets}</Workbook>'
, tmplWorksheetXML = '<Worksheet ss:Name="{nameWS}"><Table>{rows}</Table></Worksheet>'
, tmplCellXML = '<Cell{attributeStyleID}{attributeFormula}><Data ss:Type="{nameType}">{data}</Data></Cell>'
, base64 = function(s) { return window.btoa(unescape(encodeURIComponent(s))) }
, format = function(s, c) { return s.replace(/{(\w+)}/g, function(m, p) { return c[p]; }) }
return function(tables, wsnames, wbname, appname) {
var ctx = "";
var workbookXML = "";
var worksheetsXML = "";
var rowsXML = "";
for (var i = 0; i < tables.length; i++) {
if (!tables[i].nodeType) tables[i] = document.getElementById(tables[i]);
for (var j = 0; j < tables[i].rows.length; j++) {
rowsXML += '<Row>'
for (var k = 0; k < tables[i].rows[j].cells.length; k++) {
var dataType = tables[i].rows[j].cells[k].getAttribute("data-type");
var dataStyle = tables[i].rows[j].cells[k].getAttribute("data-style");
var dataValue = tables[i].rows[j].cells[k].getAttribute("data-value");
dataValue = (dataValue)?dataValue:tables[i].rows[j].cells[k].innerHTML;
var dataFormula = tables[i].rows[j].cells[k].getAttribute("data-formula");
dataFormula = (dataFormula)?dataFormula:(appname=='Calc' && dataType=='DateTime')?dataValue:null;
ctx = { attributeStyleID: (dataStyle=='Currency' || dataStyle=='Date')?' ss:StyleID="'+dataStyle+'"':''
, nameType: (dataType=='Number' || dataType=='DateTime' || dataType=='Boolean' || dataType=='Error')?dataType:'String'
, data: (dataFormula)?'':dataValue
, attributeFormula: (dataFormula)?' ss:Formula="'+dataFormula+'"':''
};
rowsXML += format(tmplCellXML, ctx);
}
rowsXML += '</Row>'
}
ctx = {rows: rowsXML, nameWS: wsnames[i] || 'Sheet' + i};
worksheetsXML += format(tmplWorksheetXML, ctx);
rowsXML = "";
}
ctx = {created: (new Date()).getTime(), worksheets: worksheetsXML};
workbookXML = format(tmplWorkbookXML, ctx);
console.log(workbookXML);
var link = document.createElement("A");
link.href = uri + base64(workbookXML);
link.download = wbname || 'Workbook.xls';
link.target = '_blank';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
})();
<table id="tbl1" class="table2excel">
<tr style="background-color:yellow;">
<td>Product</td>
<td>Price</td>
<td>Available</td>
<td>Count</td>
</tr>
<tr>
<td>Bred</td>
<td>1
</td>
<td>2
</td>
<td>3
</td>
</tr>
<tr>
<td>Butter</td>
<td>4
</td>
<td>5
</td>
<td >6
</td>
</tr>
</table>
<hr>
<table id="tbl2" class="table2excel">
<tr>
<td>Product</td>
<td>Price</td>
<td>Available</td>
<td>Count</td>
</tr>
<tr>
<td>Bred</td>
<td>7
</td>
<td>8
</td>
<td>9
</td>
</tr>
<tr>
<td>Butter</td>
<td>14
</td>
<td>15
</td>
<td >16
</td>
</tr>
</table>
<button onclick="tablesToExcel(['tbl1','tbl2'], ['ProductDay1','ProductDay2'], 'TestBook.xls', 'Excel')">Export to Excel</button>
Found the solution for this. I use the DataTables. Read more here DataTables forum

How to get the value of the table using HtmlAgilityPack

i Need to get the content of the table. Below is my table
<tr class="oddRow" id="activeFiles25472825Row">
<td>Author Photo</td>
<td></td>
<td>
<div id="active_25472825_CALLOUT" class="fileTruncateCallout" style="position:absolute; display:none">JCS.jpg</div>
</td>
<td>01/28/2017</td>
<td>30.7 KB</td>
<td>Member</td>
<td>
download
</td>
</tr>
Here is my code. But i don't know what's next. I tried getting the value i need but i can't make it work. it didn't give me output, or error
HtmlAgilityPack.HtmlDocument newdoc = new HtmlAgilityPack.HtmlDocument();
newdoc.LoadHtml(htmlString);
GeckoElementCollection links = wb.Document.GetElementsByTagName("td");
foreach (var itm in links)
{
}
And the output
is
Name: JCS.jpg
Link: https://google.com.download
You can try something like this, I didn't test it but hopefully you understand the idea behind
foreach (var td in newDoc.DocumentNode.Descendants("td"))
{
var div = td.ChildNodes.FirstOrDefault(c => c.Name.Equals("div") && c.GetAttributeValue("class", "").Equals("fileTruncateCallout"));
if (div != null)
{
var name = div.InnerText;
}
else
{
var aNode = td.ChildNodes.FirstOrDefault(c => c.Name.Equals("a"));
if(aNode != null)
{
var href = aNode.GetAttributeValue("href", "");
}
}
}

How to get the column index of particular value of td in HTML tables

Hi there,
I created an HTML table and what I need is to find the column index of a particular value,
but I don't know how to access each value of a table. That is why I am unable to get the index of the column.
Here is my table.
<table>
<tr>
<td>01</td>
<td>02</td>
<td>03</td>
</tr>
<tr>
<td>04</td>
<td>05</td>
<td>06</td>
</tr>
<tr>
<td>07</td>
<td>08</td>
<td>09</td>
</tr>
</table>
and I want the index of the value 03.
The following is a working code, first traverse the table to get the values in this, then compare with your required value and find the index of it..,
<html>
<head>
<script>
function cellvalues() {
var table = document.getElementById('mytable');
for (var r = 0, n = table.rows.length; r < n; r++) {
for (var c = 0, m = table.rows[r].cells.length; c < m; c++) {
var hi=table.rows[r].cells[c].innerHTML;
if(hi==03)
{
alert(table.rows[r].cells[c].cellIndex);
}
}
}
}
</script>
</head>
<body>
<table id="mytable">
<tr><td>01</td><td>02</td><td>03</td>
</tr>
<tr><td>04</td><td>05</td><td>06</td>
</tr>
<tr><td>07</td><td>08</td><td>09</td>
</tr>
</table>
<input type="button" onclick=cellvalues()>
</body>
</html>
You can try this (untested)
var tab = document.getElementsByTagName("table")[0];
var tr = tab.childNodes;
for (var i = 0; i < tr.length; i++) {
var td = tr[i].childNodes;
for (var j = 0; j < tr.length; j++) {
if (td[j].innerHTML == "03") {
alert(i + "," + j);
}
}
}
I suggest using jQuery index for this:
var trCollection = $('table tr');
$('table td').each(function () {
console.log(trCollection.index($(this).parent()));
});