I am trying to decode this HTML page using Node.js with Request module: http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp
The browser's JavaScript console reports the charset as windows-1252:
document.characterSet = "windows-1252";
I tried every available encoding in iconv-lite, but they all return the wrong text.
var body = iconv.decode(new Buffer(body), "windows1252");
Does anyone have any idea how to decode this page?
Sample code:
request('http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp', function (err, res, body) {
var body = iconv.decode(new Buffer(body), "windows1252");
console.log(body);
});
Returns:
...
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso n�o permitido.
</td>
...
Decoded string should be:
...
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso não permitido.
</td>
...
Thanks.
This code
var request = require('request');
request('http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp', function (err, res, body) {
console.log(body);
});
Outputs the page
<script language="JavaScript">
function proxima(link)
{
location.replace(link);
return false;
}
function carrega(vobjeto) {
for (var va = 0 ; va < document.forms[0].elements.length; va++) {
if (document.forms[0].elements[va].name == vobjeto) {
document.forms[0].elements[va].focus();
}
}
return false;
}
function volta(vvolta) {
history.go(vvolta*-1);
return false;
}
function SaltaCampo (campo, prox, tammax, teclapres)
{
var tecla = teclapres.keyCode;
vr = campo.value;
tam = vr.length;
if (tecla != 0 && tecla != 10 && tecla != 24)
if (tam == tammax)
prox.focus();
}
</script>
<html>
<head>
<title>Tela de respostas</title>
</head>
<body background="area_texto_back.jpg">
<table border="0" width="100%">
<tr>
<td valign="middle" align="left">
<table border="0" cellspacing="0" cellpadding="0">
<!-- Inibido tendo em vista novo modelo site da SRF (Luis Carlos-22/11/2003)
<tr>
<td>
<img src="srf.gif" height="48" alt="srf.gif (2074 bytes)" width="184">
</td>
</tr> -->
<tr>
<td>
<font color="#000080" face="Arial">
<b>Acesso indevido</b></font>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td valign="middle" align="left"><hr size="1">
</td>
</tr>
</table>
<table border="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left"><b><font face="Arial" size="2">
Contribuinte,
</td>
<td valign="middle" align="right" >
</td>
</tr>
<tr>
</table>
<table border="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left"><b><font face="Arial" size="2">
Acesso n�o permitido.
</td>
</tr>
<tr>
<td>
</td>
</tr>
<tr>
<td valign="middle" align="left" colspan="2"><hr size="1">
</td>
</tr>
</table>
</body>
</html>
The encoding the page reports through document.characterSet is wrong; the correct encoding is ISO-8859-1:
// iconv needs the raw response bytes. By default request decodes the body as
// UTF-8, which turns the single byte for "ã" into U+FFFD before iconv ever
// sees it, so ask for a Buffer with `encoding: null` and decode that instead.
request({
  url: 'http://www.receita.fazenda.gov.br/PessoaJuridica/CNPJ/cnpjreva/Cnpjreva_Erro.asp',
  encoding: null
}, function (err, res, body) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(iconv.decode(body, "ISO-8859-1"));
});
Related
I am working on an old Japanese website project. I downloaded the old files from the server, and the site works fine offline in a browser.
But when I open the files in an editor, the text is broken. I tried to fix this by switching the editor's encoding to most of the available options, such as:
utf-8
utf-8 with bom
Shift JIS
etc
I also tried several modern editors and tried changing the meta charset. I searched Google for a solution as well, but found nothing that helped.
Only HTML Example
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
<meta name="keywords" content="�ソスp�ソス�ソス,�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス,�ソス�ソス�ソス�ソス\,�ソスp�ソス�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス,,�ソスp�ソス鼬暦ソスフ外�ソス�ソス�ソスl�ソスl�ソズ派�ソス�ソス">
<meta name="description" content="�ソスp�ソス�ソスフ翻�ソス�ソスE�ソスハ厄ソスヘビ�ソス[�ソスR�ソスX�ソスI�ソスp�ソス�ソスl�ソスC�ソスe�ソスB�ソスu�ソスノゑソス�ソスm�ソス�ソス�ソスネテ�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソスB�ソスr�ソスW�ソスl�ソスX�ソスA�ソス�ソス�ソスI�ソスE�ソスZ�ソスp�ソスI�ソスネ難ソスe�ソスフテ�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソスワで包ソス�ソスL�ソス�ソス�ソス�ソス�ソス�ソスナのテ�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソス�ソスs�ソス�ソス�ソストゑソス�ソス�ソスワゑソス�ソスB">
<meta name="robots" content="index,follow">
<title>�ソスp�ソス�ソス �ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス �ソス�ソス�ソス�ソス\</title>
<script language="JavaScript" type="text/JavaScript">
function MM_preloadImages() { //v3.0 var d=document; if(d.images){ if(!d.MM_p) d.MM_p=new Array(); var i,j=d.MM_p.length,a=MM_preloadImages.arguments;
for(i=0; i
<a.length; i++) if (a[i].indexOf( "#")!=0){ d.MM_p[j]=new Image; d.MM_p[j++].src=a[i];}} } function MM_swapImgRestore() {
//v3.0 var i,x,a=document.MM_sr; for(i=0;a&&i<a.length&&(x=a[i])&&x.oSrc;i++) x.src=x.oSrc; } function MM_findObj(n,
d) { //v4.01 var p,i,x; if(!d) d=document; if((p=n.indexOf( "?"))>0&&parent.frames.length) { d=parent.frames[n.substring(p+1)].document; n=n.substring(0,p);} if(!(x=d[n])&&d.all) x=d.all[n];
for (i=0;!x&&i
<d.forms.length;i++) x=d.forms[i][n]; for(i=0;!x&&d.layers&&i<d.layers.length;i++) x=MM_findObj(n,d.layers[i].document);
if(!x && d.getElementById) x=d.getElementById(n); return x; } function MM_swapImage() { //v3.0 var i,j=0,x,a=MM_swapImage.arguments;
document.MM_sr=new Array; for(i=0;i<(a.length-2);i+=3) if ((x=MM_findObj(a[i]))!=null){document.MM_sr[j++]=x; if(!x.oSrc)
x.oSrc=x.src; x.src=a[i+2];} } function MM_jumpMenu(targ,selObj,restore){ //v3.0 eval(targ+ ".location='"+selObj.options[selObj.selectedIndex].value+
"'"); if (restore) selObj.selectedIndex=0; } </script>
<link href="../../../styleseet.css" rel="stylesheet" type="text/css">
</head>
<body class="bodycss" id="02">
<table class="top" cellpadding="0" cellspacing="0">
<tr>
<td align="center">
<table class="top_text" cellpadding="0" cellspacing="0">
<tr>
<td>
�ソス#�ソスp�ソス�ソス|�ソス�ソス �ソスp�ソス�ソスハ厄ソス �ソスp�ソス�ソスフネ�ソスC�ソスe�ソスB�ソスu�ソスX�ソス^�ソスb�ソスt�ソス|�ソス�ソスメにゑソス�ソスT�ソス|�ソス[�ソスg
</td>
</tr>
</table>
</td>
</tr>
</table>
<table width="790" border="0" align="center" cellpadding="0" cellspacing="0">
<tr>
<td bgcolor="#FFFFFF">
<table class="sub_bg" cellpadding="0" cellspacing="0">
<tr>
<td rowspan="2">
<a href="/">
<img src="../../img/spacer.gif" width="197" height="47" alt="�ソスr�ソス[�ソスR�ソスX" border="0">
</a>
</td>
<td class="top_Navi">
<ul>
<li class="gnv01">
�ソス�ソス�ソス{�ソス�ソス
</li>
<li class="gnv02">
english
</li>
<li class="gnv03">
�ソス�ソス�ソス�ソス�ソス�ソス
</li>
<li class="gnv04">
�ソスリ搾ソス�ソス�ソス
</li>
</ul>
</td>
</tr>
<tr>
<td height="35" align="right">
<table class="ken" cellspacing="0" cellpadding="0">
<tr>
<td class="ken_bg">�ソスT�ソスC�ソスg�ソス�ソス�ソス�ソス</td>
<td>
<input type=text name=q maxlength=255 class="ken_input">
</td>
<td width="50">
<input type=submit name=btnG value="�ソス�ソス�ソス�ソス" class="ken_btn">
</td>
</tr>
</table>
</td>
</tr>
</table>
<table class="sub_bg02" cellspacing="0" cellpadding="0" border="0">
<tr>
<td class="sub_tdregi">
<table class="sub_regi" cellspacing="0" cellpadding="0">
<tr>
<td class="title">�ソスo�ソス^�ソス�ソス�ソス�ソス</td>
<td class="con">
<script type="text/javascript" src="../../js/country.js"></script>
�ソス�ソス�ソス�ソス</td>
</tr>
<tr>
<td class="title">�ソスo�ソス^�ソスl�ソス�ソス</td>
<td class="con">
<script type="text/javascript" src="../../js/people.js"></script>
�ソスl</td>
</tr>
<tr>
<td class="title">�ソスo�ソス^�ソス�ソス�ソス齔�</td>
<td class="con">
<script type="text/javascript" src="../../js/language.js"></script>
�ソス�ソス�ソス�ソス</td>
</tr>
</table>
</td>
<td class="text999">�ソスr�ソス[�ソスR�ソスX�ソスナは外�ソス�ソス�ソスl�ソスl�ソスb�ソスg�ソス�ソス�ソス[�ソスN�ソス�ソス�ソス�ソス�ソス�ソスA�ソス�ソス�ソスネゑソス�ソスニ撰ソス�ソスE�ソス�ソスツなゑソス�ソスワゑソス�ソスB</td>
</tr>
</table>
<div class="down_Navi">
<ul>
<li class="gn01">
�ソスz�ソス[�ソス�ソス
</li>
<li class="gn02">
�ソス�ソス闊オ�ソス�ソス�ソス�ソス�ソス�ソス
</li>
<li class="gn03">
�ソスO�ソス�ソス�ソスl�ソスh�ソス�ソス�ソスT�ソス[�ソスr�ソスX
</li>
<li class="gn04">
�ソスC�ソスO�ソスr�ソスW�ソスl�ソスX�ソスT�ソス|�ソス[�ソスg
</li>
<li class="gn05">
�ソス�ソス�ソスロ交流奇ソス�ソス
</li>
<li class="gn06">
�ソスX�ソス^�ソスb�ソスt�ソスo�ソス^
</li>
<li class="gn07">
�ソス�ソス�ソス�ソス齬暦ソス\
</li>
<li class="gn08">
�ソス�ソス�ソスマゑソスヒ暦ソス
</li>
</ul>
</div>
<table width="100%" border="0" cellspacing="0" cellpadding="0" style="border:1px #000 solid;">
<tr>
<td width="140" valign="top" bgcolor="#42418c">
<!--#include Virtual="../left-->
</td>
<td width="641" valign="top">
<br>
<table width="630" border="0" cellpadding="0" cellspacing="0" class="f12">
<tr>
<td>
TOP�ソス�ソス
�ソス�ソス闊オ�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス
�ソスp�ソス�ソス�ソス�ソス
�ソスp�ソス�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス\
</td>
</tr>
<tr>
<td> </td>
</tr>
<tr align="center ">
<td>
<br>
�ソスp�ソス�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソスs�ソスn�ソスo |
�ソスi�ソス�ソス�ソスロ擾ソス
|
<a href="price>�ソス�ソス�ソス�ソス\</a> |
<a href=" estimation>�ソス�ソス�ソスマゑソス</a>
|
�ソスp�ソス�ソスT�ソス[�ソスr�ソスX�ソス齬�
</td>
</tr>
<tr>
<td align="center ">
<br>
<br>
<font color="#000000 " size="3 ">�ソス�ソス�ソスp�ソス�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス\
<br>
</font>
</td>
</tr>
<tr>
<td align="center ">
<table width="95% " border="0 " cellspacing="5 " cellpadding="0 " bgcolor="#CCCCCC
" height="200 ">
<tr>
<td bgcolor="#FFFFFF " valign="top ">
<div align="left ">
<table border="0 " width="100% " cellspacing="3 ">
<tr>
<td width="49% ">
<div align="center ">
<center>
<table border="1 " width="300 " cellspacing="0 " bordercolor="#000080
" bordercolordark="#000080 " bordercolorlight="#000080 ">
<tr>
<td width="293 " align="center ">
<p align="center ">�ソスp�ソス�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス\</p>
</td>
</tr>
<tr>
<td align="center ">�ソス�ソス600/1�ソス�ソス</td>
</tr>
</table>
</center>
</div>
</td>
</tr>
<tr>
<td> </td>
</tr>
<tr>
<td>
<table width="300 " border="1 " align="center " cellspacing="0 " bordercolor="#000080
" bordercolorlight="#000080 " bordercolordark="#000080 ">
<tr>
<td width="294 " align="center ">
<p align="center ">�ソスp�ソス�ソスe�ソス[�ソスv�ソスN�ソス�ソス�ソス�ソス+�ソス�ソス�ソス{�ソス�ソスソ具ソス\</p>
</td>
</tr>
<tr>
<td align="center ">�ソス�ソス2,500/1�ソス�ソス</td>
</tr>
</table>
</td>
</tr>
<tr>
<td>
<br> �ソス�ソス�ソスl�ソスF
<br> �ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソスヤゑソスm�ソスF�ソス�ソスフ鯉ソス�ソスマゑソス�ソスニなゑソスワゑソス�ソスB
<br> �ソス�ソス�ソス[�ソス�ソス�ソスヘ通擾ソスA�ソスc�ソスニ難ソス3�ソス�ソス�ソスネ擾ソスニなゑソスワゑソス�ソスB
<br> �ソス�ソス�ソスX�ソス�ソス�ソスノゑソス�ソスe�ソス[�ソスv�ソスフ受け渡�ソス�ソス�ソスフ場合�ソスヘ郵�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス[�ソス�ソス�ソスノ会ソス�ソスZ�ソス�ソス�ソス�ソスワゑソス�ソスB
<br> �ソス�ソス�ソス[�ソスi�ソスヘ��ソス�ソス[�ソスh�ソスf�ソス[�ソス^�ソス[�ソスi�ソスニなゑソスワゑソス�ソスB
<br> �ソス�ソス�ソスナ低注�ソス�ソス�ソス�ソス�ソスi3,000�ソス~�ソス�ソス�ソス�ソスニなゑソスワゑソス�ソスB
<br> �ソス�ソス�ソス�ソス�ソスヒ暦ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソスe�ソスノ関ゑソス�ソストキ�ソス�ソス�ソス�ソス�ソスZ�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス鼾��ソスヘ、�ソスL�ソス�ソス�ソス�ソス�ソスZ�ソス�ソス�ソス�ソス�ソスクゑソス�ソスワゑソス�ソスB
<br> �ソス�ソス�ソス�ソス�ソス�ソスナは別途�ソス�ソス�ソス�ソス�ソスニなゑソスワゑソス�ソスB
<br> �ソス�ソス�ソス�ソス�ソス�ソスネ擾ソスヤなゑソスフに関ゑソス�ソストは以会ソス�ソスフ表�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソスB
<tr>
<TD>
<TABLE width="100% " border=1 cellspacing="0 " bordercolor="#000080 " cellpadding="2
" bordercolorlight="#000080 " bordercolordark="#000080 ">
<TBODY>
<TR>
<TD width="164 ">
<DIV align="center">�ソス�ソス�ソス�ソス</DIV> </TD>
<TD width="221 ">
<DIV align="center">�ソス�ソスe</DIV>
</TD>
<TD width="165 ">
<DIV align="center">�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス</DIV>
</TD>
</TR>
<TR>
<TD width="164 ">�ソス�ソス蜷ォ�ソスフ搾ソス�ソス�ソス�ソス�ソス�ソス</TD>
<TD width="221 ">�ソス�ソステ関連�ソスネど撰ソス蜷ォ�ソスフ搾ソス�ソス�ソス�ソス�ソス�ソス </TD>
<TD width="165 ">25�ソス�ソス�ソス`50�ソス�ソス�ソス�ソス�ソス�ソス </TD>
</TR>
<TR>
<TD width="164 ">�ソスフ趣ソス�ソスA�ソスj�ソス�ソス�ソス[�ソスX�ソスネゑソス</TD>
<TD width="221 ">�ソスフの歌趣ソス�ソスA�ソス�ソスb�ソス�ソス�ソスx�ソス�ソス�ソス�ソス�ソス�ソス�ソスフゑソスネゑソス </TD>
<TD width="165 ">100�ソス�ソス�ソス�ソス�ソス�ソス </TD>
</TR>
<TR>
<TD width="164 ">�ソス�ソス�ソス}</TD>
<TD width="221 ">�ソスS�ソス`�ソスT�ソスc�ソスニ難ソス�ソスネ難ソス </TD>
<TD width="165 ">50�ソス�ソス�ソス�ソス�ソス�ソス </TD>
</TR>
<TR>
<TD width="164 ">�ソス�ソス�ソス�ソス�ソス}</TD>
<TD width="221 ">�ソスP�ソス`�ソスR�ソスc�ソスニ難ソス�ソスネ難ソス </TD>
<TD width="165 ">100�ソス�ソス�ソス�ソス�ソス�ソス </TD>
</TR>
<TR>
<TD width="164 ">�ソス^�ソス�ソス�ソス�ソスヤゑソス�ソスヌゑソス�ソスネゑソス�ソス�ソスフ(�ソス�ソス�ソスj</TD>
<TD width="221 ">�ソス�ソス�ソス�ソス�ソス�ソス�ソスノゑソス�ソス�ソス�ソス�ソス�ソス </TD>
<TD width="165 ">25�ソス�ソス�ソス`50�ソス�ソス�ソス�ソス�ソス�ソス </TD>
</TR>
</TBODY>
</TABLE>
</TD>
<tr>
<TD> �ソスi�ソス�ソス�ソスj�ソスヌゑソス�ソス�ソス�ソストゑソス�ソス�ソス�ソス�ソス�ソスネゑソス�ソスモ擾ソス�ソスノ関ゑソス�ソストは×�ソス~�ソス~�ソスニ表�ソスL�ソス�ソス�ソス�ソス鼾��ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソスワゑソス�ソスB
<br>
<br> �ソス�ソス�ソスマゑソスノ関ゑソス�ソス�ソス
<br> �ソス�ソス�ソス�ソス�ソスマ対会ソス�ソスi1�ソス�ソス�ソスヤ以難ソスj�ソスA�ソス�ソス�ソスマ厄ソス�ソス�ソス
<br> �ソス�ソス�ソス{�ソス�ソス�ソス�ソス�ソス[�ソス�ソス�ソスf�ソスB�ソスX�ソスJ�ソスE�ソス�ソス�ソスg�ソスノ関ゑソス�ソストはゑソス�ソス竝��ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソス�ソスB
</TD>
</TR>
</table>
</div>
</td>
</tr>
</table>
</td>
</tr>
</table>
<br>
<table width="600 " border="0 " align="center " cellspacing="0 ">
<tr>
<td height="20 "></td>
</tr>
<tr>
<td align="center ">
<!--#include Virtual="../foot_price-->
</td>
</tr>
</table>
<br>
</td>
</tr>
<tr>
<td colspan="5">
<table width="100%" class="footer" border="0" cellpadding="0" cellspacing="0">
<tr>
<th width="397" height="50" class="text_no">�ソス�ソス�ソス�ソス�ソスマゑソスE�ソス�ソス�ソス竄「�ソス�ソス�ソス墲ケ�ソスヘ気�ソスy�ソスノどゑソス�ソス�ソス</th>
<td width="395">
<a href="/02foreigner/inquiry-for-dispatch>
<img src="../../img/spacer.gif" width="125" height="68" alt="�ソス�ソス�ソス竝��ソス�ソス">
</a>
<a href="/06price/">
<img src="../../img/spacer.gif" width="121" height="68" alt="�ソス�ソス�ソス�ソス\">
</a>
</td>
</tr>
<tr>
<td height="50" colspan="2" class="footer_text">�ソス�ソス�ソス�ソス�ソス �ソスミ �ソス�ソス105-0013�ソス�ソス�ソス�ソス�ソスs�ソス`�ソス�ソスl�ソス�ソス�ソス�ソス2-1-3 �ソス�ソス�ソスX�ソスr�ソス�ソス4F
Tel�ソスF03-5733-4264 Fax�ソスF03-3433-3320
<br /> Copyright © by b-cause,Inc. 2003-2016 </td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>
All the Japanese content in the HTML seems completely broken.
I tried decoding the garbled strings with various encodings using ひたすらデコード, but none of the results are meaningful strings.
The HTML file was probably converted incorrectly and saved when you opened it in the editor. The reliable way to recover the content is to download the HTML file from the server again and then open it with an editor that handles Japanese encodings, such as Notepad++.
I would like some help modifying this code:
<!DOCTYPE html>
<html>
<head>
<script type="text/javascript">
/**
 * Copies the score typed into each Q<n>CALC input (n = 1..4) into the
 * matching htqf<n> cell of the hidden table, then exports that hidden
 * table to Excel via tableToExcel.
 */
function fillHidTable() {
  for (var i = 1; i < 5; i++) {
    // textContent rather than innerHTML: the value is free text typed by the
    // user and must land in the cell as text, not be parsed as markup.
    document.getElementById("htqf" + i).textContent =
      document.getElementById("Q" + i + "CALC").value;
  }
  tableToExcel('hidTable', 'Analysis Results');
}
/**
 * Exports an HTML table to Excel by navigating the browser to a base64
 * `data:` URI that wraps the table's inner HTML in an Excel-flavoured HTML
 * workbook template.
 *
 * @param {string|Element} table - A table element, or the id of one.
 * @param {string} [name] - Worksheet name; defaults to 'Worksheet'.
 * @throws {Error} If `table` is an id that matches no element.
 */
var tableToExcel = (function () {
  var uri = 'data:application/vnd.ms-excel;base64,';
  var template = '<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns="http://www.w3.org/TR/REC-html40"><head><!--[if gte mso 9]><xml><x:ExcelWorkbook><x:ExcelWorksheets><x:ExcelWorksheet><x:Name>{worksheet}</x:Name><x:WorksheetOptions><x:DisplayGridlines/></x:WorksheetOptions></x:ExcelWorksheet></x:ExcelWorksheets></x:ExcelWorkbook></xml><![endif]--></head><body><table>{table}</table></body></html>';

  // btoa() rejects characters above U+00FF, so the string is first turned
  // into its UTF-8 byte sequence (one char per byte).
  var base64 = function (s) {
    return window.btoa(unescape(encodeURIComponent(s)));
  };

  // Replaces every {key} placeholder in s with c[key].
  var format = function (s, c) {
    return s.replace(/{(\w+)}/g, function (m, p) {
      return c[p];
    });
  };

  // The worksheet name is placed inside an XML element, so markup characters
  // in it must be escaped or the workbook header becomes malformed.
  var escapeXml = function (s) {
    return String(s)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  };

  return function (table, name) {
    var element = table && table.nodeType ? table : document.getElementById(table);
    if (!element) {
      throw new Error('tableToExcel: table "' + table + '" not found');
    }
    var ctx = { worksheet: escapeXml(name || 'Worksheet'), table: element.innerHTML };
    window.location.href = uri + base64(format(template, ctx));
  };
})();
</script>
<title>HTML Form Data to Excel</title>
<META NAME="Generator" CONTENT="EditPlus">
<META NAME="Author" CONTENT="">
<META NAME="Keywords" CONTENT="">
<META NAME="Description" CONTENT="">
<style type="text/css" media="screen">
.divCenMid{font-family:Arial,sans-serif;font-size:14pt;font-style:normal;font-weight:700;text-align:center;vertical-align:middle;margin:0;}
.allbdrCenMid{border:.75pt solid windowtext;color:#000;font-family:Arial,sans-serif;font-size:10pt;font-style:normal;font-weight:400;text-align:center;vertical-align:middle;margin:0;}
.allbdrCenTop{border:.75pt solid windowtext;color:#000;font-family:Arial,sans-serif;font-size:10pt;font-style:normal;font-weight:400;text-align:center;vertical-align:top;margin:0;}
.allbdrLtMid{border:.75pt solid windowtext;color:#000;font-family:Arial,sans-serif;font-size:10pt;font-style:normal;font-weight:400;text-align:left;vertical-align:middle;margin:0;}
.allbdrLtTop{border:.75pt solid windowtext;color:#000;font-family:Arial,sans-serif;font-size:10pt;font-style:normal;font-weight:400;text-align:left;vertical-align:top;margin:0;}
</style>
</head>
<body>
<table width= "565px" cellspacing="0" cellpadding="0" style="border-spacing:0;" id="QMSTable">
<col width="25px"/>
<col width="120px"/>
<col width="360px"/>
<col width="60px"/>
<tr>
<td class="divCenMid" colspan = "4"> QMS Assessment</td>
</tr>
<tr>
<td class="allbdrCenMid"> No</td>
<td class="allbdrCenMid"> Criteria</td>
<td class="allbdrLtMid"> Question</td>
<td class="allbdrCenMid"> Score</td>
</tr>
<tr>
<td class="allbdrCenTop"> Q1</td>
<td class="allbdrLtTop"> Quality Unit Independency</td>
<td class="allbdrLtTop"> Do you have the Quality Unit?</td>
<td class="allbdrCenMid">
<input id="Q1CALC" type="text" value="" class="nobdrCenMid" style="overflow:hidden; width:93% " name="Q1CALC"/>
</td>
</tr>
<tr>
<td class="allbdrCenTop"> Q2</td>
<td class="allbdrLtTop"> Apply PICS GMP</td>
<td class="allbdrLtTop"> Which GMP regulation do you use?</td>
<td class="allbdrCenMid">
<input id="Q2CALC" type="text" value="" class="nobdrCenMid" style="overflow:hidden; width:93% " name="Q2CALC"/>
</td>
</tr>
<tr>
<td class="allbdrCenTop"> Q3</td>
<td class="allbdrLtTop"> Deviation or Non-conformance</td>
<td class="allbdrLtTop"> Do you have a deviation or non-conformance procedure?</td>
<td class="allbdrCenMid">
<input id="Q3CALC" type="text" value="" class="nobdrCenMid" style="overflow:hidden; width:93% " name="Q3CALC"/>
</td>
</tr>
<tr>
<td class="allbdrCenTop"> Q4</td>
<td class="allbdrLtTop"> Complaint</td>
<td class="allbdrLtTop"> Do you have a customer complaint procedure?</td>
<td class="allbdrCenMid">
<input id="Q4CALC" type="text" value="" class="nobdrCenMid" style="overflow:hidden; width:93% " name="Q4CALC"/>
</td>
</tr>
</table>
<div id="hidTable" style="display: none">
<table id="testTable">
<caption>Supplier Risk Analysis</caption>
<colgroup></colgroup>
<colgroup></colgroup>
<colgroup></colgroup>
<thead>
<tr>
<th>No.</th>
<th>Question</th>
<th>Score</th>
</tr>
</thead>
<tbody>
<tr>
<td>Q1</td>
<td>Do you have the Quality Unit?</td>
<td id="htqf1">-</td>
</tr>
<tr>
<td>Q2</td>
<td>Apply PICS GMP?</td>
<td id="htqf2">-</td>
</tr>
<tr>
<td>Q3</td>
<td>Do you have a deviation or non-conformance procedure?</td>
<td id="htqf3">-</td>
</tr>
<tr>
<td>Q4</td>
<td>Do you have a customer complaint procedure?</td>
<td id="htqf4">-</td>
</tr>
</tbody>
</table>
</div>
<input type="button" onclick="fillHidTable()" name=PatientDatabase value="Export Data to Excel">
</body>
</html>
It produces a web-based form that I can play with and edit. Is there a way to change only the last button so that it saves the table into Excel cells, making it more like a database? And is there a way to save it to a specific file and have it fill in or append the data?
You can convert the HTML div to Excel like this:
/**
 * Exports an HTML table to Excel by navigating the browser to a base64
 * `data:` URI that wraps the table's inner HTML in an Excel-flavoured HTML
 * workbook template.
 *
 * @param {string|Element} table - A table element, or the id of one.
 * @param {string} [name] - Worksheet name; defaults to 'Worksheet'.
 * @throws {Error} If `table` is an id that matches no element.
 */
var tableToExcel = (function () {
  var uri = 'data:application/vnd.ms-excel;base64,';
  var template = '<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns="http://www.w3.org/TR/REC-html40"><head><!--[if gte mso 9]><xml><x:ExcelWorkbook><x:ExcelWorksheets><x:ExcelWorksheet><x:Name>{worksheet}</x:Name><x:WorksheetOptions><x:DisplayGridlines/></x:WorksheetOptions></x:ExcelWorksheet></x:ExcelWorksheets></x:ExcelWorkbook></xml><![endif]--></head><body><table>{table}</table></body></html>';

  // btoa() rejects characters above U+00FF, so the string is first turned
  // into its UTF-8 byte sequence (one char per byte).
  var base64 = function (s) {
    return window.btoa(unescape(encodeURIComponent(s)));
  };

  // Replaces every {key} placeholder in s with c[key].
  var format = function (s, c) {
    return s.replace(/{(\w+)}/g, function (m, p) {
      return c[p];
    });
  };

  // The worksheet name is placed inside an XML element, so markup characters
  // in it must be escaped or the workbook header becomes malformed.
  var escapeXml = function (s) {
    return String(s)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  };

  return function (table, name) {
    var element = table && table.nodeType ? table : document.getElementById(table);
    if (!element) {
      throw new Error('tableToExcel: table "' + table + '" not found');
    }
    var ctx = { worksheet: escapeXml(name || 'Worksheet'), table: element.innerHTML };
    window.location.href = uri + base64(format(template, ctx));
  };
})();
I am writing Selenium code in Java. On a particular web page I want to select one link from a group of links, where each link sits next to some embedded text within a table. How can I select a particular link in this situation? For example:
run title--text 1
run title--text 2
run title--text 3
How can I select the Run link for a specific title text? The text is not a label; it is just plain text on the web page.
I am using the following code:
verify.text("text 1");
This only verifies that the text is present; it does not locate the link, because every link is named Run. How can I identify the Run link that corresponds to a given title? The HTML for the page is:
<HTML>
<HEAD>
<TITLE>TEST</TITLE>
</HEAD>
<BODY>
<div align="center"><table class="module" width="630">
<tr>
<th class="banner" width="70">ACTION</th>
<th class="banner" width="560">REPORT TEMPLATE</th>
</tr>
<tr>
<td class="modulenav" width="70">
<table class="innermodule" width="100%">
<tr><td class="moduleNav"><a class="listingLink"
href="www.abc.com/">Run</a></td></tr>
<tr><td class="moduleNav"><a class="listingLink"
href="www.zxc.com">UnShare</a></td></tr>
</table>
</td>
<td>
<table class="innerModule" width="100%">
<tr>
<td class="label" width="70">Title</td>
<td width="490"><span class="listingHead">Incident Performance by Priority</span></td>
</tr>
<tr>
<td class="Label" width="70">Description</td>
<td class="listing"></td>
</tr>
<tr>
<td class="Label" width="70">Owner</td>
<td class="listing"> Software Engineer Tel: </td>
</tr>
<tr>
<td class="Label" width="70">Shared With</td>
<td class="listing">
Software Engineer Tel: <br>
</td>
</tr>
<tr>
<td class="label">Report Type</td>
<td class="listing">Performance by Priority</td>
</tr>
</table>
</td>
</tr>
<tr><td class="tableRuleNavy" colspan="2"></td></tr>
<tr>
<td class="modulenav" width="70">
<table class="innermodule" width="100%">
<tr><td class="moduleNav"><a class="listingLink"
href="www.abc.com">Run</a></td></tr>
<tr><td class="moduleNav"><a class="listingLink"
href="www.cxd.com">UnShare</a></td></tr>
</table>
</td>
<td>
<table class="innerModule" width="100%">
<tr>
<td class="label" width="70">Title</td>
<td width="490"><span class="listingHead">Incident Trend Analysis Report</span></td>
</tr>
<tr>
<td class="Label" width="70">Description</td>
<td class="listing"></td>
</tr>
<tr>
<td class="Label" width="70">Owner</td>
<td class="listing">Software Engineer Tel: </td>
</tr>
<tr>
<td class="Label" width="70">Shared With</td>
<td class="listing">
Software Engineer Tel: <br>
</td>
</tr>
<tr>
<td class="label">Report Type</td>
<td class="listing">Trend Analysis</td>
</tr>
</table>
</td>
</tr>
<tr><td class="tableRuleNavy" colspan="2"></td></tr>
<tr>
<td class="modulenav" width="70">
<table class="innermodule" width="100%">
<tr><td class="moduleNav"><a class="listingLink"
href="www.sdfds.com">Run</a></td></tr>
<tr><td class="moduleNav"><a class="listingLink"
href="www.asdg.com">UnShare</a></td></tr>
</table>
</DIV>
</td>
</BODY>
</HTML>
Get a list of web elements and iterate over them.
This is not tested
E.g.
// Collect every link that sits inside a table row, then click the first one
// whose visible text matches. `driver` is your WebDriver instance.
List<WebElement> links = driver.findElements(By.cssSelector("tr a"));
for (WebElement link : links) {
    if (link.getText().equals("your visible text")) {
        link.click(); // click link
        // Stop here: the click may navigate away, which would make the
        // remaining elements in the list stale.
        break;
    }
}
Please help me construct a jQuery (phpQuery) query that parses the sample below and extracts all the URLs from links with the class "myblue". I am trying to make an app that displays the data from those URLs.
<table width="100%" cellspacing="1" cellpadding="2" border="0">
<tbody>
<tr>
<td class="inputtxt" height="20" bgcolor="#E4E4E4" colspan="2">
<b>Notices</b>
</td>
</tr>
<tr valign="top">
<td class="inputtxt" width="7%" valign="top" align="center">»</td>
<td width="93%" valign="top">
<a class="myblue" target="_blank" href="http://example.comn/"> Some Text</a>
</td>
</tr>
</tbody>
</table>
<table width="100%" cellspacing="1" cellpadding="2" border="0">
<tbody>
<tr>
<td class="inputtxt" height="20" bgcolor="#E4E4E4" colspan="2">
<b>Info</b>
</td>
</tr>
<tr valign="top">
<td class="inputtxt" width="7%" valign="top" align="center">»</td>
<td width="93%" valign="top">
<a class="myblue" target="_blank" href="xxxx.html"> Some Text</a>
</td>
</tr>
// Gather the href of every <a class="myblue"> link into a plain array.
var urls = [];
$('a.myblue').each(function (index, link) {
  urls.push($(link).attr('href'));
});
or
// .map() returns a jQuery wrapper around the mapped values; .get() unwraps it
// into a plain array of href strings.
var urls = $('a.myblue').map(function () {
  return $(this).attr('href');
}).get();
If you need to extract them all you can loop through like this -
// Log the href attribute of each a.myblue element to the console.
$('a.myblue').each(function() {
console.log( $(this).attr('href') );
});
I want to ask about parsing the colspan and rowspan values from a <table>.
for example like this:
<table cellpadding="2" cellspacing="2" border="1" width="50%">
<tbody>
<tr>
<td valign="top" rowspan="2" colspan="1" align="center">NO<br>
</td>
<td valign="top" rowspan="1" colspan="3" align="center">NAMA<br>
</td>
<td valign="top" rowspan="1" colspan="2" align="center">TELEPON<br>
</td>
<td valign="top" rowspan="2" colspan="1" align="center">KODE<br>
</td>
</tr>
<tr>
<td valign="top" align="center">DEPAN<br>
</td>
<td valign="top" align="center">TENGAH<br>
</td>
<td valign="top" align="center">BELAKANG<br>
</td>
<td valign="top" align="center">KODE<br>
</td>
<td valign="top" align="center">NO TLP<br>
</td>
</tr>
<tr>
<td valign="top" align="center">1<br>
</td>
<td valign="top">Ani<br>
</td>
<td valign="top">Tiara<br>
</td>
<td valign="top">Ramadika<br>
</td>
<td valign="top" align="center">021<br>
</td>
<td valign="top" align="center">8466729<br>
</td>
<td valign="top" align="center">17412<br>
</td>
</tr>
<tr>
<td valign="top" align="center">2<br>
</td>
<td valign="top">Dia<br>
</td>
<td valign="top">Andari<br>
</td>
<td valign="top">Putri<br>
</td>
<td valign="top" align="center">022<br>
</td>
<td valign="top" align="center">5930290<br>
</td>
<td valign="top" align="center">18291<br>
</td>
</tr>
<tr>
<td valign="top" align="center">3<br>
</td>
<td valign="top">Rangga<br>
</td>
<td valign="top">Dimas<br>
</td>
<td valign="top">Putra<br>
</td>
<td valign="top" align="center">023<br>
</td>
<td valign="top" align="center">8349829<br>
</td>
<td valign="top" align="center">13901<br>
</td>
</tr>
<tr>
<td valign="top" align="center">4<br>
</td>
<td valign="top">Niko<br>
</td>
<td valign="top">Reza<br>
</td>
<td valign="top">Anggara<br>
</td>
<td valign="top" align="center">024<br>
</td>
<td valign="top" align="center">4284982<br>
</td>
<td valign="top" align="center">21211<br>
</td>
</tr>
</tbody>
</table>
I use python for HTML parsing, like this:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html)
t = soup.find("table")
dat = [ map(str, row.findAll('td', { 'rowspan' })) for row in t.findAll("tr") ]
print dat[1]
But I am still confused about how to get the value of colspan.
I can already parse the table tags, but I don't know how to read the value of the colspan attribute. I tried using a regex, but I didn't succeed.
I recommend using the CSS selectors:
from bs4 import BeautifulSoup

# Read the sample page; the context manager closes the file once it is read.
with open("colspan_rowspan.html") as f:
    s = f.read()
soup = BeautifulSoup(s, "html.parser")
# select all td descendants of tr that have both colspan & rowspan
# (attribute selectors are chained; a comma inside one bracket is not valid CSS)
tags = soup.select('tr td[colspan][rowspan]')
# print out the values, for example:
print([(td['colspan'], td['rowspan']) for td in tags])
# will return [('1', '2'), ('3', '1'), ('2', '1'), ('1', '2')]
The easiest way to parse tables with colspan and rowspan is to use Node.js with cheerio and the cheerio-tableparser plugin.
var cheerio = require('cheerio'),
    cheerioTableparser = require('cheerio-tableparser'),
    fs = require('fs');
var text = fs.readFileSync("colspan_rowspan.html", 'utf8');
// Declare $ and data explicitly: bare assignments create implicit globals and
// throw a ReferenceError in strict mode.
var $ = cheerio.load(text);
// Adds the parsetable() method to the loaded cheerio instance.
cheerioTableparser($);
var data = $("table").parsetable(true, true, true);
console.log(data);
//data = >
//[ [ 'NO', 'NO', '1', '2', '3', '4' ],
// [ 'NAMA', 'DEPAN', 'Ani', 'Dia', 'Rangga', 'Niko' ],
// [ 'NAMA', 'TENGAH', 'Tiara', 'Andari', 'Dimas', 'Reza' ],
// [ 'NAMA', 'BELAKANG', 'Ramadika', 'Putri', 'Putra', 'Anggara' ],
// [ 'TELEPON', 'KODE', '021', '022', '023', '024' ],
// [ 'TELEPON', 'NO TLP', '8466729', '5930290', '8349829', '4284982' ],
// [ 'KODE', 'KODE', '17412', '18291', '13901', '21211' ] ]
The result is an array of columns. Cells merged by colspan or rowspan repeat the same value in every position they span.