VB.NET getElementById - html

I'm stumped. I don't want to use a WebBrowser in my application, and I want to get a specific element by id. my code is:
' Download the page and show its raw HTML in TextBox1.
Dim request As System.Net.HttpWebRequest = DirectCast(System.Net.WebRequest.Create("http://www.google.com/finance?q=NASDAQ:GOOG"), System.Net.HttpWebRequest)
Dim sourcecode As String
' Using blocks release the connection and the stream even if reading throws.
Using response As System.Net.HttpWebResponse = DirectCast(request.GetResponse(), System.Net.HttpWebResponse)
    Using sr As New System.IO.StreamReader(response.GetResponseStream())
        sourcecode = sr.ReadToEnd()
    End Using
End Using
TextBox1.Text = sourcecode
This gets me the source code. But how do I get a specific element? I would think that there is an easy way to do this... Btw I don't want to use Regex, or download HTML Agility Pack.

You can make a parse table to recognize html tags, and search for id=elementname (plus possible whitespace characters) inside the tags. It's not the impossible task it may seem, because you can ignore most tags and you don't have to validate the html. Just consider <>, and ignore the contents of quotes, scripts, etc. There are lots more details and it takes a little work, but it's fun programming.
The alternative would be to download something like html agility pack, use a browser, or use a regex, which you'd like to avoid.

Here's a very rough idea. It does not work for block elements that need a separate closing tag (like <div>...</div>), but it works fine for self-closing elements like <input />.
Also, I noticed that some of the tags' ids are enclosed in quotation marks and some are not, so you may have to tweak that.
I just roughed this code up and copy-pasted the routine to handle unquoted ids; it still needs work and could be shortened too.
<script runat="server">
' Page-level state shared by the routines below and by the inline expressions in the markup.
Private sourcecode As String   ' raw HTML exactly as downloaded
Private bodycode As String     ' lower-cased body section that MyGetElementById searches
Private RetVal As String       ' lookup result shown in the second text area
Protected Sub Page_Load(sender As Object, e As System.EventArgs)
    ' Refresh the downloaded page on every request, then answer a lookup
    ' only when the form was posted through the Submit button.
    LoadHttpStuff()
    Dim submitted As Boolean = (Request.Form("Button1") = "Submit")
    If submitted Then RetVal = MyGetElementById(Request("Text1"))
End Sub
' Downloads the page into sourcecode and stores a lower-cased copy of its
' <body>...</body> section in bodycode (the whole document when no body
' element can be located).
Private Sub LoadHttpStuff()
    Dim request As System.Net.HttpWebRequest
    Dim startat As Integer
    Dim finishat As Integer

    request = DirectCast(System.Net.WebRequest.Create("http://www.google.com/finance?q=NASDAQ:GOOG"), System.Net.HttpWebRequest)

    ' Using blocks release the connection and the stream even if reading throws.
    Using response As System.Net.HttpWebResponse = DirectCast(request.GetResponse(), System.Net.HttpWebResponse)
        Using sr As New System.IO.StreamReader(response.GetResponseStream())
            sourcecode = sr.ReadToEnd()
        End Using
    End Using

    ' Search for "<body" (no closing bracket, any letter case) so that
    ' <BODY> and <body class="..."> are found as well.
    startat = InStr(1, sourcecode, "<body", Microsoft.VisualBasic.CompareMethod.Text)
    finishat = InStr(1, sourcecode, "</body>", Microsoft.VisualBasic.CompareMethod.Text)

    If startat > 0 AndAlso finishat > startat Then
        ' + 7 keeps the closing </body> tag itself in the extracted text.
        bodycode = Mid(sourcecode, startat, finishat + 7 - startat)
    Else
        ' Mid() throws for a start position of 0 or a negative length,
        ' so fall back to searching the complete document.
        bodycode = sourcecode
    End If

    ' MyGetElementById lower-cases its search text, so the haystack must match.
    bodycode = LCase(bodycode)
End Sub
' Returns the opening tag (from "<" through ">") of the first element in
' bodycode whose id attribute equals Id, or "" when nothing matches.
' The quoted form id="..." is tried first; the unquoted form id=... is only
' tried when the quoted form does not occur anywhere in bodycode.
' The result is lower-case because bodycode is stored lower-cased.
' Limitation: this is a plain text search, so attributes whose name merely
' ends in "id=" and unquoted ids that merely start with Id also match.
Private Function MyGetElementById(Id As String) As String
    If bodycode Is Nothing OrElse Trim(Id) = "" Then
        Return ""
    End If

    Dim quoted As String = LCase("id=" & Chr(34) & Id & Chr(34))
    If InStr(bodycode, quoted) > 0 Then
        Return ExtractTagContaining(quoted)
    End If

    Return ExtractTagContaining(LCase("id=" & Id))
End Function

' Finds marker in bodycode and returns the text from the nearest "<" before
' it through the first ">" after it; "" when any of the three is missing.
Private Function ExtractTagContaining(marker As String) As String
    Dim hit As Integer = InStr(bodycode, marker)
    ' hit = 1 leaves no room for a "<" in front of the marker, and
    ' InStrRev throws when asked to start at position 0.
    If hit <= 1 Then
        Return ""
    End If

    Dim tagstart As Integer = InStrRev(bodycode, "<", hit - 1)
    If tagstart = 0 Then
        Return ""
    End If

    ' Stop at the FIRST ">" after the marker, i.e. the end of this tag.
    Dim tagend As Integer = InStr(hit, bodycode, ">")
    If tagend = 0 Then
        Return ""
    End If

    Return Mid(bodycode, tagstart, tagend - tagstart + 1)
End Function
</script>
<%-- Top-left text area: downloaded page source. Right column: id entry form.
     Bottom text area: the tag found for the submitted id.
     Both values are HTML-encoded so that markup in the downloaded page
     (for example a closing textarea tag or a script) cannot break out. --%>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title></title>
</head>
<body>
    <form id="form1" runat="server">
        <table style="width: 100%;">
            <tr>
                <td style="text-align:left; vertical-align: top; width: 75%;"><textarea rows="20" cols="80" style="width: 90%;" disabled="disabled"><%=Server.HtmlEncode(sourcecode)%></textarea></td>
                <td style="width: 25%; text-align: left; vertical-align: top;">
                    <table style="width:100%;">
                        <tr>
                            <td>Element Id <input id="Text1" name="Text1" type="text" /></td>
                        </tr><tr>
                            <td> </td>
                        </tr><tr>
                            <td> </td>
                        </tr><tr>
                            <td><input id="Button1" type="Submit" value="Submit" name="Button1" /></td>
                        </tr><tr>
                            <td> </td>
                        </tr><tr>
                            <td> </td>
                        </tr>
                    </table>
                </td>
            </tr><tr>
                <td style="width: 75%;"> </td>
                <td style="width: 25%;"> </td>
            </tr><tr>
                <td style="width: 100%;" colspan="2"><textarea rows="20" cols="80" style="width: 75%;" disabled="disabled"><%=Server.HtmlEncode(RetVal)%></textarea></td>
            </tr>
        </table>
    </form>
</body>
</html>
Hope it helps a little

Related

How to click HTML text with VBA

I'm writing a code to automatically fill some website with cells values:
' Opens the console in Internet Explorer, switches to the inventory search,
' fills it from the active worksheet and clicks the second result row.
' Needs references to "Microsoft Internet Controls" and
' "Microsoft HTML Object Library".
Sub prueba()
    Dim oIE As InternetExplorer
    Dim oDocument As HTMLDocument
    Dim ECICOR As HTMLSelectElement
    Dim resultRows As Object
    Dim i As Long
    Dim deadline As Date

    ' Worksheet row that supplies the search value. The original never
    ' assigned i, so Cells(i, 1) addressed row 0 and failed.
    ' NOTE(review): row 1 is an assumption - confirm the intended row.
    i = 1

    Set oIE = New InternetExplorer
    oIE.Visible = True
    oIE.Navigate "http://sirem.eci.geci/smcfs/console/login.jsp"
    WaitForPage oIE
    Set oDocument = oIE.Document

    Call oDocument.parentWindow.execScript("window.parent.sc.postDummyFormForWindow('/smcfs/console/inventory.search');", "JScript")
    ' The script presumably loads a new page, so wait and re-read the
    ' document instead of using the reference taken before the call.
    WaitForPage oIE
    Set oDocument = oIE.Document

    Set ECICOR = oDocument.getElementById("enterpriseFieldObj")
    ECICOR.Focus
    ECICOR.Click
    ECICOR.Value = "ECICOR"
    ECICOR.FireEvent ("onChange")

    oDocument.getElementsByClassName("unprotectedinput")(0).Value = Cells(i, 1)
    oDocument.getElementsByTagName("a")(0).Click

    ' Poll (10 s at most) until at least two result rows exist, then click
    ' the second one; index 1 is the second item of the collection.
    deadline = Now + TimeSerial(0, 0, 10)
    Do
        DoEvents
        Set oDocument = oIE.Document
        Set resultRows = oDocument.getElementsByClassName("evenrow")
    Loop Until resultRows.Length > 1 Or Now > deadline

    If resultRows.Length > 1 Then
        resultRows(1).Click
    Else
        MsgBox "No second 'evenrow' result row appeared within 10 seconds.", vbExclamation
    End If
End Sub

' Blocks (while pumping events) until the browser reports it is idle and the
' document is completely loaded (readyState 4).
Private Sub WaitForPage(ByVal browser As InternetExplorer)
    Do While browser.Busy Or browser.readyState <> 4
        DoEvents
    Loop
End Sub
So my problem is that my program doesn't do anything after the last line on the code and I don't know what problem it is because it worked before.
Here you can see the HTML code:
<
<TR class=evenrow><TD class=checkboxcolumn><INPUT type=checkbox value=%3CInventoryItem+ItemID%3D%22000000000152030052%22+OrganizationCode%3D%22ECICOR%22+ProductClass%3D%22%22+UnitOfMeasure%3D%22%22%2F%3E name=EntityKey oldChecked="false"> <INPUT type=hidden value=000000000152030052 name=ItemID_1> <INPUT type=hidden name=UOM_1> <INPUT type=hidden name=PC_1> <INPUT type=hidden value=ECICOR name=OrgCode_1> </TD>
<TD class=tablecolumn><A onclick="javascript:showDetailFor('%3CInventoryItem+ItemID%3D%22000000000152030052%22+OrganizationCode%3D%22ECICOR%22+ProductClass%3D%22%22+UnitOfMeasure%3D%22%22%2F%3E');return false;" href="">000000000152030052</A> </TD>
<TD class=tablecolumn></TD>
<TD class=tablecolumn></TD>
<TD class=tablecolumn>001097578527174</TD></TR>">
How can I find a solution?
document.getElementsByClassName() returns a collection of elements (an HTMLCollection), not a single Element. Even if only one element has the unprotectedinput class, you still have to index into the returned collection (for example, take item 0) to get the element itself.

How to click in HTML with vba?

I'm writing a code to automatically fill some website with cells values:
' Opens the console in Internet Explorer, switches to the inventory search,
' fills it from the active worksheet and clicks the second result row.
' Needs references to "Microsoft Internet Controls" and
' "Microsoft HTML Object Library".
Sub prueba()
    Dim oIE As InternetExplorer
    Dim oDocument As HTMLDocument
    Dim ECICOR As HTMLSelectElement
    Dim resultRows As Object
    Dim i As Long
    Dim deadline As Date

    ' Worksheet row that supplies the search value. The original never
    ' assigned i, so Cells(i, 1) addressed row 0 and failed.
    ' NOTE(review): row 1 is an assumption - confirm the intended row.
    i = 1

    Set oIE = New InternetExplorer
    oIE.Visible = True
    oIE.Navigate "http://sirem.eci.geci/smcfs/console/login.jsp"
    WaitForPage oIE
    Set oDocument = oIE.Document

    Call oDocument.parentWindow.execScript("window.parent.sc.postDummyFormForWindow('/smcfs/console/inventory.search');", "JScript")
    ' The script presumably loads a new page, so wait and re-read the
    ' document instead of using the reference taken before the call.
    WaitForPage oIE
    Set oDocument = oIE.Document

    Set ECICOR = oDocument.getElementById("enterpriseFieldObj")
    ECICOR.Focus
    ECICOR.Click
    ECICOR.Value = "ECICOR"
    ECICOR.FireEvent ("onChange")

    oDocument.getElementsByClassName("unprotectedinput")(0).Value = Cells(i, 1)
    oDocument.getElementsByTagName("a")(0).Click

    ' Poll (10 s at most) until at least two result rows exist, then click
    ' the second one; index 1 is the second item of the collection.
    deadline = Now + TimeSerial(0, 0, 10)
    Do
        DoEvents
        Set oDocument = oIE.Document
        Set resultRows = oDocument.getElementsByClassName("evenrow")
    Loop Until resultRows.Length > 1 Or Now > deadline

    If resultRows.Length > 1 Then
        resultRows(1).Click
    Else
        MsgBox "No second 'evenrow' result row appeared within 10 seconds.", vbExclamation
    End If
End Sub

' Blocks (while pumping events) until the browser reports it is idle and the
' document is completely loaded (readyState 4).
Private Sub WaitForPage(ByVal browser As InternetExplorer)
    Do While browser.Busy Or browser.readyState <> 4
        DoEvents
    Loop
End Sub
So my problem is that my program doesn't do anything on the last line of the code and I don't know what problem it is because it worked before.
Here you can see the HTML code:
<
<TR class=evenrow><TD class=checkboxcolumn><INPUT type=checkbox value=%3CInventoryItem+ItemID%3D%22000000000152030052%22+OrganizationCode%3D%22ECICOR%22+ProductClass%3D%22%22+UnitOfMeasure%3D%22%22%2F%3E name=EntityKey oldChecked="false"> <INPUT type=hidden value=000000000152030052 name=ItemID_1> <INPUT type=hidden name=UOM_1> <INPUT type=hidden name=PC_1> <INPUT type=hidden value=ECICOR name=OrgCode_1> </TD>
<TD class=tablecolumn><A onclick="javascript:showDetailFor('%3CInventoryItem+ItemID%3D%22000000000152030052%22+OrganizationCode%3D%22ECICOR%22+ProductClass%3D%22%22+UnitOfMeasure%3D%22%22%2F%3E');return false;" href="">000000000152030052</A> </TD>
<TD class=tablecolumn></TD>
<TD class=tablecolumn></TD>
<TD class=tablecolumn>001097578527174</TD></TR>">
How can I find a solution?

How to click on a dropdown element from a list in a table using VBA

using vba with selenium I am trying to get within a dropdown box to the option value of BO_test and click. I have tried many things. Here is the last try:
Option Explicit
' Kept at module level so the browser stays open after the macro ends.
Private ch As Selenium.ChromeDriver

' Opens the page in Chrome and chooses the "BO_test" entry of the query
' drop-down (the <select> with id AQPanelQueryList).
Sub test()
    Dim queryList As Selenium.WebElement

    Set ch = New Selenium.ChromeDriver
    ch.Start baseUrl:=""
    ch.Get "/"

    Set queryList = ch.FindElementById("AQPanelQueryList")
    ' Wait for the control itself instead of sleeping for a fixed time.
    queryList.WaitDisplayed
    ' Clicking a <select> only opens it; the option has to be chosen through
    ' the select wrapper, by its visible text.
    queryList.AsSelect.SelectByText "BO_test"
End Sub
this is what the debug prints out:
Query Management
Query:
(add new query)
BO_test
Set As Default
Run query when selected
Clear form when selected
Conditions:
Match AllMatch Any
Additional Fields Selection
---html---
<table class="SPLayoutTable" cellspacing="0" cellpadding="0">
<tbody>
<tr> </tr>
<tr> </tr>
<tr>
<td>
<div id="SPFormDiv" class="SPFormDiv" style="width: 350px; height: 782px; overflow: auto;"> == $0
<table aginfotop="true" class="SPLayoutTable" id="AQContentTable">
<tbody>
<tr>
<td style="width: 20%">...</td>
<td align="left" style="width: 80%">
<select id="AQPanelQueryList">
<optgroup label="My Queries">
<option value="(new)">(add new query)</option>
<option value="7c5a41f1-ba1a-444a-b7d0-97f5c1ce5052">BO_test</option>
</optgroup>
</select>
</td>
</tr>
<tr> </tr>
<tr> </tr>
Try this.
' Module-level handle to the drop-down element.
Public MyElement As Selenium.WebElement

' Waits for the drop-down, opens it and picks the entry labelled "BO_test".
Sub Test()
    ' ...
    Set MyElement = MyBrowser.FindElementById("YourDropdownBox")
    With MyElement
        .WaitDisplayed
        If .IsDisplayed Then
            .Click
            .AsSelect.SelectByText ("BO_test")
        End If
    End With
    ' ...
End Sub

Table scraping Excel VBA

I need help scraping the tags onto my excel from an internal company website.
This is the source code.
<br />
<span class="RptTitle"><input id="chkPromisDataLog" type="checkbox" name="chkPromisDataLog" checked="checked" onclick="showOnOffPromisLog();" /><label for="chkPromisDataLog">Promis Processing data log [83508442.1].</label></span>
<div id="divPromisDataLog" style="display: none;">
<table id="tblPromisDataLog" cellspacing="0" cellpadding="0" width="100%" border="0" class="table">
<tr>
<td width="60%"></td>
<td>
<a class="textnormal" href="javascript:popwnd=window.open('../Tools/ExportExcel.aspx?KEY=LOT_GEN_PROMIS','popwnd','status=no,toolbar=Yes,menubar=Yes,location=no,scrollbars=yes,resizable=Yes');popwnd.focus()">
Export to Excel
</a>
</td>
</tr>
<tr>
<td colspan="2">
<table cellspacing="0" rules="all" border="1" id="dgPromisDataLog" style="border-color: Black; border-collapse: collapse;">
<tr class="rptDetailsHeaderMgt" align="center">
<td>LotID</td>
<td>Hist Stage</td>
<td>Datein</td>
<td>Dateout</td>
<td>Qtyin</td>
<td>Qtyout</td>
<td>M/C ID</td>
<td>Emp TrackOut</td>
<td>Hold Code</td>
<td>Hold Reason</td>
<td>Staging (Hrs)</td>
</tr>
<tr class="rptDetailsItemMgt" align="center" style="white-space: nowrap;">
<td>83508442.1</td>
<td>
<a
href="javascript:popwnd=window.open('LotGen_Dtl.aspx?iDate=04/09/2021 09:07:07 PM&amp;oDate=04/10/2021 03:47:59 PM&amp;oLotid=83508442.1&amp;oStage=C-WFRPROCS&amp;oLastRow=N','popwnd','width=900,height=600,status=no,toolbar=no,menubar=no,location=no,scrollbars=yes,top=100,right=50,left=50');popwnd.focus();"
>
C-WFRPROCS
</a>
</td>
<td>4/9/2021 9:07:07 PM</td>
<td>4/10/2021 3:47:59 PM</td>
<td>0</td>
<td>9</td>
<td></td>
<td>10911700</td>
<td> </td>
<td> </td>
<td>18.68</td>
</tr>
</table>
</td>
</tr>
</table>
</div>
This is roughly my code
' Copies every data row (class "rptDetailsItemMgt") of the grid with id
' "dgPromisDataLog" to the active sheet: one sheet row per grid row starting
' at row 1, one column per table cell starting at column A.
' Needs references to "Microsoft Internet Controls" and
' "Microsoft HTML Object Library".
Sub Lotsearch()
    Dim ie As InternetExplorer
    Dim doc As HTMLDocument
    Dim grid As Object
    Dim gridRow As Object
    Dim i As Long
    Dim c As Long

    Set ie = New InternetExplorer 'start new IE page
    ie.Visible = True 'View what is happening in IE
    ie.navigate "www.internalcompanywebsite.aspx" 'Open link in IE
    While ie.Busy Or ie.readyState <> 4 'Waits for IE to finish loading
        DoEvents
    Wend

    ' Look the grid up in its own statement through a typed document
    ' variable rather than chaining several getElement... calls.
    Set doc = ie.document
    Set grid = doc.getElementById("dgPromisDataLog")
    If grid Is Nothing Then
        ie.Quit
        MsgBox "Table 'dgPromisDataLog' was not found on the page.", vbExclamation
        Exit Sub
    End If

    i = 1
    For Each gridRow In grid.Rows
        ' The header row uses a different class and is skipped.
        If gridRow.className = "rptDetailsItemMgt" Then
            ' Loop to the real cell count so no cell index is out of range.
            For c = 0 To gridRow.Cells.Length - 1
                ActiveSheet.Cells(i, c + 1).Value = Trim$(gridRow.Cells(c).innerText)
            Next c
            i = i + 1
        End If
    Next gridRow

    ie.Quit
End Sub
As you can see, I have tried various methods but to no avail.
getElementbyID not working
getElementsbyTagName not working
getElementsByClassName not working
Any help would be appreciated. Thanks.
It may not be the most efficient way to extract data from HTML, but you might consider regex matching. Raw Coding on YouTube recently published a very good regex tutorial; I remembered seeing this question and thought regex might be a good alternative if you don't want to work with the HTML object model directly.
Regex Tutorial for Beginners from Raw Coding on Youtube
like, if you only wanted normal text between td tags, you could regex search for
(?<OpenTag>[\<]+td[\>]+)(?<Contents>[\w\/\(\)\[\]\.\&\:\;\s]*?)(?<CloseTag>[\<]+[\/]+[td]+[\>]+)
here's an example at Regex101
Regex101 example using your html
' Typed document variable; this fragment never assigns it, so it has to be
' set (presumably from the browser's document) before the lookup below runs.
Dim ht As HTMLDocument
' Not used anywhere in this fragment.
Dim i As Integer
Dim htmltable As MSHTML.htmltable
' Step 1: fetch the inner grid by id into its own typed variable.
Set htmltable = ht.getElementById("dgPromisDataLog")
' Step 2: first row with class rptDetailsItemMgt, first td of it, as text.
myValue = htmltable.getElementsByClassName("rptDetailsItemMgt")(0).getElementsByTagName("td")(0).innerText
After experimenting for a few days, I found that the code works if I separate the getElementById call from the other getElements... calls.
I replaced htmlEle As IHTMLElement with ht As HTMLDocument and added htmltable As MSHTML.htmltable.
For some reason the code raises an error if I chain all the getElement... calls together in one expression. I hope this helps someone else with the same problem.

How to get a tag in html with queryselectorall provided with a condition in VBA?

Snippet:
<table>
<tbody>
<tr>
<td valign="top" align="left">
<nobr>FILENAME</nobr>
</td>
<td valign="center" align="left">
<b>
<font size="2px">
<nobr>FILENUMBER0311</nobr>
</font>
</b>
<font size="2px"> </font>
</td>
<td valign="top" align="right"></td>
<tr>
<td valign="top" align="left">Date</td>
<td colspan="2" valign="center" align="left">
<font color="#C00000">
<b>
CANCELED
</b>
</tr>
…
<tr>
<td valign="top" align="left">
<nobr>FILENAME</nobr>
</td>
<td valign="center" align="left">
<b>
<font size="2px">
<nobr>FILENUMBER0345</nobr>
</font>
</b>
<font size="2px"> </font>
</td>
<td valign="top" align="right"></td>
<tr>
<td valign="top" align="left">Date</td>
<td colspan="2" valign="center" align="left">
<font color="#C00000">
<b>
CONFIRMED
</b>
</tr>
The website-html has a table with several tr-tags. In each tr-tag there is either the entry CONFIRMED between b-tags or the entry CANCELED. I need a code that returns the value of FILENUMBERxxxx in the case of CONFIRMED. I have no idea how to combine a "selector", "instr" and possible other operations with each other in this case.
My Code: (nothing happens!). Does anyone know a solution? THX
' Posts the search form, loads the response into an in-memory HTML document
' and, for the first table row whose text contains "Termin", writes the text
' of a neighbouring node to column C of the active sheet.
Sub GetData()
    Const url = "https://www.zvg-portal.de/index.php?button=Suchen&all=1"
    Dim Html As MSHTML.HTMLDocument
    Dim xhr As Object, elm As Object
    Dim I As Long

    Set Html = New MSHTML.HTMLDocument
    Set xhr = CreateObject("MSXML2.ServerXMLHTTP.6.0")
    With xhr
        .Open "POST", url, False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .send "ger_name=--" & " " & "Alle" & " " & "Amtsgerichte" & " " & "--&" & "order_by=2&land_abk=ni&ger_id=0"
        Html.body.innerHTML = .responseText
    End With

    Set elm = Html.querySelectorAll("tr")
    ' Loop over the rows that really exist; a fixed 0 To 500 bound runs past
    ' the end of the list whenever the page has fewer rows.
    For I = 0 To elm.Length - 1
        If InStr(elm.Item(I).innerText, "Termin") > 0 Then
            'need the numeric value of Aktenzeichen
            ' NOTE(review): ParentNode of a tr is normally the tbody, so this
            ' path may not reach the Aktenzeichen cell - verify against the page.
            ActiveSheet.Cells(I + 2, 3) = elm.Item(I).ParentNode.PreviousSibling.FirstChild.NextSibling.innerText
            Exit For
        End If
    Next I
End Sub
The following processes the rows and when it sees a listing separator (tr with only 1 child td), it increments the row counter for the output array.
It uses an Instr test, for aufgehoben, to determine if the row with termin indicates a cancellation and returns a True/False value in the output array.
During the loop, the Aktenzeichen value is extracted; this is written out in the first column of the output array, before the True/False for cancelled.
Option Explicit

' Posts the search form, walks the rows of the result table and writes one
' line per listing to the active sheet: column 1 holds the Aktenzeichen,
' column 2 holds True/False for "the Termin row mentions aufgehoben".
' A row with a single cell is treated as the separator between listings.
Public Sub GetData()
    Const url = "https://www.zvg-portal.de/index.php?button=Suchen&all=1"
    Dim html As MSHTML.HTMLDocument, xhr As Object

    Set html = New MSHTML.HTMLDocument
    Set xhr = CreateObject("MSXML2.XMLHTTP")
    With xhr
        .Open "POST", url, False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .send "ger_name=--" & " " & "Alle" & " " & "Amtsgerichte" & " " & "--&" & "order_by=2&land_abk=ni&ger_id=0"
        html.body.innerHTML = .responseText
    End With

    Dim table As MSHTML.HTMLTable
    Set table = html.querySelector("table[border='0']")
    If table Is Nothing Then
        Err.Raise vbObjectError + 513, "GetData", "Result table not found in the response."
    End If
    If table.Rows.Length = 0 Then Exit Sub

    Dim row As MSHTML.HTMLTableRow, newBlock As Boolean
    Dim r As Long, cancellations(), aktenzeichen As String

    ' r grows by at most one per table row, so the row count is a safe upper
    ' bound for the number of listings (no fixed 1000-entry limit).
    ReDim cancellations(1 To table.Rows.Length, 1 To 2)
    r = 1

    For Each row In table.Rows
        If newBlock Then r = r + 1

        If InStr(1, row.innerHTML, "Aktenzeichen", vbTextCompare) > 0 Then
            aktenzeichen = Replace$(row.Children(1).getElementsByTagName("nobr")(0).innerText, " (Detailansicht)", vbNullString)
            cancellations(r, 1) = aktenzeichen
        End If

        If row.Children.Length > 0 Then
            If Trim$(row.Children(0).innerText) = "Termin" Then
                cancellations(r, 2) = (InStr(1, row.Children(1).innerText, "aufgehoben", vbTextCompare) > 0)
            End If
        End If

        ' A single-cell row separates two listings; advance on the next row.
        newBlock = (row.Children.Length = 1)
    Next

    Dim headers()
    headers = Array(" Aktenzeichen", "Cancelled")

    With ActiveSheet
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        ' Only the first r rows are filled. Writing the array to an r x 2
        ' range copies just that top-left part, so no Transpose / ReDim
        ' Preserve round trip is needed (it broke when r = 1).
        .Cells(2, 1).Resize(r, 2) = cancellations
    End With
End Sub