How do I insert HTML elements into an HTML file with VBA - html

WHAT I ALREADY HAVE
I use the following for extracting data from an HTML file.
This example lists all Table Rows within an HTML file
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim TRelements As IHTMLElementCollection
Dim TRelement As HTMLTableCell
Dim r As Long
Set IE = New InternetExplorer
With IE
.Navigate filePath
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set HTMLdoc = .Document
End With
Set TRelements = HTMLdoc.getElementsByTagName("TR")
This allows me to pinpoint data in the following way (5th row, 1st cell), Example:
A = TRelements.Item(5).ChildNodes.Item(1).innerText
WHAT I AM LOOKING FOR
I'd like to insert a new cell (TD-element) at the beginning of a row (TR-element)
DESIRED OUTCOME
NAME
SURNAME
Walter
White
New TD-element for DOB
DOB
NAME
SURNAME
09-07-58
Walter
White

In the example below I use Element.insertAdjacentHTML() and Element.insertAdjacentElement() to insert the new cells.
Sub Example()
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim TRelements As IHTMLElementCollection
Dim TRelement As HTMLTableCell
Dim r As Long
Set IE = New InternetExplorer
With IE
.navigate filePath
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set HTMLdoc = .document
End With
Set TRelements = HTMLdoc.getElementsByTagName("TR")
Dim TR As HTMLTableRow
Set TR = TRelements(0)
TR.insertAdjacentHTML "afterbegin", "<TH>DOB</TH>"
Dim TD As HTMLTableCell
Set TD = HTMLdoc.createElement("TD")
Set TR = TRelements(1)
TD.innerText = "09-07-58"
TR.insertAdjacentElement "afterbegin", TD
IE.Visible = True
End Sub
This subroutine will update the original file.
Sub OverWriteHTMLDocument(Document As HTMLDocument, FilePath As String)
Rem VBA OpenTextFile: https://analystcave.com/vba-filesystemobject-fso-in-excel/vba-opentextfile/
Const ForReading = 1, ForWriting = 2, ForAppending = 8 'Need to define constants manually
Const TristateUseDefault = -2, TristateTrue = -1, TristateFalse = 0 'Need to define constants manually
Dim Url As String
Url = Replace(Document.Url, "file://", "", , , vbTextCompare)
With CreateObject("Scripting.FileSystemObject").OpenTextFile(Url, ForWriting, True, TristateFalse)
.WriteLine Document.DocumentElement.outerHTML
.Close
End With
End Sub

Related

I am trying to get hyperlinks printed onto the excel sheet under the class "search-results". What needs to be changed in the VBA elements?

Edit: Thank your DearDeer for the solution
'GRV website copy and collect hyperlink
Sub Get_HyperLink1()
Dim ie As InternetExplorer
Application.ScreenUpdating = False
Set ie = New InternetExplorer
ie.Visible = True
Set ws = ThisWorkbook.Worksheets("Sheet1")
Dim http As New XMLHTTP60, html As New HTMLDocument
Dim nodeRaceResultsTable As HTMLHtmlElement
Dim nodeTr As HTMLHtmlElement
Dim nodeDiv As HTMLHtmlElement
Dim Element1 As HTMLHtmlElement
Dim node1 As HTMLHtmlElement
Dim currentUrl As String
With ie
ie.Visible = True
The website below is where I want the VBA to navigate
ie.Navigate "https://fasttrack.grv.org.au/Meeting/Search?MeetingDateFrom=22%2F04%2F2020&MeetingDateTo=22%2F04%2F2020&Status=&TimeSlot=&DayOfWeek=&DisplayAdvertisedEvents=false&AllTracks=True&SelectedTracks=AllTracks&searchbutton=Search"
Do Until .readyState = 4: DoEvents: Loop
End With
I'm trying to get the hyperlinks with the VBA elements below
For Each nodeRaceResultsTable In html.getElementsByClassName("search-results")
For Each nodeTr In nodeRaceResultsTable.getElementsByTagName("tr")
With nodeTr.getElementsByTagName("td")
The part below is where I want this VBA to grab the hyperlink and print it on the excel sheet
ws.Cells(5, 5) = .Item(1).getElementsByTagName("a")(0).href
End With
Next
Next
ie.Quit
Set ie = Nothing
Application.StatusBar = ""
Application.ScreenUpdating = True
End Sub
Try something like this:
'GRV website copy and collect hyperlink
Sub Get_HyperLink1()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "https://fasttrack.grv.org.au/Meeting/Search?MeetingDateFrom=22%2F04%2F2020&MeetingDateTo=22%2F04%2F2020&Status=&TimeSlot=&DayOfWeek=&DisplayAdvertisedEvents=false&AllTracks=True&SelectedTracks=AllTracks&searchbutton=Search"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim aTag As Object, i As Long
Set aTag = IE.document.querySelectorAll(".search-results [href]")
For i = 0 To aTag.Length - 1
ActiveSheet.Cells(i + 1, 1) = aTag.Item(i)
Next i
IE.Quit
End With
End Sub

Excel VBA to Login to Website and Grab TR & TD Elements

I am testing the code below. I think this is very close, but I can't seem to login to the site for some reason.
Sub Website_Login_Test()
Dim oHTML_Element As IHTMLElement
Dim sURL As String
On Error GoTo Err_Clear
sURL = "https://login.my_site_here.jsp?"
Set oBrowser = New InternetExplorer
oBrowser.Silent = True
oBrowser.timeout = 60
oBrowser.navigate sURL
oBrowser.Visible = True
Do
' Wait till the Browser is loaded
Loop Until oBrowser.readyState = READYSTATE_COMPLETE
Set HTMLDoc = oBrowser.document
For Each oHTML_Element In HTMLDoc
Debug.Print oHTML_Element
Next
HTMLDoc.all.UserId.Value = "my_id"
HTMLDoc.all.Password.Value = "my_pass"
For Each oHTML_Element In HTMLDoc.getElementsByTagName("Button")
Debug.Print oHTML_Element.Name
'oHTML_Element.Click: Exit For
'Debug.Print oHTML_Element.Type
'If oHTML_Element.Type = "submit" Then oHTML_Element.Click: Exit For
Next
' oBrowser.Refresh ' Refresh If Needed
Err_Clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
Call Test
End Sub
Sub Test()
Dim ie As Object, i As Long, strText As String
Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object
Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
ie.navigate "https://after_login_move_to_page_for_scraping.jsp"
Do While ie.busy: DoEvents: Loop
Do While ie.readyState <> 4: DoEvents: Loop
Set doc = ie.document
Set hTable = doc.getElementsByTagName("table")
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each tr In hTR
Set hTD = tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each td In hTD
ws.Cells(z, y).Value = td.innertext
y = y + 1
Next td
DoEvents
z = z + 1
Next tr
Exit For
Next bb
Exit For
Next tb
End Sub
I can't login to the site, so I can't do the scraping, but I think the code is pretty close. Here is the HTML for the id object, the password object and the button object. What am I doing wrong?
I think you must trigger the keypress event of the input fields. If there are other events you must trigger, have a look here, how you can find them:
Automate IE via Excel to fill in a dropdown and continue
Sub WebsiteLogin()
Const url As String = "https://login.my_site_here.jsp"
Const userName As String = "Here Your LogIn Name"
Const passWord As String = "Here Your Password"
Dim ie As Object
Dim htmlDoc As Object
Dim nodeInputUserName As Object
Dim nodeInputPassWord As Object
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate url
Do Until ie.readyState = 4: DoEvents: Loop
Set htmlDoc = ie.document
'Set the log in name
Set nodeInputUserName = htmlDoc.getElementById("USERID")
nodeInputUserName.Value = userName
Call TriggerEvent(htmlDoc, nodeInputUserName, "onkeypress")
'Set the password
Set nodeInputPassWord = htmlDoc.getElementById("PASSWORD")
nodeInputPassWord.Value = passWord
Call TriggerEvent(htmlDoc, nodeInputPassWord, "onkeypress")
'Click submit button
htmlDoc.querySelector("a[role='button']").Click
End Sub
This is the procedure to trigger events:
Private Sub TriggerEvent(htmlDocument As Object, htmlElementWithEvent As Object, eventType As String)
Dim theEvent As Object
htmlElementWithEvent.Focus
Set theEvent = htmlDocument.createEvent("HTMLEvents")
theEvent.initEvent eventType, True, False
htmlElementWithEvent.dispatchEvent theEvent
End Sub

Scraping data with VBA: Why can't I get access to html elements on certain webpages?

On certain webpages I cannot get access to HTML elements using VBA. What am I doing wrong? For example I have two different pages on the same website.
This code returns number of matches on the page.
Sub Oddsportalmatches()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim Matches As MSHTML.IHTMLElementCollection
IE.Visible = True
IE.Navigate "https://www.oddsportal.com/matches/soccer/"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.Document
Set Matches = HTMLDoc.getElementsByClassName("name table-participant")
Debug.Print Matches.Length
End Sub
This code returns 0. In fact I cannot get access to any element inside the matches table.
Sub Oddsportalmatches()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim Matches As MSHTML.IHTMLElementCollection
IE.Visible = True
IE.Navigate "https://www.oddsportal.com/predictions/"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.Document
Set Matches = HTMLDoc.getElementsByClassName("table-participant")
Debug.Print Matches.Length
End Sub

How to download a table from a web with VBA?

I'am trying to download a table from this page
to excel with VBA: http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx --> table "Panel General"
I can download the table "Panel Merval" but i couldn't download the other table.
I use this code for table "Panel Merval":
Sub GetTable()
Dim ieApp As InternetExplorer
Dim ieDoc As Object
Dim ieTable As Object
Dim clip As DataObject
'create a new instance of ie
Set ieApp = New InternetExplorer
'you don’t need this, but it’s good for debugging
ieApp.Visible = False
'now that we’re in, go to the page we want
ieApp.Navigate "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
Do While ieApp.Busy: DoEvents: Loop
Do Until ieApp.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
'get the table based on the table’s id
Set ieDoc = ieApp.Document
Set ieTable = ieDoc.all.Item("ctl00_ContentCentral_tcAcciones_tpMerval_grdMerval")
'copy the tables html to the clipboard and paste to teh sheet
If Not ieTable Is Nothing Then
Set clip = New DataObject
clip.SetText "" & ieTable.outerHTML & ""
clip.PutInClipboard
Sheet1.Select
Sheet1.Range("b2").Select
Sheet1.PasteSpecial "Unicode Text"
End If
'close 'er up
ieApp.Quit
Set ieApp = Nothing
End Sub
or this one
Public Sub PanelLider()
Dim oDom As Object: Set oDom = CreateObject("htmlFile")
Dim x As Long, y As Long
Dim oRow As Object, oCell As Object
Dim vData As Variant
Dim link As String
link = "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
y = 1: x = 1
With CreateObject("msxml2.xmlhttp")
.Open "GET", link, False
.Send
oDom.body.innerHTML = .ResponseText
End With
With oDom.getElementsByTagName("table")(27)
Dim dataObj As Object
Set dataObj = CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
dataObj.SetText "<table>" & .innerHTML & "</table>"
dataObj.PutInClipboard
End With
Sheets(2).Paste Sheets(2).Cells(1, 1)
End Sub
Could someone help me to download the table "Panel General"?
Many thanks.
Selenium
The following gets the table using selenium basic.
Option Explicit
Public Sub GetTable()
Dim html As New HTMLDocument, htable As HTMLTable, headers()
headers = Array("Especie", "Hora Cotización", "Cierre Anterior", "Precio Apertura", "Precio Máximo", _
"Precio Mínimo", "Último Precio", "Variación Diaria", "Volumen Efectivo ($)", "Volumen Nominal", "Precio Prom. Pon")
With New ChromeDriver
.get "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
.FindElementById("__tab_ctl00_ContentCentral_tcAcciones_tpGeneral").Click
Do
DoEvents
Loop While .FindElementById("ctl00_ContentCentral_tcAcciones_tpGeneral_dgrGeneral", timeout:=7000).Text = vbNullString
html.body.innerHTML = .PageSource
Set htable = html.getElementById("ctl00_ContentCentral_tcAcciones_tpGeneral_dgrGeneral")
WriteTable2 htable, headers, 1, ActiveSheet
.Quit
End With
End Sub
Public Sub WriteTable2(ByVal htable As HTMLTable, ByRef headers As Variant, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, R As Long, c As Long, tBody As Object
R = startRow: c = 1
With ActiveSheet
Set tRow = htable.getElementsByTagName("tr")
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
For Each td In tCell
.Cells(R, c).Value = td.innerText
c = c + 1
Next td
R = R + 1: c = 1
Next tr
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
End With
End Sub
References:
HTML Object Library
Selenium Type Library
With IE (Using WriteTable2 sub from above):
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, html As HTMLDocument, hTable As HTMLTable, headers(), a As Object
headers = Array("Especie", "Hora Cotización", "Cierre Anterior", "Precio Apertura", "Precio Máximo", _
"Precio Mínimo", "Último Precio", "Variación Diaria", "Volumen Efectivo ($)", "Volumen Nominal", "Precio Prom. Pon")
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate "http://www.merval.sba.com.ar/Vistas/Cotizaciones/Acciones.aspx"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("__tab_ctl00_ContentCentral_tcAcciones_tpGeneral").Click
Do
DoEvents
On Error Resume Next
Set hTable = .document.getElementById("ctl00_ContentCentral_tcAcciones_tpGeneral_dgrGeneral")
On Error GoTo 0
Loop While hTable Is Nothing
WriteTable2 hTable, headers, 1, ActiveSheet
.Quit '<== Remember to quit application
Application.ScreenUpdating = True
End With
End Sub
References:
Microsoft Internet Explorer Controls

How can I get the "tr id" from HTML code?

I want to use VBA for taking the URL adress of diferent links from a web page, but without success. Has anyone any idea why my code don't work?
My code is below:
Sub Test()
Dim URL As String
Dim IE As New InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim dictObj As Object: Set dictObj = CreateObject("Scripting.Dictionary")
Dim tRowID As String
URL = "http://www.flashscore.com/soccer/england/premier-league/";
With IE
.Navigate URL
.Visible = True Do Until
.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop Set HTMLdoc = .Document
End With
With HTMLdoc
Set tblSet = .getElementById("fs-results")
Set mTbl = tblSet.getElementsByTagName("tbody")(1)
Set tRows = mTbl.getElementsByTagName("tr")
With dictObj
For Each tRow In tRows
tRowID = Mid(tRow.getAttribute("id"), 5)
If Not .Exists(tRowID) Then .Add tRowID, Empty
End If
Next tRow
End With
End With
For Each Key In dictObj
Debug.Print Key
Next Key
Set IE = Nothing
End Sub