I want to use VBA for taking the URL adress of diferent links from a web page, but without success. Has anyone any idea why my code don't work?
My code is below:
Sub Test()
Dim URL As String
Dim IE As New InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim dictObj As Object: Set dictObj = CreateObject("Scripting.Dictionary")
Dim tRowID As String
URL = "http://www.flashscore.com/soccer/england/premier-league/";
With IE
.Navigate URL
.Visible = True Do Until
.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop Set HTMLdoc = .Document
End With
With HTMLdoc
Set tblSet = .getElementById("fs-results")
Set mTbl = tblSet.getElementsByTagName("tbody")(1)
Set tRows = mTbl.getElementsByTagName("tr")
With dictObj
For Each tRow In tRows
tRowID = Mid(tRow.getAttribute("id"), 5)
If Not .Exists(tRowID) Then .Add tRowID, Empty
End If
Next tRow
End With
End With
For Each Key In dictObj
Debug.Print Key
Next Key
Set IE = Nothing
End Sub
Related
WHAT I ALREADY HAVE
I use the following for extracting data from an HTML file.
This example lists all Table Rows within an HTML file
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim TRelements As IHTMLElementCollection
Dim TRelement As HTMLTableCell
Dim r As Long
Set IE = New InternetExplorer
With IE
.Navigate filePath
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set HTMLdoc = .Document
End With
Set TRelements = HTMLdoc.getElementsByTagName("TR")
This allows me to pinpoint data in the following way (5th row, 1st cell), Example:
A = TRelements.Item(5).ChildNodes.Item(1).innerText
WHAT I AM LOOKING FOR
I'd like to insert a new cell (TD-element) at the beginning of a row (TR-element)
DESIRED OUTCOME
NAME
SURNAME
Walter
White
New TD-element for DOB
DOB
NAME
SURNAME
09-07-58
Walter
White
In the example below I use Element.insertAdjacentHTML() and Element.insertAdjacentElement() to insert the new cells.
Sub Example()
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim TRelements As IHTMLElementCollection
Dim TRelement As HTMLTableCell
Dim r As Long
Set IE = New InternetExplorer
With IE
.navigate filePath
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set HTMLdoc = .document
End With
Set TRelements = HTMLdoc.getElementsByTagName("TR")
Dim TR As HTMLTableRow
Set TR = TRelements(0)
TR.insertAdjacentHTML "afterbegin", "<TH>DOB</TH>"
Dim TD As HTMLTableCell
Set TD = HTMLdoc.createElement("TD")
Set TR = TRelements(1)
TD.innerText = "09-07-58"
TR.insertAdjacentElement "afterbegin", TD
IE.Visible = True
End Sub
This subroutine will update the original file.
Sub OverWriteHTMLDocument(Document As HTMLDocument, FilePath As String)
Rem VBA OpenTextFile: https://analystcave.com/vba-filesystemobject-fso-in-excel/vba-opentextfile/
Const ForReading = 1, ForWriting = 2, ForAppending = 8 'Need to define constants manually
Const TristateUseDefault = -2, TristateTrue = -1, TristateFalse = 0 'Need to define constants manually
Dim Url As String
Url = Replace(Document.Url, "file://", "", , , vbTextCompare)
With CreateObject("Scripting.FileSystemObject").OpenTextFile(Url, ForWriting, True, TristateFalse)
.WriteLine Document.DocumentElement.outerHTML
.Close
End With
End Sub
Edit: Thank your DearDeer for the solution
'GRV website copy and collect hyperlink
Sub Get_HyperLink1()
Dim ie As InternetExplorer
Application.ScreenUpdating = False
Set ie = New InternetExplorer
ie.Visible = True
Set ws = ThisWorkbook.Worksheets("Sheet1")
Dim http As New XMLHTTP60, html As New HTMLDocument
Dim nodeRaceResultsTable As HTMLHtmlElement
Dim nodeTr As HTMLHtmlElement
Dim nodeDiv As HTMLHtmlElement
Dim Element1 As HTMLHtmlElement
Dim node1 As HTMLHtmlElement
Dim currentUrl As String
With ie
ie.Visible = True
The website below is where I want the VBA to navigate
ie.Navigate "https://fasttrack.grv.org.au/Meeting/Search?MeetingDateFrom=22%2F04%2F2020&MeetingDateTo=22%2F04%2F2020&Status=&TimeSlot=&DayOfWeek=&DisplayAdvertisedEvents=false&AllTracks=True&SelectedTracks=AllTracks&searchbutton=Search"
Do Until .readyState = 4: DoEvents: Loop
End With
I'm trying to get the hyperlinks with the VBA elements below
For Each nodeRaceResultsTable In html.getElementsByClassName("search-results")
For Each nodeTr In nodeRaceResultsTable.getElementsByTagName("tr")
With nodeTr.getElementsByTagName("td")
The part below is where I want this VBA to grab the hyperlink and print it on the excel sheet
ws.Cells(5, 5) = .Item(1).getElementsByTagName("a")(0).href
End With
Next
Next
ie.Quit
Set ie = Nothing
Application.StatusBar = ""
Application.ScreenUpdating = True
End Sub
Try something like this:
'GRV website copy and collect hyperlink
Sub Get_HyperLink1()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "https://fasttrack.grv.org.au/Meeting/Search?MeetingDateFrom=22%2F04%2F2020&MeetingDateTo=22%2F04%2F2020&Status=&TimeSlot=&DayOfWeek=&DisplayAdvertisedEvents=false&AllTracks=True&SelectedTracks=AllTracks&searchbutton=Search"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim aTag As Object, i As Long
Set aTag = IE.document.querySelectorAll(".search-results [href]")
For i = 0 To aTag.Length - 1
ActiveSheet.Cells(i + 1, 1) = aTag.Item(i)
Next i
IE.Quit
End With
End Sub
I am testing the code below. I think this is very close, but I can't seem to login to the site for some reason.
Sub Website_Login_Test()
Dim oHTML_Element As IHTMLElement
Dim sURL As String
On Error GoTo Err_Clear
sURL = "https://login.my_site_here.jsp?"
Set oBrowser = New InternetExplorer
oBrowser.Silent = True
oBrowser.timeout = 60
oBrowser.navigate sURL
oBrowser.Visible = True
Do
' Wait till the Browser is loaded
Loop Until oBrowser.readyState = READYSTATE_COMPLETE
Set HTMLDoc = oBrowser.document
For Each oHTML_Element In HTMLDoc
Debug.Print oHTML_Element
Next
HTMLDoc.all.UserId.Value = "my_id"
HTMLDoc.all.Password.Value = "my_pass"
For Each oHTML_Element In HTMLDoc.getElementsByTagName("Button")
Debug.Print oHTML_Element.Name
'oHTML_Element.Click: Exit For
'Debug.Print oHTML_Element.Type
'If oHTML_Element.Type = "submit" Then oHTML_Element.Click: Exit For
Next
' oBrowser.Refresh ' Refresh If Needed
Err_Clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
Call Test
End Sub
Sub Test()
Dim ie As Object, i As Long, strText As String
Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object
Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
ie.navigate "https://after_login_move_to_page_for_scraping.jsp"
Do While ie.busy: DoEvents: Loop
Do While ie.readyState <> 4: DoEvents: Loop
Set doc = ie.document
Set hTable = doc.getElementsByTagName("table")
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each tr In hTR
Set hTD = tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each td In hTD
ws.Cells(z, y).Value = td.innertext
y = y + 1
Next td
DoEvents
z = z + 1
Next tr
Exit For
Next bb
Exit For
Next tb
End Sub
I can't login to the site, so I can't do the scraping, but I think the code is pretty close. Here is the HTML for the id object, the password object and the button object. What am I doing wrong?
I think you must trigger the keypress event of the input fields. If there are other events you must trigger, have a look here, how you can find them:
Automate IE via Excel to fill in a dropdown and continue
Sub WebsiteLogin()
Const url As String = "https://login.my_site_here.jsp"
Const userName As String = "Here Your LogIn Name"
Const passWord As String = "Here Your Password"
Dim ie As Object
Dim htmlDoc As Object
Dim nodeInputUserName As Object
Dim nodeInputPassWord As Object
'Initialize Internet Explorer, set visibility,
'call URL and wait until page is fully loaded
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate url
Do Until ie.readyState = 4: DoEvents: Loop
Set htmlDoc = ie.document
'Set the log in name
Set nodeInputUserName = htmlDoc.getElementById("USERID")
nodeInputUserName.Value = userName
Call TriggerEvent(htmlDoc, nodeInputUserName, "onkeypress")
'Set the password
Set nodeInputPassWord = htmlDoc.getElementById("PASSWORD")
nodeInputPassWord.Value = passWord
Call TriggerEvent(htmlDoc, nodeInputPassWord, "onkeypress")
'Click submit button
htmlDoc.querySelector("a[role='button']").Click
End Sub
This is the procedure to trigger events:
Private Sub TriggerEvent(htmlDocument As Object, htmlElementWithEvent As Object, eventType As String)
Dim theEvent As Object
htmlElementWithEvent.Focus
Set theEvent = htmlDocument.createEvent("HTMLEvents")
theEvent.initEvent eventType, True, False
htmlElementWithEvent.dispatchEvent theEvent
End Sub
I am pulling data from NSE site,
the URL is:https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=28MAY2020#
I am successfully extract the item using Internet explorer,How ever this method is slow,
so i moved to MSXML2.XMLHTTP60 method,but this method returns null string
please find my codes
Method 1:Works fine
Sub OI_Slow_Method()
Dim ie As New InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
Dim Link As String
Link = ActiveSheet.Range("C4").Value
ie.Visible = False
ie.navigate Link
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
Dim objElement As HTMLObjectElement
Dim sDD As String
doc.Focus
ActiveSheet.Cells(1, 1).Value = doc.getElementById("openInterest").innerText 'Open Interest Value
ie.Quit
ie.Visible = True
Set doc = Nothing
Set ie = Nothing
End Sub
'--------------------------
Method 2:Help required in this method only
Sub OI_Fast_Method()
Dim xhr As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument
Set xhr = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument
With xhr
.Open "GET", "https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=30APR2020#", False
.send
html.body.innerHTML = StrConv(.responseBody, vbUnicode)
End With
Debug.Print html.getElementById("openInterest").Innertext
'The output of this is "<SPAN id=openInterest>??</SPAN>" only question mark returned inside the SPAN
End Sub
I think Tim hit the nail on the head, as always. You are getting some raw XML and the stuff you want is not in that XML. You can do a data dump and get what you want.
Sub DumpData()
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
URL = "https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=28MAY2020#"
'Wait for site to fully load
ie.Navigate2 URL
Do While ie.Busy = True
DoEvents
Loop
RowCount = 1
With Sheets("Sheet1")
.Cells.ClearContents
RowCount = 1
For Each itm In ie.Document.all
.Range("B" & RowCount) = Left(itm.innerText, 1024)
RowCount = RowCount + 1
Next itm
End With
End Sub
Then you would have to parse the text. It's not hard, but it will be a little extra labor.
Another option may be to download the entire contents of the website, save it as a text file, import the data, and then parse that data.
Sub Sample()
Dim ie As Object
Dim retStr As String
Set ie = CreateObject("internetexplorer.application")
With ie
.Navigate "https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=28MAY2020#"
.Visible = True
End With
Do While ie.readystate <> 4: Wait 5: Loop
DoEvents
retStr = ie.document.body.innerText
'~> Write the above to a text file
Dim filesize As Integer
Dim FlName As String
'~~> Change this to the relevant path
FlName = "C:\Users\ryans\OneDrive\Desktop\Sample.Txt"
filesize = FreeFile()
Open FlName For Output As #filesize
Print #filesize, retStr
Close #filesize
End Sub
Private Sub Wait(ByVal nSec As Long)
nSec = nSec + Timer
While nSec > Timer
DoEvents
Wend
End Sub
I couldn't get either of your code samples to run on my machine.
I have created Macro which can read all the HTML of provided URL, however I want to fetch all the url from that HTML.
Sub GetHTML_Click()
Dim ie As InternetExplorer
Dim html As HTMLDocument
Dim j As Integer
Set ie = New InternetExplorer
ie.Visible = True
url = Cells(1, 2)
ie.navigate url
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to website ..."
Loop
Application.StatusBar = " "
Set html = ie.document
'Dim htmltext As Collection
Dim htmltext As String
htmltext = html.DocumentElement.innerHTML
'Need to add iURL
Dim htmlurl As IHTMLElement
For Each htmlurl In hmtltext
iurl = htmlurl.toString
Cells(j, 1).Value = CLng(iurl)
j = j + 1
Next
End Sub
I tried to code this to fetch the URLs however its giving "Object Required error"
can anyone please help to modify this macro which will help me to fetch all the URL from HTML page.
I am using www.mini.in website for testing.
Mayur.
Try this :
Dim ie As Object
Dim html As Object
Dim j As Integer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
URL = "google.com"
ie.Navigate URL
Do While ie.ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to website ..."
Loop
Application.StatusBar = " "
Set html = ie.Document
'Dim htmltext As Collection
Dim htmlElements As Object
Dim htmlElement As Object
Set htmlElements = html.getElementsByTagName("*")
For Each htmlElement In htmlElements
If htmlElement.getAttribute("href") <> "" Then Debug.Print htmlElement.getAttribute("href")
Next