Extract data from a site that runs its queries with JavaScript - HTML

I want to get data from a website. I adapted some earlier code, but when I run it I get
run-time error '91'
(object variable not set).
Sub DENEME()
    Dim S As String
    Dim html As HTMLDocument
    Dim hTable As HTMLTable
    Dim clipboard As Object

    Set html = New HTMLDocument
    With New XMLHTTP60
        .Open "GET", "https://www.scorespro.com/basketball/results/date/2019-02-15", False
        .setRequestHeader "content-type", "application/x-www-form-urlencoded; charset=UTF-8"
        .send
        S = .responseText
    End With
    html.body.innerHTML = S
    Set hTable = html.querySelector(".matches-data")
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    clipboard.SetText hTable.outerHTML
    clipboard.PutInClipboard
    Range("A1").PasteSpecial
End Sub

A few points to note:
Yes, it is an id, not a class, for that element; but that element is a div, and copy-pasting the outer HTML via the clipboard will only paste the HTML of the div itself - not all the tables within it.
What you want is a list of tables, so you need to change your selector to get the tables within that div.
Because you are pasting via the clipboard and the output contains merged cells, you need to find the last used row independent of column and add 1, so that the next table is written out starting on a new row.
If unsure about your selector, use the search bar in the browser as shown [here].
VBA:
Option Explicit

Public Sub Deneme()
    Dim s As String, ws As Worksheet, tables As Object, i As Long
    Dim html As HTMLDocument, clipboard As Object

    Set html = New HTMLDocument
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Application.ScreenUpdating = False
    With New XMLHTTP60
        .Open "GET", "https://www.scorespro.com/basketball/results/date/2019-02-15", False
        .setRequestHeader "content-type", "application/x-www-form-urlencoded; charset=UTF-8"
        .send
        s = .responseText
    End With
    html.body.innerHTML = s
    Set tables = html.querySelectorAll("#matches-data table")
    'Late-bound MSForms DataObject (avoids adding a reference to MS Forms 2.0)
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    For i = 0 To tables.Length - 1
        clipboard.SetText tables.item(i).outerHTML
        clipboard.PutInClipboard
        ws.Range("A" & GetLastRow(ws) + 1).PasteSpecial
    Next
    Application.ScreenUpdating = True
End Sub

Public Function GetLastRow(ByVal sh As Worksheet) As Long
    'Last used row regardless of column
    On Error Resume Next
    GetLastRow = sh.Cells.Find(What:="*", _
                               After:=sh.Range("A1"), _
                               Lookat:=xlPart, _
                               LookIn:=xlFormulas, _
                               SearchOrder:=xlByRows, _
                               SearchDirection:=xlPrevious, _
                               MatchCase:=False).Row
    On Error GoTo 0
End Function

Your selector string is set up so that it looks for the first element whose class is matches-data. However, matches-data is the id. As a result, hTable is being assigned Nothing, hence the error. Instead, try the following...
Set hTable = html.querySelector("#matches-data")
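Since error '91' means the object was never set, it is also worth guarding against a failed match before touching the element. A small defensive sketch (my addition, not part of the original answers), using the same variables as above:
Set hTable = html.querySelector("#matches-data")
If hTable Is Nothing Then
    MsgBox "Selector returned no match - check the id in the page source."
    Exit Sub
End If
clipboard.SetText hTable.outerHTML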

Exploring the InStr VBA Function in Web Scraping

I want to scrape this URL, https://www.realtor.com/realestateandhomes-search/06510, using the VBA InStr function and extract all URLs containing the substring "06510".
Here is sample code I've been trying to make work.
Option Explicit

Sub GetLinks()
'
'To use HTMLDocument you need to set a reference to Tools -> References -> Microsoft HTML Object Library
    Dim HTML As New HTMLDocument
    Dim http As Object
    Dim links As Object
    Dim link As HTMLHtmlElement
    Dim counter As Long
    Dim website As Range
    Dim LastRange As Range
    Dim row As Long
    Dim continue As Boolean
    Dim respHead As String
    Dim lRow As Long

    Application.ScreenUpdating = False
    ' The row where website addresses start
    row = 30
    continue = True
    lRow = Cells(Rows.count, 1).End(xlUp).row + 1

    ' XMLHTTP gives errors where ServerXMLHTTP does not
    ' even when using the same URL's
    'Set http = CreateObject("MSXML2.XMLHTTP")
    Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0")

    Do While continue
        ' Could set this to first cell with URL then OFFSET columns to get next web site
        Set website = Range("A" & row)
        Set LastRange = Range("B" & lRow)

        If Len(website.Value) < 1 Then
            continue = False
            Exit Sub
        End If
        If website Is Nothing Then
            continue = False
        End If
        'Debug.Print website

        With http
            On Error Resume Next
            .Open "GET", website.Value, False
            .send
            ' If Err.Number is not 0 then an error occurred accessing the website
            ' This checks for badly formatted URL's. The website can still return an error
            ' which should be checked in .Status
            'Debug.Print Err.Number

            ' Clear the row of any previous results
            Range("B" & row & ":E" & row).Clear

            ' If the website sent a valid response to our request
            If Err.Number = 0 Then
                If .Status = 200 Then
                    HTML.body.innerHTML = http.responseText
                    Set links = HTML.getElementsByTagName("a")
                    For Each link In links
                        If InStr(link.outerHTML, "06510") Then
                            LastRange.Value = link.href
                        End If
                    Next
                End If
                Set website = Nothing
            Else
                'Debug.Print "Error loading page"
                LastRange.Value = "Error with website address"
            End If
            On Error GoTo 0
        End With
        row = row + 1
    Loop
    Application.ScreenUpdating = True
End Sub
After inspecting the page, here's a sample of the kind of URL to extract - https://www.realtor.com/realestateandhomes-detail/239-Bradley-St_New-Haven_CT_06510_M36855-92189. Any help will be appreciated.
Using QHarr's code in a simplified way...
Sub GetLinks()
    Dim url As String, links_count As Integer
    Dim j As Integer, row As Integer
    Dim XMLHTTP As Object, html As Object
    'Dim tr_coll As Object, tr As Object
    'Dim elements As Object
    Dim i As Long, allLinksOfInterest As Object
    'Dim td_coll As Object, td As Object, td_col, objT

    url = "https://www.realtor.com/realestateandhomes-search/06510"
    Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
    XMLHTTP.Open "GET", url, False
    XMLHTTP.send

    Set html = CreateObject("htmlfile")
    html.body.innerHTML = XMLHTTP.responseText

    Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
    For i = 0 To allLinksOfInterest.Length - 1
        Debug.Print allLinksOfInterest.Item(i).href
    Next
End Sub
Please check if I'm missing anything. I'm still getting the error "Object doesn't support this property or method".
Don't use InStr on each node's entire outerHTML during a loop over all a tags. There are times when this is required, but this shouldn't be one of them (hopefully).
You want to use an attribute = value CSS selector with the contains operator, *. It exists specifically for matching on substrings in attribute values, and it is more efficient.
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
So,
Dim i As Long, allLinksOfInterest As Object
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
For i = 0 To allLinksOfInterest.Length - 1
Debug.Print allLinksOfInterest.Item(i).href
Next
Attribute = value with contains operator:
[attr*=value]
Represents elements with an attribute name of attr whose
value contains at least one occurrence of value within the string.
VBA:
This currently produces 26 links. All are relative links, so the domain needs to be added, as shown in the loop. Some are duplicates, so consider adding them to a dictionary as keys to remove the duplicates (see the sketch after the code).
Option Explicit

Public Sub GetLinks()
    Dim html As HTMLDocument
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.realtor.com/realestateandhomes-search/06510", False
        .send
        html.body.innerHTML = .responseText
    End With

    Dim i As Long, allLinksOfInterest As Object
    Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")
    For i = 0 To allLinksOfInterest.Length - 1
        Debug.Print Replace$(allLinksOfInterest.item(i).href, "about:", "https://www.realtor.com")
    Next
End Sub
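As a minimal sketch of the dictionary suggestion above (assuming the same allLinksOfInterest collection is in scope), using the completed links as dictionary keys drops the duplicates:
Dim dict As Object, i As Long, fullLink As String
Set dict = CreateObject("Scripting.Dictionary")
For i = 0 To allLinksOfInterest.Length - 1
    fullLink = Replace$(allLinksOfInterest.item(i).href, "about:", "https://www.realtor.com")
    If Not dict.Exists(fullLink) Then dict.Add fullLink, vbNullString
Next
Dim k As Variant
For Each k In dict.Keys
    Debug.Print k
Next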
If InStr(link.outerHTML, "06510") Then
In the code above, the InStr function is used as if it returned a Boolean. It does not; it returns a number (the position of the first match, or 0 if there is none). So you should add a comparison operator after the function call, like this:
If InStr(link.outerHTML, "06510") > 0 Then
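A further note on the asker's remaining "Object doesn't support this property or method" error: the late-bound document from CreateObject("htmlfile") can run in a legacy IE document mode that does not expose querySelectorAll, which is likely why the simplified code fails on that line. The answer code above avoids this by early-binding HTMLDocument. A minimal sketch of the swap (requires the Microsoft HTML Object Library reference):
Dim html As MSHTML.HTMLDocument      'early bound, supports querySelectorAll
Set html = New MSHTML.HTMLDocument
html.body.innerHTML = XMLHTTP.responseText
Set allLinksOfInterest = html.querySelectorAll("[href*='06510']")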

Scraping a specific <p> class from Yahoo Finance (VBA, Excel)

I have been trying to extract specific information from a certain nested class in the code at the following location:
https://finance.yahoo.com/quote/ASUR?p=ASUR
The class containing the innerText I'm after is named "D(ib) Va(t)" and, as far as I have seen, this text is unique to that class name. I am using the following code to get the data.
Private Sub CommandButton1_Click()
    Dim IE4 As Object
    Dim strURL3 As String
    Dim divs1 As Object
    Dim symbol1 As String
    Dim rowd As Integer
    Dim divs2 As Object

    'turn calculation off
    Application.ScreenUpdating = False
    Application.DisplayAlerts = False
    Application.Calculation = xlCalculationManual

    rowd = 1
    'Sheets.Add After:=Sheets(ActiveWorkbook.Sheets.Count)
    'Sheets(ActiveSheet.Name).Name = "Industry"
    'Worksheets("Panel").Activate
    'Range("B9").Select

    Set IE4 = CreateObject("InternetExplorer.Application") 'Create only one IE instance
    'Do Until ActiveCell.Value = "" 'Loop
    'symbol1 = ActiveCell.Value
    strURL3 = "https://finance.yahoo.com/quote/ASUR?p=ASUR"
    IE4.Visible = True 'Flag to remove IE visibility
    VBA.Shell "RunDll32.exe InetCpl.Cpl, ClearMyTracksByProcess 264", vbMinimizedNoFocus
    VBA.Shell "RunDll32.exe InetCpl.Cpl, ClearMyTracksByProcess 258", vbMinimizedNoFocus
    IE4.Navigate strURL3
    While IE4.Busy: DoEvents: Wend 'Break of 4 seconds after loading
    Application.Wait (Now + TimeValue("0:00:04"))

    Set divs1 = IE4.Document.getelementsbytagname("div")
    Worksheets("Industry").Activate
    ActiveSheet.Cells("1,2").Select
    For Each div In divs1
        Set divs2 = IE4.Document.getelementsbytagname("p")
        For Each p In divs2
            If p.classname = "D(ib) Va(t)" Then
                Debug.Print p.innertext
                'Cells(rowd, 2) = p.innertext
                'rowd = rowd + 1
            End If
        Next p
    Next div

    'Sheets("Panel").Select
    ' ActiveCell.Offset(1, 0).Select
    'Loop
    'Sheets("Panel").Select
    'Range("B9").Select 'range that selects rows and columns to paste in every company sheet
    'Range(Selection, Selection.End(xlDown)).Select
    'Range(Selection, Selection.End(xlDown)).Select
    'Range(Selection, Selection.End(xlDown)).Select
    ' Selection.Copy
    ' Sheets("Industry").Select
    'Range("A1").Select
    'ActiveSheet.Paste
    ' Application.CutCopyMode = False
    IE4.Quit

    'turn calculation on
    Application.Calculation = xlCalculationAutomatic
    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
End Sub
It does not capture the class. Can anyone help me with this?
It's not good practice to select on compound classes when there is a way to avoid it. Check out the following implementation, which achieves the same result:
Sub FetchText()
    Const Link$ = "https://finance.yahoo.com/quote/ASUR?p=ASUR"
    Dim Http As New XMLHTTP60, Htmldoc As New HTMLDocument

    With Http
        .Open "GET", Link, False
        .send
        Htmldoc.body.innerHTML = .responseText
    End With
    MsgBox Htmldoc.querySelector("p.businessSummary").PreviousSibling.LastChild.innerText
End Sub
The p tag element with that class holds the company sector, industry, and employee info. You can retrieve it faster with XMLHTTP, avoiding opening a browser, and then use a CSS selector combination to target the element:
Option Explicit

Public Sub GetInfo()
    Const URL As String = "https://finance.yahoo.com/quote/ASUR?p=ASUR"
    Dim html As HTMLDocument
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .send
        html.body.innerHTML = .responseText
    End With
    Debug.Print html.querySelector("p.D\(ib\).Va\(t\)").innerText
End Sub
If you want to avoid compound classes you can use the following:
Debug.Print html.querySelector("p + .D\(ib\)").innerText
References (VBE > Tools > References):
Microsoft HTML Object Library

XMLHTTP60 request not showing the entire HTML document

I am trying to obtain the HTML document from a website to, what else, scrape it for data!
Unfortunately, I cannot obtain the entire HTML document associated with the web page: my Debug.Print statement doesn't show the whole page; it gets cut off. I'm somewhat new to programming, so help would be greatly appreciated!
My code is below:
Const SecForm4 As String = "https://www.secform4.com/significant-buys.htm"

Sub LoadWebPage()
    Dim XMLReq As New MSXML2.XMLHTTP60
    XMLReq.Open "GET", SecForm4, False
    XMLReq.send

    If XMLReq.Status <> 200 Or XMLReq.readyState <> 4 Then
        MsgBox "Problem" & vbNewLine & XMLReq.Status & "-" & XMLReq.statusText
        Exit Sub
    End If
    ParsingHTMLDocument XMLReq.responseText
End Sub

Sub ParsingHTMLDocument(HTMLText As String)
    Dim HTMLDoc As New MSHTML.HTMLDocument
    HTMLDoc.body.innerHTML = HTMLText
    Debug.Print HTMLText
End Sub
The following works in terms of grabbing the document, and the table is present. You are unlikely to be able to print the entire document to the Immediate window, as it has a limited capacity; the response arrives in full but is truncated on display. Instead, you can write it to a text file and inspect that.
Change the file path, "C:\Users\User\Desktop\Test.txt", to one that suits you.
Option Explicit

Public Sub GetInfo()
    Dim sResponse As String, i As Long, html As New HTMLDocument, hTable As HTMLTable

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.secform4.com/significant-buys.htm", False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With
    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
    WriteTxtFile sResponse

    With html
        .body.innerHTML = sResponse
        Set hTable = .getElementById("filing_table")
        MsgBox hTable.localName
    End With
End Sub

Public Sub WriteTxtFile(ByVal aString As String, Optional ByVal filePath As String = "C:\Users\User\Desktop\Test.txt")
    Dim fso As Object, Fileout As Object
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set Fileout = fso.CreateTextFile(filePath, True, True)
    Fileout.Write aString
    Fileout.Close
End Sub
Reference to HTML Object Library required.
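As a quick sanity check before going to a file, you can print the length of the response rather than the response itself; if the length is large, the response arrived in full and the cut-off is only the Immediate window's display limit. A minimal sketch against the question's request object:
Debug.Print Len(XMLReq.responseText)          'full size of what was received
Debug.Print Left$(XMLReq.responseText, 500)   'inspect just the first 500 characters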

Web scraping an HTML page with no tags as delimiters

I'm trying to import all the lines of text on a web page into a string array. The URL is here: Vaticano-La Sacra Bibbia-Genesi-Cap.1.
Unfortunately (maybe a choice of the web designer), the tags carry no IDs or classes. The rows of text are separated by one or more <BR> elements, and the start and end of the text are separated from a simple menu by two <HR> tags.
A clean extract of the page code is here: jsfiddle.
I found a way to bring in the text. Here is what I do in VBA so far.
Note: objDoc is a Public variable coming from another module, filled from a .responseText without problems.
Public Sub ScriviXHTML(strBook As String, intNumCap As Integer)
    Dim strDati2 As String
    Dim TagBr As IHTMLElementCollection
    Dim BrElement As IHTMLElement
    Dim intElement As Integer
    Dim objChild As Object
    Dim strData, strTextCont, strNodeVal, strWholeText As String

    Set objDoc2 = New HTMLDocument
    Set objDoc2 = objDoc
    Set objDoc = Nothing

    'Put the HTML code of the web page in a string variable.
    strDati2 = objDoc2.body.innerHTML
    'Collect all the BR tags on the page.
    Set TagBr = objDoc2.body.getElementsByTagName("BR")
    'Loop over all BRs in the page.
    For Each BrElement In TagBr
        'Here I try to get the NextSibling element of the <br>
        ' because it seems to contain the text I'm looking for.
        Set objChild = BrElement.NextSibling
        With objChild
            ' Here I try to fill the variables
            strData = Trim("" & .Data & "")
            strTextCont = Trim("" & .textContent & "")
            strNodeVal = Trim("" & .NodeValue & "")
            strWholeText = Trim("" & .wholeText & "")
        End With
        intElement = intElement + 1
    Next BrElement
End Sub
Two questions:
1) In your opinion, is this the best way to achieve what I'm trying to do?
2) Sometimes Element.NextSibling.Data doesn't exist, raising run-time error '438', so I manually move the execution point of the routine to bypass the error. How can I intercept this error? [Please, not with a simple On Error Resume Next!] Better: how can I use an If...Then...End If statement to check whether the Data member exists on NextSibling?
Thanks to all.
Well you can get all the text as follows:
Public Sub GetInfo()
    Dim sResponse As String, xhr As Object, html As New HTMLDocument
    Set xhr = CreateObject("MSXML2.XMLHTTP")

    With xhr
        .Open "GET", "http://www.vatican.va/archive/ITA0001/__P1.HTM", False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
        sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
        html.body.innerHTML = sResponse
        'Strip the tags, swap &nbsp; entities for spaces, and flatten line feeds
        [A1] = Replace$(Replace$(regexRemove(html.body.innerHTML, "<([^>]+)>"), "&nbsp;", Chr$(32)), Chr$(10), Chr$(32))
    End With
End Sub

Public Function regexRemove(ByVal s As String, ByVal pattern As String) As String
    Dim regex As Object
    Set regex = CreateObject("VBScript.RegExp")
    With regex
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .pattern = pattern
    End With
    If regex.test(s) Then
        regexRemove = regex.Replace(s, vbNullString)
    Else
        regexRemove = s
    End If
End Function
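Regarding the second question (testing for .Data without On Error Resume Next): .Data only exists on text nodes, so check NodeType first. A minimal sketch, assuming the question's variables; in the DOM, a NodeType of 3 marks a text node:
Set objChild = BrElement.NextSibling
If Not objChild Is Nothing Then
    If objChild.NodeType = 3 Then      '3 = text node, so .Data is available
        strData = Trim$(objChild.Data)
    End If
End If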

VBA Excel pulling new webpage data after clicking on "submit"

I'm trying to pull some info from a website that provides oil well data by API number (API is a unique number for every well in the US)
Website: http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1
API example: 1708300502
The issue is, when I get to the 2nd page, IE.document.getElementsByTagName("body")(0).innerText still returns data from the initial page. How do I fetch the updated page data?
The ultimate goal is to get to the 2nd page, click on "30570" via IE.document.getElementsByTagName("a")(0).Click and then read the final 3rd page. I just cannot figure out how to read the updated page :(
Option Explicit

Sub sonris_WellData()
    Dim IE As InternetExplorer
    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = True
    Dim i As Integer

    'Open SONRIS website
    Application.StatusBar = "Opening Website"
    IE.navigate "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
    Do While IE.readyState <> 4: DoEvents: Loop
    Application.Wait Now() + TimeValue("00:00:01")
    Application.StatusBar = False

    IE.document.forms(0).p_apinum.Value = "1708300502" 'plug-in API
    IE.document.forms(0).submit

    ' Wait until the next page opens
    Application.StatusBar = "Opening Website"
    Do While IE.readyState <> 4: DoEvents: Loop
    Application.Wait Now() + TimeValue("00:00:01")
    Application.StatusBar = False

    ' Read the page - this is where the issue occurs, MsgBox keeps returning text from the very 1st page
    MsgBox IE.document.getElementsByTagName("body")(0).innerText
    IE.Quit
End Sub
This seems to be working. Rather than DoEvents, use the WinAPI Sleep function. I also added a call to Sleep after the form submit.
More often now, sites are served dynamically by JavaScript and the like; in these cases the browser may appear to be READYSTATE_COMPLETE or not Busy, but the page has not yet rendered the "new" results.
Option Explicit

'Conditional declare so this compiles in both 64-bit and 32-bit Office
#If VBA7 Then
    Public Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
#Else
    Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
#End If

Sub sonris_WellData()
    Dim IE As Object 'InternetExplorer
    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = True
    Dim i As Integer

    'Open SONRIS website
    Application.StatusBar = "Opening Website"
    IE.navigate "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
    Do While IE.readyState <> 4
        Sleep 1000
    Loop
    Application.StatusBar = False

    IE.document.forms(0).p_apinum.Value = "1708300502" 'plug-in API
    IE.document.forms(0).submit
    Sleep 1000

    ' Wait until the next page opens
    Application.StatusBar = "Opening Website"
    Do While IE.readyState <> 4
        Sleep 1000
    Loop
    Application.StatusBar = False

    ' Read the page - this is where the issue occurs, MsgBox keeps returning text from the very 1st page
    MsgBox IE.document.getElementsByTagName("body")(0).innerText
    IE.Quit
End Sub
You can experiment with a slightly longer Sleep after the .submit.
Alternatively, I notice that the URL changes after you submit, so you could also change the second waiting loop to:
Do While IE.LocationURL = "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
    Sleep 1000
Loop
This makes Excel wait until the URL has changed.
Alternatively, you may have better luck using an XMLHTTPRequest (there are many examples of this here on SO and elsewhere on the internet). It lets you send a request just like the browser does, without actually opening a web browser, and you can then simply parse the returned text as HTML or XML. I would use the Microsoft XML, v6.0 library reference for this.
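For example, a minimal sketch of such a request (my illustration; it assumes references to Microsoft XML, v6.0 and the Microsoft HTML Object Library, and parsing is left as a placeholder):
Dim http As New MSXML2.XMLHTTP60     'Microsoft XML, v6.0 reference
Dim html As New MSHTML.HTMLDocument  'Microsoft HTML Object Library reference
http.Open "GET", "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1", False
http.send
html.body.innerHTML = http.responseText
Debug.Print html.body.innerText      'parse the document from here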
POST requests:
① Entering the Well API number:
I examined the web page while making the selections you mention, inspecting the traffic with Fiddler, and noticed that the initial request, when you submit the API number, is handled by a POST request.
② The POST request:
The POST body has a single parameter: p_apinum is the key, and the associated value is the Well API number (e.g. p_apinum=1708300502). Using this info I formulated a direct POST request, thus bypassing your first landing page.
③ Pressing the hyperlink:
Next, I looked at the element you wanted to press; its HTML carries a relative hyperlink. I use a helper function to parse the page HTML for this relative link and construct the absolute path: GetNextURL(page.body.innerHTML).
④ Making a new request:
I re-use my HTTP request function GetPage to send a second request, with an empty body, and grab all the tables from the returned HTML document via page.getElementsByTagName("table").
⑤ Writing the tables to the Excel worksheet:
I loop over all the tables on the page, using helper function AddHeaders to write out the table headers and WriteTables to write the current table to the sheet.
VBA:
Option Explicit

Public Sub GetWellInfo()
    Dim ws As Worksheet, page As HTMLDocument, targetTable As HTMLTable, apiNumbers(), currNumber As Long
    Const PARAM1 As String = "p_apinum"
    Const BASESTRING As String = "http://sonlite.dnr.state.la.us/sundown/cart_prod/"
    apiNumbers = Array(1708300502, 1708300503)

    Application.ScreenUpdating = False
    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With ws
        .Cells.ClearContents
        For currNumber = LBound(apiNumbers) To UBound(apiNumbers)
            Set page = GetPage(BASESTRING & "cart_con_wellapi2", apiNumbers(currNumber), PARAM1)
            Set page = GetPage(BASESTRING & GetNextURL(page.body.innerHTML))
            Dim allTables As Object
            Set allTables = page.getElementsByTagName("table")
            For Each targetTable In allTables
                AddHeaders targetTable, GetLastRow(ws, 1) + 2, ws
                WriteTables targetTable, GetLastRow(ws, 1), ws
            Next targetTable
        Next currNumber
    End With
    Application.ScreenUpdating = True
End Sub

Public Function GetPage(ByVal url As String, Optional ByVal apiNumber As Long, Optional ByVal paramN As String = vbNullString) As HTMLDocument
    Dim objHTTP As Object, html As New HTMLDocument
    Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")

    Dim sBody As String
    If Not paramN = vbNullString Then sBody = paramN & "=" & apiNumber

    With objHTTP
        .SetTimeouts 10000, 10000, 10000, 10000
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        On Error Resume Next
        .send (sBody)
        If Err.Number = 0 Then
            If .Status = "200" Then
                html.body.innerHTML = .responseText
                Set GetPage = html
            Else
                Debug.Print "HTTP " & .Status & " " & .statusText
                Exit Function
            End If
        Else
            Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
            Exit Function
        End If
        On Error GoTo 0
    End With
End Function

Public Function GetNextURL(ByVal inputString As String)
    'Extract the first href value in the page, stripping quotes and the about: prefix
    GetNextURL = Replace$(Replace$(Split(Split(inputString, "href=")(1), ">")(0), Chr$(34), vbNullString), "about:", vbNullString)
End Function

Public Sub AddHeaders(ByVal hTable As Object, ByVal startRow As Long, ByVal ws As Worksheet)
    Dim headers As Object, header As Object, columnCounter As Long
    Set headers = hTable.getElementsByTagName("th")
    For Each header In headers
        columnCounter = columnCounter + 1
        ws.Cells(startRow, columnCounter) = header.innerText
    Next header
End Sub

Public Sub WriteTables(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByRef ws As Worksheet)
    If ws Is Nothing Then Set ws = ActiveSheet
    Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
    r = startRow: c = 1     'start writing in column 1 (c was never initialised)
    With ws                 'write to the passed-in sheet, not ActiveSheet
        Set tRow = hTable.getElementsByTagName("tr")
        For Each tr In tRow
            Set tCell = tr.getElementsByTagName("td")
            For Each td In tCell
                .Cells(r, c).Value = td.innerText
                c = c + 1
            Next td
            r = r + 1: c = 1
        Next tr
    End With
End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function
References:
VBE > Tools > References > HTML Object Library.