Data Scraping Elements By ClassName - html

I am trying to pull data from a web-site, I want to copy the '10' x 5'unit (class name is "unit_size medium") ' in row 1 for which I am able to copy data successfully but I also want promo (Class name is "promo_offers") '1st Month Free!' in row 2, the problem is this promo is given for specific cells only. hence the data is misleading and I am getting promo in 1st 4 cells and then getting error. However, I want to copy promo for only those units where promo information is given else the cell should be blank or any other value needs to be set. Below is the code...
Please suggest how to frame the code.
Sub GetClassNames()
Dim html As HTMLDocument
Dim objIE As Object
Dim element As IHTMLElement
Dim ie As InternetExplorer
Dim elements As IHTMLElementCollection
Dim result As String 'string variable that will hold our result link
Dim count As Long
Dim erow As Long
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
count = 0
Set html = objIE.document
Set elements = html.getElementsByClassName("unit_size medium")
For Each element In elements
If element.className = "unit_size medium" Then
erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText
Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
count = count + 1
End If
Next element
End Sub

I would simply wrap in an On Error Resume Next when attempting to access the element. Have a place already reserved for it in an output array so if not present the place remains empty.
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
Dim ie As New InternetExplorer, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ie
.Visible = True
.Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long
headers = Array("size", "features", "promo", "in store", "web")
Set listings = .document.getElementById("small_units_accordion_panel").getElementsByTagName("li")
'.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
On Error Resume Next
results(r, 1) = listing.getElementsByClassName("unit_size medium")(0).innerText
results(r, 2) = listing.getElementsByClassName("features")(0).innerText
results(r, 3) = listing.getElementsByClassName("promo_offers")(0).innerText
results(r, 4) = listing.getElementsByClassName("board_rate")(0).innerText
results(r, 5) = listing.getElementsByClassName("price")(0).innerText
On Error GoTo 0
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
End Sub
All boxes:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
Dim ie As New InternetExplorer, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ie
.Visible = True
.Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results()
Dim r As Long, list As Object, item As Object
headers = Array("size", "features", "promo", "in store", "web")
Set list = .document.getElementsByClassName("main_unit")
'.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate
Dim rowCount As Long
rowCount = .document.querySelectorAll(".main_unit li").Length
ReDim results(1 To rowCount, 1 To UBound(headers) + 1)
For Each listing In list
For Each item In listing.getElementsByTagName("li")
r = r + 1
On Error Resume Next
results(r, 1) = item.getElementsByClassName("unit_size medium")(0).innerText
results(r, 2) = item.getElementsByClassName("features")(0).innerText
results(r, 3) = item.getElementsByClassName("promo_offers")(0).innerText
results(r, 4) = item.getElementsByClassName("board_rate")(0).innerText
results(r, 5) = item.getElementsByClassName("price")(0).innerText
On Error GoTo 0
Next
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
End Sub

Related

VBA: Web scraping with <ul and <li and <div and <span

I am using VBA to extract the data from HTML in <span code which is under <Div which is under <li which is under <ul
I'm trying to extract the "date and matter" from HTML. "Date" should be in A column and "Matter" should be in B Column in Excel.
The drawback of my code is, it is pulling all the Date and matter into single cell.
Sub GetDat()
Dim IE As New InternetExplorer, html As HTMLDocument
Dim elem As Object, data As String
With IE
.Visible = True
.navigate "https://www.MyURL/sc/wo/Worders/index?id=76888564"
Do While .readyState <> READYSTATE_COMPLETE: Loop
Set html = .document
End With
data = ""
For Each elem In html.getElementsByClassName("simple-list")(0).getElementsByTagName("li")
data = data & " " & elem.innerText
Next elem
Range("A1").Value = data
IE.Quit
End Sub
The output that I need is shown in the image:
HTML:
You could grab two nodeLists, one for dates and one for matters, and then loop those writing out to sheet. Match dates based on data-bind attribute value; matters on classname:
Dim dates As Object, matters As Object, i As Long, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set dates = ie.document.querySelectorAll("[data-bind^='text:createdDate']") '.wo-notes-col-1 [data-bind^='text:createdDate']
Set matters = ie.document.querySelectorAll(".wo-notes")
With ws
For i = 0 To dates.Length - 1
.Cells(i + 1, 1) = dates.Item(i).innertext
.Cells(i + 1, 2) = matters.Item(i).innertext
Next
End With
Example reading values from column C:
Option Explicit
Public Sub GetMatters()
Dim ws As Worksheet, lastRow As Long, urls(), results(), ie As SHDocVw.InternetExplorer, r As Long
Set ie = New SHDocVw.InternetExplorer
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.Rows.Count, "C").End(xlUp).Row
urls = Application.Transpose(ws.Range("C2:C" & lastRow).Value)
ReDim results(1 To 1000, 1 To 2)
With ie
.Visible = True
For i = LBound(urls) To UBound(urls)
.navigate2 "https://www.MyURL/sc/wo/Worders/index?id=" & urls(i)
While .Busy Or .readyState <> 4: DoEvents: Wend
Dim dates As Object, matters As Object, i As Long
Set dates = .document.querySelectorAll("[data-bind^='text:createdDate']") '.wo-notes-col-1 [data-bind^='text:createdDate']
Set matters = .document.querySelectorAll(".wo-notes")
For i = 0 To dates.Length - 1
r = r + 1
results(r, 1) = dates.Item(i).innertext
results(r, 2) = matters.Item(i).innertext
Next
Set dates = Nothing: Set matter = dates
Next
.Quit
End With
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
References:
document.querySelectorAll
css selectors

How to extract data from HTML divs into Excel

I am trying to extract the details in this webpage and they seem to be under certain "divs" with "selection-left" and "selection-right" right. I haven't found a way to successfully pull it yet.
This is the URL - https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/
And here is an image of what I want to extract. I want to copy the competition name and each participant and score.
I have tried using QHar's approach in this link - How to extract values from nested divs using VBA. But I'm getting errors along this line -
ReDim results(1 To countries.Length / 2, 1 To 4)
Here is the code I've been trying to make work
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As
Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/", False
.send
html.body.innerHTML = .responseText
End With
Set participant = html.querySelectorAll(".market-content .selection-left"): Set scores = html.querySelectorAll("..market-content .selection-right")
ReDim results(1 To countries.Length / 2, 1 To 4)
For i = 0 To participant.Length - 1 Step 2
results(r, 1) = participant.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 4) = Array("Competition", "Participant", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
I will need help to make this code work
Content is dynamically added so will not be present in your current request format; hence your error as you have a nodeList of Length 0. You could try making POST requests as the page does but it doesn't look like a quick and easy bit of coding. I would go with browser automation, if this is a small project, so that js can run on the page and you can click the show more button. You will need a wait condition for the page to have properly loaded. I use the presence of the show more button.
Option Explicit
Public Sub GetOddsIE()
Dim d As InternetExplorer, odds As Object, names As Object, i As Long
Dim ws As Worksheet, results(), competition As String
Set d = New InternetExplorer
Set ws = ThisWorkbook.Worksheets("Sheet1")
Const URL = "https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/"
With d
.Visible = False
.Navigate2 URL
While .Busy Or .ReadyState <> 4: DoEvents: Wend
With .Document.getElementsByClassName("expandable-below-container-button")
Do
DoEvents
Loop While .Length = 0 'wait for element to be present
.Item(0).Click 'click on show more
End With
Set names = .Document.getElementsByClassName("selection-left-selection-name")
Set odds = .Document.getElementsByClassName("odds-convert")
competition = .Document.getElementsByClassName("league")(0).innerText
ReDim results(1 To names.Length, 1 To 3)
For i = 0 To names.Length - 1
results(i + 1, 1) = competition
results(i + 1, 2) = names.Item(i).innerText
results(i + 1, 3) = "'" & odds.Item(i).innerText
Next
.Quit
End With
ws.Cells(1, 1).Resize(1, 3) = Array("Competition", "Participant", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Scraping web with url in milliseconds Unix

The web in which I browse has several pages. I want to click on those elements or play with urls and thus be able to copy the data. The initial URL ends at = 1 & playerType = ALL & ts = 1558502019375, in my code there is a loop that is supposed to go page after page to get the data but I can not get it done.
Sub UPDATE_DATA_MLB()
Application.ScreenUpdating = False
'dimension (set aside memory for) our variables
Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
Dim EstaPagina As Byte
Dim EstaURL As String
'Página inicial
EstaPagina = 1
'we will output data to excel, starting on row 1
y = 1
EstaURL = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=hitting&page=1&playerType=ALL&ts=1558502019375" '&ts=1526432697176"
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = False
Do Until EstaPagina = 255
'navigate to page with needed data
objIE.navigate EstaURL & EstaPagina
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'If UCase(Left(EstaURL, 211) & "1") = UCase(Left(objIE.LocationURL, (211 + Len(EstaPagina)))) And y > 1 Then Exit Do
'look at all the 'tr' elements in the 'table' with id 'myTable',
'and evaluate each, one at a time, using 'ele' variable
For Each ele In objIE.document.getElementById("datagrid").getElementsByTagName("tr")
'show the text content of 'tr' element being looked at
'Debug.Print ele.textContent
'each 'tr' (table row) element contains 4 children ('td') elements
'put text of 1st 'td' in col A
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
'put text of 2nd 'td' in col B
Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
'put text of 3rd 'td' in col C
Sheets("Sheet1").Range("C" & y).Value = ele.Children(2).textContent
'put text of 4th 'td' in col D
Sheets("Sheet1").Range("D" & y).Value = ele.Children(5).textContent
'put text of 4th 'td' in col f
Sheets("Sheet1").Range("E" & y).Value = ele.Children(22).textContent
'increment row counter by 1
y = y + 1
Next
EstaPagina = EstaPagina + 1
Loop
lobjIE.Quit
Set objIE = Nothing
Set ele = Nothing
Range("A1").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Selection.RemoveDuplicates Columns:=Array(1, 2, 3, 4, 5), _
Header:=xlNo
Application.ScreenUpdating = True
MsgBox "Volcado terminado", vbInformation
Range("A1").Select
'save the Excel workbook
ActiveWorkbook.Save
End Sub ```
XMLHTTP:
The page does ajax calls to get json which it uses to update the content for each page. It does this using query string parameters, one of which is the records per page (default 50). You can view this activity in the browser network tab via dev tools F12
You can avoid using a browser and issue xmlhttp requests to the same endpoint, altering the query string parameters to get all the results. It seems, in my testing, you cannot request all the results in one go. Instead, I issue a request for 1000 results, then check if there are any additional results to obtain and issue further requests, in batches of 1000, to get them.
I am using jsonconverter.bas to parse the json. I extract the info from the json and load into an array, results, so I can write all results out to the sheet in one go - much more efficient this way as reduces i/o with sheet.
After copying the code from the link above into a module called jsonConverter, you need to go 'VBE > Tools > References > Add a reference to Microsoft Scripting Runtime
TODO:
Add error handling for failed request
VBA:
Option Explicit
Public Sub GetResults()
'VBE > Tools > References > Microsoft Scripting Runtime
Dim ws As Worksheet, results(), i As Long, totalResults As Long
Dim headers(), columnCount As Long, pageNumber As Long
Dim numberOfPages As Long, resultsPerPage As Long, json As Object
resultsPerPage = 1000
pageNumber = 1
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://mlb.mlb.com/pubajax/wf/flow/stats.splayer?season=2018&sort_order=%27desc%27&sort_column=%27avg%27&stat_type=hitting&page_type=SortablePlayer&game_type=%27R%27&player_pool=ALL&season_type=ANY&sport_code=%27mlb%27&results=" & resultsPerPage & "&recSP=" & pageNumber & "&recPP=" & resultsPerPage, False
.send
Set json = JsonConverter.ParseJson(.responseText)
totalResults = json("stats_sortable_player")("queryResults")("totalSize")
headers = json("stats_sortable_player")("queryResults")("row").item(1).keys
numberOfPages = json("stats_sortable_player")("queryResults")("totalP")
columnCount = UBound(headers) + 1
ReDim results(1 To totalResults, 1 To columnCount)
Dim r As Long, c As Long, dict As Object, key As Variant
For pageNumber = 1 To numberOfPages
If pageNumber > 1 Then
.Open "GET", "http://mlb.mlb.com/pubajax/wf/flow/stats.splayer?season=2018&sort_order=%27desc%27&sort_column=%27avg%27&stat_type=hitting&page_type=SortablePlayer&game_type=%27R%27&player_pool=ALL&season_type=ANY&sport_code=%27mlb%27&results=" & resultsPerPage & "&recSP=" & pageNumber & "&recPP=" & resultsPerPage, False
.send
Set json = JsonConverter.ParseJson(.responseText)
End If
For Each dict In json("stats_sortable_player")("queryResults")("row")
r = r + 1: c = 1
For Each key In dict.keys
results(r, c) = dict(key)
c = c + 1
Next
Next
Next
End With
With ws
.Cells(1, 1).Resize(1, columnCount) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Sample of output ( appreciate can't really read as is - but idea of layout):
image link: https://i.stack.imgur.com/jiDTP.png
Internet Explorer:
If you want to use slower browser solution you can concatenate the page number into the url and loop to cover all pages. The number of pages can be extracted from the pagination on page 1.
You can see how to write the tables out below each other by looking at this answer. Change the lines with GetLastRow(ws, 1) + 2 to GetLastRow(ws, 1) + 1
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ie As New InternetExplorer, numberOfPages As Long
Dim url As String, i As Long
Const PLAYERS_PER_PAGE = 50
url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=hitting&page=1&playerType=ALL&ts="
With ie
.Visible = True
.Navigate2 url
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
numberOfPages = CLng(.querySelector(".paginationWidget-last").innerText)
'do something with page 1
If numberOfPages > 1 Then
For i = 2 To numberOfPages
ie.Navigate2 Replace$(url, "page=1", "page=" & CStr(i))
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
'do something with other pages
Next
Stop 'delete me later
End If
End With
.Quit
End With
End Sub
I have managed to complement his second code with mine, although I have not been very professional, I am interested in how to achieve combertir the url of "official page" to this "official page for jsone"
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ele As Object
Dim y As Integer
Dim EstaPagina As Byte
EstaPagina = 1
'we will output data to excel, starting on row 1
y = 1
Dim ie As New InternetExplorer, numberOfPages As Long
Dim url As String, i As Long
Const PLAYERS_PER_PAGE = 50
url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+pitching&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=pitching&page=1&playerType=ALL&ts="
With ie
.Visible = True
.Navigate2 url
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
numberOfPages = CLng(.querySelector(".paginationWidget-last").innerText)
'do something with page 1
If numberOfPages > 1 Then
For i = 1 To numberOfPages
ie.Navigate2 Replace$(url, "page=1", "page=" & CStr(i))
For Each ele In ie.document.getElementById("datagrid").getElementsByTagName("tr")
'show the text content of 'tr' element being looked at
'Debug.Print ele.textContent
'each 'tr' (table row) element contains 4 children ('td') elements
'put text of 1st 'td' in col A
Sheets("Sheet1").Range("A" & y).value = ele.Children(1).textContent
'put text of 2nd 'td' in col B
Sheets("Sheet1").Range("B" & y).value = ele.Children(2).textContent
'put text of 3rd 'td' in col C
Sheets("Sheet1").Range("C" & y).value = ele.Children(3).textContent
'put text of 4th 'td' in col D
Sheets("Sheet1").Range("D" & y).value = ele.Children(4).textContent
'put text of 4th 'td' in col f
Sheets("Sheet1").Range("E" & y).value = ele.Children(5).textContent
'increment row counter by 1
y = y + 1
Next
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
' do something with other pages
Next
' Stop 'delete me later
End If
End With
.Quit
End With
On Error Resume Next
Range("A1").Select
Range(Selection, Selection.End(xlToRight)).Select
Range(Selection, Selection.End(xlDown)).Select
Selection.RemoveDuplicates Columns:=Array(1, 2, 3, 4), _
header:=xlNo
On Error Resume Next
Application.ScreenUpdating = True
MsgBox "Volcado terminado", vbInformation
Range("A1").Select
'save the Excel workbook
ActiveWorkbook.Save
End Sub

Loop the header array with elements

I am looking for the code which can be loop the header array with the class names but it must not include the the tag name or id. This is just to ensure if any class does not exist then the corresponding cell should be left blank and the next element should be copied.
I tried to add the header array like
headers = Array("size", "features", "promo", "in store", "web")
But it needs to be loop with the tag name which I don't want.
also want promo (Class name is "promo_offers") '1st Month Free!' in row 2, the problem is this promo is given for specific cells only - hence the data is misleading and I am getting promo in 1st 4 cells and then getting error.
However, I want to copy promo for only those units where promo information is given else the cell should be blank or any other value needs to be set. Below is the code...
Please suggest how to frame the code.
Sub GetClassNames()
Dim html As HTMLDocument
Dim objIE As Object
Dim element As IHTMLElement
Dim ie As InternetExplorer
Dim elements As IHTMLElementCollection
Dim result As String 'string variable that will hold our result link
Dim count As Long
Dim erow As Long
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
count = 0
Set html = objIE.document
Set elements = html.getElementsByClassName("unit_size medium")
For Each element In elements
If element.className = "unit_size medium" Then
erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText
Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
count = count + 1
End If
Next element
End Sub
For anything i.e promo is null then the corresponding cell should be left blank the next element should be copied
You can get all that info using xmlhttp.
I grab all the li elements for the boxes and loop those putting the html of each li into a new HTMLDocument. I use querySelector method of that object to get all the other items within each row using css selectors. I wrap selection in On Error Resume Next On Error GoTo 0 to mask errors for when attempting to access elements not present e.g. some rows do not have a promo. Those entries then get left blank as requested.
Option Explicit
Public Sub GetInfo()
Dim ws As Worksheet, html As HTMLDocument, s As String
Const URL As String = "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423"
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
html.body.innerHTML = s
Dim headers(), results(), listings As Object
headers = Array("Size", "Features", "Promo", "In store", "Web")
Set listings = html.querySelectorAll(".li_unit_listing")
Dim rowCount As Long, numColumns As Long, r As Long, c As Long, item As Long
rowCount = listings.Length
numColumns = UBound(headers) + 1
ReDim results(1 To rowCount, 1 To numColumns)
Dim html2 As HTMLDocument
Set html2 = New HTMLDocument
For item = 0 To listings.Length - 1
r = r + 1
html2.body.innerHTML = listings.item(item).innerHTML
On Error Resume Next
results(r, 1) = Trim$(html2.querySelector(".unit_size").innerText)
results(r, 2) = Trim$(html2.querySelector(".features").innerText)
results(r, 3) = Trim$(html2.querySelector(".promo_offers").innerText)
results(r, 4) = html2.querySelector(".board_rate").innerText
results(r, 5) = html2.querySelector("[itemprop=price]").getAttribute("content")
On Error GoTo 0
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Output:

Click related to doPostBack & href in VBA

I was having trouble with coding and I could no longer work.
https://tradingeconomics.com/
I posted the coding logic I was working on below. I imported the excel sheet to Portugal of the countries shown on the homepage.
But,
<a id="ctl00_ContentPlaceHolder1_defaultUC1_CurrencyMatrixAllCountries1_LinkButton1" class="btn-group btn-group-sm" href="javascript:__doPostBack('ctl00$ContentPlaceHolder1$defaultUC1$CurrencyMatrixAllCountries1$LinkButton1','')">
<button type = "button" class = "btn btn-default">
<i class = "glyphicon glyphicon-plus"> </ i>
</ button>
</a>
How can I code doPostBack to finish my work here? I looked through stackoverflow homepage and tried various trial and error, but I could not finish it.
Option Explicit
Public Sub New_Listing()
Application.ScreenUpdating = False
Dim IE As New InternetExplorer
Const MAX_WAIT_SEC As Long = 5
Dim http As New MSXML2.XMLHTTP60
Dim html As New HTMLDocument
Dim ws As Worksheet
Dim sResponse0 As String
Dim g As Integer
Set http = CreateObject("MSXML2.XMLHTTP")
Set ws = ThisWorkbook.Worksheets("Sheet1")
Dim url As String
url = "https://tradingeconomics.com/"
With IE
.Visible = True
.navigate "https://tradingeconomics.com/"
While .Busy Or .readyState < 4: DoEvents: Wend
End With
Dim tarTable As HTMLTable
Dim hTable As HTMLTable
For Each tarTable In IE.document.getElementsByTagName("table")
If InStr(tarTable.ID, "ctl00_ContentPlaceHolder1_defaultUC1_CurrencyMatrixAllCountries1_GridView1") <> 0 Then
Set hTable = tarTable
End If
Next
Dim startRow As Long
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
r = startRow
With ws
Set tRow = hTable.getElementsByTagName("tr")
ReDim arr0(tRow.Length - 1, 0)
For Each tr In tRow
r = r + 1
Set tCell = tr.getElementsByTagName("td")
If tCell.Length > UBound(arr0, 2) Then
ReDim Preserve arr0(tRow.Length - 1, tCell.Length)
End If
c = 1
For Each td In tCell
arr0(r - 1, c - 1) = td.innerText
c = c + 1
Next td
Next tr
Dim k As Integer
Dim i As Integer
k = 0
For i = LBound(arr0, 1) To UBound(arr0, 1)
.Cells(2 + k, 2) = arr0(i, 0)
.Cells(2 + k, 3) = arr0(i, 1)
.Cells(2 + k, 4) = arr0(i, 2)
.Cells(2 + k, 5) = arr0(i, 3)
.Cells(2 + k, 6) = arr0(i, 4)
.Cells(2 + k, 7) = arr0(i, 5)
.Cells(2 + k, 8) = arr0(i, 6)
.Cells(2 + k, 9) = arr0(i, 7)
k = k + 1
Next i
End With
With IE
.Visible = True
.document.querySelector("a.btn-group btn-group-sm[href='javascript:__doPostBack('ctl00$ContentPlaceHolder1$defaultUC1$CurrencyMatrixAllCountries1$LinkButton1','')']").Click
End With
Set tRow = Nothing: Set tCell = Nothing: Set tr = Nothing: Set td = Nothing
Set hTable = Nothing: Set tarTable = Nothing
Application.ScreenUpdating = True
End Sub
I have completed my work up to Portugal, how can I fix it to get the next data, Czech Republic ?, I would be very grateful if you could give me details of how to modify the coding. I have learned vba very soon, so I have a lot of difficulties.
enter image description here
The Czech Republic value is part of a values set which is returned from the server after clicking a button in the web page itself, you'll have to simulate a click on this button by the VBA code, wait for IE to fetch the result and then continue with your code:
As this is the HTML of the button for fetching the next set of values:
<button type="button" class="btn btn-default">...
Use this to simulate a click and wait for the result fetched:
IE.document.getElementsByClassName("btn-default")(0).Click
Application.Wait(Now + TimeValue("0:00:05"))
After it you will be able to read the Czech Republic and the next set of values by the VBA code.
Internet Explorer
Here with IE - get's all results (the whole World) in one go including links.
Condition based wait with timeout:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, ws As Worksheet, clipboard As Object, t As Date
Const MAX_WAIT_SEC As Long = 5
Const URL = "https://tradingeconomics.com/"
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With IE
.Visible = True
.navigate URL
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("[value=world]").Selected = True
.querySelector("select").FireEvent "onchange"
t = Timer
Do
DoEvents
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While .getElementById("ctl00_ContentPlaceHolder1_defaultUC1_CurrencyMatrixAllCountries1_ParameterContinent").Value <> "world"
clipboard.SetText .getElementById("ctl00_ContentPlaceHolder1_defaultUC1_CurrencyMatrixAllCountries1_GridView1").outerHTML
clipboard.PutInClipboard
End With
.Quit
ws.Cells(1, 1).PasteSpecial
End With
End Sub
Explicit wait based:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, ws As Worksheet, clipboard As Object
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Const URL = "https://tradingeconomics.com/"
With IE
.Visible = True
.navigate URL
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("[value=world]").Selected = True
.querySelector("select").FireEvent "onchange"
Application.Wait Now + TimeSerial(0, 0, 5)
clipboard.SetText .getElementById("ctl00_ContentPlaceHolder1_defaultUC1_CurrencyMatrixAllCountries1_GridView1").outerHTML
clipboard.PutInClipboard
End With
.Quit
ws.Cells(1, 1).PasteSpecial
End With
End Sub
Selenium basic
This is easy enough using selenium basic and allowing enough time for the postBack to update the page. After selenium basic install add reference via VBE> Tools > References > Selenium Type Library. More selenium info [here]. The below gets all the World data in one go.
Option Explicit
Public Sub GetInfo()
Dim d As WebDriver, ws As Worksheet, clipboard As Object
Set d = New ChromeDriver
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Const URL = "https://tradingeconomics.com/"
With d
.Start "Chrome"
.get URL
.FindElementByCss("[value=world]").Click
Application.Wait Now + TimeSerial(0, 0, 5)
clipboard.SetText .FindElementById("ctl00_ContentPlaceHolder1_defaultUC1_CurrencyMatrixAllCountries1_GridView1").Attribute("outerHTML")
clipboard.PutInClipboard
.Quit
ws.Cells(1, 1).PasteSpecial
End With
End Sub