Loop the header array with elements - html

I am looking for code that can loop the header array using the class names, but it must not include the tag name or id. This is to ensure that if a class does not exist, the corresponding cell is left blank and the next element is copied.
I tried to add the header array like
headers = Array("size", "features", "promo", "in store", "web")
But it needs to be looped with the tag name, which I don't want.
I also want the promo (class name is "promo_offers", e.g. '1st Month Free!') in row 2. The problem is that this promo is given for specific cells only, so the data is misleading: I get the promo in the first 4 cells and then an error.
However, I want to copy the promo only for those units where promo information is given; otherwise the cell should be blank or set to some other value. Below is the code...
Please suggest how to frame the code.
Sub GetClassNames()
    Dim html As HTMLDocument
    Dim objIE As Object
    Dim element As IHTMLElement
    Dim ie As InternetExplorer
    Dim elements As IHTMLElementCollection
    Dim result As String 'string variable that will hold our result link
    Dim count As Long
    Dim erow As Long
    'initiating a new instance of Internet Explorer and assigning it to objIE
    Set objIE = New InternetExplorer
    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True
    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    count = 0
    Set html = objIE.document
    Set elements = html.getElementsByClassName("unit_size medium")
    For Each element In elements
        If element.className = "unit_size medium" Then
            erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
            Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText
            Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
            count = count + 1
        End If
    Next element
End Sub
If anything (e.g. the promo) is null, the corresponding cell should be left blank and the next element should be copied.

You can get all that info using xmlhttp.
I grab all the li elements for the boxes and loop those, putting the html of each li into a new HTMLDocument. I then use the querySelector method of that object to get the other items within each row using CSS selectors. I wrap each selection in On Error Resume Next ... On Error GoTo 0 to mask errors when attempting to access elements that are not present, e.g. some rows do not have a promo. Those entries are then left blank as requested.
Option Explicit
Public Sub GetInfo()
    Dim ws As Worksheet, html As HTMLDocument, s As String
    Const URL As String = "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423"
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        s = .responseText
        html.body.innerHTML = s

        Dim headers(), results(), listings As Object
        headers = Array("Size", "Features", "Promo", "In store", "Web")
        Set listings = html.querySelectorAll(".li_unit_listing")

        Dim rowCount As Long, numColumns As Long, r As Long, c As Long, item As Long
        rowCount = listings.Length
        numColumns = UBound(headers) + 1
        ReDim results(1 To rowCount, 1 To numColumns)

        Dim html2 As HTMLDocument
        Set html2 = New HTMLDocument

        For item = 0 To listings.Length - 1
            r = r + 1
            html2.body.innerHTML = listings.item(item).innerHTML
            On Error Resume Next
            results(r, 1) = Trim$(html2.querySelector(".unit_size").innerText)
            results(r, 2) = Trim$(html2.querySelector(".features").innerText)
            results(r, 3) = Trim$(html2.querySelector(".promo_offers").innerText)
            results(r, 4) = html2.querySelector(".board_rate").innerText
            results(r, 5) = html2.querySelector("[itemprop=price]").getAttribute("content")
            On Error GoTo 0
        Next
        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub
Output:

Related

How to extract data from HTML divs into Excel

I am trying to extract the details on this webpage; they seem to be under certain divs with classes "selection-left" and "selection-right". I haven't found a way to successfully pull them yet.
This is the URL - https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/
And here is an image of what I want to extract. I want to copy the competition name and each participant and score.
I have tried using QHarr's approach in this link - How to extract values from nested divs using VBA. But I'm getting errors on this line -
ReDim results(1 To countries.Length / 2, 1 To 4)
Here is the code I've been trying to make work
Option Explicit
Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/", False
        .send
        html.body.innerHTML = .responseText
    End With
    Set participant = html.querySelectorAll(".market-content .selection-left"): Set scores = html.querySelectorAll("..market-content .selection-right")
    ReDim results(1 To countries.Length / 2, 1 To 4)
    For i = 0 To participant.Length - 1 Step 2
        results(r, 1) = participant.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
        r = r + 1
    Next
    ws.Cells(1, 1).Resize(1, 4) = Array("Competition", "Participant", "Score")
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
I will need help to make this code work
Content is dynamically added, so it will not be present in your current request format; hence your error, as you have a nodeList of Length 0. You could try making POST requests as the page does, but it doesn't look like a quick and easy bit of coding. I would go with browser automation if this is a small project, so that js can run on the page and you can click the show more button. You will need a wait condition for the page to have properly loaded; I use the presence of the show more button.
Option Explicit
Public Sub GetOddsIE()
    Dim d As InternetExplorer, odds As Object, names As Object, i As Long
    Dim ws As Worksheet, results(), competition As String
    Set d = New InternetExplorer
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Const URL = "https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/"
    With d
        .Visible = False
        .Navigate2 URL
        While .Busy Or .ReadyState <> 4: DoEvents: Wend
        With .Document.getElementsByClassName("expandable-below-container-button")
            Do
                DoEvents
            Loop While .Length = 0 'wait for element to be present
            .Item(0).Click 'click on show more
        End With
        Set names = .Document.getElementsByClassName("selection-left-selection-name")
        Set odds = .Document.getElementsByClassName("odds-convert")
        competition = .Document.getElementsByClassName("league")(0).innerText
        ReDim results(1 To names.Length, 1 To 3)
        For i = 0 To names.Length - 1
            results(i + 1, 1) = competition
            results(i + 1, 2) = names.Item(i).innerText
            results(i + 1, 3) = "'" & odds.Item(i).innerText
        Next
        .Quit
    End With
    ws.Cells(1, 1).Resize(1, 3) = Array("Competition", "Participant", "Score")
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Web-Scraping for Table Format

I am trying to copy data that is laid out in tr and td elements. I am new to data scraping, so I did not find any results in a Google search.
I want to copy each unit size (example 5*5) into column A, Regular Price into B, Cash Price into D, First 3 months 50% Off (offers) into F, and Reserve into column G.
http://westgateselfstorage.com/index.php?page=estimator
Frankly, I'm not able to frame the code, so it would be really helpful if someone could help me build it.
Clipboard
Do you care about layout? You can copy directly via the clipboard and keep the same layout as the page:
Option Explicit
Public Sub GetTable()
    Dim html As HTMLDocument, clipboard As Object, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://westgateselfstorage.com/index.php?page=estimator", False
        .send
        html.body.innerHTML = .responseText
    End With
    clipboard.SetText html.querySelector("#hiderow").outerHTML
    clipboard.PutInClipboard
    ws.Range("A1").PasteSpecial
End Sub
QuerySelector and surrogate:
If you do care more about the individual values, then we can use our usual method of finding the rows and looping, using a surrogate HTMLDocument variable to house the html so we can leverage querySelector at a more granular level, given we can't chain. The only difference here is that if we chuck the html from tr level into the HTMLDocument, we need to add table tags around it so that querySelectorAll can pick up the tds within the row, i.e. the columns.
Now, I haven't split out the old price from the reduced price in the Cash Price column. If you need that, just let me know; for now, I have left both in the same cell (one possible way to split them is sketched after the code below).
Option Explicit
Public Sub GetTable()
    Dim html As HTMLDocument, html2 As HTMLDocument, ws As Worksheet, i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument
    Set html2 = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://westgateselfstorage.com/index.php?page=estimator", False
        .send
        html.body.innerHTML = .responseText
    End With
    html.body.innerHTML = html.querySelector("#hiderow").outerHTML

    Dim headers(), rows As Object, results(), columns As Object
    headers = Array("Size", "Reg price", vbNullString, "Cash price", vbNullString, "Offers", "Reserve")
    'grab the rows
    Set rows = html.querySelectorAll("tr")
    ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
    For i = 1 To rows.Length - 1 'skip headers row
        html2.body.innerHTML = "<table>" & rows.item(i).outerHTML & "</table>"
        Set columns = html2.querySelectorAll("td")
        results(i, 1) = columns.item(0).innerText
        results(i, 2) = columns.item(3).innerText
        results(i, 4) = columns.item(4).innerText
        results(i, 6) = columns.item(5).innerText
        results(i, 7) = "Reserve this unit"
    Next
    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
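If you do want the old and reduced cash prices in separate columns, one possible approach is sketched below. It assumes the td text holds both prices separated by whitespace or a line break; that layout is an assumption on my part, not something confirmed above. It would stand in for the results(i, 4) = ... line inside the loop:

Dim s As String, priceParts() As String
'collapse line breaks and repeated spaces, then split into the individual price tokens (assumed layout)
s = Application.WorksheetFunction.Trim(Replace$(Replace$(columns.item(4).innerText, vbCr, " "), vbLf, " "))
priceParts = Split(s, " ")
results(i, 4) = priceParts(0)                                                'old price
If UBound(priceParts) > 0 Then results(i, 5) = priceParts(UBound(priceParts)) 'reduced price, if present

Column 5 of the results array is currently unused (its header is vbNullString), so the reduced price can slot in there without disturbing the other columns.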
getElement(s)By methods and chaining:
We could also be more traditional and loop the tr/td within a table, as the getElement(s)By methods allow us to chain to some extent (though I store things in variables to make it easy to compare with the above).
Note:
rows(i).getElementsByTagName("td")
is basically chaining onto an individual element (row) within
Set rows = hTable.getElementsByTagName("tr")
e.g.
hTable.getElementsByTagName("tr")(0).getElementsByTagName("td")
All columns in first row via chaining.
VBA:
Option Explicit
Public Sub GetTable()
    Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://westgateselfstorage.com/index.php?page=estimator", False
        .send
        html.body.innerHTML = .responseText
    End With
    Set hTable = html.getElementById("hiderow")

    Dim headers(), rows As Object, results(), columns As Object
    headers = Array("Size", "Reg price", vbNullString, "Cash price", vbNullString, "Offers", "Reserve")
    'grab the rows
    Set rows = hTable.getElementsByTagName("tr")
    ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
    For i = 1 To rows.Length - 1 'skip headers row
        Set columns = rows(i).getElementsByTagName("td")
        results(i, 1) = columns(0).innerText
        results(i, 2) = columns(3).innerText
        results(i, 4) = columns(4).innerText
        results(i, 6) = columns(5).innerText
        results(i, 7) = "Reserve this unit"
    Next
    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
You can use the python library pandas for this:
import pandas as pd
tabs = pd.read_html('http://westgateselfstorage.com/index.php?page=estimator', header =0)
tabs[0].drop(tabs[0].columns[2], axis=1)
You may also want to drop column G since all rows there are identical.

Scraping web with url in milliseconds Unix

The site I am browsing has several pages. I want to click on those elements, or play with the urls, and thus be able to copy the data. The initial URL ends with =1&playerType=ALL&ts=1558502019375; in my code there is a loop that is supposed to go page after page to get the data, but I cannot get it done.
Sub UPDATE_DATA_MLB()
    Application.ScreenUpdating = False
    'dimension (set aside memory for) our variables
    Dim objIE As InternetExplorer
    Dim ele As Object
    Dim y As Integer
    Dim EstaPagina As Byte
    Dim EstaURL As String
    'starting page
    EstaPagina = 1
    'we will output data to excel, starting on row 1
    y = 1
    EstaURL = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=hitting&page=1&playerType=ALL&ts=1558502019375" '&ts=1526432697176"
    'start a new browser instance
    Set objIE = New InternetExplorer
    'make browser visible
    objIE.Visible = False
    Do Until EstaPagina = 255
        'navigate to page with needed data
        objIE.navigate EstaURL & EstaPagina
        'wait for page to load
        Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
        'If UCase(Left(EstaURL, 211) & "1") = UCase(Left(objIE.LocationURL, (211 + Len(EstaPagina)))) And y > 1 Then Exit Do
        'look at all the 'tr' elements in the table with id 'datagrid',
        'and evaluate each, one at a time, using the 'ele' variable
        For Each ele In objIE.document.getElementById("datagrid").getElementsByTagName("tr")
            'show the text content of the 'tr' element being looked at
            'Debug.Print ele.textContent
            'each 'tr' (table row) element contains multiple 'td' children
            'put text of 1st 'td' in col A
            Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
            'put text of 2nd 'td' in col B
            Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
            'put text of 3rd 'td' in col C
            Sheets("Sheet1").Range("C" & y).Value = ele.Children(2).textContent
            'put text of 6th 'td' in col D
            Sheets("Sheet1").Range("D" & y).Value = ele.Children(5).textContent
            'put text of 23rd 'td' in col E
            Sheets("Sheet1").Range("E" & y).Value = ele.Children(22).textContent
            'increment row counter by 1
            y = y + 1
        Next
        EstaPagina = EstaPagina + 1
    Loop
    objIE.Quit
    Set objIE = Nothing
    Set ele = Nothing
    Range("A1").Select
    Range(Selection, Selection.End(xlToRight)).Select
    Range(Selection, Selection.End(xlDown)).Select
    Selection.RemoveDuplicates Columns:=Array(1, 2, 3, 4, 5), _
        Header:=xlNo
    Application.ScreenUpdating = True
    MsgBox "Volcado terminado", vbInformation
    Range("A1").Select
    'save the Excel workbook
    ActiveWorkbook.Save
End Sub
XMLHTTP:
The page does ajax calls to get json which it uses to update the content for each page. It does this using query string parameters, one of which is the records per page (default 50). You can view this activity in the browser network tab via dev tools F12
You can avoid using a browser and issue xmlhttp requests to the same endpoint, altering the query string parameters to get all the results. It seems, in my testing, you cannot request all the results in one go. Instead, I issue a request for 1000 results, then check if there are any additional results to obtain and issue further requests, in batches of 1000, to get them.
I am using jsonconverter.bas to parse the json. I extract the info from the json and load into an array, results, so I can write all results out to the sheet in one go - much more efficient this way as reduces i/o with sheet.
After copying the code from the link above into a module called jsonConverter, you need to go to VBE > Tools > References and add a reference to Microsoft Scripting Runtime.
TODO:
Add error handling for failed request
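A minimal way to cover that TODO (my sketch, not part of the original answer) is to check the request's Status right after .send and bail out before parsing:

'inside the With CreateObject("MSXML2.XMLHTTP") block, directly after .send
If .Status <> 200 Then
    MsgBox "Request failed: " & .Status & " " & .statusText, vbExclamation
    Exit Sub
End If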
VBA:
Option Explicit
Public Sub GetResults()
    'VBE > Tools > References > Microsoft Scripting Runtime
    Dim ws As Worksheet, results(), i As Long, totalResults As Long
    Dim headers(), columnCount As Long, pageNumber As Long
    Dim numberOfPages As Long, resultsPerPage As Long, json As Object
    resultsPerPage = 1000
    pageNumber = 1
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://mlb.mlb.com/pubajax/wf/flow/stats.splayer?season=2018&sort_order=%27desc%27&sort_column=%27avg%27&stat_type=hitting&page_type=SortablePlayer&game_type=%27R%27&player_pool=ALL&season_type=ANY&sport_code=%27mlb%27&results=" & resultsPerPage & "&recSP=" & pageNumber & "&recPP=" & resultsPerPage, False
        .send
        Set json = JsonConverter.ParseJson(.responseText)
        totalResults = json("stats_sortable_player")("queryResults")("totalSize")
        headers = json("stats_sortable_player")("queryResults")("row").item(1).keys
        numberOfPages = json("stats_sortable_player")("queryResults")("totalP")
        columnCount = UBound(headers) + 1
        ReDim results(1 To totalResults, 1 To columnCount)
        Dim r As Long, c As Long, dict As Object, key As Variant
        For pageNumber = 1 To numberOfPages
            If pageNumber > 1 Then
                .Open "GET", "http://mlb.mlb.com/pubajax/wf/flow/stats.splayer?season=2018&sort_order=%27desc%27&sort_column=%27avg%27&stat_type=hitting&page_type=SortablePlayer&game_type=%27R%27&player_pool=ALL&season_type=ANY&sport_code=%27mlb%27&results=" & resultsPerPage & "&recSP=" & pageNumber & "&recPP=" & resultsPerPage, False
                .send
                Set json = JsonConverter.ParseJson(.responseText)
            End If
            For Each dict In json("stats_sortable_player")("queryResults")("row")
                r = r + 1: c = 1
                For Each key In dict.keys
                    results(r, c) = dict(key)
                    c = c + 1
                Next
            Next
        Next
    End With
    With ws
        .Cells(1, 1).Resize(1, columnCount) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub
Sample of output (appreciate it can't really be read as is, but it gives an idea of the layout):
image link: https://i.stack.imgur.com/jiDTP.png
Internet Explorer:
If you want to use the slower browser solution, you can concatenate the page number into the url and loop to cover all pages. The number of pages can be extracted from the pagination on page 1.
You can see how to write the tables out below each other by looking at this answer. Change the lines with GetLastRow(ws, 1) + 2 to GetLastRow(ws, 1) + 1.
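The linked answer isn't reproduced here, but a GetLastRow helper of the kind referred to would typically look something like this (an assumed shape, not the original code):

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    'last used row in the given column of the given worksheet
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

Each page's table would then be written starting at GetLastRow(ws, 1) + 1 so the tables stack directly below one another.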
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
    Dim ie As New InternetExplorer, numberOfPages As Long
    Dim url As String, i As Long
    Const PLAYERS_PER_PAGE = 50
    url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=hitting&page=1&playerType=ALL&ts="
    With ie
        .Visible = True
        .Navigate2 url
        While .Busy Or .readyState < 4: DoEvents: Wend
        With .document
            numberOfPages = CLng(.querySelector(".paginationWidget-last").innerText)
            'do something with page 1
            If numberOfPages > 1 Then
                For i = 2 To numberOfPages
                    ie.Navigate2 Replace$(url, "page=1", "page=" & CStr(i))
                    While ie.Busy Or ie.readyState < 4: DoEvents: Wend
                    'do something with other pages
                Next
                Stop 'delete me later
            End If
        End With
        .Quit
    End With
End Sub
I have managed to combine his second code with mine, although I have not been very professional. I am interested in how to convert the url of the "official page" into the url of the "official page for json".
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
    Dim ele As Object
    Dim y As Integer
    Dim EstaPagina As Byte
    EstaPagina = 1
    'we will output data to excel, starting on row 1
    y = 1
    Dim ie As New InternetExplorer, numberOfPages As Long
    Dim url As String, i As Long
    Const PLAYERS_PER_PAGE = 50
    url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&click_text=Sortable+Player+pitching&game_type='R'&season=2018&season_type=ANY&league_code='MLB'&sectionType=sp&statType=pitching&page=1&playerType=ALL&ts="
    With ie
        .Visible = True
        .Navigate2 url
        While .Busy Or .readyState < 4: DoEvents: Wend
        With .document
            numberOfPages = CLng(.querySelector(".paginationWidget-last").innerText)
            'do something with page 1
            If numberOfPages > 1 Then
                For i = 1 To numberOfPages
                    ie.Navigate2 Replace$(url, "page=1", "page=" & CStr(i))
                    For Each ele In ie.document.getElementById("datagrid").getElementsByTagName("tr")
                        'show the text content of the 'tr' element being looked at
                        'Debug.Print ele.textContent
                        'each 'tr' (table row) element contains multiple 'td' children
                        'put text of 2nd 'td' in col A
                        Sheets("Sheet1").Range("A" & y).value = ele.Children(1).textContent
                        'put text of 3rd 'td' in col B
                        Sheets("Sheet1").Range("B" & y).value = ele.Children(2).textContent
                        'put text of 4th 'td' in col C
                        Sheets("Sheet1").Range("C" & y).value = ele.Children(3).textContent
                        'put text of 5th 'td' in col D
                        Sheets("Sheet1").Range("D" & y).value = ele.Children(4).textContent
                        'put text of 6th 'td' in col E
                        Sheets("Sheet1").Range("E" & y).value = ele.Children(5).textContent
                        'increment row counter by 1
                        y = y + 1
                    Next
                    While ie.Busy Or ie.readyState < 4: DoEvents: Wend
                    'do something with other pages
                Next
                'Stop 'delete me later
            End If
        End With
        .Quit
    End With
    On Error Resume Next
    Range("A1").Select
    Range(Selection, Selection.End(xlToRight)).Select
    Range(Selection, Selection.End(xlDown)).Select
    Selection.RemoveDuplicates Columns:=Array(1, 2, 3, 4), _
        Header:=xlNo
    On Error Resume Next
    Application.ScreenUpdating = True
    MsgBox "Volcado terminado", vbInformation
    Range("A1").Select
    'save the Excel workbook
    ActiveWorkbook.Save
End Sub

Data Scraping Elements By ClassName

I am trying to pull data from a web-site. I want to copy the '10' x 5' unit (class name is "unit_size medium") in row 1, which I am able to copy successfully, but I also want the promo (class name is "promo_offers", '1st Month Free!') in row 2. The problem is that this promo is given for specific cells only, so the data is misleading: I get the promo in the first 4 cells and then an error. However, I want to copy the promo only for those units where promo information is given; otherwise the cell should be blank or set to some other value. Below is the code...
Please suggest how to frame the code.
Sub GetClassNames()
    Dim html As HTMLDocument
    Dim objIE As Object
    Dim element As IHTMLElement
    Dim ie As InternetExplorer
    Dim elements As IHTMLElementCollection
    Dim result As String 'string variable that will hold our result link
    Dim count As Long
    Dim erow As Long
    'initiating a new instance of Internet Explorer and assigning it to objIE
    Set objIE = New InternetExplorer
    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True
    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    count = 0
    Set html = objIE.document
    Set elements = html.getElementsByClassName("unit_size medium")
    For Each element In elements
        If element.className = "unit_size medium" Then
            erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
            Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText
            Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
            count = count + 1
        End If
    Next element
End Sub
I would simply wrap the attempt to access the element in On Error Resume Next. Have a place already reserved for it in an output array, so if the element is not present that place remains empty.
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    Dim ie As New InternetExplorer, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
        While .Busy Or .readyState < 4: DoEvents: Wend
        Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long
        headers = Array("size", "features", "promo", "in store", "web")
        Set listings = .document.getElementById("small_units_accordion_panel").getElementsByTagName("li")
        '.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate
        ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
        For Each listing In listings
            r = r + 1
            On Error Resume Next
            results(r, 1) = listing.getElementsByClassName("unit_size medium")(0).innerText
            results(r, 2) = listing.getElementsByClassName("features")(0).innerText
            results(r, 3) = listing.getElementsByClassName("promo_offers")(0).innerText
            results(r, 4) = listing.getElementsByClassName("board_rate")(0).innerText
            results(r, 5) = listing.getElementsByClassName("price")(0).innerText
            On Error GoTo 0
        Next
        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
        .Quit
    End With
End Sub
All boxes:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    Dim ie As New InternetExplorer, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
        While .Busy Or .readyState < 4: DoEvents: Wend
        Dim listings As Object, listing As Object, headers(), results()
        Dim r As Long, list As Object, item As Object
        headers = Array("size", "features", "promo", "in store", "web")
        Set list = .document.getElementsByClassName("main_unit")
        '.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate
        Dim rowCount As Long
        rowCount = .document.querySelectorAll(".main_unit li").Length
        ReDim results(1 To rowCount, 1 To UBound(headers) + 1)
        For Each listing In list
            For Each item In listing.getElementsByTagName("li")
                r = r + 1
                On Error Resume Next
                results(r, 1) = item.getElementsByClassName("unit_size medium")(0).innerText
                results(r, 2) = item.getElementsByClassName("features")(0).innerText
                results(r, 3) = item.getElementsByClassName("promo_offers")(0).innerText
                results(r, 4) = item.getElementsByClassName("board_rate")(0).innerText
                results(r, 5) = item.getElementsByClassName("price")(0).innerText
                On Error GoTo 0
            Next
        Next
        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
        .Quit
    End With
End Sub

How do I extract specific data (name, details) from the website into Excel with Excel VBA?

How do I extract specific data (name, details) from the website into Excel with Excel VBA?
Below I am trying to get processor and warranty:
Option Explicit
Sub GetData()
    Dim objIE As InternetExplorer
    Dim itemELE As Object
    Dim html As IHTMLDocument
    Dim Processor As String
    Dim warranty As String
    Dim y As Integer
    'start a new browser instance
    Set objIE = New InternetExplorer
    'make browser visible
    objIE.Visible = True
    'navigate to page with needed data
    objIE.navigate "https://www.harveynorman.com.sg/computers-tablets-and-gaming/computers/laptops/"
    'wait for page to load
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    y = 1
    For Each itemELE In objIE.document.getElementsByClassName("expandabaleContent")
        Processor = itemELE.getElementsByTagName("d1")(0).innerText
        warranty = itemELE.getElementsByClassName("d1")(0).getElementsByTagName("a")(0).textContent
        Sheets("Sheet1").Range("A" & y).Value = Processor
        Sheets("Sheet1").Range("B" & y).Value = warranty
        y = y + 1
    Next
End Sub
Screenshot of the page:
For the page shown (in your image) you can issue an XMLHTTP (XHR) GET request to grab the product info without opening a slow IE browser instance.
For the specific information:
Processor and warranty info:
If you inspect the page, the info about the processor and warranty appears to be associated with the classname facetedResults-feature-list.
You can see the classname and then a dl tag housing a dt tag which has sibling dd tags. Two of these sibling dd tags are associated with the info for processor and warranty.
I use a CSS selector to grab all these dd tags, which can be simplified, in this instance, to ignore the sibling dt and parent dl tags and use just:
.facetedResults-feature-list dd
The "." is a class selector. The CSS combination selection above says get the dd tags within elements with class facetedResults-feature-list
Product titles info:
The titles I get using another CSS selector of:
.facetedResults-title
This is elements with class facetedResults-title. This contains the product title.
Writing out product titles, processor and warranty info to the sheet:
A little maths shows me that the processor info repeats every 14, and that if I add 8 to the index for the processor I get the warranty info. You can see how you could write out each of the details as they occur at indices that repeat every 14. I combine the loop over the nodeList of dd elements with the titles to write out to the sheet.
VBA:
Option Explicit
Public Sub GetInfo()
    Dim sResponse As String, i As Long, html As New HTMLDocument
    Application.ScreenUpdating = False
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS", False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With
    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    Dim titles As Object, targetedInfo As Object, rowCounter As Long
    With html
        .body.innerHTML = sResponse
        Set titles = .querySelectorAll(".facetedResults-title")
        Set targetedInfo = .querySelectorAll(".facetedResults-feature-list dd")
    End With
    With Worksheets("Sheet1")
        For i = 0 To targetedInfo.Length - 1
            If i Mod 14 = 0 Then
                rowCounter = rowCounter + 1
                .Cells(rowCounter, 1) = titles(rowCounter - 1).innerText
                .Cells(rowCounter, 2) = targetedInfo(i).innerText
                .Cells(rowCounter, 3) = targetedInfo(i + 8).innerText
            End If
        Next i
    End With
    Application.ScreenUpdating = True
End Sub
Output sample:
More general info:
CSS selectors:
The product info is associated with an 'expandableContent facetedResults-expandableContent-features expandableContent-is-collapsed facetedResults-expandableContent-69' class name
The prices are associated with an 'expandableContent facetedResults-expandableContent-price expandableContent-is-collapsed' class name.
You can select these by the traditional .getElementsByClassName and then loop over the collection, or, in my case, use a CSS selector for class to do the same thing, and then traverse the length of the returned nodeList.
.getElementsByClassName("expandableContent facetedResults-expandableContent-features expandableContent-is-collapsed facetedResults-expandableContent-69")
is the same as
.querySelectorAll(".expandableContent.facetedResults-expandableContent-features.expandableContent-is-collapsed.facetedResults-expandableContent-69")
The "." is the class selector.
Titles are associated with a class facetedResults-title
VBA:
Option Explicit
Public Sub GetInfo()
    Dim sResponse As String, i As Long, html As New HTMLDocument
    Application.ScreenUpdating = False
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS", False
        .send
        sResponse = StrConv(.responseBody, vbUnicode)
    End With
    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    Dim info As Object, prices As Object, titles As Object
    With html
        .body.innerHTML = sResponse
        Set titles = .querySelectorAll(".facetedResults-title")
        Set info = .querySelectorAll(".expandableContent.facetedResults-expandableContent-features.expandableContent-is-collapsed.facetedResults-expandableContent-69")
        Set prices = .querySelectorAll(".expandableContent.facetedResults-expandableContent-price.expandableContent-is-collapsed")
    End With
    With Worksheets("Sheet1")
        For i = 0 To titles.Length - 1
            .Cells(i + 1, 1) = titles(i).innerText
            .Cells(i + 1, 2) = info(i).innerText
            .Cells(i + 1, 3) = prices(i).innerText
        Next i
    End With
    Application.ScreenUpdating = True
End Sub
References required (VBE>Tools>References):
Microsoft HTML Object Library
QHarr has already provided some good options, but in case you still want to try IE, see the code below:
Option Explicit
Sub GetData()
    Dim objIE As InternetExplorer
    Dim itemELE As Object
    Dim html As IHTMLDocument
    Dim Processor As String
    Dim warranty As String
    Dim y As Integer
    'start a new browser instance
    Set objIE = New InternetExplorer
    'make browser visible
    objIE.Visible = True
    'navigate to page with needed data
    objIE.navigate "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS"
    'wait for page to load
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    Application.Wait Now + TimeSerial(0, 0, 3)
    y = 1
    For Each itemELE In objIE.document.getElementsByClassName("facetedResults-feature-list")
        If InStr(1, itemELE.className, "bundleList", vbTextCompare) = 0 Then
            Application.Wait Now + TimeSerial(0, 0, 2)
            Processor = itemELE.getElementsByTagName("dl")(0).innerText
            warranty = itemELE.getElementsByTagName("dl")(4).innerText
            Sheets("Sheet1").Range("A" & y).Value = Processor
            Sheets("Sheet1").Range("B" & y).Value = warranty
            y = y + 1
        End If
    Next
End Sub
Results