Inner Loop design for webscraping - json

I want to import restaurant data like Restaurant name, phone number, website & address to excel but unfortunately, I am getting sponsored results & also not getting website & full address as it in on the inner page when we click on the hotel name. I with some help at platforms have created a code using but it is not helping out. Please rectify the issue in my code. Website:https://www.yelp.com/searchcflt=restaurants&find_loc=San%20Francisco%2C%20CA&start=
Here is my code:
Sub GetInfo()
Const URL$ = "https://www.yelp.com/search?cflt=restaurants&find_loc=San%20Francisco%2C%20CA&start="
Dim Http As New XMLHTTP60, Html As New HTMLDocument, Htmldoc As New HTMLDocument, page&, I&
For page = 0 To 1 ' this is where you change the last number for the pages to traverse
With Http
.Open "GET", URL & page * 30, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("[class*='searchResult']")
For I = 0 To .Length - 1
Htmldoc.body.innerHTML = .Item(I).outerHTML
On Error Resume Next
r = r + 1: Cells(r, 1) = Htmldoc.querySelector("[class*='heading--h3'] > a").innerText
Cells(r, 2) = Htmldoc.querySelector("[class*='container'] > [class*='display--inline-block']").innerText
' Cells(r, 3) = Htmldoc.querySelector("[class*='container'] > address").innerText
'Cells(r, 4) = Htmldoc.querySelector("[class*='container'] > address").NextSibling.innerText
'Inner loop creation
Cells(r, 5) = Htmldoc.querySelector("[class*='container'] > website").href ' Extract from window after clicking on hotel name
Cells(r, 6) = Htmldoc.querySelector("[class*='container'] > fulladdress").innerText ' Extract from window after clicking on hotel name
On Error GoTo 0
Next I
End With
Next page
End Sub

You can use the free API to get the top 50 from the business_search endpoint. Pass sort parameter in query string to get top rated.
Use a json parser, such as jsonconverter.bas to handle the response. After installing the code from that link in a standard module called JsonConverter, go to VBE > Tools > References > Add a reference to Microsoft Scripting Runtime.
API instructions are here. You need to set up a test app, which requires some basic user info, and verify your email. You will then receive an API key for authentication which is passed in the authorization header as shown below.
There is other info returned which you can parse if wanted.
Option Explicit
Public Sub GetTopRestuarants()
Dim json As Object, headers(), r As Long, c As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://api.yelp.com/v3/businesses/search?term=restuarant&location=san-francisco&limit=50&sort_by=rating", False
.setRequestHeader "Authorization", "Bearer yourAPIkey"
.send
Set json = JsonConverter.ParseJson(.responseText)("businesses")
headers = Array("Restaurant name", "phone", "website", "address")
Dim results(), item As Object
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1
results(r, 1) = item("name")
results(r, 2) = item("phone")
results(r, 3) = item("url")
Dim subItem As Variant, address As String
address = vbNullString
For Each subItem In item("location")("display_address")
address = address & Chr$(32) & subItem
Next
results(r, 4) = Trim$(address)
Next
End With
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Example top 20 of 50 returned:
Caveat emptor
Note that specifying the sort_by is a suggestion (not strictly enforced) to Yelp's search, which considers multiple input parameters to return the most relevant results. For example, the rating sort is not strictly sorted by the rating value, but by an adjusted rating value that takes into account the number of ratings, similar to a Bayesian average. This is to prevent skewing results to businesses with a single review.

This is one of the ways which will let you parse the results from it's inner pages. I can't access that webpage anymore to assist you further. However, give it a shot. I suppose it will work:
Sub GetInfo()
Const URL$ = "https://www.yelp.com/search?cflt=restaurants&find_loc=San%20Francisco%2C%20CA&start="
Const base$ = "https://www.yelp.com"
Dim Http As New XMLHTTP60, Html As New HTMLDocument
Dim oTitle$, oPhone As Object, Htmldoc As New HTMLDocument
Dim R&, newUrl$, I&, oWeb As Object, page&, oAddress As Object
[A1:D1] = [{"Name","Phone","Address","Website"}]
For page = 1 To 3 'this is where you change the last number for this script to traverse
With Http
.Open "GET", URL & page * 30, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("[class*='searchResult'] [class*='heading--h3'] > a")
For I = 0 To .Length - 1
If Not InStr(.item(I).getAttribute("href"), "/adredir?") > 0 Then
oTitle = .item(I).innerText
newUrl = Replace(.item(I).getAttribute("href"), "about:", base)
With Http
.Open "GET", newUrl, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
Htmldoc.body.innerHTML = .responseText
End With
R = R + 1: Cells(R + 1, 1) = oTitle
Set oPhone = Htmldoc.querySelector(".biz-phone")
If Not oPhone Is Nothing Then
Cells(R + 1, 2) = oPhone.innerText
End If
Set oAddress = Htmldoc.querySelector(".map-box-address")
If Not oAddress Is Nothing Then
Cells(R + 1, 3) = WorksheetFunction.Clean(oAddress.innerText)
End If
Set oWeb = Htmldoc.querySelector(".biz-website > a")
If Not oWeb Is Nothing Then
Cells(R + 1, 4) = oWeb.innerText
End If
End If
Next I
End With
Next page
End Sub
Btw, the ads have been kicked out.

Related

Edit 14 day weather forecast Excel VBA to include precipitation

I found the code below which works nicely and I think I can repurpose it for my needs, but does not include the precipitation. I'm relatively new to HTML so having trouble understanding what each line of code's purpose is. I've gone to the website and looked at the elements and console but can't find "p[data-testid='wxPhrase']" or the word 'children' or 'child'.
I presumed precipitation was just another child so tried adding lines like these after editing the column headers in the first sub:
Results(r + 1, 3) = Children(r).FirstChild.innerText
Results(r + 1, 4) = Children(r).PreviousSibling.PreviousSibling.PreviousSibling.FirstChild.innerText
but it gives Run-time error '438': Object doesn't support this property or method. I appreciate very much some help and education. Thanks, in advance!
Sub MiamiWeather()
Dim Data As Variant
Data = MiamiWeatherData
Range("A1:B1").Value = Array("Date", "Temperature")
Range("A2").Resize(UBound(Data), 2).Value = Data
End Sub
Function MiamiWeatherData()
Const URL = "https://weather.com/weather/tenday/l/3881cd527264bc7c99b6b541473c0085e75aa026b6bd99658c56ad9bb55bd96e"
Dim responseText As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
responseText = .responseText
End With
Dim Document As HTMLDocument
Set Document = CreateObject("HTMLFILE")
Document.body.innerHTML = responseText
Dim Children As IHTMLDOMChildrenCollection
Set Children = Document.querySelectorAll("p[data-testid='wxPhrase']")
Dim Results As Variant
ReDim Results(1 To Children.Length, 1 To 2)
Dim r As Long
For r = 0 To Children.Length - 1
Results(r + 1, 1) = Children(r).PreviousSibling.PreviousSibling.FirstChild.innerText
Results(r + 1, 2) = Children(r).PreviousSibling.FirstChild.innerText
Next
MiamiWeatherData = Results
End Function
Assuming you want the percentage, you need to resize the array to hold an extra dimension then add an extra selector within the loop. That selector can select by attribute = value and will need to work off .Children(r).PreviousSibling. Assuming, you have a still maintained Microsoft set-up you can chain querySelector at this point as shown below.
For older versions e.g., <= Windows 7 then use Results(r + 1, 3) = Children(r).PreviousSibling.Children(2).Children(0).Children(1).innerText
Option Explicit
Public Sub MiamiWeather()
Dim Data As Variant
Data = MiamiWeatherData
Range("A1:C1").value = Array("Date", "Temperature", "Precipitation")
Range("A2").Resize(UBound(Data), 3).value = Data
End Sub
Function MiamiWeatherData()
Const URL = "https://weather.com/weather/tenday/l/3881cd527264bc7c99b6b541473c0085e75aa026b6bd99658c56ad9bb55bd96e"
Dim responseText As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
responseText = .responseText
End With
Dim Document As HTMLDocument
Set Document = CreateObject("HTMLFILE")
Document.body.innerHTML = responseText
Dim Children As IHTMLDOMChildrenCollection
Set Children = Document.querySelectorAll("p[data-testid='wxPhrase']")
Dim Results As Variant
ReDim Results(1 To Children.length, 1 To 3)
Dim r As Long
For r = 0 To Children.length - 1
Results(r + 1, 1) = Children(r).PreviousSibling.PreviousSibling.FirstChild.innerText
Results(r + 1, 2) = Children(r).PreviousSibling.FirstChild.innerText
Results(r + 1, 3) = Children(r).PreviousSibling.querySelector("[data-testid=PercentageValue]").innerText
Next
MiamiWeatherData = Results
End Function

How to extract data from HTML divs into Excel

I am trying to extract the details in this webpage and they seem to be under certain "divs" with "selection-left" and "selection-right" right. I haven't found a way to successfully pull it yet.
This is the URL - https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/
And here is an image of what I want to extract. I want to copy the competition name and each participant and score.
I have tried using QHar's approach in this link - How to extract values from nested divs using VBA. But I'm getting errors along this line -
ReDim results(1 To countries.Length / 2, 1 To 4)
Here is the code I've been trying to make work
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As
Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/", False
.send
html.body.innerHTML = .responseText
End With
Set participant = html.querySelectorAll(".market-content .selection-left"): Set scores = html.querySelectorAll("..market-content .selection-right")
ReDim results(1 To countries.Length / 2, 1 To 4)
For i = 0 To participant.Length - 1 Step 2
results(r, 1) = participant.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 4) = Array("Competition", "Participant", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
I will need help to make this code work
Content is dynamically added so will not be present in your current request format; hence your error as you have a nodeList of Length 0. You could try making POST requests as the page does but it doesn't look like a quick and easy bit of coding. I would go with browser automation, if this is a small project, so that js can run on the page and you can click the show more button. You will need a wait condition for the page to have properly loaded. I use the presence of the show more button.
Option Explicit
Public Sub GetOddsIE()
Dim d As InternetExplorer, odds As Object, names As Object, i As Long
Dim ws As Worksheet, results(), competition As String
Set d = New InternetExplorer
Set ws = ThisWorkbook.Worksheets("Sheet1")
Const URL = "https://sports.ladbrokes.com/en-af/betting/golf/golf-all-golf/us-masters/2020-us-masters/228648232/"
With d
.Visible = False
.Navigate2 URL
While .Busy Or .ReadyState <> 4: DoEvents: Wend
With .Document.getElementsByClassName("expandable-below-container-button")
Do
DoEvents
Loop While .Length = 0 'wait for element to be present
.Item(0).Click 'click on show more
End With
Set names = .Document.getElementsByClassName("selection-left-selection-name")
Set odds = .Document.getElementsByClassName("odds-convert")
competition = .Document.getElementsByClassName("league")(0).innerText
ReDim results(1 To names.Length, 1 To 3)
For i = 0 To names.Length - 1
results(i + 1, 1) = competition
results(i + 1, 2) = names.Item(i).innerText
results(i + 1, 3) = "'" & odds.Item(i).innerText
Next
.Quit
End With
ws.Cells(1, 1).Resize(1, 3) = Array("Competition", "Participant", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

get web page data through class name

I need to get the dates and temp from a weather website and record it on cells but I am getting a object variable or with block variable not set error.
I tried to data from web in excel but I think the website is protected or something because I keep getting "under maintenance" page when trying to load the page from excel. I got the codes below from a tutorial but I can't make it work.
Sub record()
Dim request As Object
Dim response As String
Dim html As New HTMLDocument
Dim websie As String
Dim temps As Variant
'provide link
'website = "https://finance.yahoo.com/quote/EURUSD=X?p=EURUSD=X"
website = "https://www.accuweather.com/en/us/chicago/60608/september-weather/348308"
'create the object that will make the webpage request
Set request = CreateObject("MSXML2.XMLHTTP")
'go to the link
request.Open "GET", website, False
'send request for webpage
request.send
'get web response data to variable
response = StrConv(request.responseBody, vbUnicode)
'put webpage to an html object
html.body.innerHTML = response
'get temperature from specified element
'temps = html.getElementsByClassName("Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(ib)")(0).innerText
temps = html.getElementsByClassName("high")(0).innerText
Sheets("record").Range("A1") = temps
End Sub
Sample lines from the website:
<a class="monthly-daypanel is-past">
<div class="date">2</div>
<div class="icon-container"...</div>
<div class="temp">
<div class="high">83</div>
<div class="low">83</div>
</div>
</a>
I want to get the date, high and low.
You need an User-Agent header. I would also extract the json string from one of the script tags (I use regex for this) and use that as source. I add in a date comparison to work out if it is a forecast or actual value. I read the json string into json object using json library and loop the resultant collection storing items of interest in an array for faster writing out to sheet at end.
json library:
I use jsonconverter.bas. Download raw code from here and add to standard module called jsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime.
Option Explicit
Public Sub GetWeatherListings()
Dim s As String, re As Object, ws As Worksheet
Set re = CreateObject("vbscript.regexp")
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.accuweather.com/en/us/chicago/60608/september-weather/348308", False
' .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responsetext
End With
Dim results(), r As Long, jsonSource As String, json As Object, item As Object
jsonSource = GetString(re, s, "dailyForecast = (.*?\])")
If jsonSource = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(jsonSource)
ReDim results(1 To json.count, 1 To 4) 'date, datetime, day > dActual, night > dActual
Dim dateTime() As String, datePart As String, forecast As Boolean
For Each item In json
r = r + 1
dateTime = Split(item("dateTime"), "T")
datePart = dateTime(LBound(dateTime))
forecast = CDate(datePart) >= Date
results(r, 1) = datePart
results(r, 2) = item("dateTime")
results(r, 3) = IIf(forecast, item("day")("dTemp"), item("day")("dActual"))
results(r, 4) = IIf(forecast, item("night")("dTemp"), item("night")("dActual"))
Next
Dim headers()
headers = Array("Date", "DateTime", "Day temp", "Night temp")
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetString(ByVal re As Object, ByVal inputString As String, ByVal pattern As String) As String
Dim matches As Object
With re
.Global = True
.MultiLine = True
.IgnoreCase = True
.pattern = pattern
If .Test(inputString) Then
Set matches = .Execute(inputString)
GetString = matches(0).SubMatches(0)
Exit Function
End If
End With
GetString = "No match"
End Function
Sample of end of output:

Import Data using JSON

I have developed a code to scrape data from a websites but since I know very little about JSON I could be able to get the output as I want, the code was developed for this web: https://ngodarpan.gov.in/index.php/home/statewise_ngo/76/35/1 now I am replicating my code for other websites having json like this web :https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA ; but this code is not functioning properly. Here is my code(I want it to be generic for most webs)
Option Explicit
Public Sub FetchTabularInfo()
Dim Http As XMLHTTP60, Html As HTMLDocument, col As Variant, csrf As Variant, i&, page As Long
Dim headers(), ws As Worksheet, iCol As Collection
Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("SrNo", "Name", "Address", "Mobile", "Email")
Set Http = New XMLHTTP60
Set Html = New HTMLDocument
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For page = 1 To 8 'To cover all pages
With Http
.Open "GET", "https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA" & CStr(page), Falsev 'Last letter of URL is page number whose range will be given in outerloop
.send
Html.body.innerHTML = .responseText
End With
Set iCol = New Collection
With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
For i = 0 To .Length - 1
iCol.Add Split(Split(.Item(i).getAttribute("onclick"), "(""")(1), """)")(0)
Next i
End With
Dim r As Long, results()
ReDim results(1 To iCol.Count, 1 To UBound(headers) + 1)
r = 0
For Each col In iCol
r = r + 1
With Http
.Open "GET", "https://www.yelp.com/index.php/ajaxcontroller/get_csrf", False
.send
csrf = .responseText
End With
csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)
Dim json As Object
With Http
.Open "POST", "https://www.yelp.com/index.php/ajaxcontroller/show_ngo_info", False
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send "id=" & col & "&csrf_test_name=" & csrf
Set json = JsonConverter.ParseJson(.responseText)
Dim orgName As String, address As String, srNo As Long, city As String
Dim state As String, tel As String, mobile As String, website As String, email As String
On Error Resume Next
orgName = json("registeration_info")(1)("nr_orgName")
address = json("registeration_info")(1)("nr_add")
srNo = r '<unsure where this is coming from.
mobile = json("infor")("0")("Mobile")
email = json("infor")("0")("Email")
On Error GoTo 0
Dim arr()
arr = Array(srNo, orgName, address, tel, email)
For i = LBound(headers) To UBound(headers)
results(r, i + 1) = arr(i)
Next
End With
Next col
Set iCol = Nothing: Set json = Nothing
ws.Cells(GetLastRow(ws) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
Next
End Sub
Public Function GetLastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
GetLastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
Please also let me know mistakes I am doing, so that i will take care of those in future.
Short answer:
No.
I would go so far as to say it is impossible you can write something generic for most webs . One could say the generic part is the parser itself. But you need to have some familiarity with the json structure of each endpoint to appropriately direct parsing. Json itself has defined structural syntax/components, but what you want from those will have different access paths and require different handling to do so. Then there are the arguments that may need to be supplied and differences in output format.
What is the best scenario?
If you have a set list of urls (ideally API endpoints) you have a better chance of writing something that might last for a while as you can familiarise yourself with the json returned. But how generic is this? It's really just branched code.
Re-usable stuff:
Might be the non parser stuff which can be generalised e.g. methods and classes you create that parse out the paths for the entire structure and look for key words and return you those paths? Helper functions you write that might recursively write out nested structures etc. Code that makes the request and handles fails etc.... I would definitely recommend looking into classes for re-usable code in web-scraping.
Class based examples:
I will add to this as and when
https://stackoverflow.com/a/52301153/6241235
https://codereview.stackexchange.com/questions/69009/vba-clickbot-featuring-ajax-waiting-and-element-searching

Scraping data from website to Excel using a macro...lost

I am totally new to this but here is my scope.
I am running a macro to pull data from a business system.
After this info is pulled, I want a macro to take certain fields, put them into a website form, click submit and then scrape and paste certain data results back into excel.
Everything works minus the scraping and pasting back into excel.
Help please!
I have searched all over stack overflow and watched vids to try and figure out what I need to do but I must be misunderstanding something.
Sub Track()
Range("B2").Select
'This should call to PT and deliver tracking info
Dim IE As Object
Dim tbl As Object, td As Object
Set IE = CreateObject("InternetExplorer.Application") 'Set IEapp =
InternetExplorer
IE.Visible = True
IE.Navigate "https://www.partstown.com/track-my-order"
With IEapp
Do
DoEvents
Loop Until IE.readyState = 4
'Input PO and zip
Call IE.Document.getElementById("orderNo").SetAttribute("value",
"4500969111")
'ActiveCell.Offset(0, 2).Select
Call IE.Document.getElementById("postalCode").SetAttribute("value",
"37040")
IE.Document.forms(7).Submit
Application.Wait Now + TimeValue("00:00:09")
'this is where i am stuck. I know this isnt right but tried to piece it
together
Set elemCollection = IE.Document.getelElementsByTagname("table.account-
table details _tc_table_highlighted")
For t = 0 To (elemCollection.Length - 1)
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ThisWorkbook.Worksheets(1).Cells(r + 1, c + 1) =
elemCollection(t).Rows.Cells(c).innertext
Next c
Next r
Next t
End With
End Sub
Here is what I want it to pull:
Shipping column
QTY ordered
QTY shipped Product
And to display in a linear fashion:
Shipping, QTY ordered, QTY shipped, Product
Internet Explorer:
I have made this a little more verbose than usual so you can see each step.
Key things:
1) proper page loads waits with While .Busy Or .readyState < 4: DoEvents: Wend
2) selecting elements by id where possible. The # is a css id selector. css selectors are applied by querySelector method of .document and retrieve the first element in the page which matches the specified pattern
3) a timed loop is needed to wait for results to be present
4) the order qty etc info is a newline divided string. It seemed easiest to split on these newlines and then access individual items from the resultant array by index
5) I order, per your specification, the results in an array and write that array out in one go to the sheet
6) The "." is a class selector in .order-history__item-descript--min i.e. return the first element with class of order-history__item-descript--min
7) The [x=y] is an attribute = value selector in [data-label=Shipping] i.e. return the first element with data-label attribute having value Shipping
8) The combination of .details-table a is using a descendant combinator, " ", to specify I want a tag elements that have a parent with class .details-table
VBA:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub RetrieveInfo()
Dim ie As InternetExplorer, ele As Object, t As Date
Const MAX_WAIT_SEC As Long = 5
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "https://www.partstown.com/track-my-order"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#orderNo").Value = "4500969111"
.querySelector("#postalCode").Value = "37040"
.querySelector("#orderLookUpForm").submit
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim shipping As String, order As String, items() As String
With .document
t = Timer
Do
On Error Resume Next
Set ele = .querySelector("[data-label=Shipping]")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
If ele Is Nothing Then Exit Sub
shipping = ele.innerText
order = .querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = .querySelector(".details-table a").Title
Dim results()
results = Array(shipping, qtyOrdered, qtyShipped, product)
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End With
.Quit
End With
End Sub
If new to HTML please look at:
https://developer.mozilla.org/en-US/docs/Web/HTML
If new to css selectors please look at:
https://flukeout.github.io/
XMLHTTP:
The whole thing can also be done with XHR. This is much faster than opening a browser.
XHR:
Use XMLHttpRequest (XHR) objects to interact with servers. You can
retrieve data from a URL without having to do a full page [render]
In this case I do an initial GET request to the landing page to retrieve the
CSRFToken to use in my re-enactment of the POST request the page makes to the server when you manually input data and press submit. You get the data you want in the server response. I pass a query string in the body of the POST send line
.send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft ; you can see your parameters there.
Option Explicit
Public Sub GetInfo()
Dim html As HTMLDocument, csrft As String '< VBE > Tools > References > Microsoft HTML Object Library
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.partstown.com", False
.send
html.body.innerHTML = .responseText
csrft = html.querySelector("[name=CSRFToken]").Value
.Open "POST", "https://www.partstown.com/track-my-order", False
.setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
.setRequestHeader "Accept-Encoding", "gzip, deflate"
.setRequestHeader "Accept-Language", "en-US,en;q=0.9"
.send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft
html.body.innerHTML = .responseText
End With
Dim shipping As String, order As String, items() As String
shipping = html.querySelector("[data-label=Shipping]").innerText
order = html.querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = html.querySelector(".details-table a").Title
Dim results()
results = Array(shipping, qtyOrdered, qtyShipped, product)
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End Sub
Example of loop:
Option Explicit
Public Sub GetInfo()
Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '< VBE > Tools > References > Microsoft HTML Object Library
Set html = New HTMLDocument
Dim ws As Worksheet, i As Long
Set ws = ThisWorkbook.Worksheets("Sheet4")
lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
sourceValues = ws.Range("B2:D" & lastRow).Value
Dim results()
ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.partstown.com", False
.send
html.body.innerHTML = .responseText
csrft = html.querySelector("[name=CSRFToken]").Value
Stop
For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
DoEvents
.Open "POST", "https://www.partstown.com/track-my-order", False
.setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
.setRequestHeader "Accept-Encoding", "gzip, deflate"
.setRequestHeader "Accept-Language", "en-US,en;q=0.9"
.send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft
html.body.innerHTML = .responseText
Dim shipping As String, order As String, items() As String
shipping = html.querySelector("[data-label=Shipping]").innerText
order = html.querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = html.querySelector(".details-table a").Title
results(i, 1) = shipping
results(i, 2) = qtyOrdered
results(i, 3) = qtyShipped
results(i, 4) = product
End If
'Application.Wait Now + TimeSerial(0, 0, 1)
Next
End With
'results written out from row 2 column E
ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub