Get data from HTML table to Excel with VBA

I am trying to get historical data from Nasdaq directly into Excel. I have managed to call up the website, change the "FromDate" input, and hit the search button, but I can't scrape the table inside the "historicalOutput" div into Excel in a nice format - please help.
MY VBA CODE
Sub OMX_data()
Dim URL As String
Dim ie As InternetExplorer
Dim search_button As Object
Set ie = New InternetExplorer
ie.Visible = True
ie.navigate ("http://www.nasdaqomxnordic.com/indexes/historical_prices?Instrument=DK0060368991")
Do
DoEvents
Loop Until ie.readyState = 4
ie.document.all("FromDate").Value = "2018-01-01"
Set search_button = ie.document.getElementsByClassName("doSearch")
search_button(0).Click
End Sub
MY HTML
<div id="historicalOutput"><table id="historicalTable" class="tablesorter tablesorter-default" border="0" cellpadding="0" cellspacing="0" role="grid">

Adding on to the code you supplied, something like so:
......search_button(0).Click
Dim d As MSHTML.HTMLDocument
Set d = ie.document
Dim e As MSHTML.HTMLTable
Set e = d.getElementById("historicalTable")
You can then use IntelliSense on the object e to see properties like
e.rows(100).innertext
e.rows.length
I also changed the wait loop to Loop Until ie.readyState = 4 And Not ie.Busy
Have a look and build a loop out of it.
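For example, a minimal sketch built from those properties that writes the whole table to the active sheet (adjust the target range as needed):
Dim r As Long, c As Long
For r = 0 To e.Rows.Length - 1
For c = 0 To e.Rows(r).Cells.Length - 1
'HTML row/cell collections are 0-based, worksheet cells are 1-based
ActiveSheet.Cells(r + 1, c + 1).Value = e.Rows(r).Cells(c).innerText
Next c
Next r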

Try the script below to get the tabular content from that webpage, using the date you want to start from.
Sub FetchTable()
Const postUrl$ = "http://www.nasdaqomxnordic.com/webproxy/DataFeedProxy.aspx"
Dim Http As New XMLHTTP60, Html As New HTMLDocument
Dim sPostData$, dateFrom$, dateupto$, C&, R&
Dim elem As Object, trow As Object
dateFrom = "2018-01-01"
dateupto = "2020-04-02"
sPostData = "xmlquery=%3Cpost%3E%0A%3Cparam+name%3D%22Exchange%22+value%3D%22NMF%22%2F%3E%0A%3Cparam+name%3D%22SubSystem%22+value%3D%22History%22%2F%3E%0A%3Cparam+name%3D%22Action%22+value%3D%22GetDataSeries%22%2F%3E%0A%3Cparam+name%3D%22AppendIntraDay%22+value%3D%22no%22%2F%3E%0A%3Cparam+name%3D%22Instrument%22+value%3D%22DK0060368991%22%2F%3E%0A%3Cparam+name%3D%22FromDate%22+value%3D%22" & dateFrom & "%22%2F%3E%0A%3Cparam+name%3D%22ToDate%22+value%3D%22" & dateupto & "%22%2F%3E%0A%3Cparam+name%3D%22hi__a%22+value%3D%220%2C1%2C2%2C4%2C21%2C8%2C10%2C11%2C12%22%2F%3E%0A%3Cparam+name%3D%22ext_xslt%22+value%3D%22%2FnordicV3%2Fhi_table.xsl%22%2F%3E%0A%3Cparam+name%3D%22ext_xslt_lang%22+value%3D%22en%22%2F%3E%0A%3Cparam+name%3D%22ext_xslt_hiddenattrs%22+value%3D%22%2Cip%2Civ%2C%22%2F%3E%0A%3Cparam+name%3D%22ext_xslt_tableId%22+value%3D%22historicalTable%22%2F%3E%0A%3Cparam+name%3D%22app%22+value%3D%22%2Findexes%2Fhistorical_prices%22%2F%3E%0A%3C%2Fpost%3E"
With Http
.Open "POST", postUrl, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send sPostData
Html.body.innerHTML = .responseText
End With
For Each elem In Html.getElementById("historicalTable").Rows
For Each trow In elem.Cells
C = C + 1: Cells(R + 1, C) = trow.innerText
Next trow
C = 0: R = R + 1
Next elem
End Sub
Make sure to add the following references before executing the above script:
Microsoft XML, v6.0
Microsoft HTML Object Library

Related

VBA automate Edge Browser without downloading any external things

I have the VBA code below to automate IE and then extract the figures from the HTML table and populate the data into an Excel table. Is it possible to do the same thing by automating the Edge browser? Since my company doesn't allow us to install any 3rd-party application, Selenium is not an option. As I am not too familiar with coding, I would highly appreciate it if someone could offer some sample code.
Sub sfc_esg_list()
Dim IE As New InternetExplorer
Dim doc As New MSHTML.HTMLDocument
IE.Visible = True
'use IE browser to navigate SFC website
IE.navigate "https://www.sfc.hk/en/Regulatory-functions/Products/List-of-ESG-funds"
Do
DoEvents
'Application.Wait (Now() + TimeValue("00:00:04"))
Loop Until IE.readyState = 4
Set doc = IE.Document
Set TRs = doc.getElementsByTagName("tr")
Sheets("ESG list_SFC").Activate
'copy and paste the ESG fund list from SFC website to sheets<ESG list_SFC>
With Sheets("ESG list_SFC")
.Cells.Clear
For Each TR In TRs
r = r + 1
For Each Cell In TR.Children
C = C + 1
.Cells(r, C).NumberFormat = "#"
.Cells(r, C) = Cell.innerText
Next Cell
C = 0
Next TR
End With
IE.Quit
Set doc = Nothing
Set IE = Nothing
'Save the file
Application.ScreenUpdating = False
Application.DisplayAlerts = False
'ActiveWorkbook.Save
End Sub
IE is pretty much dead at this point. I think it should be something like this.
Sub TryMe()
Dim request As Object
Set request = CreateObject("MSXML2.XMLHTTP")
Dim oHtml As HTMLDocument
Dim htmlText As String
Set oHtml = New HTMLDocument
request.Open "GET", "https://www.sfc.hk/en/Regulatory-functions/Products/List-of-ESG-funds/", False
request.send
oHtml.body.innerHTML = request.responseText
htmlText = oHtml.getElementsByClassName("tablesorter tablesorter-default tablesorterfcd4c178102ad8")(0).outerhtml
With CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}") 'Clipboard
.SetText htmlText
.PutInClipboard
Sheets(1).Range("A1").Select
Sheets(1).PasteSpecial Format:="Unicode Text"
End With
End Sub
I thought the class name was 'tablesorter tablesorter-default tablesorterfcd4c178102ad8' but it doesn't seem to work, and I'm not sure why. Can you play around with some other class names? When you hit F12, you will see the HTML code behind the page.
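That trailing class name (tablesorterfcd4c178102ad8) looks auto-generated, so it can change between page loads, which would explain why the full class string fails. As a sketch, you could match only the stable tablesorter part instead and replace the getElementsByClassName line with something like:
Dim oTable As Object
For Each oTable In oHtml.getElementsByTagName("table")
'match on the stable part of the class, ignoring the generated suffix
If InStr(1, oTable.className, "tablesorter", vbTextCompare) > 0 Then
htmlText = oTable.outerHTML
Exit For
End If
Next oTable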

Question about extracting text from a specific website and printing it in excel using VBA

The webpage is "https://www.racingandsports.com/thoroughbred/jockey/jake-bayliss/27461". Below are the VBA code and my questions.
Edit 1: For QHarr: the way it's set up, it takes the data from the first 3 columns of the table. The xmlhttp part is just there because I copy-pasted it from a previous VBA script I was working on.
Edit 2: Thank you for the advice, Ron.
Edit 3: @QHarr: yes, I would like to be able to grab the first 3 columns from all the tables.
Sub Horse2()
Dim IE As InternetExplorer
Application.ScreenUpdating = False
Set IE = New InternetExplorer
IE.Visible = True
Set ws = ThisWorkbook.Worksheets("Sheet1")
Dim http As New XMLHTTP60, html As New HTMLDocument
Dim nodeRaceResultsTable As HTMLHtmlElement
Dim nodeTr As HTMLHtmlElement
Dim nodeDiv As HTMLHtmlElement
Dim Element1 As HTMLHtmlElement
Dim node1 As HTMLHtmlElement
Dim currentUrl As String
With IE
IE.Visible = True
IE.Navigate "https://www.racingandsports.com/thoroughbred/jockey/jake-bayliss/27461"
Do Until .readyState = 4: DoEvents: Loop
End With
With http
.Open "GET", "https://www.racingandsports.com/thoroughbred/jockey/jake-bayliss/27461", False
.send
html.body.innerHTML = .responseText
End With
The issue I have is that there are multiple instances of the same class name, so it will only print the first table on the page. The class name here, along with the inner text, is what enables me to get text from the tables on the webpage. However, I would like to extract the first 3 data points from every table on the webpage and have them printed to Excel.
For Each nodeRaceResultsTable In html.getElementsByClassName("col-md-12 table-responsive")
r = r + 1: c = 4
For Each nodeTr In nodeRaceResultsTable.getElementsByTagName("tr")
With nodeTr.getElementsByTagName("td")
If .Length Then
ws.Cells(r + 1, c + 3) = .Item(0).innerText
ws.Cells(r + 1, c + 4) = .Item(1).innerText
ws.Cells(r + 1, c + 5) = .Item(2).innerText
r = r + 1
End If
End With
Next
Next
IE.Quit
Set IE = Nothing
Application.StatusBar = ""
Application.ScreenUpdating = True
MsgBox "Input complete."
End Sub

Grabbing a single piece of data from a website's HTML and assign it to a variable

I'm working on a project where I grab data that I stored in an Excel sheet and use it to search the specific website that can be seen in the code below. Once the website completes the search, I want to grab the "worth" from the top right of the page. I'm fairly new to using VBA with HTML, so I'm not sure how to take the element (worth) that I'm looking for from the web page and assign it to a variable in VBA so I can paste it into my Excel sheet.
Right now I'm able to open IE, insert my data into the search bar of the specific website that I'm using, and click search. What I have is shown below. Thank you in advance!
Sub BrowsetoSite()
Dim IE As New SHDocVw.InternetExplorer
Dim website As String
Dim i As Integer
i = 2
'Set ie = New SHDocVw.InternetExplorer
website = "https://cardmavin.com/category/football"
IE.navigate website
IE.Visible = False
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
'assign info to variables to enter into the search bar
Dim idoc As MSHTML.HTMLDocument
Set idoc = IE.document
Dim Brand As String
Dim Year As String
Dim Num As String
Dim Name As String
Dim search As String
Dim value As Variant
Brand = Range("A" & i).value
Year = Range("B" & i).value
Num = Range("D" & i).value
Name = Range("E" & i).value
search = (Year & " " & Brand & " " & Name & " " & Num)
i = i + 1
idoc.getElementById("search-field").value = search
idoc.getElementById("to-mavin").Click
While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
value = idoc.getElementsByTagName("h4")(0).innerText
MsgBox value
IE.Quit
End Sub
The issue that I'm having is the value = idoc.getElementsByTagName("h4")(0).innerText. I've tried to get the element a few different ways, but have been unsuccessful so far.
You need Set idoc = IE.document after you've submitted the search, to get a reference to that new page. Otherwise you're still trying to access the previous page.
i = i + 1
idoc.getElementById("search-field").value = search
idoc.getElementById("to-mavin").Click
While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set idoc = IE.document '<<<<<<<<<<<<<<
Dim value As Variant
value = idoc.getElementById("worthBox") _
.getElementsByTagName("h4")(0).innerText
MsgBox value
Try this approach. Suppose cell A1 contains the string 2008 Topps Thomas DeCoud.
Sub Test()
Const sURL As String = "https://mavin.io/search?q="
Dim json As Object
Set json = GetJSONFromHTMLHead(sURL & Application.WorksheetFunction.EncodeURL(Range("A1").Value))
Debug.Print json("offers")("priceCurrency")
Debug.Print json("offers")("price")
End Sub
Function GetJSONFromHTMLHead(ByVal sURL As String) As Object
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument, re As Object, json As Object
Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument
Set re = CreateObject("VBScript.RegExp")
re.Pattern = "<head>([\s\S]+)<\/head>"
With http
.OPEN "Get", sURL, False
.send
html.body.innerHTML = Replace$(Replace$(re.Execute(.responseText)(0), "<head>", "<body>"), "</head>", "</body>")
End With
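'JSONConverter.ParseJson below comes from the VBA-JSON library (JsonConverter.bas), which must be imported into the project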
Set json = JSONConverter.ParseJson(html.querySelector("script[type='application/ld+json']").innerHTML)
Set GetJSONFromHTMLHead = json
End Function

VBA reads HTML from the old page after clicking submit button

I am not a programmer, but I have managed to learn a few things in VBA; now, on a certain website, I face a problem that does not exist on some others.
What should happen is that a page form is completed with data, the submit button is clicked, and then I want to get some data from the result page.
The first phase works fine, but it seems that no matter what I do, VBA still reads data from the page as it was before submit was clicked.
The code is:
Sub VIES2()
'Start Internet Explorer and hold further action until it reaches the ready state
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.Navigate "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
Do While IE.ReadyState <> 4: DoEvents: Loop
'Fill in the form with the appropriate values and click the verification button
IE.document.getElementbyId("countryCombobox").Value = "IT"
IE.document.getElementbyId("number").Value = "01802840023"
IE.document.getElementbyId("requesterCountryCombobox").Value = "IT"
IE.document.getElementbyId("requesterNumber").Value = "01802840023"
IE.document.getElementbyId("submit").Click
'Test retrieving the description and the query identifier
For t = 1 To 999999
Next t
Application.Wait Now + TimeValue("00:00:10")
Do While IE.ReadyState <> 4: DoEvents: Loop
For t = 1 To 999999
Next t
Application.Wait Now + TimeValue("00:00:10")
MsgBox IE.LocationURL
Set Text = IE.document.getElementsbyClassName("layout-content")
For Each Element In Text
MsgBox Element.innerText
Next
Set Test = IE.document.getElementsbyTagName("TABLE")
For Each Element In Test
MsgBox Element.innerText
Next
End Sub
I have tried putting breaks, various wait loops and Application.Wait, as suggested in similar questions where that seems to have worked. Here, even long after the page has fully loaded, the code still reads the old page - at least pulling the URL and some data seems to indicate that this is the case.
UPDATE: I should also add that I have tried to make the macro refresh the page, but that clears the input content. What is interesting is that the target URL is:
http://ec.europa.eu/taxation_customs/vies/vatResponse.html
If I change the initial page to this one, the browser instantly redirects to the original page with a notification that the initial data is needed. The macro then fills in the data and clicks the submit button. In this case IE.LocationURL indicates this URL:
http://ec.europa.eu/taxation_customs/vies/vatResponse.html
but judging by the content I get with getElementsByClassName, it still reads elements from the initial page:
http://ec.europa.eu/taxation_customs/vies/?locale=pl
This worked for me to print out the VAT response table.
Note:
If you are on 32-bit Office, remove PtrSafe.
Code:
Option Explicit
Declare PtrSafe Sub sleep Lib "kernel32" Alias "Sleep" (ByVal dwmilliseconds As Long)
Public Sub VIES2()
Application.ScreenUpdating = False
Dim IE As Object
'Start Internet Explorer and hold further action until it reaches the ready state
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
Do While IE.ReadyState <> 4: DoEvents: Loop
'Fill in the form with the appropriate values and click the verification button
IE.document.getElementById("countryCombobox").Value = "IT"
IE.document.getElementById("number").Value = "01802840023"
IE.document.getElementById("requesterCountryCombobox").Value = "IT"
IE.document.getElementById("requesterNumber").Value = "01802840023"
IE.document.getElementById("submit").Click
sleep (5000) 'or increase to 10000
Dim tbl As Object
Set tbl = IE.document.getElementById("vatResponseFormTable")
Dim ws As Worksheet
Set ws = ActiveWorkbook.Worksheets.Add
ws.Name = "Results"
Dim rng As Range, currentRow As Object, currentColumn As Object, i As Long, outputRow As Long
outputRow = outputRow + 1
Set rng = ws.Range("B" & outputRow)
For Each currentRow In tbl.Rows
For Each currentColumn In currentRow.Cells
rng.Value = currentColumn.outerText
Set rng = rng.Offset(, 1)
i = i + 1
Next currentColumn
outputRow = outputRow + 1
Set rng = rng.Offset(1, -i)
i = 0
Next currentRow
Application.ScreenUpdating = True
End Sub
Although QHarr's solution is working on my end, I'm providing another one with no hardcoded delay within the script.
Using IE, as in your question:
Sub Get_Data()
Dim HTML As HTMLDocument, post As Object, elems As Object
Dim elem As Object, r&, c&
With New InternetExplorer
.Visible = False
.navigate "http://ec.europa.eu/taxation_customs/vies/?locale=pl"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set HTML = .document
With HTML
.getElementById("countryCombobox").Value = "IT"
.getElementById("number").Value = "01802840023"
.getElementById("requesterCountryCombobox").Value = "IT"
.getElementById("requesterNumber").Value = "01802840023"
.getElementById("submit").Click
Do: Set post = .getElementById("vatResponseFormTable"): DoEvents: Loop While post Is Nothing
For Each elems In post.Rows
For Each elem In elems.Cells
c = c + 1: Cells(r + 1, c) = elem.innerText
Next elem
c = 0: r = r + 1
Next elems
End With
.Quit
End With
End Sub
References to add (Tools > References):
1. Microsoft Internet Controls
2. Microsoft HTML Object Library
Using an xmlhttp request (it is way faster than IE):
Sub Get_Data()
Dim elems, elem As Object
Dim QueryString$, S$, r&, c&
QueryString = "memberStateCode=IT&number=01802840023&traderName=&traderStreet=&traderPostalCode=&traderCity=&requesterMemberStateCode=IT&requesterNumber=01802840023&action=check&check=Weryfikuj"
With New MSXML2.XMLHTTP60
.Open "POST", "http://ec.europa.eu/taxation_customs/vies/vatResponse.html", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send QueryString
S = .responseText
End With
With New HTMLDocument
.body.innerHTML = S
For Each elems In .getElementById("vatResponseFormTable").Rows
For Each elem In elems.Cells
c = c + 1: Cells(r + 1, c) = elem.innerText
Next elem
c = 0: r = r + 1
Next elems
End With
End Sub
References to add (Tools > References):
1. Microsoft XML, v6.0
2. Microsoft HTML Object Library
Most of the time you should check whether there isn't a REST/SOAP service available to achieve that kind of task.
Using an Internet Explorer instance for this is total overkill.
Try this simple function, that uses the SOAP service to validate VAT numbers:
Function IsVatValid(country_code, vat_number) As Boolean
Dim objHTTP As Object
Dim xmlDoc As Object
Dim sURL As String
Dim sEnv As String
Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
sURL = "http://ec.europa.eu/taxation_customs/vies/services/checkVatService"
sEnv = "<s11:Envelope xmlns:s11='http://schemas.xmlsoap.org/soap/envelope/'>" & _
"<s11:Body>" & _
"<tns1:checkVat xmlns:tns1='urn:ec.europa.eu:taxud:vies:services:checkVat:types'>" & _
"<tns1:countryCode>" & country_code & "</tns1:countryCode>" & _
"<tns1:vatNumber>" & vat_number & "</tns1:vatNumber>" & _
"</tns1:checkVat>" & _
"</s11:Body>" & _
"</s11:Envelope>"
objHTTP.Open "Post", sURL, False
objHTTP.setRequestHeader "Content-Type", "text/xml"
objHTTP.setRequestHeader "SOAPAction", "checkVatService"
objHTTP.send (sEnv)
objHTTP.waitForResponse
Set xmlDoc = CreateObject("HTMLFile")
xmlDoc.body.innerHTML = objHTTP.responsetext
IsVatValid = CBool(xmlDoc.getElementsByTagName("valid")(0).innerHTML)
Set xmlDoc = Nothing
Set objHTTP = Nothing
End Function
And then you can simply validate all your VAT numbers:
Debug.Print IsVatValid("IT", "01802840023")
>>> True
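Since it is a plain public function, you could presumably also call it straight from a worksheet cell as a UDF, e.g. =IsVatValid("IT","01802840023"); just bear in mind that every recalculation then fires a new SOAP request.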

VBA Excel pulling new webpage data after clicking on "submit"

I'm trying to pull some info from a website that provides oil well data by API number (API is a unique number for every well in the US).
Website: http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1
API example: 1708300502
The issue is, when I get to the 2nd page, IE.document.getElementsByTagName("body")(0).innerText still returns data from the initial page. How do I fetch the updated page data?
The ultimate goal is to get to the 2nd page, click on "30570" via IE.document.getElementsByTagName("a")(0).Click and then read the final 3rd page. I just cannot figure out how to read the updated page :(
Option Explicit
Sub sonris_WellData()
Dim IE As InternetExplorer
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
Dim i As Integer
'Open SONRIS website
Application.StatusBar = "Opening Website"
IE.navigate "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
Do While IE.readyState <> 4: DoEvents: Loop
Application.Wait Now() + TimeValue("00:00:01")
Application.StatusBar = False
IE.document.forms(0).p_apinum.Value = "1708300502" 'plug-in API
IE.document.forms(0).submit
' Wait until the next page opens
Application.StatusBar = "Opening Website"
Do While IE.readyState <> 4: DoEvents: Loop
Application.Wait Now() + TimeValue("00:00:01")
Application.StatusBar = False
' Read the page - this is where the issue occurs, MsgBox keeps returning text from the very 1st page
MsgBox IE.document.getElementsByTagName("body")(0).innerText
IE.Quit
End Sub
This seems to be working. Rather than DoEvents, use the WinAPI Sleep function. I also added a call to the Sleep function after the form submit.
More often we are seeing sites that are dynamically served by some JavaScript etc.; in these cases the browser may appear to be READYSTATE_COMPLETE or not Busy, but the page has not yet rendered the "new" results.
Option Explicit
Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
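'Note: on 64-bit Office this Declare needs PtrSafe, i.e. Public Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)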
Sub sonris_WellData()
Dim IE As Object 'InternetExplorer
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
Dim i As Integer
'Open SONRIS website
Application.StatusBar = "Opening Website"
IE.navigate "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
Do While IE.readyState <> 4
Sleep 1000
Loop
Application.StatusBar = False
IE.document.forms(0).p_apinum.Value = "1708300502" 'plug-in API
IE.document.forms(0).submit
Sleep 1000
' Wait until the next page opens
Application.StatusBar = "Opening Website"
Do While IE.readyState <> 4
Sleep 1000
Loop
Application.StatusBar = False
' Read the page - this is where the issue occurs, MsgBox keeps returning text from the very 1st page
MsgBox IE.document.getElementsByTagName("body")(0).innerText
IE.Quit
End Sub
You can experiment with a slightly longer Sleep after the .submit.
Alternatively, I notice that after you submit, the URL changes, so you could also try changing the second waiting loop to:
Do While IE.LocationURL = "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi1"
Sleep 1000
Loop
This should make Excel wait until the URL has changed.
Alternatively, you may have better luck using an XMLHTTPRequest (there are many examples of this here on SO and elsewhere on the internet). This allows you to send a request just like the browser, without actually using a web browser. Then you can simply parse the return text as HTML or XML. I would use the Microsoft XML, v6.0 library reference for this.
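A minimal sketch of that pattern, assuming references to Microsoft XML, v6.0 and Microsoft HTML Object Library, and assuming the form posts its p_apinum field to the cart_con_wellapi2 action (confirm the exact endpoint and body from the browser's network traffic):
Sub WellDataViaXHR()
Dim req As New MSXML2.XMLHTTP60
Dim doc As New MSHTML.HTMLDocument
'Send the form data directly instead of driving a browser
req.Open "POST", "http://sonlite.dnr.state.la.us/sundown/cart_prod/cart_con_wellapi2", False
req.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
req.send "p_apinum=1708300502"
'Parse the returned HTML and read it like any other document
doc.body.innerHTML = req.responseText
MsgBox doc.body.innerText
End Sub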
POST requests:
① Entering the Well API number
I examined the web page, making the selections you mention. I inspected the web traffic using Fiddler and noticed that the initial request, when you submit the API number, is handled by a POST request.
② POST request:
The POST body has the following parameter:
p_apinum is the key, and the associated value is the original Well API number.
Using this info I formulated a direct POST request, thus avoiding your first landing page.
③ Pressing the hyperlink:
Next, I looked at the element you wanted to press; in its associated HTML it has a relative hyperlink. I use a helper function to parse the page HTML, extract this relative link, and construct the absolute path: GetNextURL(page.body.innerHTML).
④ Making a new request:
I re-use my HTTPRequest function GetPage to send a second request, with an empty body, and grab all the tables from the HTML document returned via: page.getElementsByTagName("table").
⑤ Writing the tables to the Excel worksheet:
I loop over all the tables on the page, using helper function AddHeaders to write out the table headers and WriteTables to write the current table to the sheet.
VBA:
Option Explicit
Public Sub GetWellInfo()
Dim ws As Worksheet, page As HTMLDocument, targetTable As HTMLTable, apiNumbers(), currNumber As Long
Const PARAM1 As String = "p_apinum"
Const BASESTRING As String = "http://sonlite.dnr.state.la.us/sundown/cart_prod/"
apiNumbers = Array(1708300502, 1708300503)
Application.ScreenUpdating = False
Set ws = ThisWorkbook.Worksheets("Sheet1")
With ws
.Cells.ClearContents
For currNumber = LBound(apiNumbers) To UBound(apiNumbers)
Set page = GetPage(BASESTRING & "cart_con_wellapi2", apiNumbers(currNumber), PARAM1)
Set page = GetPage(BASESTRING & GetNextURL(page.body.innerHTML))
Dim allTables As Object
Set allTables = page.getElementsByTagName("table")
For Each targetTable In allTables
AddHeaders targetTable, GetLastRow(ws, 1) + 2, ws
WriteTables targetTable, GetLastRow(ws, 1), ws
Next targetTable
Next currNumber
End With
Application.ScreenUpdating = True
End Sub
Public Function GetPage(ByVal url As String, Optional ByVal apiNumber As Long, Optional ByVal paramN As String = vbNullString) As HTMLDocument
Dim objHTTP As Object, html As New HTMLDocument
Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
Dim sBody As String
If Not paramN = vbNullString Then sBody = paramN & "=" & apiNumber
With objHTTP
.SetTimeouts 10000, 10000, 10000, 10000
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
On Error Resume Next
.send (sBody)
If Err.Number = 0 Then
If .Status = "200" Then
html.body.innerHTML = .responseText
Set GetPage = html
Else
Debug.Print "HTTP " & .Status & " " & .statusText
Exit Function
End If
Else
Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
Exit Function
End If
On Error GoTo 0
End With
End Function
Public Function GetNextURL(ByVal inputString As String) As String
GetNextURL = Replace$(Replace$(Split(Split(inputString, "href=")(1), ">")(0), Chr$(34), vbNullString), "about:", vbNullString)
End Function
Public Sub AddHeaders(ByVal hTable As Object, ByVal startRow As Long, ByVal ws As Worksheet)
Dim headers As Object, header As Object, columnCounter As Long
Set headers = hTable.getElementsByTagName("th")
For Each header In headers
columnCounter = columnCounter + 1
ws.Cells(startRow, columnCounter) = header.innerText
Next header
End Sub
Public Sub WriteTables(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByRef ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
r = startRow: c = 1 'start writing in column 1
With ws
Set tRow = hTable.getElementsByTagName("tr")
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
r = r + 1: c = 1
Next tr
End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
References:
VBE > Tools > References > Microsoft HTML Object Library.