Web Scraping: reformat infomation from an HTML table - html

I am trying to extract the Futures data from MRCI.com and restructure it into one continous table in an excel worksheet so I can manipulate from there.
How can I repeat the Futures Contract in each row to get the following table layout:
Table Structure
Here's my code so far:
Sub MRCIData()
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim FutTable As MSHTML.IHTMLElement
Dim FutRows As MSHTML.IHTMLElementCollection
Dim FutRow As MSHTML.IHTMLElement
Dim FutCells As MSHTML.IHTMLElementCollection
Dim FutCell As MSHTML.IHTMLElement
Dim FutContracts As MSHTML.IHTMLElementCollection
Dim FutContract As MSHTML.IHTMLElement
Dim FutRowText As String
Dim MrciURLHist As String
MrciURLHist = "https://www.mrci.com/ohlc/2020/200320.php"
XMLReq.Open "GET", MrciURLHist, False
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Exit Sub
End If
HTMLDoc.body.innerHTML = XMLReq.responseText
Set XMLReq = Nothing
Set FutContracts = HTMLDoc.getElementsByClassName("note1")
For Each FutContract In FutContracts
Next
Set FutTable = HTMLDoc.getElementsByClassName("strat")(0)
Set FutRows = FutTable.getElementsByTagName("tr")
For Each FutRow In FutRows
Set FutCells = FutRow.getElementsByTagName("td")
FutRowText = ""
If InStr(FutRow.innerText, "Total Volume") = 0 Then
For Each FutCell In FutCells
FutRowText = FutRowText & vbTab & FutCell.innerText
Next
End If
Debug.Print , FutRowText
Next
End Sub

The following code looks through the table line by line and determines what future to apply to the next rows until it finds a the next one, and so on. the output is not pretty so more a proof of concept. The code now outputs the table correctly.
Sub Main(ByVal Sheet As Worksheet)
Dim oRequest As New MSXML2.XMLHTTP60
Dim oDocument As New MSHTML.HTMLDocument
Dim oRows As MSHTML.IHTMLElementCollection
Dim oRow As MSHTML.IHTMLElement
Dim oCells As MSHTML.IHTMLElementCollection
Dim oCell As MSHTML.IHTMLElement
oRequest.Open "GET", "https://www.mrci.com/ohlc/2020/200320.php", False
oRequest.send
If oRequest.Status <> 200 Then
MsgBox "Error"
Exit Sub
End If
oDocument.body.innerHTML = oRequest.responseText
Set oRequest = Nothing
Dim Skip As Boolean
Dim Current As String
Dim RowIndex As Integer
Dim ColumnIndex As Integer
Set oRows = oDocument.getElementsByClassName("strat")(0).getElementsByTagName("tr")
Current = ""
Application.ScreenUpdating = False
For Each oRow In oRows
Skip = False
If oRow.getElementsByTagName("th").Length > 0 Then
Current = oRow.innerText
Skip = True
End If
If Not Current = "" And Skip = False Then
If InStr(oRow.innerText, "Total Volume") = 0 Then
Set oCells = oRow.getElementsByTagName("td")
ColumnIndex = 2
Sheet.Cells(RowIndex, 1).Value = Current
For Each oCell In oCells
Sheet.Cells(RowIndex, ColumnIndex).Value = oCell.innerText
ColumnIndex = ColumnIndex + 1
Next oCell
RowIndex = RowIndex + 1
End If
End If
Next oRow
Application.ScreenUpdating = True
End Sub

Related

Webscrape VBA with if condition

I am trying to import the bullet point from a website into an excel table (each bulletpoint filling with a li tag).
Yet I am facing an important difficulty as some page I would like to scrape have several "Part" (Part #1, Part #2, like this one https://www.thewindpower.net/windfarm_en_793_virtsu-i.php) and other haven't (like this one https://www.thewindpower.net/windfarm_en_7410_khizi.php)
I having already come up with a draft of a code that I believe could start, yet, I still have some issue and I get an error message ("Time out").
Do you have any idea how I could fix it ?
Thanks in advance for your help,
Sub Page()
GetPage ("https://www.thewindpower.net/windfarm_en_1922_a-capelada-i.php")
End Sub
Sub GetPage(URL As String)
Dim count As Integer
Dim Request As MSXML2.ServerXMLHTTP60: Set Request = New MSXML2.ServerXMLHTTP60
Dim Result As HTMLDocument: Set Result = New HTMLDocument
Request.Open "GET", URL, False
Request.send
Result.body.innerHTML = Request.responseText
Dim oRows As MSHTML.IHTMLElementCollection
Dim oRow As MSHTML.IHTMLElement
Dim oCells As MSHTML.IHTMLElementCollection
Dim oCell As MSHTML.IHTMLElement
Dim oLinks As MSHTML.IHTMLElementCollection
'Set Generalities
Set oRows = Result.getElementsByTagName("ul")(4).getElementsByTagName("li")
Dim iRow As Integer 'output li counter
Dim iColumn As Integer 'output column counter
Dim Sheet As Worksheet 'output sheet
iRow = 1
iColumn = 1
Set Sheet = ThisWorkbook.Worksheets("Sheet1")
count = Result.getElementsByTagName("h3").Length
If count > 0 Then
'# f Part on the page, 2 for the moment
Dim p As Integer
Dim o As Integer
p = count / 2
'Counter for each Part identified
For o = 1 To p
'Set Generalities data
iRow = 1
iColumn = 1
For Each oRow In oRows
Set oCells = oRow.getElementsByTagName("li")
For Each oCell In oCells
Sheet.Cells(iRow, iColumn).Value = oCell.innerText
iColumn = iColumn + 1
Next oCell
iRow = iRow + 1
Next oRow
'Set Detail data
Set oRows2 = Result.getElementsByTagName("h3")(o).getElementsByTagName("li")
For Each oRow In oRows2
Set oCells = oRow.getElementsByTagName("li")
For Each oCell In oCells
Sheet.Cells(iRow, iColumn).Value = oCell.innerText
iColumn = iColumn + 1
Next oCell
iRow = iRow + 1
iColumn = 1
Next oRow
iRow = iRow + 1
'insert a row
Range("iRow").Insert CopyOrigin:=xlFormatFromRightOrBelow
'increment Part counter
Next o
Else
'Set Generalities data
For Each oRow In oRows
Set oCells = oRow.getElementsByTagName("li")
For Each oCell In oCells
Sheet.Cells(iRow, iColumn).Value = oCell.innerText
iColumn = iColumn + 1
Next oCell
iRow = iRow + 1
Next oRow
'Set Detail data
Set oRows2 = Result.getElementsByTagName("ul")(5).getElementsByTagName("li")
For Each oRow In oRows2
Set oCells = oRow.getElementsByTagName("li")
For Each oCell In oCells
Sheet.Cells(iRow, iColumn).Value = oCell.innerText
iColumn = iColumn + 1
Next oCell
iRow = iRow + 1
iColumn = 1
Next oRow
End If
End Sub
Summary
I would gather a nodeList via css selectors to match on the relevant nodes. I would have two separate nodeLists. One for the generalities and another for the parts. I would determine the number of parts (as they repeat) and loop to those number of parts; concatenating the html for the repeated part that comes later with the former. Then put that combined html into a surrogate HTMLDocument variable and make a new nodeList of all the li elements contained. Use a helper function to return the text of the nodeList nodes in an array and then write that out to the sheet on a new combined text per row basis.
VBA:
Option Explicit
Public Sub WindInfo()
'VBE> Tools > References:
'1. Microsoft, XML v6
'2. Microsoft HTML Object Library
'3. Microsoft Scripting Runtime
Dim xhr As MSXML2.XMLHTTP60: Set xhr = New MSXML2.XMLHTTP60
Dim html As MSHTML.HTMLDocument: Set html = New MSHTML.HTMLDocument
Dim ws As Worksheet: Set ws = ThisWorkbook.Worksheets("Sheet1")
With xhr
.Open "GET", "https://www.thewindpower.net/windfarm_en_7410_khizi.php", False
.send
html.body.innerHTML = .responseText
End With
Dim generalities As Object, arrGen(), partsList As Object
Dim r As Long
Set generalities = html.querySelectorAll("#bloc_texte table ~ table li")
arrGen = GetNodesTextAsArray(generalities)
Dim parts As Object, numberOfParts As Long
Set partsList = html.querySelectorAll("h1 ~ h3, ul ~ h3")
r = 1
If partsList.Length > 0 Then
numberOfParts = html.querySelectorAll("h1 ~ h3, ul ~ h3").Length / 2
Set parts = html.querySelectorAll("h3 + ul")
Dim i As Long, liNodes As Object, arr()
Dim html2 As MSHTML.HTMLDocument: Set html2 = New MSHTML.HTMLDocument
For i = 0 To numberOfParts - 1
ws.Cells(r, 1).Resize(1, UBound(arrGen)) = arrGen
html2.body.innerHTML = parts.Item(i).outerHTML & parts.Item(i + numberOfParts).outerHTML
Set liNodes = html2.querySelectorAll("li")
arr = GetNodesTextAsArray(liNodes)
ws.Cells(r, 5).Resize(1, UBound(arr)) = arr
r = r + 1
Next
Else
Dim alternateNodeList As Object: Set alternateNodeList = html.querySelectorAll("#bloc_texte h1 + ul")
If alternateNodeList.Length >= 1 Then
arr = GetNodesTextAsArray(alternateNodeList.Item(1).getElementsByTagName("li"))
Else
arr = Array("No", "Data", vbNullString)
End If
ws.Cells(r, 1).Resize(1, UBound(arrGen)) = arrGen
ws.Cells(r, 5).Resize(1, UBound(arr)) = arr
End If
End Sub
Public Function GetNodesTextAsArray(ByVal nodeList As Object) As Variant()
Dim i As Long, results()
If nodeList.Length = 0 Then
GetNodesTextAsArray = Array("No", "Data", vbNullString)
Exit Function
End If
ReDim results(1 To nodeList.Length)
For i = 0 To nodeList.Length - 1
results(i + 1) = nodeList.Item(i).innerText
Next i
GetNodesTextAsArray = results
End Function
References:
CSS selectors

Modifying the program for parsing

There is a program that parse a certain table from the site . Works great . I want to parse another table from the site . By the tag number “table” they are the same . I am trying to use the same program , but it gives an error : Run-time error 91 in the line :
If oRow.Cells(y).Children.Length > 0 Then
New table : http://allscores.ru/soccer/fstats.php?champ=2604&team=439&team2=420&tour=110
Old table : http://allscores.ru/soccer/new_ftour.php?champ=2604&f_team=439
New table : in the attached picture
Function extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
Dim oDom As Object, oTable As Object, oRow As Object
Dim iRows As Integer, iCols As Integer
Dim x As Integer, y As Integer
Dim data()
Dim vata()
Dim tata()
Dim oHttp As Object
Dim oRegEx As Object
Dim sResponse As String
Dim oRange As Range
Dim odRange As Range
' get page
Set oHttp = CreateObject("MSXML2.XMLHTTP")
oHttp.Open "GET", Ssilka, False
oHttp.Send
' cleanup response
sResponse = StrConv(oHttp.responseBody, vbUnicode)
Set oHttp = Nothing
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Set oRegEx = CreateObject("vbscript.regexp")
With oRegEx
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</\1>"
sResponse = .Replace(sResponse, "")
End With
Set oRegEx = Nothing
' create Document from response
Set oDom = CreateObject("htmlFile")
oDom.Write sResponse
DoEvents
' table with results, indexes starts with zero
Set oTable = oDom.getelementsbytagname("table")(3)
DoEvents
iRows = oTable.Rows.Length
iCols = oTable.Rows(1).Cells.Length
' first row and first column contain no intresting data
ReDim data(1 To iRows - 1, 1 To iCols - 1)
ReDim vata(1 To iRows - 1, 1 To iCols - 1)
ReDim tata(1 To iRows - 1, 1 To iCols - 1)
' fill in data array
For x = 1 To iRows - 1
Set oRow = oTable.Rows(x)
For y = 1 To iCols - 1
If oRow.Cells(y).Children.Length > 0 Then
data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).getattribute("href")
data(x, y) = Replace(data(x, y), "about:", "http://allscores.ru/soccer/")
vata(x, y) = oRow.Cells(y).innerText
End If
Next y
Next x
Set oRow = Nothing
Set oTable = Nothing
Set oDom = Nothing
Set oRange = book1.ActiveSheet.Cells(110, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
oRange.NumberFormat = "#"
oRange.Value = data
Set odRange = book1.ActiveSheet.Cells(34, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
odRange.NumberFormat = "#"
odRange.Value = vata
Set oRange = Nothing
Set odRange = Nothing
End Function
This is not particularly robust but does grab the values from the table. iLoop is not used.
Option Explicit
Public Sub test()
extractTable "http://allscores.ru/soccer/fstats.php?champ=2604&team=439&team2=420&tour=110", ThisWorkbook, 1
End Sub
Public Sub extractTable(Ssilka As String, book1 As Workbook)
Dim oDom As Object, oTable As Object
Dim oHttp As Object
Dim oRegEx As Object
Dim sResponse As String
Set oHttp = CreateObject("MSXML2.XMLHTTP")
oHttp.Open "GET", Ssilka, False
oHttp.send
sResponse = StrConv(oHttp.responseBody, vbUnicode)
Set oHttp = Nothing
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Set oRegEx = CreateObject("vbscript.regexp")
With oRegEx
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</\1>"
sResponse = .Replace(sResponse, "")
End With
Set oRegEx = Nothing
Set oDom = CreateObject("htmlFile")
oDom.Write sResponse
Set oTable = oDom.getElementsByTagName("table")(3)
Dim b As Object, a As Object
Set b = oTable.getElementsByTagName("TR") 'DispHTMLElementCollection
Dim i As Long, y As Long
With ActiveSheet
For i = 3 To 17 '17-3 gives the 15 rows of interest. Start at 3 to avoid header and empty row.
Set a = b(i).ChildNodes
For y = 1 To a.Length - 1
.Cells(i - 2, y) = a(y).innerText
Next y
Next i
End With
End Sub

Why isn't my VBA code pulling information from a website's HTMLDoc?

I'm not sure why my code isn't working (returning business names, phone numbers, and contact numbers from a website's HTMLDoc I'm trying to pull information from. Can you help identify what I'm doing incorrectly (most likely with the IHTMLElement and IHTMLElementCollection data types, and/or accessing the HTML through getElementsByTagName, getElementsByClassName, etc). Thank you!!
Option Explicit
Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"
'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String
'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement
Dim itemprop As String
Dim itemprop2 As String
Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement
Dim RowNumber As Long
'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all
'loop for business names
For Each BusinessName In BusinessNameCollection
Cells(RowNumber, 1).Value = BusinessName
RowNumber = RowNumber + 1
Next BusinessName
'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
If itemprop = "streetAddress" Then
Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
ElseIf itemprop = "addressLocality" Then
Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "addressRegion" Then
Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "postalCode" Then
Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all
'loop for business addresses
For Each BusinessAddress In BusinessAddressCollection
BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
Cells(RowNumber, 2).Value = BusinessAddress
RowNumber = RowNumber + 1
Next BusinessAddress
'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
If itemprop2 = "telephone" Then
BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all
'loop for business phones
For Each BusinessPhone In BusinessPhoneCollection
Cells(RowNumber, 3).Value = BusinessPhone
RowNumber = RowNumber + 1
Next BusinessPhone
Range("A1").Activate
Set HTMLDoc = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "Manta.com Business Contacts"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
This extracts the info. You haven't looped all the results pages in your code or mentioned it so I have set this up to show you how to do the first page of results. Let me know how this goes.
Code:
Option Explicit
Public Sub FinalMantaSub() '<== Can't have ad blocker enabled for this site
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = True
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Dim c As Object, i As Long
Set c = HTMLDoc.querySelectorAll("div.media-body")
Do While Not c(i) Is Nothing
Debug.Print "Result #" & i + 1
Debug.Print vbNewLine
Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
Debug.Print String$(20, Chr$(61))
i = i + 1
Loop
IE.Quit
End Sub
Snapshot of output:
Update:
There are a vast number of results but you can have an outer loop as follows. You could then turn the above in to a sub that is called.
Dim arr() As String, pageNo As Long
arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
pageNo = 1
Do While Err.Number = 0
On Error GoTo Errhand:
Dim url As String
url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
Debug.Print url
IE.navigate url
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
pageNo = pageNo + 1
Loop
Errhand:
Debug.Print "Stopped after " & pageNo & " pages."

how to select 6th tr's td in a html table using vba code

tr---- 0495024988
14.08.1996
04/04/130/02514/AM96/
23.01.1996
0495024988
6. tr----(here the text is there which i have copy to my excel sheet)
here i have read many post where the javascript code is given but the vba code is not there. please help me out of this.
Sub GoToWebSiteAndPlayAroundNew()
Dim appIE As Object ' InternetExplorer.Application
Dim URL As String
Dim i As Long, strText As String
Dim doc As Object, hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object
Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
Dim sws As SHDocVw.ShellWindows
Dim IE As Object
Dim vIE As SHDocVw.InternetExplorer
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set appIE = CreateObject("InternetExplorer.Application")
URL = "http://dgft.delhi.nic.in:8100/dgft/IecPrint"
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
With appIE
.navigate URL
.Visible = True
Do While .busy: DoEvents: Loop
Do While .ReadyState <> 4: DoEvents: Loop
.document.getElementById("iec").Value = "0495024988"
.document.getElementById("name").Value = "AMB"
End With
On Error Resume Next
With appIE.document
Set elems = .getElementsByTagName("input")
For Each e In elems
If (e.getAttribute("value") = "Submit Query") Then
e.Click
Exit For
End If
Next e
End With
Set sws = New SHDocVw.ShellWindows
For Each vIE In sws
'If Left(vIE.LocationURL, 4) = "http" Then 'avoid explorer windows/etc this way
'If MsgBox("IE Window found. The URL is:" & vbCrLf & vIE.LocationURL & vbCrLf & _
'vbCrLf & "Do you want to see the html?", vbYesNo) = vbYes Then
'Show html in a msgbox
' MsgBox vIE.document.body.innerHTML
'Or put it to a file
'dim vFF as long
'vff=freefile
'open "C:\thehtml.txt" for output as #vff
'print #vff,vie.document.body.innerhtml
'close #vff
' End If
'End If
Set doc = vIE.document
Set hTable = doc.getElementsByTagName("table")
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
MsgBox hTR.Length
For Each tr In hTR
Set hTD = tr.getElementsByTagName("td")
MsgBox hTD.Length
y = 1 ' Resets back to column A
For Each td In hTD
ws.Cells(z, y).Value = td.innerText
y = y + 1
Next td
DoEvents
z = z + 1
Next tr
Exit For
Next bb
Exit For
Next tb
Next
End Sub

Parse/Scrape column from table using MSHTML.HTMLDocument

I wrote this piece of code that scrapes the whole table from the webpage in the URL variable. I would like to only scrape/parse the column by the name
"Extrapolated Vol".
My html/xml is not strong, so a solution along with an explanation would be appreciated!
https://services.tcpl.ca/cor/public/gdsr/GdsrNGTLImperial20151122.htm
Thanks
Sub ExtractAlbertaAIL()
Application.ScreenUpdating = False
Application.EnableEvents = False
Dim URL As String
Dim Request As MSXML2.XMLHTTP
Dim doc As MSHTML.HTMLDocument
Dim tr As MSHTML.HTMLGenericElement
Dim td As MSHTML.HTMLGenericElement
Dim RowNumber As Integer
Dim ColNumber As Integer
ActiveWorkbook.Worksheets("Gas Day Summary").Range("A5:H10000") = ""
Set Request = CreateObject("msxml2.xmlhttp")
If Request Is Nothing Then
MsgBox "For some reason I wasn't able to make a MSXML2.XMLHTTP object"
Exit Sub
End If
URL = "https://services.tcpl.ca/cor/public/gdsr/GdsrNGTLImperial20151122.htm"
With Request
.Open "GET", URL, False
.send
Set doc = New MSHTML.HTMLDocument
doc.body.innerHTML = .responseText
End With
RowNumber = 1
For Each tr In doc.getElementsByTagName("table").Item(2).getElementsByTagName("tr")
ColNumber = 1
For Each td In tr.getElementsByTagName("td")
Worksheets("Gas Day Summary").Cells(RowNumber, ColNumber) = td.innerText
ColNumber = ColNumber + 1
Next td
RowNumber = RowNumber + 1
Next tr
Application.ScreenUpdating = True
Application.EnableEvents = True
End Sub
Try this
Sub ExtractAlbertaAIL()
Application.ScreenUpdating = False
Application.EnableEvents = False
Dim URL As String
Dim Request As MSXML2.XMLHTTP60
Dim doc As MSHTML.HTMLDocument
Dim tr As MSHTML.HTMLGenericElement
Dim td As MSHTML.HTMLGenericElement
Dim VOLUME_SUMMARY_FOUND As Boolean
VOLUME_SUMMARY_FOUND = False
Dim RowNumber As Integer
Dim ColNumber As Integer
ActiveWorkbook.Worksheets("Gas Day Summary").Range("A5:H10000") = ""
Set Request = CreateObject("msxml2.xmlhttp")
If Request Is Nothing Then
MsgBox "For some reason I wasn't able to make a MSXML2.XMLHTTP object"
Exit Sub
End If
URL = "https://services.tcpl.ca/cor/public/gdsr/GdsrNGTLImperial20151122.htm"
With Request
.Open "GET", URL, False
.send
Set doc = New MSHTML.HTMLDocument
doc.body.innerHTML = .responseText
End With
RowNumber = 1
For Each tr In doc.getElementsByTagName("table").Item(2).getElementsByTagName("tr")
If tr.Cells(0).innerText = "VOLUME SUMMARY" Then
VOLUME_SUMMARY_FOUND = True
End If
If VOLUME_SUMMARY_FOUND = True Then
Worksheets("Gas Day Summary").Cells(RowNumber, 1) = tr.Cells(0).innerText
Worksheets("Gas Day Summary").Cells(RowNumber, 2) = tr.Cells(2).innerText
RowNumber = RowNumber + 1
End If
Next tr
Application.ScreenUpdating = True
Application.EnableEvents = True
End Sub
​