Basics of web scraping - HTML

I want to get the prices of all the Maruti Alto models using web scraping. I am trying the code below to get the data, but I am not able to get it to work.
Sub Basics_Of_Web_Macro()
    Dim myIE As Object
    Dim myIEDoc As Object

    'Start Internet Explorer
    Set myIE = CreateObject("InternetExplorer.Application")
    'If you want to see the window, set this to True
    myIE.Visible = False
    'Now we open the page we'd like to use as a source of information
    myIE.navigate "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
    'Wait for Internet Explorer to actually open the page and finish loading
    While myIE.Busy
        DoEvents
    Wend
    'Now let's read the HTML content of the page
    Set myIEDoc = myIE.document
    'Time to grab the information we want
    Range("A1") = myIEDoc.Title
    'Then we'll get something from the inner page content by using the class name
    Range("B1") = myIEDoc.Class("priceInfo clearfix")
End Sub

XHR:
You could use an XMLHTTP request and avoid the browser altogether. Loop the nodeList returned by collecting the elements with class name cols. Start a new row every 4 elements (matching the four output columns in the code below) and reset the column counter to 1, thus converting the flat nodeList into tabular output.
VBE > Tools > References > Microsoft HTML Object Library
Option Explicit

Public Sub GetPrices()
    Dim html As HTMLDocument
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        html.body.innerHTML = .responseText
    End With

    Dim listings As Object, i As Long, r As Long, c As Long, results()
    Set listings = html.querySelectorAll(".cols")
    ReDim results(1 To (listings.Length - 2) / 4, 1 To 4)
    r = 1: c = 1
    For i = 0 To listings.Length - 2
        If i Mod 4 = 0 And i > 0 Then r = r + 1: c = 1
        results(r, c) = listings.item(i).innerText
        c = c + 1
    Next
    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Internet Explorer:
Option Explicit
'VBE > Tools > References:
'  Microsoft Internet Controls

Public Sub GetPrices()
    Dim html As HTMLDocument
    Set html = New HTMLDocument
    Dim ie As New InternetExplorer

    With ie
        .Visible = True
        .Navigate2 "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim listings As Object, i As Long, r As Long, c As Long, results()
        Set listings = .document.querySelectorAll(".cols")
        ReDim results(1 To (listings.Length - 2) / 4, 1 To 4)
        r = 1: c = 1
        For i = 0 To listings.Length - 2
            If i Mod 4 = 0 And i > 0 Then r = r + 1: c = 1
            results(r, c) = listings.item(i).innerText
            c = c + 1
        Next
        .Quit
    End With
    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Related

Unable to get the exact element class table when scraping data from web using VBA

I would like to scrape the table below from the website.
(screenshot of the target table omitted)
Based on the page source, the table seemed to belong to the element class etxtmed, so I wrote the VBA below. After running this code I found that it only scrapes the data shown below:
(screenshot of the partial result omitted)
I thought this was because ("etxtmed")(0) refers to the first "etxtmed" table, so I tried several other numbers in place of (0); VBA first reports "Element not exist" and then raises Run-time error '91': "Object variable or With block variable not set" at this line of code: r = tbl.Rows.Length - 1. Is it because I scraped the wrong class of table?
Sub CopyRateFromHKAB()
    Dim ie As Object, btnmore As Object, tbl As Object
    Dim rr As Object, cc As Object, r As Integer, c As Integer, i As Integer, j As Integer

    ThisWorkbook.Sheets("Sheet2").UsedRange.Clear
    Set ie = CreateObject("internetexplorer.application")
    With ie
        '.Visible = True
        .navigate "https://www.hkab.org.hk/DisplayInterestSettlementRatesAction.do?lang=en"
        Do
            DoEvents
        Loop While .readyState <> 4 Or .Busy
        Set tbl = .document.getElementsByClassName("etxtmed")(0)
        If tbl Is Nothing Then
            MsgBox "Element not exist"
        End If
    End With

    'get data from table
    r = tbl.Rows.Length - 1
    c = tbl.Rows(0).Cells.Length - 1
    ReDim arr(0 To r, 0 To c)
    Set rr = tbl.Rows
    For i = 0 To r
        Set cc = rr(i).Cells
        For j = 0 To c
            arr(i, j) = cc(j).innertext
        Next
    Next
    ie.Quit

    Application.ScreenUpdating = False
    ThisWorkbook.Sheets("Sheet2").Cells(1, 1).Resize(r + 1, c + 1) = arr
    With ThisWorkbook.Sheets("Sheet2")
        .UsedRange.WrapText = False
        .Columns.AutoFit
    End With
End Sub
The table you want is inside an IFRAME, so you need to request that page directly: <iframe src="/hibor/listRates.do?lang=en&Submit=Detail"
Option Explicit

Sub CopyRateFromHKAB()
    Const URL = "https://www.hkab.org.hk/hibor/listRates.do?lang=en&Submit=Detail"
    Dim HTMLDoc As Object, request As Object

    ' get web page
    Set HTMLDoc = CreateObject("HTMLfile")
    Set request = CreateObject("MSXML2.XMLHTTP")
    With request
        .Open "GET", URL, False
        .send
        HTMLDoc.body.innerHTML = .responseText
    End With

    ' parse html table
    Dim wb As Workbook, r As Long, c As Long, arr
    Dim tbl As Object, t As Object, tr As Object, td As Object
    Set wb = ThisWorkbook
    Set tbl = HTMLDoc.getElementsByClassName("etxtmed")
    If tbl Is Nothing Then
        MsgBox "No tables found", vbExclamation
        Exit Sub
    Else
        If tbl(2) Is Nothing Then
            MsgBox "Table not found", vbExclamation
            Exit Sub
        Else
            r = tbl(2).Rows.Length
            ReDim arr(1 To r, 1 To 3)
            r = 1
            For Each tr In tbl(2).Rows
                c = 1
                For Each td In tr.Cells
                    arr(r, c) = td.innerText
                    c = c + 1
                Next
                r = r + 1
            Next
        End If

        'copy to sheet
        With wb.Sheets("Sheet2")
            .Cells(1, 1).Resize(UBound(arr), UBound(arr, 2)) = arr
            .UsedRange.WrapText = False
            .Columns.AutoFit
        End With
    End If
    MsgBox "Done", vbInformation
End Sub

Scraping data from website with dynamic array function in vba

I'd like to know more about how an array is used when scraping data from a website. I'm currently using the VBA below to copy data from the website. The code scrapes the data I want; however, when it comes to copying the data to the destination worksheet, it puts all of the data into cell A1. Since this VBA was developed for a previous project, where it works fine, I'm not sure which part went wrong.
Sub CopyFromHKAB()
    Dim ie As Object, btnmore As Object, tbl As Object
    Dim rr As Object, cc As Object, r As Integer, c As Integer, i As Integer, j As Integer

    ThisWorkbook.Sheets("data").UsedRange.Clear
    Set ie = CreateObject("internetexplorer.application")
    With ie
        .Visible = True
        .navigate "https://www.hkab.org.hk/DisplayMemberAction.do?sectionid=4&subsectionid=0"
        Do
            DoEvents
        Loop While .readyState <> 4 Or .Busy
        Set tbl = .document.getElementsByClassName("etxtmed")(2)
    End With

    'get data from table
    r = tbl.Rows.Length - 1
    c = tbl.Rows(0).Cells.Length - 1
    ReDim arr(0 To r, 0 To c)
    Set rr = tbl.Rows
    For i = 0 To r
        Set cc = rr(i).Cells
        For j = 0 To c
            arr(i, j) = cc(j).innertext
        Next
    Next
    ie.Quit

    Application.ScreenUpdating = False
    ThisWorkbook.Sheets("Sheet1").Cells(1, 1).Resize(r + 1, c + 1) = arr
    With ThisWorkbook.Sheets("data")
        .UsedRange.WrapText = False
        .Columns.AutoFit
    End With
End Sub
You need to pick up the right table: they are nested, so change the index to 3. Otherwise you are picking up the shared parent, and all the listings are in fact within that one child element, hence your current output.
You then need to adjust your code to skip the first (header) row.
N.B. You don't actually need IE for this, as the content you want is static; you can use XMLHTTP instead. Also note that you write the data out to a different sheet from the one you format at the end.
Sub CopyFromHKAB()
    Dim ie As Object, btnmore As Object, tbl As Object
    Dim rr As Object, cc As Object, r As Integer, c As Integer, i As Integer, j As Integer

    ThisWorkbook.Sheets("data").UsedRange.Clear
    Set ie = CreateObject("internetexplorer.application")
    With ie
        .Visible = True
        .navigate "https://www.hkab.org.hk/DisplayMemberAction.do?sectionid=4&subsectionid=0"
        Do
            DoEvents
        Loop While .readyState <> 4 Or .Busy
        Set tbl = .document.getElementsByClassName("etxtmed")(3)
    End With

    'get data from table
    r = tbl.Rows.Length - 1
    c = tbl.Rows(1).Cells.Length - 1
    ReDim arr(0 To r, 0 To c)
    Set rr = tbl.Rows
    For i = 1 To r
        Set cc = rr(i).Cells
        For j = 0 To c
            arr(i - 1, j) = cc(j).innertext
        Next
    Next
    ie.Quit

    'Application.ScreenUpdating = False
    ThisWorkbook.Sheets("Sheet1").Cells(1, 1).Resize(r + 1, c + 1) = arr
    With ThisWorkbook.Worksheets("data")
        .UsedRange.WrapText = False
        .Columns.AutoFit
    End With
End Sub
I would consider switching to XHR to avoid the overhead of a browser, and using querySelectorAll so that a CSS selector list can target only the nodes of interest.
Option Explicit

Public Sub GetHKABInfo()
    'tools > references > Microsoft HTML Object Library
    Dim html As MSHTML.HTMLDocument, xhr As Object
    Set xhr = CreateObject("MSXML2.XMLHTTP")
    Set html = New MSHTML.HTMLDocument

    With xhr
        .Open "GET", "https://www.hkab.org.hk/DisplayMemberAction.do?sectionid=4&subsectionid=0", False
        .setRequestHeader "User-Agent", "Safari/537.36"
        .send
        html.body.innerHTML = .responseText
    End With

    Dim arr() As Variant, nodes As MSHTML.IHTMLDOMChildrenCollection, i As Long
    Set nodes = html.querySelectorAll(".etxtmed .etxtmed td")
    ReDim arr(1 To nodes.Length - 1)
    For i = LBound(arr) To UBound(arr)
        arr(i) = nodes.Item(i).innertext
    Next
    ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(UBound(arr), 1) = Application.Transpose(arr)
End Sub

How to extract values from nested divs using VBA

I have looked at the solution provided in this link, Extract Table from Webpage in Excel using VBA, and it was very helpful. But I need to extract the values in the div classes (cscore_score) and not a table. Please refer to the image below.
The URL is: https://www.espncricinfo.com/scores
The div class is: cscore_score
The scores to extract are in nested divs. The sample data I want to extract from each nested div is the country and the score, i.e. INDIA and, in the next column, "416...", into the Excel sheet.
Here's a screenshot of the table structure (omitted), followed by my current code:
Public Sub GetInfo()
    Const URL As String = "https://www.espncricinfo.com/scores"
    Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .send
        html.body.innerHTML = .responseText
    End With

    Set hDiv = html.querySelector("div.cscore")
    Dim ul As Object, div As Object, r As Long, c As Long
    r = 1
    With ws
        For Each div In hDiv.getElementsByClassName("cscore_link")
            r = r + 1: c = 1
            If r > 3 Then
                For Each ul In div.getElementsByClassName("cscore_score")
                    .Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)
                    c = c + 1
                Next
            End If
        Next
    End With
End Sub
I would be grateful to receive any help to extract those scores from each div into the sheet.
You could use faster CSS selectors (using only a class is faster than a tag/type selector), which, used as shown below, also lets you reduce code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go, which is another efficiency gain.
Note that I ensure the scores remain correctly formatted on output by concatenating "'" in front of them.
If you want the scores for the same match on the same row:
Option Explicit

Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    ReDim results(1 To countries.Length / 2, 1 To 4)
    For i = 0 To countries.Length - 1 Step 2
        results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
        results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
        r = r + 1
    Next
    ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Scores on different rows for every team:
Option Explicit

Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    ReDim results(1 To countries.Length, 1 To 2)
    For i = 0 To countries.Length - 1
        results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
    Next
    ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Additional column:
Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
    Dim descs As Object, results(), i As Long, r As Long
    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")
    ReDim results(1 To countries.Length / 2, 1 To 5)
    For i = 0 To countries.Length - 1 Step 2
        results(r, 1) = descs.Item(i / 2).innerText
        results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
        results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
        r = r + 1
    Next
    ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Your request seems to be just fine. Parsing the HTML is where your problem is.
You could do something like the following (you can ignore the request part):
Option Explicit

Sub espn()
    'VBE > Tools > References > Microsoft WinHTTP Services, version 5.1
    'and Microsoft HTML Object Library
    Dim req As New WinHttpRequest
    Dim HTMLDocument As New HTMLDocument
    Dim listElement As HTMLUListElement
    Dim listItem As HTMLLIElement
    Dim sht As Worksheet
    Dim i As Long
    Dim j As Long
    Dim url As String

    url = "https://www.espncricinfo.com/scores"
    With req
        .Open "GET", url, False
        .send
        HTMLDocument.body.innerHTML = .responseText
    End With

    Set sht = ThisWorkbook.Worksheets("Name of your worksheet")
    i = 2
    For Each listElement In HTMLDocument.getElementsByClassName("cscore_competitors")
        j = 1
        For Each listItem In listElement.getElementsByTagName("li")
            sht.Cells(i, j) = listItem.getElementsByClassName("cscore_name cscore_name--long")(0).innerText
            sht.Cells(i, j + 1) = listItem.getElementsByClassName("cscore_score")(0).innerText
            j = j + 2
        Next listItem
        i = i + 1
    Next listElement
End Sub
The results would look like this (screenshot omitted).
Basically, each game is represented by a ul (unordered list) element consisting of two li elements, which contain the team names and the scores.

Extract Table from Webpage in Excel using VBA

How do I extract the table below from the webpage into Excel?
Table:
Company | Bonus Ratio | Announcement | Record | Ex-Bonus
Code:
Dim ie As SHDocVw.InternetExplorer
Set ie = New InternetExplorerMedium
Set ie = CreateObject("InternetExplorer.Application")
While ie.busy
    DoEvents
Wend
ie.Visible = True
While ie.busy
    DoEvents
Wend
Dim NavURL As String
NavURL = "https://www.moneycontrol.com/stocks/marketinfo/bonus/homebody.php?sel_year=2015"
ie.Navigate NavURL
While ie.busy
    DoEvents
Wend
Set doc = ie.document
Set hTable = doc.GetElementsByTagName("table")
y = 2 'Column B in Excel
z = 7 'Row 7 in Excel
For Each td In hTable
    Set hHead = tb.GetElementsByTagName("td")
    For Each hh In hHead
        Set hTR = hh.GetElementsByTagName("tr")
        For Each tr In hTR
Webpage: https://www.moneycontrol.com/stocks/marketinfo/bonus/homebody.php?sel_year=2015
I want to keep the Bonus Ratio in the same (text) format as it appears on the webpage; when it is copied into Excel, the Bonus Ratio gets converted to a decimal.
Your hTable is a collection as opposed to a single element, so your code should be throwing an error.
You want to target the specific table and then loop the table rows and the cells within each row. You want to check whether the second column is being processed so you can protect the formatting of the ratios, and you also want to monitor the row number to handle the merged cells at the top.
Option Explicit

Public Sub GetInfo()
    Const URL As String = "https://www.moneycontrol.com/stocks/marketinfo/bonus/homebody.php?sel_year=2015"
    Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
    headers = Array("Company", "Bonus Ratio", "Announcement", "Record", "Ex-bonus")
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .Send
        html.body.innerHTML = .responseText
    End With

    Set hTable = html.querySelector("table.dvdtbl")
    Dim td As Object, tr As Object, r As Long, c As Long
    r = 1
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        For Each tr In hTable.getElementsByTagName("tr")
            r = r + 1: c = 1
            If r > 3 Then
                For Each td In tr.getElementsByTagName("td")
                    .Cells(r - 2, c) = IIf(c = 2, "'" & td.innerText, td.innerText)
                    c = c + 1
                Next
            End If
        Next
    End With
End Sub

Scraping the data from a list of href links

I am trying to scrape a list of href links from a webpage and then scrape values from each linked page. The problem I am facing is that the code can only handle up to 5 links; if there are more than 5, it throws a runtime error on a random line.
I am extracting the href links from this webpage: http://www.bursamalaysia.com/market/listed-companies/company-announcements/#/?category=SH&sub_category=all&alphabetical=All&date_from=28/09/2018
Option Explicit

Sub ScrapLink()
    Dim IE As New InternetExplorer, html As HTMLDocument
    Application.ScreenUpdating = False
    With IE
        IE.Visible = False
        IE.navigate Cells(1, 1).Value
        While .Busy Or .readyState < 4: DoEvents: Wend
        Application.Wait Now + TimeSerial(0, 0, 3)
        Application.StatusBar = "Trying to go to website?"
        DoEvents

        Dim links As Object, i As Long
        Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
        For i = 1 To links.Length
            With ThisWorkbook.Worksheets("Sheet1")
                .Cells(i + 1, 1) = links.item(i - 1)
            End With
        Next i
        .Quit
    End With
End Sub
Public Sub GetInfo()
    Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
    headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
    Set resultCollection = New Collection
    Dim links()
    links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A100"))

    With IE
        .Visible = True
        For u = LBound(links) To UBound(links)
            If InStr(links(u), "http") > 0 Then
                .navigate links(u)
                While .Busy Or .readyState < 4: DoEvents: Wend
                Application.Wait Now + TimeSerial(0, 0, 2)

                Dim data As Object, title As Object
                With .document.getElementById("bm_ann_detail_iframe").contentDocument
                    Set title = .querySelector(".formContentData")
                    Set data = .querySelectorAll(".ven_table tr")
                End With

                Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
                numberOfRows = Round(data.Length / 4, 0)
                ReDim results(1 To numberOfRows, 1 To 7)
                For i = 0 To numberOfRows - 1
                    r = i + 1
                    results(r, 1) = links(u): results(r, 2) = title.innerText
                    Set currentRow = data.item(i * 4 + 1)
                    c = 3
                    For Each td In currentRow.getElementsByTagName("td")
                        results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
                        c = c + 1
                    Next td
                Next i
                resultCollection.Add results
                Set data = Nothing: Set title = Nothing
            End If
        Next u
        .Quit
    End With

    Dim ws As Worksheet, item As Long
    If Not resultCollection.Count > 0 Then Exit Sub
    If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
        Set ws = Worksheets.Add
        ws.Name = "Results"
    Else
        Set ws = ThisWorkbook.Worksheets("Results")
        ws.Cells.Clear
    End If

    Dim outputRow As Long: outputRow = 2
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        For item = 1 To resultCollection.Count
            Dim arr()
            arr = resultCollection(item)
            For i = LBound(arr, 1) To UBound(arr, 1)
                .Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
                outputRow = outputRow + 1
            Next
        Next
    End With
End Sub
Discussion:
The problem is most likely, at least from my testing, that one of the links does not have the "Details of changes" table, so the numberOfRows variable is set to 0 and this line:
ReDim results(1 To numberOfRows, 1 To 7)
fails with an index error because you end up with (1 To 0, 1 To 7).
Using this link in A1, 30 URLs are retrieved. This retrieved link does not have that table, whereas the others do.
You have a choice of how to handle this scenario. Here are some example options:
Option 1: Only process the page if numberOfRows > 0. This is the example given below.
Option 2: Use a Select Case on numberOfRows: Case 0 handles such a page one way, Case Else handles it as normal (see the sketch after this list).
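As a rough illustration of Option 2, the fragment below could replace the If numberOfRows > 0 ... End If block inside the page loop of GetInfo. It is only a sketch under the assumption that, for pages without the "Details of changes" table, you still want a single placeholder row holding the URL and announcement title; adjust the Case 0 branch to whatever handling you actually need.
Select Case numberOfRows
    Case 0
        'No "Details of changes" table on this page: store a one-row placeholder
        'so the URL still appears in the output (illustrative handling only)
        ReDim results(1 To 1, 1 To 7)
        results(1, 1) = links(u): results(1, 2) = title.innerText
        resultCollection.Add results
    Case Else
        'Normal processing, exactly as in the Option 1 example below
        ReDim results(1 To numberOfRows, 1 To 7)
        For i = 0 To numberOfRows - 1
            r = i + 1
            results(r, 1) = links(u): results(r, 2) = title.innerText
            Set currentRow = data.item(i * 4 + 1)
            c = 3
            For Each td In currentRow.getElementsByTagName("td")
                results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
                c = c + 1
            Next td
        Next i
        resultCollection.Add results
End Select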
Note:
1) You also want to reset the status bar with:
Application.StatusBar = False
2) I temporarily fixed the links range for testing with:
ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")
TODO:
Refactor to be more modular and run the whole process with the same IE instance. Creating a class to hold the IE object would be a good idea; give it methods for extracting your data, testing the number of result rows, etc. A minimal sketch of such a class follows this list.
Add some basic error handling, for example to handle a failed website connection.
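A minimal sketch of what that class might look like is shown below, assuming a class module named clsIEScraper; the class name and the method signatures are illustrative, not part of the original code.
'Class module: clsIEScraper (illustrative skeleton)
Option Explicit

Private ie As InternetExplorer

Private Sub Class_Initialize()
    Set ie = New InternetExplorer
    ie.Visible = True
End Sub

Public Sub NavigateTo(ByVal url As String)
    ie.navigate url
    While ie.Busy Or ie.readyState < 4: DoEvents: Wend
End Sub

'Returns the announcement detail rows inside the iframe, or Nothing if the page has none
Public Function GetDetailRows() As Object
    On Error Resume Next
    Set GetDetailRows = ie.document.getElementById("bm_ann_detail_iframe").contentDocument.querySelectorAll(".ven_table tr")
    On Error GoTo 0
End Function

Private Sub Class_Terminate()
    If Not ie Is Nothing Then ie.Quit
    Set ie = Nothing
End Sub
The main GetInfo loop could then create one instance, call NavigateTo for each link, and check whether GetDetailRows returns Nothing or an empty node list before building the results array, keeping a single IE instance alive for the whole run.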
Example handling using a test of numberOfRows > 0:
Option Explicit

Sub ScrapeLink()
    Dim IE As New InternetExplorer
    Application.ScreenUpdating = False
    With IE
        IE.Visible = True
        IE.navigate Cells(1, 1).Value
        While .Busy Or .readyState < 4: DoEvents: Wend
        ' Application.Wait Now + TimeSerial(0, 0, 3)
        Application.StatusBar = "Trying to go to website?"
        DoEvents

        Dim links As Object, i As Long
        Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
        For i = 1 To links.Length
            With ThisWorkbook.Worksheets("Sheet1")
                .Cells(i + 1, 1) = links.item(i - 1)
            End With
        Next i
        .Quit
    End With
    Application.StatusBar = False
End Sub
Public Sub GetInfo()
    Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
    headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
    Set resultCollection = New Collection
    Dim links()
    links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")) '<== I have fixed the range here for testing

    With IE
        .Visible = True
        For u = LBound(links) To UBound(links)
            If InStr(links(u), "http") > 0 Then
                .navigate links(u)
                While .Busy Or .readyState < 4: DoEvents: Wend
                Application.Wait Now + TimeSerial(0, 0, 2)

                Dim data As Object, title As Object
                With .document.getElementById("bm_ann_detail_iframe").contentDocument
                    Set title = .querySelector(".formContentData")
                    Set data = .querySelectorAll(".ven_table tr")
                End With

                Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
                numberOfRows = Round(data.Length / 4, 0)
                If numberOfRows > 0 Then
                    ReDim results(1 To numberOfRows, 1 To 7)
                    For i = 0 To numberOfRows - 1
                        r = i + 1
                        results(r, 1) = links(u): results(r, 2) = title.innerText
                        Set currentRow = data.item(i * 4 + 1)
                        c = 3
                        For Each td In currentRow.getElementsByTagName("td")
                            results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
                            c = c + 1
                        Next td
                    Next i
                    resultCollection.Add results
                    Set data = Nothing: Set title = Nothing
                End If
            End If
        Next u
        .Quit
    End With

    Dim ws As Worksheet, item As Long
    If Not resultCollection.Count > 0 Then Exit Sub
    If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
        Set ws = Worksheets.Add
        ws.Name = "Results"
    Else
        Set ws = ThisWorkbook.Worksheets("Results")
        ws.Cells.Clear
    End If

    Dim outputRow As Long: outputRow = 2
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        For item = 1 To resultCollection.Count
            Dim arr()
            arr = resultCollection(item)
            For i = LBound(arr, 1) To UBound(arr, 1)
                .Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
                outputRow = outputRow + 1
            Next
        Next
    End With
End Sub
Sample results: