Data Scraping by Tag and Class - html

I'm trying to copy data from web-site, I need the all range of sizes,Price,Amenities,Specials, Reserve. I frame below code but I'm NOT able to copy element the below is now working. getting to many errors. Can anybody please look into this?
Sub gostoreit()
Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "" &
"https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r
As Long, c As Long, item As Object
headers = Array("Size", "promo", "Reguler Price", "Online Price", "Listing Active", "features")
Set listings = .document.getElementsByTagName("l-main-container")
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
results(r, 1) = listing.getElementsByClassName("size_txt")(0).innerText 'Size
results(r, 2) = listing.getElementsByClassName("helpDiscounts ls_discountsTitleSmall")(0).innerText 'promo(example. First Month Free)
results(r, 3) = listing.getElementsByClassName("wasPrice")(0).innerText 'reguler price
results(r, 4) = listing.getElementsByClassName("ls_unit_price")(0).innerText 'online price
results(r, 5) = listing.getElementsByClassName("unitSelectButtonRES isRESBut")(0).innerText ' listing active
results(r, 6) = listing.getElementsByClassName("tableUnitType _uSpan")(0).innerText ' features
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
End Sub

Use the iframe src and then process way we have discussed before (as my preference) i.e. identify rows then dump row html into surrogate HTMLDocument variable to leverage querySelector at more granular level. I've ignored reserve, as this shows no variation and you can auto-populate these with default. If wanted they can easily be added.
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
.Navigate2 "https://www.clickandstor.com/CAS_2.5.16/sorter/controller.php?fid=1162&mode=unit-table-p&target=casDiv1&width=100%25&height=100px&js=1&displayId=lsFramer_0&u=https%3A%2F%2Fwww.gostoreit.com%2Flocations%2Fgeorgia%2Fcumming%2Fgo-store-cumming%2F&&v_in=2.5.16&dn=1559990768103&1559990768"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim html2 As HTMLDocument, rows As Object, i As Long, results(), headers()
headers = Array("Size", "Description", "On site price", "Web Price", "Offer")
Set html2 = New HTMLDocument
Do
Set rows = .document.querySelectorAll(".unitRow") '.size_txt")
Loop While rows.Length = 0
ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
On Error Resume Next
For i = 1 To rows.Length - 1
html2.body.innerHTML = rows.item(i).outerHTML
results(i, 1) = html2.querySelector(".size_txt").innerText
results(i, 2) = GetDescription(html2.querySelectorAll(".unitMoreHelpTitle, .pop_spacer_li"))
results(i, 3) = html2.querySelector(".wasPrice").innerText
results(i, 4) = html2.querySelector(".ls_unit_price").innerText
results(i, 5) = html2.querySelector(".helpDiscounts").innerText
Next
On Error GoTo 0
.Quit
End With
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetDescription(ByVal nodeList As Object)
Dim i As Long, arr()
ReDim arr(0 To nodeList.Length - 1)
For i = 0 To nodeList.Length - 1
arr(i) = nodeList.item(i).innerText
Next
GetDescription = Join$(arr, Chr$(32))
End Function
If you want more verbose method of going via iframe. I choose to navigate on to the src of the iframe but you can use .document.getElementById("lsFramer_0").contentDocument.querySelector syntax to access
Option Explicit
'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
.Navigate2 "https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"
While .Busy Or .readyState < 4: DoEvents: Wend
.Navigate2 .document.querySelector("#lsFramer_0").src
While .Busy Or .readyState < 4: DoEvents: Wend
Dim html2 As HTMLDocument, rows As Object, i As Long, results(), headers()
headers = Array("Size", "Description", "On site price", "Web Price", "Offer")
Set html2 = New HTMLDocument
Do
Set rows = .document.querySelectorAll(".unitRow") '.size_txt")
Loop While rows.Length = 0
ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
On Error Resume Next
For i = 1 To rows.Length - 1
html2.body.innerHTML = rows.item(i).outerHTML
results(i, 1) = html2.querySelector(".size_txt").innerText
results(i, 2) = GetDescription(html2.querySelectorAll(".unitMoreHelpTitle, .pop_spacer_li"))
results(i, 3) = html2.querySelector(".wasPrice").innerText
results(i, 4) = html2.querySelector(".ls_unit_price").innerText
results(i, 5) = html2.querySelector(".helpDiscounts").innerText
Next
On Error GoTo 0
.Quit
End With
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetDescription(ByVal nodeList As Object)
Dim i As Long, arr()
ReDim arr(0 To nodeList.Length - 1)
For i = 0 To nodeList.Length - 1
arr(i) = nodeList.item(i).innerText
Next
GetDescription = Join$(arr, Chr$(32))
End Function

Hi , The code I formatted bellow is running fine for me until "ReDim results" line
The problem looks to be that there is not "l-main-container" element at the web page (see picture bellow)
Sub gostoreit()
Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "" & "https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object
headers = Array("Size", "promo", "Reguler Price", "Online Price", "Listing Active", "features")
Set listings = .document.getElementsByTagName("l-main-container")
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
results(r, 1) = listing.getElementsByClassName("size_txt")(0).innerText 'Size
results(r, 2) = listing.getElementsByClassName("helpDiscounts ls_discountsTitleSmall")
(0).innerText 'promo(example. First Month Free)
results(r, 3) = listing.getElementsByClassName("wasPrice")(0).innerText 'reguler price
results(r, 4) = listing.getElementsByClassName("ls_unit_price")(0).innerText 'online
price results
results(r, 4)(r, 5) = listing.getElementsByClassName("unitSelectButtonRES isRESBut")(0).innerText ' listing active
results(r, 6) = listing.getElementsByClassName("tableUnitType _uSpan")(0).innerText ' features
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
End Sub

Related

How to modify VBA code for Excel to use with an Access table?

I got this code from a competent user, not sure if he wants to be named. The code searches the HTML content for innerText of certain tags and transfers them to an Excel table, well sorted under the headers, structured as pivot.
Public Sub GetDataFromURL()
Const URL = "URL"
Dim html As MSHTML.HTMLDocument, xhr As Object
Set html = New MSHTML.HTMLDocument
Set xhr = CreateObject("MSXML2.ServerXMLHTTP.6.0")
With xhr
.Open "POST", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send "...parameters..."
html.body.innerHTML = .responseText
End With
Dim table As MSHTML.HTMLTable, r As Long, c As Long, headers(), row As MSHTML.HTMLTableRow
Dim results() As Variant, html2 As MSHTML.HTMLDocument
headers = Array("HDR01", "HDR02", "HDR03", "HDR04")
ReDim results(1 To 100, 1 To UBound(headers) + 1)
Set table = html.querySelector("table")
Set html2 = New MSHTML.HTMLDocument
Dim lastRow As Boolean
For Each row In table.Rows
lastRow = False
Dim header As String
html2.body.innerHTML = row.innerHTML
header = Trim$(row.Children(0).innerText)
If header = "HDR01" Then
r = r + 1
Dim dict As Scripting.Dictionary: Set dict = GetBlankDictionary(headers)
On Error Resume Next
dict("HDR02") = Replace$(html2.querySelector("a").href, "about:", "https://URL")
On Error GoTo 0
End If
If dict.Exists(header) Then dict(header) = Trim$(row.Children(1).innerText)
If (header = vbNullString And html2.querySelectorAll("a").Length > 0) Then
dict("HDR03") = Replace$(html2.querySelector("a").href, "about:blank", "URL")
lastRow = True
ElseIf header = "HDR04" Then
If row.NextSibling.NodeType = 1 Then lastRow = True
End If
If lastRow Then
populateArrayFromDict dict, results, r
End If
Next
results = Application.Transpose(results)
ReDim Preserve results(1 To UBound(headers) + 1, 1 To r)
results = Application.Transpose(results)
Dim re As Object
Set re = CreateObject("VBScript.RegExp")
With re
.Global = False
.MultiLine = False
.IgnoreCase = True
.Pattern = "\s([0-9.]+)\smĀ²"
End With
Dim ie As SHDocVw.InternetExplorer
Set ie = New SHDocVw.InternetExplorer
With ie
.Visible = True
For r = LBound(results, 1) To UBound(results, 1)
If results(r, 7) <> vbNullString Then
.Navigate2 results(r, 7), headers:="Referer: " & URL
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
'On Error Resume Next
results(r, 8) = re.Execute(.document.querySelector("#anz").innerHTML)(0).Submatches(0)
'On Error GoTo 0
End If
Next
.Quit
End With
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
It works perfectly in Excel, but I need it for an Access-table. My Aceess-table named tblTab01 contains all the fields that are present in the code in the headers = array("..."), and I have disabled the following lines in the code:
results = Application.Transpose(results)
and
ActiveSheet.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
Instead, I added the following lines:
Dim db As DAO.Database
Dim strInsert
Set db = CurrentDb
strInsert = "INSERT INTO tblTab01 VALUES (results);"
db.Execute strInsert
But I only get all possible errors!
How would the code need to be modified for use with the Access table? THX
This produces same output as the Excel code. I attempted a solution that eliminated looping array but this version is actually faster.
Had to use Excel WorksheetFunction to make the Transpose method work. Make sure Excel library is selected in References.
results = Excel.WorksheetFunction.Transpose(results)
ReDim Preserve results(1 To UBound(headers) + 1, 1 To r)
results = Excel.WorksheetFunction.Transpose(results)
Uncomment the On Error lines:
On Error Resume Next
results(r, 8) = re.Execute(.document.querySelector("#anz").innerHTML)(0).Submatches(0)
On Error GoTo 0
Then instead of the With ActiveSheet block, loop through array.
Dim db As DAO.Database
Dim rs As DAO.Recordset
CurrentDb.Execute "DELETE * FROM tblNetzPortDwnLd"
Set db = CurrentDb
Set rs = db.OpenRecordset("tblNetzPortDwnLd", dbOpenDynaset)
For r = LBound(results, 1) To UBound(results, 1)
With rs
.AddNew
.Fields("zpID") = r
.Fields("zpAktenzeichen") = results(r, 1)
.Fields("zpAmtsgericht") = results(r, 2)
.Fields("zpObjekt") = results(r, 3)
.Fields("zpVerkehrswert") = results(r, 4)
.Fields("zpTermin") = results(r, 5)
.Fields("zpPdfLink") = results(r, 6)
.Fields("zpAdditLink") = results(r, 7)
.Fields("zpm2") = results(r, 8)
.Update
End With
Next
All fields in table are text type, per our chat discussion.

How to Extract the Element Name data scraping

I was trying scrape the "Element Name" not content for that element. Trying to pull it for 'data-promo-name' and the result should be '$1 first month rent' from below code.
class="ps-properties-property__units__prices col-4 col-md-3" data-promo-id="132" data-promo-name="$1 first month rent">
Website: https://www.publicstorage.com/self-storage-mi-ann-arbor/1760?sp=1760|1|Ann%20Arbor|42.28083|-83.74303|0|1|1
PFB the code:
Sub Element_Name()
Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "https://www.publicstorage.com/self-storage-mi-ann-arbor/1760?sp=1760|1|Ann%20Arbor|42.28083|-83.74303|0|1|1"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object
headers = Array("Width", "Length", "Hight/Space Type", "promo", "Reguler Price", "Online Price", "Listing Active", "features", "features1", "features2", "features3", "features4", "features5", "features6")
Set listings = .document.getElementsByClassName("row ps-properties-property__units__row ps-properties-property__units__row__desktop")
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
On Error Resume Next
results(r, 1) = listing.getElementsByClassName("ps-properties-property__units__header")(0).innerText 'Size
'results(r, 4) = listing.getElementsByClassName(Need a code here) 'Promo
results(r, 5) = listing.getElementsByClassName("ps-properties-property__units__prices__old-price")(0).innerText 'Sizet 'Reguler Price
results(r, 6) = listing.getElementsByClassName("ps-properties-property__units__prices__price")(0).innerText 'Online Price
results(r, 7) = listing.getElementsByTagName("ps-properties-property__units__prices col-1 col-md-3")(0).innerText 'Listing Active
results(r, 8) = listing.getElementsByClassName("ps-properties-property__units__feature")(0).innerText 'Features
On Error GoTo 0
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
You need to isolate the node then use the getAttribute function
node.getAttribute("data-promo-name")
The html appears different for me.
An example using your single line would be
ie.document.querySelector(".ps-properties-property__units__prices.col-4.col-md-3").getAttribute("data-promo-name")
Full code:
Option Explicit
Public Sub ElementName()
Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "https://www.publicstorage.com/self-storage-mi-ann-arbor/1760?sp=1760|1|Ann%20Arbor|42.28083|-83.74303|0|1|1"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object
headers = Array("Width", "Length", "Hight/Space Type", "promo", "Reguler Price", "Online Price", "Listing Active", "features", "features1", "features2", "features3", "features4", "features5", "features6")
Set listings = .document.getElementsByClassName("row ps-properties-property__units__row ")
Dim html2 As HTMLDocument
Set html2 = New HTMLDocument
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
On Error Resume Next
results(r, 1) = listing.getElementsByClassName("ps-properties-property__units__header")(0).innerText 'Size
html2.body.innerHTML = listing.outerHTML
results(r, 4) = html2.querySelector(".ps-properties-property__units__prices").getAttribute("data-promo-name")
'results(r, 4) = listing.getElementsByClassName(Need a code here) 'Promo
results(r, 5) = listing.getElementsByClassName("ps-properties-property__units__prices__old-price")(0).innerText 'Sizet 'Reguler Price
results(r, 6) = listing.getElementsByClassName("ps-properties-property__units__prices__price")(0).innerText 'Online Price
results(r, 7) = listing.getElementsByTagName("ps-properties-property__units__prices col-1 col-md-3")(0).innerText 'Listing Active
results(r, 8) = listing.getElementsByClassName("ps-properties-property__units__feature")(0).innerText 'Features
On Error GoTo 0
html2.body.innerHTML = vbNullString
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
End Sub
findElement(By.xpath("//div[#class='ps-properties-property__units__prices col-4 col-md-3']")).getAttribute("data-promo-name");

How to Extract the src Name data scraping

I am tring to copy the name of the image only. Trying to pull it for from below code src="images/capchs/6.png" I want to copy "images/capchs/6.png" same is for Image2 and image3 as well. I am not good in the coding, someone pls help.
Thanks in advance.
from below code:
PFB my code:
Sub SRC_Name()
Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "" & Sheets("Home").Range("C3").text
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object
headers = Array("Img1", "Img2")
Set listings = .document.getElementsByTagName("td")
Dim html2 As HTMLDocument
Set html2 = New HTMLDocument
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
On Error Resume Next
html2.body.innerHTML = listing.outerHTML
results(r, 1) = html2.querySelector(".cimg1").getAttribute("src")
results(r, 2) = html2.querySelector(".cimg1").getAttribute("src")
On Error GoTo 0
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With[![enter image description here][1]][1]
I got the code guys...
Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "" & Sheets("Home").Range("C3").text
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object
headers = Array("Width", "Length", "Hight/Space Type", "promo", "Reguler Price", "Online Price", "Listing Active", "features")
Set listings = .document.getElementById("cimg1").getElementsByTagName("img")
Dim html As HTMLDocument
Set html = New HTMLDocument
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings
r = r + 1
html.body.innerHTML = listing.outerHTML
results(r, 1) = html.querySelector("img").getAttribute("src")
On Error GoTo 0
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit

Basics of webscraping

I want to get the prices of all the models of Maruti Alto using Web scraping. I am trying the code to get the data but i am not able to get it.
Sub Basics_Of_Web_Macro()
Dim myIE As Object
Dim myIEDoc As Object
'Start Internet Explorer
Set myIE = CreateObject("InternetExplorer.Application")
'if you want to see the window set this to True
myIE.Visible = False
'Now we open the page we'd like to use as a source for information
myIE.navigate "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
'We wait for the Explorer to actually open the page and finish loading
While myIE.Busy
DoEvents
Wend
'Now lets read the HTML content of the page
Set myIEDoc = myIE.document
'Time to grab the information we want
Range("A1") = myIEDoc.Title
'Then we'll get something from teh inner page content by using the ID
Range("B1") = myIEDoc.Class("priceInfo clearfix")
End Sub
XHR:
You could use xmlhttp request and avoid browser. Loop the nodeList returned by collecting the classname cols. Start new row every 5 element and reset column to 1 for output. Thus creating tabular format for output from list format of nodeList/
VBE> Tools > References > Microsoft HTML Object Library
Option Explicit
Public Sub GetPrices()
Dim html As HTMLDocument
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
End With
Dim listings As Object, i As Long, r As Long, c As Long, results()
Set listings = html.querySelectorAll(".cols")
ReDim results(1 To (listings.Length - 2) / 4, 1 To 4)
r = 1: c = 1
For i = 0 To listings.Length - 2
If i Mod 4 = 0 And i > 0 Then r = r + 1: c = 1
results(r, c) = listings.item(i).innerText
c = c + 1
Next
ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Internet Explorer:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetPrices()
Dim html As HTMLDocument
Set html = New HTMLDocument
Dim ie As New InternetExplorer
With ie
.Visible = True
.Navigate2 "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim listings As Object, i As Long, r As Long, c As Long, results()
Set listings = .document.querySelectorAll(".cols")
ReDim results(1 To (listings.Length - 2) / 4, 1 To 4)
r = 1: c = 1
For i = 0 To listings.Length - 2
If i Mod 4 = 0 And i > 0 Then r = r + 1: c = 1
results(r, c) = listings.item(i).innerText
c = c + 1
Next
.Quit
End With
ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Scraping the data from list of href link?

I am trying to scrap a list of href link from a webpage, and then trying to scrap the value out of it. I am now facing the problem which the code only can handle up to 5 links. If the links more than 5, it will show runtime error on random line.
I am extracting the href link from these webpage:http://www.bursamalaysia.com/market/listed-companies/company-announcements/#/?category=SH&sub_category=all&alphabetical=All&date_from=28/09/2018
Option Explicit
Sub ScrapLink()
Dim IE As New InternetExplorer, html As HTMLDocument
Application.ScreenUpdating = False
With IE
IE.Visible = False
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A100"))
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Discussion:
The problem is likely, at least from my testing, due to one of the links not having the table Details of changes, so the numberOfRows variable is set to 0, and this line:
ReDim results(1 To numberOfRows, 1 To 7)
fails with an index error as you have (1 To 0, 1 To 7).
Using this link in A1 there are 30 URLs retrieved. This retrieved link does not have that table whereas the others do.
You have a choice of how to handle this scenario. Here are some example options:
Option 1: Only process the page if the numberOfRows > 0. This is the example I give.
Option 2: Have a Select Case with numberOfRows and if Case 0 then handle page in one way, Case Else handle as normal.
Note:
1) You also want to reset the status bar with:
Application.StatusBar = False
2) I temporarily fixed the links range for testing with:
ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")
TODO:
Refactor to be more modular and run the whole process with the same IE instance. Creating a class to hold the IE object would be a good idea. Provide it with methods for extracting your data, testing number of result rows etc.
Add some basic error handling, for example, to handle failed website connection.
Example handling using test of numberOfRows > 0:
Option Explicit
Sub ScrapeLink()
Dim IE As New InternetExplorer
Application.ScreenUpdating = False
With IE
IE.Visible = True
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
' Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
Application.StatusBar = false
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")) '<== I have fixed the range here for testing
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
If numberOfRows > 0 Then
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Sample results: