Excel VBA - Fetch links from a website that fetches its builds dynamically - html

I've been trying to fetch data from "http://builds.reicast.com/", but the issue is that the website itself fetches its content dynamically (which is what I actually need: the master dev-build URLs). I suspect the delay while the JavaScript loads is what hinders the fetch on my side. I've also tried several different ways of fetching the build URLs, but they never appear (I'm assuming it's the same issue as mentioned above).
This is what it looks like:
Sub FetchData()
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://builds.reicast.com/", Destination:=Range("$A$1"))
        .Name = "master"
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With
End Sub
How can I get it to load the "master dev-builds" section?
Thanks for your time!

A QueryTable only sees the initial HTML, not content added later by JavaScript. You can instead use the Microsoft Internet Controls library (added via VBE > Tools > References) and include a timed loop to ensure the links are present, e.g.
Option Explicit
Public Sub GetLinks()
    Dim ie As New InternetExplorer, commits As Object, t As Date
    Const MAX_WAIT_SEC As Long = 10
    With ie
        .Visible = True
        .Navigate2 "http://builds.reicast.com/"
        While .Busy Or .readyState < 4: DoEvents: Wend
        t = Timer
        Do
            Set commits = ie.document.querySelectorAll(".commit [href]")
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While commits.Length = 0
        Debug.Print commits.Length
        Dim i As Long
        For i = 0 To commits.Length - 1
            With ActiveSheet
                .Cells(i + 1, 1) = commits.item(i).innerText
                .Cells(i + 1, 2) = commits.item(i).getAttribute("href")
            End With
        Next
        Stop '<==Delete me later
        .Quit
    End With
End Sub
If you want to write out the whole table:
Option Explicit
Public Sub GetTable()
    Dim ie As New InternetExplorer, hTable As Object, t As Date, headers(), ws As Worksheet
    Const MAX_WAIT_SEC As Long = 10
    headers = Array("Commit", "Date", "Android", "Win_x86", "Win_x64")
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "http://builds.reicast.com/"
        While .Busy Or .readyState < 4: DoEvents: Wend
        t = Timer
        Do
            On Error Resume Next
            Set hTable = ie.document.querySelector("#builds table")
            On Error GoTo 0
            If Timer - t > MAX_WAIT_SEC Then Exit Do
        Loop While hTable Is Nothing
        Writetable hTable, 1, ws
        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Quit
    End With
End Sub

Public Sub Writetable(ByVal hTable As Object, ByVal startRow As Long, ByVal ws As Worksheet)
    Dim tr As Object, td As Object, r As Long, c As Long
    For Each tr In hTable.getElementsByTagName("tr")
        r = r + 1: c = 1
        If r > 2 Then
            For Each td In tr.getElementsByTagName("td")
                Select Case c
                    Case 1, 3, 4, 5
                        ws.Cells(r - 1, c) = td.FirstChild
                    Case Else
                        ws.Cells(r - 1, c) = td.innerText
                End Select
                c = c + 1
            Next
        End If
    Next
End Sub
Example output:

Related

web scraping from google page no longer returns anything

The Excel VBA code below used to work and return the market cap, 52-week low and current price into my spreadsheet. It no longer does, though, and I cannot work out why. The class names haven't changed, but getElementsByClassName doesn't seem to return anything anymore.
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim i As Integer
Dim allElements As IHTMLElementCollection
Application.DisplayAlerts = False
Set wb = CreateObject("internetExplorer.Application")
sURL = "https://www.google.com/search?q=aapl+stock+quote"
wb.navigate sURL
wb.Visible = False
While wb.Busy
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
Set doc = wb.document.body
On Error GoTo err_clear
Set allElements = doc.getElementsByClassName("iyjjgb")
x = allElements(3).innerText
Sheet6.Cells(i + 1, 2).Value = x
x = allElements(8).innerText
Sheet6.Cells(i + 1, 3).Value = x
x = ""
x = allElements(0).innerText
Sheet6.Cells(i + 1, 4).Value = x
x = ""
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
End Sub
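A timed wait, along the lines of the first answer above, is one way to rule out a timing problem before concluding that the page has changed. A minimal sketch only; the class name iyjjgb is copied from the question and may simply no longer appear in the page Google serves, in which case no amount of waiting will help:
Sub GetQuoteWithWait()
    Dim ie As Object, els As Object, t As Single
    Const MAX_WAIT_SEC As Long = 10
    Set ie = CreateObject("InternetExplorer.Application")
    ie.Visible = False
    ie.navigate "https://www.google.com/search?q=aapl+stock+quote"
    While ie.Busy Or ie.readyState <> 4: DoEvents: Wend
    t = Timer
    Do
        DoEvents
        Set els = ie.document.getElementsByClassName("iyjjgb") 'class name taken from the question
        If Timer - t > MAX_WAIT_SEC Then Exit Do
    Loop While els.Length = 0
    If els.Length > 0 Then Debug.Print els(0).innerText Else Debug.Print "No elements found"
    ie.Quit
End Sub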

Unable to get the exact element class table when scraping data from the web using VBA

I would like to scrape a rates table from the website.
Based on the page source, the table seemed to belong to the element class etxtmed, so I wrote the VBA below. After running it, I found that it only scrapes part of the data.
I thought this was because ("etxtmed")(0) refers to the first ("etxtmed") table, so I tried several other numbers after (0); VBA first reports "Element not exist" and then raises Run-time error '91': Object variable or With block variable not set at the line r = tbl.Rows.Length - 1. Is it because I scraped the wrong class of table?
Sub CopyRateFromHKAB()
Dim ie As Object, btnmore As Object, tbl As Object
Dim rr As Object, cc As Object, r As Integer, c As Integer, i As Integer, j As Integer
ThisWorkbook.Sheets("Sheet2").UsedRange.Clear
Set ie = CreateObject("internetexplorer.application")
With ie
'.Visible = True
.navigate "https://www.hkab.org.hk/DisplayInterestSettlementRatesAction.do?lang=en"
Do
DoEvents
Loop While .readyState <> 4 Or .Busy
Set tbl = .document.getElementsByClassName("etxtmed")(0)
If tbl Is Nothing Then
MsgBox "Element not exist"
End If
End With
'get data from table
r = tbl.Rows.Length - 1
c = tbl.Rows(0).Cells.Length - 1
ReDim arr(0 To r, 0 To c)
Set rr = tbl.Rows
For i = 0 To r
Set cc = rr(i).Cells
For j = 0 To c
arr(i, j) = cc(j).innertext
Next
Next
ie.Quit
Application.ScreenUpdating = False
ThisWorkbook.Sheets("Sheet2").Cells(1, 1).Resize(r + 1, c + 1) = arr
With ThisWorkbook.Sheets("Sheet2")
.UsedRange.WrapText = False
.Columns.AutoFit
End With
End Sub
The table you want is inside an IFRAME, so you need to access that page directly: <iframe src="/hibor/listRates.do?lang=en&Submit=Detail">
Option Explicit
Sub CopyRateFromHKAB()
    Const URL = "https://www.hkab.org.hk/hibor/listRates.do?lang=en&Submit=Detail"
    Dim HTMLDoc As Object, request As Object

    ' get web page
    Set HTMLDoc = CreateObject("HTMLfile")
    Set request = CreateObject("MSXML2.XMLHTTP")
    With request
        .Open "GET", URL, False
        .send
        HTMLDoc.body.innerHTML = .responseText
    End With

    ' parse html table
    Dim wb As Workbook, r As Long, c As Long, arr
    Dim tbl As Object, t As Object, tr As Object, td As Object
    Set wb = ThisWorkbook
    Set tbl = HTMLDoc.getElementsByClassName("etxtmed")
    If tbl Is Nothing Then
        MsgBox "No tables found", vbExclamation
        Exit Sub
    Else
        If tbl(2) Is Nothing Then
            MsgBox "Table not found", vbExclamation
            Exit Sub
        Else
            r = tbl(2).Rows.Length
            ReDim arr(1 To r, 1 To 3)
            r = 1
            For Each tr In tbl(2).Rows
                c = 1
                For Each td In tr.Cells
                    arr(r, c) = td.innerText
                    c = c + 1
                Next
                r = r + 1
            Next
        End If

        ' copy to sheet
        With wb.Sheets("Sheet2")
            .Cells(1, 1).Resize(UBound(arr), UBound(arr, 2)) = arr
            .UsedRange.WrapText = False
            .Columns.AutoFit
        End With
    End If
    MsgBox "Done", vbInformation
End Sub
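If you would rather stay with the Internet Explorer object, another option is to reach into the frame through its contentDocument, much like the bm_ann_detail_iframe access further down this page. A minimal sketch, assuming the rates table sits in the first iframe on the page (the frame's index and readiness timing are assumptions here):
Sub CopyRateViaIframe()
    Dim ie As Object, frameDoc As Object, tbl As Object
    Set ie = CreateObject("InternetExplorer.Application")
    With ie
        .Visible = False
        .navigate "https://www.hkab.org.hk/DisplayInterestSettlementRatesAction.do?lang=en"
        Do: DoEvents: Loop While .Busy Or .readyState <> 4
        ' assumption: the rates page is loaded in the first iframe; it may need its own wait
        Set frameDoc = .document.getElementsByTagName("iframe")(0).contentDocument
        Set tbl = frameDoc.getElementsByClassName("etxtmed")(2) 'same index as the XMLHTTP answer above
        If Not tbl Is Nothing Then Debug.Print tbl.Rows.Length & " rows found"
        .Quit
    End With
End Sub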

HTML page update for every dropdown change event

I want to pull calls and puts data from the NSE website. On the website there are 2 dropdown boxes (refer to the image). For every dropdown change event, the table values change.
https://www.nseindia.com/option-chain
I am successfully able to update the HTML table values when the first dropdown changes by dispatching an HTML event, and I am using the same HTML event to update the table values when the second dropdown index changes, but I cannot get it to work. Can someone please help?
I am including the source code below; please advise.
'Option Data Pulling
Sub Options_Pull_Active_List()
Dim Opt_Url As String
Opt_Url = ActiveSheet.Range("G3").Value
Dim ie As New InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.Top = 0
ie.Left = 0
ie.Width = 1000
ie.Height = 750
ie.AddressBar = 0
ie.StatusBar = 0
ie.Toolbar = 0
ie.navigate Opt_Url
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
Dim objElement As HTMLObjectElement
Dim sDD As String
doc.Focus
Dim k1, k2, Opto, Sana As Integer
k1 = ActiveSheet.Range("AL7").Value
k2 = ActiveSheet.Range("AL8").Value
For Opto = k1 To k2
ActiveSheet.Range("AK10").Value = Opto
Sana = ActiveSheet.Range("AL10").Value
'instrumentType
doc.getElementById("select_symbol").Focus 'Setting Instrument Type
Application.Wait Now + TimeValue("00:00:04")
doc.getElementById("select_symbol").selectedIndex = Sana
Dim objEvent
Set objEvent = doc.createEvent("HTMLEvents")
objEvent.initEvent "change", False, True
doc.getElementById("select_symbol").dispatchEvent objEvent 'This code successfully able to updat the HTML page
Application.Wait Now + TimeValue("00:00:05")
doc.getElementById("expirySelect").selectedIndex = 2
doc.getElementById("expirySelect").dispatchEvent objEvent 'This same code does not update the table values
'-------------------------
Application.Wait Now + TimeValue("00:00:03")
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
'------------------
Dim hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object
Dim i As Integer
y = 5 ' 'Column A in Excel
Z = 25 ' 'Row 1 in Excel
Set hTable = doc.getElementsByTagName("Table")
'Set hTable = doc.getElementById("historicalData")
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each tr In hTR
Set hTD = tr.getElementsByTagName("td")
y = 5 ' Resets back to Default Column Set in the Excel Sheet
For i = 0 To hTD.Length
Sheets("M1 - Option Data Pull").Cells(Z, y).Value = hTD(i).innerText
If Trim(Sheets("M1 - Option Data Pull").Cells(Z, y).Value) = "-" Then
Sheets("M1 - Option Data Pull").Cells(Z, y).Value = 0
End If
If i = 11 Then
If hTD(i + 1).getAttribute("class") = "bg-yellow" And Sheets("M1 - Option Data Pull").Range("O1").Value = 0 Then
Sheets("M1 - Option Data Pull").Range("O1").Value = 1
Sheets("M1 - Option Data Pull").Range("O2").Value = hTD(i).innerText
End If
End If
y = y + 1
Next i
DoEvents
Z = Z + 1
Next tr
Exit For
Next bb
Exit For
Next tb
'-------------------
Next Opto
ie.Quit
ie.Visible = True
Set doc = Nothing
Set ie = Nothing
Application.DisplayAlerts = True
Application.ScreenUpdating = True
Application.EnableEvents = True
End Sub
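For reference, the change-event pattern the code above relies on looks like this in isolation: create a fresh event object for each dropdown you touch, dispatch it, then give the page time to repopulate before reading the table. A sketch only; the element IDs are taken from the question, and the bubbling flag and the fixed pause are assumptions, not a confirmed fix:
Sub DispatchDropdownChange(ByVal doc As Object, ByVal elementId As String, ByVal idx As Long)
    ' set the dropdown's index, then fire a "change" event so the page's script reacts
    Dim evt As Object
    doc.getElementById(elementId).selectedIndex = idx
    Set evt = doc.createEvent("HTMLEvents")
    evt.initEvent "change", True, True   ' bubbles:=True is an assumption; the question's code passes False
    doc.getElementById(elementId).dispatchEvent evt
End Sub

Sub ExampleUsage(ByVal doc As Object)
    Dim t As Single
    DispatchDropdownChange doc, "select_symbol", 1   ' IDs as used in the question's code
    DispatchDropdownChange doc, "expirySelect", 2
    t = Timer
    Do While Timer - t < 5                           ' crude pause while the table repopulates
        DoEvents
    Loop
End Sub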

Basics of webscraping

I want to get the prices of all the models of the Maruti Alto using web scraping. I am trying the code below to get the data, but I am not able to get it.
Sub Basics_Of_Web_Macro()
Dim myIE As Object
Dim myIEDoc As Object
'Start Internet Explorer
Set myIE = CreateObject("InternetExplorer.Application")
'if you want to see the window set this to True
myIE.Visible = False
'Now we open the page we'd like to use as a source for information
myIE.navigate "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
'We wait for the Explorer to actually open the page and finish loading
While myIE.Busy
DoEvents
Wend
'Now lets read the HTML content of the page
Set myIEDoc = myIE.document
'Time to grab the information we want
Range("A1") = myIEDoc.Title
'Then we'll get something from the inner page content by using the ID
Range("B1") = myIEDoc.Class("priceInfo clearfix")
End Sub
XHR:
You could use an XMLHTTP request and avoid the browser altogether. Loop over the nodeList returned by collecting elements with the class name cols, starting a new row every four elements and resetting the output column to 1. This turns the flat nodeList into tabular output.
VBE > Tools > References > Microsoft HTML Object Library
Option Explicit
Public Sub GetPrices()
    Dim html As HTMLDocument
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        html.body.innerHTML = .responseText
    End With
    Dim listings As Object, i As Long, r As Long, c As Long, results()
    Set listings = html.querySelectorAll(".cols")
    ReDim results(1 To (listings.Length - 2) / 4, 1 To 4)
    r = 1: c = 1
    For i = 0 To listings.Length - 2
        If i Mod 4 = 0 And i > 0 Then r = r + 1: c = 1
        results(r, c) = listings.item(i).innerText
        c = c + 1
    Next
    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Internet Explorer:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetPrices()
    Dim html As HTMLDocument
    Set html = New HTMLDocument
    Dim ie As New InternetExplorer
    With ie
        .Visible = True
        .Navigate2 "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
        While .Busy Or .readyState < 4: DoEvents: Wend
        Dim listings As Object, i As Long, r As Long, c As Long, results()
        Set listings = .document.querySelectorAll(".cols")
        ReDim results(1 To (listings.Length - 2) / 4, 1 To 4)
        r = 1: c = 1
        For i = 0 To listings.Length - 2
            If i Mod 4 = 0 And i > 0 Then r = r + 1: c = 1
            results(r, c) = listings.item(i).innerText
            c = c + 1
        Next
        .Quit
    End With
    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Scraping data from a list of href links?

I am trying to scrape a list of href links from a webpage, and then scrape values out of each linked page. The problem I am facing is that the code can only handle up to 5 links; if there are more than 5, it throws a runtime error on a seemingly random line.
I am extracting the href links from this webpage: http://www.bursamalaysia.com/market/listed-companies/company-announcements/#/?category=SH&sub_category=all&alphabetical=All&date_from=28/09/2018
Option Explicit
Sub ScrapLink()
Dim IE As New InternetExplorer, html As HTMLDocument
Application.ScreenUpdating = False
With IE
IE.Visible = False
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A100"))
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Discussion:
The problem is likely, at least from my testing, due to one of the links not having the Details of changes table, so the numberOfRows variable is set to 0, and this line:
ReDim results(1 To numberOfRows, 1 To 7)
fails with an index error because you then have (1 To 0, 1 To 7).
Using the link above in A1, 30 URLs are retrieved; one of the retrieved links does not have that table, whereas the others do.
You have a choice of how to handle this scenario. Here are some example options:
Option 1: Only process the page if numberOfRows > 0. This is the example I give below.
Option 2: Use a Select Case on numberOfRows: if Case 0, handle the page one way; Case Else, handle it as normal (a sketch follows below).
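A minimal sketch of Option 2, dropped in around the existing ReDim inside GetInfo (how the no-table case is recorded, here just the URL and title, is an illustrative choice):
Select Case numberOfRows
    Case 0
        ' page has no "Details of changes" table: record the URL and title only
        ReDim results(1 To 1, 1 To 7)
        results(1, 1) = links(u): results(1, 2) = title.innerText
    Case Else
        ReDim results(1 To numberOfRows, 1 To 7)
        For i = 0 To numberOfRows - 1
            r = i + 1
            results(r, 1) = links(u): results(r, 2) = title.innerText
            Set currentRow = data.item(i * 4 + 1)
            c = 3
            For Each td In currentRow.getElementsByTagName("td")
                results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
                c = c + 1
            Next td
        Next i
End Select
resultCollection.Add results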
Note:
1) You also want to reset the status bar with:
Application.StatusBar = False
2) I temporarily fixed the links range for testing with:
ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")
TODO:
Refactor to be more modular and run the whole process with the same IE instance. Creating a class to hold the IE object would be a good idea, providing it with methods for extracting your data, testing the number of result rows, etc. (a rough sketch follows after this list).
Add some basic error handling, for example to handle a failed website connection.
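A rough sketch of what such a class might look like; the class and method names here are purely illustrative, not part of the original code:
' Class module: clsScraper (illustrative name)
Option Explicit
Private ie As InternetExplorer

Private Sub Class_Initialize()
    Set ie = New InternetExplorer
    ie.Visible = True
End Sub

Public Sub NavigateTo(ByVal url As String)
    ie.Navigate2 url
    While ie.Busy Or ie.readyState < 4: DoEvents: Wend
End Sub

Public Function ResultRowCount() As Long
    ' number of result rows in the announcement detail table (same selectors as the code below)
    Dim data As Object
    Set data = ie.document.getElementById("bm_ann_detail_iframe").contentDocument.querySelectorAll(".ven_table tr")
    ResultRowCount = Round(data.Length / 4, 0)
End Function

Private Sub Class_Terminate()
    ie.Quit
End Sub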
Example handling using test of numberOfRows > 0:
Option Explicit
Sub ScrapeLink()
    Dim IE As New InternetExplorer
    Application.ScreenUpdating = False
    With IE
        IE.Visible = True
        IE.navigate Cells(1, 1).Value
        While .Busy Or .readyState < 4: DoEvents: Wend
        ' Application.Wait Now + TimeSerial(0, 0, 3)
        Application.StatusBar = "Trying to go to website?"
        DoEvents
        Dim links As Object, i As Long
        Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
        For i = 1 To links.Length
            With ThisWorkbook.Worksheets("Sheet1")
                .Cells(i + 1, 1) = links.item(i - 1)
            End With
        Next i
        .Quit
    End With
    Application.StatusBar = False
End Sub
Public Sub GetInfo()
    Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
    headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
    Set resultCollection = New Collection
    Dim links()
    links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")) '<== I have fixed the range here for testing
    With IE
        .Visible = True
        For u = LBound(links) To UBound(links)
            If InStr(links(u), "http") > 0 Then
                .navigate links(u)
                While .Busy Or .readyState < 4: DoEvents: Wend
                Application.Wait Now + TimeSerial(0, 0, 2)
                Dim data As Object, title As Object
                With .document.getElementById("bm_ann_detail_iframe").contentDocument
                    Set title = .querySelector(".formContentData")
                    Set data = .querySelectorAll(".ven_table tr")
                End With
                Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
                numberOfRows = Round(data.Length / 4, 0)
                If numberOfRows > 0 Then
                    ReDim results(1 To numberOfRows, 1 To 7)
                    For i = 0 To numberOfRows - 1
                        r = i + 1
                        results(r, 1) = links(u): results(r, 2) = title.innerText
                        Set currentRow = data.item(i * 4 + 1)
                        c = 3
                        For Each td In currentRow.getElementsByTagName("td")
                            results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
                            c = c + 1
                        Next td
                    Next i
                    resultCollection.Add results
                    Set data = Nothing: Set title = Nothing
                End If
            End If
        Next u
        .Quit
    End With
    Dim ws As Worksheet, item As Long
    If Not resultCollection.Count > 0 Then Exit Sub
    If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
        Set ws = Worksheets.Add
        ws.Name = "Results"
    Else
        Set ws = ThisWorkbook.Worksheets("Results")
        ws.Cells.Clear
    End If
    Dim outputRow As Long: outputRow = 2
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        For item = 1 To resultCollection.Count
            Dim arr()
            arr = resultCollection(item)
            For i = LBound(arr, 1) To UBound(arr, 1)
                .Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
                outputRow = outputRow + 1
            Next
        Next
    End With
End Sub
Sample results: