How do I clean up objects in Excel vba? - html

Public Sub D_Galoplar()
Application.ScreenUpdating = False
Dim Asay(1 To 250)
Dim Jsay(1 To 100)
For q = 2 To Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1
Asay(q - 1) = Sheets("Y").Range("A" & q)
Next q
For q = 2 To Sheets("Y").Columns("C:C").Find(What:="boş").Row - 1
Jsay(q - 1) = Sheets("Y").Range("C" & q)
Next q
For w = 1 To 250
Cells.Delete Shift:=xlUp
Range("A1").Select
If Asay(w) < 1 Then Exit For
Dim elem As Object, trow As Object
Dim R&, C&, s$
With New XMLHTTP60
.Open "POST", "https://yenibeygir.com/at/getatdetaytab", False
.setRequestHeader "content-type", "application/x-www-form-urlencoded; charset=UTF-8"
.send "tab=galopTab&id=" & Asay(w)
s = .responseText
End With
With New HTMLDocument
.body.innerHTML = s
For Each elem In .getElementsByClassName("at_Galoplar")(0).Rows
For Each trow In elem.Cells
C = C + 1: Cells(R + 1, C) = trow.innerText
Next trow
C = 0: R = R + 1
Next elem
End With
Cells.UnMerge
Range("A1").Select
If Range("A1048576").End(xlUp).Row < 2 Then GoTo ATLA2
Columns("A:A").Insert
For i = 2 To Range("B1048576").End(xlUp).Row - 1
Range("A" & i) = Asay(w)
Next i
Range("O2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/4,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/400))"
Range("P2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/6,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/600))"
Range("Q2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/8,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/800))"
Range("R2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/10,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/1000))"
Range("S2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/12,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/1200))"
Range("T2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/14,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/1400))"
Range("O2:T2").Copy
Range("O2:O" & Range("A1048576").End(xlUp).Row).PasteSpecial Paste:=xlPasteFormulas
Cells.Copy
Cells.PasteSpecial Paste:=xlPasteValues
Columns("O:T").Cut Columns("F:K")
Range("A2:N" & Range("A1048576").End(xlUp).Row).Copy
Sheets("Galop").Range("A" & Sheets("Galop").Range("A1048576").End(xlUp).Row + 1).PasteSpecial Paste:=xlPasteValues
ATLA2:
Cells.Delete Shift:=xlUp
Next w
End Sub
I want to get a lot of data with the For Next cycle, but after a while the page hangs. How can I reset objects at the end of each cycle?
Asay numbers
10182
10221
10279
10303
10316
10325
10360
10370
10680
11598
11629
11715
11745
12335
12385
12533
12559
13154
13393
13635
13641
13669
13673
14027
14057
14062
14228
14619
14674
14687
14743
14770
14778
15197
15217
15323
15382
15507
15775
15828
16077
16335
16510
17149
17513
17867
18532
37964
60176
66067
66255
66581
66582
66896
66998
67056
67309
67356
67379
67473
68008
68012
68162
68298
68312
68320
68332
68333
68353
68383
68545
68702
68775
68922
69445
69606
69817
69963
69968
69985
69986
70048
70202
71372
(boş)

Slowing down maybe due to throttling of network if you are trying to hit the site too many times in quick succession. This is particularly likely given your access method. Better would be to see if an API is available to bulk access info. You are likely going through many networks to get to this page as well. It may be possible to get some basic info about delays from TRACERT command from a command prompt.
You are doing a POST so remember there is a fair amount of server side stuff going on as well.
You don't need to set elem to Nothing as it only exists during your For Loop. Same for tRow.
Putting .getElementsByClassName("at_Galoplar")(0).Rows into a variable will provided faster referencing.
Write the results to an array first and then dump the array out to the sheet in one go will provide significant improvement in speed.
Using New keyword can lead to unexpected behaviour. You can create one instance of HTMLDocument and work with that provided you have good error handling in. I have had occassional cases in a loop where I have had to set HTMLDocument to Nothing before looping back round.
Personally, I would cheat and re-write this to leverage that you can issue GET requests to get the same info. I use a class to hold the XMLHTTP object, and an array to hold the results. I write the results out in one go. This takes a few seconds to run for me. The asay numbers are in Sheet1 range A1:A84.
Class module clsHTTP
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal url As String) As String
Dim sResponse As String
With http
.Open "GET", url, False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
GetString = sResponse
End With
End Function
Standard module 1
Option Explicit
Public Sub DGaloplar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "Tarih", "Sehir", "Kg", "Jokey", "400", "600", "800", "1000", "1200", "1400", "Ç", "Pist", "Durum")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
asays = Application.Transpose(ws.Range("A1:A84").Value) 'Load asay values from sheet 1
Const numTableRows As Long = 11
Const numTableColumns As Long = 15
Const BASE_URL As String = "https://yenibeygir.com/at/getatdetaytab/?tab=galopTab&id="
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Galoplar")
Set tRows = hTable.getElementsByTagName("tr")
For Each tRow In tRows
If Not headerRow Then
c = 2: r = r + 1
results(r, 1) = asays(asay)
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
Refereces:
Microsoft HTML Object Library

In general Set elem = Nothing is what you need.
In your code you are assigning the variables within a for-each loop, thus even if you set them to Nothing later, there would not be a performance bonus.

Try setting those object to Nothing, like below:
Set elem = Nothing
Set trow = Nothing
I am not sure whether you need variable declarations in your loop, you can take them out of the loop, this might save some time.
But I think your HTTP requests are taking so long, not any VBA code.
UPDATE
Try setting Application.EnableEvents and Application.ScreenUpdating to False at the beggining of macro and setting them back to True at the end.

Related

Edit 14 day weather forecast Excel VBA to include precipitation

I found the code below which works nicely and I think I can repurpose it for my needs, but does not include the precipitation. I'm relatively new to HTML so having trouble understanding what each line of code's purpose is. I've gone to the website and looked at the elements and console but can't find "p[data-testid='wxPhrase']" or the word 'children' or 'child'.
I presumed precipitation was just another child so tried adding lines like these after editing the column headers in the first sub:
Results(r + 1, 3) = Children(r).FirstChild.innerText
Results(r + 1, 4) = Children(r).PreviousSibling.PreviousSibling.PreviousSibling.FirstChild.innerText
but it gives Run-time error '438': Object doesn't support this property or method. I appreciate very much some help and education. Thanks, in advance!
Sub MiamiWeather()
Dim Data As Variant
Data = MiamiWeatherData
Range("A1:B1").Value = Array("Date", "Temperature")
Range("A2").Resize(UBound(Data), 2).Value = Data
End Sub
Function MiamiWeatherData()
Const URL = "https://weather.com/weather/tenday/l/3881cd527264bc7c99b6b541473c0085e75aa026b6bd99658c56ad9bb55bd96e"
Dim responseText As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
responseText = .responseText
End With
Dim Document As HTMLDocument
Set Document = CreateObject("HTMLFILE")
Document.body.innerHTML = responseText
Dim Children As IHTMLDOMChildrenCollection
Set Children = Document.querySelectorAll("p[data-testid='wxPhrase']")
Dim Results As Variant
ReDim Results(1 To Children.Length, 1 To 2)
Dim r As Long
For r = 0 To Children.Length - 1
Results(r + 1, 1) = Children(r).PreviousSibling.PreviousSibling.FirstChild.innerText
Results(r + 1, 2) = Children(r).PreviousSibling.FirstChild.innerText
Next
MiamiWeatherData = Results
End Function
Assuming you want the percentage, you need to resize the array to hold an extra dimension then add an extra selector within the loop. That selector can select by attribute = value and will need to work off .Children(r).PreviousSibling. Assuming, you have a still maintained Microsoft set-up you can chain querySelector at this point as shown below.
For older versions e.g., <= Windows 7 then use Results(r + 1, 3) = Children(r).PreviousSibling.Children(2).Children(0).Children(1).innerText
Option Explicit
Public Sub MiamiWeather()
Dim Data As Variant
Data = MiamiWeatherData
Range("A1:C1").value = Array("Date", "Temperature", "Precipitation")
Range("A2").Resize(UBound(Data), 3).value = Data
End Sub
Function MiamiWeatherData()
Const URL = "https://weather.com/weather/tenday/l/3881cd527264bc7c99b6b541473c0085e75aa026b6bd99658c56ad9bb55bd96e"
Dim responseText As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
responseText = .responseText
End With
Dim Document As HTMLDocument
Set Document = CreateObject("HTMLFILE")
Document.body.innerHTML = responseText
Dim Children As IHTMLDOMChildrenCollection
Set Children = Document.querySelectorAll("p[data-testid='wxPhrase']")
Dim Results As Variant
ReDim Results(1 To Children.length, 1 To 3)
Dim r As Long
For r = 0 To Children.length - 1
Results(r + 1, 1) = Children(r).PreviousSibling.PreviousSibling.FirstChild.innerText
Results(r + 1, 2) = Children(r).PreviousSibling.FirstChild.innerText
Results(r + 1, 3) = Children(r).PreviousSibling.querySelector("[data-testid=PercentageValue]").innerText
Next
MiamiWeatherData = Results
End Function

Webscraping in VBA where some HTML information has no way to refer to it

I have this VBA script scraping from this URL https://accessgudid.nlm.nih.gov/devices/10806378034350
I want the LOT,SERIAL, and EXPIRATION information which in the below pic, has a "Yes" or "No" inside the HTML.
How do I return just that Yes or No information?
Sub GetInnerInformation(HTMLPage As MSHTML.HTMLDocument)
Dim HTMLResult As MSHTML.IHTMLElement
Dim HTMLResults As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim HTMLCell As MSHTML.IHTMLElement
Dim RowNum As Long, ColNum As Integer
Set HTMLResults = HTMLPage.getElementsByClassName("device-attribute")
For Each HTMLResult In HTMLResults
If (HTMLResult.innerText Like "*Lot*") = True Then
Debug.Print HTMLResult.innerText, HTMLResult.outerText, HTMLResult.innerHTML
End If
Next HTMLResult
End Sub
In my Immediate Window I get:
Lot or Batch Number: Lot or Batch Number: Lot or Batch Number:
So no reference to the Yes or No that is in the HTML.
HTML Parser:
You could use a css attribute = value selector to target the span with [?] that is just before the div of interest. Then climb up to shared parent with parentElement, and move to the div of interest with NextSibling. You can then use getElementsByTagName to grab the labels nodes, and loop that nodeList to write out required info. To get the values associated with labels, you again need to use NextSibling to handle the br children within the parent div.
I use xmlhttp to make the request which is faster than opening a browser.
Option Explicit
Public Sub WriteOutYesNos()
Dim html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://accessgudid.nlm.nih.gov/devices/10806378034350", False
.send
html.body.innerHTML = .responseText
End With
Dim nodes As Object, i As Long
Set nodes = html.querySelector("[title*='A production identifier (PI) is a variable']").parentElement.NextSibling.getElementsByTagName("LABEL")
For i = 0 To nodes.Length - 3
With ActiveSheet
.Cells(i + 1, 1) = nodes(i).innerText
.Cells(i + 1, 2) = nodes(i).NextSibling.NodeValue
End With
Next
End Sub
JSON Parser:
Data is also available as json which means you can use a json parser to handle. I use jsonconverter.bas as the json parser to handle response. Download raw code from here and add to standard module called JsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
Option Explicit
Public Sub WriteOutYesNos()
Dim json As Object, ws As Worksheet, results(), i As Long, s As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
results = Array("lotBatch", "serialNumber", "manufacturingDate")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://accessgudid.nlm.nih.gov/devices/10806378034350.json", False
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
With ws
For i = LBound(results) To UBound(results)
.Cells(i + 1, 1) = results(i)
.Cells(i + 1, 2).Value = IIf(json(results(i)), "Yes", "No")
Next
End With
End Sub
XML Parser:
Results also come as xml which you can parse with xml parser provided you handle the default namespace appropriately:
Option Explicit
Public Sub WriteOutYesNos()
Dim xmlDoc As Object, ws As Worksheet, results(), i As Long
Set xmlDoc = CreateObject("MSXML2.DOMDocument")
Set ws = ThisWorkbook.Worksheets("Sheet1")
results = Array("lotBatch", "serialNumber", "manufacturingDate")
With xmlDoc
.validateOnParse = True
.setProperty "SelectionLanguage", "XPath"
.setProperty "SelectionNamespaces", "xmlns:i='http://www.fda.gov/cdrh/gudid'"
.async = False
If Not .Load("https://accessgudid.nlm.nih.gov/devices/10806378034350.xml") Then
Err.Raise .parseError.ErrorCode, , .parseError.reason
Exit Sub
End If
End With
With ws
For i = LBound(results) To UBound(results)
.Cells(i + 1, 1) = results(i)
.Cells(i + 1, 2).Value = IIf(xmlDoc.SelectSingleNode("//i:" & results(i)).Text, "Yes", "No")
Next
End With
End Sub
Tinkered around and found it. I had to hardcode the results a little but here is what I got. Let me know if you've found a more elegant answer!
Sub GetInnerInformation(HTMLPage As MSHTML.HTMLDocument)
Dim HTMLResult As MSHTML.IHTMLElement
Dim HTMLResults As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim HTMLCell As MSHTML.IHTMLElement
Dim RowNum As Long, ColNum As Integer
Dim Lot As Boolean
Dim Serial As Boolean
Dim Expiration As Boolean
Set HTMLResults = HTMLPage.getElementsByClassName("expandable-device-content")
For Each HTMLResult In HTMLResults
If (HTMLResult.innerText Like "*Lot or Batch Number*") = True Then
Debug.Print HTMLResult.innerText
If HTMLResult.innerText Like "*Lot or Batch Number: Yes*" Then
Lot = True
End If
If HTMLResult.innerText Like "*Lot or Batch Number: No*" Then
Lot = False
End If
If HTMLResult.innerText Like "*Serial Number: Yes*" Then
Serial = True
End If
If HTMLResult.innerText Like "*Serial Number: No*" Then
Serial = False
End If
If HTMLResult.innerText Like "*Expiration Date: Yes*" Then
Serial = True
End If
If HTMLResult.innerText Like "*Expiration Date: No*" Then
Serial = False
End If
Debug.Print Lot, Serial, Expiration
End If
Next HTMLResult
End Sub

Runtime error being caused by trs in code

I am trying to pull cash flows for a bunch of companies in a folder I have created. I am pulling the information from market watch. An example of the websites I am pulling tables from is https://www.marketwatch.com/investing/stock/aapl/financials/cash-flow. All of the Ticker symbols for each company is in column A. My code is breaking on the following line with an error of "Runtime Error "91".
Set tRow = hTable.getElementsByTagName("tr")
I know that there are trs in the HTML code. Also, I ran the code for a couple of the companies) and then when I went to do it again the code never got past the first one (I didn't have the save and close functions in there the first time because I was testing it so I exited out of each of the workbooks I did and I didn't save them).
Public Sub Companies()
Dim sResponse As String, html As HTMLDocument, hTable As Object
Application.ScreenUpdating = False
Dim Last As Long
Dim i As Integer
Dim ws As Worksheet
Last = Cells(Rows.Count, "A").End(xlUp).Row
For i = Last To 572 Step -1
M = 0
Workbooks.Open "C:***\Desktop\Stock Portfolio\Stock Valuations\Temporary Valuations\" & Cells(i, "A").Value & ".xlsx"
ThisWorkbook.Activate
Set ws = Workbooks(Cells(i, "A").Value).Sheets.Add(After:= _
Workbooks(Cells(i, "A").Value).Sheets(Workbooks(Cells(i, "A").Value).Sheets.Count))
ws.Name = "Cash Flow"
ThisWorkbook.Activate
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.marketwatch.com/investing/stock/" & Cells(i, "A").Value & "/financials/cash-flow", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
ThisWorkbook.Activate
With html
.body.innerHTML = sResponse
Set hTable = .getElementsByTagName("tbody")(0)
WriteTable hTable, 1, Workbooks(Cells(i, "A").Value).Sheets("Cash Flow")
End With
ThisWorkbook.Activate
M = 3
With html
.body.innerHTML = sResponse
Set hTable = .getElementsByTagName("tbody")(1)
WriteTable hTable, 1, Workbooks(Cells(i, "A").Value).Sheets("Cash Flow")
End With
Workbooks(Cells(i, "A")).Save
Workbooks(Cells(i, "A")).Close
Next
End Sub
I used the code above and then I used the public code below (where the problem occurs) to get the table.
Public Sub WriteTable(ByVal hTable As Object, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
startRow = (M * 20) + 1
r = startRow
With ws
Set tRow = hTable.getElementsByTagName("tr")
For Each tr In tRow
r = r + 1: c = 1
Set tCell = tr.getElementsByTagName("td")
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
Next tr
End With
End Sub
Not the ideal answer but always inspect the response you are getting. Furthermmore, check whether hTable is nothing. If I inspect the response I notice the site is on the look out for bots and blocking with a captcha.
Pardon Our Interruption...
As you were browsing www.marketwatch.com something about your browser made us think you were a bot. There are a few reasons this
might happen:
You're a power user moving through this website with super-human
speed. You've disabled JavaScript in your web browser. A third-party
browser plugin, such as Ghostery or NoScript, is preventing JavaScript
from running. Additional information is available in this support
article.
After completing the CAPTCHA below, you will immediately regain access
to www.marketwatch.com.
If this is indeed the case for you, you have a few options:
1) Search for an alternative source of the info
2) Use browser automation (selenium basic) and hope that this alone, or with some appropriate waits may get you there
3) Change IP and user agent. If you were able to originally run XHR against this page then it may be that now you have been added to a watch list for suspected bots by the site. Alternating IP and user-agent is not something I would do.

Retrieving all Excel file links from a webpage

I'm trying to get all the downloadable Excel file links from the website, but having difficulty. Please help to guide me. Thanks.
Sub TYEX()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.jpx.co.jp/markets/public/short-selling/index.html"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getElementById("readArea")
Set header_links = div_result.getElementsByTagName("td")
For Each h In header_links
Set link = h.ChildNodes.item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
You had the idea down correctly, but here's a different approach:
Sub TYEX()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate "https://www.jpx.co.jp/markets/public/short-selling/index.html"
.Visible = True
Do While .Busy Or .readyState < 4
DoEvents
Loop
Dim doc As Object, tbl As Object
Set doc = .document
Set tbl = doc.getElementsByClassName("component-normal-table")(0).Children(0)
Dim r As Long, xlsArr(), a As Object
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
End With
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
End Sub
Public Function NextRow(ByVal ws As Worksheet, Optional ByVal col As Variant = 1) As Long
With ws
NextRow = .Cells(.Rows.Count, col).End(xlUp).Row + 1
End With
End Function
Breaking Down the Code
This will loop your html table rows. We start at 1, because 0 is actually just the table header.
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
This uses regular expressions to extract the url from the innerHTML property. You can see how this particular regex works here: Regex101
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
You will size your range to be the same size as your array that contains the links, then write the array to the worksheet. This is usually much faster than writing cells one-by-one.
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
You can use an attribute = value CSS selector with $ operator to say the href value must end with .xls. Then use querySelectorAll to retrieve all matched results. Using CSS selectors is a very fast and generally robust method.
Dim list As Object
Set list = ie.document.querySelectorAll("[href$='.xls']")
It is much faster to use XMLHTTP as well, rather than opening IE. Note that you can then pass these links to a function to perform a binary download or to URLMon for downloading.
Option Explicit
Public Sub Links()
Dim sResponse As String, html As HTMLDocument, list As Object, i As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.jpx.co.jp/markets/public/short-selling/index.html", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Set list = html.querySelectorAll("[href$='.xls']")
End With
For i = 0 To list.Length - 1
Debug.Print Replace$(list.item(i), "about:", "https://www.jpx.co.jp")
Next
End Sub
Example download function (though you could re-use your existing XMLHTTP object - this is just to illustrate):
Public Function DownloadFile(ByVal downloadFolder As String, ByVal downloadURL As String) As String
Dim http As Object , tempArr As Variant
Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
http.Open "GET", downloadURL, False
http.send
On Error GoTo errhand
With CreateObject("ADODB.Stream")
.Open
.Type = 1
.write http.responseBody
tempArr = Split(downloadURL, "/")
tempArr = tempArr(UBound(tempArr))
.SaveToFile downloadFolder & tempArr, 2 '< "/" on enter of downloadFolder. 2 for overwrite which is Ok if no file modifications.
.Close
End With
DownloadFile = downloadFolder & tempArr
Exit Function
errhand:
If Err.Number <> 0 Then
Debug.Print Err.Number, Err.Description
MsgBox "Download failed"
End If
DownloadFile = vbNullString
End Function
References (VBE > Tools > References):
Microsoft HTML Object Library

Data not updated from HTML Table to Excel

I have the following error, the data is updated in the webpage, but it is not in Excel. I use Application.OnTime to refresh the webpage.
Below are the code
Sub RefreshAction()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
Dim URL As String
Dim Colstart As Long
Dim HTML As Variant
Dim i As Long
Dim j As Long
Dim n As Long
Dim ss As Long
Application.ScreenUpdating = False
URL = "http://register.um.edu.my/kok_kosong_bi.asp"
Set HTML = CreateObject("htmlfile") 'Create HTMLFile Object
With CreateObject("msxml2.xmlhttp") 'Get the WebPage Content
.Open "GET", URL, False
.send
HTML.Body.Innerhtml = .responseText
End With
Colstart = 1
j = 1
i = Colstart
n = 0
'Loop Through website tables
For Each Tab1 In HTML.getElementsByTagName("table")
With HTML.getElementsByTagName("table")(n)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheet1.Cells(j, i) = Td.innerText
i = i + 1
Next Td
i = Colstart
j = j + 1
Next Tr
End With
n = n + 1
i = Colstart
j = j + 1
Next Tab1
Application.ScreenUpdating = True
Application.EnableEvents = True
Debug.Print Now() + TimeValue("00:00:05")
Application.OnTime Now() + TimeValue("00:00:05"), "RefreshAction", Schedule = True
End Sub
The snapshot
As per the snapshot, the website has 7 row, but excel only capture 5 row only. I have tried every possible way, still can't find the reason. I am expecting to clear the web cache, but I could not find the reference to do so.
The numbers change on the website. It was 6 rows when I first looked, then 5, and then later 6 again.
Your code is fine, but you need Schedule:=True rather than Schedule = True (typo?), and do you really need to loop all tables? You could also Dim HTML As Object.
I think the website is rather sketchy, to be honest, if exhibiting this type of inconsistency in results.
An easy way to get all rows, at any given time, is to simply copy paste the entire table as shown below. You can link that in with your refresh code.
Option Explicit
Public Sub GetTable()
Dim sResponse As String, html As New HTMLDocument, clipboard As Object
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://register.um.edu.my/kok_kosong_bi.asp", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
html.body.innerHTML = sResponse
With ThisWorkbook.Worksheets("Sheet1")
.Cells.ClearContents
.Cells.ClearFormats
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText html.getElementsByTagName("table")(3).outerHTML
clipboard.PutInClipboard
.Cells(1, 1).PasteSpecial
End With
End Sub