file download and open through VBA - html

I want to download the Excel file attached to html via Excel vba and output it in Excel sheet. This homepage is a list of the current status of the box office that has been popular in Korean movie theaters.
http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd=
Attached file is Excel. I understand that downloading can be done using the click method through Internet search. However, an alert window appears during downloading of the file, and the date is inserted in the name of the Excel file to be downloaded. As a beginner in Excel VBA it is very difficult. So I left this question, and what logic would it be useful to implement to spread this file on an Excel sheet? I am a beginner in Excel VBA so if you give me a detailed answer, it will be really helpful.
<p class = "btn_regi">
<a href="#none" class="btn_type01" onclick="chkform('excel'); return false ;">
<strong> Excel </ strong> </a>
</ p>
The following logic I have coded myself until dawn. However, the logic was too inefficient and the results did not work so I asked for help.
Sub program_()
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Dim bridge As String
Dim WinHttp As New WinHttpRequest
Dim sResponse As String, html As New HTMLDocument, hStructure As Object, hTable As HTMLTable
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set WinHttp = CreateObject("WinHttp.WinHttpRequest.5.1")
Dim Url As String
Url = "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?"
Dim p1 As String 'parameter
Dim v1 As String
Dim p2 As String
Dim v2 As String
Dim p3 As String
Dim v3 As String
Dim p4 As String
Dim v4 As String
Dim p5 As String
Dim v5 As String
Dim v As Integer
Dim g As Integer
bridge = "&"
p1 = "loadEnd="
v1 = 0
p2 = "searchType="
v2 = "search"
p3 = "sMultiMovieYn="
v3 = ""
p4 = "sRepNationCd="
v4 = ""
p5 = "sWideAreaCd="
v5 = ""
With WinHttp
.Open "get", "" & Url & p1 & v1 & bridge & p2 & v2 & bridge & p3 & v3 & bridge & p4 & v4 & bridge & p5 & v5 & ""
.SetRequestHeader "Referer", "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?"
.SetRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.Send
.WaitForResponse ': DoEvents
sResponse = StrConv(.responseBody, vbUnicode)
Dim hforms As HTMLFormElement
With html
.body.innerHTML = sResponse
sResponse = ""
Set hTable = .getElementsByClassName("boardList03")(0)
End With
Dim Arr0() As Variant
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
r = 0
With ws
Set tRow = hTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")
ReDim Arr0(tRow.Length - 1, 10)
For Each tr In tRow
r = r + 1
Set tCell = tr.getElementsByTagName("td")
Dim j As Integer
c = 1
For Each td In tCell
If td.ID = "td_rank" Then
Arr0(r - 1, 0) = td.innerText
End If
If td.ID = "td_movie" Then
Arr0(r - 1, 1) = td.getElementsByTagName("a")(0).innerText
End If
If td.ID = "td_openDt" Then
Arr0(r - 1, 2) = td.innerText
End If
If td.ID = "td_salesAcc" Then
Arr0(r - 1, 3) = td.innerText
End If
If td.ID = "td_audiAcc" Then
Arr0(r - 1, 4) = td.innerText
End If
If td.ID = "td_scrnCnt" Then
Arr0(r - 1, 5) = td.innerText
End If
If td.ID = "td_showCnt" Then
Arr0(r - 1, 6) = td.innerText
End If
c = c + 1
Next td
Next tr
Dim k As Integer
Dim i As Integer
k = 0
For i = LBound(Arr0, 1) To UBound(Arr0, 1)
.Cells(2 + k + g, 2) = Arr0(i, 0)
.Cells(2 + k + g, 3) = Arr0(i, 1)
.Cells(2 + k + g, 4) = Arr0(i, 2)
.Cells(2 + k + g, 5) = Arr0(i, 3)
.Cells(2 + k + g, 6) = Arr0(i, 4)
.Cells(2 + k + g, 7) = Arr0(i, 5)
.Cells(2 + k + g, 8) = Arr0(i, 6)
k = k + 1
Next i
End With
Erase Arr0
Set tRow = Nothing: Set tCell = Nothing: Set tr = Nothing: Set td = Nothing
Set hforms = Nothing
Set hTable = Nothing
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
End Sub

You could just grab the table by its id and then loop the table rows and table cells within rows.
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, hTable As HTMLTable
Const MAX_WAIT_SEC As Long = 5
Application.ScreenUpdating = False
With IE
.Visible = True
.navigate "http://www.kobis.or.kr/kobis/business/stat/boxs/findFormerBoxOfficeList.do?loadEnd=0&searchType=search&sMultiMovieYn=&sRepNationCd=&sWideAreaCd="
While .Busy Or .readyState < 4: DoEvents: Wend
Set hTable = .document.getElementById("table_former")
WriteTable hTable, 1, ThisWorkbook.Worksheets("Sheet1")
.Quit
Application.ScreenUpdating = True
End With
End Sub
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long, tBody As Object
r = startRow
With ws
Dim headers As Object, header As Object, columnCounter As Long
Set headers = hTable.getElementsByTagName("th")
For Each header In headers
columnCounter = columnCounter + 1
.Cells(startRow, columnCounter) = header.innerText
Next header
startRow = startRow + 1
Set tBody = hTable.getElementsByTagName("tbody")
For Each tSection In tBody 'HTMLTableSection
Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
For Each tr In tRow
r = r + 1
Set tCell = tr.getElementsByTagName("td")
c = 1
For Each td In tCell 'DispHTMLElementCollection
.Cells(r, c).Value = td.innerText 'HTMLTableCell
c = c + 1
Next td
Next tr
Next tSection
End With
End Sub
References (VBE> Tools > References):
Microsoft HTML Object Library
Microsoft Internet Controls

Related

web scraping from google page no longer returns anything

The below Excel vba code use to work and return the market cap, 52 week low and current price into my spreadsheet. It no longer does though and cannot work out why. Class names haven't changed but the getElementsByClassName doesn't seem to return anything anymore I think.
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim i As Integer
Dim allElements As IHTMLElementCollection
Application.DisplayAlerts = False
Set wb = CreateObject("internetExplorer.Application")
sURL = "https://www.google.com/search?q=aapl+stock+quote"
wb.navigate sURL
wb.Visible = False
While wb.Busy
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
Set doc = wb.document.body
On Error GoTo err_clear
Set allElements = doc.getElementsByClassName("iyjjgb")
x = allElements(3).innerText
Sheet6.Cells(i + 1, 2).Value = x
x = allElements(8).innerText
Sheet6.Cells(i + 1, 3).Value = x
x = ""
x = allElements(0).innerText
Sheet6.Cells(i + 1, 4).Value = x
x = ""
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
End Sub

HTML Page update for every Dropbox change event

I want to pull calls and puts data from NSE wesbite,In the website, there are 2 dropdown boxes available (Refer the image).For every dropdown change event,the table value changes.
https://www.nseindia.com/option-chain
I am successfully able to update the HTML table values for the first drop down fields using HTML events,
and i am using the same "HTML event" to update the table values when the second drop down index changes,but could not get the work done,can one please help?
I am giving the source code also,please suggess
'Option Data Pulling
Sub Options_Pull_Active_List()
Dim Opt_Url As String
Opt_Url = ActiveSheet.Range("G3").Value
Dim ie As New InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.Top = 0
ie.Left = 0
ie.Width = 1000
ie.Height = 750
ie.AddressBar = 0
ie.StatusBar = 0
ie.Toolbar = 0
ie.navigate Opt_Url
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
Dim objElement As HTMLObjectElement
Dim sDD As String
doc.Focus
Dim k1, k2, Opto, Sana As Integer
k1 = ActiveSheet.Range("AL7").Value
k2 = ActiveSheet.Range("AL8").Value
For Opto = k1 To k2
ActiveSheet.Range("AK10").Value = Opto
Sana = ActiveSheet.Range("AL10").Value
'instrumentType
doc.getElementById("select_symbol").Focus 'Setting Instrument Type
Application.Wait Now + TimeValue("00:00:04")
doc.getElementById("select_symbol").selectedIndex = Sana
Dim objEvent
Set objEvent = doc.createEvent("HTMLEvents")
objEvent.initEvent "change", False, True
doc.getElementById("select_symbol").dispatchEvent objEvent 'This code successfully able to updat the HTML page
Application.Wait Now + TimeValue("00:00:05")
doc.getElementById("expirySelect").selectedIndex = 2
doc.getElementById("expirySelect").dispatchEvent objEvent 'This same code does not update the table values
'-------------------------
Application.Wait Now + TimeValue("00:00:03")
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
'------------------
Dim hTable As Object, hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, tr As Object, td As Object
Dim i As Integer
y = 5 ' 'Column A in Excel
Z = 25 ' 'Row 1 in Excel
Set hTable = doc.getElementsByTagName("Table")
'Set hTable = doc.getElementById("historicalData")
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each tr In hTR
Set hTD = tr.getElementsByTagName("td")
y = 5 ' Resets back to Default Column Set in the Excel Sheet
For i = 0 To hTD.Length
Sheets("M1 - Option Data Pull").Cells(Z, y).Value = hTD(i).innerText
If Trim(Sheets("M1 - Option Data Pull").Cells(Z, y).Value) = "-" Then
Sheets("M1 - Option Data Pull").Cells(Z, y).Value = 0
End If
If i = 11 Then
If hTD(i + 1).getAttribute("class") = "bg-yellow" And Sheets("M1 - Option Data Pull").Range("O1").Value = 0 Then
Sheets("M1 - Option Data Pull").Range("O1").Value = 1
Sheets("M1 - Option Data Pull").Range("O2").Value = hTD(i).innerText
End If
End If
y = y + 1
Next i
DoEvents
Z = Z + 1
Next tr
Exit For
Next bb
Exit For
Next tb
'-------------------
Next Opto
ie.Quit
ie.Visible = True
Set doc = Nothing
Set ie = Nothing
Application.DisplayAlerts = True
Application.ScreenUpdating = True
Application.EnableEvents = True
End Sub

How to extract values from nested divs using VBA

I have looked at the solution provided in this link Extract Table from Webpage in Excel using VBA and it was very helpful. But I need to extract the values in the div classes (cscore_score) and not a table Please refer to image below
The URL is: https://www.espncricinfo.com/scores
The div class is: cscore_score
The scores to extract is in nested divs. The sample data for each nested div I want to extract is like Country and Score i.e INDIA and in the next column "416..." into the Excel sheet.
Here's a screenshot of the table structure:
Public Sub GetInfo()
Const URL As String = "https://www.espncricinfo.com/scores"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
html.body.innerHTML = .responseText
End With
Set hDiv = html.querySelector("div.cscore")
Dim ul As Object, div As Object, r As Long, c As Long
r = 1
With ws
For Each div In hDiv.getElementsByClassName("cscore_link")
r = r + 1: c = 1
If r > 3 Then
For Each ul In div.getElementsByClassName("cscore_score")
.Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)
c = c + 1
Next
End If
Next
End With
End Sub
I would be grateful to receive any help to extract those scores from each div into the sheet.
You could use faster css selectors (using only class is faster than tag/type) which if used as shown below will allow you to also reduce your code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go - again another efficiency gain.
Note I am ensuring scores remain correctly formatted on output by concatenating "'" in front.
If you want scores for same match on same row:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length / 2, 1 To 4)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Scores on different rows for every team:
Option Explicit
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
ReDim results(1 To countries.Length, 1 To 2)
For i = 0 To countries.Length - 1
results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
Next
ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Additional column:
Public Sub GetData()
Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
Dim descs As Object, results(), i As Long, r As Long
Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.espncricinfo.com/scores", False
.send
html.body.innerHTML = .responseText
End With
Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")
ReDim results(1 To countries.Length / 2, 1 To 5)
For i = 0 To countries.Length - 1 Step 2
results(r, 1) = descs.Item(i / 2).innerText
results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
r = r + 1
Next
ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
Your request seems to be just fine. Parsing the HTML is where your problem is.
You could do something like the following (you can ignore the request part):
Option Explicit
Sub espn()
Dim req As New WinHttpRequest
Dim HTMLDocument As New HTMLDocument
Dim listElement As HTMLUListElement
Dim listItem As HTMLLIElement
Dim sht As Worksheet
Dim i As Long
Dim j As Long
Dim url As String
url = "https://www.espncricinfo.com/scores"
With req
.Open "GET", url, False
.send
HTMLDocument.body.innerHTML = .responseText
End With
Set sht = ThisWorkbook.Worksheets("Name of your worksheet")
i = 2
For Each listElement In HTMLDocument.getElementsByClassName("cscore_competitors")
j = 1
For Each listItem In listElement.getElementsByTagName("li")
sht.Cells(i, j) = listItem.getElementsByClassName("cscore_name cscore_name--long")(0).innerText
sht.Cells(i, j + 1) = listItem.getElementsByClassName("cscore_score")(0).innerText
j = j + 2
Next listItem
i = i + 1
Next listElement
End Sub
The results would look like so:
Basically each game is represented by a ul (unnumbered list) element which consists of two li elements which contain the info about the names and the score.

Scraping the data from list of href link?

I am trying to scrap a list of href link from a webpage, and then trying to scrap the value out of it. I am now facing the problem which the code only can handle up to 5 links. If the links more than 5, it will show runtime error on random line.
I am extracting the href link from these webpage:http://www.bursamalaysia.com/market/listed-companies/company-announcements/#/?category=SH&sub_category=all&alphabetical=All&date_from=28/09/2018
Option Explicit
Sub ScrapLink()
Dim IE As New InternetExplorer, html As HTMLDocument
Application.ScreenUpdating = False
With IE
IE.Visible = False
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A100"))
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Discussion:
The problem is likely, at least from my testing, due to one of the links not having the table Details of changes, so the numberOfRows variable is set to 0, and this line:
ReDim results(1 To numberOfRows, 1 To 7)
fails with an index error as you have (1 To 0, 1 To 7).
Using this link in A1 there are 30 URLs retrieved. This retrieved link does not have that table whereas the others do.
You have a choice of how to handle this scenario. Here are some example options:
Option 1: Only process the page if the numberOfRows > 0. This is the example I give.
Option 2: Have a Select Case with numberOfRows and if Case 0 then handle page in one way, Case Else handle as normal.
Note:
1) You also want to reset the status bar with:
Application.StatusBar = False
2) I temporarily fixed the links range for testing with:
ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")
TODO:
Refactor to be more modular and run the whole process with the same IE instance. Creating a class to hold the IE object would be a good idea. Provide it with methods for extracting your data, testing number of result rows etc.
Add some basic error handling, for example, to handle failed website connection.
Example handling using test of numberOfRows > 0:
Option Explicit
Sub ScrapeLink()
Dim IE As New InternetExplorer
Application.ScreenUpdating = False
With IE
IE.Visible = True
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
' Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
Application.StatusBar = false
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")) '<== I have fixed the range here for testing
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
If numberOfRows > 0 Then
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Sample results:

When importing data from the web, how do I get the data with links?

This code (like other codes) written by Precious #QHarr works well. However, when importing data, I want to retrieve the data saved in the connection. The output of the code and the data I want to receive are shown in the attached image. What kind of code can I solve? (Google Translate)
Public Sub DYarislar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("X")
Set html = New HTMLDocument
asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
Const numTableRows As Long = 250
Const numTableColumns As Long = 14
Const BASE_URL As String = "https://yenibeygir.com/at/"
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Yarislar")
Set tRows = hTable.getElementsByTagName("tr")
Const numberOfRaces As Long = 22
Dim counter As Long
counter = 1
For Each tRow In tRows
If Not headerRow Then
counter = counter + 1
If counter > numberOfRaces Then Exit For
c = 2: r = r + 1
results(r, 1) = asays(asay)
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
You only need a few small changes. You use the same Class, clsHTTP, as before, then use the module 1 code below.
Notes:
Within each source page table row the jockey column contains an a tag link element
You can access this using:
tRow.getElementsByTagName("a")(1).href
As the link is relative you need to do a text replacement to add in the base part of the URL i.e.
Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
The id is part of the href and can be extracted using Split:
Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
To allow for these additional elements in the results you need to increase the number of output column count:
Const numTableColumns As Long = 16
And adapt your table rows loop to populate the additional columns:
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
Also, adjust in the loop to ensure other columns are populated from the 4th onwards (as 2 extra columns):
c = 4
Finally, adjust your headers to include the 2 new columns:
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
VBA:
Module 1:
Option Explicit
Public Sub DYarislar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "JokeyId", "JokeyLink", "Tarih", "Sehir", "K.Cinsi", "Gr", "Msf/Pist", "Derece", "S", "Jokey", "Kilo", "G.Ç", "Hnd", "Gny", "Taki")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("X")
Set html = New HTMLDocument
asays = Application.Transpose(Sheets("Y").Range("A2:A" & Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1).Value)
Const numTableRows As Long = 250
Const numTableColumns As Long = 16
Const BASE_URL As String = "https://yenibeygir.com/at/"
Const BASE_URL2 As String = "https://yenibeygir.com"
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Yarislar")
Set tRows = hTable.getElementsByTagName("tr")
For Each tRow In tRows
If Not headerRow Then
c = 4: r = r + 1
results(r, 1) = asays(asay)
On Error Resume Next
results(r, 2) = Split(tRow.getElementsByTagName("a")(1).href, "/")(2)
results(r, 3) = Replace$(tRow.getElementsByTagName("a")(1).href, "about:", BASE_URL2)
On Error GoTo 0
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
Sample results: