Data not updated from HTML Table to Excel - html

I have the following error, the data is updated in the webpage, but it is not in Excel. I use Application.OnTime to refresh the webpage.
Below are the code
Sub RefreshAction()
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
Dim URL As String
Dim Colstart As Long
Dim HTML As Variant
Dim i As Long
Dim j As Long
Dim n As Long
Dim ss As Long
Application.ScreenUpdating = False
URL = "http://register.um.edu.my/kok_kosong_bi.asp"
Set HTML = CreateObject("htmlfile") 'Create HTMLFile Object
With CreateObject("msxml2.xmlhttp") 'Get the WebPage Content
.Open "GET", URL, False
.send
HTML.Body.Innerhtml = .responseText
End With
Colstart = 1
j = 1
i = Colstart
n = 0
'Loop Through website tables
For Each Tab1 In HTML.getElementsByTagName("table")
With HTML.getElementsByTagName("table")(n)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheet1.Cells(j, i) = Td.innerText
i = i + 1
Next Td
i = Colstart
j = j + 1
Next Tr
End With
n = n + 1
i = Colstart
j = j + 1
Next Tab1
Application.ScreenUpdating = True
Application.EnableEvents = True
Debug.Print Now() + TimeValue("00:00:05")
Application.OnTime Now() + TimeValue("00:00:05"), "RefreshAction", Schedule = True
End Sub
The snapshot
As per the snapshot, the website has 7 row, but excel only capture 5 row only. I have tried every possible way, still can't find the reason. I am expecting to clear the web cache, but I could not find the reference to do so.

The numbers change on the website. It was 6 rows when I first looked, then 5, and then later 6 again.
Your code is fine, but you need Schedule:=True rather than Schedule = True (typo?), and do you really need to loop all tables? You could also Dim HTML As Object.
I think the website is rather sketchy, to be honest, if exhibiting this type of inconsistency in results.
An easy way to get all rows, at any given time, is to simply copy paste the entire table as shown below. You can link that in with your refresh code.
Option Explicit
Public Sub GetTable()
Dim sResponse As String, html As New HTMLDocument, clipboard As Object
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://register.um.edu.my/kok_kosong_bi.asp", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
html.body.innerHTML = sResponse
With ThisWorkbook.Worksheets("Sheet1")
.Cells.ClearContents
.Cells.ClearFormats
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
clipboard.SetText html.getElementsByTagName("table")(3).outerHTML
clipboard.PutInClipboard
.Cells(1, 1).PasteSpecial
End With
End Sub

Related

Question about extracting text from a specific website and printing it in excel using VBA

The webpage is "https://www.racingandsports.com/thoroughbred/jockey/jake-bayliss/27461". Below is the VBA code and questions.
Edit1: For QHarr. the way its set up it takes the data from the first 3 columns of the table. The xmlhttp is just there because I copy pasted it from a previous VBA I was working on.
Edit2: Thank you for the advice, Ron.
Edit3: #QHarr. yes I would like to be able to grab the first 3 columns from all the tables.
Sub Horse2()
Dim IE As InternetExplorer
Application.ScreenUpdating = False
Set IE = New InternetExplorer
IE.Visible = True
Set ws = ThisWorkbook.Worksheets("Sheet1")
Dim http As New XMLHTTP60, html As New HTMLDocument
Dim nodeRaceResultsTable As HTMLHtmlElement
Dim nodeTr As HTMLHtmlElement
Dim nodeDiv As HTMLHtmlElement
Dim Element1 As HTMLHtmlElement
Dim node1 As HTMLHtmlElement
Dim currentUrl As String
With IE
IE.Visible = True
IE.Navigate "https://www.racingandsports.com/thoroughbred/jockey/jake-bayliss/27461"
Do Until .readyState = 4: DoEvents: Loop
End With
With http
.Open "GET", "https://www.racingandsports.com/thoroughbred/jockey/jake-bayliss/27461", False
.send
html.body.innerHTML = .responseText
End With
The issue I have is there are multiple instances of the same class name so it will print the first table on the page. The class name here, along with the inner text, is what enables me to get text from the tables from the webpage. However I would like to be able to extract the first 3 data points from all tables on the webpage and have it printed onto excel.
For Each nodeRaceResultsTable In html.getElementsByClassName("col-md-12 table-responsive")
r = r + 1: c = 4
For Each nodeTr In nodeRaceResultsTable.getElementsByTagName("tr")
With nodeTr.getElementsByTagName("td")
If .Length Then
ws.Cells(r + 1, c + 3) = .Item(0).innerText
ws.Cells(r + 1, c + 4) = .Item(1).innerText
ws.Cells(r + 1, c + 5) = .Item(2).innerText
r = r + 1
End If
End With
Next
Next
IE.Quit
Set IE = Nothing
Application.StatusBar = ""
Application.ScreenUpdating = True
MsgBox "Input complete."
End Sub>

Webscraping in VBA where some HTML information has no way to refer to it

I have this VBA script scraping from this URL https://accessgudid.nlm.nih.gov/devices/10806378034350
I want the LOT,SERIAL, and EXPIRATION information which in the below pic, has a "Yes" or "No" inside the HTML.
How do I return just that Yes or No information?
Sub GetInnerInformation(HTMLPage As MSHTML.HTMLDocument)
Dim HTMLResult As MSHTML.IHTMLElement
Dim HTMLResults As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim HTMLCell As MSHTML.IHTMLElement
Dim RowNum As Long, ColNum As Integer
Set HTMLResults = HTMLPage.getElementsByClassName("device-attribute")
For Each HTMLResult In HTMLResults
If (HTMLResult.innerText Like "*Lot*") = True Then
Debug.Print HTMLResult.innerText, HTMLResult.outerText, HTMLResult.innerHTML
End If
Next HTMLResult
End Sub
In my Immediate Window I get:
Lot or Batch Number: Lot or Batch Number: Lot or Batch Number:
So no reference to the Yes or No that is in the HTML.
HTML Parser:
You could use a css attribute = value selector to target the span with [?] that is just before the div of interest. Then climb up to shared parent with parentElement, and move to the div of interest with NextSibling. You can then use getElementsByTagName to grab the labels nodes, and loop that nodeList to write out required info. To get the values associated with labels, you again need to use NextSibling to handle the br children within the parent div.
I use xmlhttp to make the request which is faster than opening a browser.
Option Explicit
Public Sub WriteOutYesNos()
Dim html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://accessgudid.nlm.nih.gov/devices/10806378034350", False
.send
html.body.innerHTML = .responseText
End With
Dim nodes As Object, i As Long
Set nodes = html.querySelector("[title*='A production identifier (PI) is a variable']").parentElement.NextSibling.getElementsByTagName("LABEL")
For i = 0 To nodes.Length - 3
With ActiveSheet
.Cells(i + 1, 1) = nodes(i).innerText
.Cells(i + 1, 2) = nodes(i).NextSibling.NodeValue
End With
Next
End Sub
JSON Parser:
Data is also available as json which means you can use a json parser to handle. I use jsonconverter.bas as the json parser to handle response. Download raw code from here and add to standard module called JsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
Option Explicit
Public Sub WriteOutYesNos()
Dim json As Object, ws As Worksheet, results(), i As Long, s As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
results = Array("lotBatch", "serialNumber", "manufacturingDate")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://accessgudid.nlm.nih.gov/devices/10806378034350.json", False
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
With ws
For i = LBound(results) To UBound(results)
.Cells(i + 1, 1) = results(i)
.Cells(i + 1, 2).Value = IIf(json(results(i)), "Yes", "No")
Next
End With
End Sub
XML Parser:
Results also come as xml which you can parse with xml parser provided you handle the default namespace appropriately:
Option Explicit
Public Sub WriteOutYesNos()
Dim xmlDoc As Object, ws As Worksheet, results(), i As Long
Set xmlDoc = CreateObject("MSXML2.DOMDocument")
Set ws = ThisWorkbook.Worksheets("Sheet1")
results = Array("lotBatch", "serialNumber", "manufacturingDate")
With xmlDoc
.validateOnParse = True
.setProperty "SelectionLanguage", "XPath"
.setProperty "SelectionNamespaces", "xmlns:i='http://www.fda.gov/cdrh/gudid'"
.async = False
If Not .Load("https://accessgudid.nlm.nih.gov/devices/10806378034350.xml") Then
Err.Raise .parseError.ErrorCode, , .parseError.reason
Exit Sub
End If
End With
With ws
For i = LBound(results) To UBound(results)
.Cells(i + 1, 1) = results(i)
.Cells(i + 1, 2).Value = IIf(xmlDoc.SelectSingleNode("//i:" & results(i)).Text, "Yes", "No")
Next
End With
End Sub
Tinkered around and found it. I had to hardcode the results a little but here is what I got. Let me know if you've found a more elegant answer!
Sub GetInnerInformation(HTMLPage As MSHTML.HTMLDocument)
Dim HTMLResult As MSHTML.IHTMLElement
Dim HTMLResults As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim HTMLCell As MSHTML.IHTMLElement
Dim RowNum As Long, ColNum As Integer
Dim Lot As Boolean
Dim Serial As Boolean
Dim Expiration As Boolean
Set HTMLResults = HTMLPage.getElementsByClassName("expandable-device-content")
For Each HTMLResult In HTMLResults
If (HTMLResult.innerText Like "*Lot or Batch Number*") = True Then
Debug.Print HTMLResult.innerText
If HTMLResult.innerText Like "*Lot or Batch Number: Yes*" Then
Lot = True
End If
If HTMLResult.innerText Like "*Lot or Batch Number: No*" Then
Lot = False
End If
If HTMLResult.innerText Like "*Serial Number: Yes*" Then
Serial = True
End If
If HTMLResult.innerText Like "*Serial Number: No*" Then
Serial = False
End If
If HTMLResult.innerText Like "*Expiration Date: Yes*" Then
Serial = True
End If
If HTMLResult.innerText Like "*Expiration Date: No*" Then
Serial = False
End If
Debug.Print Lot, Serial, Expiration
End If
Next HTMLResult
End Sub

Retrieving all Excel file links from a webpage

I'm trying to get all the downloadable Excel file links from the website, but having difficulty. Please help to guide me. Thanks.
Sub TYEX()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.jpx.co.jp/markets/public/short-selling/index.html"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getElementById("readArea")
Set header_links = div_result.getElementsByTagName("td")
For Each h In header_links
Set link = h.ChildNodes.item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
You had the idea down correctly, but here's a different approach:
Sub TYEX()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate "https://www.jpx.co.jp/markets/public/short-selling/index.html"
.Visible = True
Do While .Busy Or .readyState < 4
DoEvents
Loop
Dim doc As Object, tbl As Object
Set doc = .document
Set tbl = doc.getElementsByClassName("component-normal-table")(0).Children(0)
Dim r As Long, xlsArr(), a As Object
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
End With
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
End Sub
Public Function NextRow(ByVal ws As Worksheet, Optional ByVal col As Variant = 1) As Long
With ws
NextRow = .Cells(.Rows.Count, col).End(xlUp).Row + 1
End With
End Function
Breaking Down the Code
This will loop your html table rows. We start at 1, because 0 is actually just the table header.
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
This uses regular expressions to extract the url from the innerHTML property. You can see how this particular regex works here: Regex101
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
You will size your range to be the same size as your array that contains the links, then write the array to the worksheet. This is usually much faster than writing cells one-by-one.
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
You can use an attribute = value CSS selector with $ operator to say the href value must end with .xls. Then use querySelectorAll to retrieve all matched results. Using CSS selectors is a very fast and generally robust method.
Dim list As Object
Set list = ie.document.querySelectorAll("[href$='.xls']")
It is much faster to use XMLHTTP as well, rather than opening IE. Note that you can then pass these links to a function to perform a binary download or to URLMon for downloading.
Option Explicit
Public Sub Links()
Dim sResponse As String, html As HTMLDocument, list As Object, i As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.jpx.co.jp/markets/public/short-selling/index.html", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Set list = html.querySelectorAll("[href$='.xls']")
End With
For i = 0 To list.Length - 1
Debug.Print Replace$(list.item(i), "about:", "https://www.jpx.co.jp")
Next
End Sub
Example download function (though you could re-use your existing XMLHTTP object - this is just to illustrate):
Public Function DownloadFile(ByVal downloadFolder As String, ByVal downloadURL As String) As String
Dim http As Object , tempArr As Variant
Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
http.Open "GET", downloadURL, False
http.send
On Error GoTo errhand
With CreateObject("ADODB.Stream")
.Open
.Type = 1
.write http.responseBody
tempArr = Split(downloadURL, "/")
tempArr = tempArr(UBound(tempArr))
.SaveToFile downloadFolder & tempArr, 2 '< "/" on enter of downloadFolder. 2 for overwrite which is Ok if no file modifications.
.Close
End With
DownloadFile = downloadFolder & tempArr
Exit Function
errhand:
If Err.Number <> 0 Then
Debug.Print Err.Number, Err.Description
MsgBox "Download failed"
End If
DownloadFile = vbNullString
End Function
References (VBE > Tools > References):
Microsoft HTML Object Library

How do I clean up objects in Excel vba?

Public Sub D_Galoplar()
Application.ScreenUpdating = False
Dim Asay(1 To 250)
Dim Jsay(1 To 100)
For q = 2 To Sheets("Y").Columns("A:A").Find(What:="boş").Row - 1
Asay(q - 1) = Sheets("Y").Range("A" & q)
Next q
For q = 2 To Sheets("Y").Columns("C:C").Find(What:="boş").Row - 1
Jsay(q - 1) = Sheets("Y").Range("C" & q)
Next q
For w = 1 To 250
Cells.Delete Shift:=xlUp
Range("A1").Select
If Asay(w) < 1 Then Exit For
Dim elem As Object, trow As Object
Dim R&, C&, s$
With New XMLHTTP60
.Open "POST", "https://yenibeygir.com/at/getatdetaytab", False
.setRequestHeader "content-type", "application/x-www-form-urlencoded; charset=UTF-8"
.send "tab=galopTab&id=" & Asay(w)
s = .responseText
End With
With New HTMLDocument
.body.innerHTML = s
For Each elem In .getElementsByClassName("at_Galoplar")(0).Rows
For Each trow In elem.Cells
C = C + 1: Cells(R + 1, C) = trow.innerText
Next trow
C = 0: R = R + 1
Next elem
End With
Cells.UnMerge
Range("A1").Select
If Range("A1048576").End(xlUp).Row < 2 Then GoTo ATLA2
Columns("A:A").Insert
For i = 2 To Range("B1048576").End(xlUp).Row - 1
Range("A" & i) = Asay(w)
Next i
Range("O2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/4,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/400))"
Range("P2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/6,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/600))"
Range("Q2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/8,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/800))"
Range("R2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/10,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/1000))"
Range("S2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/12,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/1200))"
Range("T2").FormulaR1C1 = "=IF(ISBLANK(RC[-9]),""-"",IF(ISNUMBER(RC[-9]),RC[-9]/14,((LEFT(RC[-9],1)*6000)+(MID(RC[-9],3,2)*100)+(RIGHT(RC[-9],1)*10))/1400))"
Range("O2:T2").Copy
Range("O2:O" & Range("A1048576").End(xlUp).Row).PasteSpecial Paste:=xlPasteFormulas
Cells.Copy
Cells.PasteSpecial Paste:=xlPasteValues
Columns("O:T").Cut Columns("F:K")
Range("A2:N" & Range("A1048576").End(xlUp).Row).Copy
Sheets("Galop").Range("A" & Sheets("Galop").Range("A1048576").End(xlUp).Row + 1).PasteSpecial Paste:=xlPasteValues
ATLA2:
Cells.Delete Shift:=xlUp
Next w
End Sub
I want to get a lot of data with the For Next cycle, but after a while the page hangs. How can I reset objects at the end of each cycle?
Asay numbers
10182
10221
10279
10303
10316
10325
10360
10370
10680
11598
11629
11715
11745
12335
12385
12533
12559
13154
13393
13635
13641
13669
13673
14027
14057
14062
14228
14619
14674
14687
14743
14770
14778
15197
15217
15323
15382
15507
15775
15828
16077
16335
16510
17149
17513
17867
18532
37964
60176
66067
66255
66581
66582
66896
66998
67056
67309
67356
67379
67473
68008
68012
68162
68298
68312
68320
68332
68333
68353
68383
68545
68702
68775
68922
69445
69606
69817
69963
69968
69985
69986
70048
70202
71372
(boş)
Slowing down maybe due to throttling of network if you are trying to hit the site too many times in quick succession. This is particularly likely given your access method. Better would be to see if an API is available to bulk access info. You are likely going through many networks to get to this page as well. It may be possible to get some basic info about delays from TRACERT command from a command prompt.
You are doing a POST so remember there is a fair amount of server side stuff going on as well.
You don't need to set elem to Nothing as it only exists during your For Loop. Same for tRow.
Putting .getElementsByClassName("at_Galoplar")(0).Rows into a variable will provided faster referencing.
Write the results to an array first and then dump the array out to the sheet in one go will provide significant improvement in speed.
Using New keyword can lead to unexpected behaviour. You can create one instance of HTMLDocument and work with that provided you have good error handling in. I have had occassional cases in a loop where I have had to set HTMLDocument to Nothing before looping back round.
Personally, I would cheat and re-write this to leverage that you can issue GET requests to get the same info. I use a class to hold the XMLHTTP object, and an array to hold the results. I write the results out in one go. This takes a few seconds to run for me. The asay numbers are in Sheet1 range A1:A84.
Class module clsHTTP
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal url As String) As String
Dim sResponse As String
With http
.Open "GET", url, False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
GetString = sResponse
End With
End Function
Standard module 1
Option Explicit
Public Sub DGaloplar()
Dim asays(), ws As Worksheet, asay As Long, html As HTMLDocument
Dim http As clsHTTP, url As String, headers(), numberOfRequests As Long
headers = Array("Asay", "Tarih", "Sehir", "Kg", "Jokey", "400", "600", "800", "1000", "1200", "1400", "Ç", "Pist", "Durum")
Set http = New clsHTTP
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
asays = Application.Transpose(ws.Range("A1:A84").Value) 'Load asay values from sheet 1
Const numTableRows As Long = 11
Const numTableColumns As Long = 15
Const BASE_URL As String = "https://yenibeygir.com/at/getatdetaytab/?tab=galopTab&id="
numberOfRequests = UBound(asays)
Dim results(), headerRow As Boolean, tRows As Object, tRow As Object, iRow As Long
Dim tCells As Object, tCell As Object, r As Long, c As Long, hTable As HTMLTable
ReDim results(1 To numTableRows * numberOfRequests, 1 To numTableColumns)
Application.ScreenUpdating = False
For asay = 1 To numberOfRequests
headerRow = True
url = BASE_URL & asays(asay)
html.body.innerHTML = http.GetString(url)
Set hTable = html.querySelector(".at_Galoplar")
Set tRows = hTable.getElementsByTagName("tr")
For Each tRow In tRows
If Not headerRow Then
c = 2: r = r + 1
results(r, 1) = asays(asay)
Set tCells = tRow.getElementsByTagName("td")
For Each tCell In tCells
results(r, c) = tCell.innerText
c = c + 1
Next
End If
headerRow = False
Next
Next
With ws
.Cells(1, 3).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 3).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
Application.ScreenUpdating = True
End Sub
Refereces:
Microsoft HTML Object Library
In general Set elem = Nothing is what you need.
In your code you are assigning the variables within a for-each loop, thus even if you set them to Nothing later, there would not be a performance bonus.
Try setting those object to Nothing, like below:
Set elem = Nothing
Set trow = Nothing
I am not sure whether you need variable declarations in your loop, you can take them out of the loop, this might save some time.
But I think your HTTP requests are taking so long, not any VBA code.
UPDATE
Try setting Application.EnableEvents and Application.ScreenUpdating to False at the beggining of macro and setting them back to True at the end.

How do i extract specific data (name, details )from the website into excel with excel vba?

How do i extract specific data (name, details )from the website into excel with excel vba?
Below I am trying to get processor and warranty:
Option Explicit
Sub GetData()
Dim objIE As InternetExplorer
Dim itemELE As Object
Dim html As IHTMLDocument
Dim Processor As String
Dim warranty As String
Dim y As Integer
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = True
'navigate to page with needed data
objIE.navigate "https://www.harveynorman.com.sg/computers-tablets-and-gaming/computers/laptops/"
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
y = 1
For Each itemELE In objIE.document.getElementsByClassName("expandabaleContent")
Processor = itemELE.getElementsByTagName("d1")(0).innerText
warranty = itemELE.getElementsByClassName("d1")(0).getElementsByTagName("a")(0).textContent
Sheets("Sheet1").Range("A" & y).Value = Processor
Sheets("Sheet1").Range("B" & y).Value = warranty
y = y + 1
Next
End Sub
Screenshot of the page:
For the page shown (in your image) you can issue an XMLHTTP (XHR) GET request to grab the product info without opening a slow IE browser instance.
For the specific information:
Processor and warranty info:
If you inspect the page the info about processor and warranty appears associated with a classname facetedResults-feature-list
You can see the classname and then a dl tag housing a dt tag which has sibling dd tags. Two of these sibling dd tags are associated with the info for processor and warranty.
I use a CSS selector to grab all these dd tags which can be simplified,in this instance, to ignore the sibling dt and parent dl tags and use just:
.facetedResults-feature-list dd
The "." is a class selector. The CSS combination selection above says get the dd tags within elements with class facetedResults-feature-list
Product titles info:
The titles I get using another CSS selector of:
.facetedResults-title
This is elements with class facetedResults-title. This contains the product title.
Writing out product titles, processor and warranty info to the sheet:
A little maths shows me that the processor info repeats every 14, and that if I add 8 to the index for the processor I get the warranty info. You can see how you could write out each of the details as they occur at indices that repeat every 14. I combine the loop over the nodeList of dd elements with the titles to write out to the sheet.
VBA:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, i As Long, html As New HTMLDocument
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Dim titles As Object, targetedInfo As Object, rowCounter As Long
With html
.body.innerHTML = sResponse
Set titles = .querySelectorAll(".facetedResults-title")
Set targetedInfo = .querySelectorAll(".facetedResults-feature-list dd")
End With
With Worksheets("Sheet1")
For i = 0 To targetedInfo.Length - 1
If i Mod 14 = 0 Then
rowCounter = rowCounter + 1
.Cells(rowCounter, 1) = titles(rowCounter - 1).innerText
.Cells(rowCounter, 2) = targetedInfo(i).innerText
.Cells(rowCounter, 3) = targetedInfo(i + 8).innerText
End If
Next i
End With
Application.ScreenUpdating = True
End Sub
Output sample:
More general info:
CSS selectors:
The product info is associated with an 'expandableContent facetedResults-expandableContent-features expandableContent-is-collapsed facetedResults-expandableContent-69' class name
The prices are associated with an 'expandableContent facetedResults-expandableContent-price expandableContent-is-collapsed' class name.
You can select these by the traditional .getElementsByClassName and then loop over the collection, or, in my case, use a CSS selector for class to do the same thing, and then traverse the length of the returned nodeList.
.getElementsByClassName("expandableContent facetedResults-expandableContent-features expandableContent-is-collapsed facetedResults-expandableContent-69")
is the same as
.querySelectorAll(".expandableContent.facetedResults-expandableContent-features.expandableContent-is-collapsed.facetedResults-expandableContent-69")
The "." is the class selector.
Titles are associated with a class facetedResults-title
VBA:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, i As Long, html As New HTMLDocument
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Dim info As Object, prices As Object, titles As Object
With html
.body.innerHTML = sResponse
Set titles = .querySelectorAll(".facetedResults-title")
Set info = .querySelectorAll(".expandableContent.facetedResults-expandableContent-features.expandableContent-is-collapsed.facetedResults-expandableContent-69")
Set prices = .querySelectorAll(".expandableContent.facetedResults-expandableContent-price.expandableContent-is-collapsed")
End With
With Worksheets("Sheet1")
For i = 0 To titles.Length - 1
.Cells(i + 1, 1) = titles(i).innerText
.Cells(i + 1, 2) = info(i).innerText
.Cells(i + 1, 3) = prices(i).innerText
Next i
End With
Application.ScreenUpdating = True
End Sub
References required (VBE>Tools>References):
Microsoft HTML Object Library
Qharr has already provided some good options but in case still you want to try IE then see below code
Option Explicit
Sub GetData()
Dim objIE As InternetExplorer
Dim itemELE As Object
Dim html As IHTMLDocument
Dim Processor As String
Dim warranty As String
Dim y As Integer
'start a new browser instance
Set objIE = New InternetExplorer
'make browser visible
objIE.Visible = True
'navigate to page with needed data
objIE.navigate "https://www.lenovo.com/sg/en/laptops/c/LAPTOPS"
'wait for page to load
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
Application.Wait Now + TimeSerial(0, 0, 3)
y = 1
For Each itemELE In objIE.document.getElementsByClassName("facetedResults-feature-list")
If InStr(1, itemELE.className, "bundleList", vbTextCompare) = 0 Then
Application.Wait Now + TimeSerial(0, 0, 2)
Processor = itemELE.getElementsByTagName("dl")(0).innerText
warranty = itemELE.getElementsByTagName("dl")(4).innerText
Sheets("Sheet1").Range("A" & y).Value = Processor
Sheets("Sheet1").Range("B" & y).Value = warranty
y = y + 1
End If
Next
End Sub
Results