I have a webpage:
https://www.brcdirectory.com/InternalSite/Site.aspx?BrcSiteCode=1832583
I want to retrieve some text from this page, from within a HTML <Span ID>.
<span id="ctl00_ContentPlaceHolder1_FormView1_GridView1_ctl02_lb_ExpiryDate">Expiry Date : 07/12/2017</span>
I have IE 11.0.9600.18639
Via Excel, I am using the below code to open IE 11, navigate to the page and want to try and display a message box of the text inside the <SPAN>.
Code:
Option Explicit
Sub GoToWebsiteTest()
Application.ScreenUpdating = False
Application.DisplayAlerts = False
Dim appIE As Object
Dim objElement As Object
Dim objCollection As Object
Dim i As Long, LastRow As Long, sFolder As String
Dim sURL As String, FILE As String
LastRow = Range("I" & Rows.Count).End(xlUp).Row
For i = 6 To LastRow
Set appIE = New InternetExplorerMedium
sURL = "https://www.brcdirectory.com/InternalSite/Site.aspx?BrcSiteCode=" & Range("I392").Value
With appIE
.navigate sURL
.Visible = True
End With
Do While appIE.Busy Or appIE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop
Set objCollection = appIE.document.getElementById("ctl00_ContentPlaceHolder1_FormView1_GridView1_ctl02_lb_ExpiryDate")
MsgBox Replace(objCollection.innerText, "Expiry Date : ", "")
appIE.Quit
Set appIE = Nothing
Next i
Application.ScreenUpdating = True
Application.DisplayAlerts = True
MsgBox "All BRCs Succesfully Updated."
End Sub
I have tried everything! I have tried so many variations of this line where I get the error:
Do While appIE.Busy Or appIE.READYSTATE <> READYSTATE_COMPLETE
But alas I get this annoying error:
Runtime Error: -2147467259 (80004005)
Method 'Busy' of object 'IWebBrowser2' failed.
Please, please can someone show me what i am doing wrong. This is driving me crazy. Thanks in advance.
If you don't want to use the "get from web" you can use this code.
Sub expiry()
Dim RE As Object
Dim HTML As String
Set RE = CreateObject("vbscript.regexp")
HTML = GetHTML("https://www.brcdirectory.com/InternalSite/Site.aspx?BrcSiteCode=1832583")
'Expiry Date : 07/12/2017
RE.Pattern = "(Expiry Date : \d{2}\/\d{2}\/\d{4})"
RE.Global = True
RE.IgnoreCase = True
Set Matches = RE.Execute(HTML)
ExpiryDate = Matches.Item(0).submatches.Item(0)
End Sub
Function GetHTML(URL As String) As String
Dim HTML As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.Send
GetHTML = .ResponseText
End With
End Function
ExpiryDate will contain the text you wanted (I think).
If you only wanted the actual date you can use RE.Pattern = "Expiry Date : (\d{2}\/\d{2}\/\d{4})"
EDIT;
In response to comments below:
This is the references I have enabled
EDIT based on download to textfile.
Private Declare Function URLDownloadToFileA Lib "urlmon" (ByVal pCaller As Long, _
ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, _
ByVal lpfnCB As Long) As Long
Sub expiry()
Dim RE As Object
Dim HTML As String
Dim MyData As String
Set RE = CreateObject("vbscript.regexp")
DownloadFile "https://www.brcdirectory.com/InternalSite/Site.aspx?BrcSiteCode=1832583", "C:\TEST\goog.txt"
Open "C:\TEST\goog.txt" For Binary As #1
HTML = Space$(LOF(1))
Get #1, , HTML
Close #1
'Expiry Date : 07/12/2017
RE.Pattern = "(Expiry Date : \d{2}\/\d{2}\/\d{4})"
RE.Global = True
RE.IgnoreCase = True
Set Matches = RE.Execute(HTML)
ExpiryDate = Matches.Item(0).submatches.Item(0)
End Sub
Private Function DownloadFile(URL As String, LocalFilename As String) As Boolean
'Thanks Mentalis:)
Dim lngRetVal As Long
lngRetVal = URLDownloadToFileA(0, URL, LocalFilename, 0, 0)
If lngRetVal = 0 Then DownloadFile = True
End Function
EDIT again.
I managed to resolve this by using the following code:
Option Explicit
Private ieBrowser As InternetExplorer
Sub GetBRCText()
Dim i As Long, LastRow As Long
Dim a As Range, b As Range
Dim strDocHTML As String, strDocHTML2 As String
Dim dteStartTime As Date
Application.ScreenUpdating = False
Application.DisplayAlerts = False
On Error Resume Next
LastRow = ThisWorkbook.ActiveSheet.Range("I" & Rows.Count).End(xlUp).Row
Set a = Range("I6:I" & LastRow)
'Create a browser object
Set ieBrowser = CreateObject("internetexplorer.application")
For Each b In a.Rows
If Not IsEmpty(b) Then
'Start Browsing loop
ieBrowser.navigate "https://www.brcdirectory.com/InternalSite/Site.aspx?BrcSiteCode=" & b.Value
dteStartTime = Now
Do While ieBrowser.READYSTATE <> READYSTATE_COMPLETE
If DateDiff("s", dteStartTime, Now) > 240 Then Exit Sub
Loop
On Error Resume Next
strDocHTML = ieBrowser.document.getElementById("ctl00_ContentPlaceHolder1_FormView1_GridView1_ctl02_lb_ExpiryDate").innerHTML
strDocHTML2 = ieBrowser.document.getElementById("ctl00_ContentPlaceHolder1_FormView1_GridView1_ctl02_lb_Grade").innerHTML
b.Offset(0, 2).Value = Replace(strDocHTML, "Expiry Date : ", "")
b.Offset(0, 1).Value = Replace(strDocHTML2, "Grade : ", "")
End If
Next b
ieBrowser.Quit
Set ieBrowser = Nothing
Application.ScreenUpdating = True
Application.DisplayAlerts = True
End Sub
Related
The code below navigates to a webpage, fills search boxes with queries, and submits to the results page. However, the final element collection in the script, tdtags, which is defined after the redirect, is pulling data from the original search page, rather than the results page. I currently have the while ie.busy loop and a timed delay in the script, neither of which works. I have also tried waiting until an element only present in the results page becomes available in the html, but this also does not work.
Dim twb As Workbook
Dim ie As Object
Set twb = ThisWorkbook
twb.Activate
Set ie = CreateObject("internetexplorer.application")
'church = Sheets("Control").Range("A2").Value
'minister = Sheets("Control").Range("A4").Value
location = "London" 'Sheets("Control").Range("A6").Value
'denomination = Sheets("Control").Range("A8").Value
With ie
.navigate "http://www.ukchurch.org/index.php"
.Visible = True
Do While .Busy Or .ReadyState <> 4
DoEvents
Loop
End With
Application.Wait (Now + TimeValue("00:00:02"))
Set intags = ie.document.getelementsbytagname("input")
For Each intag In intags
If intag.getattribute("name") = "name" Then
If church <> "" Then
intag.Value = church
End If
ElseIf intag.getattribute("name") = "minister" Then
If minister <> "" Then
intag.Value = minister
End If
ElseIf intag.getattribute("name") = "location" Then
If location <> "" Then
intag.Value = location
End If
Else
End If
Next intag
Set dropopt = ie.document.getelementsbytagname("select")
For Each dropo In dropopt
If dropo.classname = "DenominationDropDown" Then
Set opttags = dropo.getelementsbytagname("option")
For Each opt In opttags
If opt.innertext = denomination Then
opt.Selected = True
End If
Next opt
End If
Next dropo
On Error Resume Next
For Each intag In intags
If intag.getattribute("src") = "images/ukchurch/button-go.jpg" Then
intag.Click
Do While ie.Busy Or ie.ReadyState <> 4
DoEvents
Loop
Application.Wait (Now + TimeValue("00:00:03"))
Exit For
End If
Next intag
Application.Wait (Now + TimeValue("00:00:03"))
Set tdtags = ie.document.getelementsbytagname("td")
For Each td In tdtags
If td.classname = "pText" Then
Debug.Print td.innertext
Debug.Print ie.locationURL
pagecount = Right(td.innertext, InStr(td.innertext, ":"))
End If
Next td
Debug.Print pagecount
End Sub
Any diagnosis would be appreciated.
Automating IE is a pain, so avoid it.
The following function requests the results page directly.
Public Function GetSearchResult(Optional ByVal ResultPage As Integer = 0, Optional ByVal ChurchName As String = "", Optional ByVal Minister As String = "", Optional ByVal ChurchLocation As String = "", Optional ByVal Denomination As String = "") As Object
Dim Request As Object: Set Request = CreateObject("MSXML2.serverXMLHTTP")
Dim Result As Object: Set Result = CreateObject("htmlfile")
Request.Open "POST", "http://www.ukchurch.org/searchresults1.php", False
Request.setRequestHeader "content-type", "application/x-www-form-urlencoded"
Request.send IIf(ResultPage = 0, "", "page=" & ResultPage & "&") & "name=" & ChurchName & "&minister=" & Minister & "&location=" & ChurchLocation & "&denomination=" & Denomination
Result.body.innerHTML = Request.responseText
Set GetSearchResult = Result
End Function
An example which prints the contents of the td with classname pText inside the table containing the search results
Sub Main()
Dim Document As Object
Set Document = GetSearchResult(ChurchLocation:="London")
Dim ResultRows as Object
Dim ResultRow As Object
Set ResultRows = Document.getElementsByTagName("table")(8).getElementsByTagName("td")
For Each ResultRow in ResultRows
If ResultRow.Classname = "pText" Then
Debug.print ResultRow.innerText
End If
Next
End Sub
Update
You need to add a couple of References to your VBA project to make the following code work.
In the VBA Editor, Goto the Tools Menu, Click References and in the dialog that opens add a check next to the following two items: Microsoft XML, v6.0 and Microsoft HTML Object Library (
Public Function GetChurchDetails(ByVal ChurchID As String) As MSHTML.HTMLDocument
Dim Request As New MSXML2.ServerXMLHTTP60
Dim Result As New MSHTML.HTMLDocument
Request.Open "GET", "http://www.ukchurch.org/churchdetails.php?churchid=" & ChurchID, False
Request.send
Result.body.innerHTML = Request.responseText
Set GetChurchDetails = Result
End Function
Sub Main2()
Dim Document As MSHTML.HTMLDocument
Dim Church As MSHTML.HTMLDocument
Set Document = GetSearchResult(ChurchLocation:="London")
Dim ResultRows As MSHTML.IHTMLElementCollection
Dim ResultRow As MSHTML.IHTMLElement
Dim ChurchID As String
'Set ResultRows = Document.getElementsByTagName("table")(8).getElementsByTagName("td")
' all result links on searchresults1.php have a classname of resultslink which makes getting them much easier
Set ResultRows = Document.getElementsByClassName("resultslink")
For Each ResultRow In ResultRows
ChurchID = ResultRow.getAttribute("href")
ChurchID = Mid(ChurchID, InStr(1, ChurchID, "=") + 1)
Set Church = GetChurchDetails(ChurchID)
' code to read data from the page using Church as the Document
' eg: Church.getElemenetsByTagName("td").....
Next
End Sub
You only need to use the "post" mode when your submitting data, for everything else you can use "get"
My webscrape stopped working. The owner changed the html.
I believe it is the Set allElements = doc.getElementsByClassName("el-col el-col-8") line that needs changing.
I am trying to grab text from the webpage that includes the "52-week Range (undefined)" section. I managed to grab text from before and after but not the section I need. An example webpage is https://www.gurufocus.com/stock/gliba/summary and my code should fill my cell with "38.72 - 73.63" after I do some trimming.
I need to do it this way so I can get my head round it and change it in the future when necessary so please just focus on correcting my set line of code (assuming that is the problem!) rather than a whole new more sophisticated method as it will be beyond me. (My other set line of code does what I want it to do.)
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim incomeStmtURLs As Variant
Dim sURL As String
Dim lastrow As Long
Dim allRowOfData As Object
Dim i As Integer
Dim allElements As IHTMLElementCollection
Dim anElement As IHTMLElement
Dim aCell As HTMLTableCell
Application.DisplayAlerts = False
Call ToggleEvents(False)
incomeStmtURLs = Range("Sheet1!h1:h2").Value
For i = 1 To UBound(incomeStmtURLs)
Set wb = CreateObject("internetExplorer.Application")
sURL = incomeStmtURLs(i, 1)
wb.navigate sURL
wb.Visible = False
While wb.Busy
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
Set doc = wb.document
On Error GoTo err_clear
Set allElements = doc.getElementsByClassName("el-col el-col-8")
While allElements.Length = 0
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
x = allElements(0).innerText
' Debug.Print x
Sheet6.Cells(i + 1, 2).Value = Trim(Replace(Mid(x, InStr(1, x, "52-Week Range (undefined)") + 25, 25), vbLf, ""))
Set allElements = doc.getElementsByClassName("fs-x-large fc-primary fw-bolder")
x = allElements(0).innerText
Sheet6.Cells(i + 1, 4).Value = Trim(Replace(Mid(x, InStr(1, x, "$") + 1, 7), vbLf, ""))
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
Next i
Call ToggleEvents(True)
End Sub
Sub ToggleEvents(blnState As Boolean)
Application.DisplayAlerts = blnState
Application.EnableEvents = blnState
If blnState Then Application.CutCopyMode = False
If blnState Then Application.StatusBar = False
End Sub
The page dynamically updates content as you scroll down. You likely need to scroll that part of the page into view then use grab all the elements with classname statictics-item then take the n-2 index e.g. Without the scrolling part:
Set elems = ie.document.getElementsByClassName("statictics-item")
If elems.length > 1 Then Debug.print elems(elems.length-2).innerText
For future readers (I know OP doesn't want this):
I would avoid the whole scrolling pickle, dynamic html and browser and issue an xmlhttp request and regex out the appropriate values from the javscript objects the web page uses for updating. N.B. I would probably add in validation on regex match positions.
Public Sub test()
Dim r As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.gurufocus.com/stock/gliba/summary", False
.send
r = GetMatches(.responseText, "price52wlow:(.*?),|price52whigh:(.*?),")
If r <> "NA" Then MsgBox r
End With
End Sub
Public Function GetMatches(ByVal inputString As String, ByVal sPattern As String) As String
Dim matches As Object
With CreateObject("vbscript.regexp")
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = sPattern
'If .test(inputString) Then
Set matches = .Execute(inputString)
If matches.Count = 2 Then
GetMatches = matches.Item(0).submatches(0) & "-" & matches.Item(1).submatches(1)
Else
GetMatches = "NA"
End If
End With
End Function
Regex:
I'm trying to get all the downloadable Excel file links from the website, but having difficulty. Please help to guide me. Thanks.
Sub TYEX()
Dim internet As Object
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Set internet = CreateObject("InternetExplorer.Application")
internet.Visible = True
URL = "https://www.jpx.co.jp/markets/public/short-selling/index.html"
internet.Navigate URL
Do Until internet.ReadyState >= 4
DoEvents
Loop
Application.Wait Now + TimeSerial(0, 0, 5)
Set internetdata = internet.Document
Set div_result = internetdata.getElementById("readArea")
Set header_links = div_result.getElementsByTagName("td")
For Each h In header_links
Set link = h.ChildNodes.item(0)
Cells(Range("A" & Rows.Count).End(xlUp).Row + 1, 1) = link.href
Next
MsgBox "done"
End Sub
You had the idea down correctly, but here's a different approach:
Sub TYEX()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate "https://www.jpx.co.jp/markets/public/short-selling/index.html"
.Visible = True
Do While .Busy Or .readyState < 4
DoEvents
Loop
Dim doc As Object, tbl As Object
Set doc = .document
Set tbl = doc.getElementsByClassName("component-normal-table")(0).Children(0)
Dim r As Long, xlsArr(), a As Object
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
End With
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
End Sub
Public Function NextRow(ByVal ws As Worksheet, Optional ByVal col As Variant = 1) As Long
With ws
NextRow = .Cells(.Rows.Count, col).End(xlUp).Row + 1
End With
End Function
Breaking Down the Code
This will loop your html table rows. We start at 1, because 0 is actually just the table header.
With tbl.Rows
ReDim xlsArr(1 To .Length - 1)
For r = 1 To .Length - 1 ' 0 is the table header
xlsArr(r) = .Item(r).Children(1).innerHTML
Next r
End With
This uses regular expressions to extract the url from the innerHTML property. You can see how this particular regex works here: Regex101
With CreateObject("VBScript.RegExp")
.Pattern = "<a href=""(\/markets.*?\.xls)"
For r = 1 To UBound(xlsArr)
xlsArr(r) = "https://www.jpx.co.jp" & .Execute(xlsArr(r))(0).SubMatches(0)
Debug.Print xlsArr(r)
Next
End With
You will size your range to be the same size as your array that contains the links, then write the array to the worksheet. This is usually much faster than writing cells one-by-one.
'Add to sheet
Dim ws As Worksheet, rng As Range
Set ws = ThisWorkbook.Worksheets(1)
With ws
Set rng = .Range(.Cells(NextRow(ws), 1), .Cells(NextRow(ws) + UBound( _
xlsArr) - 1, 1))
rng.Value = Application.Transpose(xlsArr)
End With
You can use an attribute = value CSS selector with $ operator to say the href value must end with .xls. Then use querySelectorAll to retrieve all matched results. Using CSS selectors is a very fast and generally robust method.
Dim list As Object
Set list = ie.document.querySelectorAll("[href$='.xls']")
It is much faster to use XMLHTTP as well, rather than opening IE. Note that you can then pass these links to a function to perform a binary download or to URLMon for downloading.
Option Explicit
Public Sub Links()
Dim sResponse As String, html As HTMLDocument, list As Object, i As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.jpx.co.jp/markets/public/short-selling/index.html", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Set list = html.querySelectorAll("[href$='.xls']")
End With
For i = 0 To list.Length - 1
Debug.Print Replace$(list.item(i), "about:", "https://www.jpx.co.jp")
Next
End Sub
Example download function (though you could re-use your existing XMLHTTP object - this is just to illustrate):
Public Function DownloadFile(ByVal downloadFolder As String, ByVal downloadURL As String) As String
Dim http As Object , tempArr As Variant
Set http = CreateObject("WinHttp.WinHttpRequest.5.1")
http.Open "GET", downloadURL, False
http.send
On Error GoTo errhand
With CreateObject("ADODB.Stream")
.Open
.Type = 1
.write http.responseBody
tempArr = Split(downloadURL, "/")
tempArr = tempArr(UBound(tempArr))
.SaveToFile downloadFolder & tempArr, 2 '< "/" on enter of downloadFolder. 2 for overwrite which is Ok if no file modifications.
.Close
End With
DownloadFile = downloadFolder & tempArr
Exit Function
errhand:
If Err.Number <> 0 Then
Debug.Print Err.Number, Err.Description
MsgBox "Download failed"
End If
DownloadFile = vbNullString
End Function
References (VBE > Tools > References):
Microsoft HTML Object Library
I have a list of numbers (PDGA Numbers) in MS Excel. I would like to automatically search the PDGA website (https://www.pdga.com/players/) from the list and automatically paste the player's location next to the corresponding PDGA Number. Currently, I am able to search the number and paste the location individually, but not the entire list.
First I select an excel cell and 'Define Name' as PDGA, and another as Location.
https://imgur.com/AcGtuX8
Then I basically followed this YouTube video. https://www.youtube.com/watch?v=7sZRcaaAVbg
And ultimately got this VBA code to work. (Make sure the proper VBA References are checked)
https://imgur.com/a/OYSM7Am
Private Sub Worksheet_Change(ByVal Target As Range)
If Target.Column = Range("PDGA").Column Then
Dim IE As New InternetExplorer
IE.Visible = False
IE.navigate "https://www.pdga.com/player/" & Range("PDGA").Value
Do
DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Dim Doc As HTMLDocument
Set Doc = IE.document
Dim sDD As String
sDD = Trim(Doc.getElementsByTagName("li")(127).outerText)
Range("Location").Value = sDD
End If
End Sub
I think I need some For Each loop, but I'm not sure. It should look like this when completed.
https://imgur.com/a/qOiW4JJ
Thanks in advance for any help.
If you have a specific list of players then you loop and issue XHR requests to get the info. Here I have the PDGA# in an array which is looped:
playerPDGA = Array(1, 5, 23, 46, 789, 567)
Code:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument, playerPDGA(), results(), i As Long
playerPDGA = Array(1, 5, 23, 46, 789, 567)
ReDim results(0 To UBound(playerPDGA), 0 To 1)
With CreateObject("MSXML2.XMLHTTP")
For i = LBound(playerPDGA) To UBound(playerPDGA)
.Open "GET", "https://www.pdga.com/player/" & playerPDGA(i), False
.send
sResponse = StrConv(.responseBody, vbUnicode)
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
results(i, 0) = .querySelector(".pane-content > h1").innerText
results(i, 1) = .querySelector(".location").innerText
End With
Next i
End With
ActiveSheet.Cells(1, 1).Resize(UBound(results, 1) + 1, UBound(results, 2) + 1) = results
End Sub
For any page listing players:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.pdga.com/players/", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Dim nameList As Object, cityList As Object, stateList As Object, countryList As Object, r As Long
With html
.body.innerHTML = sResponse
Set nameList = .querySelectorAll(".views-field.views-field-nothing")
Set cityList = .querySelectorAll(".views-field.views-field-City.city")
Set stateList = .querySelectorAll(".views-field.views-field-StateProv.state")
Set countryList = .querySelectorAll(".views-field.views-field-Country.country")
End With
With ActiveSheet
Dim i As Long
For i = 0 To nameList.Length - 1
r = r + 1
.Cells(r, 1) = nameList.item(i).innerText
.Cells(r, 2) = Trim$(cityList.item(i).innerText & Chr$(32) & stateList.item(i).innerText & Chr$(32) & countryList.item(i).innerText)
Next i
End With
Application.ScreenUpdating = True
End Sub
Reference:
HTML Object library
You can achieve your desired output in several ways. Here is one of such.
Sub FetchData()
Dim Http As New XMLHTTP60, Html As New HTMLDocument
Dim post As HTMLDivElement, Idic As New Scripting.Dictionary
Dim key As Variant, N$, CT$, S$, C$, R&
With Http
.Open "GET", "https://www.pdga.com/players/", False
.send
Html.body.innerHTML = .responseText
End With
For Each post In Html.querySelector("table.views-table tbody").getElementsByTagName("tr")
N = post.querySelector("a[title]").innerText
CT = post.querySelector(".city").innerText
S = post.querySelector(".state").innerText
C = post.querySelector(".country").innerText
Idic(N & "|" & CT & " " & S & " " & C) = 1
Next post
For Each key In Idic.Keys
R = R + 1: Cells(R, 1) = Split(key, "|")(0)
Cells(R, 2) = Split(key, "|")(1)
Next key
End Sub
Reference to add to the library:
Microsoft XML, v6.0
Microsoft HTML Object Library
Microsoft Scripting Runtime
Sub test()
Dim IE As New InternetExplorer
Dim Doc As HTMLDocument
Dim lastRow As Long, i As Long
Dim sDD As String
IE.Visible = False
lastRow = Range("A" & Rows.Count).End(xlUp).Row
For i = 1 To lastRow
IE.navigate "https://www.pdga.com/player/" & Range("PDGA").Cells(i).Value
Do
DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set Doc = IE.document
sDD = Trim(Doc.getElementsByTagName("li")(127).outerText)
Range("Location").Cells(i) = sDD
Next
Set IE = Nothing
Set Doc = Nothing
End Sub
I created a macro, in which I can fetch each URL from any webpage.
Now, I have each URL in column.
How can I check if a URL is working.
If any one of these URL is not working then it should show me error not working next to URL in next column.
Below is the code I wrote:
Sub CommandButton1_Click()
Dim ie As Object
Dim html As Object
Dim j As Integer
j = 1
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
url = "www.mini.co.uk"
ie.navigate url
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to website ..."
Loop
Application.StatusBar = " "
Set html = ie.document
'Dim htmltext As Collection
Dim htmlElements As Object
Dim htmlElement As Object
Set htmlElements = html.getElementsByTagName("*")
For Each htmlElement In htmlElements
'If htmlElement.getAttribute("href") <> "" Then Debug.Print htmlElement.getAttribute("href")
If htmlElement.getAttribute("href") <> "" Then Cells(j, 1).Value = htmlElement.getAttribute("href")
j = j + 1
Next
ActiveSheet.Range("$A$1:$A$2752").removeDuplicates Columns:=1, Header:=xlNo
End Sub
This code is to fetch the URL from web-page.
Below is the code to check the status of URL, if it is working or not.
Sub CommandButton2_Click()
Dim k As Integer
Dim j As Integer
k = 1
j = 1
'Dim Value As Object
'Dim urls As Object
'urls.Value = Cells(j, 1)
For Each url In Cells(j, 1)
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False
url = Cells(j, 1)
ie.navigate url
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "checking the Data. Please wait..."
Loop
Cells(k, 2).Value = "OK"
'Set html = ie.document
ie.Quit
j = j + 1
k = k + 1
Next
End Sub
Public Function IsURLGood(url As String) As Boolean
Dim request As New WinHttpRequest
On Error GoTo IsURLGoodError
request.Open "HEAD", url
request.Send
If request.Status = 200 Then
IsURLGood = True
Else
IsURLGood = False
End If
Exit Function
IsURLGoodError:
IsURLGood = False
End Function
Sub testLink()
Dim source As Range, req As Object, url$
Set source = Range("A2:B2")
source.Columns(2).Clear
For i = 1 To source.Rows.Count
url = source.Cells(i, 1)
If IsURLGood(url) Then
source.Cells(i, 2) = "OK"
Else
source.Cells(i, 2) = "Down"
End If
Next
MsgBox "Done"
End Sub
Since you are interested to know whether the link is working, xmlhttp may be one solution.
Set sh = ThisWorkBook.Sheets("Sheet1")
Dim column_number: column_number = 2
'Row starts from 2
For i=2 To 100
strURL = sh.cells(i,column_number)
sh.cells(i, column_number+1) = CallHTTPRequest(strURL)
Next
Function CallHTTPRequest(strURL)
Set objXMLHTTP = CreateObject("MSXML2.XMLHTTP")
objXMLHTTP.Open "GET", strURL, False
objXMLHTTP.send
status = objXMLHTTP.Status
'strContent = ""
'If objXMLHTTP.Status = 200 Then
' strContent = objXMLHTTP.responseText
'Else
' MsgBox "HTTP Request unsuccessfull!", vbCritical, "HTTP REQUEST"
' Exit Function
'End If
Set objXMLHTTP = Nothing
CallHTTPRequest = status
End Function
You can actually get the status codes using IE automation, but it requires working with events and a reference to the Microsoft Internet Controls library.
Private Declare PtrSafe Sub SleepEx Lib "Kernel32.dll" (ByVal dwMilliseconds As Long, Optional ByVal bAlertable As Boolean = True)
Private WithEvents ie As SHDocVw.InternetExplorer
Private LastStatusCode As Long
Private Sub ie_NavigateError(ByVal pDisp As Object, URL As Variant, TargetFrameName As Variant, StatusCode As Variant, Cancel As Boolean)
LastStatusCode = StatusCode
End Sub
Public Sub NavigateReturnStatus(url As String) As Long
Set ie = CreateObject("InternetExplorer.Application")
Status = 0
ie.Navigate url
Do While IEObject.ReadyState <> READYSTATE_COMPLETE Or IEObject.Busy
SleepEx 50 'No busy waiting, short wait time
DoEvents 'Need to receive events from IE application
Loop
NavigateReturnStatus = LastStatusCode
End Sub
This doesn't return a conventional HTTP status code, but instead returns a NavigateError status code. That means you can get more detailed information about errors, but no information about successful navigation. Of course, if it's 0, no error has occurred so the status is likely 200.
Speed is very much slower than a WinHTTP/MSXML approach, but I'm sharing this mainly for cases where someone's already navigating using Internet Explorer anyway.
Of course, the code can (and likely should) be modified to reuse the internet explorer application.
Sub URLWorkingorNot()
'Make Sure to Select Cells containing URL
Dim i As Long
AddReference
i= 1
Selection.Replace "#N/A", "NA": Selection.Offset(0, 1).EntireColumn.Insert
Dim IE As InternetExplorer
If ActiveWorkbook Is Nothing Then Exit Sub
For Each cell In Selection
If cell.Value <> "" Then
Set IE = New InternetExplorer
IE.Navigate2 cell.Value
IE.Left = 900
IE.Width = 900
IE.Visible = True
While IE.Busy: DoEvents: Wend
On Error Resume Next
If InStr(1, IE.document.body.innerText, "The webpage cannot be found", vbBinaryCompare) <> 0 Then cell.Offset(0, 1).Value = "Not Available"
'MsgBox IE.document.body.innerText
If err.Number <> 0 Then err.Clear: On Error GoTo 0
IE.Quit: Set IE = Nothing
End If
i = i + 1:
ProgressBar Selection.Count, i, "Working on " & i & " Cell": DoEvents
If ActiveWorkbook.Path <> "" And Left(i, 3) = "00" Then ActiveWorkbook.Save
Next cell
Unload UProgressBar
Application.StatusBar = ""
End Sub
Sub AddReference()
'cOPIED FROM iNTERNET
'Macro purpose: To add a reference to the project using the GUID for the
'reference library
Dim strGUID As String, theRef As Variant, i As Long
'Update the GUID you need below.
'strGUID = "{00020905-0000-0000-C000-000000000046}"
strGUID = "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}"
'iNTERNET cONTROLS - "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}" MAJOR 1 MINOR 1
'HTMLOBJECT "{3050F1C5-98B5-11CF-BB82-00AA00BDCE0B}" MAJOR 4 MINOR 0
'Set to continue in case of error
On Error Resume Next
'Remove any missing references
For i = ThisWorkbook.Vbproject.References.Count To 1 Step -1
Set theRef = ThisWorkbook.Vbproject.References.Item(i)
If theRef.isbroken = True Then
ThisWorkbook.Vbproject.References.Remove theRef
End If
Next i
'Clear any errors so that error trapping for GUID additions can be evaluated
err.Clear
'Add the reference
ThisWorkbook.Vbproject.References.AddFromGuid _
GUID:=strGUID, Major:=1, Minor:=0
ThisWorkbook.Vbproject.References.AddFromFile "C:\Windows\System32\UIAutomationCore.dll"
'If an error was encountered, inform the user
Select Case err.Number
Case Is = 32813
'Reference already in use. No action necessary
Case Is = vbNullString
'Reference added without issue
Case Else
'An unknown error was encountered, so alert the user
MsgBox "A problem was encountered trying to" & vbNewLine _
& "add or remove a reference in this file" & vbNewLine & "Please check the " _
& "references in your VBA project!", vbCritical + vbOKOnly, "Error!"
End Select
On Error GoTo 0
End Sub