VBA to fetch html URL from webpage - html

I have created Macro which can read all the HTML of provided URL, however I want to fetch all the url from that HTML.
Sub GetHTML_Click()
Dim ie As InternetExplorer
Dim html As HTMLDocument
Dim j As Integer
Set ie = New InternetExplorer
ie.Visible = True
url = Cells(1, 2)
ie.navigate url
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to website ..."
Loop
Application.StatusBar = " "
Set html = ie.document
'Dim htmltext As Collection
Dim htmltext As String
htmltext = html.DocumentElement.innerHTML
'Need to add iURL
Dim htmlurl As IHTMLElement
For Each htmlurl In hmtltext
iurl = htmlurl.toString
Cells(j, 1).Value = CLng(iurl)
j = j + 1
Next
End Sub
I tried to code this to fetch the URLs however its giving "Object Required error"
can anyone please help to modify this macro which will help me to fetch all the URL from HTML page.
I am using www.mini.in website for testing.
Mayur.

Try this :
Dim ie As Object
Dim html As Object
Dim j As Integer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
URL = "google.com"
ie.Navigate URL
Do While ie.ReadyState <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to website ..."
Loop
Application.StatusBar = " "
Set html = ie.Document
'Dim htmltext As Collection
Dim htmlElements As Object
Dim htmlElement As Object
Set htmlElements = html.getElementsByTagName("*")
For Each htmlElement In htmlElements
If htmlElement.getAttribute("href") <> "" Then Debug.Print htmlElement.getAttribute("href")
Next

Related

Entering text into webpage searchbox

I am trying to enter text into a searchbox and am running into different errors. Below is my code, can anyone point out to me where i have gone wrong?
Sub GetHTMLDocument()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
IE.Visible = True
IE.navigate "http://shopee.sg"
Do While IE.readyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.document
Set HTMLInput = HTMLDoc.getElementsByClassName("shopee-searchbar-input__input")
HTMLInput.Value = "Excel VBA"
End Sub
I suggest try to make a test with the code sample below may help to solve the issue.
Sub demo()
Dim URL As String
Dim IE As Object
Dim element As HTMLInputElement
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
URL = "YOUR WEB PAGE ADDRESS HERE" 'Add your site address here....
IE.navigate URL
Do While IE.Busy
Application.Wait DateAdd("s", 1, Now)
Loop
Application.Wait DateAdd("s", 2, Now)
Set element = IE.document.querySelector("[class='shopee-searchbar-input__input']")
element.removeAttribute ("aria-label")
element.removeAttribute ("placeholder")
element.Value = "abc"
element.FireEvent ("OnChange")
Set IE = Nothing
End Sub
Output:
Further, you can modify the code example as your requirements.

Grabbing a single piece of data from a website's HTML and assign it to a variable

I'm working on a project where I grab data that I stored in an excel sheet and search a specific website that can be seen in the code below. Once the website completes the search, I want to grab the "worth" from the top right of the page. I'm fairly new to using VBA with HTML, so I'm not sure how to take the element (worth) that I'm looking for from the web page, and assign it to a variable in VBA so I can paste it into my excel sheet.
Right now I'm able to open IE, insert my data into the search bar of the specific website that I'm using, and click search. What I have is seen below. Thank you in advance!
Sub BrowsetoSite()
Dim IE As New SHDocVw.InternetExplorer
Dim website As String
Dim i As Integer
i = 2
'Set ie = New SHDocVw.InternetExplorer
website = "https://cardmavin.com/category/football"
IE.navigate website
IE.Visible = False
Do While IE.readyState <> READYSTATE_COMPLETE
'assign info to variable to enter into the search bar
Loop
Dim idoc As MSHTML.HTMLDocument
Set idoc = IE.document
Dim Brand As String
Dim Year As String
Dim Num As String
Dim Name As String
Dim search As String
Dim value As Variant
Brand = Range("A" & i).value
Year = Range("B" & i).value
Num = Range("D" & i).value
Name = Range("E" & i).value
search = (Year & " " & Brand & " " & Name & " " & Num)
i=i+1
idoc.getElementById("search-field").value = search
idoc.getElementById("to-mavin").Click
While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Dim value As Variant
value = idoc.getElementsByTagName("h4")(0).innerText
MsgBox value
IE.Quit
End Sub
The issue that I'm having is the value = idoc.getElementsByTagName("h4")(0).innerText. I've tried to get the element a few different ways, but have been unsuccessful so far.
You need Set idoc = IE.document after you've submitted the search, to get a reference to that new page. Otherwise you're still trying to access the previous page.
i=i+1
idoc.getElementById("search-field").value = search
idoc.getElementById("to-mavin").Click
While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set idoc = IE.document '<<<<<<<<<<<<<<
Dim value As Variant
value = idoc.getElementById("worthBox") _
.getElementsByTagName("h4")(0).innerText
MsgBox value
Try this approach. Suppose in cell A1 the string 2008 Topps Thomas DeCoud
Sub Test()
Const sURL As String = "https://mavin.io/search?q="
Dim json As Object
Set json = GetJSONFromHTMLHead(sURL & Application.WorksheetFunction.EncodeURL(Range("A1").Value))
Debug.Print json("offers")("priceCurrency")
Debug.Print json("offers")("price")
End Sub
Function GetJSONFromHTMLHead(ByVal sURL As String) As Object
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument, re As Object, json As Object
Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument
Set re = CreateObject("VBScript.RegExp")
re.Pattern = "<head>([\s\S]+)<\/head>"
With http
.OPEN "Get", sURL, False
.send
html.body.innerHTML = Replace$(Replace$(re.Execute(.responseText)(0), "<head>", "<body>"), "</head>", "</body>")
End With
Set json = JSONConverter.ParseJson(html.querySelector("script[type='application/ld+json']").innerHTML)
Set GetJSONFromHTMLHead = json
End Function

Web Scraping IE NAVIGATE method Works Vs MSXML2.XMLHTTP60 not Working

I am pulling data from NSE site,
the URL is:https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=28MAY2020#
I am successfully extract the item using Internet explorer,How ever this method is slow,
so i moved to MSXML2.XMLHTTP60 method,but this method returns null string
please find my codes
Method 1:Works fine
Sub OI_Slow_Method()
Dim ie As New InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
Dim Link As String
Link = ActiveSheet.Range("C4").Value
ie.Visible = False
ie.navigate Link
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
Dim objElement As HTMLObjectElement
Dim sDD As String
doc.Focus
ActiveSheet.Cells(1, 1).Value = doc.getElementById("openInterest").innerText 'Open Interest Value
ie.Quit
ie.Visible = True
Set doc = Nothing
Set ie = Nothing
End Sub
'--------------------------
Method 2:Help required in this method only
Sub OI_Fast_Method()
Dim xhr As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument
Set xhr = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument
With xhr
.Open "GET", "https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=30APR2020#", False
.send
html.body.innerHTML = StrConv(.responseBody, vbUnicode)
End With
Debug.Print html.getElementById("openInterest").Innertext
'The output of this is "<SPAN id=openInterest>??</SPAN>" only question mark returned inside the SPAN
End Sub
I think Tim hit the nail on the head, as always. You are getting some raw XML and the stuff you want is not in that XML. You can do a data dump and get what you want.
Sub DumpData()
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
URL = "https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=28MAY2020#"
'Wait for site to fully load
ie.Navigate2 URL
Do While ie.Busy = True
DoEvents
Loop
RowCount = 1
With Sheets("Sheet1")
.Cells.ClearContents
RowCount = 1
For Each itm In ie.Document.all
.Range("B" & RowCount) = Left(itm.innerText, 1024)
RowCount = RowCount + 1
Next itm
End With
End Sub
Then you would have to parse the text. It's not hard, but it will be a little extra labor.
Another option may be to download the entire contents of the website, save it as a text file, import the data, and then parse that data.
Sub Sample()
Dim ie As Object
Dim retStr As String
Set ie = CreateObject("internetexplorer.application")
With ie
.Navigate "https://www1.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=VOLTAS&instrument=FUTSTK&type=-&strike=-&expiry=28MAY2020#"
.Visible = True
End With
Do While ie.readystate <> 4: Wait 5: Loop
DoEvents
retStr = ie.document.body.innerText
'~> Write the above to a text file
Dim filesize As Integer
Dim FlName As String
'~~> Change this to the relevant path
FlName = "C:\Users\ryans\OneDrive\Desktop\Sample.Txt"
filesize = FreeFile()
Open FlName For Output As #filesize
Print #filesize, retStr
Close #filesize
End Sub
Private Sub Wait(ByVal nSec As Long)
nSec = nSec + Timer
While nSec > Timer
DoEvents
Wend
End Sub
I couldn't get either of your code samples to run on my machine.

Excel VBA code to click web button

Need help how to create excel vba code for this
I'll be needing the codes so I can complete my macro.
Thanks in advance
First, you will need to create a reference to:
Microsoft Internet Controls
Microsoft HTML Object Library
In VBE, click Tools > References
Sub clickLink()
Dim ie As New InternetExplorer, Url$, doc As HTMLDocument
Url = "http://UrlToYourLink.com"
With ie
.navigate Url
Do While .Busy Or .readyState < READYSTATE_COMPLETE
DoEvents
Loop
doc = .document
.Visible = True
End With
Dim myBtn As Object
Set myBtn = doc.getElementsByClassName("button rounded")(0)
myBtn.Click
End Sub
The Internet control is used to browse the webpage and the HTML Objects are used to identify the username and password textboxes and submit the text using the control button.
Dim HTMLDoc As HTMLDocument
Dim oBrowser As InternetExplorer
Sub Login_2_Website()
Dim oHTML_Element As IHTMLElement
Dim sURL As String
On Error GoTo Err_Clear
sURL = "https://www.google.com/accounts/Login"
Set oBrowser = New InternetExplorer
oBrowser.Silent = True
oBrowser.timeout = 60
oBrowser.navigate sURL
oBrowser.Visible = True
Do
' Wait till the Browser is loaded
Loop Until oBrowser.readyState = READYSTATE_COMPLETE
Set HTMLDoc = oBrowser.Document
HTMLDoc.all.Email.Value = "sample#vbadud.com"
HTMLDoc.all.passwd.Value = "*****"
For Each oHTML_Element In HTMLDoc.getElementsByTagName("input")
If oHTML_Element.Type = "submit" Then oHTML_Element.Click: Exit For
Next
' oBrowser.Refresh ' Refresh If Needed
Err_Clear:
If Err <> 0 Then
Debug.Assert Err = 0
Err.Clear
Resume Next
End If
End Sub
The program requires references to the following:
1 Microsoft Internet Controls
2. Microsoft HTML Object Library
Microsoft internet controls are a great way to do this, but if you aren't allowed to add new references, here is another way to go about web scraping.
This methode ain't as 'clean' as Microsoft internet controls and HTML object but it gets the job done.
Sub GoogleSearch()
Dim ie As Object
Dim objSearchBnt As Object
Dim objCollection As Object
Dim i As Integer
'initialize counter
i = 0
'Create InternetExplorer Object
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
'navigate to the url
ie.navigate "Www.google.com"
'Statusbar shows in the buttom corner of excel
Application.StatusBar = "Loading, please wait..."
'Wait until page is ready
Do While ie.busy
Application.Wait DateAdd("s", 1, Now)
Loop
'Store all the elements with input tag
Set objCollection = ie.Document.getElementsByTagName("input")
'Go through all input elements
While i < objCollection.Length
'input search field
If objCollection(i).Name = "q" Then
objCollection(i).Value = "Hello World"
End If
'search button
If objCollection(i).Type = "submit" Then
Set objSearchBnt = objCollection(i)
End If
i = i + 1
Wend
objSearchBnt.Click
'Clean up
Set objSearchBnt = Nothing
Set objCollection = Nothing
Set ie = Nothing
'Give excel control over the status bar agian
Application.StatusBar = ""
End Sub

How can I get the "tr id" from HTML code?

I want to use VBA for taking the URL adress of diferent links from a web page, but without success. Has anyone any idea why my code don't work?
My code is below:
Sub Test()
Dim URL As String
Dim IE As New InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim dictObj As Object: Set dictObj = CreateObject("Scripting.Dictionary")
Dim tRowID As String
URL = "http://www.flashscore.com/soccer/england/premier-league/";
With IE
.Navigate URL
.Visible = True Do Until
.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop Set HTMLdoc = .Document
End With
With HTMLdoc
Set tblSet = .getElementById("fs-results")
Set mTbl = tblSet.getElementsByTagName("tbody")(1)
Set tRows = mTbl.getElementsByTagName("tr")
With dictObj
For Each tRow In tRows
tRowID = Mid(tRow.getAttribute("id"), 5)
If Not .Exists(tRowID) Then .Add tRowID, Empty
End If
Next tRow
End With
End With
For Each Key In dictObj
Debug.Print Key
Next Key
Set IE = Nothing
End Sub