my question is relatively simple and maddeningly evasive. It's not unlike many questions I've found on the internet and at StackOverflow yet no suggestions have helped my little conundrum....
Using Excel2010 I wasnt to enter data into a single field, submit it (Part1) and capture a few lines of data (part2) , paste into excel in a list/table format (part30 - and do it 999,999 times.....Part 1 and 3 are working - Part2 refuses to acknowledge the new internet window and all gettagnames and SelectTable workarounds just use the original URL - the attached was a desperate attempt using Sendkeys - which worse perfectly! - for the first loop - then absolutely nothing!
anyway, the code should be fairly simple - appologies for some mess in the coding order - it's down to me starting to cut out bits with a scalple but after hours of messing about resorted to hatchets...
Dim HTMLdoc As HTMLDocument
Dim ie As InternetExplorer
Sub EPF_FSA()
'Application.DisplayAlerts = False
Application.EnableEvents = False
Dim iHTML_Element As IHTMLElement
Dim sURL As String
Dim miss1 As Integer
Dim FrmNo As Long
Dim FrmName As String
Dim Address1 As String
Dim Address2 As String
Dim Address3 As String
Dim Address4 As String
Dim Address5 As String
Dim Address6 As String
Dim Address7 As String
Dim Address8 As String
Dim AnyLuck As String
Dim RowNum As Integer
Dim ColNum As Integer
RowNum = 1
ColNum = 1
FrmNo = 100111
While FrmNo <= 100112
'Do While FrmNo <= 100112
On Error GoTo Err_Clear
sURL = "http://www.fsa.gov.uk/register/epfSearchForm.do"
Set ie = CreateObject("internetexplorer.application")
'Set Ex = CreateObject("MicrosoftExcel.application")
ie.navigate sURL
ie.Visible = True
Do
' Wait till the Browser is loaded
Loop Until ie.readyState = READYSTATE_COMPLETE
Set HTMLdoc = ie.document
HTMLdoc.all.epfref.Value = FrmNo
For Each iHTML_Element In HTMLdoc.getElementsByTagName("input")
If iHTML_Element.Type = "submit" Then miss1 = miss1 + 1
If miss1 = 2 Then iHTML_Element.Click: Exit For
Next
Err_Clear:
If Err <> 0 Then Err.Clear
Resume Next
'PART 2 ********************************************************************
Do
' Wait till the Browser is loaded
Loop Until ie.readyState = READYSTATE_COMPLETE
Call SendKeys("^a")
DoEvents
Call SendKeys("^c")
DoEvents
ActiveSheet.PasteSpecial Format:="Text", link:=False, DisplayAsIcon:=False
range("A2").Select
'Copy and select relevant text to sheet 2
Worksheets("Sheet1").Activate
FrmName = Cells(39, "A").Value
Address1 = Cells(59, "A").Value
Address2 = Cells(60, "A").Value
Address3 = Cells(61, "A").Value
Address4 = Cells(62, "A").Value
Address5 = Cells(63, "A").Value
Address6 = Cells(64, "A").Value
Address7 = Cells(65, "A").Value
Address8 = Cells(66, "A").Value
AnyLuck = Cells(47, "A").Value
Worksheets("Sheet2").Activate
Cells(RowNum, "A").Value = FrmNo
Cells(RowNum, "B").Value = FrmName
Cells(RowNum, "C").Value = Address1
Cells(RowNum, "D").Value = Address2
Cells(RowNum, "E").Value = Address3
Cells(RowNum, "F").Value = Address4
Cells(RowNum, "G").Value = Address5
Cells(RowNum, "H").Value = Address6
Cells(RowNum, "I").Value = Address7
Cells(RowNum, "J").Value = Address8
Cells(RowNum, "K").Value = AnyLuck
RowNum = RowNum + 1
'ActiveCell.Offset(1, 0).Select
Worksheets("Sheet1").Activate
Cells.Select
Selection.Delete Shift:=xlUp
range("A2").Select
'MsgBox (FrmNo & Chr(10) & FrmName)
'Part 3
FrmNo = FrmNo + 1
ie.Quit
ie.Quit
Wend
'Loop
Application.EnableEvents = True
End Sub
Looks like you can go directly to the results page. Try:
sUrl = "http://www.fsa.gov.uk/register/epfRefSearch.do?epfRef="
sUrl = sUrl & frmNo
and then just navigate to that page. The actual details are then in a div with an ID of "box"
Related
The below Excel vba code use to work and return the market cap, 52 week low and current price into my spreadsheet. It no longer does though and cannot work out why. Class names haven't changed but the getElementsByClassName doesn't seem to return anything anymore I think.
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim i As Integer
Dim allElements As IHTMLElementCollection
Application.DisplayAlerts = False
Set wb = CreateObject("internetExplorer.Application")
sURL = "https://www.google.com/search?q=aapl+stock+quote"
wb.navigate sURL
wb.Visible = False
While wb.Busy
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
Set doc = wb.document.body
On Error GoTo err_clear
Set allElements = doc.getElementsByClassName("iyjjgb")
x = allElements(3).innerText
Sheet6.Cells(i + 1, 2).Value = x
x = allElements(8).innerText
Sheet6.Cells(i + 1, 3).Value = x
x = ""
x = allElements(0).innerText
Sheet6.Cells(i + 1, 4).Value = x
x = ""
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
End Sub
I have an MSHTML.HTMLDocument code that:
Opens the page "https://www.ksestocks.com/HistoryHighLow"
Fills an input i.e 786
Then click on a button to fetch a table
There I catch a row and its 4 children using the following code
Sub KSE_GetHTMLDocument()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDOC As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLClasses As MSHTML.IHTMLElementCollection
Dim HTMLClass As MSHTML.IHTMLElement
Dim HTMLCel As MSHTML.IHTMLElement
Dim colNum, rowNum, RowN, C As Integer
Dim Cel As Range
IE.Visible = False
IE.Navigate "https://www.ksestocks.com/HistoryHighLow"
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
For Each Cel In Sheets("Sheet1").Range("A3:A" & Cells(Rows.Count, 1).End(xlUp).Row)
If IsEmpty(Cel.Value) = False Then
Set HTMLDOC = IE.Document
Set HTMLInput = HTMLDOC.getElementById("selscrip")
HTMLInput.Value = Trim(Cel.Value)
Debug.Print Cel.Value
HTMLDOC.getElementsByTagName("input")(0).Click
While IE.Busy Or IE.readyState < 4: DoEvents: Wend
C = 0
For Each HTMLClass In HTMLDOC.getElementsByTagName("tr")
If InStr(HTMLClass.innerText, "Last 3 years (") > 0 Then
If Left(HTMLClass.innerText, 14) = "Last 3 years (" Then
For Each HTMLCel In HTMLClass.Children
Debug.Print HTMLCel.innerText
If C = 1 Then
Cel.Offset(0, 7).Value = HTMLCel.innerText
ElseIf C = 2 Then
Cel.Offset(0, 8).Value = HTMLCel.innerText
ElseIf C = 3 Then
Cel.Offset(0, 9).Value = HTMLCel.innerText
ElseIf C = 4 Then
Cel.Offset(0, 10).Value = HTMLCel.innerText
End If
C = C + 1
Next
End If
End If
Next
End If
Next
End Sub
The above code is working fine getting values from the website, but when I change the code to shift it to XML it stops working also internet explorer is popping up with no results every time with a new window.
where I am doing it wrong?
Is there more robust way of scraping webpage?
Please check the following code before running
Sub KSE_Get_XML()
Dim XMLp As New MSXML2.XMLHTTP60
Dim HTMLDOC As New MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLClasses As MSHTML.IHTMLElementCollection
Dim HTMLClass As MSHTML.IHTMLElement
Dim HTMLCel As MSHTML.IHTMLElement
Dim colNum, rowNum, RowN, C As Integer
XMLp.Open "GET", "https://www.ksestocks.com/HistoryHighLow", False
XMLp.send
HTMLDOC.body.innerHTML = XMLp.responseText
Dim Cel As Range
' Do While HTMLDOC.ReadyState <> READYSTATE_COMPLETE
' Loop
For Each Cel In Sheets("Sheet1").Range("A3:A" & Cells(Rows.Count, 1).End(xlUp).Row)
If IsEmpty(Cel.Value) = False Then
HTMLDOC.body.innerHTML = XMLp.responseText
Set HTMLInput = HTMLDOC.getElementById("selscrip")
HTMLInput.Value = Trim(Cel.Value)
Debug.Print Cel.Value
HTMLDOC.getElementsByTagName("input")(0).Click
'Application.Wait Now + TimeValue("00:00:01")
'' Do While HTMLDOC.ReadyState <> READYSTATE_COMPLETE
' DoEvents
' Loop
C = 0
For Each HTMLClass In HTMLDOC.getElementsByTagName("tr")
If InStr(HTMLClass.innerText, "Last 3 years (") > 0 Then
If Left(HTMLClass.innerText, 14) = "Last 3 years (" Then
For Each HTMLCel In HTMLClass.Children
Debug.Print HTMLCel.innerText
If C = 1 Then
Cel.Offset(0, 7).Value = HTMLCel.innerText
ElseIf C = 2 Then
Cel.Offset(0, 8).Value = HTMLCel.innerText
ElseIf C = 3 Then
Cel.Offset(0, 9).Value = HTMLCel.innerText
ElseIf C = 4 Then
Cel.Offset(0, 10).Value = HTMLCel.innerText
End If
C = C + 1
Next
End If
End If
Next
End If
Next
End Sub
Get rid of IE altogether and switch to xmlhttp requests, which is robust and less error prone. When you go for xhr, you need to issue a post http requests with appropriate parameters. This is something you can do to get the results right next to Last 3 years (1 Sep 2018 - 1 Sep 2021) from that table.
Public Sub GetContent()
Const Url = "https://www.ksestocks.com/HistoryHighLow"
Dim Http As Object, Html As HTMLDocument, Htmldoc As HTMLDocument
Dim params$, I&, R&, ws As Worksheet, searchKeyword$
Set Html = New HTMLDocument
Set Htmldoc = New HTMLDocument
Set Http = CreateObject("MSXML2.XMLHTTP")
Set ws = ThisWorkbook.Worksheets("Sheet1")
R = 2
searchKeyword = "786" 'you can use different search keywords here to get related results
params = "selscrip=" & searchKeyword
With Http
.Open "POST", Url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36"
.setRequestHeader "content-type", "application/x-www-form-urlencoded; charset=UTF-8"
.send (params)
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("td.plain")
For I = 0 To .Length - 1
If InStr(.item(I).innerText, "Last 3 years") > 0 Then
Htmldoc.body.innerHTML = "<table>" & .item(I).ParentNode.outerHTML & "</table>"
ws.Cells(R, 1) = Htmldoc.querySelectorAll("td.plain")(1).innerText
ws.Cells(R, 2) = Htmldoc.querySelectorAll("td.plain")(2).innerText
ws.Cells(R, 3) = Htmldoc.querySelectorAll("td.plain")(3).innerText
ws.Cells(R, 4) = Htmldoc.querySelectorAll("td.plain")(4).innerText
End If
Next I
End With
End Sub
Reference to add:
1. Microsoft XML, v6.0
2. Microsoft HTML Object Library
Your search keyword would be what you see in this image.
I am trying to extract the Futures data from MRCI.com and restructure it into one continous table in an excel worksheet so I can manipulate from there.
How can I repeat the Futures Contract in each row to get the following table layout:
Table Structure
Here's my code so far:
Sub MRCIData()
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim FutTable As MSHTML.IHTMLElement
Dim FutRows As MSHTML.IHTMLElementCollection
Dim FutRow As MSHTML.IHTMLElement
Dim FutCells As MSHTML.IHTMLElementCollection
Dim FutCell As MSHTML.IHTMLElement
Dim FutContracts As MSHTML.IHTMLElementCollection
Dim FutContract As MSHTML.IHTMLElement
Dim FutRowText As String
Dim MrciURLHist As String
MrciURLHist = "https://www.mrci.com/ohlc/2020/200320.php"
XMLReq.Open "GET", MrciURLHist, False
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Exit Sub
End If
HTMLDoc.body.innerHTML = XMLReq.responseText
Set XMLReq = Nothing
Set FutContracts = HTMLDoc.getElementsByClassName("note1")
For Each FutContract In FutContracts
Next
Set FutTable = HTMLDoc.getElementsByClassName("strat")(0)
Set FutRows = FutTable.getElementsByTagName("tr")
For Each FutRow In FutRows
Set FutCells = FutRow.getElementsByTagName("td")
FutRowText = ""
If InStr(FutRow.innerText, "Total Volume") = 0 Then
For Each FutCell In FutCells
FutRowText = FutRowText & vbTab & FutCell.innerText
Next
End If
Debug.Print , FutRowText
Next
End Sub
The following code looks through the table line by line and determines what future to apply to the next rows until it finds a the next one, and so on. the output is not pretty so more a proof of concept. The code now outputs the table correctly.
Sub Main(ByVal Sheet As Worksheet)
Dim oRequest As New MSXML2.XMLHTTP60
Dim oDocument As New MSHTML.HTMLDocument
Dim oRows As MSHTML.IHTMLElementCollection
Dim oRow As MSHTML.IHTMLElement
Dim oCells As MSHTML.IHTMLElementCollection
Dim oCell As MSHTML.IHTMLElement
oRequest.Open "GET", "https://www.mrci.com/ohlc/2020/200320.php", False
oRequest.send
If oRequest.Status <> 200 Then
MsgBox "Error"
Exit Sub
End If
oDocument.body.innerHTML = oRequest.responseText
Set oRequest = Nothing
Dim Skip As Boolean
Dim Current As String
Dim RowIndex As Integer
Dim ColumnIndex As Integer
Set oRows = oDocument.getElementsByClassName("strat")(0).getElementsByTagName("tr")
Current = ""
Application.ScreenUpdating = False
For Each oRow In oRows
Skip = False
If oRow.getElementsByTagName("th").Length > 0 Then
Current = oRow.innerText
Skip = True
End If
If Not Current = "" And Skip = False Then
If InStr(oRow.innerText, "Total Volume") = 0 Then
Set oCells = oRow.getElementsByTagName("td")
ColumnIndex = 2
Sheet.Cells(RowIndex, 1).Value = Current
For Each oCell In oCells
Sheet.Cells(RowIndex, ColumnIndex).Value = oCell.innerText
ColumnIndex = ColumnIndex + 1
Next oCell
RowIndex = RowIndex + 1
End If
End If
Next oRow
Application.ScreenUpdating = True
End Sub
I'm not sure why my code isn't working (returning business names, phone numbers, and contact numbers from a website's HTMLDoc I'm trying to pull information from. Can you help identify what I'm doing incorrectly (most likely with the IHTMLElement and IHTMLElementCollection data types, and/or accessing the HTML through getElementsByTagName, getElementsByClassName, etc). Thank you!!
Option Explicit
Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"
'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String
'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement
Dim itemprop As String
Dim itemprop2 As String
Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement
Dim RowNumber As Long
'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all
'loop for business names
For Each BusinessName In BusinessNameCollection
Cells(RowNumber, 1).Value = BusinessName
RowNumber = RowNumber + 1
Next BusinessName
'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
If itemprop = "streetAddress" Then
Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
ElseIf itemprop = "addressLocality" Then
Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "addressRegion" Then
Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "postalCode" Then
Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all
'loop for business addresses
For Each BusinessAddress In BusinessAddressCollection
BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
Cells(RowNumber, 2).Value = BusinessAddress
RowNumber = RowNumber + 1
Next BusinessAddress
'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
If itemprop2 = "telephone" Then
BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all
'loop for business phones
For Each BusinessPhone In BusinessPhoneCollection
Cells(RowNumber, 3).Value = BusinessPhone
RowNumber = RowNumber + 1
Next BusinessPhone
Range("A1").Activate
Set HTMLDoc = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "Manta.com Business Contacts"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
This extracts the info. You haven't looped all the results pages in your code or mentioned it so I have set this up to show you how to do the first page of results. Let me know how this goes.
Code:
Option Explicit
Public Sub FinalMantaSub() '<== Can't have ad blocker enabled for this site
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = True
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Dim c As Object, i As Long
Set c = HTMLDoc.querySelectorAll("div.media-body")
Do While Not c(i) Is Nothing
Debug.Print "Result #" & i + 1
Debug.Print vbNewLine
Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
Debug.Print String$(20, Chr$(61))
i = i + 1
Loop
IE.Quit
End Sub
Snapshot of output:
Update:
There are a vast number of results but you can have an outer loop as follows. You could then turn the above in to a sub that is called.
Dim arr() As String, pageNo As Long
arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
pageNo = 1
Do While Err.Number = 0
On Error GoTo Errhand:
Dim url As String
url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
Debug.Print url
IE.navigate url
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
pageNo = pageNo + 1
Loop
Errhand:
Debug.Print "Stopped after " & pageNo & " pages."
I am getting a "Method 'Range' of object '_Global' failed" error about 50% of the time I try to run the below code. Debug takes me to this line:
Set rng = xlWS.Range(Range("A1"), xlWS.Range("A1").SpecialCells(xlLastCell))
Can anyone help with this problem?? Thanks.
Private Sub Command48_Click()
'On Error Resume Next
Dim Filename As String
Dim month1 As String
Dim year1 As Integer
Dim startTime As Date
startTime = Now
Dim strDirectoryPath As String
Filename = strDirectoryPath & "\" & "QI_GAP_REPORT_2_ " & Format$(Now(), "mm-dd-yyyy") & ".xls"
DoCmd.OpenQuery "QI_GAP_REPORT_FOR_EXCEL"
DoCmd.TransferSpreadsheet acExport, acSpreadsheetTypeExcel12, "QI_GAP_REPORT_FOR_EXCEL", Filename, False, "Summary"
DoCmd.Close acQuery, "QI_GAP_REPORT_FOR_EXCEL"
'///****Format excel workbook****////
' Late binding to avoid reference:
Dim xlApp As Object 'Excel.Application
Dim xlWB As Object 'Workbook
Dim xlWS As Object 'Worksheet
Dim GetBook As String
' Create the instance of Excel that we will use to open the temp book
Set xlApp = CreateObject("Excel.Application")
xlApp.Visible = True
Set xlWB = xlApp.Workbooks.Open(Filename)
Set xlWS = xlWB.Worksheets("Summary")
' Format our temp sheet
' ************************************************** *************************
xlApp.Range("A1").Select
Const xlLandscape As Long = 2
Const xlCenter As Long = -4108
Const xlBottom As Long = -4107
Const xlContext As Integer = -5002
Const xlDown As Integer = -4121
Const xlContinuous As Integer = 1
Const xlThin As Integer = 2
Const xlLastCell As Long = 11
Const xlYes As Long = 1
With xlWS
With .UsedRange
.borders.LineStyle = xlContinuous
.borders.ColorIndex = 0
.borders.TintAndShade = 0
.borders.Weight = xlThin
End With
'format header 90 degree
With .Range("i1:y1")
.HorizontalAlignment = xlCenter
.VerticalAlignment = xlBottom
.WrapText = False
.Orientation = 90
.AddIndent = False
.IndentLevel = 0
.ShrinkToFit = False
.ReadingOrder = xlContext
.MergeCells = False
End With
.UsedRange.Rows.RowHeight = 15
.UsedRange.Columns.AutoFit
Dim tbl As ListObject
Dim rng As Range
Set rng = xlWS.Range(Range("A1"), xlWS.Range("A1").SpecialCells(xlLastCell))
Set tbl = ActiveSheet.ListObjects.Add(xlSrcRange, rng, , xlYes)
tbl.TableStyle = "TableStyleMedium2"
tbl.ShowTotals = True