The portion of code pasted below attempts to loop through each page of a web search. The button to do so is defined by the html also below. The loop works through only the second page, regardless of the results page count, at which point, I receive a Permission denied (Error 70).
Sub finrascrape()
Dim ie As Object
Dim pgcount, a, i, commaindex As Integer
Dim bname, cc, cnum, pg, nb As IHTMLElement
Dim blist, clist, pagelist, nextb, testtxt, testtxt2 As IHTMLElementCollection
Set ie = CreateObject("internetexplorer.application")
brokersearch = InputBox("ENTER BROKER NAME OR CRD#")
firmsearch = InputBox("ENTER FIRM NAME OR CRD#")
geosearch = InputBox("ENTER ZIP CODE (must be valid zip code, otherwise leave blank)")
With ie
.navigate "https://brokercheck.finra.org/"
.Visible = True
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
Set intags = ie.document.getElementsByTagName("input")
For Each gat In intags
If gat.placeholder = "Name or CRD#" Then
gat.Value = brokersearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "Firm Name or CRD# (optional)" Then
gat.Value = firmsearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "City, State or Zip (optional)" Then
gat.Value = geosearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
Else
End If
Next gat
ie.document.getElementsByClassName("md-raised md-primary md-hue-2 md-button md-ink-ripple").Item.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set pagelist = ie.document.getElementsByClassName("bold font-dark-blue ng-binding")
For Each pg In pagelist
If pg.className = "bold font-dark-blue ng-binding" Then
a = pg.innerText
Exit For
Debug.Print a
End If
Next pg
pgcount = WorksheetFunction.RoundUp(a / 12, 0)
Debug.Print pgcount
Sheets("Results").Select
Range("A1").Value = "BROKER NAME"
Range("B1").Value = "BROKER CRD#"
Range("C1").Value = "MAILING CITY"
Range("D1").Value = "MAILING STATE"
Range("E1").Value = "MAILING ZIP"
With Range(Cells(1, 1), Cells(1, Cells(1, Columns.Count).End(xlToLeft).Column))
.Font.Bold = True
.HorizontalAlignment = xlCenter
End With
For i = 1 To pgcount
On Error GoTo reportcomplete
Set blist = ie.document.getElementsByClassName("smaller ng-binding flex")
For Each bname In blist
Sheets("Results").Select
Sheets("Results").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = bname.innerText
Next bname
Set clist = ie.document.getElementsByClassName("smaller")
For Each cnum In clist
Sheets("Process").Select
If cnum.className = "smaller" Then
Sheets("Process").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = cnum.innerText
End If
Next cnum
Sheets("Process").Select
For a = 1 To Sheets("Process").Range("A" & Rows.Count).End(xlUp).Row
If Left(Sheets("Process").Range("A" & a).Value, 4) = "CRD#" Then
Sheets("Results").Select
Sheets("Results").Range("B" & Range("B" & Rows.Count).End(xlUp).Row + 1).Value = Right(Sheets("Process").Range("A" & a).Value, Len(Sheets("Process").Range("A" & a)) - 7)
If InStr(Sheets("Process").Range("A" & a + 1).Value, ",") = 0 Then
Sheets("Process").Range("A" & a + 1).Value = "UNAVAILABLE, NA XXXXX"
End If
ElseIf InStr(Sheets("Process").Range("A" & a).Value, ",") > 0 Then
commaindex = InStr(Sheets("Process").Range("A" & a).Value, ",")
Sheets("Results").Select
Sheets("Results").Range("C" & Sheets("Results").Range("C" & Rows.Count).End(xlUp).Row + 1).Value = Left(Sheets("Process").Range("A" & a).Value, commaindex - 1)
Sheets("Results").Range("D" & Sheets("Results").Range("D" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 1), 2)
Sheets("Results").Range("E" & Sheets("Results").Range("E" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 4), 5)
Else
End If
Sheets("Process").Select
Next a
Sheets("Process").Cells.Clear
Set testtxt = ie.document.getElementsByTagName("a")
For Each txt In testtxt
If txt.className = "ng-binding" Then
txt.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set testtxt2 = ie.document.getElementsByTagName("a")
For a = pgcount To 2 Step -1
For Each txt2 In testtxt2
If txt2.className = "ng-binding" And txt2.innerText = a & " of " & pgcount & " pages" Then
'Debug.Print a
GoTo end_of_for
End If
Next txt2
Next a
End If
Next txt
end_of_for:
a = pgcount
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Next i
reportcomplete:
MsgBox "FINRA Web Scrape Complete. Please review."
End Sub
HTML:
<li ng-if="::directionLinks" ng-class="{disabled: noNext()||ngDisabled}" class="pagination-next ng-scope">
›</li>
The URL for the search landing page is https://brokercheck.finra.org. Any help would be much appreciated.
I developed a loop as follows, which successfully navigates the "a" tagged elements, identifies the correct button based on the innerText of the prior element, and loops through each page.
Set testtxt = ie.document.getElementsByTagName("a")
If i < pgcount Then
For d = 1 To testtxt.Length
If testtxt.Item(d).innerText = i & " of " & pgcount & " pages" Then
testtxt.Item(d + 1).Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Exit For
End If
Next d
End If
This is what you can do to traverse all 27 pages and get the broker names.
Sub Get_Content()
Dim ie As New InternetExplorer, html As HTMLDocument
Dim itm As Object, post As Object, posts As Object, elem As Object
With ie
.Visible = True
.navigate "https://brokercheck.finra.org/"
Do Until .readyState = READYSTATE_COMPLETE: Loop
Set html = .document
End With
Set evt = html.createEvent("keyboardevent")
evt.initEvent "change", True, False
For Each itm In html.getElementsByTagName("input")
If InStr(itm.placeholder, "Name or CRD#") > 0 Then
itm.Value = "Michael John"
Exit For
End If
Next itm
itm.dispatchEvent evt
For Each post In html.getElementsByTagName("input")
If InStr(post.placeholder, "Firm Name or CRD# (optional)") > 0 Then
post.Value = "Morgan Stanley"
Exit For
End If
Next post
post.dispatchEvent evt
html.getElementsByClassName("md-button")(0).Click
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Do
For Each elem In html.getElementsByClassName("smaller ng-binding flex")
x = x + 1: Cells(x, 1) = elem.innerText
Next elem
html.getElementsByClassName("pagination-next")(0).getElementsByTagName("a")(0).Click
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Loop Until InStr(html.body.innerHTML, " class=""pagination-last ng-scope disabled""") > 0
ie.Quit
End Sub
Related
The below Excel vba code use to work and return the market cap, 52 week low and current price into my spreadsheet. It no longer does though and cannot work out why. Class names haven't changed but the getElementsByClassName doesn't seem to return anything anymore I think.
Sub get_title_header()
Dim wb As Object
Dim doc As Object
Dim sURL As String
Dim i As Integer
Dim allElements As IHTMLElementCollection
Application.DisplayAlerts = False
Set wb = CreateObject("internetExplorer.Application")
sURL = "https://www.google.com/search?q=aapl+stock+quote"
wb.navigate sURL
wb.Visible = False
While wb.Busy
Application.Wait Now + #12:00:01 AM#
DoEvents
Wend
Set doc = wb.document.body
On Error GoTo err_clear
Set allElements = doc.getElementsByClassName("iyjjgb")
x = allElements(3).innerText
Sheet6.Cells(i + 1, 2).Value = x
x = allElements(8).innerText
Sheet6.Cells(i + 1, 3).Value = x
x = ""
x = allElements(0).innerText
Sheet6.Cells(i + 1, 4).Value = x
x = ""
err_clear:
If Err <> 0 Then
Err.Clear
Resume Next
End If
wb.Quit
End Sub
I am trying to scrap a list of href link from a webpage, and then trying to scrap the value out of it. I am now facing the problem which the code only can handle up to 5 links. If the links more than 5, it will show runtime error on random line.
I am extracting the href link from these webpage:http://www.bursamalaysia.com/market/listed-companies/company-announcements/#/?category=SH&sub_category=all&alphabetical=All&date_from=28/09/2018
Option Explicit
Sub ScrapLink()
Dim IE As New InternetExplorer, html As HTMLDocument
Application.ScreenUpdating = False
With IE
IE.Visible = False
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A100"))
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Discussion:
The problem is likely, at least from my testing, due to one of the links not having the table Details of changes, so the numberOfRows variable is set to 0, and this line:
ReDim results(1 To numberOfRows, 1 To 7)
fails with an index error as you have (1 To 0, 1 To 7).
Using this link in A1 there are 30 URLs retrieved. This retrieved link does not have that table whereas the others do.
You have a choice of how to handle this scenario. Here are some example options:
Option 1: Only process the page if the numberOfRows > 0. This is the example I give.
Option 2: Have a Select Case with numberOfRows and if Case 0 then handle page in one way, Case Else handle as normal.
Note:
1) You also want to reset the status bar with:
Application.StatusBar = False
2) I temporarily fixed the links range for testing with:
ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")
TODO:
Refactor to be more modular and run the whole process with the same IE instance. Creating a class to hold the IE object would be a good idea. Provide it with methods for extracting your data, testing number of result rows etc.
Add some basic error handling, for example, to handle failed website connection.
Example handling using test of numberOfRows > 0:
Option Explicit
Sub ScrapeLink()
Dim IE As New InternetExplorer
Application.ScreenUpdating = False
With IE
IE.Visible = True
IE.navigate Cells(1, 1).Value
While .Busy Or .readyState < 4: DoEvents: Wend
' Application.Wait Now + TimeSerial(0, 0, 3)
Application.StatusBar = "Trying to go to website?"
DoEvents
Dim links As Object, i As Long
Set links = .document.querySelectorAll("#bm_ajax_container [href^='/market/listed-companies/company-announcements/']")
For i = 1 To links.Length
With ThisWorkbook.Worksheets("Sheet1")
.Cells(i + 1, 1) = links.item(i - 1)
End With
Next i
.Quit
End With
Application.StatusBar = false
End Sub
Public Sub GetInfo()
Dim IE As New InternetExplorer, headers(), u As Long, resultCollection As Collection
headers = Array("URL", "Name", "No", "Date of change", "# Securities", "Type of Transaction", "Nature of Interest")
Set resultCollection = New Collection
Dim links()
links = Application.Transpose(ThisWorkbook.Worksheets("Sheet1").Range("A2:A31")) '<== I have fixed the range here for testing
With IE
.Visible = True
For u = LBound(links) To UBound(links)
If InStr(links(u), "http") > 0 Then
.navigate links(u)
While .Busy Or .readyState < 4: DoEvents: Wend
Application.Wait Now + TimeSerial(0, 0, 2)
Dim data As Object, title As Object
With .document.getElementById("bm_ann_detail_iframe").contentDocument
Set title = .querySelector(".formContentData")
Set data = .querySelectorAll(".ven_table tr")
End With
Dim results(), numberOfRows As Long, i As Long, currentRow As Object, td As Object, c As Long, r As Long
numberOfRows = Round(data.Length / 4, 0)
If numberOfRows > 0 Then
ReDim results(1 To numberOfRows, 1 To 7)
For i = 0 To numberOfRows - 1
r = i + 1
results(r, 1) = links(u): results(r, 2) = title.innerText
Set currentRow = data.item(i * 4 + 1)
c = 3
For Each td In currentRow.getElementsByTagName("td")
results(r, c) = Replace$(td.innerText, "document.write(rownum++);", vbNullString)
c = c + 1
Next td
Next i
resultCollection.Add results
Set data = Nothing: Set title = Nothing
End If
End If
Next u
.Quit
End With
Dim ws As Worksheet, item As Long
If Not resultCollection.Count > 0 Then Exit Sub
If Not Evaluate("ISREF('Results'!A1)") Then '<==Credit to #Rory for this test
Set ws = Worksheets.Add
ws.NAME = "Results"
Else
Set ws = ThisWorkbook.Worksheets("Results")
ws.Cells.Clear
End If
Dim outputRow As Long: outputRow = 2
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For item = 1 To resultCollection.Count
Dim arr()
arr = resultCollection(item)
For i = LBound(arr, 1) To UBound(arr, 1)
.Cells(outputRow, 1).Resize(1, 7) = Application.WorksheetFunction.Index(arr, i, 0)
outputRow = outputRow + 1
Next
Next
End With
End Sub
Sample results:
I'm not sure why my code isn't working (returning business names, phone numbers, and contact numbers from a website's HTMLDoc I'm trying to pull information from. Can you help identify what I'm doing incorrectly (most likely with the IHTMLElement and IHTMLElementCollection data types, and/or accessing the HTML through getElementsByTagName, getElementsByClassName, etc). Thank you!!
Option Explicit
Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"
'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String
'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement
Dim itemprop As String
Dim itemprop2 As String
Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement
Dim RowNumber As Long
'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all
'loop for business names
For Each BusinessName In BusinessNameCollection
Cells(RowNumber, 1).Value = BusinessName
RowNumber = RowNumber + 1
Next BusinessName
'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
If itemprop = "streetAddress" Then
Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
ElseIf itemprop = "addressLocality" Then
Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "addressRegion" Then
Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "postalCode" Then
Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all
'loop for business addresses
For Each BusinessAddress In BusinessAddressCollection
BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
Cells(RowNumber, 2).Value = BusinessAddress
RowNumber = RowNumber + 1
Next BusinessAddress
'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
If itemprop2 = "telephone" Then
BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all
'loop for business phones
For Each BusinessPhone In BusinessPhoneCollection
Cells(RowNumber, 3).Value = BusinessPhone
RowNumber = RowNumber + 1
Next BusinessPhone
Range("A1").Activate
Set HTMLDoc = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "Manta.com Business Contacts"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
This extracts the info. You haven't looped all the results pages in your code or mentioned it so I have set this up to show you how to do the first page of results. Let me know how this goes.
Code:
Option Explicit
Public Sub FinalMantaSub() '<== Can't have ad blocker enabled for this site
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = True
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Dim c As Object, i As Long
Set c = HTMLDoc.querySelectorAll("div.media-body")
Do While Not c(i) Is Nothing
Debug.Print "Result #" & i + 1
Debug.Print vbNewLine
Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
Debug.Print String$(20, Chr$(61))
i = i + 1
Loop
IE.Quit
End Sub
Snapshot of output:
Update:
There are a vast number of results but you can have an outer loop as follows. You could then turn the above in to a sub that is called.
Dim arr() As String, pageNo As Long
arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
pageNo = 1
Do While Err.Number = 0
On Error GoTo Errhand:
Dim url As String
url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
Debug.Print url
IE.navigate url
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
pageNo = pageNo + 1
Loop
Errhand:
Debug.Print "Stopped after " & pageNo & " pages."
I have one VBA script, that find some data on web (csfd.cz) and put into Excel. But it is quite slow because it takes some time to load the website (in IE) and then extract data. My thought is how to make that IE only loads HTML and it doesn't load any graphics and functional things - only pure HTML. Is it somehow possible? Thanks for help...
here is my code:
Sub InputData()
Dim cursor As String
Dim i As Long
Dim ie As Object
Dim lastRow As Long
Dim releasesLength As Long
Dim releases As Object
Dim oneRelease As Object
Dim datumKino As String
Dim datumDVD As String
Dim origins As String
Dim year As Long
Dim time As Long
Dim name As String
Dim genreLong As String
Dim genre As String
'zapamatování kurzoru
cursor = ActiveCell.Address
'zjištění posledního řádku
With ActiveSheet
lastRow = .Cells(.Rows.Count, "L").End(xlUp).Row
End With
'první viditelná buňka
Range("L2").Select
ActiveCell.Offset(1, 0).Select
Do Until ActiveCell.Height <> 0
ActiveCell.Offset(1, 0).Select
Loop
'inicializace
Set ie = CreateObject("InternetExplorer.Application")
'ZAČÁTEK SMYČKY--------------------------------------------------------------
For i = ActiveCell.Row To lastRow
Cells(i, 12).Select
'resetování proměných
releasesLength = vbNullLong
Set releases = Nothing
Set oneRelease = Nothing
datumKino = ""
datumDVD = ""
origins = ""
year = vbNullLong
time = vbNullLong
name = ""
genreLong = ""
genre = ""
'vyřazení
If (InStr(Cells(i, 12).Value, "csfd.cz") = 0 Or ActiveCell.Height = 0) Then
GoTo NextRow
End If
'otevření stránky
ie.Visible = False
ie.navigate Cells(i, 12).Value
Application.StatusBar = "Načítám údaje. Prosím počkejte..."
Do While ie.busy
Application.Wait DateAdd("s", 1, Now)
Loop
'úprava procent a datumů
Cells(i, 9).Value = ie.document.getElementById("rating").Children(0).innerText
releasesLength = ie.document.getElementById("releases").getElementsByClassName("content")(0).getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).Children.Length
Set releases = ie.document.getElementById("releases").getElementsByClassName("content")(0).getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).Children
For j = 0 To releasesLength - 1
Set oneRelease = releases(j)
If (oneRelease.getElementsByTagName("th")(0).getElementsByTagName("img")(0).getAttribute("title") = "Česko") Then
If (InStr(oneRelease.getElementsByTagName("th")(0).innerHTML, "V kinech")) Then
If (datumKino = "") Then
datumKino = Left(Replace(Replace(Replace(oneRelease.getElementsByTagName("td")(0).innerHTML, " ", ""), vbLf, ""), vbTab, ""), 10)
End If
ElseIf (InStr(oneRelease.getElementsByTagName("th")(0).innerHTML, "Na DVD")) Then
If (datumDVD = "") Then
datumDVD = Left(Replace(Replace(Replace(oneRelease.getElementsByTagName("td")(0).innerHTML, " ", ""), vbLf, ""), vbTab, ""), 10)
End If
ElseIf (InStr(oneRelease.getElementsByTagName("th")(0).innerHTML, "Na Blu-ray")) Then
If (datumDVD = "") Then
datumDVD = Left(Replace(Replace(Replace(oneRelease.getElementsByTagName("td")(0).innerHTML, " ", ""), vbLf, ""), vbTab, ""), 10)
End If
End If
Else
GoTo NextRelease
End If
NextRelease:
Next j
If (Len(datumKino) <> 0) Then
Cells(i, 1).Value = datumKino
End If
If (Len(datumDVD) <> 0) Then
Cells(i, 2).Value = datumDVD
End If
'1. první zápis do řádku
If (Cells(i, 4).Value = "") Then
year = ie.document.getElementsByClassName("origin")(0).getElementsByTagName("span")(0).innerHTML
Cells(i, 4).Value = year
origin = ie.document.getElementsByClassName("origin")(0).innerHTML
originSplit = Split(origin, ",")
time = Replace(originSplit(UBound(originSplit)), " min", "")
Cells(i, 10).Value = time
name = Replace(Replace(ie.document.getElementsByClassName("info")(0).getElementsByClassName("header")(0).getElementsByTagName("h1")(0).innerHTML, vbLf, ""), vbTab, "")
Cells(i, 3).Value = name
genreLong = ie.document.getElementsByClassName("genre")(0).innerHTML
genre = Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(genreLong, " ", ""), "Akční", "Akč."), "Animovaný", "Anim."), "Dobrodružný", "Dobr."), "Dokumentární", "Dokument."), "Fantasy", "Fant."), "Historický", "Histor."), "Katastrofický", "Katastrof."), "Komedie", "Kom."), "Mysteriózní", "Myster."), "Rodinný", "Rod."), "Romantický", "Romant."), "Thriller", "Thril."), "Životopisný", "Životopis.")
Cells(i, 5).Value = genre
End If
NextRow:
Next i
'KONEC SMYČKY----------------------------------------------------------------
'Clean
ie.Quit
Set ie = Nothing
Set releases = Nothing
Set oneRelease = Nothing
Application.StatusBar = ""
Range(cursor).Select
End Sub
In the following url
http://www.indiavotes.com/ac/details/1/32051/216
There is link to "EXPORT to CSV", but I could find neither table info nor download.csv link in pagesource().
How can I download this csv using R?
Sub DumpData()
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
URL = "http://www.indiavotes.com/ac/details/1/32051/216"
'Wait for site to fully load
IE.Navigate2 URL
Do While IE.Busy = True
DoEvents
Loop
RowCount = 1
With Sheets("Sheet1")
.Cells.ClearContents
RowCount = 1
For Each itm In IE.document.all
.Range("A" & RowCount) = itm.tagname
.Range("B" & RowCount) = itm.ID
.Range("C" & RowCount) = itm.classname
.Range("D" & RowCount) = Left(itm.innertext, 1024)
RowCount = RowCount + 1
Next itm
End With
End Sub