vbs to download data from url as csv table - csv

I am using the following code to download data from a website using vbs. The data is held at the url in table form. However, the resulting downloaded data is in the form of a simple continuous text data.
The solution at https://www.example-code.com/vbscript/html_table_to_csv.asp allows converting the downloaded data to csv format, but requires specific api and software to be pre-installed.
I was wondering if it would be possible to download/ convert the data in csv format using vbs only and without using a third-party software. Perhaps above link could give some ideas?
(I can download the same using excel etc., but I find vbs to be much faster and efficient).
Note:
the file needs to be saved in D:\ as any_name.vbs and resulting downloaded data file will be downloaded in D:\
For i = 1 to 1
createFile(i)
Next
Public Sub createFile(a)
Dim fso,MyFile
filePath = "D:\file_name" & a & ".txt"
Set fso = CreateObject("Scripting.FileSystemObject")
Set MyFile = fso.CreateTextFile(filePath)
myURL = "https://example-code.com/data/etf_table.html"
'Create XMLHTTP Object & HTML File
Set oXMLHttp = CreateObject("MSXML2.XMLHTTP")
Set ohtmlFile = CreateObject("htmlfile")
'Send Request To Web Server
oXMLHttp.Open "GET", myURL, False
oXMLHttp.send
'If Return Status is Success
If oXMLHttp.Status = 200 Then
'Get Web Data to HTML file Object
ohtmlFile.Write oXMLHttp.responseText
ohtmlFile.Close
'Parse HTML File
Set oTable = ohtmlFile.getElementsByTagName("table")
For Each oTab In oTable
MyFile.WriteLine oTab.Innertext
Next
MyFile.close
End If
End Sub
'Process Completed
'WScript.Quit

I would propose this solution:
For i = 1 to 1
createFile(i)
Next
Public Sub createFile(a)
Dim fso,MyFile
filePath = "z:\_Comunity\StackOverflow\file_name" & a & ".txt"
Set fso = CreateObject("Scripting.FileSystemObject")
Set MyFile = fso.CreateTextFile(filePath)
myURL = "https://example-code.com/data/etf_table.html"
'Create XMLHTTP Object & HTML File
Set oXMLHttp = CreateObject("MSXML2.XMLHTTP")
Set ohtmlFile = CreateObject("htmlfile")
'Send Request To Web Server
oXMLHttp.Open "GET", myURL, False
oXMLHttp.send
'If Return Status is Success
If oXMLHttp.Status = 200 Then
'Get Web Data to HTML file Object
ohtmlFile.Write oXMLHttp.responseText
ohtmlFile.Close
'Parse HTML File
Set oTable_coll = ohtmlFile.getElementsByTagName("table")
For Each oTab_enum In oTable_coll
For Each oRow_enum In oTab_enum.rows
ROW = ""
For Each oCell_enum In oRow_enum.cells
ROW = ROW & oCell_enum.innerText & ","
Next
ROW = Left(ROW, Len(ROW) - 1)
CSV = CSV & ROW & vbCrLf
Next
Next
MyFile.WriteLine CSV
MyFile.close
End If
End Sub
'Process Completed
'WScript.Quit
REMARK: TESTED WORKS !!!
btw.
and I'm not well VBS scripter.

Do you mean this following solution ?
For i = 1 to 1
createFile(i)
Next
Public Sub createFile(a)
Dim fso, MyFile
FilePath = "z:\_Comunity\StackOverflow\file_name" & a & ".txt"
Set fso = CreateObject("Scripting.FileSystemObject")
Set MyFile = fso.CreateTextFile(FilePath)
' myURL = "https://www.investing.com/indices/major-indices"
myURL = "https://example-code.com/data/etf_table.html"
'Create XMLHTTP Object & HTML File
Set oXMLHttp = CreateObject("MSXML2.XMLHTTP")
Set ohtmlFile = CreateObject("htmlfile")
'Send Request To Web Server
oXMLHttp.Open "GET", myURL, False
oXMLHttp.send
'If Return Status is Success
If oXMLHttp.Status = 200 Then
'Get Web Data to HTML file Object
ohtmlFile.Write oXMLHttp.responseText
ohtmlFile.Close
'Parse HTML File
Set oTable_coll = ohtmlFile.getElementsByTagName("table")
CSV = ""
ROW_idx = 0
COL_idx = 0
For Each oTab_enum In oTable_coll
ROW_idx = 0
For Each oROW_enum In oTab_enum.rows
ROW = ""
COL_idx = 0
For Each oCell_enum In oROW_enum.cells
If COL_idx >= 0 And COL_idx < 3 Then
ROW = ROW & oCell_enum.innerText & ","
End If
' incrase Column index counter
COL_idx = COL_idx + 1
Next
If ROW_idx > 2 And ROW_idx < 5 Then
ROW = Left(ROW, Len(ROW) - 1)
CSV = CSV & ROW & vbCrLf
End If
' incrase ROW index counter
ROW_idx = ROW_idx + 1
Next
Next
MyFile.WriteLine CSV
MyFile.close
End If
End Sub

Related

Excel VBA - Error 91 problem when HTML value is nothing

Hi I recently discovered excel VBA and am using it to aid my study of German.
I have a list of German words but no meaning/part of speech, example sentences, etc.
I wrote a macro to go to website (https://dictionary.cambridge.org/dictionary/german-english/) and fetch html data.
However, for some words, the example sentences are not provided (Hence the html returning no value and the error 91).
I have referred to other posts concerning this and added If Not HTMLDoc.getElementsByClassName() Is Nothing Then statements, but no luck so far.
Could you please tell me how to write a code such that if there is no html value, the macro moves on and go to the next word? (word is set by integer corresponding to the cell number in the excel sheet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim i As Integer
Dim strURL As String
For i = 2 To 3493
strURL = "https://dictionary.cambridge.org/dictionary/german-english/" & Range("A" & i)
XMLReq.Open "Get", strURL, False
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Error."
Exit Sub
End If
HTMLDoc.body.innerHTML = XMLReq.responseText
Set XMLReq = Nothing
'Part
If IsObject(HTMLDoc.getElementsByClassName("pos dpos")) Then
Range("B" & i) = HTMLDoc.getElementsByClassName("pos dpos")(0).innerText
End If
'Meaning
If IsObject(HTMLDoc.getElementsByClassName("ddef_h")) Then
Range("C" & i) = HTMLDoc.getElementsByClassName("ddef_h")(0).innerText
End If
'ExampleGer
If Not HTMLDoc.getElementsByClassName("eg deg") Is Nothing Then
i = i + 1
Else
Range("D" & i) = HTMLDoc.getElementsByClassName("eg deg")(0).innerText
End If
'ExampleEng
If Not HTMLDoc.getElementsByClassName("trans dtrans hdb") Is Nothing Then
i = i + 1
Else
Range("E" & i) = HTMLDoc.getElementsByClassName("trans dtrans hdb")(0).innerText
End If
Next i
End Sub
Ok, I'm a German and therefore did not need any example words.
A word that delivers all 4 values: Haus (house)
A word that delivers only 2 values: Gummibaum (rubber plant)
Try the following code and please ...
NEVER! NEVER! NEVER! manipulate the counting variable of a for loop in the code block of the loop. Never use this i = i + 1 if i is the counting variable of the for loop. If you do that you run into problems in 99.9%
Sub Dictionary()
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim i As Integer
Dim strURL As String
'Use object variables for each node you want to read from the DOM tree
'In the code below, these variables are then used to check whether an object exists or not
Dim nodePart As Object
Dim nodeMeaning As Object
Dim nodeExampleGer As Object
Dim nodeExampleEng As Object
For i = 2 To 3493
strURL = "https://dictionary.cambridge.org/dictionary/german-english/" & Range("A" & i)
'strURL = "https://dictionary.cambridge.org/dictionary/german-english/haus"
XMLReq.Open "Get", strURL, False
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Error."
Exit Sub
End If
HTMLDoc.body.innerHTML = XMLReq.responseText
Set XMLReq = Nothing
'Part
Set nodePart = HTMLDoc.getElementsByClassName("pos dpos")(0)
If Not nodePart Is Nothing Then
Range("B" & i) = nodePart.innerText
End If
'Meaning
Set nodeMeaning = HTMLDoc.getElementsByClassName("ddef_h")(0)
If Not nodeMeaning Is Nothing Then
Range("C" & i) = nodeMeaning.innerText
End If
'ExampleGer
Set nodeExampleGer = HTMLDoc.getElementsByClassName("eg deg")(0)
If Not nodeExampleGer Is Nothing Then
Range("D" & i) = nodeExampleGer.innerText
End If
'ExampleEng
Set nodeExampleEng = HTMLDoc.getElementsByClassName("trans dtrans hdb")(0)
If Not nodeExampleEng Is Nothing Then
Range("E" & i) = nodeExampleEng.innerText
End If
Next i
End Sub

I'm getting stuck at vba runtime error 424

I'm getting
run-time error 424
in 68th row (line)
request.Open "GET", Url, False
and I don't know how to fix it.
My previous question I posted ;
How to scrape specific part of online english dictionary?
My final goal is to get result like this;
A B
beginning bɪˈɡɪnɪŋ
behalf bɪˈhæf
behave bɪˈheɪv
behaviour bɪˈheɪvjər
belong bɪˈlɔːŋ
below bɪˈloʊ
bird bɜːrd
biscuit ˈbɪskɪt
Here's code I wrote, and it's mostly based on someone else's code I found on internet.
' Microsoft ActiveX Data Objects x.x Library
' Microsoft XML, v3.0
' Microsoft VBScript Regular Expressions
Sub ParseHelp()
' Word reference from
Dim Url As String
Url = "https://www.oxfordlearnersdictionaries.com/definition/english/" & Cells(ActiveCell.Row, "B").Value
' Get dictionary's html
Dim Html As String
Html = GetHtml(Url)
' Check error
If InStr(Html, "<TITLE>Not Found</Title>") > 0 Then
MsgBox "404"
Exit Sub
End If
' Extract phonetic alphabet from HTML
Dim wrapPattern As String
wrapPattern = "<span class='name' (.*?)</span>"
Set wrapCollection = FindRegexpMatch(Html, wrapPattern)
' MsgBox StripHtml(CStr(wrapCollection(1)))
' Fill phonetic alphabet into cell
If Not wrapCollection Is Nothing Then
Dim wrap As String
On Error Resume Next
wrap = StripHtml(CStr(wrapCollection(1)))
If Err.Number <> 0 Then
wrap = ""
End If
Cells(ActiveCell.Row, "C").Value = wrap
Else
MsgBox "not found"
End If
End Sub
Public Function StripHtml(Html As String) As String
Dim RegEx As New RegExp
Dim sOut As String
Html = Replace(Html, "</li>", vbNewLine)
Html = Replace(Html, " ", " ")
With RegEx
.Global = True
.IgnoreCase = True
.MultiLine = True
.Pattern = "<[^>]+>"
End With
sOut = RegEx.Replace(Html, "")
StripHtml = sOut
Set RegEx = Nothing
End Function
Public Function GetHtml(Url As String) As String
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.serverXMLHTTP")
Dim converter As New ADODB.stream
' Get
request.Open "GET", Url, False
request.send
' raw bytes
converter.Open
converter.Type = adTypeBinary
converter.Write request.responseBody
' read
converter.Position = 0
converter.Type = adTypeText
converter.Charset = "utf-8"
' close
GetHtml = converter.ReadText
converter.Close
End Function
Public Function FindRegexpMatch(txt As String, pat As String) As Collection
Set FindRegexpMatch = New Collection
Dim rx As New RegExp
Dim matcol As MatchCollection
Dim mat As Match
Dim ret As String
Dim delimiter As String
txt = Replace(txt, Chr(10), "")
txt = Replace(txt, Chr(13), "")
rx.Global = True
rx.IgnoreCase = True
rx.MultiLine = True
rx.Pattern = pat
Set matcol = rx.Execute(txt)
'MsgBox "Match:" & matcol.Count
On Error GoTo ErrorHandler
For Each mat In matcol
'FindRegexpMatch.Add mat.SubMatches(0)
FindRegexpMatch.Add mat.Value
Next mat
Set rx = Nothing
' Insert code that might generate an error here
Exit Function
ErrorHandler:
' Insert code to handle the error here
MsgBox "FindRegexpMatch. " & Err.GetException()
Resume Next
End Function
Any kind of help would be greatly appreciated.
The following is an example of how to read in values from column A and write out pronounciations to column B. It uses css selectors to match a child node then steps up to parentNode in order to ensure entire pronounciation is grabbed. There are a number of ways you could have matched on the parent node to get the second pronounciation. Note that I use a parent node and Replace as the pronounciation may span multiple childNodes.
If doing this for lots of lookups please be a good netizen and put some waits in the code so as to not bombard the site with requests.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, urls()
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row 'you need at least two words in column A or change the redim.
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(urls))
Set html = New MSHTML.HTMLDocument
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", "https://www.oxfordlearnersdictionaries.com/definition/english/" & urls(i), False
.send
html.body.innerHTML = .responseText
data = Replace$(Replace$(html.querySelector(".name ~ .wrap").ParentNode.innerText, "/", vbNullString), Chr$(10), Chr$(32))
results(i) = Right$(data, Len(data) - 4)
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub
Required references (VBE>Tools>References):
Microsoft HTML Object Library
Should you go down the API route then here is a small example. You can make 1000 free calls in a month with Prototype account. The next best, depending on how many calls you wish to make looks like the 10,001 calls (that one extra PAYG call halves the price). # calls will be affected by whether word is head word or needs lemmas lookup call first. The endpoint construction you need is GET /entries/{source_lang}/{word_id}?fields=pronunciations though that doesn't seem to filter massively. You will need a json parser to handle the json returned e.g. github.com/VBA-tools/VBA-JSON/blob/master/JsonConverter.bas. Download raw code from there and add to standard module called JsonConverter. You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
Option Explicit
Public Sub WriteOutPronounciations()
Dim html As MSHTML.HTMLDocument, i As Long, ws As Worksheet
Dim data As String, lastRow As Long, words()
'If not performing lemmas lookup then must be head word e.g. behave, behalf
Const appId As String = "yourAppId"
Const appKey As String = "yourAppKey"
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).row
words = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
ReDim results(1 To UBound(words))
Set html = New MSHTML.HTMLDocument
Dim json As Object
With CreateObject("MSXML2.ServerXMLHTTP")
For i = LBound(words) To UBound(words)
.Open "GET", "https://od-api.oxforddictionaries.com/api/v2/entries/en-us/" & LCase$(words(i)) & "?fields=pronunciations", False
.setRequestHeader "app_id", appId
.setRequestHeader "app_key", appKey
.setRequestHeader "ContentType", "application/json"
.send
Set json = JsonConverter.ParseJson(.responseText)
results(i) = IIf(json("results")(1)("type") = "headword", json("results")(1)("lexicalEntries")(1)("pronunciations")(2)("phoneticSpelling"), "lemmas lookup required")
Set json = Nothing
Next
End With
With ThisWorkbook.Worksheets(1)
.Cells(1, 2).Resize(UBound(results, 1), 1) = Application.Transpose(results)
End With
End Sub

How to download and get values from JSON file using VBScript or batch file?

This is the VBScript code answered here to get the JSON file from computer with proper values.
Set fso = CreateObject("Scripting.FileSystemObject")
json = fso.OpenTextFile("C:\path\to\combined.json").ReadAll
Set re = New RegExp
re.Pattern = """passed"":(true|false),"
re.IgnoreCase = True
For Each m In re.Execute(json)
passed = CBool(m.SubMatches(0))
Next
But I have a JSON file that looks like this which is online,
["AA-BB-CC-MAKE-SAME.json","SS-ED-SIXSIX-TENSE.json","FF-EE-EE-EE-WW.json","ZS-WE-AS-FOUR-MINE.json","DD-RF-LATERS-LATER.json","FG-ER-DC-ED-FG.json"]
How to download this JSON file and get the values to five variables using either VBScript or batch file?
Here is an example for downloading a json from internet and parse it :
Dim http,URL
URL = "http://ip-api.com/json/"
Set http = CreateObject("Msxml2.XMLHTTP")
http.open "GET",URL,False
http.send
strJson = http.responseText
Set j = ParseJson(strJson)
Result = "IP =" & j.query & vbCrlf &_
"ISP = "& j.isp & vbCrlf &_
"Country = "& j.country & vbCrlf &_
"TimeZone = "& j.timezone
Wscript.echo Result
'--------------------------------------------------------
Function ParseJson(strJson)
Set html = CreateObject("htmlfile")
Set window = html.parentWindow
window.execScript "var json = " & strJson, "JScript"
Set ParseJson = window.json
End Function
'--------------------------------------------------------
You can give a try for this code :
Dim http,URL
URL = "https://privateURL/jsonfile/"
Set http = CreateObject("Msxml2.XMLHTTP")
http.open "GET",URL,False
http.send
strJson = http.responseText
Result = Extract(strJson,"(\x22(.*)\x22)")
Arr = Split(Result,",")
For each Item in Arr
wscript.echo Item
Next
'******************************************
Function Extract(Data,Pattern)
Dim oRE,oMatches,Match,Line
set oRE = New RegExp
oRE.IgnoreCase = True
oRE.Global = True
oRE.Pattern = Pattern
set oMatches = oRE.Execute(Data)
If not isEmpty(oMatches) then
For Each Match in oMatches
Line = Line & Trim(Match.Value) & vbCrlf
Next
Extract = Line
End if
End Function
'******************************************

read attachment field from accdb with classic asp3/vbscript

I have an attachment field in my accdb database file,
i'm trying to read it to extract the attachments but it keep return empty values
recording to this post Using Attachment field with Classic ASP
there is no way to do it with adodb, is true? and if yes, what other ways i have to do that ?
this is the code that i'm running:
qid = request.querystring("qid")
wikiDbAddress="database/my.accdb"
set cnWiki=server.CreateObject("adodb.connection")
cnWiki.open "DRIVER={Microsoft Access Driver (*.mdb, *.accdb)}; DBQ=" & Server.MapPath(root&wikiDbAddress)
SQL = "select * from [Knowledge Base] where id="&qid
RS.Open SQL, cnWiki
do while not RS.eof
response.write RS("attachments")
RS.movenext
loop
For now, i did some workaround that helped me, it's not efficient but does the work.
qid = request.querystring("qid")
name = request.querystring("name")
SQL = "select Attachments.FileName as fname, Attachments.FileData as data, Attachments.FileType as FileType from [Knowledge Base] where Attachments.FileName='"&name&"' and id="&qid
RS.Open SQL, cnWiki
do while not RS.eof
if rs("fname")= name then
filename = Server.MapPath("/KB_"&qid&"_"&rs("fname"))
set fs=Server.CreateObject("Scripting.FileSystemObject")
if not fs.FileExists(filename) then
SaveBinaryData filename, rs("data")
data = readBinary(filename)
' CHR(255) = FF, CHR(170) = AA
data = Mid(data, 21, Len(data) - 20)
writeBinary data,filename
end if
set fs=nothing
downloadFromFile(filename )
exit do
else
RS.movenext
end if
loop
rs.close
cnWiki.close
function downloadFromFile(strFile )
Dim objConn
Dim intCampaignRecipientID
If strFile <> "" Then
Dim objStream
Set objStream = Server.CreateObject("ADODB.Stream")
objStream.Type = 1 'adTypeBinary
objStream.Open
objStream.LoadFromFile(strFile)
Response.Clear
'Response.ContentType = "image/jpeg"
Response.Addheader "Content-Disposition", "attachment; filename=" & strFile
Response.BinaryWrite objStream.Read
objStream.Close
Set objStream = Nothing
End If
End Function
Function SaveBinaryData(FileName, ByteArray)
Const adTypeBinary = 1
Const adSaveCreateOverWrite = 2
'Create Stream object
Dim BinaryStream
Set BinaryStream = CreateObject("ADODB.Stream")
'Specify stream type - we want To save binary data.
BinaryStream.Type = adTypeBinary
'Open the stream And write binary data To the object
BinaryStream.Open
BinaryStream.Write ByteArray
'Save binary data To disk
BinaryStream.SaveToFile FileName, adSaveCreateOverWrite
End Function
Function readBinary(path)
Dim a, fso, file, i, ts
Set fso = CreateObject("Scripting.FileSystemObject")
Set file = fso.getFile(path)
If isNull(file) Then
wscript.echo "File not found: " & path
Exit Function
End If
Set ts = file.OpenAsTextStream()
a = makeArray(file.size)
i = 0
While Not ts.atEndOfStream
a(i) = ts.read(1)
i = i + 1
Wend
ts.close
readBinary = Join(a,"")
End Function
Sub writeBinary(bstr, path)
Dim fso, ts
Set fso = CreateObject("Scripting.FileSystemObject")
On Error Resume Next
Set ts = fso.createTextFile(path)
If Err.number <> 0 Then
wscript.echo Err.message
Exit Sub
End If
On Error GoTo 0
ts.Write(bstr)
ts.Close
End Sub
Function makeArray(n)
Dim s
s = Space(n)
makeArray = Split(s," ")
End Function

Download files from a web page using VBA HTML

I have been trying desperately for months to automate a process whereby a csv file is downloaded, maned and saved in a given location.
so far I only managed with excel vba to open the web page and click the bottom to download the csv file, but the code stop and required a manual intervention to to be completed, i would like it to be fully automated if possible.
see the code used (I am not the author):
Sub WebDataExtraction()
Dim URL As String
Dim IeApp As Object
Dim IeDoc As Object
Dim ieForm As Object
Dim ieObj As Object
Dim objColl As Collection
URL = "http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT"
Set IeApp = CreateObject("InternetExplorer.Application")
IeApp.Visible = True
IeApp.Navigate URL
Do Until IeApp.ReadyState = READYSTATE_COMPLETE
Loop
Set IeDoc = IeApp.Document
For Each ele In IeApp.Document.getElementsByTagName("span")
If ele.innerHTML = "CSV" Then
Application.Wait (Now + TimeValue("0:00:15"))
DoEvents
ele.Click
'At this point you need to Save the document manually
' or figure out for yourself how to automate this interaction.
Test_Save_As_Set_Filename
File_Download_Click_Save
End If
Next
IeApp.Quit
End Sub"
thanks in advance
Nunzio
I am posting a second answer, since, as I believe my first answer is adequate for many similar applications, it does not work in this instance.
Why the other methods fail:
The .Click method: This raises a new window which expects user input at run-time, it doesn't seem to be possible to use the WinAPI to control this window. Or, at least not any way that I can determine. The code execution stops on the .Click line until the user manually intervenes, there is no way to use a GoTo or a Wait or any other method to circumvent this behavior.
Using a WinAPI function to download the source file directly does not work, since the button's URL does not contain a file, but rather a js function that serves the file dynamically.
Here is my proposed workaround solution:
You can read the webpage's .body.InnerText, write that out to a plain text/csv file using FileSystemObject and then with a combination of Regular Expressions and string manipulation, parse the data into a properly delimited CSV file.
Sub WebDataExtraction()
Dim url As String
Dim fName As String
Dim lnText As String
Dim varLine() As Variant
Dim vLn As Variant
Dim newText As String
Dim leftText As String
Dim breakTime As Date
'## Requires reference to Microsoft VBScript Regular Expressions 5.5
Dim REMatches As MatchCollection
Dim m As Match
'## Requires reference to Microsoft Internet Controls
Dim IeApp As InternetExplorer
'## Requires reference to Microsoft HTML object library
Dim IeDoc As HTMLDocument
Dim ele As HTMLFormElement
'## Requires reference to Microsoft Scripting Runtime
Dim fso As FileSystemObject
Dim f As TextStream
Dim ln As Long: ln = 1
breakTime = DateAdd("s", 60, Now)
url = "http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT"
Set IeApp = CreateObject("InternetExplorer.Application")
With IeApp
.Visible = True
.Navigate url
Do Until .ReadyState = 4
Loop
Set IeDoc = .Document
End With
'Wait for the data to display on the page
Do
If Now >= breakTime Then
If MsgBox("The website is taking longer than usual, would you like to continue waiting?", vbYesNo) = vbNo Then
GoTo EarlyExit
Else:
breakTime = DateAdd("s", 60, Now)
End If
End If
Loop While Trim(IeDoc.body.innerText) = "XML CSV Please Wait Data Loading Sorting"
'## Create the text file
fName = ActiveWorkbook.Path & "\exported-csv.csv"
Set fso = CreateObject("Scripting.FileSystemObject")
Set f = fso.OpenTextFile(fName, 2, True, -1)
f.Write IeDoc.body.innerText
f.Close
Set f = Nothing
'## Read the text file
Set f = fso.OpenTextFile(fName, 1, False, -1)
Do
lnText = f.ReadLine
'## The data starts on the 4th line in the InnerText.
If ln >= 4 Then
'## Return a collection of matching date/timestamps to which we can parse
Set REMatches = SplitLine(lnText)
newText = lnText
For Each m In REMatches
newText = Replace(newText, m.Value, ("," & m.Value & ","), , -1, vbTextCompare)
Next
'## Get rid of consecutive delimiters:
Do
newText = Replace(newText, ",,", ",")
Loop While InStr(1, newText, ",,", vbBinaryCompare) <> 0
'## Then use some string manipulation to parse out the first 2 columns which are
' not a match to the RegExp we used above.
leftText = Left(newText, InStr(1, newText, ",", vbTextCompare) - 1)
leftText = Left(leftText, 10) & "," & Right(leftText, Len(leftText) - 10)
newText = Right(newText, Len(newText) - InStr(1, newText, ",", vbTextCompare))
newText = leftText & "," & newText
'## Store these lines in an array
ReDim Preserve varLine(ln - 4)
varLine(ln - 4) = newText
End If
ln = ln + 1
Loop While Not f.AtEndOfStream
f.Close
'## Re-open the file for writing the delimited lines:
Set f = fso.OpenTextFile(fName, 2, True, -1)
'## Iterate over the array and write the data in CSV:
For Each vLn In varLine
'Omit blank lines, if any.
If Len(vLn) <> 0 Then f.WriteLine vLn
Next
f.Close
EarlyExit:
Set fso = Nothing
Set f = Nothing
IeApp.Quit
Set IeApp = Nothing
End Sub
Function SplitLine(strLine As String) As MatchCollection
'returns a RegExp MatchCollection of Date/Timestamps found in each line
'## Requires reference to Microsoft VBScript Regular Expressions 5.5
Dim RE As RegExp
Dim matches As MatchCollection
Set RE = CreateObject("vbscript.regexp")
With RE
.MultiLine = False
.Global = True
.IgnoreCase = True
'## Use this RegEx pattern to parse the date & timestamps:
.Pattern = "(19|20)\d\d[-](0[1-9]|1[012])[-](0[1-9]|[12][0-9]|3[01])[ ]\d\d?:\d\d:\d\d"
End With
Set matches = RE.Execute(strLine)
Set SplitLine = matches
End Function
EDIT
I tested my original answer code using the URL:
http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT#saveasCSV
But this method does not seem to work, for this site. The ele.Click doesn't seem to initiate the download, it just opens the data tabular on the webpage. To download, you need to do the right-click/save-as. If you have gotten that far (as I suspect, based on the subroutines you are calling, but for which you did not provide the code), then you can probably use the Win API to get the HWND of the Save dialog and possibly automate that event. Santosh provides some information on that:
VBA - Go to website and download file from save prompt
Here is also a good resource that should help solve your problem:
http://social.msdn.microsoft.com/Forums/en-US/beb6fa0e-fbc8-49df-9f2e-30f85d941fad/download-file-from-ie-with-vba
Original Answer
If you are able to determine the URL of the CSV then you can use this subroutine to open a connection to the CSV data and import it directly to the workbook. You may need to automate a text-to-columns operation on the imported data, but that can easily be replicated with the macro recorder. I put an example of this in the Test() subroutine below.
You could easily modify this to add the QueryTables in a new workbook, and then automate the SaveAs method on that workbook to save the file as a CSV.
This example uses a known URL for Yahoo Finance, Ford Motor Company, and will add a QueryTables with the CSV data in cell A1 of the active worksheet. This can be modified pretty easily to put it in another sheet, another workbook, etc.
Sub Test()
Dim MyURL as String
MyURL = "http://ichart.finance.yahoo.com/table.csv?s=GM&a0&b=1&c2010&d=05&e=20&f=2013&g=d&ignore=.csv"
OpenURL MyURL
'Explode the CSV data:
Range("A:A").TextToColumns Destination:=Range("A1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=False, Tab:=False, _
Semicolon:=False, Comma:=True, Space:=False, Other:=False, FieldInfo _
:=Array(Array(1, 3), Array(2, 1), Array(3, 1), Array(4, 1), Array(5, 1), Array(6, 1), _
Array(7, 1)), TrailingMinusNumbers:=True
End Sub
Private Sub OpenURL(fullURL As String)
'This opens the CSV in querytables connection.
On Error GoTo ErrOpenURL
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;" & fullURL, Destination:=Range("A1"))
.Name = fullURL
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = True
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
ExitOpenURL:
Exit Sub 'if all goes well, you can exit
'Error handling...
ErrOpenURL:
Err.Clear
bCancel = True
Resume ExitOpenURL
End Sub