Converting CSV data ([]string) to float64 with strconv, then summing the data
I am trying to sum data from a CSV file I created from a previous function. Here is a snippet of the file:
datetime,open,high,low,close,volume
2020-11-09 00:00,69.58,137.45,69.00,100.00,273517274.00
2020-11-10 00:00,104.65,128.80,101.75,107.00,141284399.00
2020-11-11 00:00,109.00,114.45,96.76,98.42,96648953.00
2020-11-12 00:00,95.98,106.60,89.15,90.00,149794913.00
[For context: this is historical price data for Rolls-Royce Holdings stock price from Yahoo finance. I plan to use up to 200 rows].
The problem I am facing is converting the []string data from the CSV to float64. The ParseFloat() function is trying to convert my headings and obviously can't, failing with 'invalid syntax'. Here is the error output:
Error converting string: strconv.ParseFloat: parsing "open": invalid syntax
Error converting string: strconv.ParseFloat: parsing "high": invalid syntax
Error converting string: strconv.ParseFloat: parsing "low": invalid syntax
Error converting string: strconv.ParseFloat: parsing "close": invalid syntax
Error converting string: strconv.ParseFloat: parsing "volume": invalid syntax
&{ 0 0 0 0 0}
My code is below for reference:
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"strconv"
)
const file = "./rr.csv"
// Data struct is the data from the csv file
type Data struct {
datetime string
open float64
high float64
low float64
close float64
volume float64
}
func readAmounts(r []string) (d *Data, err error) {
d = new(Data)
open := r[1]
d.open, err = strconv.ParseFloat(open, 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
high := r[2]
d.high, err = strconv.ParseFloat(high, 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
low := r[3]
d.low, err = strconv.ParseFloat(low, 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
close := r[4]
d.close, err = strconv.ParseFloat(close, 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
volume := r[5]
d.volume, err = strconv.ParseFloat(volume, 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
return d, nil
}
func main() {
csvFile, err := os.Open(file)
if err != nil {
log.Fatal(err)
}
r := csv.NewReader(csvFile)
lines, err := r.Read()
if err != nil {
log.Fatal(err)
}
data, err := readAmounts(lines)
if err != nil {
fmt.Printf("Error reading file: %v", err)
}
fmt.Println(data)
}
I am just printing the data to see if ParseFloat() works, and then I'll create a function to sum the columns.
So, what I'm asking is: how do I ignore the header line and just read through the numerical lines?
FYI: I've read other answers (e.g. FieldsPerRecord) but they haven't worked for my specific problem, as I am trying to sum the whole columns afterwards.
You can load the file with a CSV reader, then process it row by row:
package main
import (
"bytes"
"encoding/csv"
"fmt"
"io/ioutil"
"strconv"
)
const file = "./data.csv"
// Data struct is the data from the csv file
type Data struct {
datetime string
open float64
high float64
low float64
close float64
volume float64
}
func main() {
f, err := ioutil.ReadFile(file)
if err != nil {
panic(err)
}
rawData, err := readCsv(f, ',')
if err != nil {
panic(err)
}
amounts, err := readAmounts(rawData[1:])
if err != nil {
panic(err)
}
fmt.Printf("%+v\n", amounts)
}
func readAmounts(r [][]string) ([]Data, error) {
d := make([]Data, len(r))
var err error
for i, row := range r {
d[i].datetime = row[0]
d[i].open, err = strconv.ParseFloat(row[1], 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
d[i].high, err = strconv.ParseFloat(row[2], 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
d[i].low, err = strconv.ParseFloat(row[3], 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
d[i].close, err = strconv.ParseFloat(row[4], 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
d[i].volume, err = strconv.ParseFloat(row[5], 64)
if err != nil {
fmt.Printf("Error converting string: %v", err)
}
}
return d, nil
}
func readCsv(data []byte, separator rune) ([][]string, error) {
csvReader := csv.NewReader(bytes.NewReader(data))
csvReader.Comma = separator
lines, err := csvReader.ReadAll()
if err != nil {
return nil, err
}
return lines, nil
}
Example of output:
[{datetime:2020-11-09 00:00 open:69.58 high:137.45 low:69 close:100 volume:2.73517274e+08} {datetime:2020-11-10 00:00 open:104.65 high:128.8 low:101.75 close:107 volume:1.41284399e+08} {datetime:2020-11-11 00:00 open:109 high:114.45 low:96.76 close:98.42 volume:9.6648953e+07} {datetime:2020-11-12 00:00 open:95.98 high:106.6 low:89.15 close:90 volume:1.49794913e+08}]
NOTE:
You can find some examples of code that work with the CSV library in the following repository: https://github.com/alessiosavi/GoSFTPtoS3
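
If the next step is to sum the columns, here is a minimal sketch that works on the []Data returned by readAmounts above (SumColumns is just an illustrative name; it lives in the same main package, so it can read the unexported fields):

// SumColumns adds up the numeric columns of the parsed rows and
// returns the totals in the same order as the struct fields.
func SumColumns(rows []Data) (open, high, low, close, volume float64) {
	for _, d := range rows {
		open += d.open
		high += d.high
		low += d.low
		close += d.close
		volume += d.volume
	}
	return
}

Calling fmt.Println(SumColumns(amounts)) at the end of main prints the five column totals.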
I've commented the program so that it's easy to understand. The basic idea is to ignore the header. Also, since you're indexing into the fields of each record, it's better to put a check on the number of fields present in the record (FieldsPerRecord).
package main
import (
"encoding/csv"
"errors"
"fmt"
"io"
"log"
"os"
"strconv"
)
// file stores the filepath
const file = "./rr.csv"
// Data store metadata
type Data struct {
datetime string
open float64
high float64
low float64
close float64
volume float64
}
// s2f converts string to float64
func s2f(str string) (float64, error) {
f, err := strconv.ParseFloat(str, 64)
if err != nil {
return 0, fmt.Errorf("error converting string %q to float: %w", str, err)
}
return f, nil
}
// ReadAmounts processes the fields from the record and stores them in Data
func ReadAmounts(r []string) (*Data, error) {
var (
dt = r[0]
open = r[1]
high = r[2]
low = r[3]
close = r[4]
volume = r[5]
d = new(Data)
err error
)
d.datetime = dt
d.open, err = s2f(open)
if err != nil {
return nil, err
}
d.high, err = s2f(high)
if err != nil {
return nil, err
}
d.low, err = s2f(low)
if err != nil {
return nil, err
}
d.close, err = s2f(close)
if err != nil {
return nil, err
}
d.volume, err = s2f(volume)
if err != nil {
return nil, err
}
return d, nil
}
func main() {
// Open the file
file, err := os.Open(file)
if err != nil {
log.Fatalln(err)
}
defer file.Close()
// CSV Reader
r := csv.NewReader(file)
// Set Options for the reader
{
r.Comma = ',' // Delimiter
r.TrimLeadingSpace = true // Trim the leading spaces
r.FieldsPerRecord = 0 // Rows should have same number of columns as header
r.ReuseRecord = true // Reuse the same backing array (Efficient)
}
// Alternatively, r.ReadAll() could be also used and slicing it using [1:] ignores
// the header as well.
// Ignore header
_, _ = r.Read()
for {
// Read record (one by one)
record, err := r.Read()
if err != nil {
// Exit out. Done!
if errors.Is(err, io.EOF) {
break
}
// Log and continue
log.Printf("Error reading record: %v\n", err)
continue
}
// Process
data, err := ReadAmounts(record)
if err != nil {
// Log and continue
fmt.Printf("Error reading record: %v\n", err)
continue
}
// Print the filled Data struct
fmt.Printf("Record: %+v\n", *data)
}
}
Output:
Record: {datetime:2020-11-09 00:00 open:69.58 high:137.45 low:69 close:100 volume:2.73517274e+08}
Record: {datetime:2020-11-10 00:00 open:104.65 high:128.8 low:101.75 close:107 volume:1.41284399e+08}
Record: {datetime:2020-11-11 00:00 open:109 high:114.45 low:96.76 close:98.42 volume:9.6648953e+07}
Record: {datetime:2020-11-12 00:00 open:95.98 high:106.6 low:89.15 close:90 volume:1.49794913e+08}
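
If you also want the column sums the original question is after, a minimal sketch is a small accumulator in the same package (the Totals name is just for illustration):

// Totals accumulates column sums for Data records as they stream in.
type Totals struct {
	open, high, low, close, volume float64
}

// Add folds one parsed record into the running totals.
func (t *Totals) Add(d *Data) {
	t.open += d.open
	t.high += d.high
	t.low += d.low
	t.close += d.close
	t.volume += d.volume
}

Declare var totals Totals before the read loop, call totals.Add(data) after each successful ReadAmounts, and print it with fmt.Printf("%+v\n", totals) once the loop breaks.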
Some different options:
1. Skip parsing the first line. This assumes every file starts with a header.
2. Skip lines that have parsing errors. This is the easiest method, but hard to debug when things go wrong.
3. If the first line has parsing errors, skip it, because it is probably a header row (a minimal sketch of this option follows below).
On a side note, you should handle errors properly in your code, which you are not currently doing.
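
A minimal sketch of the third option, assuming the same rr.csv layout as in the question (a row is skipped only if its numeric field fails to parse, which catches the header):

package main

import (
	"encoding/csv"
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
)

func main() {
	f, err := os.Open("./rr.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	r := csv.NewReader(f)
	var sumClose float64
	for {
		rec, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		// If the close column doesn't parse, this is (probably) the
		// header row, so skip it instead of aborting.
		c, err := strconv.ParseFloat(rec[4], 64)
		if err != nil {
			continue
		}
		sumClose += c
	}
	fmt.Println("sum of close:", sumClose)
}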
Related
How can I use AES encryption on top of JSON marshalling using Go?
I tried to use the following tutorial: https://golangdocs.com/aes-encryption-decryption-in-golang In order to encrypt/decrypt text using AES256 with Go, It seems to work with plain strings, but not with JSON encoded structure. I don't understand why, because I thought JSON encoded data were strings as well. The part of the code dealing with plain strings is commented with Using trings. // Using strings pt := "This is a secret" c := EncryptAES([]byte(key), []byte(pt)) fmt.Printf("Initial string: %#v\n", pt) fmt.Printf("Coded: %v\n", c) decoded := DecryptAES([]byte(key), c) fmt.Printf("Decoded: %s\n", decoded) The part of the code after the comment Using JSON strings is the part which doesn't seem to word as expected. // Using JSON strings p2 := []record{{Name: "John", Age: 20}, {Name: "Jane", Age: 25}} m2, _ := json.Marshal(p2) fmt.Printf("m2 = %s\n", string(m2)) fmt.Printf("m2 = %#v\n", string(m2)) coded := EncryptAES([]byte(key), m2) decoded = DecryptAES([]byte(key), coded) fmt.Printf("Decoded: %s\n", decoded) What am I doing wrong? I'm using Go: go version go1.18 darwin/arm64 package main import ( "crypto/aes" "encoding/json" "fmt" ) func CheckError(err error) { if err != nil { panic(err) } } type record struct { Name string `json:"first_name"` Age int `json:"age"` } func main() { // cipher key key := "thisis32bitlongpassphraseimusing" fmt.Printf("len of key %d\n", len(key)) // Using strings pt := "This is a secret" c := EncryptAES([]byte(key), []byte(pt)) fmt.Printf("Initial string: %#v\n", pt) fmt.Printf("Coded: %v\n", c) decoded := DecryptAES([]byte(key), c) fmt.Printf("Decoded: %s\n", decoded) // Using JSON strings p2 := []record{{Name: "John", Age: 20}, {Name: "Jane", Age: 25}} m2, _ := json.Marshal(p2) fmt.Printf("m2 = %s\n", string(m2)) fmt.Printf("m2 = %#v\n", string(m2)) coded := EncryptAES([]byte(key), m2) decoded = DecryptAES([]byte(key), coded) fmt.Printf("Decoded: %s\n", decoded) } func EncryptAES(key []byte, plaintext []byte) []byte { c, err := aes.NewCipher(key) CheckError(err) out := make([]byte, len(plaintext)) c.Encrypt(out, []byte(plaintext)) return out } func DecryptAES(key []byte, ct []byte) []byte { c, err := aes.NewCipher(key) CheckError(err) pt := make([]byte, len(ct)) c.Decrypt(pt, ct) return pt }
Here is a working implementation of the encryptFile and decryptFile functions: (Based on: https://medium.com/@mertkimyonsen/encrypt-a-file-using-go-f1fe3bc7c635) func encryptFile(key []byte, plainText []byte) []byte { // Creating block of algorithm block, err := aes.NewCipher(key) if err != nil { log.Fatalf("cipher err: %v", err.Error()) } // Creating GCM mode gcm, err := cipher.NewGCM(block) if err != nil { log.Fatalf("cipher GCM err: %v", err.Error()) } // Generating random nonce nonce := make([]byte, gcm.NonceSize()) if _, err := io.ReadFull(rand.Reader, nonce); err != nil { log.Fatalf("nonce err: %v", err.Error()) } // Encrypt and prepend the nonce cipherText := gcm.Seal(nonce, nonce, plainText, nil) return cipherText } func decryptFile(key []byte, cipherText []byte) []byte { // Creating block of algorithm block, err := aes.NewCipher(key) if err != nil { log.Fatalf("cipher err: %v", err.Error()) } // Creating GCM mode gcm, err := cipher.NewGCM(block) if err != nil { log.Fatalf("cipher GCM err: %v", err.Error()) } // Detach the nonce and decrypt nonce := cipherText[:gcm.NonceSize()] cipherText = cipherText[gcm.NonceSize():] plainText, err := gcm.Open(nil, nonce, cipherText, nil) if err != nil { log.Fatalf("decrypt file err: %v", err.Error()) } return plainText }
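
For the JSON case from the original question, a minimal sketch of wiring those functions up (it assumes they sit in the same main package as the record type, with encoding/json, fmt and log imported alongside the crypto packages):

func main() {
	key := []byte("thisis32bitlongpassphraseimusing") // 32 bytes -> AES-256

	p2 := []record{{Name: "John", Age: 20}, {Name: "Jane", Age: 25}}
	plain, err := json.Marshal(p2)
	if err != nil {
		log.Fatal(err)
	}

	ct := encryptFile(key, plain) // nonce is prepended to the ciphertext
	pt := decryptFile(key, ct)    // recovers the original JSON bytes

	var decoded []record
	if err := json.Unmarshal(pt, &decoded); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("decoded: %+v\n", decoded)
}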
Golang Read JSON from S3 into struct in memory
I have a JSON file in S3 that takes the format of the following struct: type StockInfo []struct { Ticker string `json:"ticker"` BoughtPrice string `json:"boughtPrice"` NumberOfShares string `json:"numberOfShares"` } and I want to read it into a struct value from S3. In python the code would look something like this: import boto3 import json s3 = boto3.client('s3', 'us-east-1') obj = s3.get_object(Bucket=os.environ["BucketName"], Key=os.environ["Key"]) fileContents = obj['Body'].read().decode('utf-8') json_content = json.loads(fileContents) However I'm kinda stuck on how to make this happen in Go. I've gotten this far: package main import ( "archive/tar" "bytes" "fmt" "log" "os" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/s3/s3manager" "github.com/joho/godotenv" ) type StockInfo []struct { Ticker string `json:"ticker"` BoughtPrice string `json:"boughtPrice"` NumberOfShares string `json:"numberOfShares"` } func init() { // loads values from .env into the system if err := godotenv.Load(); err != nil { log.Print("No .env file found") } return } func main() { // Store the PATH environment variable in a variable sess, err := session.NewSession(&aws.Config{ Region: aws.String("us-east-1")}, ) if err != nil { panic(err) } s3Client := s3.New(sess) bucket := "ian-test-bucket-go-python" key := "StockInfo.json" requestInput := &s3.GetObjectInput{ Bucket: aws.String(bucket), Key: aws.String(key), } result, err := s3Client.GetObject(requestInput) if err != nil { fmt.Println(err) } fmt.Println(result) which returns to me the body/object buffer, but im not sure how to read that into a string so I can marshal it into my struct. I found this code in a similar question: requestInput := &s3.GetObjectInput{ Bucket: aws.String(bucket), Key: aws.String(key), } buf := new(aws.WriteAtBuffer) numBytes, _ := *s3manager.Downloader.Download(buf, requestInput) tr := tar.NewReader(bytes.NewReader(buf.Bytes())) but I get the following errors: not enough arguments in call to method expression s3manager.Downloader.Download have (*aws.WriteAtBuffer, *s3.GetObjectInput) want (s3manager.Downloader, io.WriterAt, *s3.GetObjectInput, ...func(*s3manager.Downloader)) multiple-value s3manager.Downloader.Download() in single-value context Can anyone point me in the right direction? kinda frustrating how hard it seems to do this compared to python.
I was able to do it with the following code: requestInput := &s3.GetObjectInput{ Bucket: aws.String(bucket), Key: aws.String(key), } result, err := s3Client.GetObject(requestInput) if err != nil { fmt.Println(err) } defer result.Body.Close() body1, err := ioutil.ReadAll(result.Body) if err != nil { fmt.Println(err) } bodyString1 := fmt.Sprintf("%s", body1) var s3data StockInfo decoder := json.NewDecoder(strings.NewReader(bodyString1)) err = decoder.Decode(&s3data) if err != nil { fmt.Println("twas an error") } fmt.Println(s3data)
Alternative solution using json.Unmarshal, based on aws-sdk-go-v2 ... params := &s3.GetObjectInput{ Bucket: aws.String(s3Record.S3.Bucket.Name), Key: aws.String(s3Record.S3.Object.Key), } result, err := client.GetObject(context.TODO(), params) if err != nil { panic(err) } defer result.Body.Close() // capture all bytes from the object body b, err := ioutil.ReadAll(result.Body) if err != nil { panic(err) } var temp StockInfo if err = json.Unmarshal(b, &temp); err != nil { panic(err) } fmt.Println("res: ", temp)
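
A slightly shorter variant, assuming the same s3Client, bucket, key and StockInfo type as above, decodes straight from the response body instead of buffering it into a string first (sketch only):

result, err := s3Client.GetObject(&s3.GetObjectInput{
	Bucket: aws.String(bucket),
	Key:    aws.String(key),
})
if err != nil {
	log.Fatal(err)
}
defer result.Body.Close()

var s3data StockInfo
if err := json.NewDecoder(result.Body).Decode(&s3data); err != nil {
	log.Fatal(err)
}
fmt.Printf("%+v\n", s3data)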
How to compare JSON with varying order?
I'm attempting to implement testing with golden files, however, the JSON my function generates varies in order but maintains the same values. I've implemented the comparison method used here: How to compare two JSON requests? But it's order dependent. And as stated here by brad: JSON objects are unordered, just like Go maps. If you're depending on the order that a specific implementation serializes your JSON objects in, you have a bug. I've written some sample code that simulated my predicament: package main import ( "bufio" "encoding/json" "fmt" "io/ioutil" "math/rand" "os" "reflect" "time" ) type example struct { Name string Earnings float64 } func main() { slice := GetSlice() gfile, err := ioutil.ReadFile("testdata/example.golden") if err != nil { fmt.Println(err) fmt.Println("Failed reading golden file") } testJSON, err := json.Marshal(slice) if err != nil { fmt.Println(err) fmt.Println("Error marshalling slice") } equal, err := JSONBytesEqual(gfile, testJSON) if err != nil { fmt.Println(err) fmt.Println("Error comparing JSON") } if !equal { fmt.Println("Restults don't match JSON") } else { fmt.Println("Success!") } } func GetSlice() []example { t := []example{ example{"Penny", 50.0}, example{"Sheldon", 70.0}, example{"Raj", 20.0}, example{"Bernadette", 200.0}, example{"Amy", 250.0}, example{"Howard", 1.0}} rand.Seed(time.Now().UnixNano()) rand.Shuffle(len(t), func(i, j int) { t[i], t[j] = t[j], t[i] }) return t } func JSONBytesEqual(a, b []byte) (bool, error) { var j, j2 interface{} if err := json.Unmarshal(a, &j); err != nil { return false, err } if err := json.Unmarshal(b, &j2); err != nil { return false, err } return reflect.DeepEqual(j2, j), nil } func WriteTestSliceToFile(arr []example, filename string) { file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { fmt.Println("failed creating file: %s", err) } datawriter := bufio.NewWriter(file) marshalledStruct, err := json.Marshal(arr) if err != nil { fmt.Println("Error marshalling json") fmt.Println(err) } _, err = datawriter.Write(marshalledStruct) if err != nil { fmt.Println("Error writing to file") fmt.Println(err) } datawriter.Flush() file.Close() }
JSON arrays are ordered. The json.Marshal function preserves order when encoding a slice to a JSON array. JSON objects are not ordered. The json.Marshal function writes object members in sorted key order, as described in the documentation. The bradfitz comment on JSON object ordering is not relevant to this question: the application in the question is working with a JSON array, not a JSON object. The package was updated to write object fields in sorted key order a couple of years after Brad's comment. To compare slices while ignoring order, sort the two slices before comparing. This can be done before encoding to JSON or after decoding from JSON. sort.Slice(slice, func(i, j int) bool { if slice[i].Name != slice[j].Name { return slice[i].Name < slice[j].Name } return slice[i].Earnings < slice[j].Earnings })
For unit testing, you could use assert.JSONEq from Testify. If you need to do it programmatically, you could follow the code of the JSONEq function. https://github.com/stretchr/testify/blob/master/assert/assertions.go#L1551
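
A minimal sketch of the Testify route in a test file (assuming github.com/stretchr/testify is available); note that JSONEq ignores object key order but not array element order, so the slice still has to be sorted deterministically before marshalling:

package main

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestJSONEq(t *testing.T) {
	// Passes: key order inside a JSON object is irrelevant to JSONEq.
	assert.JSONEq(t,
		`{"Name":"Penny","Earnings":50}`,
		`{"Earnings":50,"Name":"Penny"}`)
}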
golang json array unmarshal into type struct
What is causing this to not break apart json? Anyone have an idea on how type is wrong? With error of: {"Rates":[{"Symbol":"EURAUD","Bid":"1.45492","Ask":"1.45608","Spread":"11.60","ProductType":"1",},{"Symbol":"Copper","Bid":"2.6068","Ask":"2.6088","Spread":"2.00","ProductType":"3",},{"Symbol":"AUDNZD","Bid":"1.08999","Ask":"1.09177","Spread":"17.80","ProductType":"1",},{"Symbol":"EURSEK","Bid":"9.63786","Ask":"9.65569","Spread":"178.30","ProductType":"1",},{"Symbol":"CADJPY","Bid":"81.629","Ask":"81.708","Spread":"7.90","ProductType":"1",},{"Symbol":"USDCHF","Bid":"0.99463","Ask":"0.99527","Spread":"6.40","ProductType":"1",},{"Symbol":"USDCNH","Bid":"6.8973","Ask":"6.8993","Spread":"20.00","ProductType":"1",},{"Symbol":"US30","Bid":"20950.00","Ask":"20952.00","Spread":"2.00","ProductType":"2",},{"Symbol":"XAGUSD","Bid":"17.202","Ask":"17.25","Spread":"4.80","ProductType":"5",},{"Symbol":"USDSEK","Bid":"8.84794","Ask":"8.85542","Spread":"74.80","ProductType":"1",},{"Symbol":"AUDCHF","Bid":"0.74417","Ask":"0.74588","Spread":"17.10","ProductType":"1",},{"Symbol":"GER30","Bid":"12431.05","Ask":"12433.45","Spread":"2.40","ProductType":"2",},{"Symbol":"USOil","Bid":"49.16","Ask":"49.21","Spread":"5.00","ProductType":"3",},{"Symbol":"GBPNZD","Bid":"1.88546","Ask":"1.88762","Spread":"21.60","ProductType":"1",},{"Symbol":"EURCAD","Bid":"1.48748","Ask":"1.48893","Spread":"14.50","ProductType":"1",},{"Symbol":"EURUSD","Bid":"1.08977","Ask":"1.08997","Spread":"2.00","ProductType":"1",},{"Symbol":"AUS200","Bid":"5922.00","Ask":"5924.00","Spread":"2.00","ProductType":"2",},{"Symbol":"EURJPY","Bid":"121.512","Ask":"121.57","Spread":"5.80","ProductType":"1",},{"Symbol":"EURGBP","Bid":"0.84132","Ask":"0.84208","Spread":"7.60","ProductType":"1",},{"Symbol":"EURNOK","Bid":"9.34136","Ask":"9.36364","Spread":"222.80","ProductType":"1",},{"Symbol":"USDCAD","Bid":"1.36524","Ask":"1.36588","Spread":"6.40","ProductType":"1",},{"Symbol":"GBPCHF","Bid":"1.28753","Ask":"1.28922","Spread":"16.90","ProductType":"1",},{"Symbol":"GBPAUD","Bid":"1.72838","Ask":"1.7303","Spread":"19.20","ProductType":"1",},{"Symbol":"USDJPY","Bid":"111.51","Ask":"111.537","Spread":"2.70","ProductType":"1",},{"Symbol":"USDNOK","Bid":"8.57607","Ask":"8.58684","Spread":"107.70","ProductType":"1",},{"Symbol":"AUDCAD","Bid":"1.02173","Ask":"1.02347","Spread":"17.40","ProductType":"1",},{"Symbol":"FRA40","Bid":"5259.60","Ask":"5267.30","Spread":"7.70","ProductType":"2",},{"Symbol":"AUDUSD","Bid":"0.74858","Ask":"0.74899","Spread":"4.10","ProductType":"1",},{"Symbol":"USDHKD","Bid":"7.77769","Ask":"7.77956","Spread":"18.70","ProductType":"1",},{"Symbol":"NZDCHF","Bid":"0.68192","Ask":"0.68406","Spread":"21.40","ProductType":"1",},{"Symbol":"EURTRY","Bid":"3.86851","Ask":"3.87478","Spread":"62.70","ProductType":"1",},{"Symbol":"AUDJPY","Bid":"83.469","Ask":"83.543","Spread":"7.40","ProductType":"1",},{"Symbol":"USDZAR","Bid":"13.3464","Ask":"13.3941","Spread":"477.00","ProductType":"1",},{"Symbol":"Bund","Bid":"161.78","Ask":"161.81","Spread":"3.00","ProductType":"4",},{"Symbol":"USDMXN","Bid":"18.81249","Ask":"18.83178","Spread":"192.90","ProductType":"1",},{"Symbol":"USDTRY","Bid":"3.54925","Ask":"3.5536","Spread":"43.50","ProductType":"1",},{"Symbol":"USDOLLAR","Bid":"12232.00","Ask":"12237.00","Spread":"5.00","ProductType":"7",},{"Symbol":"JPN225","Bid":"19195.00","Ask":"19205.00","Spread":"10.00","ProductType":"2",},{"Symbol":"UK100","Bid":"7197.80","Ask":"7198.90","Spread":"1.10","ProductType":"2",},{"Symbol":"HKG33","Bid":"24650.00","Ask":
"24655.00","Spread":"5.00","ProductType":"2",},{"Symbol":"CADCHF","Bid":"0.72748","Ask":"0.72979","Spread":"23.10","ProductType":"1",},{"Symbol":"NAS100","Bid":"5582.80","Ask":"5583.80","Spread":"1.00","ProductType":"2",},{"Symbol":"NGAS","Bid":"3.2645","Ask":"3.2755","Spread":"11.00","ProductType":"3",},{"Symbol":"ZARJPY","Bid":"8.323","Ask":"8.361","Spread":"3.80","ProductType":"1",},{"Symbol":"GBPCAD","Bid":"1.76724","Ask":"1.76912","Spread":"18.80","ProductType":"1",},{"Symbol":"ESP35","Bid":"10712.00","Ask":"10720.00","Spread":"8.00","ProductType":"2",},{"Symbol":"GBPUSD","Bid":"1.29452","Ask":"1.29527","Spread":"7.50","ProductType":"1",},{"Symbol":"SPX500","Bid":"2384.18","Ask":"2384.68","Spread":"5.00","ProductType":"2",},{"Symbol":"GBPJPY","Bid":"144.336","Ask":"144.448","Spread":"11.20","ProductType":"1",},{"Symbol":"EUSTX50","Bid":"3554.00","Ask":"3555.00","Spread":"1.00","ProductType":"2",},{"Symbol":"TRYJPY","Bid":"31.378","Ask":"31.44","Spread":"6.20","ProductType":"1",},{"Symbol":"NZDCAD","Bid":"0.93642","Ask":"0.93862","Spread":"22.00","ProductType":"1",},{"Symbol":"EURNZD","Bid":"1.58644","Ask":"1.58916","Spread":"27.20","ProductType":"1",},{"Symbol":"XAUUSD","Bid":"1267.79","Ask":"1268.26","Spread":"47.00","ProductType":"5",},{"Symbol":"NZDUSD","Bid":"0.68587","Ask":"0.68692","Spread":"10.50","ProductType":"1",},{"Symbol":"NZDJPY","Bid":"76.489","Ask":"76.607","Spread":"11.80","ProductType":"1",},{"Symbol":"UKOil","Bid":"51.84","Ask":"51.89","Spread":"5.00","ProductType":"3",},{"Symbol":"CHFJPY","Bid":"112.02","Ask":"112.148","Spread":"12.80","ProductType":"1",},{"Symbol":"EURCHF","Bid":"1.08416","Ask":"1.08459","Spread":"4.30","ProductType":"1",}]} {"Rates":[{"Symbol":"EURAUD","Bid":"1.45492","Ask":"1.45608","Spread":"11.60","ProductType":"1"},{"Symbol":"Copper","Bid":"2.6068","Ask":"2.6088","Spread":"2.00","ProductType":"3"},{"Symbol":"AUDNZD","Bid":"1.08999","Ask":"1.09177","Spread":"17.80","ProductType":"1"},{"Symbol":"EURSEK","Bid":"9.63786","Ask":"9.65569","Spread":"178.30","ProductType":"1"},{"Symbol":"CADJPY","Bid":"81.629","Ask":"81.708","Spread":"7.90","ProductType":"1"},{"Symbol":"USDCHF","Bid":"0.99463","Ask":"0.99527","Spread":"6.40","ProductType":"1"},{"Symbol":"USDCNH","Bid":"6.8973","Ask":"6.8993","Spread":"20.00","ProductType":"1"},{"Symbol":"US30","Bid":"20950.00","Ask":"20952.00","Spread":"2.00","ProductType":"2"},{"Symbol":"XAGUSD","Bid":"17.202","Ask":"17.25","Spread":"4.80","ProductType":"5"},{"Symbol":"USDSEK","Bid":"8.84794","Ask":"8.85542","Spread":"74.80","ProductType":"1"},{"Symbol":"AUDCHF","Bid":"0.74417","Ask":"0.74588","Spread":"17.10","ProductType":"1"},{"Symbol":"GER30","Bid":"12431.05","Ask":"12433.45","Spread":"2.40","ProductType":"2"},{"Symbol":"USOil","Bid":"49.16","Ask":"49.21","Spread":"5.00","ProductType":"3"},{"Symbol":"GBPNZD","Bid":"1.88546","Ask":"1.88762","Spread":"21.60","ProductType":"1"},{"Symbol":"EURCAD","Bid":"1.48748","Ask":"1.48893","Spread":"14.50","ProductType":"1"},{"Symbol":"EURUSD","Bid":"1.08977","Ask":"1.08997","Spread":"2.00","ProductType":"1"},{"Symbol":"AUS200","Bid":"5922.00","Ask":"5924.00","Spread":"2.00","ProductType":"2"},{"Symbol":"EURJPY","Bid":"121.512","Ask":"121.57","Spread":"5.80","ProductType":"1"},{"Symbol":"EURGBP","Bid":"0.84132","Ask":"0.84208","Spread":"7.60","ProductType":"1"},{"Symbol":"EURNOK","Bid":"9.34136","Ask":"9.36364","Spread":"222.80","ProductType":"1"},{"Symbol":"USDCAD","Bid":"1.36524","Ask":"1.36588","Spread":"6.40","ProductType":"1"},{"Symbol":"GBPCHF","Bid":"1.28753","Ask":"1.289
22","Spread":"16.90","ProductType":"1"},{"Symbol":"GBPAUD","Bid":"1.72838","Ask":"1.7303","Spread":"19.20","ProductType":"1"},{"Symbol":"USDJPY","Bid":"111.51","Ask":"111.537","Spread":"2.70","ProductType":"1"},{"Symbol":"USDNOK","Bid":"8.57607","Ask":"8.58684","Spread":"107.70","ProductType":"1"},{"Symbol":"AUDCAD","Bid":"1.02173","Ask":"1.02347","Spread":"17.40","ProductType":"1"},{"Symbol":"FRA40","Bid":"5259.60","Ask":"5267.30","Spread":"7.70","ProductType":"2"},{"Symbol":"AUDUSD","Bid":"0.74858","Ask":"0.74899","Spread":"4.10","ProductType":"1"},{"Symbol":"USDHKD","Bid":"7.77769","Ask":"7.77956","Spread":"18.70","ProductType":"1"},{"Symbol":"NZDCHF","Bid":"0.68192","Ask":"0.68406","Spread":"21.40","ProductType":"1"},{"Symbol":"EURTRY","Bid":"3.86851","Ask":"3.87478","Spread":"62.70","ProductType":"1"},{"Symbol":"AUDJPY","Bid":"83.469","Ask":"83.543","Spread":"7.40","ProductType":"1"},{"Symbol":"USDZAR","Bid":"13.3464","Ask":"13.3941","Spread":"477.00","ProductType":"1"},{"Symbol":"Bund","Bid":"161.78","Ask":"161.81","Spread":"3.00","ProductType":"4"},{"Symbol":"USDMXN","Bid":"18.81249","Ask":"18.83178","Spread":"192.90","ProductType":"1"},{"Symbol":"USDTRY","Bid":"3.54925","Ask":"3.5536","Spread":"43.50","ProductType":"1"},{"Symbol":"USDOLLAR","Bid":"12232.00","Ask":"12237.00","Spread":"5.00","ProductType":"7"},{"Symbol":"JPN225","Bid":"19195.00","Ask":"19205.00","Spread":"10.00","ProductType":"2"},{"Symbol":"UK100","Bid":"7197.80","Ask":"7198.90","Spread":"1.10","ProductType":"2"},{"Symbol":"HKG33","Bid":"24650.00","Ask":"24655.00","Spread":"5.00","ProductType":"2"},{"Symbol":"CADCHF","Bid":"0.72748","Ask":"0.72979","Spread":"23.10","ProductType":"1"},{"Symbol":"NAS100","Bid":"5582.80","Ask":"5583.80","Spread":"1.00","ProductType":"2"},{"Symbol":"NGAS","Bid":"3.2645","Ask":"3.2755","Spread":"11.00","ProductType":"3"},{"Symbol":"ZARJPY","Bid":"8.323","Ask":"8.361","Spread":"3.80","ProductType":"1"},{"Symbol":"GBPCAD","Bid":"1.76724","Ask":"1.76912","Spread":"18.80","ProductType":"1"},{"Symbol":"ESP35","Bid":"10712.00","Ask":"10720.00","Spread":"8.00","ProductType":"2"},{"Symbol":"GBPUSD","Bid":"1.29452","Ask":"1.29527","Spread":"7.50","ProductType":"1"},{"Symbol":"SPX500","Bid":"2384.18","Ask":"2384.68","Spread":"5.00","ProductType":"2"},{"Symbol":"GBPJPY","Bid":"144.336","Ask":"144.448","Spread":"11.20","ProductType":"1"},{"Symbol":"EUSTX50","Bid":"3554.00","Ask":"3555.00","Spread":"1.00","ProductType":"2"},{"Symbol":"TRYJPY","Bid":"31.378","Ask":"31.44","Spread":"6.20","ProductType":"1"},{"Symbol":"NZDCAD","Bid":"0.93642","Ask":"0.93862","Spread":"22.00","ProductType":"1"},{"Symbol":"EURNZD","Bid":"1.58644","Ask":"1.58916","Spread":"27.20","ProductType":"1"},{"Symbol":"XAUUSD","Bid":"1267.79","Ask":"1268.26","Spread":"47.00","ProductType":"5"},{"Symbol":"NZDUSD","Bid":"0.68587","Ask":"0.68692","Spread":"10.50","ProductType":"1"},{"Symbol":"NZDJPY","Bid":"76.489","Ask":"76.607","Spread":"11.80","ProductType":"1"},{"Symbol":"UKOil","Bid":"51.84","Ask":"51.89","Spread":"5.00","ProductType":"3"},{"Symbol":"CHFJPY","Bid":"112.02","Ask":"112.148","Spread":"12.80","ProductType":"1"},{"Symbol":"EURCHF","Bid":"1.08416","Ask":"1.08459","Spread":"4.30","ProductType":"1"}]} panic: json: cannot unmarshal string into Go value of type main.MsgRatesArray goroutine 1 [running]: main.main() /tmp/test.go:50 +0x52c With this code: package main import ( "log" "fmt" "net/http" "bytes" "io/ioutil" "strings" "github.com/pquerna/ffjson/ffjson" ) type MsgRatesArray struct { RateQuote []MsgRateQuoteJson 
`json:"Rates"` } type MsgRateQuoteJson struct { SymbolName string `json:"Symbol"` SymbolBid int64 `json:"Bid"` SymbolAsk int64 `json:"Ask"` SymbolSpread int64 `json:"Spread"` SymbolPT string `json:"ProductType"` } var respBytes []byte func main() { var msg MsgRatesArray response,err := http.Get("https://ratesjson.fxcm.com/DataDisplayer") if err != nil { log.Fatal(err) } defer response.Body.Close() respBytes, err := ioutil.ReadAll(response.Body) //Get bad JSON into string jsonBytes := respBytes[bytes.Index(respBytes, []byte("{")):bytes.LastIndex(respBytes, []byte("}"))+1] jsonString := string(jsonBytes) fmt.Println(jsonString) // Use a positive number to indicate max replacement count to fix bad JSON string so we can remove comma in JSON ARRAY. result := strings.Replace(jsonString, "\",}", "\"}", -1) fmt.Println(result) // Turn GOOD JSON string back to JSON BYTES (BIN) jsonBytes2, err := ffjson.Marshal(result) if err != nil { panic(err) } // Parse JSON ! err = ffjson.Unmarshal(jsonBytes2, &msg) if err != nil { panic(err) } } What is wrong with type for json array?
In the returned json Bid, Ask, and Spread are json strings not integers, so change your type definition to this: type MsgRateQuoteJson struct { SymbolName string `json:"Symbol"` SymbolBid string `json:"Bid"` SymbolAsk string `json:"Ask"` SymbolSpread string `json:"Spread"` SymbolPT string `json:"ProductType"` } And marshaling a json string to get json bytes is not the correct way, just convert the string to a byte slice like this: jsonBytes2 := []byte(result) ... and you're good to go: // Parse JSON ! err = ffjson.Unmarshal(jsonBytes2, &msg) if err != nil { panic(err) } Edit: If you want to convert those strings into specific types during the json unmarshaling, you can do so by defining an UnmarshalJSON method on the *MsgRateQuoteJson type, plus with the help of the strconv package, like this: type MsgRateQuoteJson struct { SymbolName string `json:"Symbol"` SymbolBid float64 `json:"Bid"` SymbolAsk float64 `json:"Ask"` SymbolSpread float64 `json:"Spread"` SymbolPT int64 `json:"ProductType"` } func (msg *MsgRateQuoteJson) UnmarshalJSON(data []byte) (err error) { m := map[string]string{} if err = ffjson.Unmarshal(data, &m); err != nil { return err } msg.SymbolName = m["Symbol"] if msg.SymbolBid, err = strconv.ParseFloat(m["Bid"], 64); err != nil { return err } if msg.SymbolAsk, err = strconv.ParseFloat(m["Ask"], 64); err != nil { return err } if msg.SymbolSpread, err = strconv.ParseFloat(m["Spread"], 64); err != nil { return err } if msg.SymbolPT, err = strconv.ParseInt(m["ProductType"], 10, 64); err != nil { return err } return nil }
Efficient read and write CSV in Go
The Go code below reads in a 10,000 record CSV (of timestamp times and float values), runs some operations on the data, and then writes the original values to another CSV along with an additional column for score. However it is terribly slow (i.e. hours, but most of that is calculateStuff()) and I'm curious if there are any inefficiencies in the CSV reading/writing I can take care of. package main import ( "encoding/csv" "log" "os" "strconv" ) func ReadCSV(filepath string) ([][]string, error) { csvfile, err := os.Open(filepath) if err != nil { return nil, err } defer csvfile.Close() reader := csv.NewReader(csvfile) fields, err := reader.ReadAll() return fields, nil } func main() { // load data csv records, err := ReadCSV("./path/to/datafile.csv") if err != nil { log.Fatal(err) } // write results to a new csv outfile, err := os.Create("./where/to/write/resultsfile.csv")) if err != nil { log.Fatal("Unable to open output") } defer outfile.Close() writer := csv.NewWriter(outfile) for i, record := range records { time := record[0] value := record[1] // skip header row if i == 0 { writer.Write([]string{time, value, "score"}) continue } // get float values floatValue, err := strconv.ParseFloat(value, 64) if err != nil { log.Fatal("Record: %v, Error: %v", floatValue, err) } // calculate scores; THIS EXTERNAL METHOD CANNOT BE CHANGED score := calculateStuff(floatValue) valueString := strconv.FormatFloat(floatValue, 'f', 8, 64) scoreString := strconv.FormatFloat(prob, 'f', 8, 64) //fmt.Printf("Result: %v\n", []string{time, valueString, scoreString}) writer.Write([]string{time, valueString, scoreString}) } writer.Flush() } I'm looking for help making this CSV read/write template code as fast as possible. For the scope of this question we need not worry about the calculateStuff method.
You're loading the whole file into memory first and then processing it, which can be slow with a big file. You need to loop, call .Read, and process one line at a time. func processCSV(rc io.Reader) (ch chan []string) { ch = make(chan []string, 10) go func() { r := csv.NewReader(rc) if _, err := r.Read(); err != nil { //read header log.Fatal(err) } defer close(ch) for { rec, err := r.Read() if err != nil { if err == io.EOF { break } log.Fatal(err) } ch <- rec } }() return } (playground) // note: it's roughly based on DaveC's comment.
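
A minimal sketch of consuming that channel from main (calculateStuff and the output writer are left out, as in the question):

csvIn, err := os.Open("./path/to/datafile.csv")
if err != nil {
	log.Fatal(err)
}
defer csvIn.Close()

for rec := range processCSV(csvIn) {
	floatValue, err := strconv.ParseFloat(rec[1], 64)
	if err != nil {
		log.Fatalf("record %v: %v", rec, err)
	}
	_ = calculateStuff(floatValue) // per-record work goes here
}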
This is essentially Dave C's answer from the comments section: package main import ( "encoding/csv" "io" "log" "os" "strconv" ) func main() { // setup reader csvIn, err := os.Open("./path/to/datafile.csv") if err != nil { log.Fatal(err) } r := csv.NewReader(csvIn) // setup writer csvOut, err := os.Create("./where/to/write/resultsfile.csv") if err != nil { log.Fatal("Unable to open output") } w := csv.NewWriter(csvOut) defer csvOut.Close() // handle header rec, err := r.Read() if err != nil { log.Fatal(err) } rec = append(rec, "score") if err = w.Write(rec); err != nil { log.Fatal(err) } for { rec, err = r.Read() if err != nil { if err == io.EOF { break } log.Fatal(err) } // get float value value := rec[1] floatValue, err := strconv.ParseFloat(value, 64) if err != nil { log.Fatalf("Record, error: %v, %v", value, err) } // calculate scores; THIS EXTERNAL METHOD CANNOT BE CHANGED score := calculateStuff(floatValue) scoreString := strconv.FormatFloat(score, 'f', 8, 64) rec = append(rec, scoreString) if err = w.Write(rec); err != nil { log.Fatal(err) } w.Flush() } } Note of course the logic is all jammed into main(), better would be to split it into several functions, but that's beyond the scope of this question.
encoding/csv is indeed very slow on big files, as it performs a lot of allocations. Since your format is so simple I recommend using strings.Split instead which is much faster. If even that is not fast enough you can consider implementing the parsing yourself using strings.IndexByte which is implemented in assembly: http://golang.org/src/strings/strings_decl.go?s=274:310#L1 Having said that, you should also reconsider using ReadAll if the file is larger than your memory.
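
A minimal sketch of the strings.Split route for this simple, quote-free format, reading line by line with bufio so the whole file never sits in memory (here it just sums the value column as a stand-in for the real per-record work):

package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"strconv"
	"strings"
)

func main() {
	f, err := os.Open("./path/to/datafile.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	var sum float64
	sc := bufio.NewScanner(f)
	sc.Scan() // skip the header line
	for sc.Scan() {
		fields := strings.Split(sc.Text(), ",")
		v, err := strconv.ParseFloat(fields[1], 64)
		if err != nil {
			log.Fatalf("bad record %q: %v", sc.Text(), err)
		}
		sum += v
	}
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
	fmt.Println("sum of values:", sum)
}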