Usually, result, err := func() is used.
When one of the variables is already initialized:
_, err := func()
var result string
result, err = func()
Doing:
result, err = func()
all_results += result // seems redundant and unneeded
How do you append results to one of them (result), and reset the other one?
// along the lines of this:
var result slice
// for loop {
result, _ += func() // combine this line
_, err = func() // with this line
Can you do:
result +=, err = func()
// or
result, err +=, = func()
// or
result, err += = func()
// or
result, err (+=, =) func() // ?
The language spec does not support different treatment for multiple return values.
However, it's very easy to do it with a helper function:
func foo() (int, error) {
return 1, nil
}
func main() {
var all int
add := func(result int, err error) error {
all += result
return err
}
if err := add(foo()); err != nil {
panic(err)
}
if err := add(foo()); err != nil {
panic(err)
}
if err := add(foo()); err != nil {
panic(err)
}
fmt.Println(all)
}
This will output 3 (try it on the Go Playground).
If you can move the error handling into the helper function, it can also look like this:
var all int
check := func(result int, err error) int {
if err != nil {
panic(err)
}
return result
}
all += check(foo())
all += check(foo())
all += check(foo())
fmt.Println(all)
This outputs the same, try this one on the Go Playground.
Another variant can be to do everything in the helper function:
var all int
handle := func(result int, err error) {
if err != nil {
panic(err)
}
all += result
}
handle(foo())
handle(foo())
handle(foo())
fmt.Println(all)
Try this one on the Go Playground.
See related: Multiple values in single-value context
Related
I need to get the number of json objects in a given file. The File contains an array of JSON objects. I observe that its taking approximately 150-180 seconds to count a file with 1 million objects. Is there a way I can optimize the below code to get the count faster?
func Count(file string) (int, error) {
f, err := os.Open(file)
if err != nil {
return -1, err
}
defer f.Close()
dec := json.NewDecoder(bufio.NewReader(f))
_, e := dec.Token()
if e != nil {
return -1, e
}
var count int
for dec.More() {
var tempMap map[string]interface{}
readErr := dec.Decode(&tempMap)
if readErr != nil {
return -1, readErr
}
tranCount++
}
return count, nil
}
Speed things up by counting start object delimiters instead of decoding to Go values.
Based on the code in the question, it looks like your goal is to count objects at the first level of nesting in the document. Here's code that does that:
func Count(r io.Reader) (int, error) {
dec := json.NewDecoder(r)
nest := 0
count := 0
for {
t, err := dec.Token()
if err == io.EOF {
break
}
if err != nil {
return -1, err
}
switch t {
case json.Delim('{'):
if nest == 1 {
count++
}
nest++
case json.Delim('}'):
nest--
}
}
return count, nil
}
If your goal is to count all objects, remove all uses of nest from the code above:
func Count(r io.Reader) (int, error) {
dec := json.NewDecoder(r)
count := 0
for {
t, err := dec.Token()
if err == io.EOF {
break
}
if err != nil {
return -1, err
}
switch t {
case json.Delim('{'):
count++
}
}
return count, nil
}
I'm working on a program that makes a query to MySQL, then for each row, changes something with that row and then update the row.
The problem is that sometimes when performing an update I get a deadlock, I'm not sure if it's because the query isn't releasing the lock by the time I update or if it's something else.
Example of what I'm doing:
const (
selectQuery = `select user_id, original_transaction_id, max(payment_id) as max_payment_id from Payment_Receipt
where auto_renew_status = 1 group by user_id, original_transaction_id having count(*) > 1`
updateQuery = `update Payment_Receipt set auto_renew_status = 0, changed_by = "payment_receipt_condenser",
changed_time = ? where user_id = ? and original_transaction_id = ? and payment_id != ? and auto_renew_status = 1`
)
mysql.go:
func New(db *sql.DB, driver string) (database.Database, error) {
sqlDB := sqlx.NewDb(db, driver)
if err := db.Ping(); err != nil {
return nil, errors.Wrap(err, "connecting to database")
}
selectStmt, err := sqlDB.Preparex(selectQuery)
if err != nil {
return nil, errors.Wrap(err, "preparing select query")
}
updateStmt, err := sqlDB.Preparex(updateQuery)
if err != nil {
return nil, errors.Wrap(err, "preparing update query")
}
return &mysql{
db: sqlDB,
selectStmt: selectStmt,
updateStmt: updateStmt,
}, nil
}
func (m *mysql) Query() (<- chan *database.Row, error) {
rowsChan := make(chan *database.Row)
rows, err := m.selectStmt.Queryx()
if err != nil {
return nil, errors.Wrap(err, "making query")
}
go func() {
defer rows.Close()
defer close(rowsChan)
for rows.Next() {
row := &database.Row{}
if err := rows.StructScan(row); err != nil {
log.WithError(err).WithField("user_id", row.UserID.Int32).Error("scanning row")
}
// change some of the data here
// and put into channel for worker to consume
rowsChan <- row
}
}()
return rowsChan, nil
}
func (m *mysql) Update(row *database.Row) error {
tx, err := m.db.Beginx()
if err != nil {
return errors.Wrap(err, "beginning transaction")
}
if _, err := tx.Stmtx(m.updateStmt).Exec(row.ChangedTime); err != nil {
return errors.Wrap(err, "executing update")
}
if err := tx.Commit(); err != nil {
return errors.Wrap(err, "committing transaction")
}
return nil
}
worker.go
func (w *worker) Run(wg *sync.WaitGroup) {
rowsChan, err := w.db.Query()
if err != nil {
log.WithError(err).Fatal("failed making query")
}
for i := 0; i < w.config.Count(); i++ {
wg.Add(1)
go func() {
defer wg.Done()
for row := range rowsChan {
if err := w.db.Update(row); err != nil {
log.WithError(err).WithField("user_id", row.UserID.Int32).Error("updating row")
}
}
}()
}
}
You could make the results (row) channel from a Query() buffered:
func (m *mysql) Query() (<- chan *database.Row, error) {
rowsChan := make(chan *database.Row, 1000) // <- band-aid fix
// ...
}
This will ensure that the row collector function can write multiple results without waiting for your worker go-routine to read the results. The query operation will complete (provided there are 1000 rows or less), and the update go-routine operations can begin their parallel work.
If this fixes things, then consider putting say an SQL limit on your queries (e.g. LIMIT 1000) to ensure you don't hit deadlock again (if 1000+ records is a real possibility).
Crafting "pagination" style queries to grab the next say 1000 rows, using RowID markers etc. to ensure full coverage of results - all while avoiding locking out any of your update operations.
I have JSON string like
"{\"a\": \"b\", \"a\":true,\"c\":[\"field_3 string 1\",\"field3 string2\"]}"
how to detect the duplicate attribute in this json string using Golang
Use the json.Decoder to walk through the JSON. When an object is found, walk through keys and values checking for duplicate keys.
func check(d *json.Decoder, path []string, dup func(path []string) error) error {
// Get next token from JSON
t, err := d.Token()
if err != nil {
return err
}
// Is it a delimiter?
delim, ok := t.(json.Delim)
// No, nothing more to check.
if !ok {
// scaler type, nothing to do
return nil
}
switch delim {
case '{':
keys := make(map[string]bool)
for d.More() {
// Get field key.
t, err := d.Token()
if err != nil {
return err
}
key := t.(string)
// Check for duplicates.
if keys[key] {
// Duplicate found. Call the application's dup function. The
// function can record the duplicate or return an error to stop
// the walk through the document.
if err := dup(append(path, key)); err != nil {
return err
}
}
keys[key] = true
// Check value.
if err := check(d, append(path, key), dup); err != nil {
return err
}
}
// consume trailing }
if _, err := d.Token(); err != nil {
return err
}
case '[':
i := 0
for d.More() {
if err := check(d, append(path, strconv.Itoa(i)), dup); err != nil {
return err
}
i++
}
// consume trailing ]
if _, err := d.Token(); err != nil {
return err
}
}
return nil
}
Here's how to call it:
func printDup(path []string) error {
fmt.Printf("Duplicate %s\n", strings.Join(path, "/"))
return nil
}
...
data := `{"a": "b", "a":true,"c":["field_3 string 1","field3 string2"], "d": {"e": 1, "e": 2}}`
if err := check(json.NewDecoder(strings.NewReader(data)), nil, printDup); err != nil {
log.Fatal(err)
}
The output is:
Duplicate a
Duplicate d/e
Run it on the Playground
Here's how to generate an error on the first duplicate key:
var ErrDuplicate = errors.New("duplicate")
func dupErr(path []string) error {
return ErrDuplicate
}
...
data := `{"a": "b", "a":true,"c":["field_3 string 1","field3 string2"], "d": {"e": 1, "e": 2}}`
err := check(json.NewDecoder(strings.NewReader(data)), nil, dupErr)
if err == ErrDuplicate {
fmt.Println("found a duplicate")
} else if err != nil {
// some other error
log.Fatal(err)
}
One that would probably work well would be to simply decode, reencode, then check the length of the new json against the old json:
https://play.golang.org/p/50P-x1fxCzp
package main
import (
"encoding/json"
"fmt"
)
func main() {
jsn := []byte("{\"a\": \"b\", \"a\":true,\"c\":[\"field_3 string 1\",\"field3 string2\"]}")
var m map[string]interface{}
err := json.Unmarshal(jsn, &m)
if err != nil {
panic(err)
}
l := len(jsn)
jsn, err = json.Marshal(m)
if err != nil {
panic(err)
}
if l != len(jsn) {
panic(fmt.Sprintf("%s: %d (%d)", "duplicate key", l, len(jsn)))
}
}
The right way to do it would be to re-implement the json.Decode function, and store a map of keys found, but the above should work (especially if you first stripped any spaces from the json using jsn = bytes.Replace(jsn, []byte(" "), []byte(""), -1) to guard against false positives.
I have a json which contains one its attributes value as an array and I need to keep appending values to the array and write to a file. Is there a way I could avoid rewrite of the existing data and only append the new values?
----- Moving next question on different thread ---------------
what is recommended way for writing big data sets onto the file incremental file write or file dump at the end process?
A general solution makes the most sense if the existing JSON is actually an array, or if it's an object that has an array as the last or only pair, as in your case. Otherwise, you're inserting instead of appending. You probably don't want to read the entire file either.
One approach is not much different than what you were thinking, but handles several details
Read the end of the file to verify that it "ends with an array"
Retain that part
Position the file at that ending array bracket
Take the output from a standard encoder for an array of new data, dropping its opening bracket, and inserting a comma if necessary
The end of the the new output replaces the original ending array bracket
Tack the rest of the tail back on
import (
"bytes"
"errors"
"io"
"io/ioutil"
"os"
"regexp"
"unicode"
)
const (
tailCheckLen = 16
)
var (
arrayEndsObject = regexp.MustCompile("(\\[\\s*)?](\\s*}\\s*)$")
justArray = regexp.MustCompile("(\\[\\s*)?](\\s*)$")
)
type jsonAppender struct {
f *os.File
strippedBracket bool
needsComma bool
tail []byte
}
func (a jsonAppender) Write(b []byte) (int, error) {
trimmed := 0
if !a.strippedBracket {
t := bytes.TrimLeftFunc(b, unicode.IsSpace)
if len(t) == 0 {
return len(b), nil
}
if t[0] != '[' {
return 0, errors.New("not appending array: " + string(t))
}
trimmed = len(b) - len(t) + 1
b = t[1:]
a.strippedBracket = true
}
if a.needsComma {
a.needsComma = false
n, err := a.f.Write([]byte(", "))
if err != nil {
return n, err
}
}
n, err := a.f.Write(b)
return trimmed + n, err
}
func (a jsonAppender) Close() error {
if _, err := a.f.Write(a.tail); err != nil {
defer a.f.Close()
return err
}
return a.f.Close()
}
func JSONArrayAppender(file string) (io.WriteCloser, error) {
f, err := os.OpenFile(file, os.O_RDWR, 0664)
if err != nil {
return nil, err
}
pos, err := f.Seek(0, io.SeekEnd)
if err != nil {
return nil, err
}
if pos < tailCheckLen {
pos = 0
} else {
pos -= tailCheckLen
}
_, err = f.Seek(pos, io.SeekStart)
if err != nil {
return nil, err
}
tail, err := ioutil.ReadAll(f)
if err != nil {
return nil, err
}
hasElements := false
if len(tail) == 0 {
_, err = f.Write([]byte("["))
if err != nil {
return nil, err
}
} else {
var g [][]byte
if g = arrayEndsObject.FindSubmatch(tail); g != nil {
} else if g = justArray.FindSubmatch(tail); g != nil {
} else {
return nil, errors.New("does not end with array")
}
hasElements = len(g[1]) == 0
_, err = f.Seek(-int64(len(g[2])+1), io.SeekEnd) // 1 for ]
if err != nil {
return nil, err
}
tail = g[2]
}
return jsonAppender{f: f, needsComma: hasElements, tail: tail}, nil
}
Usage is then like in this test fragment
a, err := JSONArrayAppender(f)
if err != nil {
t.Fatal(err)
}
added := []struct {
Name string `json:"name"`
}{
{"Wonder Woman"},
}
if err = json.NewEncoder(a).Encode(added); err != nil {
t.Fatal(err)
}
if err = a.Close(); err != nil {
t.Fatal(err)
}
You can use whatever settings on the Encoder you want. The only hard-coded part is handling needsComma, but you can add an argument for that.
If your JSON array is simple you can use something like the following code. In this code, I create JSON array manually.
type item struct {
Name string
}
func main() {
fd, err := os.Create("hello.json")
if err != nil {
log.Fatal(err)
}
fd.Write([]byte{'['})
for i := 0; i < 10; i++ {
b, err := json.Marshal(item{
"parham",
})
if err != nil {
log.Fatal(err)
}
if i != 0 {
fd.Write([]byte{','})
}
fd.Write(b)
}
fd.Write([]byte{']'})
}
If you want to have a valid array in each step you can write ']' at the end of each iteration and then seek back on the start of the next iteration.
The Go code below reads in a 10,000 record CSV (of timestamp times and float values), runs some operations on the data, and then writes the original values to another CSV along with an additional column for score. However it is terribly slow (i.e. hours, but most of that is calculateStuff()) and I'm curious if there are any inefficiencies in the CSV reading/writing I can take care of.
package main
import (
"encoding/csv"
"log"
"os"
"strconv"
)
func ReadCSV(filepath string) ([][]string, error) {
csvfile, err := os.Open(filepath)
if err != nil {
return nil, err
}
defer csvfile.Close()
reader := csv.NewReader(csvfile)
fields, err := reader.ReadAll()
return fields, nil
}
func main() {
// load data csv
records, err := ReadCSV("./path/to/datafile.csv")
if err != nil {
log.Fatal(err)
}
// write results to a new csv
outfile, err := os.Create("./where/to/write/resultsfile.csv"))
if err != nil {
log.Fatal("Unable to open output")
}
defer outfile.Close()
writer := csv.NewWriter(outfile)
for i, record := range records {
time := record[0]
value := record[1]
// skip header row
if i == 0 {
writer.Write([]string{time, value, "score"})
continue
}
// get float values
floatValue, err := strconv.ParseFloat(value, 64)
if err != nil {
log.Fatal("Record: %v, Error: %v", floatValue, err)
}
// calculate scores; THIS EXTERNAL METHOD CANNOT BE CHANGED
score := calculateStuff(floatValue)
valueString := strconv.FormatFloat(floatValue, 'f', 8, 64)
scoreString := strconv.FormatFloat(prob, 'f', 8, 64)
//fmt.Printf("Result: %v\n", []string{time, valueString, scoreString})
writer.Write([]string{time, valueString, scoreString})
}
writer.Flush()
}
I'm looking for help making this CSV read/write template code as fast as possible. For the scope of this question we need not worry about the calculateStuff method.
You're loading the file in memory first then processing it, that can be slow with a big file.
You need to loop and call .Read and process one line at a time.
func processCSV(rc io.Reader) (ch chan []string) {
ch = make(chan []string, 10)
go func() {
r := csv.NewReader(rc)
if _, err := r.Read(); err != nil { //read header
log.Fatal(err)
}
defer close(ch)
for {
rec, err := r.Read()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
ch <- rec
}
}()
return
}
playground
//note it's roughly based on DaveC's comment.
This is essentially Dave C's answer from the comments sections:
package main
import (
"encoding/csv"
"log"
"os"
"strconv"
)
func main() {
// setup reader
csvIn, err := os.Open("./path/to/datafile.csv")
if err != nil {
log.Fatal(err)
}
r := csv.NewReader(csvIn)
// setup writer
csvOut, err := os.Create("./where/to/write/resultsfile.csv"))
if err != nil {
log.Fatal("Unable to open output")
}
w := csv.NewWriter(csvOut)
defer csvOut.Close()
// handle header
rec, err := r.Read()
if err != nil {
log.Fatal(err)
}
rec = append(rec, "score")
if err = w.Write(rec); err != nil {
log.Fatal(err)
}
for {
rec, err = r.Read()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
// get float value
value := rec[1]
floatValue, err := strconv.ParseFloat(value, 64)
if err != nil {
log.Fatal("Record, error: %v, %v", value, err)
}
// calculate scores; THIS EXTERNAL METHOD CANNOT BE CHANGED
score := calculateStuff(floatValue)
scoreString := strconv.FormatFloat(score, 'f', 8, 64)
rec = append(rec, scoreString)
if err = w.Write(rec); err != nil {
log.Fatal(err)
}
w.Flush()
}
}
Note of course the logic is all jammed into main(), better would be to split it into several functions, but that's beyond the scope of this question.
encoding/csv is indeed very slow on big files, as it performs a lot of allocations. Since your format is so simple I recommend using strings.Split instead which is much faster.
If even that is not fast enough you can consider implementing the parsing yourself using strings.IndexByte which is implemented in assembly: http://golang.org/src/strings/strings_decl.go?s=274:310#L1
Having said that, you should also reconsider using ReadAll if the file is larger than your memory.