I'm working on a program that makes a query to MySQL, then for each row, changes something with that row and then update the row.
The problem is that sometimes when performing an update I get a deadlock, I'm not sure if it's because the query isn't releasing the lock by the time I update or if it's something else.
Example of what I'm doing:
const (
selectQuery = `select user_id, original_transaction_id, max(payment_id) as max_payment_id from Payment_Receipt
where auto_renew_status = 1 group by user_id, original_transaction_id having count(*) > 1`
updateQuery = `update Payment_Receipt set auto_renew_status = 0, changed_by = "payment_receipt_condenser",
changed_time = ? where user_id = ? and original_transaction_id = ? and payment_id != ? and auto_renew_status = 1`
)
mysql.go:
func New(db *sql.DB, driver string) (database.Database, error) {
sqlDB := sqlx.NewDb(db, driver)
if err := db.Ping(); err != nil {
return nil, errors.Wrap(err, "connecting to database")
}
selectStmt, err := sqlDB.Preparex(selectQuery)
if err != nil {
return nil, errors.Wrap(err, "preparing select query")
}
updateStmt, err := sqlDB.Preparex(updateQuery)
if err != nil {
return nil, errors.Wrap(err, "preparing update query")
}
return &mysql{
db: sqlDB,
selectStmt: selectStmt,
updateStmt: updateStmt,
}, nil
}
func (m *mysql) Query() (<- chan *database.Row, error) {
rowsChan := make(chan *database.Row)
rows, err := m.selectStmt.Queryx()
if err != nil {
return nil, errors.Wrap(err, "making query")
}
go func() {
defer rows.Close()
defer close(rowsChan)
for rows.Next() {
row := &database.Row{}
if err := rows.StructScan(row); err != nil {
log.WithError(err).WithField("user_id", row.UserID.Int32).Error("scanning row")
}
// change some of the data here
// and put into channel for worker to consume
rowsChan <- row
}
}()
return rowsChan, nil
}
func (m *mysql) Update(row *database.Row) error {
tx, err := m.db.Beginx()
if err != nil {
return errors.Wrap(err, "beginning transaction")
}
if _, err := tx.Stmtx(m.updateStmt).Exec(row.ChangedTime); err != nil {
return errors.Wrap(err, "executing update")
}
if err := tx.Commit(); err != nil {
return errors.Wrap(err, "committing transaction")
}
return nil
}
worker.go
func (w *worker) Run(wg *sync.WaitGroup) {
rowsChan, err := w.db.Query()
if err != nil {
log.WithError(err).Fatal("failed making query")
}
for i := 0; i < w.config.Count(); i++ {
wg.Add(1)
go func() {
defer wg.Done()
for row := range rowsChan {
if err := w.db.Update(row); err != nil {
log.WithError(err).WithField("user_id", row.UserID.Int32).Error("updating row")
}
}
}()
}
}
You could make the results (row) channel from a Query() buffered:
func (m *mysql) Query() (<- chan *database.Row, error) {
rowsChan := make(chan *database.Row, 1000) // <- band-aid fix
// ...
}
This will ensure that the row collector function can write multiple results without waiting for your worker go-routine to read the results. The query operation will complete (provided there are 1000 rows or less), and the update go-routine operations can begin their parallel work.
If this fixes things, then consider putting say an SQL limit on your queries (e.g. LIMIT 1000) to ensure you don't hit deadlock again (if 1000+ records is a real possibility).
Crafting "pagination" style queries to grab the next say 1000 rows, using RowID markers etc. to ensure full coverage of results - all while avoiding locking out any of your update operations.
Related
Why call rows.Close() takes too long time when I call it after I exit from loop rows.Next() before processing all elements of loop.
Its happen when I make request which returns huge amount of data (around 300 000 rows).
This problem doesn't exists when amount of rows is not so big.
func SelectHugeAmountOfRows() {
query := `SELECT * FROM big_table`
rows, _ := Conn.Query(context.Background(), query)
defer func() {
fmt.Println("Start close rows")
start := time.Now()
rows.Close()
duration := time.Since(start)
fmt.Println("rowsClose duration:", duration)
}()
for rows.Next() {
rowValues, err := rows.Values()
if err != nil {
fmt.Println(err)
return
}
// do something with rowValues and get error in process
err = func() error {
fmt.Println(rowValues)
return errors.New("some error")
}()
if err != nil {
return
}
}
if err := rows.Err(); err != nil {
fmt.Println(err)
return
}
}
rowsClose duration: 1m2.5488669s
Interesting that duration of rows.Close() in this case in same as duration which I will have if will process all elements of rows.Next() loop without break it.
I want to retrieve an array of app IDs from a MySQL database. I used http://go-database-sql.org's example code:
func QueryAppList() *[]int64 {
var (
appList []int64
appid int64
)
qry := "SELECT a.appid FROM app a WHERE a.app_status IN (1, 2);"
// cfg.GetDb() supplies the database connection already established
rows, err := cfg.GetDb().Query(qry)
if err != nil {
logg.Error(err)
return &appList
}
defer func(rows *sql.Rows) {
// simple defer does not catch every error: https://www.joeshaw.org/dont-defer-close-on-writable-files/
err := rows.Close()
if err != nil {
logg.Error(err)
}
}(rows)
for rows.Next() {
err := rows.Scan(&appid)
if err != nil {
logg.Error(err)
return &appList
}
appidList = append(appList, appid)
}
err = rows.Err()
if err != nil {
logg.Error(err)
return &appList
}
return &appidList
}
My programm will be littered with queries like this. All the ways of getting the result list and how it to prevent failure make this small query hard to read what is actually going on.
Is there a way to make queries more concise?
These are my thoughts to make the code less verbose:
Use functions to handle the errors reducing the error handling to one line.
If it's one column array I want, I could pass the query and column name as parameters and reuse the query function. I rather just rewrite a query function than to deal with complicated abstractions.
Are there packages I missed that help reduce the clutter?
Using an ORM like gorm is NOT an option.
I just started Go programming so I am lacking experience with the language.
Below is the same query in Node.js with the same result. It has 9 lines compared to Go's 34 i.e. 65% more concise in terms of length. That's where I ideally would like to get to.
import {query} from "../db/pool"; // connection pool query from https://github.com/sidorares/node-mysql2
export const queryAppList = async () => {
try {
const qry = "SELECT a.appid FROM app a WHERE a.app_status IN (1, 2);";
const [appList] = await query(qry);
return appList;
} catch (err) {
console.error(err)
return [];
}
};
You can make a Query struct which has reusable methods for do such things.
Something like this:
type Query struct{
conn *sql.DB
rows *sql.Rows
...
}
func NewQuery(conn *sql.DB) *Query {
return &Query{
conn: conn,
rows: nil,
}
}
func (q Query) OpenSQL(sql string) error {
q.rows, err = q.conn.Query(sql)
if err != nil {
log.Error("SQL error during query ("+sql+"). "+err.Error())
return err
}
return nil
}
func (q Query)Close() (error) {
err := q.rows.Close()
if err != nil {
log.Error("Error closing rows. "+err.Error())
return err
}
return nil
}
//You can use generic functions to make the code even smaller
func FetchToSlice[T any](q Query) ([]T, error) {
result := make([]T, 0)
var value T
for q.rows.Next() {
err := q.rows.Scan(&value)
if err != nil {
log.Error("Error during fetching. "+err.Error())
return nil, err
}
result = append(result, value)
}
return result, nil
}
With this you code will look something like this:
qry := NewQuery(cfg.GetDB())
err := qry.OpenSQL("SELECT a.appid FROM app a WHERE a.app_status IN (1, 2);")
if err != nil {
return err
}
defer qry.Close()
appidList, err := FetchToSlice[int](qry)
if err != nil {
return err
}
You can later add more methods to your Query to handle more complex cases, even you can use a sync.Pool to cache your query structs and so on.
Usually, result, err := func() is used.
When one of the variables is already initialized:
_, err := func()
var result string
result, err = func()
Doing:
result, err = func()
all_results += result // seems redundant and unneeded
How do you append results to one of them (result), and reset the other one?
// along the lines of this:
var result slice
// for loop {
result, _ += func() // combine this line
_, err = func() // with this line
Can you do:
result +=, err = func()
// or
result, err +=, = func()
// or
result, err += = func()
// or
result, err (+=, =) func() // ?
The language spec does not support different treatment for multiple return values.
However, it's very easy to do it with a helper function:
func foo() (int, error) {
return 1, nil
}
func main() {
var all int
add := func(result int, err error) error {
all += result
return err
}
if err := add(foo()); err != nil {
panic(err)
}
if err := add(foo()); err != nil {
panic(err)
}
if err := add(foo()); err != nil {
panic(err)
}
fmt.Println(all)
}
This will output 3 (try it on the Go Playground).
If you can move the error handling into the helper function, it can also look like this:
var all int
check := func(result int, err error) int {
if err != nil {
panic(err)
}
return result
}
all += check(foo())
all += check(foo())
all += check(foo())
fmt.Println(all)
This outputs the same, try this one on the Go Playground.
Another variant can be to do everything in the helper function:
var all int
handle := func(result int, err error) {
if err != nil {
panic(err)
}
all += result
}
handle(foo())
handle(foo())
handle(foo())
fmt.Println(all)
Try this one on the Go Playground.
See related: Multiple values in single-value context
I have a json which contains one its attributes value as an array and I need to keep appending values to the array and write to a file. Is there a way I could avoid rewrite of the existing data and only append the new values?
----- Moving next question on different thread ---------------
what is recommended way for writing big data sets onto the file incremental file write or file dump at the end process?
A general solution makes the most sense if the existing JSON is actually an array, or if it's an object that has an array as the last or only pair, as in your case. Otherwise, you're inserting instead of appending. You probably don't want to read the entire file either.
One approach is not much different than what you were thinking, but handles several details
Read the end of the file to verify that it "ends with an array"
Retain that part
Position the file at that ending array bracket
Take the output from a standard encoder for an array of new data, dropping its opening bracket, and inserting a comma if necessary
The end of the the new output replaces the original ending array bracket
Tack the rest of the tail back on
import (
"bytes"
"errors"
"io"
"io/ioutil"
"os"
"regexp"
"unicode"
)
const (
tailCheckLen = 16
)
var (
arrayEndsObject = regexp.MustCompile("(\\[\\s*)?](\\s*}\\s*)$")
justArray = regexp.MustCompile("(\\[\\s*)?](\\s*)$")
)
type jsonAppender struct {
f *os.File
strippedBracket bool
needsComma bool
tail []byte
}
func (a jsonAppender) Write(b []byte) (int, error) {
trimmed := 0
if !a.strippedBracket {
t := bytes.TrimLeftFunc(b, unicode.IsSpace)
if len(t) == 0 {
return len(b), nil
}
if t[0] != '[' {
return 0, errors.New("not appending array: " + string(t))
}
trimmed = len(b) - len(t) + 1
b = t[1:]
a.strippedBracket = true
}
if a.needsComma {
a.needsComma = false
n, err := a.f.Write([]byte(", "))
if err != nil {
return n, err
}
}
n, err := a.f.Write(b)
return trimmed + n, err
}
func (a jsonAppender) Close() error {
if _, err := a.f.Write(a.tail); err != nil {
defer a.f.Close()
return err
}
return a.f.Close()
}
func JSONArrayAppender(file string) (io.WriteCloser, error) {
f, err := os.OpenFile(file, os.O_RDWR, 0664)
if err != nil {
return nil, err
}
pos, err := f.Seek(0, io.SeekEnd)
if err != nil {
return nil, err
}
if pos < tailCheckLen {
pos = 0
} else {
pos -= tailCheckLen
}
_, err = f.Seek(pos, io.SeekStart)
if err != nil {
return nil, err
}
tail, err := ioutil.ReadAll(f)
if err != nil {
return nil, err
}
hasElements := false
if len(tail) == 0 {
_, err = f.Write([]byte("["))
if err != nil {
return nil, err
}
} else {
var g [][]byte
if g = arrayEndsObject.FindSubmatch(tail); g != nil {
} else if g = justArray.FindSubmatch(tail); g != nil {
} else {
return nil, errors.New("does not end with array")
}
hasElements = len(g[1]) == 0
_, err = f.Seek(-int64(len(g[2])+1), io.SeekEnd) // 1 for ]
if err != nil {
return nil, err
}
tail = g[2]
}
return jsonAppender{f: f, needsComma: hasElements, tail: tail}, nil
}
Usage is then like in this test fragment
a, err := JSONArrayAppender(f)
if err != nil {
t.Fatal(err)
}
added := []struct {
Name string `json:"name"`
}{
{"Wonder Woman"},
}
if err = json.NewEncoder(a).Encode(added); err != nil {
t.Fatal(err)
}
if err = a.Close(); err != nil {
t.Fatal(err)
}
You can use whatever settings on the Encoder you want. The only hard-coded part is handling needsComma, but you can add an argument for that.
If your JSON array is simple you can use something like the following code. In this code, I create JSON array manually.
type item struct {
Name string
}
func main() {
fd, err := os.Create("hello.json")
if err != nil {
log.Fatal(err)
}
fd.Write([]byte{'['})
for i := 0; i < 10; i++ {
b, err := json.Marshal(item{
"parham",
})
if err != nil {
log.Fatal(err)
}
if i != 0 {
fd.Write([]byte{','})
}
fd.Write(b)
}
fd.Write([]byte{']'})
}
If you want to have a valid array in each step you can write ']' at the end of each iteration and then seek back on the start of the next iteration.
The Go code below reads in a 10,000 record CSV (of timestamp times and float values), runs some operations on the data, and then writes the original values to another CSV along with an additional column for score. However it is terribly slow (i.e. hours, but most of that is calculateStuff()) and I'm curious if there are any inefficiencies in the CSV reading/writing I can take care of.
package main
import (
"encoding/csv"
"log"
"os"
"strconv"
)
func ReadCSV(filepath string) ([][]string, error) {
csvfile, err := os.Open(filepath)
if err != nil {
return nil, err
}
defer csvfile.Close()
reader := csv.NewReader(csvfile)
fields, err := reader.ReadAll()
return fields, nil
}
func main() {
// load data csv
records, err := ReadCSV("./path/to/datafile.csv")
if err != nil {
log.Fatal(err)
}
// write results to a new csv
outfile, err := os.Create("./where/to/write/resultsfile.csv"))
if err != nil {
log.Fatal("Unable to open output")
}
defer outfile.Close()
writer := csv.NewWriter(outfile)
for i, record := range records {
time := record[0]
value := record[1]
// skip header row
if i == 0 {
writer.Write([]string{time, value, "score"})
continue
}
// get float values
floatValue, err := strconv.ParseFloat(value, 64)
if err != nil {
log.Fatal("Record: %v, Error: %v", floatValue, err)
}
// calculate scores; THIS EXTERNAL METHOD CANNOT BE CHANGED
score := calculateStuff(floatValue)
valueString := strconv.FormatFloat(floatValue, 'f', 8, 64)
scoreString := strconv.FormatFloat(prob, 'f', 8, 64)
//fmt.Printf("Result: %v\n", []string{time, valueString, scoreString})
writer.Write([]string{time, valueString, scoreString})
}
writer.Flush()
}
I'm looking for help making this CSV read/write template code as fast as possible. For the scope of this question we need not worry about the calculateStuff method.
You're loading the file in memory first then processing it, that can be slow with a big file.
You need to loop and call .Read and process one line at a time.
func processCSV(rc io.Reader) (ch chan []string) {
ch = make(chan []string, 10)
go func() {
r := csv.NewReader(rc)
if _, err := r.Read(); err != nil { //read header
log.Fatal(err)
}
defer close(ch)
for {
rec, err := r.Read()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
ch <- rec
}
}()
return
}
playground
//note it's roughly based on DaveC's comment.
This is essentially Dave C's answer from the comments sections:
package main
import (
"encoding/csv"
"log"
"os"
"strconv"
)
func main() {
// setup reader
csvIn, err := os.Open("./path/to/datafile.csv")
if err != nil {
log.Fatal(err)
}
r := csv.NewReader(csvIn)
// setup writer
csvOut, err := os.Create("./where/to/write/resultsfile.csv"))
if err != nil {
log.Fatal("Unable to open output")
}
w := csv.NewWriter(csvOut)
defer csvOut.Close()
// handle header
rec, err := r.Read()
if err != nil {
log.Fatal(err)
}
rec = append(rec, "score")
if err = w.Write(rec); err != nil {
log.Fatal(err)
}
for {
rec, err = r.Read()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
// get float value
value := rec[1]
floatValue, err := strconv.ParseFloat(value, 64)
if err != nil {
log.Fatal("Record, error: %v, %v", value, err)
}
// calculate scores; THIS EXTERNAL METHOD CANNOT BE CHANGED
score := calculateStuff(floatValue)
scoreString := strconv.FormatFloat(score, 'f', 8, 64)
rec = append(rec, scoreString)
if err = w.Write(rec); err != nil {
log.Fatal(err)
}
w.Flush()
}
}
Note of course the logic is all jammed into main(), better would be to split it into several functions, but that's beyond the scope of this question.
encoding/csv is indeed very slow on big files, as it performs a lot of allocations. Since your format is so simple I recommend using strings.Split instead which is much faster.
If even that is not fast enough you can consider implementing the parsing yourself using strings.IndexByte which is implemented in assembly: http://golang.org/src/strings/strings_decl.go?s=274:310#L1
Having said that, you should also reconsider using ReadAll if the file is larger than your memory.