XML Insert Performance into MySQL

I have some code which inserts records into the database:
The code is supposed to insert 15M records; right now it takes 60 hours on an AWS t2.large instance. I'm looking for ways to make the inserts faster while also not duplicating records.
Do you have any suggestions?
I'm using GORM and MySQL.
// InsertJob inserts a job into the database, first checking by hash
// that it isn't already there.
func InsertJob(job XMLJob, oid int, resourceID int) (Job, error) {
    db := globalDBConnection

    cleanJobDescription := job.Body
    hashString := GetMD5Hash(job.Title + job.Body + job.Location + job.Zip)
    jobDescriptionHash := GetMD5Hash(cleanJobDescription)
    empty := sql.NullString{String: "", Valid: true}

    j := Job{
        CurrencyID: 1, // USD
        // other fields elided for brevity (cleanJobDescription,
        // jobDescriptionHash and empty are used there)
        PrimaryIndustry: sql.NullString{String: job.PrimaryIndustry, Valid: true},
    }

    err := db.Where("hash = ?", hashString).Find(&j).Error
    if err != nil {
        if err.Error() != "record not found" {
            return j, err
        }
        // Record not found: create it.
        err2 := db.Create(&j).Error
        if err2 != nil {
            log.Println("Unable to create job: " + err2.Error())
            return j, err2
        }
    }
    return j, nil
}

You can speed it up using the semaphore pattern:
https://play.golang.org/p/OxO8pNy3bc6
inspired by this gist:
https://gist.github.com/montanaflynn/ea4b92ed640f790c4b9cee36046a5383
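Here is a minimal sketch of that pattern applied to the code above, assuming the XML feed has already been parsed into a jobs slice and that oid and resourceID are in scope (those names are illustrative):

// insertAll runs InsertJob for each record with bounded concurrency,
// using a buffered channel as a semaphore.
func insertAll(jobs []XMLJob, oid, resourceID int) {
    sem := make(chan struct{}, 10) // at most 10 inserts in flight; tune for your instance
    var wg sync.WaitGroup
    for _, job := range jobs {
        wg.Add(1)
        sem <- struct{}{} // acquire a slot before starting a goroutine
        go func(job XMLJob) {
            defer wg.Done()
            defer func() { <-sem }() // release the slot when done
            if _, err := InsertJob(job, oid, resourceID); err != nil {
                log.Println("insert failed:", err)
            }
        }(job)
    }
    wg.Wait()
}

Concurrency alone still issues one SELECT and one INSERT per record, though; batching the inserts and letting a unique index on the hash column reject duplicates would cut the round trips much further.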

Related

Bulk insert with Golang and Gorm deadlock in concurrency goroutines

I'm trying to bulk insert many records using Gorm, Golang and MySQL. My code looks like this:
package main

import (
    "fmt"
    "sync"

    "gorm.io/driver/mysql"
    "gorm.io/gorm"
)

type Article struct {
    gorm.Model
    Code string `gorm:"size:255;uniqueIndex"`
}

func main() {
    db, err := gorm.Open(mysql.Open("root@tcp(127.0.0.1:3306)/q_test"), nil)
    if err != nil {
        panic(err)
    }
    db.AutoMigrate(&Article{})

    // err = db.Exec("TRUNCATE articles").Error
    err = db.Exec("DELETE FROM articles").Error
    if err != nil {
        panic(err)
    }

    // Build some articles
    n := 10000
    var articles []Article
    for i := 0; i < n; i++ {
        article := Article{Code: fmt.Sprintf("code_%d", i)}
        articles = append(articles, article)
    }

    // // Save articles
    // err = db.Create(&articles).Error
    // if err != nil {
    //     panic(err)
    // }

    // Save articles with goroutines
    chunkSize := 100
    var wg sync.WaitGroup
    wg.Add(n / chunkSize)
    for i := 0; i < n; i += chunkSize {
        go func(i int) {
            defer wg.Done()
            chunk := articles[i:(i + chunkSize)]
            err := db.Create(&chunk).Error
            if err != nil {
                panic(err)
            }
        }(i)
    }
    wg.Wait()
}
When I run this code, about one time in three it panics with:
panic: Error 1213: Deadlock found when trying to get lock; try restarting transaction
If I run the code without goroutines (the commented lines), I get no deadlock. I've also noticed that if I remove the unique index on the code field, the deadlock no longer happens; and if I replace the DELETE FROM articles statement with TRUNCATE articles, the deadlock doesn't seem to happen anymore either.
I've also run the same code against PostgreSQL, and it works with no deadlocks.
Any idea why the deadlock happens only with the unique index on MySQL, and how to avoid it?
A DELETE statement is executed with row locks: each row in the table is locked for deletion. TRUNCATE TABLE always locks the table and page, but not each row.
Source: https://stackoverflow.com/a/20559931/18012302
I think MySQL needs time to finish the DELETE query. Try adding a time.Sleep after the delete:

err = db.Exec("DELETE FROM articles").Error
if err != nil {
    panic(err)
}
time.Sleep(time.Second)
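Since the error text itself says "try restarting transaction", another option is to retry a chunk when MySQL reports deadlock error 1213. A sketch, assuming the go-sql-driver error type surfaces through GORM unwrapped:

import (
    "errors"
    "fmt"
    "time"

    gomysql "github.com/go-sql-driver/mysql" // aliased: the gorm dialector is also called mysql
    "gorm.io/gorm"
)

// createWithRetry re-runs a chunk insert while MySQL keeps reporting
// a deadlock (error 1213), with a short growing backoff between tries.
func createWithRetry(db *gorm.DB, chunk []Article, attempts int) error {
    var err error
    for i := 0; i < attempts; i++ {
        if err = db.Create(&chunk).Error; err == nil {
            return nil
        }
        var myErr *gomysql.MySQLError
        if !errors.As(err, &myErr) || myErr.Number != 1213 {
            return err // not a deadlock: fail immediately
        }
        time.Sleep(time.Duration(i+1) * 50 * time.Millisecond)
    }
    return fmt.Errorf("still deadlocking after %d attempts: %w", attempts, err)
}

Deadlocks between concurrent inserts into a table with a secondary unique index are something InnoDB can legitimately produce, so retrying is a standard remedy rather than a hack.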

MySQL query sometimes deadlocks

I'm working on a program that queries MySQL and then, for each row, changes something and updates that row.
The problem is that sometimes the update hits a deadlock. I'm not sure whether the select hasn't released its locks by the time I update, or whether it's something else.
Example of what I'm doing:
const (
    selectQuery = `select user_id, original_transaction_id, max(payment_id) as max_payment_id
        from Payment_Receipt
        where auto_renew_status = 1
        group by user_id, original_transaction_id
        having count(*) > 1`
    updateQuery = `update Payment_Receipt
        set auto_renew_status = 0, changed_by = "payment_receipt_condenser", changed_time = ?
        where user_id = ? and original_transaction_id = ? and payment_id != ? and auto_renew_status = 1`
)
mysql.go:
func New(db *sql.DB, driver string) (database.Database, error) {
    sqlDB := sqlx.NewDb(db, driver)
    if err := db.Ping(); err != nil {
        return nil, errors.Wrap(err, "connecting to database")
    }
    selectStmt, err := sqlDB.Preparex(selectQuery)
    if err != nil {
        return nil, errors.Wrap(err, "preparing select query")
    }
    updateStmt, err := sqlDB.Preparex(updateQuery)
    if err != nil {
        return nil, errors.Wrap(err, "preparing update query")
    }
    return &mysql{
        db:         sqlDB,
        selectStmt: selectStmt,
        updateStmt: updateStmt,
    }, nil
}
func (m *mysql) Query() (<-chan *database.Row, error) {
    rowsChan := make(chan *database.Row)
    rows, err := m.selectStmt.Queryx()
    if err != nil {
        return nil, errors.Wrap(err, "making query")
    }
    go func() {
        defer rows.Close()
        defer close(rowsChan)
        for rows.Next() {
            row := &database.Row{}
            if err := rows.StructScan(row); err != nil {
                log.WithError(err).WithField("user_id", row.UserID.Int32).Error("scanning row")
            }
            // change some of the data here
            // and put into channel for worker to consume
            rowsChan <- row
        }
    }()
    return rowsChan, nil
}
func (m *mysql) Update(row *database.Row) error {
    tx, err := m.db.Beginx()
    if err != nil {
        return errors.Wrap(err, "beginning transaction")
    }
    // Bind all four placeholders of updateQuery, not just changed_time
    // (the field names on row are assumed here, mirroring the select aliases).
    if _, err := tx.Stmtx(m.updateStmt).Exec(row.ChangedTime, row.UserID,
        row.OriginalTransactionID, row.MaxPaymentID); err != nil {
        tx.Rollback()
        return errors.Wrap(err, "executing update")
    }
    if err := tx.Commit(); err != nil {
        return errors.Wrap(err, "committing transaction")
    }
    return nil
}
worker.go
func (w *worker) Run(wg *sync.WaitGroup) {
    rowsChan, err := w.db.Query()
    if err != nil {
        log.WithError(err).Fatal("failed making query")
    }
    for i := 0; i < w.config.Count(); i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for row := range rowsChan {
                if err := w.db.Update(row); err != nil {
                    log.WithError(err).WithField("user_id", row.UserID.Int32).Error("updating row")
                }
            }
        }()
    }
}
You could make the results (row) channel from Query() buffered:

func (m *mysql) Query() (<-chan *database.Row, error) {
    rowsChan := make(chan *database.Row, 1000) // <- band-aid fix
    // ...
}
This ensures the row-collector goroutine can write multiple results without waiting for your worker goroutines to read them. The query operation can complete (provided there are 1000 rows or fewer), releasing its locks before the update operations begin their parallel work.
If this fixes things, consider putting an SQL limit on the query (e.g. LIMIT 1000) so you don't hit the deadlock again if 1000+ records is a real possibility, and craft "pagination"-style queries that grab the next 1000 rows using row-ID markers, ensuring full coverage of the results while never holding read locks that block your update operations. A sketch of that idea follows.
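Here is that keyset-pagination idea in the same sqlx style as the question (column and struct names are assumed from the queries above; the real select groups rows, so the marker column would have to come out of that result set):

// queryPage fetches the next batch of rows whose payment_id is greater
// than the last one already processed, keyset-pagination style.
const pageQuery = `select user_id, original_transaction_id, payment_id
    from Payment_Receipt where payment_id > ?
    order by payment_id limit 1000`

func (m *mysql) queryPage(lastPaymentID int64) ([]*database.Row, error) {
    rows, err := m.db.Queryx(pageQuery, lastPaymentID)
    if err != nil {
        return nil, errors.Wrap(err, "querying page")
    }
    defer rows.Close()
    var page []*database.Row
    for rows.Next() {
        row := &database.Row{}
        if err := rows.StructScan(row); err != nil {
            return nil, errors.Wrap(err, "scanning row")
        }
        page = append(page, row)
    }
    return page, rows.Err()
}

The caller loops, passing in the payment_id of the last row of each page, and stops when a page comes back empty. Because every page is fully read and its result set closed before the updates start, the select never holds locks while the updates run.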

How can I ensure that all of my write transaction functions get resolved in order? Also, why is the else function not executing?

I'm trying to create a very simple Bolt database called "ledger.db" that includes one bucket, called "Users", which contains usernames as keys and balances as values, and that allows users to transfer their balance to one another. I am using bolter to view the database in the command line.
There are two problems, both inside the transfer function.
First: the transfer function contains an if/else. If the condition is true, it executes as it should. If it's false, nothing happens. There are no syntax errors and the program runs as though nothing is wrong; it just never executes the else branch.
Second: even when the condition is true, it doesn't update BOTH balance values in the database. It updates the balance of the receiver, but not that of the sender. The arithmetic is done and the values are marshaled into a JSON-compatible format; the sender's balance just never reaches the database.
Everything from the second "Success!" fmt.Println() call onward is never executed.
I've tried changing db.Update() to db.Batch(). I've tried changing the order of the Put() calls. I've tried messing with goroutines and defer, but I have no clue how to use those, as I am rather new to Go.
func (from *User) transfer(to User, amount int) error {
    var fbalance int = 0
    var tbalance int = 0
    db, err := bolt.Open("ledger.db", 0600, nil)
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()
    return db.Update(func(tx *bolt.Tx) error {
        uBuck := tx.Bucket([]byte("Users"))
        json.Unmarshal(uBuck.Get([]byte(from.username)), &fbalance)
        json.Unmarshal(uBuck.Get([]byte(to.username)), &tbalance)
        if amount <= fbalance {
            fbalance = fbalance - amount
            encoded, err := json.Marshal(fbalance)
            if err != nil {
                return err
            }
            tbalance = tbalance + amount
            encoded2, err := json.Marshal(tbalance)
            if err != nil {
                return err
            }
            fmt.Println("Success!")
            c := uBuck
            err = c.Put([]byte(to.username), encoded2)
            return err
            fmt.Println("Success!")
            err = c.Put([]byte(from.username), encoded)
            return err
            fmt.Println("Success!")
        } else {
            return fmt.Errorf("Not enough in balance!", amount)
        }
        return nil
    })
    return nil
}

func main() {
    /*
        db, err := bolt.Open("ledger.db", 0600, nil)
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()
    */
    var b User = User{"Big", "jig", 50000, 0}
    var t User = User{"Trig", "pig", 40000, 0}
    // These two functions add each User to the database; they aren't
    // the problem
    b.createUser()
    t.createUser()
    /*
        db.View(func(tx *bolt.Tx) error {
            c := tx.Bucket([]byte("Users"))
            get := c.Get([]byte(b.username))
            fmt.Printf("The return value %v", get)
            return nil
        })
    */
    t.transfer(b, 40000)
}
func main() {
/*
db, err := bolt.Open("ledger.db", 0600, nil)
if err != nil {
log.Fatal(err)
}
defer db.Close()
*/
var b User = User{"Big", "jig", 50000, 0}
var t User = User{"Trig", "pig", 40000, 0}
// These two functions add each User to the database, they aren't
// the problem
b.createUser()
t.createUser()
/*
db.View(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("Users"))
get := c.Get([]byte(b.username))
fmt.Printf("The return value %v",get)
return nil
})
*/
t.transfer(b, 40000)
}
I expect the database to show Big:90000 Trig:0, starting from the initial values of Big:50000 Trig:40000.
Instead, the program produces Big:90000 Trig:40000.
You return unconditionally, so everything after the first return statement is unreachable:

c := uBuck
err = c.Put([]byte(to.username), encoded2)
return err
fmt.Println("Success!")
err = c.Put([]byte(from.username), encoded)
return err
fmt.Println("Success!")

You are also not checking the errors that several calls return:

json.Unmarshal(uBuck.Get([]byte(from.username)), &fbalance)
json.Unmarshal(uBuck.Get([]byte(to.username)), &tbalance)
t.transfer(b, 40000)

And so on. Debug your code statement by statement.
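For reference, here is a sketch of the inside of the db.Update closure with the unconditional returns removed, so that both Put calls execute and the transaction commits both balances together:

if amount <= fbalance {
    fbalance -= amount
    tbalance += amount
    encodedFrom, err := json.Marshal(fbalance)
    if err != nil {
        return err
    }
    encodedTo, err := json.Marshal(tbalance)
    if err != nil {
        return err
    }
    // Both writes happen before any return; if either fails, returning
    // the error aborts the whole transaction and neither balance changes.
    if err := uBuck.Put([]byte(to.username), encodedTo); err != nil {
        return err
    }
    if err := uBuck.Put([]byte(from.username), encodedFrom); err != nil {
        return err
    }
    fmt.Println("Success!")
    return nil
}
return fmt.Errorf("not enough in balance: %d", amount)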

GoLang Dynamic SQL Query in App Engine

I want to build dynamic SQL in Go and I can't seem to find the correct way to do it.
Basically, I just want to do:
query := "SELECT id, email, something FROM User"
var paramValues []string
paramString := ""
if userParams.Name != "" {
    paramString += " WHERE id = ?"
    paramValues = append(paramValues, userParams.Name)
}
if userParams.UserID != "" {
    if len(paramString) > 0 {
        paramString += " AND"
    } else {
        paramString += " WHERE"
    }
    paramString += " email = ?"
    paramValues = append(paramValues, userParams.UserID)
}
stmtOut, err := db.Prepare(query + paramString)
err = stmtOut.QueryRow(paramValues).Scan(&id, &email, &something)
This is related to building a dynamic query in MySQL and Go.
I've been unable to find a solid way to do this that doesn't allow SQL injection. The issue with my solution above is that QueryRow() does not take a []string as a parameter.
I want to protect against SQL injection, so fmt.Sprintf doesn't really solve the problem.
This way I can allow searches on User using either the ID or the email, and I will also use this logic for other objects with more searchable fields.
I'm using go-sql-driver/mysql.
Here's something that I can run on my local machine (go1.8 linux/amd64 and the current Go MySQL driver, 1.3). A couple of approaches are demonstrated.
package main

import (
    "database/sql"
    "fmt"
    "log"

    _ "github.com/go-sql-driver/mysql"
)

// var db *sql.DB
// var err error

/*
Database Name/Schema : Test123
Table Name: test
Table Columns and types:
    number INT (PRIMARY KEY)
    cube INT
*/

func main() {
    // Username root, password root
    db, err := sql.Open("mysql", "root:root@tcp(127.0.0.1:3306)/Test123?charset=utf8")
    if err != nil {
        fmt.Println(err) // needs proper handling as per app requirement
        return
    }
    defer db.Close()

    err = db.Ping()
    if err != nil {
        fmt.Println(err) // needs proper handling as per app requirement
        return
    }

    // Prepared statement for inserting data
    stmtIns, err := db.Prepare("INSERT INTO test VALUES( ?, ? )") // ? = placeholders
    if err != nil {
        panic(err.Error()) // needs proper handling as per app requirement
    }
    defer stmtIns.Close()

    // Insert the cubes of the numbers 1-9
    for i := 1; i < 10; i++ {
        _, err = stmtIns.Exec(i, i*i*i) // insert tuples (i, i^3)
        if err != nil {
            panic(err.Error()) // proper error handling instead of panic in your app
        }
    }

    num := 3
    // Select statement
    dataEntity := "cube"
    condition := "WHERE number=? AND cube > ?"
    finalStatement := "SELECT " + dataEntity + " FROM test " + condition
    cubeLowerLimit := 10

    var myCube int
    err = db.QueryRow(finalStatement, num, cubeLowerLimit).Scan(&myCube)
    switch {
    case err == sql.ErrNoRows:
        log.Printf("No row with this number %d", num)
    case err != nil:
        log.Fatal(err)
    default:
        fmt.Printf("Cube for %d is %d\n", num, myCube)
    }

    var cubenum int
    // Prepared statement for reading data
    stmtRead, err := db.Prepare(finalStatement)
    if err != nil {
        panic(err.Error()) // needs proper err handling
    }
    defer stmtRead.Close()

    // Query for the cube of 5
    num = 5
    err = stmtRead.QueryRow(num, cubeLowerLimit).Scan(&cubenum)
    switch {
    case err == sql.ErrNoRows:
        log.Printf("No row with this number %d", num)
    case err != nil:
        log.Fatal(err)
    default:
        fmt.Printf("Cube number for %d is %d\n", num, cubenum)
    }
}
If you run it more than once, you need to delete the rows from the table first so that the inserts don't panic on duplicate primary keys (or change the insert code so that it doesn't panic). I haven't tried it on Google App Engine. Hope this helps.
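As for the original sticking point, that QueryRow() does not take a []string: QueryRow is variadic over interface{}, so you can collect the values in an []interface{} alongside the WHERE fragments and spread the slice with the ... operator. Here is a sketch built on the question's own code:

// Build the WHERE clause and the parameter list side by side; the
// placeholders keep the query safe from SQL injection.
query := "SELECT id, email, something FROM User"
paramString := ""
var paramValues []interface{}
if userParams.Name != "" {
    paramString += " WHERE id = ?"
    paramValues = append(paramValues, userParams.Name)
}
if userParams.UserID != "" {
    if len(paramString) > 0 {
        paramString += " AND"
    } else {
        paramString += " WHERE"
    }
    paramString += " email = ?"
    paramValues = append(paramValues, userParams.UserID)
}
stmtOut, err := db.Prepare(query + paramString)
if err != nil {
    log.Fatal(err) // needs proper handling as per app requirement
}
defer stmtOut.Close()
// Spreading the slice satisfies QueryRow's variadic signature.
err = stmtOut.QueryRow(paramValues...).Scan(&id, &email, &something)

This keeps placeholders, and therefore injection safety, intact no matter which filters are present.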

Correctly remove second json.Marshal in Go

While trying to build a simple REST API in Go with MySQL storage, I have, for whatever reason, added a second json.Marshal, which is double-encoding and producing results with escaped quotes and such. I could strip the quotes, but I think I shouldn't have two json.Marshal calls in the first place.
The problem is twofold: 1) which one is proper to remove (I'm leaning toward the first, because result should be the larger array), and 2) how do I keep the code functioning after the removal? I can't simply remove the first one, as I start encountering all sorts of errors. Here are the relevant portions of the code:
type Volume struct {
    Id          int
    Name        string
    Description string
}

... skipping ahead ....

var result = make([]string, 1000)
switch request.Method {
case "GET":
    name := request.URL.Query().Get("name")
    stmt, err := db.Prepare("select id, name, description from idm_assets.VOLUMES where name = ?")
    if err != nil {
        fmt.Print(err)
    }
    rows, err := stmt.Query(name)
    if err != nil {
        fmt.Print(err)
    }
    i := 0
    for rows.Next() {
        var name string
        var id int
        var description string
        err = rows.Scan(&id, &name, &description)
        if err != nil {
            fmt.Println("Error scanning: " + err.Error())
            return
        }
        volume := &Volume{Id: id, Name: name, Description: description}
Here is the first json.Marshal ...
        b, err := json.Marshal(volume)
        fmt.Println(b)
        if err != nil {
            fmt.Println(err)
            return
        }
        result[i] = fmt.Sprintf("%s", string(b))
        i++
    }
    result = result[:i]
... skipping the other cases for PUT, DELETE, etc., down to the second json.Marshal ...
default:
}
json, err := json.Marshal(result)
if err != nil {
    fmt.Println(err)
    return
}
fmt.Fprintf(response, "'%v'\n", string(json))
Turn result into a slice of *Volume:

result := []*Volume{}

and then append new Volume records to it inside the loop:

result = append(result, &Volume{Id: id, Name: name, Description: description})

In the end, use json.Marshal(result) once to produce the JSON output; the per-row Marshal (and with it the double encoding) disappears.
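Putting that together, a sketch of the GET branch after the change, reusing the names from the question:

case "GET":
    name := request.URL.Query().Get("name")
    stmt, err := db.Prepare("select id, name, description from idm_assets.VOLUMES where name = ?")
    if err != nil {
        fmt.Print(err)
    }
    rows, err := stmt.Query(name)
    if err != nil {
        fmt.Print(err)
    }
    // Collect typed records instead of pre-encoded strings.
    result := []*Volume{}
    for rows.Next() {
        var v Volume
        if err := rows.Scan(&v.Id, &v.Name, &v.Description); err != nil {
            fmt.Println("Error scanning: " + err.Error())
            return
        }
        result = append(result, &v)
    }

...with the single json.Marshal(result) at the end writing the response:

out, err := json.Marshal(result)
if err != nil {
    fmt.Println(err)
    return
}
fmt.Fprintf(response, "%s\n", out)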