pgloader: Heap exhausted, game over. problem - mysql

After writing the load file below, when I run my pgloader script it fails this way:
-- pgloader command file: copies the rows of the matching MySQL table(s) into
-- an already existing PostgreSQL schema (data only, no table creation).
-- Connection URIs take the form scheme://user:password@host/dbname; the "@"
-- separating the credentials from the host is required in both URIs.
-- NOTE(review): "Heap exhausted" is usually a sign that too much data is held
-- in memory at once; with ~50 kB rows, lowering batch size / batch rows /
-- prefetch rows is the first thing to try -- confirm against the pgloader docs.
load database
     from mysql://xxx:yyy@127.0.0.1/zzz
     into pgsql://xxx:yyy@localhost/zzz
     including only table names matching 'TABLE'
     with
          data only,
          create no tables, preserve index names,
          batch rows = 1000,
          batch size = 500 MB,
          prefetch rows = 1000
          -- on error stop,
     set work_mem to '2048 MB', maintenance_work_mem to '4096 MB';
-- before load do $$ drop schema if exists jobs cascade; $$;
I get this error message, but this is intermittent, that is, it doesn't always happen, and I'm not sure what parameters to put. I have plenty of ram, the records are about 50kb each.
2020-05-25T04:28:14.194000Z INFO Incomplete Foreign Key definition: constraint "fk_job_currency1" on table "jobs2_beta2.job" referencing table NIL
2020-05-25T04:28:14.194000Z INFO Incomplete Foreign Key definition: constraint "fk_job_job_category1" on table "jobs2_beta2.job" referencing table NIL
2020-05-25T04:28:14.194000Z INFO Incomplete Foreign Key definition: constraint "fk_job_organization1" on table "jobs2_beta2.job" referencing table NIL
2020-05-25T04:28:14.194000Z INFO Incomplete Foreign Key definition: constraint "fk_job_resource1" on table "jobs2_beta2.job" referencing table NIL
2020-05-25T04:28:14.194000Z INFO Incomplete Foreign Key definition: constraint "fk_job_user1" on table "jobs2_beta2.job" referencing table NIL
2020-05-25T04:28:14.198000Z SQL MySQL: sending query: -- params: db-name
-- table-type-name
-- only-tables
-- only-tables
-- including
-- filter-list-to-where-clause incuding
-- excluding
-- filter-list-to-where-clause excluding
SELECT table_name, index_name, index_type,
sum(non_unique),
cast(GROUP_CONCAT(column_name order by seq_in_index) as char)
FROM information_schema.statistics
WHERE table_schema = 'jobs2_beta2'
and (table_name = 'job')
GROUP BY table_name, index_name, index_type;
2020-05-25T04:28:14.225000Z INFO Processing source catalogs
2020-05-25T04:28:14.272000Z NOTICE Prepare PostgreSQL database.
2020-05-25T04:28:14.275000Z DEBUG CONNECTED TO #<PGLOADER.PGSQL:PGSQL-CONNECTION pgsql://ziprecruiter#localhost:5432/ziprecruiter {1006A2A683}>
2020-05-25T04:28:14.275000Z DEBUG SET client_encoding TO 'utf8'
2020-05-25T04:28:14.275000Z DEBUG SET work_mem TO '1024 MB'
2020-05-25T04:28:14.276000Z DEBUG SET maintenance_work_mem TO '4096 MB'
2020-05-25T04:28:14.276000Z DEBUG SET application_name TO 'pgloader'
2020-05-25T04:28:14.280000Z DEBUG BEGIN
2020-05-25T04:28:14.314000Z SQL DROP TABLE IF EXISTS jobs2_beta2.job CASCADE;
2020-05-25T04:28:15.316000Z SQL CREATE TABLE jobs2_beta2.job
(
id bigserial not null,
resource_id bigint not null,
url text not null,
job_title text,
html_job_description text,
text_job_description text,
last_crawl_date timestamptz,
first_indexed_date timestamptz,
job_category_id bigint not null,
currency_id bigint not null,
salary_exact double precision,
salary_is_range smallint,
salary_range_start double precision,
salary_range_end double precision,
address text,
source_program varchar(255),
organization varchar(255),
organization_count bigint,
expiration_date timestamptz,
is_sponsored smallint,
is_hidden smallint,
educational_requirements text,
experience_requirements text,
destination text,
organization_id bigint not null,
user_id bigint not null,
is_expired smallint,
salary_periodicity varchar(255),
json_schema text,
clean_job_description text,
estimated_job_category varchar(255)
);
2020-05-25T04:28:15.326000Z SQL -- params: table-names
select n, n::regclass::oid
from (values ('jobs2_beta2.job')) as t(n);
2020-05-25T04:28:15.392000Z NOTICE COPY jobs2_beta2.job
2020-05-25T04:28:15.392000Z DEBUG Reader started for jobs2_beta2.job
2020-05-25T04:28:15.405000Z DEBUG start jobs2_beta2.job 1400
2020-05-25T04:28:15.407000Z INFO COPY ON ERROR STOP
2020-05-25T04:28:15.408000Z DEBUG CONNECTED TO #<MYSQL-CONNECTION mysql://jobs_client2#127.0.0.1:3306/jobs2_beta2 {100A4E61B3}>
2020-05-25T04:28:15.408000Z SQL MySQL: sending query: SELECT `id`, `resource_id`, `url`, `job_title`, `html_job_description`, `text_job_description`, `last_crawl_date`, `first_indexed_date`, `job_category_id`, `currency_id`, `salary_exact`, `salary_is_range`, `salary_range_start`, `salary_range_end`, `address`, `source_program`, `organization`, `organization_count`, `expiration_date`, `is_sponsored`, `is_hidden`, `educational_requirements`, `experience_requirements`, `destination`, `organization_id`, `user_id`, `is_expired`, `salary_periodicity`, `json_schema`, `clean_job_description`, `estimated_job_category` FROM `job`
2020-05-25T04:28:15.416000Z DEBUG CONNECTED TO #<PGLOADER.PGSQL:PGSQL-CONNECTION pgsql://ziprecruiter#localhost:5432/ziprecruiter {100A8C11A3}>
2020-05-25T04:28:15.416000Z DEBUG SET client_encoding TO 'utf8'
2020-05-25T04:28:15.416000Z DEBUG SET work_mem TO '1024 MB'
2020-05-25T04:28:15.416000Z DEBUG SET maintenance_work_mem TO '4096 MB'
2020-05-25T04:28:15.416000Z DEBUG SET application_name TO 'pgloader'
2020-05-25T04:28:15.416000Z SQL SET search_path TO jobs2_beta2;
2020-05-25T04:28:15.417000Z INFO pgsql:copy-rows-from-queue[0]: jobs2_beta2.job (id resource_id url job_title
html_job_description
text_job_description
last_crawl_date
first_indexed_date
job_category_id currency_id
salary_exact salary_is_range
salary_range_start
salary_range_end address
source_program organization
organization_count
expiration_date is_sponsored
is_hidden
educational_requirements
experience_requirements
destination organization_id
user_id is_expired
salary_periodicity json_schema
clean_job_description
estimated_job_category)
Gen Boxed Unboxed LgBox LgUnbox Pin Alloc Waste Trig WP GCs Mem-age
0 0 0 0 0 0 0 0 42949672 0 0 0.0000
1 577 42719 0 36 5 1301487024 118415952 753961944 0 1 1.3187
2 1776 83008 25 60 65 2579123472 201863920 2000000 668 0 0.8672
3 0 0 0 0 0 0 0 2000000 0 0 0.0000
4 0 0 0 0 0 0 0 2000000 0 0 0.0000
5 0 0 0 0 0 0 0 2000000 0 0 0.0000
6 1593 1278 0 0 0 90993120 3083808 2000000 1501 0 0.0000
7 0 0 0 0 0 0 0 2000000 0 0 0.0000
Total bytes allocated = 3971603616
Dynamic-space-size bytes = 4294967296
GC control variables:
*GC-INHIBIT* = true
*GC-PENDING* = true
*STOP-FOR-GC-PENDING* = false
fatal error encountered in SBCL pid 9514(tid 0x7ffff492f700):
Heap exhausted, game over.
Welcome to LDB, a low-level debugger for the Lisp runtime environment.
ldb>
I tried fiddling with the memory parameters, but without success. Can anybody provide assistance with this issue?

Found a github issue related to "Heap exhausted, game over", might be helpful.
https://github.com/dimitri/pgloader/issues/327

Related

Counting product pairs in a store whose difference in expenses is less than a certain amount in SQL

I have a table with the serial number of each product, whether it is in stock (1 - in stock, 0 - not in stock), the level of revenue from the product and the level of expenses from the product in the store. I would like to write a query that counts all product pairs (without counting the same pair twice) whose expense difference is less than NIS 1,000 and where both products are in stock or both are out of stock. It should show the average income gap (approximately) of all pairs, how many such pairs are in stock, and how many are not in stock.
Sample table:
serial
Is_in_stock
Revenu_ from_the_product
Expenses_from_the_product
1
1
27627
57661
2
0
48330
20686
3
0
26010
861
4
1
22798
37771
5
0
24606
8905
6
1
48311
6433
7
0
29929
6278
8
0
24254
8590
Unfortunately I am lost and unable to find a solution to my problem.
I was thinking of creating subqueries but could not find a suitable solution
The result should show something like this(Please do not refer to this data for illustration):
Average income gap (in absolute value) of all pairs
Quantity of pairs in stock
The amount of pairs that are not in stock
13
10
5
In addition it is very important that the count be done without duplicates of the same pair
We can do this with two queries, without a procedure or user defined function
-- Sample catalogue used by the queries below: one row per product.
CREATE TABLE products (
    serial   INT,  -- product serial number
    Instock  INT,  -- 1 = in stock, 0 = not in stock
    Revenu   INT,  -- revenue from the product
    Expenses INT   -- expenses from the product
);

-- Explicit column list so the load does not depend on column order.
INSERT INTO products (serial, Instock, Revenu, Expenses) VALUES
    (1, 1, 27627, 57661),
    (2, 0, 48330, 20686),
    (3, 0, 26010,   861),
    (4, 1, 22798, 37771),
    (5, 0, 24606,  8905),
    (6, 1, 48311,  6433),
    (7, 0, 29929,  6278),
    (8, 0, 24254,  8590);
✓
✓
-- Lists each unordered pair of products once (lower serial first) where the
-- expenses differ by less than 1000 and both share the same stock status.
SELECT
    p1.serial,
    p2.serial
FROM products AS p1
INNER JOIN products AS p2
    ON ABS(p1.expenses - p2.expenses) < 1000
WHERE p1.serial < p2.serial
  AND p1.instock = p2.instock
serial | serial
-----: | -----:
5 | 8
-- Summary over every unordered pair of products (lower serial first).
-- Note: no expense threshold is applied here; the average is the mean
-- absolute expense difference across all pairs.
SELECT
    COUNT(lo.expenses) 'number of pairs',
    AVG(ABS(lo.expenses - hi.expenses)) 'average difference',
    SUM(CASE WHEN lo.instock = 1 AND hi.instock = 1 THEN 1 ELSE 0 END) pairsInstock,
    SUM(CASE WHEN lo.instock = 0 AND hi.instock = 0 THEN 1 ELSE 0 END) pairsneitherStock,
    SUM(CASE WHEN (lo.instock + hi.instock) = 1 THEN 1 ELSE 0 END) oneInStock
FROM products AS lo
INNER JOIN products AS hi
    ON lo.serial < hi.serial;
number of pairs | average difference | pairsInstock | pairsneitherStock | oneInStock
--------------: | -----------------: | -----------: | ----------------: | ---------:
28 | 21362.1071 | 3 | 10 | 15
db<>fiddle here
I have solved it in stored procedure.
Starting with variables definition.
The cursor iterates over the rows of the sorted list and checks whether the following condition is TRUE, according to your definition of a pair:
prev_exp - curr_Expenses_from_the_product < 1000 AND prev_in_stock - curr_Is_in_stock = 0
If it is TRUE, the counter is increased by 1.
At the end I close the cursor and return the counter value.
* You can add more logic to procedure and return more columns.
** Usage of this procedure is just to call to stored procedure by its name.
Table creation:
-- Sample table for the stored-procedure approach: one row per product.
CREATE TABLE A (
    serial                    INT(11),
    Is_in_stock               INT(11),
    Revenu_from_the_product   INT(11),
    Expenses_from_the_product INT(11)
);
Data insertion:
-- Same eight sample products as in the question.
INSERT INTO A
    (serial, Is_in_stock, Revenu_from_the_product, Expenses_from_the_product)
VALUES
    (1, 1, 27627, 57661),
    (2, 0, 48330, 20686),
    (3, 0, 26010,   861),
    (4, 1, 22798, 37771),
    (5, 0, 24606,  8905),
    (6, 1, 48311,  6433),
    (7, 0, 29929,  6278),
    (8, 0, 24254,  8590);
Query:
BEGIN
    -- Walks table A in descending order of expenses and counts consecutive
    -- rows whose expenses differ by less than 1000 and whose stock status
    -- is identical. The final SELECT returns the count.
    --
    -- Fix: the previous-row variables are now refreshed after every fetch and
    -- the first row is skipped (it has no predecessor). Before, prev_exp and
    -- prev_in_stock stayed at 0 forever, so the loop merely counted the rows
    -- with Is_in_stock = 0 (5 on the sample data). With the fix the sample
    -- data yields 1 (the rows with expenses 8905 and 8590).
    --
    -- NOTE(review): only neighbours in the sorted order are compared, so pairs
    -- separated by another row are not counted; a self-join
    -- (x.serial < y.serial) covers every pair.
    DECLARE finished INTEGER DEFAULT 0;
    DECLARE has_prev INTEGER DEFAULT 0;  -- 0 until the first row has been read
    DECLARE prev_exp INT(11) DEFAULT 0;
    DECLARE prev_in_stock INT(11) DEFAULT 0;
    DECLARE curr_Is_in_stock INT(11) DEFAULT 0;
    DECLARE curr_Expenses_from_the_product INT(11) DEFAULT 0;
    DECLARE duplications_counter INT(11) DEFAULT 0;

    -- declare cursor for relevant fields
    DECLARE curs CURSOR FOR
        SELECT A.Is_in_stock, A.Expenses_from_the_product
        FROM A
        ORDER BY A.Expenses_from_the_product DESC;

    -- declare NOT FOUND handler: flags the end of the cursor
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET finished = 1;

    OPEN curs;

    getRow: LOOP
        FETCH curs INTO curr_Is_in_stock, curr_Expenses_from_the_product;
        IF finished = 1 THEN
            LEAVE getRow;
        END IF;

        -- Rows arrive in descending order, so prev_exp - current is >= 0.
        IF has_prev = 1
           AND prev_exp - curr_Expenses_from_the_product < 1000
           AND prev_in_stock - curr_Is_in_stock = 0 THEN
            SET duplications_counter = duplications_counter + 1;
        END IF;

        -- Remember this row for the comparison with the next one.
        SET prev_exp = curr_Expenses_from_the_product;
        SET prev_in_stock = curr_Is_in_stock;
        SET has_prev = 1;
    END LOOP getRow;

    CLOSE curs;

    -- return the counter
    SELECT duplications_counter;
END
Result:
Counter: 5

MySQL LOAD DATA - Avoid convert string to zero when integer column

I am trying to trigger an error when I load a string into an integer column with LOAD DATA.
The string value in the file (aaa) becomes "0" in the table.
My table :
-- Target table of the LOAD DATA statement: two integer columns followed by
-- two short text columns, matching the a;b;c;d layout of the data file.
CREATE TABLE test1 (
    a INT(11) DEFAULT NULL,
    b INT(11) DEFAULT NULL,
    c VARCHAR(45) DEFAULT NULL,
    d VARCHAR(45) DEFAULT NULL
)
My loader :
-- Loads the semicolon-separated client-side file into test1, skipping the
-- header line and mapping the four fields to columns a, b, c, d.
-- NOTE(review): per the MySQL manual, LOCAL makes LOAD DATA treat
-- data-interpretation errors as warnings (as if IGNORE were given), which
-- would explain why strict SQL mode does not reject 'aaa' / 'bbb' -- confirm.
LOAD DATA LOCAL INFILE 'file.txt'
    INTO TABLE `test1`
    FIELDS TERMINATED BY ';'
    IGNORE 1 LINES
    (a, b, c, d)
My data file :
a;b;c;d
aaa;11;aa;z
2;bbb;bb;x
3;33;cc;w
4;44;dd;y
And the result in the table :
a b c d
-------------
0 11 aa z
2 0 bb x
3 33 cc w
4 44 dd y
You can see that "aaa" become "0" and "bbb" too.
I would like the file records to be rejected.
I tried to set sql mode to STRICT_ALL_TABLES but no effect :
set sql_mode = STRICT_ALL_TABLES;
Thank you !

GORM UUID too long

Currently I am using GO-GORM for all of my database queries (mostly CRUD) and I am having some issues inserting a generated UUID into a MySQL database column.
The column is a BINARY(16) as suggested in multiple blogs, the UUID is generated using github.com/satori/go.uuid package for Golang.
I am using GORM's BeforeCreate hook to generate the UUID if one does not already exist on the user, the code that I am using is as follows:
// BeforeCreate is the GORM hook run before a User row is inserted. When the
// user has no UUID yet, it generates a version-4 UUID and stores its binary
// form in the user_id column.
//
// It returns any error from marshalling the UUID or from setting the column;
// nil otherwise (including when the user already has an ID).
func (u *User) BeforeCreate(scope *gorm.Scope) (err error) {
	if u.UserID != uuid.Nil {
		// An ID is already present; leave it untouched.
		return nil
	}

	// Named "id" so the uuid package is not shadowed.
	id, err := uuid.NewV4().MarshalBinary()
	if err != nil {
		return err
	}
	return scope.SetColumn("user_id", id)
}
I have also used len to get the length that MarshalBinary outputs and it returns as 16.
The error I get from GORM when trying to insert the UUID into MySQL is as follows:
(Error 1406: Data too long for column 'user_id' at row 1)
I have also used fmt.Println(uuid) to see the result, which is as follows (obviously it changes, as the UUID is generated on every insert):
[93 132 59 55 102 96 72 35 137 185 34 21 195 88 213 127]
My MYSQL schema is as follows also:
CREATE TABLE users
(
id INT(10) unsigned PRIMARY KEY NOT NULL AUTO_INCREMENT,
created_at TIMESTAMP,
updated_at TIMESTAMP,
deleted_at TIMESTAMP,
user_id BINARY(16) NOT NULL,
username VARCHAR(255) NOT NULL,
password VARCHAR(255),
firstname VARCHAR(255),
lastname VARCHAR(255),
email VARCHAR(255),
address_id VARCHAR(255)
);
CREATE INDEX idx_users_deleted_at ON users (deleted_at);
CREATE UNIQUE INDEX username ON users (username);
CREATE UNIQUE INDEX user_id ON users (user_id);
I have tried different methods and libraries to generate UUIDs and convert them to binary to insert with similar results.
I think the problem is in the definition of model User. To save the GUID as 16-bytes binary, you need to define the UserID column as []byte not uuid.UUID.
// User is the GORM model from the question; only the field relevant to the
// UUID problem is shown here.
type User struct {
//other fields ..
// UserID is declared as []byte (rather than uuid.UUID) so that the raw
// bytes are what gets written to the user_id column.
UserID []byte
//other fields ...
}
// BeforeCreate is the GORM hook run before a User row is inserted. When
// UserID is empty it generates a version-4 UUID and stores its binary form
// in the user_id column.
//
// It returns any error from marshalling the UUID or from setting the column;
// nil otherwise.
func (u *User) BeforeCreate(scope *gorm.Scope) (err error) {
	// len() covers both a nil and an empty, non-nil slice.
	if len(u.UserID) != 0 {
		return nil
	}

	// Named "id" so the uuid package is not shadowed.
	id, err := uuid.NewV4().MarshalBinary()
	if err != nil {
		return err
	}
	return scope.SetColumn("user_id", id)
}
If you define the field as uuid.UUID, gorm "misinterpreted" the field as string and then insert that string into the database as binary. For example, the following UUID,
uuid: 16ac369b-e57f-471b-96f6-1068ead0bf98
//16-bytes equivalent
bytes: [22 172 54 155 229 127 71 27 150 246 16 104 234 208 191 152]
will be inserted to database as the ASCII codes of the UUID which are
0x31 0x36 0x61 0x63 0x33 0x36 0x39 0x62 0x2D 0x65 ...
('1' '6' 'a' 'c' '3' '6' '9' 'b' '-' 'e' ...)
which are 36-bytes in length, thus you're getting Error 1406: ...

for loop statement to create rows in database

I am trying to use for loop statement as follows:
for(int i=1; i <= 48; i++) { insertdiary("", ""); }
in my MyDB file:
package com.cookbook.data;
import android.content.ContentValues;
import android.content.Context;
import android.database.Cursor;
import android.database.sqlite.SQLiteDatabase;
import android.database.sqlite.SQLiteException;
import android.util.Log;
/**
 * Small data-access wrapper around the diary SQLite database.
 *
 * <p>Call {@link #open()} before any read or write and {@link #close()} when
 * done. The database itself is created and upgraded by {@code MyDBhelper}.
 */
public class MyDB {
    private SQLiteDatabase db;
    private final Context context;
    private final MyDBhelper dbhelper;

    /**
     * Creates the wrapper and its {@code MyDBhelper}; no database is opened yet.
     *
     * @param c context handed to the helper
     */
    public MyDB(Context c) {
        context = c;
        dbhelper = new MyDBhelper(context, Constants.DATABASE_NAME, null,
                Constants.DATABASE_VERSION);
    }

    /** Closes the database connection; a no-op when it was never opened. */
    public void close() {
        if (db != null) {
            db.close();
        }
    }

    /**
     * Opens the database for writing, falling back to a read-only handle when
     * a writable one cannot be obtained.
     *
     * @throws SQLiteException if the read-only fallback fails as well
     */
    public void open() throws SQLiteException {
        try {
            db = dbhelper.getWritableDatabase();
        } catch (SQLiteException ex) {
            // String.valueOf guards against a null message, which Log rejects.
            Log.v("Open database exception caught", String.valueOf(ex.getMessage()));
            db = dbhelper.getReadableDatabase();
        }
    }

    /**
     * Inserts a diary entry stamped with the current time.
     *
     * @param title   entry title
     * @param content entry body
     * @return the value returned by {@code SQLiteDatabase.insert}, or -1 when
     *         an {@link SQLiteException} was caught
     */
    public long insertdiary(String title, String content) {
        try {
            ContentValues newTaskValue = new ContentValues();
            newTaskValue.put(Constants.TITLE_NAME, title);
            newTaskValue.put(Constants.CONTENT_NAME, content);
            newTaskValue.put(Constants.DATE_NAME, java.lang.System.currentTimeMillis());
            return db.insert(Constants.TABLE_NAME, null, newTaskValue);
        } catch (SQLiteException ex) {
            // String.valueOf guards against a null message, which Log rejects.
            Log.v("Insert into database exception caught",
                    String.valueOf(ex.getMessage()));
            return -1;
        }
    }

    /**
     * Updates the title of an existing diary entry.
     *
     * @param title new title
     * @param rowId value of the {@code Constants.KEY_ID} column of the row
     * @return {@code true} when at least one row was updated
     */
    public boolean updateDiaryEntry(String title, long rowId) {
        ContentValues newValue = new ContentValues();
        newValue.put(Constants.TITLE_NAME, title);
        int updatedRows = db.update(Constants.TABLE_NAME, newValue,
                Constants.KEY_ID + "=" + rowId, null);
        return updatedRows > 0;
    }

    /**
     * Reads all diary entries.
     *
     * @return a cursor over every row and column of the diary table
     */
    public Cursor getdiaries() {
        return db.query(Constants.TABLE_NAME, null, null,
                null, null, null, null);
    }
}
My aim is to create 48 empty rows when the database or table is first created, so that I can later update these rows instead of creating new entries. Unfortunately, my attempts to use this code were unsuccessful: they either gave me errors or created many more rows than 48.
Is there anyone who could help me with utilizing this code to create 48 rows upon database or table first time creation please?
I appreciate all help.
Paddy
Unless there is really some strict rule governing the requirement to create 48 empty rows, creating them is really the absolute wrong way to go about doing it. Create them as needed, when you need to plug data into them.
I did this in mysql originally. Had trouble creating an SQLFiddle so i created an SQLite version as well.
There is an SQLFiddle. Squeezing all the stuff that follows into 8K, the SQLFiddle limit, was 'interesting' ;-/
The SQLite version, which is exactly the same apart from the 'create table' statements, I will make available if required. It will be a download of the database file which, I understand, is the same across all machines. I can also provide the creation scripts if required.
Purpose:
The idea, i understand, is to display 'appointments' where the day is split into 48, 30 minute periods.
The requirement is to only record the actual appointments.
I pictured it as a small number of departments, recording appointments during the day when events will happen. In my example data, people visiting.
Here is the query to show appointments:
-- Shows the slots 12..20 of department 1 on 2014-04-11, both booked and
-- empty. Columns are listed explicitly and the rows are ordered here, because
-- an ORDER BY inside the view does not guarantee the order of this query.
SELECT
    dav.appointment_id,
    dav.department_id,
    dav.department_code,
    dav.the_date,
    dav.time_slot_id,
    dav.start_time,
    dav.attendee,
    dav.reason,
    dav.duration
FROM department_appointments_view AS dav
WHERE dav.the_date = '2014-04-11'
  AND dav.department_id = 1
  AND dav.time_slot_id BETWEEN 12 AND 20
ORDER BY dav.time_slot_id;
Here is the sample output:
appointment_id department_id department_code the_date time_slot_id start_time attendee reason duration
-------------- ------------- --------------- ------------------- ------------ ---------- ----------------- --------------------- ----------
0 1 dept_01 2014-04-11 00:00:00 12 05:30:00 30
0 1 dept_01 2014-04-11 00:00:00 13 06:00:00 30
1 1 dept_01 2014-04-11 00:00:00 14 06:30:00 Catherine Tramell to see you 30
0 1 dept_01 2014-04-11 00:00:00 15 07:00:00 30
2 1 dept_01 2014-04-11 00:00:00 16 07:30:00 Buddy Ackerman to see them 30
0 1 dept_01 2014-04-11 00:00:00 17 08:00:00 30
0 1 dept_01 2014-04-11 00:00:00 18 08:30:00 30
3 1 dept_01 2014-04-11 00:00:00 19 09:00:00 Ivan Drago to visit someone else 30
0 1 dept_01 2014-04-11 00:00:00 20 09:30:00 30
So, the main table, where appointments are entered, is:
-- Booked appointments only: one row per appointment, keyed to a department,
-- a date and a time slot.
CREATE TABLE department_appointments (
    id            INT(11) NOT NULL AUTO_INCREMENT,
    department_id INT(11) NOT NULL,
    the_date      DATE NOT NULL,
    time_slot_id  INT(11) NOT NULL,
    attendee      VARCHAR(128) COLLATE utf8_unicode_ci NOT NULL,
    reason        VARCHAR(128) COLLATE utf8_unicode_ci NOT NULL,
    duration      INT(11) NOT NULL,  -- presumably minutes; verify
    PRIMARY KEY (id),
    KEY dept_fk (department_id),
    CONSTRAINT dept_fk FOREIGN KEY (department_id) REFERENCES departments (id)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
This is the only table where appointment information is entered.
Sample data:
id department_id the_date time_slot_id attendee reason duration
------ ------------- ---------- ------------ ----------------------- --------------------- ----------
1 1 2014-04-11 14 Catherine Tramell to see you 30
2 1 2014-04-11 16 Buddy Ackerman to see them 30
3 1 2014-04-11 19 Ivan Drago to visit someone else 30
We need some supporting tables:
The departments table:
-- Lookup table of departments that can hold appointments.
CREATE TABLE departments (
    id              INT(11) NOT NULL AUTO_INCREMENT,
    department_code VARCHAR(64) COLLATE utf8_unicode_ci NOT NULL,
    title           VARCHAR(128) COLLATE utf8_unicode_ci NOT NULL,
    PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
Sample data:
id department_code title
------ --------------- -----------------------------
1 dept_01 Dept 01 - The Widget Makers
2 dept_02 Dept 02 - For Bar Workers
The calendar: This is just a table with dates in it. My test data was for april.
-- Calendar table: one row per date for which slots should be generated.
CREATE TABLE the_calendar (
    id       INT(11) NOT NULL AUTO_INCREMENT,
    the_date DATETIME NOT NULL,
    PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=31 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
Sample data:
id the_date
------ ---------------------
1 2014-04-01 00:00:00
2 2014-04-02 00:00:00
3 2014-04-03 00:00:00
4 2014-04-04 00:00:00
The read_only_time_slots table. This has 48 rows in it with start times. This table is read only and never updated or copied or anything.
-- Fixed list of daily time slots, each with its start time and length.
CREATE TABLE read_only_time_slots (
    time_slot_id INT(11) NOT NULL,
    start_time   TIME NOT NULL,
    duration     INT(11) NOT NULL,  -- presumably minutes; verify
    PRIMARY KEY (time_slot_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
Sample data:
time_slot_id start_time duration
------------ ---------- ----------
1 00:00:00 30
2 00:30:00 30
3 01:00:00 30
----------------------
You now need some queries to run this lot. Please be aware that we take advantage of the database engine to do cartesian products whenever it can. It will generate all the needed rows for us from just the above tables.
Now, to simplify the use of the information, i have used 'views'. Less confusion for me that way.
The views
The first is: time_slot_view
-- Exposes the time slots in slot-id order.
CREATE VIEW time_slot_view AS (
    SELECT
        slot.time_slot_id AS time_slot_id,
        slot.start_time   AS start_time,
        slot.duration     AS duration
    FROM read_only_time_slots AS slot
    ORDER BY slot.time_slot_id ASC
)
The next is: department_calendar_view
This returns empty timeslots for each department, for each day.
-- Cartesian product of calendar dates, time slots and departments: one empty
-- slot row per department, per day, per time slot.
CREATE VIEW department_calendar_view AS (
    SELECT
        d.id              AS department_id,
        d.department_code AS department_code,
        c.the_date        AS the_date,
        tsv.time_slot_id  AS time_slot_id,
        tsv.start_time    AS start_time,
        tsv.duration      AS duration
    FROM the_calendar AS c
    CROSS JOIN time_slot_view AS tsv
    CROSS JOIN departments AS d
    ORDER BY d.department_code, c.the_date, tsv.time_slot_id
)
Finally: there is the view that uses all the above:
The: department_appointments_view
This probably could be done as an outer join. I just used two queries and a union.
-- Full day view per department: booked slots carry the appointment data,
-- every other slot is returned as a placeholder row with appointment id 0
-- and empty attendee / reason.
CREATE VIEW department_appointments_view AS
    -- Branch 1: slots that have an appointment.
    SELECT
        appt.id             AS appointment_id,
        cal.department_id   AS department_id,
        cal.department_code AS department_code,
        appt.the_date       AS the_date,
        appt.time_slot_id   AS time_slot_id,
        cal.start_time      AS start_time,
        appt.attendee       AS attendee,
        appt.reason         AS reason,
        appt.duration       AS duration
    FROM department_appointments AS appt
    INNER JOIN department_calendar_view AS cal
        ON  appt.department_id = cal.department_id
        AND appt.the_date      = cal.the_date
        AND appt.time_slot_id  = cal.time_slot_id
    UNION
    -- Branch 2: slots without any appointment.
    SELECT
        0,
        cal.department_id,
        cal.department_code,
        cal.the_date,
        cal.time_slot_id,
        cal.start_time,
        '' AS attendee,
        '' AS reason,
        cal.duration
    FROM department_calendar_view AS cal
    WHERE NOT EXISTS (
        SELECT 1
        FROM department_appointments AS appt
        WHERE appt.department_id = cal.department_id
          AND appt.the_date      = cal.the_date
          AND appt.time_slot_id  = cal.time_slot_id
    )
    ORDER BY department_code, the_date, time_slot_id;

MySQL Hierarchical Structure Data Extraction

I've been struggling for about 2 hours on one query now. Help? :(
I have a table like this:
id name lft rgt
35 Top level board 1 16
37 2nd level board 3 6 15
38 2nd level board 2 4 5
39 2nd level board 1 2 3
40 3rd level board 1 13 14
41 3rd level board 2 9 12
42 3rd level board 3 7 8
43 4th level board 1 10 11
It is stored in the structure recommended in this tutorial. What I want to do is select a forum board and all sub forums ONE level below the selected forum board (no lower). Ideally, the query would get the selected forum's level while only being passed the board's ID, then it would select that forum, and all its immediate children.
So, I would hopefully end up with:
id name lft rgt
35 Top level board 1 16
37 2nd level board 3 6 15
38 2nd level board 2 4 5
39 2nd level board 1 2 3
Or
id name lft rgt
37 2nd level board 3 6 15
40 3rd level board 1 13 14
41 3rd level board 2 9 12
42 3rd level board 3 7 8
The top rows here are the parent forums, the others sub forums. Also, I'd like something where a depth value is given, where the depth is relative to the selected parent forum. For example, taking the last table as some working data, we would have:
id name lft rgt depth
37 2nd level board 3 6 15 0
40 3rd level board 1 13 14 1
41 3rd level board 2 9 12 1
42 3rd level board 3 7 8 1
Or
id name lft rgt depth
35 Top level board 1 16 0
37 2nd level board 3 6 15 1
38 2nd level board 2 4 5 1
39 2nd level board 1 2 3 1
I hope you get my drift here.
Can anyone help with this? It's really getting me annoyed now :(
James
The easiest way for you to do it - just add a column where you keep the depth.
Otherwise the query will be very inefficient - you will have to get the whole hierarchy, sorted by left number (which puts the very first child first), and join it to itself to make sure that, for each next node, the left number is equal to the previous node's right number + 1.
In general, nested intervals algorithm is nice, but has a serious disadvantage - if you add something to tree, a lot of recalculations required.
A nice alternative for this is Tropashko Nested intervals algorithm with continued fractions - just google for it. And getting a single level below the parent with this algorithm is done very naturally. Also, given a child, you can calculate all numbers for all its parents without hitting a database.
One more thing to consider is that relational databases really are not the most optimal and natural way to store hierarchical data. A structure like you have here - a binary tree, essentially - would be much easier to represent with an XML blob that you can persist, or store as an object in an object-oriented database.
I prefer the adjacency list approach myself. The following example uses a non-recursive stored procedure to return a tree/subtree which I then transform into an XML DOM but you could do whatever you like with the resultset. Remember it's a single call from PHP to MySQL and adjacency lists are much easier to manage.
full script here : http://pastie.org/1294143
PHP
<?php
// Emits a department's staff hierarchy as XML. A single call to the
// department_hier stored procedure returns the rows parent-first (by depth);
// each row becomes a <staff> element that is attached to its parent element.
header("Content-type: text/xml");
$conn = new mysqli("localhost", "foo_dbo", "pass", "foo_db", 3306);
// one non-recursive db call to get the tree
$result = $conn->query(sprintf("call department_hier(%d,%d)", 2,1));
if ($result === false) {
    // Without a result set there is nothing to build; stop before fetch_assoc().
    $conn->close();
    exit;
}
$xml = new DomDocument;
$xpath = new DOMXpath($xml);
$dept = $xml->createElement("department");
$xml->appendChild($dept);
// loop and build the DOM
while($row = $result->fetch_assoc()){
    $staff = $xml->createElement("staff");
    // foreach($row as $col => $val) $staff->setAttribute($col, $val);
    $staff->setAttribute("staff_id", $row["staff_id"]);
    $staff->setAttribute("name", $row["name"]);
    $staff->setAttribute("parent_staff_id", $row["parent_staff_id"]);
    if(is_null($row["parent_staff_id"])){
        // Root of the tree: also carries the department attributes.
        $dept->setAttribute("dept_id", $row["dept_id"]);
        $dept->setAttribute("department_name", $row["department_name"]);
        $dept->appendChild($staff);
    }
    else{
        // "@" selects an attribute in XPath: find the already-inserted parent.
        $qry = sprintf("//*[@staff_id = '%d']", $row["parent_staff_id"]);
        $parent = $xpath->query($qry)->item(0);
        if(!is_null($parent)) $parent->appendChild($staff);
    }
}
$result->close();
$conn->close();
echo $xml->saveXML();
?>
XML Output
<department dept_id="2" department_name="Mathematics">
<staff staff_id="1" name="f00" parent_staff_id="">
<staff staff_id="5" name="gamma" parent_staff_id="1"/>
<staff staff_id="6" name="delta" parent_staff_id="1">
<staff staff_id="7" name="zeta" parent_staff_id="6">
<staff staff_id="2" name="bar" parent_staff_id="7"/>
<staff staff_id="8" name="theta" parent_staff_id="7"/>
</staff>
</staff>
</staff>
</department>
SQL Stuff
-- TABLES

-- People; ids are assigned automatically.
DROP TABLE IF EXISTS staff;
CREATE TABLE staff (
    staff_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name     VARCHAR(255) NOT NULL
) ENGINE = InnoDB;

-- Departments; names are unique.
DROP TABLE IF EXISTS departments;
CREATE TABLE departments (
    dept_id TINYINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    name    VARCHAR(255) UNIQUE NOT NULL
) ENGINE = InnoDB;

-- Adjacency list: each staff member of a department points at a parent staff
-- member in that department (NULL for the root).
DROP TABLE IF EXISTS department_staff;
CREATE TABLE department_staff (
    dept_id         TINYINT UNSIGNED NOT NULL,
    staff_id        SMALLINT UNSIGNED NOT NULL,
    parent_staff_id SMALLINT UNSIGNED NULL,
    PRIMARY KEY (dept_id, staff_id),
    KEY (staff_id),
    KEY (parent_staff_id)
) ENGINE = InnoDB;
-- STORED PROCEDURES
drop procedure if exists department_hier;
delimiter #
-- department_hier(p_dept_id, p_staff_id)
-- Returns the subtree of department p_dept_id rooted at staff member
-- p_staff_id, one row per staff member with its depth relative to the root
-- (root = 0). The tree is walked level by level, without recursion.
create procedure department_hier
(
in p_dept_id tinyint unsigned,
in p_staff_id smallint unsigned
)
begin
declare v_done tinyint unsigned default 0;
declare v_dpth smallint unsigned default 0;
-- Temporary tables live for the whole session: drop leftovers from a call
-- that aborted before its own cleanup, otherwise the creates below fail.
drop temporary table if exists hier;
drop temporary table if exists tmp;
create temporary table hier(
dept_id tinyint unsigned,
parent_staff_id smallint unsigned,
staff_id smallint unsigned,
depth smallint unsigned
)engine = memory;
-- seed the result with the root node at depth 0
insert into hier select dept_id, parent_staff_id, staff_id, v_dpth from department_staff
where dept_id = p_dept_id and staff_id = p_staff_id;
/* http://dev.mysql.com/doc/refman/5.0/en/temporary-table-problems.html */
-- tmp holds a copy of the current level, so the insert into hier below can
-- read that level without referring to hier itself (see the link above).
create temporary table tmp engine=memory select * from hier;
while not v_done do
-- is there at least one child of the nodes at the current depth?
if exists( select 1 from department_staff e
inner join hier on e.dept_id = hier.dept_id and e.parent_staff_id = hier.staff_id and hier.depth = v_dpth) then
-- add the children as the next level
insert into hier select e.dept_id, e.parent_staff_id, e.staff_id, v_dpth + 1 from department_staff e
inner join tmp on e.dept_id = tmp.dept_id and e.parent_staff_id = tmp.staff_id and tmp.depth = v_dpth;
set v_dpth = v_dpth + 1;
-- refresh tmp with the level that was just added
truncate table tmp;
insert into tmp select * from hier where depth = v_dpth;
else
set v_done = 1;
end if;
end while;
-- result set: the collected nodes with department, staff and parent names
select
hier.dept_id,
d.name as department_name,
s.staff_id,
s.name,
p.staff_id as parent_staff_id,
p.name as parent_name,
hier.depth
from
hier
inner join departments d on hier.dept_id = d.dept_id
inner join staff s on hier.staff_id = s.staff_id
left outer join staff p on hier.parent_staff_id = p.staff_id;
drop temporary table if exists hier;
drop temporary table if exists tmp;
end #
delimiter ;
-- TEST DATA

-- Staff ids are auto-assigned in insertion order (f00 = 1 ... theta = 8).
INSERT INTO staff (name) VALUES
    ('f00'),
    ('bar'),
    ('alpha'),
    ('beta'),
    ('gamma'),
    ('delta'),
    ('zeta'),
    ('theta');

-- Department ids are auto-assigned in insertion order (Computing = 1, ...).
INSERT INTO departments (name) VALUES
    ('Computing'),
    ('Mathematics'),
    ('English'),
    ('Engineering'),
    ('Law'),
    ('Music');

-- Two small trees, both rooted at staff member 1.
INSERT INTO department_staff (dept_id, staff_id, parent_staff_id) VALUES
    (1, 1, NULL),
    (1, 2, 1),
    (1, 3, 1),
    (1, 4, 3),
    (1, 7, 4),
    (2, 1, NULL),
    (2, 5, 1),
    (2, 6, 1),
    (2, 7, 6),
    (2, 8, 7),
    (2, 2, 7);

-- TESTING (call this sproc from your php)
CALL department_hier(1, 1);
CALL department_hier(2, 1);