How to find all synonyms for a French word using NLTK

I would like to find all synonyms of a French word. I have used this code:
import nltk
from nltk.corpus import wordnet
syns=[synset.lemma_names('fra') for synset in wordnet.synsets('maison', lang='fra')]
print(syns)
The program outputs contextual similarity rather than semantic similarity, and it gives no output for conjugated verbs.
Any ideas, please?
Thanks

Based on this post, I suggest this solution:
from nltk.tokenize import TreebankWordTokenizer
from nltk.wsd import lesk
from nltk.corpus import wordnet as wn

# Language codes accepted by the `lang` argument (Open Multilingual WordNet):
# ['als', 'arb', 'cat', 'cmn', 'dan', 'eng', 'eus', 'fas',
#  'fin', 'fra', 'fre', 'glg', 'heb', 'ind', 'ita', 'jpn', 'nno',
#  'nob', 'pol', 'por', 'spa', 'tha', 'zsm']
lang = 'fra'

sent = TreebankWordTokenizer().tokenize("Je voudrai essayer avec cette phrase")

# Word-sense disambiguation with the Lesk algorithm, restricted to nouns.
synsets = [lesk(sent, w, 'n') for w in sent]
print(synsets)

# For each token, print every French lemma of every synset it belongs to.
for ws in sent:
    for ss in [n for synset in wn.synsets(ws, lang=lang) for n in synset.lemma_names(lang)]:
        print((ws, ss), '\n')
Here is the result:
[None, None, Synset('trier.n.02'), None, None, Synset('phrase.n.04')]
('Je', 'je')
('Je', 'Moi')
('Je', 'iode')
('Je', 'je')
('Je', 'Moi')
('essayer', 'essayer')
('essayer', 'essayer')
('essayer', 'rendre')
('essayer', 'tenter')
('essayer', 'essayer')
('essayer', 'goût')
('essayer', 'goûter')
('essayer', 'priser')
('essayer', 'tenter')
('essayer', 'assumer')
('essayer', 'entreprendre')
('essayer', 'essayer')
('essayer', 'tenter')
('essayer', 'essayer')
('essayer', 'aspirer')
('essayer', 'chercher')
('essayer', 'essayer')
('essayer', 'solliciter')
('essayer', 'tendre')
('essayer', 'trouver')
('essayer', 'entendre')
('essayer', 'essayer')
('essayer', 'décider')
('essayer', 'essayer')
('essayer', 'juge')
('essayer', 'juger')
('essayer', 'tenter')
('essayer', 'chercher')
('essayer', 'efforcer')
('essayer', 'essayer')
('essayer', 'tenter')
('essayer', 'essayer')
('essayer', 'tester')
('essayer', 'vérifier')
('phrase', 'condamner')
('phrase', 'phrase')
('phrase', 'condamnation')
('phrase', 'conviction')
('phrase', 'jugement')
('phrase', 'phrase')
('phrase', 'sentence')
('phrase', 'phrase')
('phrase', 'connexion')
('phrase', 'expression')
('phrase', 'locution')
('phrase', 'phrase')
('phrase', 'syntagme')
('phrase', 'phrase')
('phrase', 'théorème')
('phrase', 'locution')
('phrase', 'phrase')
('phrase', 'syntagme')
('phrase', 'mouvement')
('phrase', 'phrase')
('phrase', 'délai')
('phrase', 'durée')
('phrase', 'fois')
('phrase', 'jugement')
('phrase', 'longtemps')
('phrase', 'peine')
('phrase', 'phrase')
('phrase', 'temps')

Related

SQL Query return zero if is NOT in list

I have the following query:
SELECT
au.country as country_code,
COALESCE(SUM(uwm.amount), 0) as amountInbound
FROM user_wallet_movement uwm
LEFT JOIN user_wallet uw ON uwm.wallet_id = uw.id
LEFT JOIN app_user au ON uw.user_id = au.id
WHERE
status = 'execute'
and direction = 'inbound'
and mov_date > '2020-07-01'
and au.country IN ('AD', 'AC', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AW', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BM', 'BN', 'BO', 'BR', 'BS', 'BT', 'BV', 'BW', 'BY', 'BZ', 'CA', 'CC', 'CD', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CR', 'CU', 'CV', 'CX', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'EH', 'ER', 'ES', 'ET', 'FI', 'FJ', 'FK', 'FM', 'FO', 'FR', 'GA', 'GB', 'GD', 'GE', 'GF', 'GH', 'GI', 'GL', 'GM', 'GN', 'GP', 'GQ', 'GR', 'GT', 'GU', 'GW', 'GY', 'HK', 'HM', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IO', 'IQ', 'IR', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KP', 'KR', 'KW', 'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MC', 'MD', 'ME', 'MG', 'MH', 'MK', 'ML', 'MM', 'MN', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NE', 'NF', 'NG', 'NI', 'NL', 'NO', 'NP', 'NR', 'NU', 'NZ', 'OM', 'PA', 'PE', 'PF', 'PG', 'PH', 'PK', 'PL', 'PM', 'PN', 'PR', 'PT', 'PW', 'PY', 'QA', 'RE', 'RO', 'RS', 'RU', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SG', 'SH', 'SI', 'SJ', 'SK', 'SL', 'SM', 'SN', 'SO', 'SR', 'ST', 'SV', 'SY', 'SZ', 'TC', 'TD', 'TF', 'TG', 'TH', 'TJ', 'TK', 'TL', 'TM', 'TN', 'TO', 'TR', 'TT', 'TV', 'TZ', 'UA', 'UG', 'UM', 'US', 'UY', 'UZ', 'VA', 'VC', 'VE', 'VG', 'VI', 'VN', 'VU', 'WF', 'WS', 'xA', 'xE', 'xF', 'XK', 'xN', 'xO', 'xS', 'YE', 'YT', 'ZA', 'ZM', 'ZW')
GROUP BY country_code
ORDER BY country_code
This should return the amount of money spent in each country in the list.
My output is
AE 0.35365110000000016
AR 1.0367374499999995
AT 0.11195171000000001
AU 1.7345992
BE 1.9242438800000006
BG 5.043282479999997
CA 0.5906319000000001
CH 0.5082077999999999
CO 0.14248785
CR 0.036722840000000014
CU 0.11325390999999999
CY 0.18752883999999997
CZ 0.11454307999999999
DE 8.057752660000036
DO 0.8858295500000001
EE 0.7410690900000001
ES 31.125371000000023
FR 0.4851664099999999
GB 1.44115391
HR 0.023154
HU 1.0131190899999998
IE 0.3229343799999997
IN 0.026833529999999984
IT 2199.1061043693944
KE 0.21115987
KR 0.161765
LU 0.20279967
MC 0.2127708600000001
MT 0.028277630000000005
MX 0.45381685
NL 0.1408655
PE 0.00108554
QA 1.8347713
RO 7.0233499800000105
RS 0.25260947000000006
RU 0.16577983
SE 3.4979126399999947
SH 1.1328741000000002
SI 0.00178069
SK 0.04637177
SZ 0.3603625199999996
US 2.41114205
VE 0.53491791
So, as you can see, there are countries in the list which do not appear in the output because the amount is null.
How can I include them in the list with the value 0?
Thank you
EDIT:
Not all countries in the list are in the table; I would also like countries that are in the list but not in the table to appear in the output.
The problem is that if there is no data in the wallet tables for a given user, the join returns nothing for that user. Instead, start from app_user with LEFT JOINs and keep the movement filters in the join conditions (a WHERE filter on the movement columns would discard the unmatched rows again):
SELECT
    au.country AS country_code,
    COALESCE(SUM(uwm.amount), 0) AS amountInbound
FROM
    app_user au
    LEFT JOIN user_wallet uw ON uw.user_id = au.id
    LEFT JOIN user_wallet_movement uwm ON uwm.wallet_id = uw.id
        AND status = 'execute'
        AND direction = 'inbound'
        AND mov_date > '2020-07-01'
WHERE
au.country IN ('AD', 'AC', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AW', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BM', 'BN', 'BO', 'BR', 'BS', 'BT', 'BV', 'BW', 'BY', 'BZ', 'CA', 'CC', 'CD', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CR', 'CU', 'CV', 'CX', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'EH', 'ER', 'ES', 'ET', 'FI', 'FJ', 'FK', 'FM', 'FO', 'FR', 'GA', 'GB', 'GD', 'GE', 'GF', 'GH', 'GI', 'GL', 'GM', 'GN', 'GP', 'GQ', 'GR', 'GT', 'GU', 'GW', 'GY', 'HK', 'HM', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IO', 'IQ', 'IR', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KP', 'KR', 'KW', 'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MC', 'MD', 'ME', 'MG', 'MH', 'MK', 'ML', 'MM', 'MN', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NE', 'NF', 'NG', 'NI', 'NL', 'NO', 'NP', 'NR', 'NU', 'NZ', 'OM', 'PA', 'PE', 'PF', 'PG', 'PH', 'PK', 'PL', 'PM', 'PN', 'PR', 'PT', 'PW', 'PY', 'QA', 'RE', 'RO', 'RS', 'RU', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SG', 'SH', 'SI', 'SJ', 'SK', 'SL', 'SM', 'SN', 'SO', 'SR', 'ST', 'SV', 'SY', 'SZ', 'TC', 'TD', 'TF', 'TG', 'TH', 'TJ', 'TK', 'TL', 'TM', 'TN', 'TO', 'TR', 'TT', 'TV', 'TZ', 'UA', 'UG', 'UM', 'US', 'UY', 'UZ', 'VA', 'VC', 'VE', 'VG', 'VI', 'VN', 'VU', 'WF', 'WS', 'xA', 'xE', 'xF', 'XK', 'xN', 'xO', 'xS', 'YE', 'YT', 'ZA', 'ZM', 'ZW')
GROUP BY country_code
ORDER BY country_code
Your inner joins are probably limiting the rows in your output; try using outer joins:
SELECT
au.country as country_code,
COALESCE(SUM(uwm.amount), 0) as amountInbound
FROM user_wallet_movement uwm
LEFT OUTER JOIN user_wallet uw ON uwm.wallet_id = uw.id
LEFT OUTER JOIN app_user au ON uw.user_id = au.id
WHERE
status = 'execute'
and direction = 'inbound'
and mov_date > '2020-07-01'
and au.country IN (...)
GROUP BY country_code
ORDER BY country_code
If your data has all the countries, then a simple fix is to use conditional aggregation:
SELECT au.country as country_code,
SUM(CASE WHEN status = 'execute' and direction = 'inbound' and mov_date > '2020-07-01' THEN uwm.amount ELSE 0 END) as amountInbound
FROM user_wallet_movement uwm JOIN
user_wallet uw
ON uwm.wallet_id = uw.id JOIN
app_user au
ON uw.user_id = au.id
WHERE au.country IN ('AD', 'AC', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AW', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BM', 'BN', 'BO', 'BR', 'BS', 'BT', 'BV', 'BW', 'BY', 'BZ', 'CA', 'CC', 'CD', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CR', 'CU', 'CV', 'CX', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'EH', 'ER', 'ES', 'ET', 'FI', 'FJ', 'FK', 'FM', 'FO', 'FR', 'GA', 'GB', 'GD', 'GE', 'GF', 'GH', 'GI', 'GL', 'GM', 'GN', 'GP', 'GQ', 'GR', 'GT', 'GU', 'GW', 'GY', 'HK', 'HM', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IO', 'IQ', 'IR', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KP', 'KR', 'KW', 'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MC', 'MD', 'ME', 'MG', 'MH', 'MK', 'ML', 'MM', 'MN', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NE', 'NF', 'NG', 'NI', 'NL', 'NO', 'NP', 'NR', 'NU', 'NZ', 'OM', 'PA', 'PE', 'PF', 'PG', 'PH', 'PK', 'PL', 'PM', 'PN', 'PR', 'PT', 'PW', 'PY', 'QA', 'RE', 'RO', 'RS', 'RU', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SG', 'SH', 'SI', 'SJ', 'SK', 'SL', 'SM', 'SN', 'SO', 'SR', 'ST', 'SV', 'SY', 'SZ', 'TC', 'TD', 'TF', 'TG', 'TH', 'TJ', 'TK', 'TL', 'TM', 'TN', 'TO', 'TR', 'TT', 'TV', 'TZ', 'UA', 'UG', 'UM', 'US', 'UY', 'UZ', 'VA', 'VC', 'VE', 'VG', 'VI', 'VN', 'VU', 'WF', 'WS', 'xA', 'xE', 'xF', 'XK', 'xN', 'xO', 'xS', 'YE', 'YT', 'ZA', 'ZM', 'ZW')
GROUP BY country_code
ORDER BY country_code;
Otherwise, you will need to use a LEFT JOIN. For that purpose, it is better to start with a countries table of some sort. Do you have such a table?
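If such a table exists, here is a rough sketch (the table name countries(code) is hypothetical, and I am assuming status, direction and mov_date are columns of user_wallet_movement): start from the country list and keep the movement filters inside the join conditions, so countries without any matching rows still come out with 0:
SELECT c.code AS country_code,
       COALESCE(SUM(uwm.amount), 0) AS amountInbound
FROM countries c
LEFT JOIN app_user au ON au.country = c.code
LEFT JOIN user_wallet uw ON uw.user_id = au.id
LEFT JOIN user_wallet_movement uwm ON uwm.wallet_id = uw.id
    AND uwm.status = 'execute'
    AND uwm.direction = 'inbound'
    AND uwm.mov_date > '2020-07-01'
GROUP BY c.code
ORDER BY c.code;
Restrict it with WHERE c.code IN (...) if you only want the codes from your list.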
If you would like all the countries from table A to show up even though table B has fewer countries, then your join should be LEFT instead of INNER.
This way, the amount will be NULL for countries that don't exist in table B; this NULL can be replaced using COALESCE().
COALESCE returns the first non-NULL value; if both arguments are non-NULL, it returns the first one.
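A quick illustration of that behaviour:
SELECT COALESCE(NULL, 0);  -- returns 0
SELECT COALESCE(7.5, 0);   -- returns 7.5, the first non-NULL argument
Applied to your query: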
SELECT
au.country as country_code,
COALESCE(SUM(uwm.amount), 0) as amountInbound
FROM user_wallet_movement uwm
LEFT JOIN user_wallet uw
ON uwm.wallet_id = uw.id
LEFT JOIN app_user au
ON uw.user_id = au.id
WHERE
status = 'execute'
and direction = 'inbound'
and mov_date > '2020-07-01'
and au.country IN ('ALL THE COUNTRIES')
GROUP BY
country_code
ORDER BY
country_code

Find the business day number in a pay period given a date, using MySQL

I am wanting to aggregate data using a derived value that I am struggling to calculate: "business day number in the pay period". I have provided a further description below:
In any given month, there are two pay periods. The first period commences on the first business day of the month and concludes at COB on the last business day up to and including the 15th (e.g. it may conclude on the 14th because the 15th is a Saturday). The second period commences on the business day following the end of the first period and ends at COB on the last business day of the month. How can I transform a datetime column into an integer representing which day of the pay period a tuple belongs to, so that data can be aggregated by "business day number in a pay period"?
If possible, I would prefer not to require intermediate tables or user-defined functions to do this; preferably using arithmetic only. Essentially a query like:
SELECT (<arithmetic on datetime col here>) as businessDayNumber, count(someCol)
FROM someTbl
GROUP BY businessDayNumber;
Here is sample data which provides the outcome that I desire:
CREATE TABLE sampleData (
dataId INT AUTO_INCREMENT PRIMARY KEY,
dataDt DATE NOT NULL,
someValue INT NOT NULL
);
INSERT INTO sampleData (dataDt, someValue) VALUES ('2020-01-01', 51),
('2020-01-01', 62),
('2020-01-01', 23),
('2020-01-01', 54),
('2020-01-02', 61),
('2020-01-02', 35),
('2020-01-02', 47),
('2020-01-02', 69),
('2020-01-02', 32),
('2020-01-02', 83),
('2020-01-02', 13),
('2020-01-03', 51),
('2020-01-03', 62),
('2020-01-03', 23),
('2020-01-03', 54),
('2020-01-03', 61),
('2020-01-03', 35),
('2020-01-06', 54),
('2020-01-06', 61),
('2020-01-06', 35),
('2020-01-06', 47),
('2020-01-06', 69),
('2020-01-06', 32),
('2020-01-06', 83),
('2020-01-06', 13),
('2020-01-07', 51),
('2020-01-07', 62),
('2020-01-07', 23),
('2020-01-07', 54),
('2020-01-07', 61),
('2020-01-07', 35),
('2020-01-07', 47),
('2020-01-07', 69),
('2020-01-07', 32),
('2020-01-08', 51),
('2020-01-08', 62),
('2020-01-08', 23),
('2020-01-08', 54),
('2020-01-08', 61),
('2020-01-08', 35),
('2020-01-08', 47),
('2020-01-08', 69),
('2020-01-08', 32),
('2020-01-08', 83),
('2020-01-08', 13),
('2020-01-09', 35),
('2020-01-09', 47),
('2020-01-09', 69),
('2020-01-09', 32),
('2020-01-09', 83),
('2020-01-09', 13),
('2020-01-09', 54),
('2020-01-09', 61),
('2020-01-09', 35),
('2020-01-09', 47),
('2020-01-10', 69),
('2020-01-10', 32),
('2020-01-10', 83),
('2020-01-10', 13),
('2020-01-10', 51),
('2020-01-10', 62),
('2020-01-13', 83),
('2020-01-13', 13),
('2020-01-13', 54),
('2020-01-13', 61),
('2020-01-13', 35),
('2020-01-13', 47),
('2020-01-14', 69),
('2020-01-14', 32),
('2020-01-14', 83),
('2020-01-14', 13),
('2020-01-14', 51),
('2020-01-14', 62),
('2020-01-14', 23),
('2020-01-14', 54),
('2020-01-15', 61),
('2020-01-15', 35),
('2020-01-15', 47),
('2020-01-15', 69),
('2020-01-15', 32),
('2020-01-16', 51),
('2020-01-16', 62),
('2020-01-16', 23),
('2020-01-16', 54),
('2020-01-16', 61),
('2020-01-16', 35),
('2020-01-16', 47),
('2020-01-16', 69),
('2020-01-16', 32),
('2020-01-16', 83),
('2020-01-16', 13),
('2020-01-16', 51),
('2020-01-16', 62),
('2020-01-17', 23),
('2020-01-17', 54),
('2020-01-17', 61),
('2020-01-17', 35),
('2020-01-17', 47),
('2020-01-17', 69),
('2020-01-17', 32),
('2020-01-17', 83),
('2020-01-17', 13),
('2020-01-17', 54),
('2020-01-20', 47),
('2020-01-20', 69),
('2020-01-20', 32),
('2020-01-20', 83),
('2020-01-20', 13),
('2020-01-20', 51),
('2020-01-20', 62),
('2020-01-20', 23),
('2020-01-20', 54),
('2020-01-20', 61),
('2020-01-20', 35),
('2020-01-20', 47),
('2020-01-20', 69),
('2020-01-20', 32),
('2020-01-21', 83),
('2020-01-21', 13),
('2020-01-21', 54),
('2020-01-21', 61),
('2020-01-21', 35),
('2020-01-21', 47),
('2020-01-21', 69),
('2020-01-21', 32),
('2020-01-21', 83),
('2020-01-21', 13),
('2020-01-21', 51),
('2020-01-21', 62),
('2020-01-21', 23),
('2020-01-21', 54),
('2020-01-21', 61),
('2020-01-21', 35),
('2020-01-21', 47),
('2020-01-21', 69),
('2020-01-21', 32),
('2020-01-21', 83),
('2020-01-21', 13),
('2020-01-22', 54),
('2020-01-22', 61),
('2020-01-22', 35),
('2020-01-22', 47),
('2020-01-22', 69),
('2020-01-22', 32),
('2020-01-22', 83),
('2020-01-23', 13),
('2020-01-23', 51),
('2020-01-23', 62),
('2020-01-23', 23),
('2020-01-23', 54),
('2020-01-23', 61),
('2020-01-24', 35),
('2020-01-24', 47),
('2020-01-24', 69),
('2020-01-24', 32),
('2020-01-25', 35),
('2020-01-25', 47),
('2020-01-25', 69),
('2020-01-27', 35),
('2020-01-27', 47),
('2020-01-27', 69),
('2020-01-27', 32),
('2020-01-27', 83),
('2020-01-27', 13),
('2020-01-27', 51),
('2020-01-27', 62),
('2020-01-28', 23),
('2020-01-28', 54),
('2020-01-28', 61),
('2020-01-28', 35),
('2020-01-28', 47),
('2020-01-28', 69),
('2020-01-28', 32),
('2020-01-29', 69),
('2020-01-29', 32),
('2020-01-29', 83),
('2020-01-29', 13),
('2020-01-29', 51),
('2020-01-29', 62),
('2020-01-29', 23),
('2020-01-30', 54),
('2020-01-30', 61),
('2020-01-30', 35),
('2020-01-30', 47),
('2020-01-30', 69),
('2020-01-30', 32),
('2020-01-31', 35),
('2020-01-31', 47),
('2020-01-31', 69),
('2020-01-31', 32),
('2020-01-31', 83),
('2020-01-31', 13),
('2020-01-31', 54),
('2020-01-31', 61),
('2020-02-02', 47),
('2020-02-03', 54),
('2020-02-03', 61),
('2020-02-04', 35),
('2020-02-04', 51),
('2020-02-04', 62),
('2020-02-04', 23),
('2020-02-04', 54),
('2020-02-06', 61),
('2020-02-06', 35),
('2020-02-06', 47),
('2020-02-06', 69),
('2020-02-07', 23),
('2020-02-07', 54),
('2020-02-07', 61),
('2020-02-07', 35),
('2020-02-07', 47),
('2020-02-08', 23),
('2020-02-08', 54),
('2020-02-08', 61),
('2020-02-08', 35),
('2020-02-08', 47),
('2020-02-08', 69),
('2020-02-08', 35),
('2020-02-08', 47),
('2020-02-08', 69),
('2020-02-08', 32),
('2020-02-09', 83),
('2020-02-09', 13),
('2020-02-09', 54),
('2020-02-09', 61),
('2020-02-09', 35),
('2020-02-09', 47),
('2020-02-09', 69),
('2020-02-09', 32),
('2020-02-09', 83),
('2020-02-09', 13),
('2020-02-09', 51),
('2020-02-09', 62),
('2020-02-09', 23),
('2020-02-09', 54),
('2020-02-10', 61),
('2020-02-10', 35),
('2020-02-10', 47),
('2020-02-10', 69),
('2020-02-10', 32),
('2020-02-10', 51),
('2020-02-11', 62),
('2020-02-11', 23),
('2020-02-11', 54),
('2020-02-11', 32),
('2020-02-11', 83),
('2020-02-12', 13),
('2020-02-12', 51),
('2020-02-13', 62),
('2020-02-13', 23),
('2020-02-13', 54),
('2020-02-13', 61),
('2020-02-13', 35),
('2020-02-13', 47),
('2020-02-14', 69),
('2020-02-14', 32),
('2020-02-14', 83),
('2020-02-14', 13),
('2020-02-14', 54),
('2020-02-14', 61),
('2020-02-14', 35),
('2020-02-14', 47),
('2020-02-15', 69),
('2020-02-15', 32),
('2020-02-15', 83),
('2020-02-15', 13),
('2020-02-15', 51),
('2020-02-16', 62),
('2020-02-16', 23),
('2020-02-16', 54),
('2020-02-16', 61),
('2020-02-16', 61),
('2020-02-16', 35),
('2020-02-16', 47),
('2020-02-16', 69),
('2020-02-16', 32),
('2020-02-16', 83),
('2020-02-16', 13),
('2020-02-16', 51),
('2020-02-16', 62),
('2020-02-17', 23),
('2020-02-18', 35),
('2020-02-18', 47),
('2020-02-18', 69),
('2020-02-18', 32),
('2020-02-18', 83),
('2020-02-18', 13),
('2020-02-18', 51),
('2020-02-18', 62),
('2020-02-18', 23),
('2020-02-18', 54),
('2020-02-18', 61),
('2020-02-18', 35),
('2020-02-18', 47),
('2020-02-18', 69),
('2020-02-18', 32),
('2020-02-19', 51),
('2020-02-19', 62),
('2020-02-19', 23),
('2020-02-19', 54),
('2020-02-19', 61),
('2020-02-19', 35),
('2020-02-20', 47),
('2020-02-20', 69),
('2020-02-20', 32),
('2020-02-20', 83),
('2020-02-20', 13),
('2020-02-20', 51),
('2020-02-20', 62),
('2020-02-20', 23),
('2020-02-20', 54),
('2020-02-20', 61),
('2020-02-20', 35),
('2020-02-20', 47),
('2020-02-20', 69),
('2020-02-20', 32),
('2020-02-21', 83),
('2020-02-21', 13),
('2020-02-21', 54),
('2020-02-21', 61),
('2020-02-21', 35),
('2020-02-21', 47),
('2020-02-21', 69),
('2020-02-21', 32),
('2020-02-21', 83),
('2020-02-21', 13),
('2020-02-21', 51),
('2020-02-21', 62),
('2020-02-21', 23),
('2020-02-21', 54),
('2020-02-21', 61),
('2020-02-21', 35),
('2020-02-21', 47),
('2020-02-21', 69),
('2020-02-21', 32),
('2020-02-21', 83),
('2020-02-21', 13),
('2020-02-22', 54),
('2020-02-22', 61),
('2020-02-22', 35),
('2020-02-22', 47),
('2020-02-22', 69),
('2020-02-22', 32),
('2020-02-22', 83),
('2020-02-23', 13),
('2020-02-23', 51),
('2020-02-23', 62),
('2020-02-23', 23),
('2020-02-23', 54),
('2020-02-23', 61),
('2020-02-24', 35),
('2020-02-24', 47),
('2020-02-24', 69),
('2020-02-24', 32),
('2020-02-25', 35),
('2020-02-25', 47),
('2020-02-25', 69),
('2020-02-25', 32),
('2020-02-25', 83),
('2020-02-25', 13),
('2020-02-25', 51),
('2020-02-25', 62),
('2020-02-25', 23),
('2020-02-25', 54),
('2020-02-25', 61),
('2020-02-26', 35),
('2020-02-26', 47),
('2020-02-26', 69),
('2020-02-26', 32),
('2020-02-26', 83),
('2020-02-26', 13),
('2020-02-26', 54),
('2020-02-26', 61),
('2020-02-27', 35),
('2020-02-27', 47),
('2020-02-27', 69),
('2020-02-27', 32),
('2020-02-27', 83),
('2020-02-27', 13),
('2020-02-27', 51),
('2020-02-27', 62),
('2020-02-28', 69),
('2020-02-28', 32),
('2020-02-29', 69),
('2020-02-29', 32),
('2020-02-29', 83);
and in a SQL Fiddle.
WITH cte AS (
    SELECT someValue,
           -- Number the business days within each half of the month
           -- (days 1-15 vs. 16 onward), restarting per month.
           DENSE_RANK() OVER (PARTITION BY DATE_FORMAT(dataDt, '%Y%m'),
                                           DAY(dataDt) <= 15
                              ORDER BY dataDt) AS businessDayNumber
    FROM sampleData
    -- Keep Monday to Friday only (DAYOFWEEK: 1 = Sunday ... 7 = Saturday).
    WHERE DAYOFWEEK(dataDt) BETWEEN 2 AND 6
)
SELECT businessDayNumber, COUNT(someValue)
FROM cte
GROUP BY businessDayNumber;
fiddle

Rank by calculated variable in MySQL

The following table is for practice only. I will use the code on a much larger table.
SELECT *
FROM price_practice;
gives
id company dt price
'16', 'Amex', '2015-07-01', '5.00'
'17', 'Amex', '2015-07-02', '5.10'
'18', 'Amex', '2015-07-03', '5.00'
'19', 'Amex', '2015-07-06', '5.88'
'20', 'Amex', '2015-07-07', '4.21'
'21', 'Citi', '2015-07-01', '1.00'
'22', 'Citi', '2015-07-02', '1.10'
'23', 'Citi', '2015-07-03', '1.00'
'24', 'Citi', '2015-07-06', '0.88'
'25', 'Citi', '2015-07-07', '1.01'
'26', 'Amex', '2015-07-08', '5.23'
'27', 'Amex', '2015-07-09', '5.35'
'28', 'Amex', '2015-07-10', '5.55'
'29', 'Amex', '2015-07-13', '5.88'
'30', 'Amex', '2015-07-14', '6.01'
'31', 'Citi', '2015-07-08', '0.95'
'32', 'Citi', '2015-07-09', '0.83'
'33', 'Citi', '2015-07-10', '0.79'
'34', 'Citi', '2015-07-13', '0.72'
'35', 'Citi', '2015-07-14', '0.59'
The following snippet calculates the percentage change in price from one date to the next.
SELECT x.id, x.company, x.dt, x.price, (x.price - y.price)/y.price AS 'Change'
FROM
(
SELECT a.id AS aid, MAX(b.id) AS aPrevid
FROM price_practice a
INNER JOIN price_practice b
WHERE a.id > b.id
AND a.company = b.company
GROUP BY a.id
) Sub1
INNER JOIN price_practice x ON Sub1.aid = x.id
INNER JOIN price_practice y ON Sub1.aPrevid = y.id
ORDER BY x.id DESC
As intended, it returns
id company dt price Change
'35', 'Citi', '2015-07-14', '0.59', '-0.180556'
'34', 'Citi', '2015-07-13', '0.72', '-0.088608'
'33', 'Citi', '2015-07-10', '0.79', '-0.048193'
'32', 'Citi', '2015-07-09', '0.83', '-0.126316'
'31', 'Citi', '2015-07-08', '0.95', '-0.059406'
'30', 'Amex', '2015-07-14', '6.01', '0.022109'
'29', 'Amex', '2015-07-13', '5.88', '0.059459'
'28', 'Amex', '2015-07-10', '5.55', '0.037383'
'27', 'Amex', '2015-07-09', '5.35', '0.022945'
'26', 'Amex', '2015-07-08', '5.23', '0.242280'
'25', 'Citi', '2015-07-07', '1.01', '0.147727'
'24', 'Citi', '2015-07-06', '0.88', '-0.120000'
'23', 'Citi', '2015-07-03', '1.00', '-0.090909'
'22', 'Citi', '2015-07-02', '1.10', '0.100000'
'20', 'Amex', '2015-07-07', '4.21', '-0.284014'
'19', 'Amex', '2015-07-06', '5.88', '0.176000'
'18', 'Amex', '2015-07-03', '5.00', '-0.019608'
'17', 'Amex', '2015-07-02', '5.10', '0.020000'
The following snippet does something entirely different: it ranks observations by price for every company separately.
SELECT (
    CASE company
        WHEN @curType
            THEN @curRow := @curRow + 1
        ELSE @curRow := 1 AND @curType := company
    END
) + 1 AS rank,
id,
company,
dt,
price
FROM price_practice,
    (SELECT @curRow := 0, @curType := '') r
ORDER BY company DESC, price DESC;
As intended, it returns
rank id company dt price
'1', '22', 'Citi', '2015-07-02', '1.10'
'2', '25', 'Citi', '2015-07-07', '1.01'
'3', '23', 'Citi', '2015-07-03', '1.00'
'4', '21', 'Citi', '2015-07-01', '1.00'
'5', '31', 'Citi', '2015-07-08', '0.95'
'6', '24', 'Citi', '2015-07-06', '0.88'
'7', '32', 'Citi', '2015-07-09', '0.83'
'8', '33', 'Citi', '2015-07-10', '0.79'
'9', '34', 'Citi', '2015-07-13', '0.72'
'10', '35', 'Citi', '2015-07-14', '0.59'
'1', '30', 'Amex', '2015-07-14', '6.01'
'2', '19', 'Amex', '2015-07-06', '5.88'
'3', '29', 'Amex', '2015-07-13', '5.88'
'4', '28', 'Amex', '2015-07-10', '5.55'
'5', '27', 'Amex', '2015-07-09', '5.35'
'6', '26', 'Amex', '2015-07-08', '5.23'
'7', '17', 'Amex', '2015-07-02', '5.10'
'8', '18', 'Amex', '2015-07-03', '5.00'
'9', '16', 'Amex', '2015-07-01', '5.00'
'10', '20', 'Amex', '2015-07-07', '4.21'
The question is:
How do I rank observations by percentage change?
I imagine you could save the percentage change data in a new column and then rank it, but I suspect this is not the best method. I will do many similar calculations (e.g. weekly % change, variance, etc.), and I have around 3,000,000 observations, so the table would grow big quickly. If this is the only way to do it, I will, but I think combining the two snippets above to calculate the percentage change and the rank in one go would be better. Or what do you think?
As I'm sure you can tell from my question, I'm a beginner at MySQL. Any advice on how to proceed is appreciated!
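One possible direction, just as a sketch: on MySQL 8+ you can compute the change with LAG() and rank it in the same statement, so nothing has to be stored in a new column (on older versions, the equivalent is to wrap the first snippet in a derived table and point the user-variable ranking at the Change column instead of price):
SELECT id, company, dt, price, chg,
       RANK() OVER (PARTITION BY company ORDER BY chg DESC) AS chg_rank
FROM (
    SELECT id, company, dt, price,
           (price - LAG(price) OVER (PARTITION BY company ORDER BY dt))
             / LAG(price) OVER (PARTITION BY company ORDER BY dt) AS chg
    FROM price_practice
) x
WHERE chg IS NOT NULL  -- each company's first row has no previous price
ORDER BY company, chg_rank;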

MySQL change data set with table JOIN .. Is what I am asking possible?

This is a complicated question, so please bear with me. I am using 3 different tables to make 1 result set. They are as follows:
customer_address_entity
entity_id | entity_type_id | attribute_set_id | increment_id | parent_id | create_at | update_at | is_active
customer_entity_int
value_id | entity_type_id | attribute_id | entity_id | value
customer_address_entity_varchar
value_id | entity_type_id | attribute_id | entity_id | value
OK, so now you have the structure; here is the SQL query I have built so far:
SELECT CAE.entity_id,
CEI.value AS default_entity_id,
CAEV.attribute_id,
CAEV.value
FROM customer_address_entity AS CAE
JOIN customer_entity_int AS CEI
ON CEI.entity_id = CAE.parent_id
AND CEI.attribute_id = '13'
JOIN customer_address_entity_varchar AS CAEV
ON CAEV.entity_id = CAE.entity_id
WHERE CAE.parent_id = '2328'
AND CAE.is_active = 1
This outputs the following example dataset:
ID default att value
'1567', '1567', '19', 'John'
'1567', '1567', '21', 'Doe'
'1567', '1567', '23', 'Johns Company'
'1567', '1567', '25', 'Johns City'
'1567', '1567', '26', 'Johns Country'
'1567', '1567', '27', 'Johns State'
'1567', '1567', '29', 'Johns Zip Code'
'1567', '1567', '30', 'Johns Phone'
'1567', '1567', '31', 'Johns Fax'
'1568', '1567', '19', 'Jane'
'1568', '1567', '21', 'Doe'
'1568', '1567', '23', 'Janes Company'
'1568', '1567', '25', 'Janes City'
'1568', '1567', '26', 'Janes Country'
'1568', '1567', '27', 'Janes State'
'1568', '1567', '29', 'Janes Zip'
'1568', '1567', '30', 'Janes Phone'
'1568', '1567', '31', 'Janes Fax'
'1569', '1567', '19', 'Frank'
'1569', '1567', '21', 'Frunz'
'1569', '1567', '23', 'Franks Company'
'1569', '1567', '25', 'Franks City'
'1569', '1567', '26', 'Franks Country'
'1569', '1567', '27', 'Franks State'
'1569', '1567', '29', 'Franks Zip'
'1569', '1567', '30', 'Franks Phone'
'1569', '1567', '31', 'Franks Fax'
For the final part, I would like to create X rows (in this case 3) based on the number of UNIQUE entity_id values (column 1 in the returned data set, i.e. 1567, 1568 and 1569). The intended end result being:
'1567', '1567', 'John', 'Doe', 'Johns Company', 'Johns City', 'Johns State', 'Johns Zip Code', 'Johns Phone', 'Johns Fax'
'1568', '1567', 'Jane', 'Doe', 'Janes Company', ... etc
'1569', '1567', 'Frank', 'Franz', 'Franks Comapny', ... etc
Is this even possible?
EDIT: Thanks to Gordon Linoff -- the answer is elegant and simple! I threw in a few edits of my own, but will be accepting Gordon's answer and voting it up. Here are the edits I made, which work beautifully!
select entity_id,
if(entity_id = default_entity_id, 'true', 'false') as default_entity,
max(case when attr = '19' then `value` end) as `FirstName`,
max(case when attr = '21' then `value` end) as `LastName`,
max(case when attr = '23' then `value` end) as `CompanyName`,
max(case when attr = '25' then `value` end) as `City`,
max(case when attr = '27' then `value` end) as `State`,
max(case when attr = '29' then `value` end) as `ZipCode`,
max(case when attr = '30' then `value` end) as `PhoneNumber`,
max(case when attr = '31' then `value` end) as `Fax`
from (SELECT CAE.entity_id, CEI.value AS default_entity_id, CAEV.attribute_id AS attr, CAEV.value
FROM customer_address_entity CAE
JOIN customer_entity_int CEI
ON CEI.entity_id = CAE.parent_id
AND CEI.attribute_id = '13'
JOIN customer_address_entity_varchar CAEV
ON CAEV.entity_id = CAE.entity_id
WHERE CAE.parent_id = '2328'
AND CAE.is_active = 1
) as t
group by entity_id
You can do this with a group by:
select entity_id,
MAX(default) as default,
max(case when att = '19' then value end) as FirstName,
max(case when att = '21' then value end) as LastName,
max(case when att = '23' then value end) as CompanyName,
max(case when att = '25' then value end) as City,
max(case when att = '27' then value end) as State,
max(case when att = '29' then value end) as ZipCode,
max(case when att = '30' then value end) as PhoneNumber,
max(case when att = '31' then value end) as Fax
from (SELECT CAE.entity_id, CEI.value AS default_entity_id, CAEV.attribute_id, CAEV.value
FROM customer_address_entity CAE
JOIN customer_entity_int CEI
ON CEI.entity_id = CAE.parent_id
AND CEI.attribute_id = '13'
JOIN customer_address_entity_varchar CAEV
ON CAEV.entity_id = CAE.entity_id
WHERE CAE.parent_id = '2328'
AND CAE.is_active = 1
) t
group by entity_id
This process is called pivoting, and aggregation is one solution (some databases have a pivot keyword for this). This assumes that each value appears only once per entity. Also, if a value is not present, it will get the value NULL.
As @Gordon's answer points out, this is what is known as a PIVOT, but MySQL does not have that function. In MySQL you can use an aggregate function with a CASE expression.
You can hard-code the values if they are all known:
SELECT CAE.entity_id,
CEI.value AS default_entity_id,
MAX(case when CAEV.attribute_id = 19 then CAEV.value else null end) FirstName,
MAX(case when CAEV.attribute_id = 21 then CAEV.value else null end) LastName,
MAX(case when CAEV.attribute_id = 23 then CAEV.value else null end) Company,
MAX(case when CAEV.attribute_id = 25 then CAEV.value else null end) City,
MAX(case when CAEV.attribute_id = 26 then CAEV.value else null end) Country,
MAX(case when CAEV.attribute_id = 27 then CAEV.value else null end) State,
MAX(case when CAEV.attribute_id = 29 then CAEV.value else null end) ZipCode,
MAX(case when CAEV.attribute_id = 30 then CAEV.value else null end) Phone,
MAX(case when CAEV.attribute_id = 31 then CAEV.value else null end) Fax
FROM customer_address_entity AS CAE
JOIN customer_entity_int AS CEI
ON CEI.entity_id = CAE.parent_id
AND CEI.attribute_id = '13'
JOIN customer_address_entity_varchar AS CAEV
ON CAEV.entity_id = CAE.entity_id
WHERE CAE.parent_id = '2328'
AND CAE.is_active = 1
GROUP BY CAE.entity_id, CEI.value;
Or you can use a prepared statement to implement dynamic sql. I would assume that you have a table that ties each attribute value to the name of the attribute:
SET @sql = NULL;
SELECT
  GROUP_CONCAT(DISTINCT
    CONCAT(
      'max(case when CAEV.attribute_id = ''',
      attribute_id,
      ''' then CAEV.value else null end) AS `',
      attribute_id, '`'
    )
  ) INTO @sql
FROM customer_address_entity_varchar;
SET @sql = CONCAT('SELECT CAE.entity_id,
  CEI.value AS default_entity_id, ', @sql, '
  FROM customer_address_entity AS CAE
  JOIN customer_entity_int AS CEI
    ON CEI.entity_id = CAE.parent_id
    AND CEI.attribute_id = ''13''
  JOIN customer_address_entity_varchar AS CAEV
    ON CAEV.entity_id = CAE.entity_id
  WHERE CAE.parent_id = ''2328''
    AND CAE.is_active = 1
  GROUP BY CAE.entity_id, CEI.value');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;

MySQL count(*) every day in a month returns [BLOB-2B] instead of a number

I want to count the rows for each day of a month for a specific user id (vwr_tid). Everything works fine - the result shows up in a table - except for one thing: the count for each day doesn't come up. It shows [BLOB-xx] instead of the number of rows for that day. Here is my code:
SELECT MONTH_v, YEAR_V,
GROUP_CONCAT(IF(day_v=1, views, null)) AS '1',
GROUP_CONCAT(IF(day_v=2, views, null)) AS '2',
GROUP_CONCAT(IF(day_v=3, views, null)) AS '3',
GROUP_CONCAT(IF(day_v=4, views, null)) AS '4',
GROUP_CONCAT(IF(day_v=5, views, null)) AS '5',
GROUP_CONCAT(IF(day_v=6, views, null)) AS '6',
GROUP_CONCAT(IF(day_v=7, views, null)) AS '7',
GROUP_CONCAT(IF(day_v=8, views, null)) AS '8',
GROUP_CONCAT(IF(day_v=9, views, null)) AS '9',
GROUP_CONCAT(IF(day_v=10, views, null)) AS '10',
GROUP_CONCAT(IF(day_v=11, views, null)) AS '11',
GROUP_CONCAT(IF(day_v=12, views, null)) AS '12',
GROUP_CONCAT(IF(day_v=13, views, null)) AS '13',
GROUP_CONCAT(IF(day_v=14, views, null)) AS '14',
GROUP_CONCAT(IF(day_v=15, views, null)) AS '15',
GROUP_CONCAT(IF(day_v=16, views, null)) AS '16',
GROUP_CONCAT(IF(day_v=17, views, null)) AS '17',
GROUP_CONCAT(IF(day_v=18, views, null)) AS '18',
GROUP_CONCAT(IF(day_v=19, views, null)) AS '19',
GROUP_CONCAT(IF(day_v=20, views, null)) AS '20',
GROUP_CONCAT(IF(day_v=21, views, null)) AS '21',
GROUP_CONCAT(IF(day_v=22, views, null)) AS '22',
GROUP_CONCAT(IF(day_v=23, views, null)) AS '23',
GROUP_CONCAT(IF(day_v=24, views, null)) AS '24',
GROUP_CONCAT(IF(day_v=25, views, null)) AS '25',
GROUP_CONCAT(IF(day_v=26, views, null)) AS '26',
GROUP_CONCAT(IF(day_v=27, views, null)) AS '27',
GROUP_CONCAT(IF(day_v=28, views, null)) AS '28',
GROUP_CONCAT(IF(day_v=29, views, null)) AS '29',
GROUP_CONCAT(IF(day_v=30, views, null)) AS '30',
GROUP_CONCAT(IF(day_v=31, views, null)) AS '31'
FROM
(
SELECT DAY(vwr_date) AS day_v,
MONTH(vwr_date) AS MONTH_v,
Year(vwr_date) AS YEAR_V,
date(vwr_date) AS date_v,
count(vwr_id) AS views
FROM car_viewer
WHERE Year(vwr_date)='2012' AND vwr_tid='18'
GROUP BY date_v
) as viewz
GROUP BY MONTH_v, YEAR_V
ORDER BY MONTH_v, YEAR_V DESC
The script is adapted from @rs: Count record each day of a month from mysql into html table
The result.
I don't see the need for GROUP_CONCAT() - it returns a string, which is likely why your client displays it as [BLOB-xx] - when SUM() will serve your needs:
SELECT MONTH_v, YEAR_V,
SUM(IF(day_v=1, views, 0)) AS '1',
SUM(IF(day_v=2, views, 0)) AS '2',
SUM(IF(day_v=3, views, 0)) AS '3',
SUM(IF(day_v=4, views, 0)) AS '4',
SUM(IF(day_v=5, views, 0)) AS '5',
SUM(IF(day_v=6, views, 0)) AS '6',
SUM(IF(day_v=7, views, 0)) AS '7',
SUM(IF(day_v=8, views, 0)) AS '8',
SUM(IF(day_v=9, views, 0)) AS '9',
SUM(IF(day_v=10, views, 0)) AS '10',
SUM(IF(day_v=11, views, 0)) AS '11',
SUM(IF(day_v=12, views, 0)) AS '12',
SUM(IF(day_v=13, views, 0)) AS '13',
SUM(IF(day_v=14, views, 0)) AS '14',
SUM(IF(day_v=15, views, 0)) AS '15',
SUM(IF(day_v=16, views, 0)) AS '16',
SUM(IF(day_v=17, views, 0)) AS '17',
SUM(IF(day_v=18, views, 0)) AS '18',
SUM(IF(day_v=19, views, 0)) AS '19',
SUM(IF(day_v=20, views, 0)) AS '20',
SUM(IF(day_v=21, views, 0)) AS '21',
SUM(IF(day_v=22, views, 0)) AS '22',
SUM(IF(day_v=23, views, 0)) AS '23',
SUM(IF(day_v=24, views, 0)) AS '24',
SUM(IF(day_v=25, views, 0)) AS '25',
SUM(IF(day_v=26, views, 0)) AS '26',
SUM(IF(day_v=27, views, 0)) AS '27',
SUM(IF(day_v=28, views, 0)) AS '28',
SUM(IF(day_v=29, views, 0)) AS '29',
SUM(IF(day_v=30, views, 0)) AS '30',
SUM(IF(day_v=31, views, 0)) AS '31'
FROM
(
SELECT DAY(vwr_date) AS day_v,
MONTH(vwr_date) AS MONTH_v,
Year(vwr_date) AS YEAR_V,
date(vwr_date) AS date_v,
count(vwr_id) AS views
FROM car_viewer
WHERE Year(vwr_date)='2012' AND vwr_tid='18'
GROUP BY date_v
) as viewz
GROUP BY MONTH_v, YEAR_V
ORDER BY MONTH_v, YEAR_V DESC;
See http://sqlfiddle.com/#!2/75aa8/1 for a working example.