Another Pivot with Dynamic Fields - sql-server-2008

I have a view defined as follows:
-- Body of the view: per-donor yearly donation counts, restricted to the
-- current year and the four years before it.
SELECT
    IdSezioneDonatore,
    NumeroDonatore,
    Anno,
    DonaAnno
FROM dbo.DonazioniAnnue
WHERE Anno >= YEAR(GETDATE()) - 4
It returns the following fields and values:
IdSezioneDonatore NumeroDonatore Anno DonaAnno
2850 3624 2009 3
2850 5585 2009 1
2850 3624 2010 2
2850 5586 2010 1
2850 3624 2011 1
2850 5586 2011 1
.... ..... .... ...
How can I get a result like this?
IdSezioneDonatore NumeroDonatore Anno2009 Anno2010 Anno2011 Anno2012 Anno2013
2850 3624 3 2 0 0 0
2850 5585 1 0 0 0 1
2850 5586 0 1 1 0 1
.... ..... ... ... ... ... ...
Thanks to all.

-- Dynamic PIVOT over TEST: produces one output column per distinct Anno value.
-- T-SQL variables are introduced with '@' ('#' denotes a temp table and is not
-- valid in DECLARE), so all sigils are '@' here.
DECLARE @QUERY NVARCHAR(MAX)
DECLARE @Annuals NVARCHAR(MAX)
DECLARE @Annos TABLE (Anno INT)

INSERT INTO @Annos (Anno)
SELECT DISTINCT Anno FROM TEST

-- Build the bracketed, comma-separated column list, e.g. [2009],[2010],[2011].
-- FOR XML PATH('') concatenates in a guaranteed ORDER BY order (available on
-- SQL Server 2008); QUOTENAME adds and escapes the brackets. STUFF removes the
-- leading comma.
SELECT @Annuals = STUFF((
        SELECT ',' + QUOTENAME(CONVERT(VARCHAR(10), A.Anno))
        FROM @Annos A
        ORDER BY A.Anno
        FOR XML PATH('')
    ), 1, 1, '')

-- With no years there is no valid IN (...) list, so skip execution entirely.
IF @Annuals IS NOT NULL
BEGIN
    -- SELECT * is deliberate: the pivoted column names are only known at run time.
    -- Combinations with no source row come out as NULL.
    SET @QUERY = N'SELECT * FROM TEST PIVOT (MAX(DonaAnno) FOR Anno IN (' + @Annuals + N')) AS [pivot]'
    EXEC sp_executesql @QUERY
END
This would help. ;-)

Related

statsmodels OLS gives parameters despite perfect multicollinearity

Assume the following df:
ib c d1 d2
0 1.14 1 1 0
1 1.0 1 1 0
2 0.71 1 1 0
3 0.6 1 1 0
4 0.66 1 1 0
5 1.0 1 1 0
6 1.26 1 1 0
7 1.29 1 1 0
8 1.52 1 1 0
9 1.31 1 1 0
10 0.89 1 0 1
d1 and d2 are perfectly colinear. Now I estimate the following regression model:
import statsmodels.api as sm

# Fit an OLS model of ib on the columns c, d1 and d2 of df, keeping the
# summary table of the fitted model in reg.
regressors = df[['c', 'd1', 'd2']]
reg = sm.OLS(df['ib'], regressors).fit().summary()
reg
This gives me the following output:
<class 'statsmodels.iolib.summary.Summary'>
"""
OLS Regression Results
==============================================================================
Dep. Variable: ib R-squared: 0.087
Model: OLS Adj. R-squared: -0.028
Method: Least Squares F-statistic: 0.7590
Date: Thu, 17 Nov 2022 Prob (F-statistic): 0.409
Time: 12:19:34 Log-Likelihood: -1.5470
No. Observations: 10 AIC: 7.094
Df Residuals: 8 BIC: 7.699
Df Model: 1
Covariance Type: nonrobust
===============================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------
c 0.7767 0.111 7.000 0.000 0.521 1.033
d1 0.2433 0.127 1.923 0.091 -0.048 0.535
d2 0.5333 0.213 2.499 0.037 0.041 1.026
==============================================================================
Omnibus: 0.257 Durbin-Watson: 0.760
Prob(Omnibus): 0.879 Jarque-Bera (JB): 0.404
Skew: 0.043 Prob(JB): 0.817
Kurtosis: 2.019 Cond. No. 8.91e+15
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 2.34e-31. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
"""
However, including c, d1 and d2 represents the well known dummy variable trap which, from my understanding, should make it impossible to estimate the model. Why is this not the case here?

Apply a function for all columns of a table

I am trying to make a calculation in MySQL for all columns of a table.
Table: bev
Jahr GKZ gesamt A B C
2017 1111000 88.519 855 888 814
2017 1112000 247.943 2.414 2.379 2.262
2017 1113000 253.106 2.290 2.343 2.289
2017 1113004 43.392 408 416 403
2017 1113008 12.383 137 134 124
2017 1113012 27.106 252 252 249
2017 1113016 41.673 391 410 398
2017 1113020 39.585 364 391 373
2017 1113024 10.075 63 73 74
2017 1113028 24.083 199 205 209
2017 1113032 8.745 63 77 65
2017 1113036 18.143 170 170 143
2017 1113040 27.921 243 215 251
Table: ja
GKZ Jahr ja_name
1001000 2017 K X
1002000 2017 K Y
5370000 2017 L Z
5370004 2017 Z1
5370012 2017 Z2
5370016 2017 Z3
5370020 2017 Z4
I already got the calculation for one column (the first one: gesamt) in a function:
-- Returns bev_Total minus the sum of bev.gesamt over the sub-entries of the
-- same 1000-block as bev_ID (GKZ values kreis*1000+1 .. kreis*1000+999) for
-- year ja_jahr, counting only rows that have a matching ja row with a
-- non-NULL ja_name.
--
-- Parameters:
--   bev_ID    GKZ of the row being adjusted
--   bev_Total value of that row before subtraction
--   ja_name   not used by the calculation; kept so existing callers still work
--   ja_jahr   year to restrict the sum to
-- Returns NULL when no sub-entry matches (SUM over zero rows is NULL);
-- callers are expected to COALESCE the result.
CREATE DEFINER=`DB`@`%` FUNCTION `Total_Amount_Funct`(
    bev_ID int(11),
    bev_Total int(11),
    ja_name VARCHAR(255),
    ja_jahr int(11)) RETURNS int(11)
    -- The function reads tables, so its result is not a pure function of its
    -- arguments.
    NOT DETERMINISTIC
    READS SQL DATA
BEGIN
    DECLARE kreis int(11);
    DECLARE Total_Sum int(11);

    -- DIV truncates; '/' yields a decimal that is rounded on assignment to an
    -- INT, which would select the wrong block for any bev_ID ending in 500-999.
    SET kreis = bev_ID DIV 1000;

    SELECT SUM(b.gesamt)
    INTO Total_Sum
    FROM bev AS b
    INNER JOIN ja AS j
        ON j.GKZ = b.GKZ
       AND j.Jahr = b.Jahr
    WHERE j.Jahr = ja_jahr
      AND MOD(b.GKZ, 1000) <> 0
      AND b.GKZ BETWEEN (kreis * 1000 + 1) AND ((kreis + 1) * 1000 - 1)
      -- j.ja_name is the column of ja, not the ja_name parameter
      AND j.ja_name IS NOT NULL;

    RETURN bev_Total - Total_Sum;
END
This function can be called with the following select:
-- One row per (GKZ, Jahr) present in both tables. For GKZ values that are a
-- multiple of 1000 the column `bev` holds the function result, falling back to
-- gesamt when the function returns NULL; all other rows keep gesamt unchanged.
SELECT DISTINCT
    b.GKZ,
    b.Jahr,
    b.gesamt,
    CASE
        WHEN MOD(b.GKZ, 1000) = 0
            THEN COALESCE(Total_Amount_Funct(b.GKZ, b.gesamt, j.ja_name, b.Jahr), b.gesamt)
        ELSE b.gesamt
    END AS bev,
    j.ja_name
FROM ja AS j
INNER JOIN bev AS b
    ON b.GKZ = j.GKZ
   AND b.Jahr = j.Jahr;
I really would like to apply the function for all columns of the table. Maybe as a stored procedure? Maybe as dynamic columns. I do not know. I have solved this problem in MS SQL with dynamic columns but I have the feeling that translating it will take more time than trying to complete the function as a Stored Procedure.
The name of the columns can be obtained by:
-- Names of all columns of table bev except the two key columns.
SELECT c.column_name
FROM information_schema.columns AS c
WHERE c.table_name = 'bev'
  AND c.column_name <> 'Jahr'
  AND c.column_name <> 'GKZ';
As Result it should be:
GKZ Jahr gesamt bev ja_name
1111000 2017 88.519 88.519 K X
1112000 2017 247.943 247.943 K Y
1113000 2017 253.106 101.350 L Z
1113004 2017 43.392 43.392 Z1
1113012 2017 27.106 27.106 Z2
1113016 2017 41.673 41.673 Z3
1113020 2017 39.585 39.585 Z4
As you are using the column only in the SUM, you could pass the column name as parameter and use CASE-statement to pick the column accordingly. Something like:
-- Returns bev_Total minus the sum of the column named by in_col over the
-- sub-entries bev_ID+1 .. bev_ID+999 of table bev for year ja_jahr, counting
-- only rows with a matching ja row whose ja_name is not NULL.
--
-- Parameters:
--   bev_ID    GKZ of the row being adjusted (INT: GKZ values have 7 digits,
--             which do not fit the integer part of DECIMAL(8,3))
--   bev_Total value of the chosen column on that row
--   ja_name   not used by the calculation; kept for caller compatibility
--   ja_jahr   year to restrict the sum to
--   in_col    name of the bev column to sum: 'gesamt', 'A', 'B' or 'C'
-- Returns NULL when no sub-entry matches or in_col is not one of the listed
-- names (SUM over only-NULL input is NULL); callers should COALESCE.
CREATE FUNCTION `Total_Amount_Funct`(
    bev_ID int,
    bev_Total int,
    ja_name VARCHAR(255),
    ja_jahr int,
    in_col varchar(64)
)
RETURNS int
-- The function reads tables, so it must not be declared DETERMINISTIC.
NOT DETERMINISTIC
READS SQL DATA
BEGIN
    DECLARE Total_Sum int;

    SELECT
        SUM(
            CASE in_col
                WHEN 'gesamt' THEN b.gesamt
                WHEN 'A' THEN b.A
                WHEN 'B' THEN b.B
                WHEN 'C' THEN b.C
            END
        )
    INTO Total_Sum
    FROM bev AS b
    INNER JOIN ja AS j
        ON b.GKZ = j.GKZ
       AND b.Jahr = j.Jahr
    WHERE
        -- without the year filter, rows of every year would be summed together
        j.Jahr = ja_jahr
        AND MOD(b.GKZ, 1000) <> 0
        AND b.GKZ BETWEEN bev_ID + 1 AND bev_ID + 999
        -- j.ja_name is the column of ja, not the ja_name parameter
        AND j.ja_name IS NOT NULL;

    RETURN bev_Total - Total_Sum;
END
And then call the function with column name and correct value:
Total_Amount_Funct(bev.GKZ, bev.gesamt, ja.ja_name, bev.Jahr, 'gesamt'),
Total_Amount_Funct(bev.GKZ, bev.A, ja.ja_name, bev.Jahr, 'A')
...
Note that calling a function which makes a query will serialize your SQL (calling the function on each row causes the function query to be executed on each row). This will hurt the query performance.
slaakso,
thank you very much for your answer. From today on you are my idol :-).
Thanks, thanks.
I have one more question, about performance.
Is it possible to write the function for all columns of the table bev? We can copy the column names into a temporary table:
-- Session-local list of column names, numbered in insertion order.
CREATE TEMPORARY TABLE listColumns (
    Columns_ID   MEDIUMINT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    Columnsnamen VARCHAR(256)
);
The column names are read from the information schema:
-- Store every column name of bev except the two key columns.
INSERT INTO listColumns (Columnsnamen)
SELECT c.column_name
FROM information_schema.columns AS c
WHERE c.table_name = 'bev'
  AND c.column_name <> 'Jahr'
  AND c.column_name <> 'GKZ';
This Table looks like:
Columns_ID Columnsnamen
1 gesamt
2 A
3 B
4 C
5
6
That way it is not necessary to mention every column name (the table contains about 100 columns). Maybe with a cursor over the Columns_ID?
It would be great if you had any other advice for me.
Thank you and kind regards
Ana

Same stored procedure, same tables, different DB gives different result

I have a stored procedure which gives me the expected result when I run it on MySQL 5. But when I run the same procedure on MariaDB 10.1.22, it gives me a different result.
Here is my stored procedure -
DELIMITER ;;
-- TestCumulative: returns running totals of provisioned and downloaded
-- subscriptions, one row per time bucket between start_date and end_date.
--
-- Parameters:
--   start_date, end_date  range to report on
--   duration              'lastDay' -> hourly buckets, 'lastYear' -> monthly
--                         buckets, anything else -> daily buckets
--   mno_id                compared against subscription.mno_id
--   profile_type          matched against profile_type.display_name or
--                         profile_type.subscription_type; '' disables the filter
--   timezone              target zone passed to CONVERT_TZ (source is "+00:00")
--
-- User variables are written with '@'. '#' starts a comment in MySQL/MariaDB,
-- so a statement such as SET #provisioned = 0 is cut off after SET.
--
-- NOTE(review): the final SELECT has no ORDER BY, so row order is unspecified,
-- and the running totals are accumulated in @provisioned/@downloaded in
-- whatever order the server produces rows. Adding ORDER BY to that SELECT alone
-- is presumably not sufficient, since the totals may be evaluated before the
-- sort; confirm the intended approach on the target server version.
CREATE DEFINER=`mconnect_admin`@`%` PROCEDURE `TestCumulative`(
    IN start_date TIMESTAMP,
    IN end_date TIMESTAMP,
    IN duration TEXT,
    IN mno_id TEXT,
    IN profile_type TEXT,
    IN timezone TEXT)
BEGIN
    SET @provisioned = 0;
    SET @downloaded = 0;
    SET @excludeProfileFilter = FALSE;
    SET @hourlyReport = FALSE;
    SET @monthlyReport = FALSE;
    SET @sdate = start_date;

    -- TEMPORARY keeps the drop from touching a permanent table of the same
    -- name and avoids the implicit commit of a plain DROP TABLE. The table is
    -- freshly created, so no DELETE is needed afterwards.
    DROP TEMPORARY TABLE IF EXISTS tempDates;
    CREATE TEMPORARY TABLE tempDates (timeRange VARCHAR(50));

    IF (profile_type = '') THEN
        SET profile_type = NULL;
        SET @excludeProfileFilter = TRUE;
    END IF;

    IF (duration = 'lastDay') THEN
        SET @hourlyReport = TRUE;
    END IF;

    IF (duration = 'lastYear') THEN
        SET @monthlyReport = TRUE;
    END IF;

    -- Fill tempDates with one row per bucket so that buckets without any
    -- activity still appear in the result.
    WHILE @sdate <= end_date DO
        IF (@hourlyReport = TRUE) THEN
            INSERT INTO tempDates (timeRange) VALUES (HOUR(@sdate) + 1);
            SET @sdate = date_add(@sdate, INTERVAL 1 HOUR);
        ELSEIF (@monthlyReport = TRUE) THEN
            INSERT INTO tempDates (timeRange) VALUES (MONTH(@sdate));
            SET @sdate = date_add(@sdate, INTERVAL 1 MONTH);
        ELSE
            INSERT INTO tempDates (timeRange) VALUES (DATE(@sdate));
            SET @sdate = date_add(@sdate, INTERVAL 1 DAY);
        END IF;
    END WHILE;

    -- Inner query "result": one row per provisioning event (1, 0) and one row
    -- per download event (0, 1), each tagged with its bucket. "r" sums them per
    -- bucket. The outer query joins every tempDates bucket and carries the
    -- running totals forward; buckets without a match leave the totals as is.
    -- In the WHERE clauses the unqualified names mno_id, profile_type and
    -- timezone are the procedure parameters; the s./p./r. names are columns.
    SELECT
        CASE WHEN r.DateRange IS NULL
            THEN (@provisioned := @provisioned)
            ELSE (@provisioned := @provisioned + r.Provisioned)
        END AS Provisioned,
        CASE WHEN r.DateRange IS NULL
            THEN (@downloaded := @downloaded)
            ELSE (@downloaded := @downloaded + r.Downloaded)
        END AS Downloaded,
        CASE WHEN r.DateRange IS NULL
            THEN d.timeRange
            ELSE r.DateRange
        END AS DateRange
    FROM (
        SELECT
            sum(result.Provisioned) AS Provisioned,
            sum(result.Downloaded) AS Downloaded,
            result.DateRange
        FROM (
            SELECT
                1 AS Provisioned,
                0 AS Downloaded,
                CASE
                    WHEN @hourlyReport = TRUE
                        THEN HOUR(CONVERT_TZ(s.provisioning_date,"+00:00",timezone))
                    WHEN @monthlyReport = TRUE
                        THEN MONTH(CONVERT_TZ(s.provisioning_date,"+00:00",timezone))
                    ELSE DATE(CONVERT_TZ(s.provisioning_date,"+00:00",timezone))
                END AS DateRange
            FROM subscription s
            INNER JOIN profile_type p ON p.id = s.profile_type
            WHERE s.mno_id = mno_id
              AND (@excludeProfileFilter = TRUE
                   OR p.display_name = profile_type
                   OR p.subscription_type = profile_type)
              AND DATE(CONVERT_TZ(s.provisioning_date,"+00:00",timezone))
                  BETWEEN DATE(CONVERT_TZ(start_date,"+00:00",timezone))
                      AND DATE(CONVERT_TZ(end_date,"+00:00",timezone))
            UNION ALL
            SELECT
                0 AS Provisioned,
                1 AS Downloaded,
                CASE
                    WHEN @hourlyReport = TRUE
                        THEN COALESCE(HOUR(CONVERT_TZ(r.end_download_date,"+00:00",timezone)),
                                      HOUR(CONVERT_TZ(r.last_update,"+00:00",timezone)))
                    WHEN @monthlyReport = TRUE
                        THEN COALESCE(MONTH(CONVERT_TZ(r.end_download_date,"+00:00",timezone)),
                                      MONTH(CONVERT_TZ(r.last_update,"+00:00",timezone)))
                    ELSE COALESCE(DATE(CONVERT_TZ(r.end_download_date,"+00:00",timezone)),
                                  DATE(CONVERT_TZ(r.last_update,"+00:00",timezone)))
                END AS DateRange
            FROM subscription s
            INNER JOIN profile_type p ON p.id = s.profile_type
            LEFT JOIN rsp_session r ON r.profile_iccid = s.iccid
            WHERE s.mno_id = mno_id
              AND (@excludeProfileFilter = TRUE
                   OR p.display_name = profile_type
                   OR p.subscription_type = profile_type)
              AND COALESCE(DATE(CONVERT_TZ(r.end_download_date,"+00:00",timezone)),
                           DATE(CONVERT_TZ(r.last_update,"+00:00",timezone)))
                  BETWEEN DATE(CONVERT_TZ(start_date,"+00:00",timezone))
                      AND DATE(CONVERT_TZ(end_date,"+00:00",timezone))
              AND s.status IN ('INSTALLED','ENABLED','DELETED')
        ) result
        GROUP BY result.DateRange
    ) r
    RIGHT OUTER JOIN tempDates d ON r.DateRange = d.timeRange;
END;;
DELIMITER ;
The result I am getting in MariaDB 10.1.22, which is not the correct one, is -
2 0 2017-11-02
5 10 2017-11-03
32 16 2017-11-06
51 34 2017-11-07
64 42 2017-11-08
79 47 2017-11-09
79 48 2017-11-10
102 61 2017-11-13
116 61 2017-11-14
128 68 2017-11-15
145 71 2017-11-16
157 82 2017-11-17
196 95 2017-11-20
254 111 2017-11-21
273 118 2017-11-22
313 134 2017-11-23
323 144 2017-11-24
363 149 2017-11-27
368 152 2017-11-28
371 152 2017-11-29
403 160 2017-11-30
403 160 2017-11-01
403 160 2017-11-04
403 160 2017-11-05
403 160 2017-11-11
403 160 2017-11-12
403 160 2017-11-18
403 160 2017-11-19
403 160 2017-11-25
403 160 2017-11-26
It should be in order.
Can anyone tell me what is wrong? Or is it a DB issue?
Thanks in advance.
You have no order by in any of your queries. The result set can be in any order, because result sets without an order by (like tables) are unordered sets.
Hence, the database is correct. Your understanding is missing this important fact about SQL.
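Add the order by that you want and the result set will be appropriately ordered in any version of the database that you use. Note that the running totals in this query are accumulated in user variables as the rows are produced, so the rows must also be processed in that order for the cumulative values to be correct.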

Auto-generate custom ID using mysql workbench?

I want it to be in the format "YYYY-###", where YYYY is the year and ### is a number incrementing from 1, e.g. 2016-001, 2016-002, 2016-003, ...
You can use a Numbers/Tally table if you like. I use a UDF to dynamically generate number ranges. As you can see in the function call I set the range from 1 to 12 with an increment of 1
-- Builds IDs of the form YYYY-NNN by pairing every year in @Table with every
-- number returned by the range function (1..12 here).
-- @Table is a table variable; T-SQL variables are introduced with '@'
-- ('#' would denote a temp table and is not valid in DECLARE).
Declare @Table table (SomeField int)
Insert into @Table (SomeField) values
(2015),(2016)

-- right('000' + n, 3) left-pads the number to three digits; numbers above 999
-- would be cut to their last three digits.
Select StringValue = cast(A.SomeField as varchar(25)) + '-' + right('000' + cast(B.RetVal as varchar(25)), 3)
      ,A.SomeField
      ,B.RetVal
From @Table A
-- every year is combined with every number, so this is a plain cross join
Cross Join (Select RetVal = cast(RetVal as int) from [dbo].[udf-Create-Range-Number](1,12,1)) B
-- without ORDER BY the row order is not guaranteed
Order By A.SomeField, B.RetVal
Returns
StringValue SomeField RetVal
2015-001 2015 1
2015-002 2015 2
2015-003 2015 3
2015-004 2015 4
2015-005 2015 5
2015-006 2015 6
2015-007 2015 7
2015-008 2015 8
2015-009 2015 9
2015-010 2015 10
2015-011 2015 11
2015-012 2015 12
2016-001 2016 1
2016-002 2016 2
2016-003 2016 3
2016-004 2016 4
2016-005 2016 5
2016-006 2016 6
2016-007 2016 7
2016-008 2016 8
2016-009 2016 9
2016-010 2016 10
2016-011 2016 11
2016-012 2016 12
The UDF
-- Returns the numbers @R1, @R1 + @Incr, @R1 + 2*@Incr, ... that do not exceed @R2.
--   @R1    first value; always returned, even when @R1 > @R2
--   @R2    upper bound (inclusive)
--   @Incr  step; when it is NULL, zero or negative only @R1 is returned
-- Syntax: Select * from [dbo].[udf-Create-Range-Number](0,100,2)
CREATE FUNCTION [dbo].[udf-Create-Range-Number] (@R1 money, @R2 money, @Incr money)
Returns
@ReturnVal Table (RetVal money)
As
Begin
    With NumbTable as (
        Select NumbFrom = @R1
        union all
        Select nf.NumbFrom + @Incr
        From NumbTable nf
        -- maxrecursion 0 removes the recursion limit, so a non-positive step
        -- must be stopped here or the recursion would never end
        Where @Incr > 0
          -- test the NEXT value: testing nf.NumbFrom < @R2 would emit one value
          -- beyond @R2 whenever the step does not land exactly on @R2
          And nf.NumbFrom + @Incr <= @R2
    )
    Insert into @ReturnVal (RetVal)
    Select NumbFrom from NumbTable Option (maxrecursion 0)
    Return
End

Adding values from 2 rows based on conditions

I have a table as Below....
ROW gvkey datadate CQTR CYEARQ Value
1 6066 3/31/2015 0:00 1 2015 3610
2 6066 12/31/2014 0:00 4 2014 16868
3 6066 9/30/2014 0:00 3 2014 10809
4 6066 6/30/2014 0:00 2 2014 6905
5 6066 3/31/2014 0:00 1 2014 3326
I want to get the sum of Value for 3/31/2015 and 12/31/2014. Please suggest how I can do it in MS SQL.
Are you looking for this?
-- Demo: loads the sample rows into a temp table and sums Col3 (Value) per
-- Col1 (gvkey) over the two requested dates.
Set Nocount On;

-- Recreate the temp table so the script can be re-run in the same session.
If Object_Id('tempdb.dbo.#table') Is Not Null
Begin
    Drop Table #table;
End

Create Table #table
(
     Id Int Primary Key
    ,Col1 Int
    ,RDate Datetime
    ,Col2 Int
    ,RYear Int
    ,Col3 Int
)

-- Dates use the unseparated yyyymmdd form, which is read the same way under
-- every language/DATEFORMAT setting; 'mm/dd/yyyy' fails under dmy settings.
Insert Into #table (Id, Col1, RDate, Col2, RYear, Col3) Values
 (1,6066,'20150331',1,2015,3610)
,(2,6066,'20141231',4,2014,16868)
,(3,6066,'20140930',3,2014,10809)
,(4,6066,'20140630',2,2014,6905)
,(5,6066,'20140331',1,2014,3326)

-- No NOLOCK hint: a local temp table is private to this session.
Select t.Col1
      ,Sum(t.Col3) As ColSum
From #table As t
Where t.RDate In ('20150331','20141231')
Group By t.Col1