Request to retrieve a list of FullvisitorId - BigQuery - mysql

I want to retrieve data for a specific list of fullVisitorId values.
However, I do not know how to restrict my query to a given list of fullVisitorId values.
The goal is to retrieve only those particular visitors, not all sessions.
#standardSQL
-- One output row per hit from the ga_sessions_20190616* tables, keeping only
-- hits whose custom dimension at index 10 equals 'prod'.
SELECT
  date,
  visitStartTime,
  TIMESTAMP_SECONDS(visitStartTime) AS starttime_UTC,
  DATETIME(TIMESTAMP_SECONDS(visitStartTime), "Europe/Amsterdam") AS starttime_LOCAL,
  totals.timeOnSite,
  trafficSource.source,
  trafficSource.referralPath,
  hits.hour,
  hits.minute,
  hits.hitNumber,
  fullVisitorId,
  hits.isEntrance,
  hits.isExit,
  hits.referer,
  hits.eventInfo.eventAction,
  hits.latencyTracking.userTimingValue,
  hits.type,
  -- Session key: visitor id and visit id concatenated as strings.
  CONCAT(CAST(fullvisitorid AS string), CAST(visitid AS string)) AS case_id,
  -- "Y/M/D H:M:N" label built from unpadded integers.
  -- NOTE(review): the last component is hits.hitNumber, not seconds.
  CONCAT(
    CAST(EXTRACT(YEAR FROM PARSE_DATE('%Y%m%d', date)) AS string), "/",
    CAST(EXTRACT(MONTH FROM PARSE_DATE('%Y%m%d', date)) AS string), "/",
    CAST(EXTRACT(DAY FROM PARSE_DATE('%Y%m%d', date)) AS string), " ",
    CAST(hits.hour AS string), ":",
    CAST(hits.minute AS string), ":",
    CAST(hits.hitNumber AS string)
  ) AS dated,
  -- Hit-level custom dimensions, picked by index.
  (SELECT MAX(IF(index = 4, value, NULL)) FROM UNNEST(hits.customDimensions)) AS type_page,
  (SELECT MAX(IF(index = 6, value, NULL)) FROM UNNEST(hits.customDimensions)) AS Univers
FROM `ga-orange-pro-etp.*****.ga_sessions_20190616*`
-- Explicit form of the original comma join: flattens the hits array.
CROSS JOIN UNNEST(hits) AS hits
WHERE (
  SELECT MAX(IF(index = 10, value, NULL))
  FROM UNNEST(hits.customDimensions)
) = 'prod'

You can simply get the list of distinct fullVisitorId by running:
-- Unique visitor ids among hits whose custom dimension at index 10 equals 'prod'.
SELECT DISTINCT
  fullVisitorId
FROM `ga-orange-pro-etp.*****.ga_sessions_20190616*`
CROSS JOIN UNNEST(hits) AS hits
WHERE (
  SELECT MAX(IF(index = 10, value, NULL))
  FROM UNNEST(hits.customDimensions)
) = 'prod'
If you remove DISTINCT it will return a value per single row so you will see duplicate fullVisitorId.

Related

How to improve complex SQL query

I have a complex SQL query. It is an analytics query over conversations with customers of a Facebook fan page, shown below:
-- Per-page conversation statistics, one row per (time slot, page).
-- Bind parameters: :Start / :End / :Interval define the time slots, :Trunc is the
-- DATE_TRUNC unit, and :PageID selects the page to report on.
-- NOTE(review): in PostgreSQL the POSIX-style zone name 'Etc/GMT+7' denotes UTC-7
-- (the sign is inverted) — confirm this is the intended offset.
SELECT
SeriesTime AS Time,
FP.PageID AS PageID,
-- MAX collapses the rows produced when both derived tables match the same slot;
-- COALESCE turns "no match" into 0.
COALESCE(MAX(FC.Customers), 0) AS Customers,
COALESCE(MAX(FC.Conversations), 0) AS Conversations,
COALESCE(MAX(FCM.Conversations), 0) AS UpdatedConversations,
COALESCE(MAX(Phones), 0) AS Phones,
COALESCE(MAX(Missed), 0) AS Missed,
COALESCE(MAX(FCM.MessageTypes), 0) AS MessageConversations,
COALESCE(MAX(Total), 0) AS TotalMessage,
COALESCE(AVG(ResponseTime), 0) AS ResponseTime
FROM
-- One row per time slot start.
GENERATE_SERIES(:Start, :End, :Interval :: INTERVAL) S (SeriesTime)
-- Every slot is paired with every page id seen in FacebookConversations.
CROSS JOIN (
SELECT DISTINCT PageID FROM FacebookConversations
) FP
-- Message-level statistics per (page, truncated message time).
LEFT JOIN (
SELECT
FCM.PageID,
-- NULLIF(..., '') suggests CreatedTime is stored as text; empty strings become NULL before the cast.
DATE_TRUNC(:Trunc, NULLIF(CreatedTime, '')::TIMESTAMP AT TIME ZONE 'Etc/GMT+7') AS Time,
-- Conversations in which no message counted as a reply.
COUNT(DISTINCT FCM.ConversationID) FILTER (WHERE TotalReplied = 0) AS Missed,
COUNT(DISTINCT FCM.ConversationID) AS Conversations,
COUNT(DISTINCT CASE WHEN FCM."type" = 'message' THEN FCM.ConversationID ELSE NULL END) AS MessageTypes,
COUNT(FCM.ID) AS Total,
-- Average gap, in seconds, before messages flagged as replies.
AVG(EXTRACT(EPOCH FROM ResponseTime)) FILTER (WHERE IsReplied) AS ResponseTime,
COUNT(DISTINCT PhoneNumber) AS Phones
FROM (
SELECT
*,
-- Number of reply messages in the whole conversation.
COUNT(IsReplied) FILTER (WHERE IsReplied) OVER (PARTITION BY ConversationID) AS TotalReplied
FROM (
SELECT
ID,
PageID,
type,
ConversationID,
CreatedTime,
-- Time elapsed since the previous message of the same conversation.
-- NOTE(review): unlike the outer DATE_TRUNC, this cast has no NULLIF guard for empty strings.
CreatedTime::TIMESTAMP AT TIME ZONE 'Etc/GMT+7' - LAG(CreatedTime::TIMESTAMP AT TIME ZONE 'Etc/GMT+7') OVER Ordered AS ResponseTime,
-- True when the sender changed from the previous message and the current sender is the page.
COALESCE((LAG("from") OVER Ordered <> "from") AND "from" = PageID, FALSE) AS IsReplied
FROM
FacebookConversationMessages
-- Messages of a conversation in chronological order.
WINDOW Ordered AS (
PARTITION BY ConversationID ORDER BY CreatedTime::TIMESTAMP AT TIME ZONE 'Etc/GMT+7'
)
) FCM
) FCM
LEFT JOIN
ConversationPhones CP
ON
CP.ConversationMessageID = FCM.ID
GROUP BY
Time,
FCM.PageID
) FCM
ON
FCM.PageID = FP.PageID
AND
-- Half-open slot: [SeriesTime, SeriesTime + :Interval).
Time >= SeriesTime
AND
Time < SeriesTime + :Interval :: INTERVAL
-- Conversation-level statistics per (page, truncated creation time).
LEFT JOIN (
SELECT
PageID,
DATE_TRUNC(:Trunc, NULLIF(CreatedTime, '')::TIMESTAMP AT TIME ZONE 'Etc/GMT+7') AS CreatedAt,
COUNT(DISTINCT "from") AS customers,
COUNT(*) AS Conversations
FROM
FacebookConversations
GROUP BY
CreatedAt,
PageID,
-- NOTE(review): Type is grouped but not selected, so this can yield several rows per
-- (CreatedAt, PageID); the outer MAX then keeps only the largest counts — confirm intent.
Type
) FC
ON
FC.PageID = FP.PageID
AND
CreatedAt >= SeriesTime
AND
CreatedAt < SeriesTime + :Interval :: INTERVAL
WHERE
FP.PageID = :PageID
GROUP BY
SeriesTime,
FP.PageID
ORDER BY
FP.PageID,
SeriesTime
On my localhost (with less data) it runs quite fast and returns exactly what I want. But on the server it runs very slowly (it normally takes about 5 minutes to complete). Can anyone tell me which parts make it slow?
Thank you very much!

MySQL performance issue on multiple select inside each other

Below you can see my query. It is a big query running over a fairly big table (if you consider 200,000 rows big), and it takes over 10 seconds to load. I would like expert help to optimize it; any suggestion would be highly appreciated.
-- Per-login report over mt5_deals_2020 for Time between 2020-09-01 and 2020-10-01:
-- user name, login, count of positions closed within 5 minutes (Scalp), summed
-- Profit+Storage of those positions, and the login's overall trade count and P/L.
SELECT mt5_users.Name AS Name,
Test2.Login AS SLogin,
(
-- Scalp: correlated on SLogin; rebuilds the same per-position derived table as Test2 below.
SELECT COUNT(Test.Order)
FROM (
SELECT *
FROM (
SELECT MAX(`Order`) AS `Order`,
-- OPEN_TIME / CLOSE_TIME are the 1st and 2nd entries of the comma-joined distinct Time values.
-- With a single distinct Time both expressions return the same value.
-- NOTE(review): GROUP_CONCAT has no ORDER BY here, so the entry order is not guaranteed — confirm.
SUBSTRING_INDEX(SUBSTRING_INDEX(GROUP_CONCAT(DISTINCT Time SEPARATOR ","), ",", 1), ",", -1) AS OPEN_TIME,
SUBSTRING_INDEX(SUBSTRING_INDEX(GROUP_CONCAT(DISTINCT Time SEPARATOR ","), ",", 2), ",", -1) AS CLOSE_TIME,
MAX(Profit) AS Profit,
MAX(Storage) AS Storage,
MAX(Login) AS Login,
MAX(Action) AS Action,
MAX(Entry) AS Entry
FROM `mt5_deals_2020`
WHERE Time BETWEEN "2020-09-01" AND "2020-10-01"
AND Entry IN ("0",
"1")
GROUP BY PositionID) AS Main
-- Drops positions for which only one distinct Time was found.
WHERE OPEN_TIME != CLOSE_TIME) As Test
WHERE Login = SLogin
AND Test.Entry <> "0"
AND Test.CLOSE_TIME BETWEEN "2020-09-01" AND "2020-10-01"
-- Open-to-close duration of at most 5 minutes.
AND TIMESTAMPDIFF(MINUTE,Test.OPEN_TIME,Test.CLOSE_TIME) <= "5"
AND Test.Action <= 1 ) AS Scalp,
SUM(Test2.Profit+Test2.Storage) AS Profit,
(
-- Trades: all deals of the login in the window with Action <= 1 and Entry <> "0".
SELECT COUNT(mt5_deals_2020.order)
FROM mt5_deals_2020
WHERE Login = SLogin
AND mt5_deals_2020.Time BETWEEN "2020-09-01" AND "2020-10-01"
AND mt5_deals_2020.Action <= 1
AND mt5_deals_2020.Entry <> "0" ) AS Trades,
(
-- PL: Profit+Storage summed over the same set of deals as Trades.
SELECT SUM(mt5_deals_2020.Profit+mt5_deals_2020.Storage)
FROM mt5_deals_2020
WHERE Login = SLogin
AND mt5_deals_2020.Time BETWEEN "2020-09-01" AND "2020-10-01"
AND mt5_deals_2020.Entry <> "0"
AND mt5_deals_2020.Action <= 1 ) AS PL
FROM (
-- One row per PositionID; same derived table as inside the Scalp subquery above.
SELECT *
FROM (
SELECT MAX(`Order`) AS `Order`,
SUBSTRING_INDEX(SUBSTRING_INDEX(GROUP_CONCAT(DISTINCT Time SEPARATOR ","), ",", 1), ",", -1) AS OPEN_TIME,
SUBSTRING_INDEX(SUBSTRING_INDEX(GROUP_CONCAT(DISTINCT Time SEPARATOR ","), ",", 2), ",", -1) AS CLOSE_TIME,
MAX(Profit) AS Profit,
MAX(Storage) AS Storage,
MAX(Login) AS Login,
MAX(Action) AS Action,
MAX(Entry) AS Entry
FROM `mt5_deals_2020`
WHERE Time BETWEEN "2020-09-01" AND "2020-10-01"
AND Entry IN ("0",
"1")
GROUP BY PositionID) AS Main1
WHERE OPEN_TIME != CLOSE_TIME) As Test2
LEFT JOIN mt5_users
ON Test2.Login = mt5_users.Login
-- The Group filter rejects rows where the LEFT JOIN found no user (Group is NULL),
-- so the join effectively behaves like an INNER JOIN.
WHERE mt5_users.Group IN ("KUVVARSTUSD",
"real\\KUV3VARSIUSD",
"real\\KUVVARPLUSD",
"real\\KUVVARGOUSD",
"real\\KUVVARGOEUR"
)
AND Test2.CLOSE_TIME BETWEEN "2020-09-01" AND "2020-10-01"
AND TIMESTAMPDIFF(MINUTE,Test2.OPEN_TIME,Test2.CLOSE_TIME) <= "5"
AND Test2.Action <= 1
GROUP BY Test2.Login
I need the time difference between the opening and the closing of an order, along with some other data; that is all the nested selects are doing.
Explain result added:
First, let's simplify
-- Shape of the original Scalp subquery: the grouped derived table (Main) is wrapped
-- in a pass-through SELECT * layer (Test). `...` marks text elided for brevity.
SELECT COUNT(Test.Order)
FROM
(
SELECT *
FROM
(
SELECT ...
FROM `mt5_deals_2020`
WHERE Time BETWEEN "2020-09-01" AND "2020-10-01"
AND Entry IN ("0", "1")
GROUP BY PositionID
) AS Main
WHERE OPEN_TIME != CLOSE_TIME
) As Test
WHERE Login = SLogin
AND Test.Entry <> "0"
AND ...
to
-- Simplified Scalp subquery: the pass-through SELECT * layer (alias Test) is removed.
-- `...` marks text elided for brevity.
SELECT COUNT(*)
FROM
(
    SELECT ...
    FROM `mt5_deals_2020`
    WHERE Time BETWEEN "2020-09-01" AND "2020-10-01"
      AND Entry IN ("0", "1")
    GROUP BY PositionID
    -- HAVING runs after GROUP BY, so it can test the aggregated OPEN_TIME / CLOSE_TIME aliases.
    HAVING OPEN_TIME != CLOSE_TIME
) AS Main
-- Row filters must stay in WHERE so they apply before COUNT(*); only the alias Main
-- exists at this level.
WHERE Login = SLogin
  AND Main.Entry <> "0"
  AND ...
Notes:
COUNT(x) tests x for being NOT NULL; I suspect that is irrelevant.
HAVING is like WHERE but it can reference expressions, such as aggregates like SUM().
Your formulation has a SELECT *, which involves creating a big(?) temp table with all the 'columns'. Mine avoids that.
SUBSTRING_INDEX is messy. Consider redesigning the schema so that you don't need to use it.
What are the possible values of Entry and Action? There may be a better way to do the tests on those. For example if Entry can be only 0 or 1, it is better to say mt5_deals_2020.Entry = 1, thereby opening the door for an index.
Potential bug:
Time BETWEEN "2020-09-01" AND "2020-10-01"
If Time is a DATE, then that includes the first of October. (Please provide SHOW CREATE TABLE.) I prefer the following:
Time >= "2020-09-01"
AND Time < "2020-09-01" + INTERVAL 1 MONTH
mt5_users might benefit from this composite, covering, index:
-- `Group` is a reserved word in MySQL, so it must be backtick-quoted in the column list.
INDEX(Login, `Group`, Name) -- in this order
After doing some of those, come back for more discussion if you like.

How to fix problem with sum, length and group by

The field contracts is a text column whose value is a list of strings separated by ",".
A query:
-- Fails with error 1111: the outer sum(...) wraps an expression that itself calls sum(...).
select sum( if( length(dapps.contracts) =
sum(length(replace(dapps.contracts,',',''))) , 1 , length(dapps.contracts)
- sum(length(replace(dapps.contracts,',',''))) ) ) as f1
from dapps
group by id
show :
Error in query (1111): Invalid use of group function
I need a one-level query.
This query works fine:
-- Two-level form that works: the inner query computes f1 per id, the outer query totals it.
-- Assuming one row per id, f1 is 1 when contracts has no comma and otherwise the
-- number of commas — TODO confirm that id is unique.
select sum(f1)
from (
    select if(
               length(dapps.contracts) = sum(length(replace(dapps.contracts, ',', ''))),
               1,
               length(dapps.contracts) - sum(length(replace(dapps.contracts, ',', '')))
           ) as f1
    from dapps
    group by id
) tb1
I need an equivalent query that does not use a subquery.

Getting the first and last row?

I have another query that returns the min, max, start and end price.
It is for a specific month and works perfectly for metal_id = 1, but when metal_id is changed to 2 it returns no data.
please see:
The query below does its job to select the min, max, start and last price per day in a given month.
I would like to select the same but for the whole month, as in show the overall performance for the given month instead of on a daily basis.
Fiddle:
http://sqlfiddle.com/#!9/bee86/1
I just need the last 2 prices, first and last price on the months selected...
-- Lowest and highest metal price (with their timestamps) per calendar month for
-- metal_id = 1, over rows dated within the last 180 days.
-- Each derived table numbers the rows inside a month with MySQL user variables:
-- lowp orders by ascending price, highp by descending price, so rn = 1 is the
-- month's lowest / highest row respectively.
-- NOTE(review): this relies on the variables being evaluated in ORDER BY order,
-- which MySQL does not guarantee — confirm on the target version.
select
    highp.metal_price_datetime_IST as high_price_metal_price_datetime_IST
  , highp.metal_price as highest_price
  , lowp.report_term
  , lowp.metal_price as lowest_price
  , lowp.metal_price_datetime_IST as low_price_metal_price_datetime_IST
from (
    select
        @report_term := concat(monthname(metal_price_datetime_IST), ' ', year(metal_price_datetime_IST)) as report_term
      , metal_price_datetime_IST
      , metal_price
      , metal_id
        -- Restart the counter whenever the month label changes.
      , case when @report_term = @old_report_term then @rn1 := @rn1 + 1 else @rn1 := 1 end as rn
      , @old_report_term := @report_term
    from metal_prices
    cross join (select @rn1 := 0, @old_report_term := '') inituservar1
    where datediff(now(), metal_price_datetime_IST) between 0 and 180
      and metal_id = 1
    order by metal_id, report_term, metal_price asc
) lowp
inner join (
    select
        @report_term2 := concat(monthname(metal_price_datetime_IST), ' ', year(metal_price_datetime_IST)) as report_term
      , metal_price_datetime_IST
      , metal_price
      , metal_id
      , case when @report_term2 = @old_report_term2 then @rn2 := @rn2 + 1 else @rn2 := 1 end as rn
      , @old_report_term2 := @report_term2
    from metal_prices
    cross join (select @rn2 := 0, @old_report_term2 := '') inituservar1
    where datediff(now(), metal_price_datetime_IST) between 0 and 180
      and metal_id = 1
    order by metal_id, report_term, metal_price desc
) highp
    on lowp.rn = highp.rn
   and lowp.metal_id = highp.metal_id
   and lowp.report_term = highp.report_term
   -- Keep only the first-ranked row of each month on both sides.
   and lowp.rn = 1
order by lowp.metal_price_datetime_IST desc

Rolling 12 month with Multiple columns (or dimension) in SQL Server 2008?

Let's suppose I have these fields in my table: Year, Month, Customer, Market, Product, Production Place, Category and Sales (decimal). Now I want an additional column, Rolling 12 Sales, which calculates the sum of sales for the current month plus the past 11 months.
rolling 12 sales (feb 2014)= sales (march 2013)+ sales (april 2013)+..........+sale (Feb 2014)
We have already tried this using a correlated subquery with an AND condition for every dimension, but it does not produce the right output!
The correlated subquery works when I have only 4 columns, such as Year, Month, Customer and Sales.
Please help!
Regards
Sushant
I'm going to assume that if your table's columns are
Year, Month, Customer,Market,Product,Production Place, Category and Sales(decimal)
... you want your Rolling 12 Sales column to be only the last 12 months' sales for that customer, market, product, production place, and category, rather than summarized to some higher level.
You don't say whether any of these columns can be Null, but I'm going to assume they can't, for simplicity.
I would expect this to work (given the conditions I mention above):
-- Every row of Table1 plus the rolling 12-month sales total for the same
-- Customer / Market / Product / Production Place / Category combination.
SELECT t1.Year
     , t1.Month
     , t1.Customer
     , t1.Market
     , t1.Product
     , t1.[Production Place]
     , t1.Category
     , t1.Sales
     , (SELECT SUM(t2.Sales)
        FROM Table1 t2
        WHERE t1.Customer = t2.Customer
          AND t1.Market = t2.Market
          AND t1.Product = t2.Product
          AND t1.[Production Place] = t2.[Production Place]
          AND t1.Category = t2.Category
          -- 12-month window: this year up to and including the current month,
          -- plus the later months of the previous year.
          AND (
                  (t2.Year = t1.Year AND t2.Month <= t1.Month)
               OR (t2.Year = t1.Year - 1 AND t2.Month > t1.Month)
              )
       ) AS [Rolling 12 Month]
FROM Table1 t1
If any of those columns in the subquery's WHERE clause are nullable, change their format to this:
Where ((t1.Market = t2.Market) or (t1.Market IS NULL and t2.Market IS NULL))
If this doesn't work, please explain what error message or incorrect data you get. Thanks.
ETA: You asked whether this could be made to work in a stored procedure. I don't see why it couldn't. You didn't say what the parameters of the stored procedure needed to be (Customer? Current Year and Month?) so the version below has no parameters and returns data for all products and customers that had activity for the last twelve months.
-- Returns, for the current month and year, one row per
-- Customer / Market / Product / Production Place / Category combination that had
-- any row in the last twelve months: the current month's Sales (0 when absent)
-- and the rolling 12-month Sales total.
-- Takes no parameters.
CREATE PROCEDURE dbo.Procedure1
AS
BEGIN
    DECLARE @CurrentMonth int
          , @CurrentYear int;
    -- If you want to parameterize this, you could make @CurrentMonth and @CurrentYear parameters
    -- and skip this.
    SET @CurrentMonth = DATEPART(MM, GETDATE());
    SET @CurrentYear = DATEPART(YYYY, GETDATE());

    WITH SalesFor12Months
    AS (
        -- Rows of this year up to the current month plus the later months of last year.
        SELECT t1.Year
             , t1.Month
             , t1.Customer
             , t1.Market
             , t1.Product
             , t1.[Production Place]
             , t1.Category
             , t1.Sales
        FROM Table1 t1
        WHERE (@CurrentYear = t1.Year AND @CurrentMonth >= t1.Month)
           OR (@CurrentYear - 1 = t1.Year AND @CurrentMonth < t1.Month)
    )
    , SalesForCurrentMonth
    AS (
        -- Only the columns the final join and projection need.
        SELECT Customer
             , Market
             , Product
             , [Production Place]
             , Category
             , Sales
        FROM Table1
        WHERE Month = @CurrentMonth
          AND Year = @CurrentYear
    )
    , AllCustomers
    AS (
        -- Every dimension combination with activity inside the 12-month window.
        SELECT DISTINCT
               Customer
             , Market
             , Product
             , [Production Place]
             , Category
        FROM SalesFor12Months
    )
    SELECT @CurrentMonth AS Month
         , @CurrentYear AS Year
         , ac.Customer
         , ac.Market
         , ac.Product
         , ac.[Production Place]
         , ac.Category
         , COALESCE(curSales.Sales, 0) AS Sales
         , COALESCE(
               (SELECT SUM(t2.Sales)
                FROM SalesFor12Months t2
                WHERE ac.Customer = t2.Customer
                  AND ac.Market = t2.Market
                  AND ac.Product = t2.Product
                  AND ac.[Production Place] = t2.[Production Place]
                  AND ac.Category = t2.Category
               )
           , 0) AS [Rolling 12 Month]
    FROM AllCustomers ac
    -- LEFT JOIN keeps combinations that have no row in the current month.
    LEFT JOIN SalesForCurrentMonth curSales
        ON ac.Customer = curSales.Customer
       AND ac.Market = curSales.Market
       AND ac.Product = curSales.Product
       AND ac.[Production Place] = curSales.[Production Place]
       AND ac.Category = curSales.Category;
END
Hope that helps. I did that without having test data so I apologize if there are typos or other errors.
/* Base load: one row per PMDelSpl record. Quantity is exposed as Rolling12Packs,
   Size * Quantity / 1000 as Rolling12Litres, and Period is built from
   PMYear / PMMonth with the day fixed to 01.
   NOTE(review): the leading "Join SELECT" is not plain T-SQL; it looks like a
   QlikView-style load script statement — confirm the target tool. */
Join SELECT
'PMD' as RecType,
PMID as RecID,
CustomerID,
PMMonth as Month,
PMYear as Year,
Quantity as Rolling12Packs,
Size * Quantity / 1000 AS Rolling12Litres,
(PMDelSpl.Method) as Method,
(PMDelSpl.Size) as Size,
(PMDelSpl.Shape) as Shape,
(PMDelSpl.System) as System,
(PMDelSpl.Type) as Type,
(PMDelSpl.Category) as Category,
(PMDelSpl.SubCat) as SubCat,
(PMDelSpl.Place) as Place,
PMDelSpl.Group,
PMDelSpl.Distribution,
convert( datetime, str( PMYear ) + '/' + STR( PMMonth ) + '/01' ) as Period,
L_ProductType AS LocalCategory,
L_ProductCategory AS LocalSubcategory
FROM PMDelSpl;
**For i=1 to 11**
/* Shifted load: the same PMDelSpl projection, but Month, Year and Period are
   taken from the record's own period moved forward by $(i) months, so each
   record is also reported under a later period. Rows whose shifted period is
   not before GETDATE() are dropped.
   NOTE(review): $(i) is presumably expanded by the surrounding For loop before
   the statement runs — confirm against the host tool. */
Join SELECT
'PMD' as RecType,
PMID as RecID,
CustomerID,
Quantity as Rolling12Packs,
Size * Quantity / 1000 AS Rolling12Litres,
(PMDelSpl.Method) as Method,
(PMDelSpl.Size) as Size,
(PMDelSpl.Shape) as Shape,
(PMDelSpl.System) as System,
(PMDelSpl.Type) as Type,
(PMDelSpl.Category) as Category,
(PMDelSpl.SubCat) as SubCat,
L_ProductType AS LocalCategory,
L_ProductCategory AS LocalSubcategory,
(PMDelSpl.Place) as Place,
PMDelSpl.Group,
PMDelSpl.Distribution,
datepart( month, dateadd( month, $(i), convert( datetime, str( PMYear ) + '/' + STR( PMMonth ) + '/01' ) ) ) as Month,
datepart( year, dateadd( month, $(i), convert( datetime, str( PMYear ) + '/' + STR( PMMonth ) + '/01' ) ) ) as Year,
dateadd( month, $(i), convert( datetime, str( PMYear ) + '/' + STR( PMMonth ) + '/01' ) ) as Period
FROM PMDelSpl
WHERE dateadd( month, $(i), convert( datetime, str( PMYear ) + '/' + STR( PMMonth ) + '/01' ) ) <GETDATE();
**Next i**