I need Some function in PowerQuery to get Additional Columns for duplicated data (not just keep/remove duplicates)
Example:
For the given table I want to get following info for duplicated columns set {"Date", "Product", "Color"}:
Minimal RowId - basicaly, Id of the 1st occurence of data
Nr. of Duplicate - duplicates counter within MinRowId group
NB! For non duplicates it should return null values
try grouping then expanding in powerquery
let Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
#"Grouped Rows" = Table.Group(Source, {"Product", "Color"}, {
{"data", each Table.AddIndexColumn(_, "nDupl", 0, 1, Int64.Type), type table},
{"MinRowID", each List.Min(_[RowId]), type number}
}),
#"Expanded data" = Table.ExpandTableColumn(#"Grouped Rows", "data", {"RowId", "Date", "amount", "nDupl"}, {"RowId", "Date", "amount", "nDupl"})
in #"Expanded data"
Please try following function (download):
Function call Example:
tfnAddDuplicatesInfo2(Source,{"Product","Color","Date"},"DuplInfo" ,"RowId")
Function Arguments:
srcTable as table, // input Table
inGroupBy as list, // List of ColumnNames to search duplicates
outDuplInfo as text, // Output ColumnName for Information about Duplicates - Duplicate number and Minimal RowId (if inRowId provided) within a group
optional inRowId as nullable text // RowId ColumnName - required for outMinRowId calculation for inGroupBy columns
Function body:
let
func = (
srcTable as table, // input Table
inGroupBy as list, // List of ColumnNames to search duplicates
outDuplInfo as text, // Output ColumnName for Information about Duplicates - Duplicate number and Minimal RowId (if inRowId provided) within a group
optional inRowId as nullable text // RowId ColumnName - required for outMinRowId calculation for inGroupBy columns
) =>
let
Source = srcTable,
// // To test as script
// inGroupBy = {"Product", "Color","Date"},
// outDuplInfo = "DuplInfo",
// inRowId = "RowId", // null, "RowId",
//> == Variables ===================================================
Columns2Expand = List.Combine({List.Difference(Table.ColumnNames(Source),inGroupBy),{"__outDuplCounter__"}}),
srcType = Value.Type(Source),
srcTypeRow=
Type.ForRecord(
Record.Combine(
{
Type.RecordFields(Type.TableRow(srcType)),
Type.RecordFields(type [__outDuplCounter__= Int64.Type])
}
),
false
),
RowIdType = if inRowId<>null then Type.TableColumn(srcType,inRowId) else Any.Type, // Stores Column Typename
//< == Variables ===================================================
#"Grouped Rows" = Table.Group(
Source,
inGroupBy,
{
{"__tmpCount__" , each Table.RowCount(_), Int64.Type},
{"__MinGroupRowId__", each if inRowId<> null then List.Min( Record.Field(_,inRowId) ) else null, RowIdType},
{"__AllRows__" , each Table.AddIndexColumn(_, "__outDuplCounter__", 0, 1, Int64.Type), type table srcTypeRow}
}
),
#"Expanded __AllRows__" = Table.ExpandTableColumn(#"Grouped Rows", "__AllRows__", Columns2Expand),
nulls4MinRowId = Table.ReplaceValue(#"Expanded __AllRows__",each [__tmpCount__]<=1, null,
(currentValue, isConditionTrue, replacementValue) => if isConditionTrue then null else currentValue, // Replace.Value function
if inRowId<>null then {"__MinGroupRowId__","__outDuplCounter__"} else {"__outDuplCounter__"}
),
Add_outDuplInfo =
if inRowId<> null then
Table.AddColumn(nulls4MinRowId, outDuplInfo,
each
if [__outDuplCounter__]=null
then null
else [MinRowId=[__MinGroupRowId__], nDupl = [__outDuplCounter__]] ,
type nullable [MinRowId = RowIdType, nDupl = Int64.Type]
)
else
Table.AddColumn(nulls4MinRowId, outDuplInfo, each [__outDuplCounter__], Int64.Type),
Result_tfnAddDuplMinRowId = Table.SelectColumns(Add_outDuplInfo, List.Combine({Table.ColumnNames(Source),{outDuplInfo}}))
in
Result_tfnAddDuplMinRowId,
documentation = [
Documentation.Name = " tfnAddDuplicatesInfo2 ",
Documentation.Description = " Adds two info columns for Duplicates - 1st occurence RowId and given group Occurence Number",
Documentation.LongDescription = " Adds two info columns for Duplicates - 1st occurence RowId and given group Occurence Number",
Documentation.Category = " Running Total ",
Documentation.Source = " ",
Documentation.Version = " 1.0 ",
Documentation.Author = " Denis Sipchenko ",
Documentation.Examples = {
[
Description = "tfnAddDuplicatesInfo2 arguments: ",
Code = "
srcTable as table, // input Table
inGroupBy as list, // List of ColumnNames to search duplicates
outDuplInfo as text, // Output ColumnName for Information about Duplicates - Duplicate number and Minimal RowId (if inRowId provided) within a group
optional inRowId as nullable text // RowId ColumnName - required for outMinRowId calculation for inGroupBy columns",
Result =""
],
[
Description = "tfnAddDuplicatesInfo2 function call example ",
Code = "
let Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText(""hZTBasMwEET/xWdDdteSbP9CT4U2h2JyCK1oQ0xS3IT8frUpWsmSqpxs4ccw2pn1NDXYtA3CBsYNAZE7PNn96cc93+w8n2/uZWwBml07NfwVTIS+nN+PK1SDZzuW1RG7PX3Y5Wb3y4r3uHKHDgrSz9fle7buRQ2e1e5EpuA4sORZw+x/NgIvtnu2jbGP42G5rMS73sMDw0MdlhuODKua68Ai8KT7CH49fH5dVqOOaI6QoO5DCX1PkeraKDTnSKquLdNDjhGLvgMtsE6NZHUKrEnrVBPuU8/F0El6jRykox+UlSR45DCJamEGmODhhpERGNOa5BeNaErrna0NSU3ovpJjXVpqQip1LcGLbZSVJJ1OMLsjBtcm/Y8Ux43BCwcKxa0s0UPqPC84/hV89ws="", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [RowId = Int64.Type, Date = date, Product = _t, Color = _t, Amount = Currency.Type])
in
tfnAddDuplicatesInfo2(Source,{""Product"",""Color"",""Date""},""DuplInfo"" ,""RowId"")
",
Result = "Adds to Source table ""DuplInfo"" column with records:
""MinRowId"" - Minimal RowId within within given group,
""nDupl"" - given group Occurence Number
"
],
[
Description = "tfnAddDuplicatesInfo2 function short call example ",
Code = "
let Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText(""hZTBasMwEET/xWdDdteSbP9CT4U2h2JyCK1oQ0xS3IT8frUpWsmSqpxs4ccw2pn1NDXYtA3CBsYNAZE7PNn96cc93+w8n2/uZWwBml07NfwVTIS+nN+PK1SDZzuW1RG7PX3Y5Wb3y4r3uHKHDgrSz9fle7buRQ2e1e5EpuA4sORZw+x/NgIvtnu2jbGP42G5rMS73sMDw0MdlhuODKua68Ai8KT7CH49fH5dVqOOaI6QoO5DCX1PkeraKDTnSKquLdNDjhGLvgMtsE6NZHUKrEnrVBPuU8/F0El6jRykox+UlSR45DCJamEGmODhhpERGNOa5BeNaErrna0NSU3ovpJjXVpqQip1LcGLbZSVJJ1OMLsjBtcm/Y8Ux43BCwcKxa0s0UPqPC84/hV89ws="", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [RowId = Int64.Type, Date = date, Product = _t, Color = _t, Amount = Currency.Type])
in
tfnAddDuplicatesInfo2(Source,{""Product"",""Color"",""Date""},""nDupl"")
",
Result = "Adds to Source table one column:
""nDupl"" - given group Occurence Number
"
]
}
]
in
Value.ReplaceType(func, Value.ReplaceMetadata(Value.Type(func), documentation))
P.S. Idea about group & expand index column borrowed from horseyride post.
P.S.S. Initially, I took as a source Running Total by Category by Rick de Groot. And than reworked it.
How to write table function with non fixed parameter list length?
Particular simplified example:
I want to write function trimupper(TableName,ColumnName1,ColumnName2,...) that combines just two steps for given set of columns:
TRIM whitespaces
UPPERCASE text
Example for two columns case:
(tbl as table, cn1 as text, cn2 as text) =>
let
#"Trimmed Text" = Table.TransformColumns(tbl,{{cn1, Text.Trim , type text}, {cn2, Text.Trim , type text}}),
#"Uppercased Text" = Table.TransformColumns(tbl,{{cn1, Text.Upper, type text}, {cn2, Text.Upper, type text}}),
trimupperResult = #"Uppercased Text"
in
trimupperResult
But how to do it for variable number of ColumnNames?
let Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
changethem = transform (Source,{"ColumnName1","ColumnName2"})
in changethem
function transform
(Table as table, columnnames as list) =>
let columnnames = if columnnames = null then Table.ColumnNames(Table) else columnnames,
change = Table.TransformColumns( Table, List.Transform(columnnames, each {_, Text.Trim, type text} ) ),
change1 = Table.TransformColumns( change, List.Transform(columnnames, each {_, Text.Upper, type text} ) )
in change1
As a newbe, I have a question about Power Query (M)
I am looking for a way to extract samo info from an API result.
For starters I am doing this:
I have created a query to get the title from a task.
This works fine:
let
Source = Web.Contents(#fxGetSource() & "/tasks/IEABCDQ7KQPO5DQ4",
[Headers=[#"Authorization"=#fxGetHeader()]]),
convertToJson = Json.Document(Source),
data = convertToJson[data],
ConvertedToTable = Table.FromList(data, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
ExpandedColumn1 = Table.ExpandRecordColumn(ConvertedToTable, "Column1", {"title"}),
TheTitle = Table.TransformColumnTypes(ExpandedColumn1,{{"title", type text}})
in
TheTitle
I would like to have the taskid to sit in a variable, so I created a function:
(aTask as text) as text =>
let
Source = Web.Contents(#fxGetSource() & "/tasks/" & aTask,
[Headers=[#"Authorization"=#fxGetHeader()]]),
convertToJson = Json.Document(Source),
data = convertToJson[data],
ConvertedToTable = Table.FromList(data, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
ExpandedColumn1 = Table.ExpandRecordColumn(ConvertedToTable, "Column1", {"title"}),
TheTitle = Table.TransformColumnTypes(ExpandedColumn1,{{"title", type text}})
in
TheTitle
When I invoke this function ans use the taskid from above I get:
Expression Error: We cannot convert a value of type Table to type Text.
change
(aTask as text) as text =>
to
(aTask as text) as table =>
I am unable to run the query because it says there is a token comma expected and highlights the "donordrive" in "donordrive-password".
let
Source = Json.Document(Web.Contents("https://api.donordrive.com/cmndancemarathon/export/commit.JSON", [Headers=[#"donordrive-email" ="email#gmail.com”, #"donordrive-password" ="password2020"])),
result = Source[result],
#"Converted to Table" = Table.FromList(result, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
#"Expanded Column1" = Table.ExpandRecordColumn(#"Converted to Table", "Column1", {"donationentereddate", "eventfiscalyear", "donationamount", "donationisregistrationfee", "participantid", "participantfirstname", "participantlastname", "teamname", "donorfirstname", "donorlastname"}, {"Column1.donationentereddate", "Column1.eventfiscalyear", "Column1.donationamount", "Column1.donationisregistrationfee", "Column1.participantid", "Column1.participantfirstname", "Column1.participantlastname", "Column1.teamname", "Column1.donorfirstname", "Column1.donorlastname"})
in
#"Expanded Column1"
The query below runs quickly when the "FROM" clause has only a single account number (a.account_nbr) and a single object code (a.fin_object_cd), but when I modify the query so that the FROM clause has a range of account numbers and a range of object codes, it takes a VERY long amount of time to return results. It goes from about a minute to run to 20 or more minutes.
The query does return the results I want, but I need to make it run more quickly.
What can I do? I am not sure if adding indexes to some columns would help, or if there's just a better way to write the query.
If you need more information about the tables, or what I'm trying to accomplish, please let me know.
select
a.account_nbr as "Account Number",
a.account_nm as "Account Name",
a.fin_object_cd as "Object Code",
a.fin_obj_cd_nm as "Object Code Name",
(select COALESCE(sum(fin_beg_bal_ln_amt),0) from kfsprd.gl_balance_t where account_nbr = a.account_nbr and fin_object_cd = a.fin_object_cd and univ_fiscal_yr = "2015" and fin_balance_typ_cd != "CB") as "Beginning Balance",
(select COALESCE(sum(trn_ldgr_entr_amt),0) FROM kfsprd.kf_f_transaction_dtl where univ_fiscal_yr = "2015" and account_nbr = a.account_nbr and fin_object_cd = a.fin_object_cd and trn_debit_crdt_cd = "D") as "Debits",
(select COALESCE(sum(trn_ldgr_entr_amt),0) FROM kfsprd.kf_f_transaction_dtl where univ_fiscal_yr = "2015" and account_nbr = a.account_nbr and fin_object_cd = a.fin_object_cd and trn_debit_crdt_cd = "C" and trn_ldgr_entr_amt is not null) as "Credits",
(
(select COALESCE(sum(fin_beg_bal_ln_amt),0) from kfsprd.gl_balance_t where account_nbr = a.account_nbr and fin_object_cd = a.fin_object_cd and univ_fiscal_yr = "2015" and fin_balance_typ_cd != "CB") +
(select COALESCE(sum(trn_ldgr_entr_amt),0) FROM kfsprd.kf_f_transaction_dtl where univ_fiscal_yr = "2015" and account_nbr = a.account_nbr and fin_object_cd = a.fin_object_cd and trn_debit_crdt_cd = "D") -
(select COALESCE(sum(trn_ldgr_entr_amt),0) FROM kfsprd.kf_f_transaction_dtl where univ_fiscal_yr = "2015" and account_nbr = a.account_nbr and fin_object_cd = a.fin_object_cd and trn_debit_crdt_cd = "C")
) as "Ending Balance"
from kfsprd.kf_f_transaction_dtl a where univ_fiscal_yr = "2015" and (univ_fiscal_prd_cd between "01" and "13" or
univ_fiscal_prd_cd = "BB") and a.account_nbr between "1014000" and "1014005" and a.fin_object_cd between "0000" and "9999" and a.fin_balance_typ_cd != "CB"
group by account_nbr, fin_object_cd
order by account_nbr, fin_object_cd;
Are you able to add fin_coa_cd to the where clause? If that has the same primary key as the original table (gl_entry_t), then the first 3 columns of that table would be univ_fiscal_yr, fin_coa_cd, and account_nbr. If you have a chart code to use, it may help Oracle use that index in the main query.