Zendesk/PowerBI: Pagination with PowerQuery M - json

I am trying to get all ticket data from Zendesk using their v2 API through Power BI. However, the API returns at most 100 results per page and paginates the rest, so I only get the first page in my Power BI table.
How do I set up pagination in Power Query to get all results? I've tried List.Generate, but I haven't managed to get it working so far. Do you have a solution?
The Query tells me what the next_page and previous_page is : https://i.stack.imgur.com/fm8We.png
I have never done powerquery and I often encounter syntax errors in what I write...
let
    // Expands a record column and names every new column "<column>.<field>".
    ExpandPrefixed = (tbl as table, column as text, fields as list) as table =>
        Table.ExpandRecordColumn(tbl, column, fields, List.Transform(fields, each column & "." & _)),

    // Fields pulled out of each ticket record.
    TicketFields = {
        "url", "id", "external_id", "via", "created_at", "updated_at", "type", "subject", "raw_subject",
        "description", "priority", "status", "recipient", "requester_id", "submitter_id", "assignee_id",
        "organization_id", "group_id", "collaborator_ids", "follower_ids", "email_cc_ids", "forum_topic_id",
        "problem_id", "has_incidents", "is_public", "due_at", "tags", "custom_fields", "satisfaction_rating",
        "sharing_agreement_ids", "fields", "followup_ids", "ticket_form_id", "brand_id", "allow_channelback",
        "allow_attachments"
    },

    // A single request: only this one response is read, its next_page value is kept as a column.
    Response = Json.Document(Web.Contents("https://xxxxxxx.zendesk.com/api/v2/tickets", [Headers=[Authorization="xxxxxx"]])),
    AsTable = Table.FromRecords({Response}),

    // One row per entry of the "tickets" list, then flatten the nested records level by level.
    TicketRows = Table.ExpandListColumn(AsTable, "tickets"),
    Tickets = ExpandPrefixed(TicketRows, "tickets", TicketFields),
    Via = ExpandPrefixed(Tickets, "tickets.via", {"channel", "source"}),
    ViaSource = ExpandPrefixed(Via, "tickets.via.source", {"from", "to", "rel"}),
    ViaFrom = ExpandPrefixed(ViaSource, "tickets.via.source.from", {"address", "name"}),
    ViaTo = ExpandPrefixed(ViaFrom, "tickets.via.source.to", {"name", "address"}),
    Rating = ExpandPrefixed(ViaTo, "tickets.satisfaction_rating", {"score", "id", "comment", "reason", "reason_id"}),

    Typed = Table.TransformColumnTypes(Rating, {
        {"tickets.url", type text},
        {"tickets.id", Int64.Type},
        {"tickets.external_id", type any},
        {"tickets.via.channel", type text},
        {"tickets.via.source.from.address", type text},
        {"tickets.via.source.from.name", type text},
        {"tickets.via.source.to.name", type text},
        {"tickets.via.source.to.address", type text},
        {"tickets.via.source.rel", type any},
        {"tickets.created_at", type datetime},
        {"tickets.updated_at", type datetime},
        {"tickets.type", type text},
        {"tickets.subject", type text},
        {"tickets.raw_subject", type text},
        {"tickets.description", type text},
        {"tickets.priority", type text},
        {"tickets.status", type text},
        {"tickets.recipient", type text},
        {"tickets.requester_id", Int64.Type},
        {"tickets.submitter_id", Int64.Type},
        {"tickets.assignee_id", Int64.Type},
        {"tickets.organization_id", Int64.Type},
        {"tickets.group_id", Int64.Type},
        {"tickets.collaborator_ids", type any},
        {"tickets.follower_ids", type any},
        {"tickets.email_cc_ids", type any},
        {"tickets.forum_topic_id", type any},
        {"tickets.problem_id", type any},
        {"tickets.has_incidents", type logical},
        {"tickets.is_public", type logical},
        {"tickets.due_at", type any},
        {"tickets.tags", type any},
        {"tickets.custom_fields", type any},
        {"tickets.satisfaction_rating.score", type text},
        {"tickets.satisfaction_rating.id", Int64.Type},
        {"tickets.satisfaction_rating.comment", type any},
        {"tickets.satisfaction_rating.reason", type text},
        {"tickets.satisfaction_rating.reason_id", Int64.Type},
        {"tickets.sharing_agreement_ids", type any},
        {"tickets.fields", type any},
        {"tickets.followup_ids", type any},
        {"tickets.ticket_form_id", Int64.Type},
        {"tickets.brand_id", Int64.Type},
        {"tickets.allow_channelback", type logical},
        {"tickets.allow_attachments", type logical},
        {"next_page", type text},
        {"previous_page", type any},
        {"count", Int64.Type}
    })
in
    Typed
Thanks in advance.

Related

How to use 2 variables in a function instead of 1 to web-scrape sub-URLs in Power BI and Power Query?

I Have this table;
that was created using this code;
let
    Sitemap = Xml.Tables(Web.Contents("https://www.edmunds.com/sitemap_web54-mmy-cost-to-own.xml")),
    Urls = Sitemap{0}[Table],
    FirstTen = Table.FirstN(Urls, 10),

    // Download the HTML of every page listed in the "loc" column.
    WithHtml = Table.AddColumn(FirstTen, "Custom", each Web.BrowserContents([loc])),

    // Slice out the text from the first "<optgroup" up to and including the first
    // "</optgroup>" (11 characters long). Any error while slicing yields "<optgroup/>".
    WithOptGroup = Table.AddColumn(
        WithHtml,
        "Custom.3",
        each try (
            let
                html = [Custom],
                openAt = Text.PositionOf(html, "<optgroup"),
                closeAt = Text.PositionOf(html, "</optgroup>")
            in
                Text.Range(html, openAt, closeAt - openAt + 11)
        ) otherwise "<optgroup/>"
    ),

    // Parse the fragment as XML and unfold its option entries into Model / Style columns.
    Parsed = Table.TransformColumns(WithOptGroup, {{"Custom.3", Xml.Tables}}),
    Options = Table.ExpandTableColumn(Parsed, "Custom.3", {"option"}, {"option"}),
    ModelsAndStyles = Table.ExpandTableColumn(Options, "option", {"Element:Text", "Attribute:value"}, {"Model", "Style"})
in
    ModelsAndStyles
If you look at the loc column you will see that there is the same link for multiple models.
Ultimately I want the Cost to Own data for each model.
So I created this code in a new query and tied it to a custom column in the above query.
// Loads the page at PageMake and returns the rows matched by the row selector
// as a table with promoted headers and typed columns.
(PageMake as text)=>
let
    // Selector for the table rows; each column selector below is relative to it.
    RowCss = "SECTION:nth-child(2) > DIV.table-responsive > TABLE.costs-table.text-gray-darker.table.table-borderless > * > TR",

    // Column1..Column7 read the 1st..7th child of every matched row.
    ColumnSpecs = List.Transform(
        {1..7},
        each {"Column" & Text.From(_), RowCss & " > :nth-child(" & Text.From(_) & ")"}
    ),

    PageHtml = Web.BrowserContents(PageMake),
    Extracted = Html.Table(PageHtml, ColumnSpecs, [RowSelector=RowCss]),
    WithHeaders = Table.PromoteHeaders(Extracted, [PromoteAllScalars=true]),

    // The first (unnamed) column is text; the yearly figures and the total are currency.
    YearTypes = List.Transform({1..5}, each {"Year " & Text.From(_), Currency.Type}),
    Typed = Table.TransformColumnTypes(WithHeaders, {{"", type text}} & YearTypes & {{"Total", Currency.Type}})
in
    Typed
The problem is that the link does not have any tie to the different models so when it pulls the Cost to Own data table it simply pulls the first one for all models associated with that link.
The problem you are facing is that there is not a distinct table for each vehicle. There is one table shared by several. If you do a new web query (Query -> Web) and enter a URL like https://www.edmunds.com/lexus/ls-460/2016/cost-to-own/?style=401580678 you'll see the collection of tables. (I'm quite sure you already know this from your info above.) But if you look within the Suggested Tables, you'll see they all contain information for multiple 2016 models. I think you'll need to pull the entire table with the information you want (I think you want Table 1) and then parse that table once you get it. You can use any one of the vehicles from that table to get to the table. (It appears to me, from a quick glance, that each table is all of the vehicles from the year group).
Call your function from a new column in your first query with = Table.AddColumn(#"Expanded option", "Custom.1", each fnGetEdmunds(Text.From([loc])&"?style="&Text.From([Style]))). You'll get some errors because some rows don't have Styles to find pages with the Cost to Own and some of the pages don't have tables with Cost to Own. So you'll have to handle those errors.
Here's M code:
//The base query:
let
    // Returns the text from the first "<optgroup" through the first "</optgroup>" (11 characters).
    SliceOptGroup = (html) =>
        let
            startPos = Text.PositionOf(html, "<optgroup"),
            endPos = Text.PositionOf(html, "</optgroup>")
        in
            Text.Range(html, startPos, endPos - startPos + 11),

    SitemapTables = Xml.Tables(Web.Contents("https://www.edmunds.com/sitemap_web54-mmy-cost-to-own.xml")),
    UrlTable = SitemapTables{0}[Table],
    TopRows = Table.FirstN(UrlTable, 10),
    PageHtml = Table.AddColumn(TopRows, "Custom", each Web.BrowserContents([loc])),

    // Any error while slicing (including reading the page HTML) falls back to an empty element.
    OptGroups = Table.AddColumn(PageHtml, "Custom.3", each try SliceOptGroup([Custom]) otherwise "<optgroup/>"),
    OptGroupsXml = Table.TransformColumns(OptGroups, {{"Custom.3", Xml.Tables}}),
    OptionTables = Table.ExpandTableColumn(OptGroupsXml, "Custom.3", {"option"}, {"option"}),
    Styles = Table.ExpandTableColumn(OptionTables, "option", {"Element:Text", "Attribute:value"}, {"Model", "Style"}),

    // Call fnGetEdmunds once per row with the page URL plus that row's style id.
    WithCostTables = Table.AddColumn(
        Styles,
        "Custom.1",
        each fnGetEdmunds(Text.From([loc]) & "?style=" & Text.From([Style]))
    )
in
    WithCostTables
//The function named fnGetEdmunds
// Loads the page at PageMake and extracts one row per ".col-fixed" match,
// with 24 columns taken from the selectors listed below.
(PageMake as text)=>
let
    // Selector i (1-based) feeds the column named "Column<i>".
    CellSelectors = {
        ".col-fixed",
        ".col-padding-left",
        ".col-padding-left + *",
        ".d-none TD:nth-child(4)",
        ".d-none TD:nth-child(5)",
        ".d-none TD:nth-child(6)",
        ".d-none .font-weight-bold",
        ".d-none:nth-child(3) TD:nth-child(4)",
        ".d-none:nth-child(3) TD:nth-child(5)",
        ".d-none:nth-child(3) TD:nth-child(6)",
        ".d-none:nth-child(4) TD:nth-child(4)",
        ".d-none:nth-child(4) TD:nth-child(5)",
        ".d-none:nth-child(4) TD:nth-child(6)",
        ".d-none:nth-child(5) TD:nth-child(4)",
        ".d-none:nth-child(5) TD:nth-child(5)",
        ".d-none:nth-child(5) TD:nth-child(6)",
        ".d-inline",
        ".p-0.heading-4",
        ".mb-1 SPAN",
        "CAPTION",
        "TH:nth-child(4)",
        "TH:nth-child(5)",
        "TH:nth-child(6)",
        "TH:nth-child(7)"
    },
    ColumnSpecs = List.Transform(
        List.Positions(CellSelectors),
        each {"Column" & Text.From(_ + 1), CellSelectors{_}}
    ),

    PageHtml = Web.BrowserContents(PageMake),
    Extracted = Html.Table(PageHtml, ColumnSpecs, [RowSelector=".col-fixed"]),
    WithHeaders = Table.PromoteHeaders(Extracted, [PromoteAllScalars=true]),

    // Column4..Column16 are typed as currency; every other listed column is text.
    CurrencyTypes = List.Transform({4..16}, each {"Column" & Text.From(_), Currency.Type}),
    Typed = Table.TransformColumnTypes(
        WithHeaders,
        {{"", type text}, {"Year 1", type text}, {"Year 2", type text}}
            & CurrencyTypes
            & {{"Column17", type text}, {"Column18", type text}, {"Column19", type text}, {"Column20", type text},
               {"Year 3", type text}, {"Year 4", type text}, {"Year 5", type text}, {"Total", type text}}
    )
in
    Typed
Like I said above: You'll still need to parse the tables to extract the info for the specific vehicles, and to handle the errors.

Power query JSON dynamic URL

This is my first time using Power Query and I need your help.
I'd like to retrieve some company data from the web based on the company's identification number, the French SIRET.
To do that, I'd like to enter the SIRET in a cell (named entersiret) in my Excel workbook and then pass it to the URL in Power Query. I have tried this, but it is not working:
let
    // Content of the workbook name "entersiret".
    siret = Excel.CurrentWorkbook(){[Name="entersiret"]}[Content],
    // First value of Column1. Note: this step is not referenced anywhere below.
    entersiret = siret[Column1]{0},
    // The URL is built by appending `siret` to the base address.
    Source = Json.Document(Web.Contents("https://entreprise.data.gouv.fr/api/sirene/v3/etablissements/" & siret)),
    AsTable = Record.ToTable(Source),

    // Fields expanded out of the "Value" record; each becomes a "Value.<field>" column.
    Fields = {
        "id", "siren", "nic", "siret", "statut_diffusion", "date_creation", "tranche_effectifs", "annee_effectifs",
        "activite_principale_registre_metiers", "date_dernier_traitement", "etablissement_siege", "nombre_periodes",
        "complement_adresse", "numero_voie", "indice_repetition", "type_voie", "libelle_voie", "code_postal",
        "libelle_commune", "libelle_commune_etranger", "distribution_speciale", "code_commune", "code_cedex",
        "libelle_cedex", "code_pays_etranger", "libelle_pays_etranger", "complement_adresse_2", "numero_voie_2",
        "indice_repetition_2", "type_voie_2", "libelle_voie_2", "code_postal_2", "libelle_commune_2",
        "libelle_commune_etranger_2", "distribution_speciale_2", "code_commune_2", "code_cedex_2", "libelle_cedex_2",
        "code_pays_etranger_2", "libelle_pays_etranger_2", "date_debut", "etat_administratif", "enseigne_1",
        "enseigne_2", "enseigne_3", "denomination_usuelle", "activite_principale", "nomenclature_activite_principale",
        "caractere_employeur", "longitude", "latitude", "geo_score", "geo_type", "geo_adresse", "geo_id", "geo_ligne",
        "geo_l4", "geo_l5", "unite_legale_id", "created_at", "updated_at", "unite_legale"
    },
    Expanded = Table.ExpandRecordColumn(AsTable, "Value", Fields, List.Transform(Fields, each "Value." & _))
in
    Expanded
You drill down from the table "siret" to a single cell value in "entersiret", but you never use "entersiret": the URL still concatenates "siret". This will work:
let
    // Content of the workbook name "entersiret", then the first value of its Column1.
    siret = Excel.CurrentWorkbook(){[Name="entersiret"]}[Content],
    entersiret = siret[Column1]{0},

    // Request the record for that value.
    Source = Json.Document(Web.Contents("https://entreprise.data.gouv.fr/api/sirene/v3/etablissements/" & entersiret)),
    RecordTable = Record.ToTable(Source),

    // Names of the fields to pull out of the "Value" record.
    ValueFields = {
        "id", "siren", "nic", "siret", "statut_diffusion", "date_creation", "tranche_effectifs", "annee_effectifs",
        "activite_principale_registre_metiers", "date_dernier_traitement", "etablissement_siege", "nombre_periodes",
        "complement_adresse", "numero_voie", "indice_repetition", "type_voie", "libelle_voie", "code_postal",
        "libelle_commune", "libelle_commune_etranger", "distribution_speciale", "code_commune", "code_cedex",
        "libelle_cedex", "code_pays_etranger", "libelle_pays_etranger", "complement_adresse_2", "numero_voie_2",
        "indice_repetition_2", "type_voie_2", "libelle_voie_2", "code_postal_2", "libelle_commune_2",
        "libelle_commune_etranger_2", "distribution_speciale_2", "code_commune_2", "code_cedex_2", "libelle_cedex_2",
        "code_pays_etranger_2", "libelle_pays_etranger_2", "date_debut", "etat_administratif", "enseigne_1",
        "enseigne_2", "enseigne_3", "denomination_usuelle", "activite_principale", "nomenclature_activite_principale",
        "caractere_employeur", "longitude", "latitude", "geo_score", "geo_type", "geo_adresse", "geo_id", "geo_ligne",
        "geo_l4", "geo_l5", "unite_legale_id", "created_at", "updated_at", "unite_legale"
    },
    // Output column name for a field: "Value.<field>".
    Prefixed = (fieldName) => "Value." & fieldName,
    Result = Table.ExpandRecordColumn(RecordTable, "Value", ValueFields, List.Transform(ValueFields, Prefixed))
in
    Result
solved...my mistake:
let
    // Content of the workbook name "entersiret".
    siret = Excel.CurrentWorkbook(){[Name="entersiret"]}[Content],

    // The URL needs text. Text.From converts a numeric cell exactly as before and also
    // accepts a cell that already holds text, where Number.ToText would raise an error.
    paramsiret = Text.From(siret[Column1]{0}),

    Source = Json.Document(Web.Contents("https://entreprise.data.gouv.fr/api/sirene/v3/etablissements/" & paramsiret)),
    #"Converti en table" = Record.ToTable(Source),

    // Fields expanded out of the "Value" record; each becomes a "Value.<field>" column.
    ValueFields = {
        "id", "siren", "nic", "siret", "statut_diffusion", "date_creation", "tranche_effectifs", "annee_effectifs",
        "activite_principale_registre_metiers", "date_dernier_traitement", "etablissement_siege", "nombre_periodes",
        "complement_adresse", "numero_voie", "indice_repetition", "type_voie", "libelle_voie", "code_postal",
        "libelle_commune", "libelle_commune_etranger", "distribution_speciale", "code_commune", "code_cedex",
        "libelle_cedex", "code_pays_etranger", "libelle_pays_etranger", "complement_adresse_2", "numero_voie_2",
        "indice_repetition_2", "type_voie_2", "libelle_voie_2", "code_postal_2", "libelle_commune_2",
        "libelle_commune_etranger_2", "distribution_speciale_2", "code_commune_2", "code_cedex_2", "libelle_cedex_2",
        "code_pays_etranger_2", "libelle_pays_etranger_2", "date_debut", "etat_administratif", "enseigne_1",
        "enseigne_2", "enseigne_3", "denomination_usuelle", "activite_principale", "nomenclature_activite_principale",
        "caractere_employeur", "longitude", "latitude", "geo_score", "geo_type", "geo_adresse", "geo_id", "geo_ligne",
        "geo_l4", "geo_l5", "unite_legale_id", "created_at", "updated_at", "unite_legale"
    },
    #"Value développé" = Table.ExpandRecordColumn(
        #"Converti en table",
        "Value",
        ValueFields,
        List.Transform(ValueFields, each "Value." & _)
    )
in
    #"Value développé"

Decoding HTML symbol decimal numbers into actual symbols in Power BI

The data has HTML values inside text:
col1
-------------------------------------------------------------
Drell-Yan Process Background Estimation Using e&#956; Method
Expressions of constant &#960;
Computational Analysis of Protein &#946;-Structure
&#948;13C and 14C Measurements in Aerosol Particles
I need to get actual symbols instead of all the HTML decimal values.
Html.Table in powerquery-m can decode HTML decimal values into visible symbols:
let
    // Sample data embedded as compressed JSON: a single text column "col1".
    Sample = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("PY7BCsIwDIZfJcyrg03nUHaa06MwEA8ydyg1arFrR5KCvr1l4C7JId/3/+m65EBobXpVDlryGplhr/T7ST64OxxZzKDEeAcXNu4JeAtZtioXu01ZwQnl5e9Jv+yS42ek6EaQwT9Axy3KCfzxMqsmrvHDGGRKVBbqOL5sJiW2Cxo3G0VZpWehoCUQTu582Vb5ugFDkBdNfEJxJAZ0whD9Gsmzt9AqEqMtctL3Pw==", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [col1 = _t]),
    AsText = Table.TransformColumnTypes(Sample, {{"col1", type text}}),

    // Parse a cell as HTML; the ":root" selector yields its content in a one-column table.
    ParseAsHtml = (html) => Html.Table(html, {{"HtmlDecoded", ":root"}}),

    // Keep col1 and add the parsed result next to it as a new "HtmlDecoded" column.
    WithParsed = Table.AddColumn(AsText, "HtmlTable", each ParseAsHtml([col1])),
    Result = Table.ExpandTableColumn(WithParsed, "HtmlTable", {"HtmlDecoded"}, {"HtmlDecoded"})
in
    Result
Or in place:
let
    // Sample data embedded as compressed JSON: a single text column "col1".
    Sample = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("PY7BCsIwDIZfJcyrg03nUHaa06MwEA8ydyg1arFrR5KCvr1l4C7JId/3/+m65EBobXpVDlryGplhr/T7ST64OxxZzKDEeAcXNu4JeAtZtioXu01ZwQnl5e9Jv+yS42ek6EaQwT9Axy3KCfzxMqsmrvHDGGRKVBbqOL5sJiW2Cxo3G0VZpWehoCUQTu582Vb5ugFDkBdNfEJxJAZ0whD9Gsmzt9AqEqMtctL3Pw==", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [col1 = _t]),
    AsText = Table.TransformColumnTypes(Sample, {{"col1", type text}}),

    // Parse the cell as HTML and take the first value of the resulting table.
    DecodeEntities = (html) => Table.FirstValue(Html.Table(html, {{"HtmlDecoded", ":root"}})),

    // Overwrite col1 with its decoded value instead of adding a new column.
    Decoded = Table.TransformColumns(AsText, {{"col1", DecodeEntities}})
in
    Decoded

Power Query in SSIS - Do not support Html.Table

SSIS supports Power Query as a source, but it does not seem to support M code that uses Html.Table. I'm using only SQL Server Data Tools (SSDT).
M code below=
let
    // Scrapes one listing page (selected by its page number) into a typed table
    // with the columns Title, Date and Views.
    Scrapper = (Page as number) as table =>
        let
            PageUrl = "https://www.zerohedge.com/?page=" & Number.ToText(Page),
            PageHtml = Web.BrowserContents(PageUrl),
            ColumnSpecs = {
                {"Title", ".teaser-title:nth-last-child(4)"},
                {"Date", ".extras__created:nth-last-child(1)"},
                {"Views", ".extras__views:nth-last-child(2)"}
            },
            Rows = Html.Table(PageHtml, ColumnSpecs, [RowSelector=".view-content:nth-last-child(2) > DIV.views-row"]),
            Typed = Table.TransformColumnTypes(Rows, {{"Title", type text}, {"Date", type datetime}, {"Views", Int64.Type}})
        in
            Typed,

    // Page numbers to fetch, turned into a one-column table named "Pages".
    PageNumbers = {0..1},
    PageTable = Table.FromList(PageNumbers, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
    NamedPages = Table.RenameColumns(PageTable, {{"Column1", "Pages"}}),

    // Scrape each page and unfold the nested tables into rows.
    WithResults = Table.AddColumn(NamedPages, "Scrapper", each Scrapper([Pages])),
    AllRows = Table.ExpandTableColumn(WithResults, "Scrapper", {"Title", "Date", "Views"}, {"Title", "Date", "Views"})
in
    AllRows
Error: 0x0 at Data Flow Task, Power Query Source: The import
Html.Table matches no exports. Did you miss a module reference?
I don't see any other solution than to wait for Microsoft to support Html.Table.
error image in SSDT

Loop Computation in Power Query

I'm trying to merge several columns in a table to columns of another table. Each column in the primary table contains texts while the PrimaryAnalysis table contains indexes for the texts. I'd like to create columns of indexes for the primary table but I'm having to do that one at a time for each table thus:
// Fragment of a larger query: the same three steps (join, expand, rename) are
// repeated once per text column, each time against the PrimaryAnalysis table.

// Text.1: join on Text.1 = Letter, take the matching Index and name it "First".
#"Merged Queries" = Table.NestedJoin(#"Changed Type2",{"Text.1"},PrimaryAnalysis,{"Letter"},"NewColumn"),
#"Expanded NewColumn" = Table.ExpandTableColumn(#"Merged Queries", "NewColumn", {"Index"}, {"Index"}),
#"Renamed Columns2" = Table.RenameColumns(#"Expanded NewColumn",{{"Index", "First"}}),
// Text.2: same steps on the previous result, Index renamed to "2nd".
#"Merged Queries1" = Table.NestedJoin(#"Renamed Columns2",{"Text.2"},PrimaryAnalysis,{"Letter"},"NewColumn"),
#"Expanded NewColumn1" = Table.ExpandTableColumn(#"Merged Queries1", "NewColumn", {"Index"}, {"Index"}),
#"Renamed Columns3" = Table.RenameColumns(#"Expanded NewColumn1",{{"Index", "2nd"}}),
// Text.3: same steps again, Index renamed to "3rd".
#"Merged Queries2" = Table.NestedJoin(#"Renamed Columns3",{"Text.3"},PrimaryAnalysis,{"Letter"},"NewColumn"),
#"Expanded NewColumn2" = Table.ExpandTableColumn(#"Merged Queries2", "NewColumn", {"Index"}, {"Index"}),
#"Renamed Columns4" = Table.RenameColumns(#"Expanded NewColumn2",{{"Index", "3rd"}}),
Now I have to do that for 23 columns. Is there a way to implement DO...Repeat or any other loop in Power Query to perform this task?
Thanks in advance.
One way to loop in Power Query is to use a recursive function.
In the code below I read an Excel file with a table that should be similar to your primary table (so the step #"Changed Type2" in the code below should be similar to your step #"Changed Type2").
Next a function AddIndices is defined in which 1 column with an Index is added in each iteration. After 23 iterations the function stops, otherwise it calls itself.
An important point of attention with such recursive functions is that it MUST include a Table.Buffer (see step "Expanded"), otherwise in each iteration the code tries to evaluate all former iterations again and gets stuck. Table.Buffer prevents this.
In the last step of the query, the function is invoked.
let
    Source = Excel.Workbook(File.Contents("C:\Users\Marcel\Documents\Forum bijdragen\StackOverflow Power Query\Loop Computation in Power Query.xlsx"), null, true),
    Tabel1_Table = Source{[Item="Tabel1",Kind="Table"]}[Data],

    // Number of "Text.n" columns; drives both the type list and the recursion depth.
    ColumnCount = 23,

    // Type Text.1 .. Text.<ColumnCount> as text.
    #"Changed Type2" = Table.TransformColumnTypes(
        Tabel1_Table,
        List.Transform({1..ColumnCount}, each {"Text." & Text.From(_), type text})
    ),

    // Recursive function:
    // Adds one "Index.n" column per call by joining column "Text.n" of TableSoFar with the
    // "Letter" column of PrimaryAnalysis, then calls itself for the next column.
    //   TableSoFar - table built up so far
    //   Iteration  - 1-based number of the column to process; starts at 1 when omitted
    // Returns the table after the last column has been processed.
    AddIndices = (TableSoFar as table, optional Iteration as number) as table =>
        let
            CurrentIteration = if Iteration = null then 1 else Iteration,
            CurrentColumn = "Text."&Text.From(CurrentIteration),
            NewIndexColumn = "Index."&Text.From(CurrentIteration),
            MergedTable = Table.NestedJoin(TableSoFar,{CurrentColumn},PrimaryAnalysis,{"Letter"},"NewColumn"),
            // Buffer the intermediate table before it is passed to the next iteration.
            Expanded = Table.Buffer(Table.ExpandTableColumn(MergedTable, "NewColumn", {"Index"}, {NewIndexColumn})),
            // A function has to refer to itself with the scoping operator "@".
            Result = if CurrentIteration = ColumnCount then Expanded else @AddIndices(Expanded, CurrentIteration + 1)
        in
            Result,

    // Call recursive function:
    AddedIndices = AddIndices(#"Changed Type2")
in
    AddedIndices