My code creates a dict file Sam10.txt:
My Code:
# Scan the Elasticsearch index and dump the first 3 documents to Sam10.txt
# as proper CSV (header row + one row of values per document).
res = helpers.scan(es, query=search_param, index=index_name,
                   size=10000, request_timeout=None, scroll='10m')
# newline='' is required by the csv module to avoid blank lines on Windows.
with open('Sam10.txt', 'w', newline='') as f:
    writer = None
    for count, hit in enumerate(res, start=1):
        my_dict = hit['_source']
        if writer is None:
            # Create the writer once, from the first document's keys,
            # and emit the header row a single time.
            writer = csv.DictWriter(f, my_dict.keys())
            writer.writeheader()
        writer.writerow(my_dict)
        if count == 3:
            # break leaves the loop cleanly; exit() would kill the
            # whole interpreter before the file is flushed/closed.
            break
The output of the file looks like below: Showing just one line
"{'messageId': 'wssfx_20181012213423_8945_1_000011326', 'businessId': '2018081310342', 'batchId': 'wssfx_20181012213423_8945_1', 'sourceSystem': 'wssfx', 'secondarySourceSystem': None, 'sourceSystemCreationTimestamp': '2018-10-13T01:36:31.217Z', 'sentBy': 'wssfx', 'sentTo': 'SA', 'messageType': 'Trade', 'schemaVersion': '1.3', 'processing': 'EOD'}","{'tradeHeader': {'scotiaUPI': None, 'assetClass': 'NonCash-ForeignExchange-Forward', 'algoProductCategory': 'FX-Forex-SI', 'isInternalDeal': False, 'isTradingBook': True, 'tradeDate': '2018-08-13', 'entryDateTime': '2018-08-13T16:00:04.000Z', 'executionDateTime': {'millisTimestamp': '2018-08-13T16:00:04.000Z', 'nanoOfSecond': 0}, 'originalExecutionDateTime': None, 'tradeUpdateDateTime': '2018-08-13T16:00:04.000Z', 'tradeStatus': 'LIVE', 'tradeEvent': 'SnapShot', 'tradeSubEvent': None, 'sourceSystemProductId': {'sourceInternalId': 'FXF', 'sourceInternalIdType': None, 'sourceInstrumentName': 'FX FORWARD TRANSACTION', 'sourceInstrumentCategory': 'FX', 'sourceIsin': '', 'sourceCusip': None, 'sourceSedol1': None, 'sourceSedol2': None, 'primaryMarketId': None, 'primaryMarketIdType': None}, 'csaEligible': True, 'isPartOfPortfolioCompression': None, 'tradeIdentifiers': {'tradeId': {'id': '2018081310342', 'version': None}, 'previousTradeId': None, 'originatingTradeId': {'id': '2018081310342', 'version': None}, 'originatingOrderId': '', 'originatingParentOrderId': None, 'originatingQuoteId': None, 'originatingParentQuoteId': None, 'venueTransactionId': '', 'uniqueSwapId': '', 'uniqueTransactionId': None, 'tradePackageId': None, 'tradePackageSize': None, 'internalReverseTradeId': None, 'tradeName': None}, 'venueInfo': {'executionVenueType': None, 'executionPlatformId': '', 'exchangeCode': '', 'exchangeCodeType': None}, 'persons': {'tradeExecutorId': 'SML', 'traderId': None, 'traderName': None, 'traderLocation': None, 'salesPersonId': 'SML', 'salesPersonName': 'Colin Smolders', 'salesPersonLocation': None, 'algorithmId': None, 
'algorithmName': None, 'algorithmLocation': None}, 'settlement': {'settlementType': 'Physical', 'isClearingEligible': False, 'isNetted': None, 'settlementDate': '2019-10-31'}, 'regulatory': {'isdaUPIv1': 'ForeignExchange:Forward', 'isdaUPIv2': None, 'isdaAssetClass': 'ForeignExchange', 'isdaBaseProductId': 'Forward', 'isdaSubProductId': None, 'isdaTransactionType': None, 'cfiCode': None, 'isDoddFrankUsPerson': None, 'isVolckerSpot': None, 'isEmirSpot': False, 'mifidTradingCapacity': None, 'mifidTradingCapacityEnum': 'DEAL', 'mifidInvestmentDecisionWithinFirm': 'SML', 'mifidExecutionWithinFirm': 'SML', 'isMifidRTO': None, 'isMifidPriceMaker': False, 'isHedgeTrade': False, 'isMifidAgencyTrade': False, 'isMifidSecuritiesFinancingTrans': False, 'isMifidCommodityDerivative': None, 'mifidTransparencyFlag': None, 'mifidWaiverIndicators': None, 'mifidOtcPostTradeIndicators': None, 'mifidInstrumentIdentificationType': None, 'mifidInstrumentIdentificationCode': '', 'isMifid2FinInstrument': None, 'mifidLastLqdtyInd': None, 'mifidBuySell': 'Buy', 'mifidQuantity': 1509000.0, 'mifidQuantityCurrency': 'USD', 'mifidPriceCurrency': 'CAD', 'mifidNotionalCurrencyAmount': 1509000.0, 'mifidOutstandingNotionalAmount': None, 'mifidNotionalCurrency': 'USD', 'mifidNotionalCurrency1': 'CAD', 'mifidNotionalCurrency2': 'USD', 'mifidOtherDetails': None, 'shortSellingIndicator': None}}, 'book': {'bookingPoint': None, 'bookId': 'NAFXFW', 'bookDescription': 'NORTH AMERICAN FORWARD', 'scotiaLegalEntityId': 'L3I9ZG2KFGXZ61BMYR72', 'transitNumber': '67496'}, 'parties': {'counterparty': {'partyId': 'TDBT', 'partyIdType': None, 'partyName': 'TD BANK TREASURY', 'partyLei': 'PT3QB789TSUIDF371261', 'cardsId': None, 'ccdId': None}, 'originalCounterparty': {'partyId': 'TDBT', 'partyIdType': None, 'partyName': 'TD BANK TREASURY', 'partyLei': 'PT3QB789TSUIDF371261', 'cardsId': None, 'ccdId': None}, 'client': None, 'cardsId': '086641', 'ccdId': '1000177', 'executingParty': {'partyId': 'GFXGL', 'partyIdType': 
None, 'partyName': 'GFXGL', 'partyLei': 'L3I9ZG2KFGXZ61BMYR72', 'cardsId': None, 'ccdId': None}, 'executingBroker': None, 'clearingParty': {'partyId': None, 'partyIdType': None, 'partyName': None, 'partyLei': None, 'cardsId': None, 'ccdId': None}, 'orderOriginatingParty': None, 'triPartyAgent': None}, 'costsAndCharges': None, 'clearingInstructions': None, 'sourceSystemSpecific': None, 'product': {'npvCurrency': 'USD', 'payCurrency': 'USD', 'payNotional': 1509000.0, 'payDiscountCurve': 'DK', 'receiveCurrency': 'CAD', 'receiveNotional': 1969607.16, 'receiveDiscountCurve': 'DK', 'paymentHolidayCenters': ['CAD', 'USD'], 'theoreticalModel': '', 'fxRate': {'currencyPair': 'USDCAD', 'quoteTimestamp': '2018-08-13T16:00:04.000Z', 'quoteBasis': 'ReceiveCurrencyPerPayCurrency', 'quoteValue': 1.30524}, 'isSpotTrade': False, 'isForwardStarting': None, 'calculatedTrueSpotDate': '2018-08-14', 'isPaySideNonDeliverable': None, 'payDeliveryCurrency': 'USD', 'payNdfReferenceIndex': None, 'payNdfResetOffset': None, 'payNdfFxReset': None, 'isReceiveSideNonDeliverable': None, 'receiveDeliveryCurrency': 'CAD', 'receiveNdfReferenceIndex': None, 'receiveNdfResetOffset': None, 'receiveNdfFxReset': None, 'ndfResetHolidayCenters': None, 'isTimeOptionForward': False, 'timeOptionSet': None, 'isPartOfFxSwap': False, 'fxSwapSet': None, 'events': {'effectiveDate': '2018-08-13', 'terminationDate': '2019-10-31', 'tenorBusinessPeriod': {'periodMultiplier': 301, 'period': 'd'}, 'contractBreaks': None}}}"
My question is.
How do I extract the values from this dict?
I keep getting the below error:
for lr in sam_dict['header']:
TypeError: 'generator' object is not subscriptable
or
TypeError: string indices must be integers
The final output should be a csv file.
lets take just 3 fields for example:
messageId,scotiaUPI,sourceInternalId
wssfx_20181012213423_8945_1_000011326,None,FXF
Any help is appreciated.
Regards,
Sam
Thanks for your responses.
I was able to parse it by.
reading the file using:
# Parse the file line by line: each line holds the repr of a Python dict.
import ast

with open('XXXX.txt', 'r') as f:
    # Iterating the file object yields one line at a time (lazy),
    # which is what readlines() was being used for.
    for line in f:
        # ast.literal_eval safely parses Python literals (dicts, None,
        # numbers, strings) without the arbitrary-code-execution risk
        # of eval() on file contents.
        print(ast.literal_eval(line))
So the problem was solved by using:
f.readlines() instead of f.read()
and
eval()
Related
Hi, I have this json dict that I would like to simply reproduce in an xlsx or csv file (whichever is easier), but it's so weirdly structured I have no idea how to format it. This is a snippet of it; it's very long and continues in the same structure:
{'status': {'timestamp': '2022-10-03T11:45:57.639Z', 'error_code': 0, 'error_message': None, 'elapsed': 122, 'credit_count': 25, 'notice': None, 'total_count': 9466}, 'data': [{'id': 1, 'name': 'Bitcoin', 'symbol': 'BTC', 'slug': 'bitcoin', 'num_market_pairs': 9758, 'date_added': '2013-04-28T00:00:00.000Z', 'tags': ['mineable', 'pow', 'sha-256', 'store-of-value', 'state-channel', 'coinbase-ventures-portfolio', 'three-arrows-capital-portfolio', 'polychain-capital-portfolio', 'binance-labs-portfolio', 'blockchain-capital-portfolio', 'boostvc-portfolio', 'cms-holdings-portfolio', 'dcg-portfolio', 'dragonfly-capital-portfolio', 'electric-capital-portfolio', 'fabric-ventures-portfolio', 'framework-ventures-portfolio', 'galaxy-digital-portfolio', 'huobi-capital-portfolio', 'alameda-research-portfolio', 'a16z-portfolio', '1confirmation-portfolio', 'winklevoss-capital-portfolio', 'usv-portfolio', 'placeholder-ventures-portfolio', 'pantera-capital-portfolio', 'multicoin-capital-portfolio', 'paradigm-portfolio'], 'max_supply': 21000000, 'circulating_supply': 19167806, 'total_supply': 19167806, 'platform': None, 'cmc_rank': 1, 'self_reported_circulating_supply': None, 'self_reported_market_cap': None, 'tvl_ratio': None, 'last_updated': '2022-10-03T11:43:00.000Z', 'quote': {'USD': {'price': 19225.658331409155, 'volume_24h': 24499551567.663418, 'volume_change_24h': 31.8917, 'percent_change_1h': 0.17357826, 'percent_change_24h': 0.07206242, 'percent_change_7d': 1.89824678, 'percent_change_30d': -3.09210177, 'percent_change_60d': -16.08415351, 'percent_change_90d': -2.52728996, 'market_cap': 368513689118.7344, 'market_cap_dominance': 39.6701, 'fully_diluted_market_cap': 403738824959.59, 'tvl': None, 'last_updated': '2022-10-03T11:43:00.000Z'}}}, {'id': 1027, 'name': 'Ethereum', 'symbol': 'ETH', 'slug': 'ethereum', 'num_market_pairs': 6121, 'date_added': '2015-08-07T00:00:00.000Z', 'tags': ['pos', 'smart-contracts', 'ethereum-ecosystem', 'coinbase-ventures-portfolio', 
'three-arrows-capital-portfolio', 'polychain-capital-portfolio', 'binance-labs-portfolio', 'blockchain-capital-portfolio', 'boostvc-portfolio', 'cms-holdings-portfolio', 'dcg-portfolio', 'dragonfly-capital-portfolio', 'electric-capital-portfolio', 'fabric-ventures-portfolio', 'framework-ventures-portfolio', 'hashkey-capital-portfolio', 'kenetic-capital-portfolio', 'huobi-capital-portfolio', 'alameda-research-portfolio', 'a16z-portfolio', '1confirmation-portfolio', 'winklevoss-capital-portfolio', 'usv-portfolio', 'placeholder-ventures-portfolio', 'pantera-capital-portfolio', 'multicoin-capital-portfolio', 'paradigm-portfolio', 'injective-ecosystem'], 'max_supply': None, 'circulating_supply': 122632957.499, 'total_supply': 122632957.499, 'platform': None, 'cmc_rank': 2, 'self_reported_circulating_supply': None, 'self_reported_market_cap': None, 'tvl_ratio': None, 'last_updated': '2022-10-03T11:43:00.000Z', 'quote': {'USD': {'price': 1296.4468710090778, 'volume_24h': 8517497687.565527, 'volume_change_24h': 23.596, 'percent_change_1h': 0.1720414, 'percent_change_24h': -0.21259957, 'percent_change_7d': 0.14320028, 'percent_change_30d': -16.39161383, 'percent_change_60d': -19.95869375, 'percent_change_90d': 15.00727432, 'market_cap': 158987114032.16776, 'market_cap_dominance': 17.1131, 'fully_diluted_market_cap': 158987114032.17, 'tvl': None, 'last_updated': '2022-10-03T11:43:00.000Z'}}}, {'id': 825, 'name': 'Tether', 'symbol': 'USDT', 'slug': 'tether', 'num_market_pairs': 40432, 'date_added': '2015-02-25T00:00:00.000Z', 'tags': ['payments', 'stablecoin', 'asset-backed-stablecoin', 'avalanche-ecosystem', 'solana-ecosystem', 'arbitrum-ecosytem', 'moonriver-ecosystem', 'injective-ecosystem', 'bnb-chain', 'usd-stablecoin'], 'max_supply': None, 'circulating_supply': 67949424437.85899, 'total_supply': 70155449906.09953, 'platform': .....to be continued
This is all I have:
.....
# Flatten the nested JSON payload into a DataFrame and save it to Excel.
data = json.loads(response.text)
df = pd.json_normalize(data)
path = "C:\\Users\\NIWE\\Desktop\\Python\\PLS.xlsx"
# ExcelWriter is a context manager: the workbook is written and the
# handle closed automatically on exit. ExcelWriter.save() was
# deprecated and removed in pandas 2.0 — do not call it.
with pd.ExcelWriter(path, engine="xlsxwriter") as writer:
    df.to_excel(writer)
I am using the pyflightdata library to search for flight stats. It returns json inside a list of dicts.
Here is an example of the first dictionary in the list after my query:
> flightlog = {'identification': {'number': {'default': 'KE504', 'alternative': 'None'}, 'callsign': 'KAL504', 'codeshare': 'None'}
, 'status': {'live': False, 'text': 'Landed 22:29', 'estimated': 'None', 'ambiguous': False, 'generic': {'status': {'text': 'landed', 'type': 'arrival', 'color': 'green', 'diverted': 'None'}
, 'eventTime': {'utc_millis': 1604611778000, 'utc_date': '20201105', 'utc_time': '2229', 'utc': 1604611778, 'local_millis': 1604615378000, 'local_date': '20201105', 'local_time': '2329', 'local': 1604615378}}}
, 'aircraft': {'model': {'code': 'B77L', 'text': 'Boeing 777-FEZ'}, 'registration': 'HL8075', 'country': {'name': 'South Korea', 'alpha2': 'KR', 'alpha3': 'KOR'}}
, 'airline': {'name': 'Korean Air', 'code': {'iata': 'KE', 'icao': 'KAL'}}
, 'airport': {'origin': {'name': 'London Heathrow Airport', 'code': {'iata': 'LHR', 'icao': 'EGLL'}, 'position': {'latitude': 51.471626, 'longitude': -0.467081, 'country': {'name': 'United Kingdom', 'code': 'GB'}, 'region': {'city': 'London'}}
, 'timezone': {'name': 'Europe/London', 'offset': 0, 'abbr': 'GMT', 'abbrName': 'Greenwich Mean Time', 'isDst': False}}, 'destination': {'name': 'Paris Charles de Gaulle Airport', 'code': {'iata': 'CDG', 'icao': 'LFPG'}, 'position': {'latitude': 49.012516, 'longitude': 2.555752, 'country': {'name': 'France', 'code': 'FR'}, 'region': {'city': 'Paris'}}, 'timezone': {'name': 'Europe/Paris', 'offset': 3600, 'abbr': 'CET', 'abbrName': 'Central European Time', 'isDst': False}}, 'real': 'None'}
, 'time': {'scheduled': {'departure_millis': 1604607300000, 'departure_date': '20201105', 'departure_time': '2115', 'departure': 1604607300, 'arrival_millis': 1604612700000, 'arrival_date': '20201105', 'arrival_time': '2245', 'arrival': 1604612700}, 'real': {'departure_millis': 1604609079000, 'departure_date': '20201105', 'departure_time': '2144', 'departure': 1604609079, 'arrival_millis': 1604611778000, 'arrival_date': '20201105', 'arrival_time': '2229', 'arrival': 1604611778}, 'estimated': {'departure': 'None', 'arrival': 'None'}, 'other': {'eta_millis': 1604611778000, 'eta_date': '20201105', 'eta_time': '2229', 'eta': 1604611778}}}
This dictionary is a huge, multi-nested, json mess and I am struggling to find a way to make it readable. I guess something like this:
identification number default KE504
alternative None
callsign KAL504
codeshare None
status live False
text Landed 22:29
Estimated None
ambiguous False
...
I am trying to turn it into a pandas DataFrame, with mixed results.
In this post it was explained that MultiIndex values have to be tuples, not dictionaries, so I used their example to convert my dictionary:
> flightlog_tuple = {(outerKey, innerKey): values for outerKey, innerDict in flightlog.items() for innerKey, values in innerDict.items()}
Which worked, up to a certain point.
df2 = pd.Series(flightlog_tuple)
gives the following output:
identification number {'default': 'KE504', 'alternative': 'None'}
callsign KAL504
codeshare None
status live False
text Landed 22:29
estimated None
ambiguous False
generic {'status': {'text': 'landed', 'type': 'arrival...
aircraft model {'code': 'B77L', 'text': 'Boeing 777-FEZ'}
registration HL8075
country {'name': 'South Korea', 'alpha2': 'KR', 'alpha...
airline name Korean Air
code {'iata': 'KE', 'icao': 'KAL'}
airport origin {'name': 'London Heathrow Airport', 'code': {'...
destination {'name': 'Paris Charles de Gaulle Airport', 'c...
real None
time scheduled {'departure_millis': 1604607300000, 'departure...
real {'departure_millis': 1604609079000, 'departure...
estimated {'departure': 'None', 'arrival': 'None'}
other {'eta_millis': 1604611778000, 'eta_date': '202...
dtype: object
Kind of what I was going for but some of the indexes are still in the column with values because there are so many levels. So I followed this explanation and tried to add more levels:
level_up = {(level1Key, level2Key, level3Key): values for level1Key, level2Dict in flightlog.items() for level2Key, level3Dict in level2Dict.items() for level3Key, values in level3Dict.items()}
df2 = pd.Series(level_up)
This code gives me AttributeError: 'str' object has no attribute 'items'. I don't understand why the first 2 indexes worked, but the others give an error.
I've tried other methods like MultiIndex.from_tuple or DataFrame.from_dict, but I can't get it to work.
This Dictionary is too complex as a beginner. I don't know what the right approach is. Maybe I am using DataFrames in the wrong way. Maybe there is an easier way to access the data that I am overlooking.
Any help would be much appreciated!
I'd like to convert API response into a pandas dataframe to make it easier to manipulate.
Below it's what I've tried so far:
import requests
import pandas as pd
# Query the GLEIF LEI-records API: page 1, 10 records, any entity name.
URL = 'https://api.gleif.org/api/v1/lei-records?page[size]=10&page[number]=1&filter[entity.names]=*'
# NOTE(review): proxyDict is not defined in this snippet — presumably a
# mapping of proxy settings created elsewhere; confirm before running.
r = requests.get(URL, proxies=proxyDict)
# Decode the JSON response body into Python objects (a dict here).
x = r.json()
x
out:
{'meta': {'goldenCopy': {'publishDate': '2020-07-14T00:00:00Z'},
'pagination': {'currentPage': 1,
'perPage': 10,
'from': 1,
'to': 10,
'total': 1675786,
'lastPage': 167579}},
'links': {'first': 'https://api.gleif.org/api/v1/lei-records?filter%5Bentity.names%5D=%2A&page%5Bnumber%5D=1&page%5Bsize%5D=10',
'next': 'https://api.gleif.org/api/v1/lei-records?filter%5Bentity.names%5D=%2A&page%5Bnumber%5D=2&page%5Bsize%5D=10',
'last': 'https://api.gleif.org/api/v1/lei-records?filter%5Bentity.names%5D=%2A&page%5Bnumber%5D=167579&page%5Bsize%5D=10'},
'data': [{'type': 'lei-records',
'id': '254900RR9EUYHB7PI211',
'attributes': {'lei': '254900RR9EUYHB7PI211',
'entity': {'legalName': {'name': 'MedicLights Research Inc.',
'language': None},
'otherNames': [],
'transliteratedOtherNames': [],
'legalAddress': {'language': None,
'addressLines': ['300 Ranee Avenue'],
'addressNumber': None,
'addressNumberWithinBuilding': None,
'mailRouting': None,
'city': 'Toronto',
'region': 'CA-ON',
'country': 'CA',
'postalCode': 'M6A 1N8'},
'headquartersAddress': {'language': None,
'addressLines': ['76 Marble Arch Crescent'],
'addressNumber': None,
'addressNumberWithinBuilding': None,
'mailRouting': None,
'city': 'Toronto',
'region': 'CA-ON',
'country': 'CA',
'postalCode': 'M1R 1W9'},
'registeredAt': {'id': 'RA000079', 'other': None},
'registeredAs': '002185472',
'jurisdiction': 'CA-ON',
'category': None,
'legalForm': {'id': 'O90R', 'other': None},
'associatedEntity': {'lei': None, 'name': None},
'status': 'ACTIVE',
'expiration': {'date': None, 'reason': None},
'successorEntity': {'lei': None, 'name': None},
'otherAddresses': []},
'registration': {'initialRegistrationDate': '2020-07-13T21:09:50Z',
'lastUpdateDate': '2020-07-13T21:09:50Z',
'status': 'ISSUED',
'nextRenewalDate': '2021-07-13T21:09:50Z',
'managingLou': '5493001KJTIIGC8Y1R12',
'corroborationLevel': 'PARTIALLY_CORROBORATED',
'validatedAt': {'id': 'RA000079', 'other': None},
'validatedAs': '002185472'},
'bic': None},
'relationships': {'managing-lou': {'links': {'related': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/managing-lou'}},
'lei-issuer': {'links': {'related': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/lei-issuer'}},
'direct-parent': {'links': {'reporting-exception': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/direct-parent-reporting-exception'}},
'ultimate-parent': {'links': {'reporting-exception': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/ultimate-parent-reporting-exception'}}},
'links': {'self': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211'}},
{'type': 'lei-records',
'id': '254900F9XV2K6IR5TO93',
Then I tried to put it into pandas and gives me the following results
f = pd.DataFrame(x['data'])
f
type id attributes relationships links
0 lei-records 254900RR9EUYHB7PI211 {'lei': '254900RR9EUYHB7PI211', 'entity': {'le... {'managing-lou': {'links': {'related': 'https:... {'self': 'https://api.gleif.org/api/v1/lei-rec...
1 lei-records 254900F9XV2K6IR5TO93 {'lei': '254900F9XV2K6IR5TO93', 'entity': {'le... {'managing-lou': {'links': {'related': 'https:... {'self': 'https://api.gleif.org/api/v1/lei-rec...
2 lei-records 254900DIC0729LEXNL12 {'lei': '254900DIC0729LEXNL12', 'entity': {'le... {'managing-lou': {'links': {'related': 'https:... {'self': 'https://api.gleif.org/api/v1/lei-rec...
Which isn't the result expected. I even tried to read_json with below codes:
g = pd.read_json(x.text)
g
which gives me the error
AttributeError: 'dict' object has no attribute 'text'
the expected output should look like this:
lei entity.legalName.name entity.legalAddress.addressLines entity.legalAddress.city entity.legalAddress.postalcode status registration.status
254900RR9EUYHB7PI211 MedicLights Research Inc. 300 Ranee Avenue Toronto M6A 1N8 ACTIVE ISSUED
Thanks to anyone who can help.
Use json_normalize like:
pd.json_normalize(x['data'])
Here is another method to use the pandas to normalize the json file using pandas.io.json.json_normalize from pandas.io.json library.
How to normalize json correctly by Python Pandas
I made a get request to a website and parsed it using BS4 using 'Html.parser'. I want to extract the ID, size and availability from the string. I have parsed it down to this final string:
'{"id":706816278547,"parent_id":81935859731,"available":false,
"sku":"665570057894","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["S"],
"option1":"s","option2":"","option3":"","option4":""},
{"id":707316252691,"parent_id":81935859731,"available":true,
"sku":"665570057900","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["M"],
"option1":"m","option2":"","option3":"", "option4":""},
{"id":707316285459,"parent_id":81935859731,"available":true,
"sku":"665570057917","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["L"],
"option1":"l","option2":"","option3":"","option4":""},`
{"id":707316318227,"parent_id":81935859731,"available":true,`
"sku":"665570057924","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["XL"],
"option1":"xl","option2":"","option3":"","option4":""}'
I also tried using the split() method, but I'm unable to extract the needed information without creating a cluttered list and getting lost.
I tried using json.loads() so i could just extract the information needed by calling the key and value pairs but i get the following error
final_id =
'{"id":706816278547,"parent_id":81935859731,"available":false,
"sku":"665570057894","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["S"],
"option1":"s","option2":"","option3":"","option4":""},
{"id":707316252691,"parent_id":81935859731,"available":true,
"sku":"665570057900","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["M"],
"option1":"m","option2":"","option3":"", "option4":""},
{"id":707316285459,"parent_id":81935859731,"available":true,
"sku":"665570057917","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["L"],
"option1":"l","option2":"","option3":"","option4":""},`
{"id":707316318227,"parent_id":81935859731,"available":true,`
"sku":"665570057924","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["XL"],
"option1":"xl","option2":"","option3":"","option4":""}'
find_id = json.loads(final_id)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/anaconda3/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/anaconda3/lib/python3.7/json/decoder.py", line 340, in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 1 column 233 (char 232)
I want to create a json object for each ID and Size and if that size is available or not.
Any help is welcomed. Thank you.
First, that's not valid JSON on its own.
Second, json.load (as opposed to json.loads, which takes a string) works on file objects, so saving this data to a file and loading it solves the issue: null in JSON corresponds to None in Python, and json.load translates the JSON file into Python objects, so
import json
with open('sof.json', 'r') as stackof:
final_id = json.load(stackof)
print(final_id)
will output
[{'id': 706816278547, 'parent_id': 81935859731, 'available': 'false', 'sku': '665570057894', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['S'], 'option1': 's', 'option2': '', 'option3': '', 'option4': ''}, {'id': 707316252691, 'parent_id': 81935859731, 'available': True, 'sku': '665570057900', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['M'], 'option1': 'm', 'option2': '', 'option3': '', 'option4': ''}, {'id': 707316285459, 'parent_id': 81935859731, 'available': True, 'sku': '665570057917', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['L'], 'option1': 'l', 'option2': '', 'option3': '', 'option4': ''}, {'id': 707316318227, 'parent_id': 81935859731, 'available': True, 'sku': '665570057924', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['XL'], 'option1': 'xl', 'option2': '', 'option3': '', 'option4': ''}]
I split them all into an array, so now to print the first id you would write
print(final_id[0]['id'])
output:
706816278547
Tell me in the comments if that helped you,
btw click on >> sof.json to see sof.json
I have a MySQL database and a table with the schema
tweet_id BIGINT
tweet_metadata LONGBLOB
I am trying to insert a row into my database as follows :
import MySQLdb as mysql
host = 'localhost'
user = 'root'
passwd = '************'
db = 'twitter'
insert_tweet_query = ''' INSERT INTO tweets(tweet_id, tweet_metadata) VALUES(%s, %s)'''
''' Creates a MySQL connection and returns the cursor '''
def create_connection():
    """Open a MySQL connection configured for UTF-8 and return
    a (connection, cursor) pair."""
    conn = mysql.connect(host, user, passwd, db, use_unicode=True)
    conn.set_character_set('utf8')
    cur = conn.cursor()
    # Force utf8 on the session variables as well, so data sent over
    # this connection is interpreted consistently by the server.
    cur.execute('SET NAMES utf8;')
    cur.execute('SET CHARACTER SET utf8;')
    cur.execute('SET character_set_connection=utf8;')
    return conn, cur
''' Close the connection '''
def close_connection(cursor, connection):
    """Tear down a DB session: release the cursor, persist pending
    writes, then close the connection."""
    # Order matters: commit must run before the connection closes,
    # otherwise buffered INSERTs are discarded.
    cursor.close()
    connection.commit()
    connection.close()
connection, cursor = create_connection()
tweet = dict({u'contributors': None, u'truncated': False, u'text': u'RT #HMV_Anime: \u7530\u6751\u3086\u304b\u308a\u59eb\u30d9\u30b9\u30c8\u30a2\u30eb\u30d0\u30e0\u300cEverlasting Gift\u300d\u98db\u3076\u3088\u3046\u306b\u58f2\u308c\u3066\u3044\u307e\u3059\uff01\u6728\u66dc\u306f\u6a2a\u30a2\u30ea\u516c\u6f14\uff01\u300c\u30d1\u30fc\u30c6\u30a3\u30fc\u306f\u7d42\u308f\u3089\u306a\u3044\u300d\u306e\u30e9\u30c3\u30d7\u30d1\u30fc\u30c8\u306e\u4e88\u7fd2\u5fa9\u7fd2\u306b\u3082\u5fc5\u9808\u3067\u3059\uff01 http://t.co/SVWm2E1r http://t.co/rSP ...', u'in_reply_to_status_id': None, u'id': 258550064480387072L, u'source': u'ShootingStar', u'retweeted': False, u'coordinates': None, u'entities': {u'user_mentions': [{u'indices': [3, 13], u'id': 147791077, u'id_str': u'147791077', u'screen_name': u'HMV_Anime', u'name': u'HMV\u30a2\u30cb\u30e1\uff01'}], u'hashtags': [], u'urls': [{u'indices': [100, 120], u'url': u'http://t.co/SVWm2E1r', u'expanded_url': u'http://ow.ly/evEvT', u'display_url': u'ow.ly/evEvT'}, {u'indices': [121, 136], u'url': u'http://t.co/rSP', u'expanded_url': u'http://t.co/rSP', u'display_url': u't.co/rSP'}]}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 40, u'id_str': u'258550064480387072', u'favorited': False, u'retweeted_status': {u'contributors': None, u'truncated': False, u'text': u'\u7530\u6751\u3086\u304b\u308a\u59eb\u30d9\u30b9\u30c8\u30a2\u30eb\u30d0\u30e0\u300cEverlasting Gift\u300d\u98db\u3076\u3088\u3046\u306b\u58f2\u308c\u3066\u3044\u307e\u3059\uff01\u6728\u66dc\u306f\u6a2a\u30a2\u30ea\u516c\u6f14\uff01\u300c\u30d1\u30fc\u30c6\u30a3\u30fc\u306f\u7d42\u308f\u3089\u306a\u3044\u300d\u306e\u30e9\u30c3\u30d7\u30d1\u30fc\u30c8\u306e\u4e88\u7fd2\u5fa9\u7fd2\u306b\u3082\u5fc5\u9808\u3067\u3059\uff01 http://t.co/SVWm2E1r http://t.co/rSPYm0bE #yukarin', u'in_reply_to_status_id': None, u'id': 258160273171574784L, u'source': u'HootSuite', u'retweeted': False, u'coordinates': None, u'entities': {u'user_mentions': [], 
u'hashtags': [{u'indices': [127, 135], u'text': u'yukarin'}], u'urls': [{u'indices': [85, 105], u'url': u'http://t.co/SVWm2E1r', u'expanded_url': u'http://ow.ly/evEvT', u'display_url': u'ow.ly/evEvT'}, {u'indices': [106, 126], u'url': u'http://t.co/rSPYm0bE', u'expanded_url': u'http://twitpic.com/awuzz0', u'display_url': u'twitpic.com/awuzz0'}]}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 40, u'id_str': u'258160273171574784', u'favorited': False, u'user': {u'follow_request_sent': None, u'profile_use_background_image': True, u'id': 147791077, u'verified': False, u'profile_image_url_https': u'https://si0.twimg.com/profile_images/2573283223/mn4nu924bnxh643sgu1p_normal.jpeg', u'profile_sidebar_fill_color': u'DDEEF6', u'geo_enabled': False, u'profile_text_color': u'333333', u'followers_count': 17108, u'profile_sidebar_border_color': u'C0DEED', u'location': u'\u4e03\u68ee\u4e2d\u5b66\u6821', u'default_profile_image': False, u'listed_count': 1012, u'utc_offset': 32400, u'statuses_count': 33277, u'description': u'\u79c1\u3001\u8d64\u5ea7\u3042\u304b\u308a\u3002\u3069\u3053\u306b\u3067\u3082\u3044\u308b\u3054\u304f\u666e\u901a\u306e\u4e2d\u5b66\u751f\u3002\u305d\u3093\u306a\u79c1\u3060\u3051\u3069\u3001\u6bce\u65e5\u3068\u3063\u3066\u3082\u5145\u5b9f\u3057\u3066\u308b\u306e\u3002\u3060\u3063\u3066\u3042\u304b\u308a\u306f\u2026\u2026 \u3060\u3063\u3066\u3042\u304b\u308a\u306f\u2026\u2026\u3000\uff08\u203b\u3053\u3061\u3089\u306f#HMV_Japan\u306e\u59c9\u59b9\u30a2\u30ab\u30a6\u30f3\u30c8\u3067\u3059\u3002\u3054\u8cea\u554f\u30fb\u304a\u554f\u3044\u5408\u308f\u305b\u306f\u3001HMV\u30b5\u30a4\u30c8\u4e0a\u306e\u5c02\u7528\u30d5\u30a9\u30fc\u30e0\u3088\u308a\u304a\u9858\u3044\u81f4\u3057\u307e\u3059\u3002\uff09', u'friends_count': 17046, u'profile_link_color': u'0084B4', u'profile_image_url': u'http://a0.twimg.com/profile_images/2573283223/mn4nu924bnxh643sgu1p_normal.jpeg', u'following': None, u'profile_background_image_url_https': 
u'https://si0.twimg.com/profile_background_images/104844943/bg_hmv2.gif', u'profile_background_color': u'202020', u'id_str': u'147791077', u'profile_background_image_url': u'http://a0.twimg.com/profile_background_images/104844943/bg_hmv2.gif', u'name': u'HMV\u30a2\u30cb\u30e1\uff01', u'lang': u'ja', u'profile_background_tile': False, u'favourites_count': 0, u'screen_name': u'HMV_Anime', u'notifications': None, u'url': u'http://www.hmv.co.jp/anime/', u'created_at': u'Tue May 25 02:07:35 +0000 2010', u'contributors_enabled': False, u'time_zone': u'Tokyo', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'created_at': u'Tue Oct 16 10:59:40 +0000 2012', u'possibly_sensitive_editable': True, u'in_reply_to_status_id_str': None, u'place': None}, u'user': {u'follow_request_sent': None, u'profile_use_background_image': True, u'id': 500471418, u'verified': False, u'profile_image_url_https': u'https://si0.twimg.com/profile_images/2722246932/b71d269b9e1e16f59698b4f7fa23a0fe_normal.jpeg', u'profile_sidebar_fill_color': u'DDEEF6', u'geo_enabled': False, u'profile_text_color': u'333333', u'followers_count': 2241, u'profile_sidebar_border_color': u'C0DEED', u'location': u'\u3072\u3060\u307e\u308a\u8358204\u53f7\u5ba4', u'default_profile_image': False, u'listed_count': 41, u'utc_offset': 32400, u'statuses_count': 18879, u'description': u'\u611f\u3058\u308d\u2026\u2026\u3002 \u2514(\u2510L \u309c\u03c9\u3002)\u2518\u305d\u3057\u3066\uff71\uff8d\u9854\uff80\uff9e\uff8c\uff9e\uff99\uff8b\uff9f\uff70\uff7d\u3060 \u270c( \u055e\u0a0a \u055e)\u270c \u2026\u2026\uff01 \u3051\u3044\u304a\u3093\u3001\u307e\u3069\u30de\u30ae\u3001AB\u3001\u3089\u304d\u2606\u3059\u305f\u3001\u3086\u308b\u3086\u308a\u3001\u30df\u30eb\u30ad\u30a3\u3068\u304b\u306e\u30a2\u30cb\u30e1\u3001\u6771\u65b9\u3001\u30dc\u30ab\u30ed\u597d\u304d\u3060\u3088\u2517(^\u03c9^ )\u251b\u30c7\u30c7\u30f3\uff01 
\u30d5\u30a9\u30ed\u30d0\u306f\u3059\u308b\u304b\u3089\u5f85\u3063\u3068\u3044\u3066 \u53ef\u6190\u3061\u3083\u3093\u540c\u76dfNo.9 \u308c\u3044\u3080\u540c\u76dfNo.4 \u898f\u5236\u57a2\u2192#SpeedPer_2', u'friends_count': 2038, u'profile_link_color': u'0084B4', u'profile_image_url': u'http://a0.twimg.com/profile_images/2722246932/b71d269b9e1e16f59698b4f7fa23a0fe_normal.jpeg', u'following': None, u'profile_background_image_url_https': u'https://si0.twimg.com/profile_background_images/600710368/ff2z5gv4s83u313432hj.jpeg', u'profile_background_color': u'C0DEED', u'id_str': u'500471418', u'profile_background_image_url': u'http://a0.twimg.com/profile_background_images/600710368/ff2z5gv4s83u313432hj.jpeg', u'name': u'\u3055\u30fc\u3057\u3083\u3059#\u30cf\u30cb\u30ab\u30e0\u30ac\u30c1\u52e2', u'lang': u'ja', u'profile_background_tile': True, u'favourites_count': 3066, u'screen_name': u'SpeedPer', u'notifications': None, u'url': u'https://mobile.twitter.com/account', u'created_at': u'Thu Feb 23 05:10:57 +0000 2012', u'contributors_enabled': False, u'time_zone': u'Irkutsk', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'created_at': u'Wed Oct 17 12:48:33 +0000 2012', u'possibly_sensitive_editable': True, u'in_reply_to_status_id_str': None, u'place': None})
cursor.execute(insert_tweet_query, (tweet['id_str'], tweet))
close_connection(cursor, connection)
However, despite setting appropriate 'UTF-8' encodings I get an exception as follows
_mysql_exceptions.ProgrammingError: (1064, 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \': \'NULL\', u\'truncated\': \'0\', u\'text\': "\'RT #HMV_Anime: \\xe7\\x94\\xb0\\xe6\\x9d\\x91\\\' at line 1')
What am I doing wrong?
you could try with repr:
cursor.execute(insert_tweet_query, (tweet['id_str'], repr(tweet)))