ProgrammingError in inserting blob object in MySql using Python - exception

I have a MySQL database and a table with the schema
tweet_id BIGINT
tweet_metadata LONGBLOB
I am trying to insert a row into my database as follows :
import MySQLdb as mysql
host = 'localhost'
user = 'root'
passwd = '************'
db = 'twitter'
insert_tweet_query = ''' INSERT INTO tweets(tweet_id, tweet_metadata) VALUES(%s, %s)'''
''' Creates a MySQL connection and returns the cursor '''
def create_connection():
    """Open a connection to the twitter MySQL database.

    Reads the module-level ``host``/``user``/``passwd``/``db`` settings,
    forces UTF-8 on the session so multi-byte tweet text survives the
    round trip, and returns a ``(connection, cursor)`` pair.
    """
    conn = mysql.connect(host, user, passwd, db, use_unicode=True)
    conn.set_character_set('utf8')
    cur = conn.cursor()
    # Pin every charset-related session variable to utf8; relying on the
    # server defaults is what usually mangles Japanese tweet text.
    for statement in ('SET NAMES utf8;',
                      'SET CHARACTER SET utf8;',
                      'SET character_set_connection=utf8;'):
        cur.execute(statement)
    return conn, cur
''' Close the connection '''
def close_connection(cursor, connection):
    """Flush pending work and release database resources.

    Order matters here: the cursor is closed first, the outstanding
    transaction is committed, and only then is the connection closed.
    """
    cursor.close()
    connection.commit()
    connection.close()
connection, cursor = create_connection()
tweet = dict({u'contributors': None, u'truncated': False, u'text': u'RT #HMV_Anime: \u7530\u6751\u3086\u304b\u308a\u59eb\u30d9\u30b9\u30c8\u30a2\u30eb\u30d0\u30e0\u300cEverlasting Gift\u300d\u98db\u3076\u3088\u3046\u306b\u58f2\u308c\u3066\u3044\u307e\u3059\uff01\u6728\u66dc\u306f\u6a2a\u30a2\u30ea\u516c\u6f14\uff01\u300c\u30d1\u30fc\u30c6\u30a3\u30fc\u306f\u7d42\u308f\u3089\u306a\u3044\u300d\u306e\u30e9\u30c3\u30d7\u30d1\u30fc\u30c8\u306e\u4e88\u7fd2\u5fa9\u7fd2\u306b\u3082\u5fc5\u9808\u3067\u3059\uff01 http://t.co/SVWm2E1r http://t.co/rSP ...', u'in_reply_to_status_id': None, u'id': 258550064480387072L, u'source': u'ShootingStar', u'retweeted': False, u'coordinates': None, u'entities': {u'user_mentions': [{u'indices': [3, 13], u'id': 147791077, u'id_str': u'147791077', u'screen_name': u'HMV_Anime', u'name': u'HMV\u30a2\u30cb\u30e1\uff01'}], u'hashtags': [], u'urls': [{u'indices': [100, 120], u'url': u'http://t.co/SVWm2E1r', u'expanded_url': u'http://ow.ly/evEvT', u'display_url': u'ow.ly/evEvT'}, {u'indices': [121, 136], u'url': u'http://t.co/rSP', u'expanded_url': u'http://t.co/rSP', u'display_url': u't.co/rSP'}]}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 40, u'id_str': u'258550064480387072', u'favorited': False, u'retweeted_status': {u'contributors': None, u'truncated': False, u'text': u'\u7530\u6751\u3086\u304b\u308a\u59eb\u30d9\u30b9\u30c8\u30a2\u30eb\u30d0\u30e0\u300cEverlasting Gift\u300d\u98db\u3076\u3088\u3046\u306b\u58f2\u308c\u3066\u3044\u307e\u3059\uff01\u6728\u66dc\u306f\u6a2a\u30a2\u30ea\u516c\u6f14\uff01\u300c\u30d1\u30fc\u30c6\u30a3\u30fc\u306f\u7d42\u308f\u3089\u306a\u3044\u300d\u306e\u30e9\u30c3\u30d7\u30d1\u30fc\u30c8\u306e\u4e88\u7fd2\u5fa9\u7fd2\u306b\u3082\u5fc5\u9808\u3067\u3059\uff01 http://t.co/SVWm2E1r http://t.co/rSPYm0bE #yukarin', u'in_reply_to_status_id': None, u'id': 258160273171574784L, u'source': u'HootSuite', u'retweeted': False, u'coordinates': None, u'entities': {u'user_mentions': [], 
u'hashtags': [{u'indices': [127, 135], u'text': u'yukarin'}], u'urls': [{u'indices': [85, 105], u'url': u'http://t.co/SVWm2E1r', u'expanded_url': u'http://ow.ly/evEvT', u'display_url': u'ow.ly/evEvT'}, {u'indices': [106, 126], u'url': u'http://t.co/rSPYm0bE', u'expanded_url': u'http://twitpic.com/awuzz0', u'display_url': u'twitpic.com/awuzz0'}]}, u'in_reply_to_screen_name': None, u'in_reply_to_user_id': None, u'retweet_count': 40, u'id_str': u'258160273171574784', u'favorited': False, u'user': {u'follow_request_sent': None, u'profile_use_background_image': True, u'id': 147791077, u'verified': False, u'profile_image_url_https': u'https://si0.twimg.com/profile_images/2573283223/mn4nu924bnxh643sgu1p_normal.jpeg', u'profile_sidebar_fill_color': u'DDEEF6', u'geo_enabled': False, u'profile_text_color': u'333333', u'followers_count': 17108, u'profile_sidebar_border_color': u'C0DEED', u'location': u'\u4e03\u68ee\u4e2d\u5b66\u6821', u'default_profile_image': False, u'listed_count': 1012, u'utc_offset': 32400, u'statuses_count': 33277, u'description': u'\u79c1\u3001\u8d64\u5ea7\u3042\u304b\u308a\u3002\u3069\u3053\u306b\u3067\u3082\u3044\u308b\u3054\u304f\u666e\u901a\u306e\u4e2d\u5b66\u751f\u3002\u305d\u3093\u306a\u79c1\u3060\u3051\u3069\u3001\u6bce\u65e5\u3068\u3063\u3066\u3082\u5145\u5b9f\u3057\u3066\u308b\u306e\u3002\u3060\u3063\u3066\u3042\u304b\u308a\u306f\u2026\u2026 \u3060\u3063\u3066\u3042\u304b\u308a\u306f\u2026\u2026\u3000\uff08\u203b\u3053\u3061\u3089\u306f#HMV_Japan\u306e\u59c9\u59b9\u30a2\u30ab\u30a6\u30f3\u30c8\u3067\u3059\u3002\u3054\u8cea\u554f\u30fb\u304a\u554f\u3044\u5408\u308f\u305b\u306f\u3001HMV\u30b5\u30a4\u30c8\u4e0a\u306e\u5c02\u7528\u30d5\u30a9\u30fc\u30e0\u3088\u308a\u304a\u9858\u3044\u81f4\u3057\u307e\u3059\u3002\uff09', u'friends_count': 17046, u'profile_link_color': u'0084B4', u'profile_image_url': u'http://a0.twimg.com/profile_images/2573283223/mn4nu924bnxh643sgu1p_normal.jpeg', u'following': None, u'profile_background_image_url_https': 
u'https://si0.twimg.com/profile_background_images/104844943/bg_hmv2.gif', u'profile_background_color': u'202020', u'id_str': u'147791077', u'profile_background_image_url': u'http://a0.twimg.com/profile_background_images/104844943/bg_hmv2.gif', u'name': u'HMV\u30a2\u30cb\u30e1\uff01', u'lang': u'ja', u'profile_background_tile': False, u'favourites_count': 0, u'screen_name': u'HMV_Anime', u'notifications': None, u'url': u'http://www.hmv.co.jp/anime/', u'created_at': u'Tue May 25 02:07:35 +0000 2010', u'contributors_enabled': False, u'time_zone': u'Tokyo', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'created_at': u'Tue Oct 16 10:59:40 +0000 2012', u'possibly_sensitive_editable': True, u'in_reply_to_status_id_str': None, u'place': None}, u'user': {u'follow_request_sent': None, u'profile_use_background_image': True, u'id': 500471418, u'verified': False, u'profile_image_url_https': u'https://si0.twimg.com/profile_images/2722246932/b71d269b9e1e16f59698b4f7fa23a0fe_normal.jpeg', u'profile_sidebar_fill_color': u'DDEEF6', u'geo_enabled': False, u'profile_text_color': u'333333', u'followers_count': 2241, u'profile_sidebar_border_color': u'C0DEED', u'location': u'\u3072\u3060\u307e\u308a\u8358204\u53f7\u5ba4', u'default_profile_image': False, u'listed_count': 41, u'utc_offset': 32400, u'statuses_count': 18879, u'description': u'\u611f\u3058\u308d\u2026\u2026\u3002 \u2514(\u2510L \u309c\u03c9\u3002)\u2518\u305d\u3057\u3066\uff71\uff8d\u9854\uff80\uff9e\uff8c\uff9e\uff99\uff8b\uff9f\uff70\uff7d\u3060 \u270c( \u055e\u0a0a \u055e)\u270c \u2026\u2026\uff01 \u3051\u3044\u304a\u3093\u3001\u307e\u3069\u30de\u30ae\u3001AB\u3001\u3089\u304d\u2606\u3059\u305f\u3001\u3086\u308b\u3086\u308a\u3001\u30df\u30eb\u30ad\u30a3\u3068\u304b\u306e\u30a2\u30cb\u30e1\u3001\u6771\u65b9\u3001\u30dc\u30ab\u30ed\u597d\u304d\u3060\u3088\u2517(^\u03c9^ )\u251b\u30c7\u30c7\u30f3\uff01 
\u30d5\u30a9\u30ed\u30d0\u306f\u3059\u308b\u304b\u3089\u5f85\u3063\u3068\u3044\u3066 \u53ef\u6190\u3061\u3083\u3093\u540c\u76dfNo.9 \u308c\u3044\u3080\u540c\u76dfNo.4 \u898f\u5236\u57a2\u2192#SpeedPer_2', u'friends_count': 2038, u'profile_link_color': u'0084B4', u'profile_image_url': u'http://a0.twimg.com/profile_images/2722246932/b71d269b9e1e16f59698b4f7fa23a0fe_normal.jpeg', u'following': None, u'profile_background_image_url_https': u'https://si0.twimg.com/profile_background_images/600710368/ff2z5gv4s83u313432hj.jpeg', u'profile_background_color': u'C0DEED', u'id_str': u'500471418', u'profile_background_image_url': u'http://a0.twimg.com/profile_background_images/600710368/ff2z5gv4s83u313432hj.jpeg', u'name': u'\u3055\u30fc\u3057\u3083\u3059#\u30cf\u30cb\u30ab\u30e0\u30ac\u30c1\u52e2', u'lang': u'ja', u'profile_background_tile': True, u'favourites_count': 3066, u'screen_name': u'SpeedPer', u'notifications': None, u'url': u'https://mobile.twitter.com/account', u'created_at': u'Thu Feb 23 05:10:57 +0000 2012', u'contributors_enabled': False, u'time_zone': u'Irkutsk', u'protected': False, u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'created_at': u'Wed Oct 17 12:48:33 +0000 2012', u'possibly_sensitive_editable': True, u'in_reply_to_status_id_str': None, u'place': None})
cursor.execute(insert_tweet_query, (tweet['id_str'], tweet))
close_connection(cursor, connection)
However, despite setting appropriate 'UTF-8' encodings I get an exception as follows
_mysql_exceptions.ProgrammingError: (1064, 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \': \'NULL\', u\'truncated\': \'0\', u\'text\': "\'RT #HMV_Anime: \\xe7\\x94\\xb0\\xe6\\x9d\\x91\\\' at line 1')
What am I doing wrong?

You could try serializing the dict with repr:
cursor.execute(insert_tweet_query, (tweet['id_str'], repr(tweet)))

Related

Trying to convert a weirdly nested or structured json dict into a csv or xlsx file

Hi, I have this JSON dict that I would like to simply reproduce in an xlsx or csv file (whichever is easier), but it's so weirdly structured that I have no idea how to format it. This is a snippet of it; it's very long and continues in the same structure:
{'status': {'timestamp': '2022-10-03T11:45:57.639Z', 'error_code': 0, 'error_message': None, 'elapsed': 122, 'credit_count': 25, 'notice': None, 'total_count': 9466}, 'data': [{'id': 1, 'name': 'Bitcoin', 'symbol': 'BTC', 'slug': 'bitcoin', 'num_market_pairs': 9758, 'date_added': '2013-04-28T00:00:00.000Z', 'tags': ['mineable', 'pow', 'sha-256', 'store-of-value', 'state-channel', 'coinbase-ventures-portfolio', 'three-arrows-capital-portfolio', 'polychain-capital-portfolio', 'binance-labs-portfolio', 'blockchain-capital-portfolio', 'boostvc-portfolio', 'cms-holdings-portfolio', 'dcg-portfolio', 'dragonfly-capital-portfolio', 'electric-capital-portfolio', 'fabric-ventures-portfolio', 'framework-ventures-portfolio', 'galaxy-digital-portfolio', 'huobi-capital-portfolio', 'alameda-research-portfolio', 'a16z-portfolio', '1confirmation-portfolio', 'winklevoss-capital-portfolio', 'usv-portfolio', 'placeholder-ventures-portfolio', 'pantera-capital-portfolio', 'multicoin-capital-portfolio', 'paradigm-portfolio'], 'max_supply': 21000000, 'circulating_supply': 19167806, 'total_supply': 19167806, 'platform': None, 'cmc_rank': 1, 'self_reported_circulating_supply': None, 'self_reported_market_cap': None, 'tvl_ratio': None, 'last_updated': '2022-10-03T11:43:00.000Z', 'quote': {'USD': {'price': 19225.658331409155, 'volume_24h': 24499551567.663418, 'volume_change_24h': 31.8917, 'percent_change_1h': 0.17357826, 'percent_change_24h': 0.07206242, 'percent_change_7d': 1.89824678, 'percent_change_30d': -3.09210177, 'percent_change_60d': -16.08415351, 'percent_change_90d': -2.52728996, 'market_cap': 368513689118.7344, 'market_cap_dominance': 39.6701, 'fully_diluted_market_cap': 403738824959.59, 'tvl': None, 'last_updated': '2022-10-03T11:43:00.000Z'}}}, {'id': 1027, 'name': 'Ethereum', 'symbol': 'ETH', 'slug': 'ethereum', 'num_market_pairs': 6121, 'date_added': '2015-08-07T00:00:00.000Z', 'tags': ['pos', 'smart-contracts', 'ethereum-ecosystem', 'coinbase-ventures-portfolio', 
'three-arrows-capital-portfolio', 'polychain-capital-portfolio', 'binance-labs-portfolio', 'blockchain-capital-portfolio', 'boostvc-portfolio', 'cms-holdings-portfolio', 'dcg-portfolio', 'dragonfly-capital-portfolio', 'electric-capital-portfolio', 'fabric-ventures-portfolio', 'framework-ventures-portfolio', 'hashkey-capital-portfolio', 'kenetic-capital-portfolio', 'huobi-capital-portfolio', 'alameda-research-portfolio', 'a16z-portfolio', '1confirmation-portfolio', 'winklevoss-capital-portfolio', 'usv-portfolio', 'placeholder-ventures-portfolio', 'pantera-capital-portfolio', 'multicoin-capital-portfolio', 'paradigm-portfolio', 'injective-ecosystem'], 'max_supply': None, 'circulating_supply': 122632957.499, 'total_supply': 122632957.499, 'platform': None, 'cmc_rank': 2, 'self_reported_circulating_supply': None, 'self_reported_market_cap': None, 'tvl_ratio': None, 'last_updated': '2022-10-03T11:43:00.000Z', 'quote': {'USD': {'price': 1296.4468710090778, 'volume_24h': 8517497687.565527, 'volume_change_24h': 23.596, 'percent_change_1h': 0.1720414, 'percent_change_24h': -0.21259957, 'percent_change_7d': 0.14320028, 'percent_change_30d': -16.39161383, 'percent_change_60d': -19.95869375, 'percent_change_90d': 15.00727432, 'market_cap': 158987114032.16776, 'market_cap_dominance': 17.1131, 'fully_diluted_market_cap': 158987114032.17, 'tvl': None, 'last_updated': '2022-10-03T11:43:00.000Z'}}}, {'id': 825, 'name': 'Tether', 'symbol': 'USDT', 'slug': 'tether', 'num_market_pairs': 40432, 'date_added': '2015-02-25T00:00:00.000Z', 'tags': ['payments', 'stablecoin', 'asset-backed-stablecoin', 'avalanche-ecosystem', 'solana-ecosystem', 'arbitrum-ecosytem', 'moonriver-ecosystem', 'injective-ecosystem', 'bnb-chain', 'usd-stablecoin'], 'max_supply': None, 'circulating_supply': 67949424437.85899, 'total_supply': 70155449906.09953, 'platform': .....to be continued
This is all I have:
.....
data = json.loads(response.text)
df = pd.json_normalize(data)
path = "C:\\Users\\NIWE\\Desktop\\Python\\PLS.xlsx"
writer = pd.ExcelWriter(path, engine="xlsxwriter")
df.to_excel(writer)
writer.save()
#writer.close()

Python dict parsing question

My code creates a dict file Sam10.txt:
My Code:
res=helpers.scan(es,query=search_param,index=index_name, size=10000,request_timeout = None,scroll= '10m')
x=0
#i=1
with open('Sam10.txt', 'w') as f:
for i in res:
x=x+1
my_dict = i['_source']
w = csv.DictWriter(f,my_dict.keys())
w.writerow(my_dict)
if x==3:
exit()
The output of the file looks like below: Showing just one line
"{'messageId': 'wssfx_20181012213423_8945_1_000011326', 'businessId': '2018081310342', 'batchId': 'wssfx_20181012213423_8945_1', 'sourceSystem': 'wssfx', 'secondarySourceSystem': None, 'sourceSystemCreationTimestamp': '2018-10-13T01:36:31.217Z', 'sentBy': 'wssfx', 'sentTo': 'SA', 'messageType': 'Trade', 'schemaVersion': '1.3', 'processing': 'EOD'}","{'tradeHeader': {'scotiaUPI': None, 'assetClass': 'NonCash-ForeignExchange-Forward', 'algoProductCategory': 'FX-Forex-SI', 'isInternalDeal': False, 'isTradingBook': True, 'tradeDate': '2018-08-13', 'entryDateTime': '2018-08-13T16:00:04.000Z', 'executionDateTime': {'millisTimestamp': '2018-08-13T16:00:04.000Z', 'nanoOfSecond': 0}, 'originalExecutionDateTime': None, 'tradeUpdateDateTime': '2018-08-13T16:00:04.000Z', 'tradeStatus': 'LIVE', 'tradeEvent': 'SnapShot', 'tradeSubEvent': None, 'sourceSystemProductId': {'sourceInternalId': 'FXF', 'sourceInternalIdType': None, 'sourceInstrumentName': 'FX FORWARD TRANSACTION', 'sourceInstrumentCategory': 'FX', 'sourceIsin': '', 'sourceCusip': None, 'sourceSedol1': None, 'sourceSedol2': None, 'primaryMarketId': None, 'primaryMarketIdType': None}, 'csaEligible': True, 'isPartOfPortfolioCompression': None, 'tradeIdentifiers': {'tradeId': {'id': '2018081310342', 'version': None}, 'previousTradeId': None, 'originatingTradeId': {'id': '2018081310342', 'version': None}, 'originatingOrderId': '', 'originatingParentOrderId': None, 'originatingQuoteId': None, 'originatingParentQuoteId': None, 'venueTransactionId': '', 'uniqueSwapId': '', 'uniqueTransactionId': None, 'tradePackageId': None, 'tradePackageSize': None, 'internalReverseTradeId': None, 'tradeName': None}, 'venueInfo': {'executionVenueType': None, 'executionPlatformId': '', 'exchangeCode': '', 'exchangeCodeType': None}, 'persons': {'tradeExecutorId': 'SML', 'traderId': None, 'traderName': None, 'traderLocation': None, 'salesPersonId': 'SML', 'salesPersonName': 'Colin Smolders', 'salesPersonLocation': None, 'algorithmId': None, 
'algorithmName': None, 'algorithmLocation': None}, 'settlement': {'settlementType': 'Physical', 'isClearingEligible': False, 'isNetted': None, 'settlementDate': '2019-10-31'}, 'regulatory': {'isdaUPIv1': 'ForeignExchange:Forward', 'isdaUPIv2': None, 'isdaAssetClass': 'ForeignExchange', 'isdaBaseProductId': 'Forward', 'isdaSubProductId': None, 'isdaTransactionType': None, 'cfiCode': None, 'isDoddFrankUsPerson': None, 'isVolckerSpot': None, 'isEmirSpot': False, 'mifidTradingCapacity': None, 'mifidTradingCapacityEnum': 'DEAL', 'mifidInvestmentDecisionWithinFirm': 'SML', 'mifidExecutionWithinFirm': 'SML', 'isMifidRTO': None, 'isMifidPriceMaker': False, 'isHedgeTrade': False, 'isMifidAgencyTrade': False, 'isMifidSecuritiesFinancingTrans': False, 'isMifidCommodityDerivative': None, 'mifidTransparencyFlag': None, 'mifidWaiverIndicators': None, 'mifidOtcPostTradeIndicators': None, 'mifidInstrumentIdentificationType': None, 'mifidInstrumentIdentificationCode': '', 'isMifid2FinInstrument': None, 'mifidLastLqdtyInd': None, 'mifidBuySell': 'Buy', 'mifidQuantity': 1509000.0, 'mifidQuantityCurrency': 'USD', 'mifidPriceCurrency': 'CAD', 'mifidNotionalCurrencyAmount': 1509000.0, 'mifidOutstandingNotionalAmount': None, 'mifidNotionalCurrency': 'USD', 'mifidNotionalCurrency1': 'CAD', 'mifidNotionalCurrency2': 'USD', 'mifidOtherDetails': None, 'shortSellingIndicator': None}}, 'book': {'bookingPoint': None, 'bookId': 'NAFXFW', 'bookDescription': 'NORTH AMERICAN FORWARD', 'scotiaLegalEntityId': 'L3I9ZG2KFGXZ61BMYR72', 'transitNumber': '67496'}, 'parties': {'counterparty': {'partyId': 'TDBT', 'partyIdType': None, 'partyName': 'TD BANK TREASURY', 'partyLei': 'PT3QB789TSUIDF371261', 'cardsId': None, 'ccdId': None}, 'originalCounterparty': {'partyId': 'TDBT', 'partyIdType': None, 'partyName': 'TD BANK TREASURY', 'partyLei': 'PT3QB789TSUIDF371261', 'cardsId': None, 'ccdId': None}, 'client': None, 'cardsId': '086641', 'ccdId': '1000177', 'executingParty': {'partyId': 'GFXGL', 'partyIdType': 
None, 'partyName': 'GFXGL', 'partyLei': 'L3I9ZG2KFGXZ61BMYR72', 'cardsId': None, 'ccdId': None}, 'executingBroker': None, 'clearingParty': {'partyId': None, 'partyIdType': None, 'partyName': None, 'partyLei': None, 'cardsId': None, 'ccdId': None}, 'orderOriginatingParty': None, 'triPartyAgent': None}, 'costsAndCharges': None, 'clearingInstructions': None, 'sourceSystemSpecific': None, 'product': {'npvCurrency': 'USD', 'payCurrency': 'USD', 'payNotional': 1509000.0, 'payDiscountCurve': 'DK', 'receiveCurrency': 'CAD', 'receiveNotional': 1969607.16, 'receiveDiscountCurve': 'DK', 'paymentHolidayCenters': ['CAD', 'USD'], 'theoreticalModel': '', 'fxRate': {'currencyPair': 'USDCAD', 'quoteTimestamp': '2018-08-13T16:00:04.000Z', 'quoteBasis': 'ReceiveCurrencyPerPayCurrency', 'quoteValue': 1.30524}, 'isSpotTrade': False, 'isForwardStarting': None, 'calculatedTrueSpotDate': '2018-08-14', 'isPaySideNonDeliverable': None, 'payDeliveryCurrency': 'USD', 'payNdfReferenceIndex': None, 'payNdfResetOffset': None, 'payNdfFxReset': None, 'isReceiveSideNonDeliverable': None, 'receiveDeliveryCurrency': 'CAD', 'receiveNdfReferenceIndex': None, 'receiveNdfResetOffset': None, 'receiveNdfFxReset': None, 'ndfResetHolidayCenters': None, 'isTimeOptionForward': False, 'timeOptionSet': None, 'isPartOfFxSwap': False, 'fxSwapSet': None, 'events': {'effectiveDate': '2018-08-13', 'terminationDate': '2019-10-31', 'tenorBusinessPeriod': {'periodMultiplier': 301, 'period': 'd'}, 'contractBreaks': None}}}"
My question is.
How do I extract the values from this dict?
I keep getting the below error:
for lr in sam_dict['header']:
TypeError: 'generator' object is not subscriptable
or
TypeError: string indices must be integers
The final out should be a csv file.
Let's take just 3 fields as an example:
messageId,scotiaUPI,sourceInternalId
wssfx_20181012213423_8945_1_000011326,None,FXF
Any help is appreciated.
Regards,
Sam
Thanks for your responses.
I was able to parse it by.
reading the file using:
f=open('XXXX.txt','r')
lineread=f.readlines() # f.read() does not work for some reason
for i in lineread:
print(eval(i)) #just the eval() function
So the problem was solved by using:
f.readlines() instead of f.read()
and
eval()

convert api response to pandas

I'd like to convert API response into a pandas dataframe to make it easier to manipulate.
Below it's what I've tried so far:
import requests
import pandas as pd
URL = 'https://api.gleif.org/api/v1/lei-records?page[size]=10&page[number]=1&filter[entity.names]=*'
r = requests.get(URL, proxies=proxyDict)
x = r.json()
x
out:
{'meta': {'goldenCopy': {'publishDate': '2020-07-14T00:00:00Z'},
'pagination': {'currentPage': 1,
'perPage': 10,
'from': 1,
'to': 10,
'total': 1675786,
'lastPage': 167579}},
'links': {'first': 'https://api.gleif.org/api/v1/lei-records?filter%5Bentity.names%5D=%2A&page%5Bnumber%5D=1&page%5Bsize%5D=10',
'next': 'https://api.gleif.org/api/v1/lei-records?filter%5Bentity.names%5D=%2A&page%5Bnumber%5D=2&page%5Bsize%5D=10',
'last': 'https://api.gleif.org/api/v1/lei-records?filter%5Bentity.names%5D=%2A&page%5Bnumber%5D=167579&page%5Bsize%5D=10'},
'data': [{'type': 'lei-records',
'id': '254900RR9EUYHB7PI211',
'attributes': {'lei': '254900RR9EUYHB7PI211',
'entity': {'legalName': {'name': 'MedicLights Research Inc.',
'language': None},
'otherNames': [],
'transliteratedOtherNames': [],
'legalAddress': {'language': None,
'addressLines': ['300 Ranee Avenue'],
'addressNumber': None,
'addressNumberWithinBuilding': None,
'mailRouting': None,
'city': 'Toronto',
'region': 'CA-ON',
'country': 'CA',
'postalCode': 'M6A 1N8'},
'headquartersAddress': {'language': None,
'addressLines': ['76 Marble Arch Crescent'],
'addressNumber': None,
'addressNumberWithinBuilding': None,
'mailRouting': None,
'city': 'Toronto',
'region': 'CA-ON',
'country': 'CA',
'postalCode': 'M1R 1W9'},
'registeredAt': {'id': 'RA000079', 'other': None},
'registeredAs': '002185472',
'jurisdiction': 'CA-ON',
'category': None,
'legalForm': {'id': 'O90R', 'other': None},
'associatedEntity': {'lei': None, 'name': None},
'status': 'ACTIVE',
'expiration': {'date': None, 'reason': None},
'successorEntity': {'lei': None, 'name': None},
'otherAddresses': []},
'registration': {'initialRegistrationDate': '2020-07-13T21:09:50Z',
'lastUpdateDate': '2020-07-13T21:09:50Z',
'status': 'ISSUED',
'nextRenewalDate': '2021-07-13T21:09:50Z',
'managingLou': '5493001KJTIIGC8Y1R12',
'corroborationLevel': 'PARTIALLY_CORROBORATED',
'validatedAt': {'id': 'RA000079', 'other': None},
'validatedAs': '002185472'},
'bic': None},
'relationships': {'managing-lou': {'links': {'related': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/managing-lou'}},
'lei-issuer': {'links': {'related': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/lei-issuer'}},
'direct-parent': {'links': {'reporting-exception': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/direct-parent-reporting-exception'}},
'ultimate-parent': {'links': {'reporting-exception': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211/ultimate-parent-reporting-exception'}}},
'links': {'self': 'https://api.gleif.org/api/v1/lei-records/254900RR9EUYHB7PI211'}},
{'type': 'lei-records',
'id': '254900F9XV2K6IR5TO93',
Then I tried to put it into pandas and gives me the following results
f = pd.DataFrame(x['data'])
f
type id attributes relationships links
0 lei-records 254900RR9EUYHB7PI211 {'lei': '254900RR9EUYHB7PI211', 'entity': {'le... {'managing-lou': {'links': {'related': 'https:... {'self': 'https://api.gleif.org/api/v1/lei-rec...
1 lei-records 254900F9XV2K6IR5TO93 {'lei': '254900F9XV2K6IR5TO93', 'entity': {'le... {'managing-lou': {'links': {'related': 'https:... {'self': 'https://api.gleif.org/api/v1/lei-rec...
2 lei-records 254900DIC0729LEXNL12 {'lei': '254900DIC0729LEXNL12', 'entity': {'le... {'managing-lou': {'links': {'related': 'https:... {'self': 'https://api.gleif.org/api/v1/lei-rec...
Which isn't the result expected. I even tried to read_json with below codes:
g = pd.read_json(x.text)
g
which gives me the error
AttributeError: 'dict' object has no attribute 'text'
the expected output should look like this:
lei entity.legalName.name entity.legalAddress.addressLines entity.legalAddress.city entity.legalAddress.postalcode status registration.status
254900RR9EUYHB7PI211 MedicLights Research Inc. 300 Ranee Avenue Toronto M6A 1N8 ACTIVE ISSUED
Thanks to anyone who can help.
Use json_normalize like:
pd.json_normalize(x['data'])
Here is another method to use the pandas to normalize the json file using pandas.io.json.json_normalize from pandas.io.json library.
How to normalize json correctly by Python Pandas

How to extract certain information from a string and create a json object in python

I made a get request to a website and parsed it using BS4 using 'Html.parser'. I want to extract the ID, size and availability from the string. I have parsed it down to this final string:
'{"id":706816278547,"parent_id":81935859731,"available":false,
"sku":"665570057894","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["S"],
"option1":"s","option2":"","option3":"","option4":""},
{"id":707316252691,"parent_id":81935859731,"available":true,
"sku":"665570057900","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["M"],
"option1":"m","option2":"","option3":"", "option4":""},
{"id":707316285459,"parent_id":81935859731,"available":true,
"sku":"665570057917","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["L"],
"option1":"l","option2":"","option3":"","option4":""},`
{"id":707316318227,"parent_id":81935859731,"available":true,`
"sku":"665570057924","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["XL"],
"option1":"xl","option2":"","option3":"","option4":""}'
I also tried using the split() method, but I get lost and I'm unable to extract the needed information without creating a cluttered list.
I tried using json.loads() so i could just extract the information needed by calling the key and value pairs but i get the following error
final_id =
'{"id":706816278547,"parent_id":81935859731,"available":false,
"sku":"665570057894","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["S"],
"option1":"s","option2":"","option3":"","option4":""},
{"id":707316252691,"parent_id":81935859731,"available":true,
"sku":"665570057900","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["M"],
"option1":"m","option2":"","option3":"", "option4":""},
{"id":707316285459,"parent_id":81935859731,"available":true,
"sku":"665570057917","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["L"],
"option1":"l","option2":"","option3":"","option4":""},`
{"id":707316318227,"parent_id":81935859731,"available":true,`
"sku":"665570057924","featured_image":null,"public_title":null,
"requires_shipping":true,"price":40000,"options":["XL"],
"option1":"xl","option2":"","option3":"","option4":""}'
find_id = json.loads(final_id)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/anaconda3/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/anaconda3/lib/python3.7/json/decoder.py", line 340, in decode
raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 1 column 233 (char 232)
I want to create a json object for each ID and Size and if that size is available or not.
Any help is welcomed. Thank you.
First, that's not valid JSON as given.
Second, json.load (as opposed to json.loads, which takes a string) works on file objects, so saving this data to a file as a proper JSON array solves the issue; note that null in JSON corresponds to None in Python — json.load parses a JSON file into Python objects, so
import json
with open('sof.json', 'r') as stackof:
final_id = json.load(stackof)
print(final_id)
will output
[{'id': 706816278547, 'parent_id': 81935859731, 'available': 'false', 'sku': '665570057894', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['S'], 'option1': 's', 'option2': '', 'option3': '', 'option4': ''}, {'id': 707316252691, 'parent_id': 81935859731, 'available': True, 'sku': '665570057900', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['M'], 'option1': 'm', 'option2': '', 'option3': '', 'option4': ''}, {'id': 707316285459, 'parent_id': 81935859731, 'available': True, 'sku': '665570057917', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['L'], 'option1': 'l', 'option2': '', 'option3': '', 'option4': ''}, {'id': 707316318227, 'parent_id': 81935859731, 'available': True, 'sku': '665570057924', 'featured_image': None, 'public_title': None, 'requires_shipping': True, 'price': 40000, 'options': ['XL'], 'option1': 'xl', 'option2': '', 'option3': '', 'option4': ''}]
i made all of them divided into array, so now if you print the first id you should write
print(final_id[0]['id'])
output:
706816278547
Tell me in the comments if that helped you,
btw click on >> sof.json to see sof.json

un-nest json columns in pandas

I have the code below which reads data in from a json file to a pandas dataframe. Some of the columns like "attributes" still wind up with dicts in them. I'd like them to be columns like "attributes.GoodForMeal.Dessert", similar to what the flatten function from r does.
Can anyone suggest a way to do this in python?
Code:
df_business = pd.read_json('dataset/business.json', lines=True)
print(df_business[1:3])
Data:
address attributes \
1 2824 Milton Rd {u'GoodForMeal': {u'dessert': False, u'latenig...
2 337 Danforth Avenue {u'BusinessParking': {u'garage': False, u'stre...
business_id categories \
1 mLwM-h2YhXl2NCgdS84_Bw [Food, Soul Food, Convenience Stores, Restaura...
2 v2WhjAB3PIBA8J8VxG3wEg [Food, Coffee & Tea]
city hours is_open \
1 Charlotte {u'Monday': u'10:00-22:00', u'Tuesday': u'10:0... 0
2 Toronto {u'Monday': u'10:00-19:00', u'Tuesday': u'10:0... 0
latitude longitude name neighborhood \
1 35.236870 -80.741976 South Florida Style Chicken & Ribs Eastland
2 43.677126 -79.353285 The Tea Emporium Riverdale
postal_code review_count stars state
1 28215 4 4.5 NC
2 M4K 1N7 7 4.5 ON
Update:
from pandas.io.json import json_normalize
print json_normalize('dataset/business.json')
Error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-12-bb0ce59acb26> in <module>()
1 from pandas.io.json import json_normalize
----> 2 print json_normalize('dataset/business.json')
/Users/anaconda/lib/python2.7/site-packages/pandas/io/json.pyc in json_normalize(data, record_path, meta, meta_prefix, record_prefix)
791
792 if record_path is None:
--> 793 if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
794 # naive normalization, this is idempotent for flat records
795 # and potentially will inflate the data considerably for
/Users/anaconda/lib/python2.7/site-packages/pandas/compat/__init__.pyc in itervalues(obj, **kw)
169
170 def itervalues(obj, **kw):
--> 171 return obj.itervalues(**kw)
172
173 next = lambda it : it.next()
AttributeError: 'str' object has no attribute 'itervalues'
Update2:
Code:
import json;
json_normalize(json.load('dataset/business.json'))
Error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-20-4fb4bf64efc6> in <module>()
1 import json;
----> 2 json_normalize(json.load('dataset/business.json'))
/Users/anaconda/lib/python2.7/json/__init__.pyc in load(fp, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
285
286 """
--> 287 return loads(fp.read(),
288 encoding=encoding, cls=cls, object_hook=object_hook,
289 parse_float=parse_float, parse_int=parse_int,
AttributeError: 'str' object has no attribute 'read'
Update3:
Code:
with open('dataset/business.json') as f:
df = json_normalize(json.load(f))
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-21-e3449614f320> in <module>()
1 with open('dataset/business.json') as f:
----> 2 df = json_normalize(json.load(f))
/Users/anaconda/lib/python2.7/json/__init__.pyc in load(fp, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
289 parse_float=parse_float, parse_int=parse_int,
290 parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
--> 291 **kw)
292
293
/Users/anaconda/lib/python2.7/json/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
337 parse_int is None and parse_float is None and
338 parse_constant is None and object_pairs_hook is None and not kw):
--> 339 return _default_decoder.decode(s)
340 if cls is None:
341 cls = JSONDecoder
/Users/anaconda/lib/python2.7/json/decoder.pyc in decode(self, s, _w)
365 end = _w(s, end).end()
366 if end != len(s):
--> 367 raise ValueError(errmsg("Extra data", s, end, len(s)))
368 return obj
369
ValueError: Extra data: line 2 column 1 - line 156640 column 1 (char 731 - 132272455)
Update4:
Code:
with open('dataset/business.json') as f:
reviews = f.read().strip().split("\n")
reviews = [json.loads(review) for review in reviews]
reviews[1:5]
Sample Data:
[{u'address': u'2824 Milton Rd',
u'attributes': {u'Ambience': {u'casual': False,
u'classy': False,
u'divey': False,
u'hipster': False,
u'intimate': False,
u'romantic': False,
u'touristy': False,
u'trendy': False,
u'upscale': False},
u'BusinessAcceptsCreditCards': False,
u'GoodForKids': True,
u'GoodForMeal': {u'breakfast': False,
u'brunch': False,
u'dessert': False,
u'dinner': False,
u'latenight': False,
u'lunch': False},
u'HasTV': False,
u'NoiseLevel': u'average',
u'OutdoorSeating': False,
u'RestaurantsAttire': u'casual',
u'RestaurantsDelivery': True,
u'RestaurantsGoodForGroups': True,
u'RestaurantsPriceRange2': 2,
u'RestaurantsReservations': False,
u'RestaurantsTakeOut': True},
u'business_id': u'mLwM-h2YhXl2NCgdS84_Bw',
u'categories': [u'Food',
u'Soul Food',
u'Convenience Stores',
u'Restaurants'],
u'city': u'Charlotte',
u'hours': {u'Friday': u'10:00-22:00',
u'Monday': u'10:00-22:00',
u'Saturday': u'10:00-22:00',
u'Sunday': u'10:00-22:00',
u'Thursday': u'10:00-22:00',
u'Tuesday': u'10:00-22:00',
u'Wednesday': u'10:00-22:00'},
u'is_open': 0,
u'latitude': 35.23687,
u'longitude': -80.7419759,
u'name': u'South Florida Style Chicken & Ribs',
u'neighborhood': u'Eastland',
u'postal_code': u'28215',
u'review_count': 4,
u'stars': 4.5,
u'state': u'NC'},
{u'address': u'337 Danforth Avenue',
u'attributes': {u'BikeParking': True,
u'BusinessAcceptsCreditCards': True,
u'BusinessParking': {u'garage': False,
u'lot': False,
u'street': True,
u'valet': False,
u'validated': False},
u'OutdoorSeating': False,
u'RestaurantsPriceRange2': 2,
u'WheelchairAccessible': True,
u'WiFi': u'no'},
u'business_id': u'v2WhjAB3PIBA8J8VxG3wEg',
u'categories': [u'Food', u'Coffee & Tea'],
u'city': u'Toronto',
u'hours': {u'Friday': u'10:00-19:00',
u'Monday': u'10:00-19:00',
u'Saturday': u'10:00-18:00',
u'Sunday': u'12:00-17:00',
u'Thursday': u'10:00-19:00',
u'Tuesday': u'10:00-19:00',
u'Wednesday': u'10:00-19:00'},
u'is_open': 0,
u'latitude': 43.6771258,
u'longitude': -79.3532848,
u'name': u'The Tea Emporium',
u'neighborhood': u'Riverdale',
u'postal_code': u'M4K 1N7',
u'review_count': 7,
u'stars': 4.5,
u'state': u'ON'},
{u'address': u'7702 E Doubletree Ranch Rd, Ste 300',
u'attributes': {},
u'business_id': u'CVtCbSB1zUcUWg-9TNGTuQ',
u'categories': [u'Professional Services', u'Matchmakers'],
u'city': u'Scottsdale',
u'hours': {u'Friday': u'9:00-17:00',
u'Monday': u'9:00-17:00',
u'Thursday': u'9:00-17:00',
u'Tuesday': u'9:00-17:00',
u'Wednesday': u'9:00-17:00'},
u'is_open': 1,
u'latitude': 33.5650816,
u'longitude': -111.9164003,
u'name': u'TRUmatch',
u'neighborhood': u'',
u'postal_code': u'85258',
u'review_count': 3,
u'stars': 3.0,
u'state': u'AZ'},
{u'address': u'4719 N 20Th St',
u'attributes': {u'Alcohol': u'none',
u'Ambience': {u'casual': False,
u'classy': False,
u'divey': False,
u'hipster': False,
u'intimate': False,
u'romantic': False,
u'touristy': False,
u'trendy': False,
u'upscale': False},
u'BikeParking': True,
u'BusinessAcceptsCreditCards': True,
u'BusinessParking': {u'garage': False,
u'lot': False,
u'street': False,
u'valet': False,
u'validated': False},
u'Caters': True,
u'GoodForKids': True,
u'GoodForMeal': {u'breakfast': False,
u'brunch': False,
u'dessert': False,
u'dinner': False,
u'latenight': False,
u'lunch': False},
u'HasTV': False,
u'NoiseLevel': u'quiet',
u'OutdoorSeating': False,
u'RestaurantsAttire': u'casual',
u'RestaurantsDelivery': False,
u'RestaurantsGoodForGroups': True,
u'RestaurantsPriceRange2': 1,
u'RestaurantsReservations': False,
u'RestaurantsTableService': False,
u'RestaurantsTakeOut': True,
u'WiFi': u'no'},
u'business_id': u'duHFBe87uNSXImQmvBh87Q',
u'categories': [u'Sandwiches', u'Restaurants'],
u'city': u'Phoenix',
u'hours': {},
u'is_open': 0,
u'latitude': 33.5059283,
u'longitude': -112.0388474,
u'name': u'Blimpie',
u'neighborhood': u'',
u'postal_code': u'85016',
u'review_count': 10,
u'stars': 4.5,
u'state': u'AZ'}]