Not able to read CSV in pandas, error: list index out of range

It seems that my Excel file saved in CSV format is not being read properly as a CSV file in JupyterLab. I have the same code as all of my classmates, but they do not get any error. Could there be something wrong with my Excel settings, or is the problem mainly in my code?
**This is my code:**
import pandas as pd

Use_raw = pd.read_csv("U15_US_2021.csv", header=3, index_col=1, na_values='---').drop('Unnamed: 0', axis=1)
Use=Use_raw.iloc[0:17,0:15].fillna(0)
ValueAdded=Use_raw.iloc[18:22,0:15].fillna(0)
FinalDemand=Use_raw.iloc[0:17,16:21].fillna(0)
Supply_raw = pd.read_csv('S15_US_2021.csv', header=3, index_col=1,na_values='---')
Supply=Supply_raw.iloc[0:17,1:16].fillna(0)
The error:
IndexError Traceback (most recent call last)
Cell In[23], line 1
----> 1 Use_raw = pd.read_csv("U15_US_2021.csv", header=3, index_col=1, na_values='--- ').drop('Unnamed: 0', axis=1)
2 Use=Use_raw.iloc[0:17,0:15].fillna(0)
3 ValueAdded=Use_raw.iloc[18:22,0:15].fillna(0)
File ~\anaconda3\envs\IO\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File ~\anaconda3\envs\IO\lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~\anaconda3\envs\IO\lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File ~\anaconda3\envs\IO\lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File ~\anaconda3\envs\IO\lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File ~\anaconda3\envs\IO\lib\site-packages\pandas\io\parsers\readers.py:1753, in TextFileReader._make_engine(self, f, engine)
1750 raise ValueError(msg)
1752 try:
-> 1753 return mapping[engine](f, **self.options)
1754 except Exception:
1755 if self.handles is not None:
File ~\anaconda3\envs\IO\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:174, in CParserWrapper.__init__(self, src, **kwds)
164 if self._reader.leading_cols == 0 and is_index_col(
165 self.index_col # type: ignore[has-type]
166 ):
168 self._name_processed = True
169 (
170 index_names,
171 # error: Cannot determine type of 'names'
172 self.names, # type: ignore[has-type]
173 self.index_col,
--> 174 ) = self._clean_index_names(
175 # error: Cannot determine type of 'names'
176 self.names, # type: ignore[has-type]
177 # error: Cannot determine type of 'index_col'
178 self.index_col, # type: ignore[has-type]
179 )
181 if self.index_names is None:
182 self.index_names = index_names
File ~\anaconda3\envs\IO\lib\site-packages\pandas\io\parsers\base_parser.py:999, in ParserBase._clean_index_names(self, columns, index_col)
997 break
998 else:
--> 999 name = cp_cols[c]
1000 columns.remove(name)
1001 index_names.append(name)
IndexError: list index out of range
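An IndexError inside _clean_index_names usually means pandas parsed fewer columns than index_col=1 requires. A common cause is exactly the Excel-settings issue suspected above: with some regional settings, Excel saves "CSV" files with semicolons instead of commas, so every row is parsed as a single column. A minimal diagnostic sketch, reusing the file name from the question:

import pandas as pd

# Peek at the raw file to see which delimiter Excel actually wrote
with open("U15_US_2021.csv") as f:
    for _ in range(5):
        print(f.readline().rstrip())

# If the rows contain semicolons, pass the separator explicitly
Use_raw = pd.read_csv("U15_US_2021.csv", sep=';', header=3,
                      index_col=1, na_values='---').drop('Unnamed: 0', axis=1)

If your classmates exported the same workbook on machines with comma-based regional settings, that would explain why only your copy of the file fails.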

Related

ValueError when using the CTGAN library to generate samples

I am using the CTGAN library in a Colab notebook. I passed it a tabular dataset with one categorical feature, and I declared the categorical feature as described in the documentation. Model training completes without error, but I get a ValueError when generating simulated data.
How do I resolve this error?
A reproducible example is below:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import preprocessing
from ctgan import CTGAN

# load iris and label-encode the categorical column
iris = sns.load_dataset('iris')
le = preprocessing.LabelEncoder()
le.fit(iris['species'].unique())
iris['species'] = pd.DataFrame(le.transform(iris['species']))
data = iris.copy()

# train the model
ctgan_model = CTGAN(epochs=2, batch_size=50, verbose=True)
ctgan_model.fit(data)

# sample synthetic rows (note: must use the ctgan_model variable defined above)
n_ctgan_generated_data = 2000
synthetic_data = ctgan_model.sample(n_ctgan_generated_data)
Complete error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-17-199b6dc04389> in <module>
1 n_ctgan_generated_data = 2000
----> 2 synthetic_data = ctgan.sample(n_ctgan_generated_data)
/usr/local/lib/python3.8/dist-packages/ctgan/synthesizers/base.py in wrapper(self, *args, **kwargs)
48 def wrapper(self, *args, **kwargs):
49 if self.random_states is None:
---> 50 return function(self, *args, **kwargs)
51
52 else:
/usr/local/lib/python3.8/dist-packages/ctgan/synthesizers/ctgan.py in sample(self, n, condition_column, condition_value)
475 data = data[:n]
476
--> 477 return self._transformer.inverse_transform(data)
478
479 def set_device(self, device):
/usr/local/lib/python3.8/dist-packages/ctgan/data_transformer.py in inverse_transform(self, data, sigmas)
211 column_data = data[:, st:st + dim]
212 if column_transform_info.column_type == 'continuous':
--> 213 recovered_column_data = self._inverse_transform_continuous(
214 column_transform_info, column_data, sigmas, st)
215 else:
/usr/local/lib/python3.8/dist-packages/ctgan/data_transformer.py in _inverse_transform_continuous(self, column_transform_info, column_data, sigmas, st)
185 def _inverse_transform_continuous(self, column_transform_info, column_data, sigmas, st):
186 gm = column_transform_info.transform
--> 187 data = pd.DataFrame(column_data[:, :2], columns=list(gm.get_output_sdtypes()))
188 data.iloc[:, 1] = np.argmax(column_data[:, 1:], axis=1)
189 if sigmas is not None:
/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
670 )
671 else:
--> 672 mgr = ndarray_to_mgr(
673 data,
674 index,
/usr/local/lib/python3.8/dist-packages/pandas/core/internals/construction.py in ndarray_to_mgr(values, index, columns, dtype, copy, typ)
322 )
323
--> 324 _check_values_indices_shape_match(values, index, columns)
325
326 if typ == "array":
/usr/local/lib/python3.8/dist-packages/pandas/core/internals/construction.py in _check_values_indices_shape_match(values, index, columns)
391 passed = values.shape
392 implied = (len(index), len(columns)-1)
--> 393 raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
394
395
ValueError: Shape of passed values is (2000, 2), indices imply (2000, 3)
Is this an issue with the library itself, where I would have to change the source code?
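This particular shape mismatch inside inverse_transform is often reported as a version mismatch between ctgan and its rdt dependency (the gm.get_output_sdtypes() call in the trace belongs to an rdt API that changed between releases), so upgrading both packages to matching versions is worth trying before touching any source code. Separately, CTGAN can model the categorical column natively, which makes the label encoding unnecessary; a minimal sketch, assuming a recent ctgan release:

import seaborn as sns
from ctgan import CTGAN

iris = sns.load_dataset('iris')

# declare 'species' as a discrete column instead of label-encoding it
ctgan_model = CTGAN(epochs=2, batch_size=50, verbose=True)
ctgan_model.fit(iris, discrete_columns=['species'])

synthetic_data = ctgan_model.sample(2000)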

Error when using factor_analyzer. Empty data passed with indices specified

I am getting the following error when using factor_analyzer.
I was able to get the example from this tutorial to work: https://www.datacamp.com/community/tutorials/introduction-factor-analysis
The main difference I see is that their values were all numeric, while mine are strings.
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
chi_square_value, p_value = calculate_bartlett_sphericity(df)
chi_square_value, p_value
(-0.0, 0.0)

from factor_analyzer.factor_analyzer import calculate_kmo
kmo_all, kmo_model = calculate_kmo(df)
ValueError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1664
-> 1665 mgr = BlockManager(blocks, axes)
1666 mgr._consolidate_inplace()
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in __init__(self, blocks, axes, do_integrity_check)
148 if do_integrity_check:
--> 149 self._verify_integrity()
150
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in _verify_integrity(self)
325 if block.shape[1:] != mgr_shape[1:]:
--> 326 raise construction_error(tot_items, block.shape[1:], self.axes)
327 if len(self.items) != tot_items:
ValueError: Empty data passed with indices specified.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-31-af263734b3bc> in <module>
1 from factor_analyzer.factor_analyzer import calculate_kmo
----> 2 kmo_all,kmo_model=calculate_kmo(df)
~\anaconda3\lib\site-packages\factor_analyzer\factor_analyzer.py in calculate_kmo(data)
143 corr_sum = corr.sum(0)
144 kmo_per_item = corr_sum / (corr_sum + partial_corr_sum)
--> 145 kmo_per_item = pd.DataFrame(kmo_per_item,
146 index=data.columns,
147 columns=['KMO'])
~\anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
495 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
496 else:
--> 497 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
498
499 # For data is list-like, or Iterable (will consume into list)
~\anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
232 block_values = [values]
233
--> 234 return create_block_manager_from_blocks(block_values, [columns, index])
235
236
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1670 blocks = [getattr(b, "values", b) for b in blocks]
1671 tot_items = sum(b.shape[0] for b in blocks)
-> 1672 raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
1673
1674
ValueError: Empty data passed with indices specified.
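Both the (-0.0, 0.0) Bartlett result and the "Empty data passed with indices specified" error point to the string values: the correlation matrix of non-numeric columns comes out empty, and calculate_kmo then tries to build a DataFrame from that empty result while data.columns still names all of your columns. A minimal sketch of coercing the data to numeric first, assuming df is the DataFrame from the question:

import pandas as pd
from factor_analyzer.factor_analyzer import calculate_kmo

# Coerce string columns that hold numbers; anything unparseable becomes NaN
df_num = df.apply(pd.to_numeric, errors='coerce').dropna()

kmo_all, kmo_model = calculate_kmo(df_num)

If the strings are true categories rather than numbers stored as text, they would need to be encoded or excluded instead, since KMO is only meaningful for numeric variables.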

CUDA out of memory error even after reducing the batch size

I'm trying to train a deep learning model in an Azure Notebook that uses the GPU of a DSVM (Ubuntu 18.04, Standard NC6: 6 vCPUs, 56 GiB memory), and I'm getting the following error:
RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 11.17 GiB total capacity; 10.76 GiB already allocated; 50.31 MiB free; 10.84 GiB reserved in total by PyTorch)
I've searched around and couldn't find a solution in any of the related questions on the web. The '10.84 GiB reserved in total by PyTorch' part of the message caught my attention: can this be configured to reserve less memory? I would appreciate any opinions on this. Thank you.
This is my code for fine-tuning/training:
for epoch in range(EPOCHS):
    for idx, article in tqdm_notebook(enumerate(article_loader)):
        article_tens = torch.tensor(tokenizer.encode(article[0], max_length=1024)).unsqueeze(0).to(device)
        outputs = model(article_tens, labels=article_tens)
        train_loss, prediction_scores = outputs[:2]
        train_loss.backward()
        train_sum_loss = train_sum_loss + train_loss.detach().data
        iteration_count = idx
        article_count = article_count + 1
        if article_count == BATCH_SIZE:
            article_count = 0
            batch_count += 1
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            model.zero_grad()
Whole Stack-trace of the error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-2c74a22e42f7> in <module>
20 article_tens = torch.tensor(tokenizer.encode(article[0], max_length=1024)).unsqueeze(0).to(device)
21
---> 22 outputs = model(article_tens, labels=article_tens)
23
24 train_loss, prediction_scores = outputs[:2]
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/transformers/modeling_gpt2.py in forward(self, input_ids, past, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, use_cache)
602 head_mask=head_mask,
603 inputs_embeds=inputs_embeds,
--> 604 use_cache=use_cache,
605 )
606 hidden_states = transformer_outputs[0]
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/transformers/modeling_gpt2.py in forward(self, input_ids, past, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, use_cache)
486 attention_mask=attention_mask,
487 head_mask=head_mask[i],
--> 488 use_cache=use_cache,
489 )
490
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/transformers/modeling_gpt2.py in forward(self, x, layer_past, attention_mask, head_mask, use_cache)
240
241 x = x + a
--> 242 m = self.mlp(self.ln_2(x))
243 x = x + m
244
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/transformers/modeling_gpt2.py in forward(self, x)
215
216 def forward(self, x):
--> 217 h = self.act(self.c_fc(x))
218 h2 = self.c_proj(h)
219 return self.dropout(h2)
/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/transformers/activations.py in gelu_new(x)
27 Also see https://arxiv.org/abs/1606.08415
28 """
---> 29 return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
30
31
RuntimeError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 11.17 GiB total capacity; 10.74 GiB already allocated; 320.00 KiB free; 10.89 GiB reserved in total by PyTorch)
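Since the loop above already accumulates gradients over BATCH_SIZE articles, the usual "reduce the batch size" advice changes nothing here: each backward pass still stores the activations for one full 1024-token GPT-2 sequence, and that is what fills the 11 GiB card. The "reserved in total by PyTorch" figure is the caching allocator, not a quota you can lower below what the model actually needs. A minimal sketch of the cheapest levers, assuming the same model, tokenizer, optimizer, and loader objects as in the question (the gradient-accumulation bookkeeping is dropped for brevity):

import torch

MAX_LEN = 512  # activation memory grows with sequence length; halving it helps a lot

for epoch in range(EPOCHS):
    for idx, article in enumerate(article_loader):
        article_tens = torch.tensor(
            tokenizer.encode(article[0], max_length=MAX_LEN)
        ).unsqueeze(0).to(device)

        outputs = model(article_tens, labels=article_tens)
        train_loss = outputs[0]
        train_loss.backward()
        train_sum_loss = train_sum_loss + train_loss.item()  # keep a float, not a tensor

        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        # drop references so the caching allocator can reuse their blocks
        del outputs, article_tens, train_loss

If that is still not enough, gradient checkpointing or mixed-precision (fp16) training trades compute for memory without changing the effective batch size.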

JSON decode issue backtesting with zipline

I'm running the test code below on an Ubuntu server. I'm trying to backtest a simple buy-and-hold strategy with zipline, mainly to make sure I've got everything installed correctly. I'm pretty new to zipline, and I'm getting the error below and am not sure why.
I'm following the steps in this blog post:
https://towardsdatascience.com/introduction-to-backtesting-trading-strategies-7afae611a35e
I'm running the code in a jupyter notebook.
When I run the command below first, to verify the bundle has been ingested, this is what it shows:
code:
!zipline bundles
output:
apple-prices-2017-2019 2020-06-06 20:33:05.356288
apple-prices-2017-2019 2020-06-05 06:33:39.834841
apple-prices-2017-2019 2020-06-05 06:29:53.091904
apple-prices-2017-2019 2020-06-05 06:26:56.583051
csvdir <no ingestions>
quandl 2020-06-06 20:25:58.737940
quandl 2020-06-06 20:18:01.977412
quantopian-quandl 2020-06-04 16:15:57.717373
quantopian-quandl 2020-05-29 05:59:54.967114
The code I'm running in the Jupyter notebook is below, along with the error. Does anyone see what the issue is, and can you suggest how to fix it?
code:
%%zipline --start 2016-1-1 --end 2017-12-31 --capital-base 1050.0 -o buy_and_hold.pkl

# imports
from zipline.api import order, symbol, record

# parameters
selected_stock = 'AAPL'
n_stocks_to_buy = 10

def initialize(context):
    context.has_ordered = False

def handle_data(context, data):
    # record price for further inspection
    record(price=data.current(symbol(selected_stock), 'price'))

    # trading logic
    if not context.has_ordered:
        # placing order, negative number for sale/short
        order(symbol(selected_stock), n_stocks_to_buy)
        # setting up a flag for holding a position
        context.has_ordered = True
error:
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-10-d11030c42e8b> in <module>()
----> 1 get_ipython().run_cell_magic('zipline', '--start 2016-1-1 --end 2017-12-31 --capital-base 1050.0 -o buy_and_hold.pkl', "\n# imports\nfrom zipline.api import order, symbol, record\n\n# parameters\nselected_stock = 'AAPL'\nn_stocks_to_buy = 10\n\ndef initialize(context):\n context.has_ordered = False \n\ndef handle_data(context, data):\n # record price for further inspection\n record(price=data.current(symbol(selected_stock), 'price'))\n \n # trading logic\n if not context.has_ordered:\n # placing order, negative number for sale/short\n order(symbol(selected_stock), n_stocks_to_buy)\n # setting up a flag for holding a position\n context.has_ordered = True")
~/anaconda3/envs/py36/lib/python3.5/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2165 magic_arg_s = self.var_expand(line, stack_depth)
2166 with self.builtin_trap:
-> 2167 result = fn(magic_arg_s, cell)
2168 return result
2169
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/__main__.py in zipline_magic(line, cell)
309 '%s%%zipline' % ((cell or '') and '%'),
310 # don't use system exit and propogate errors to the caller
--> 311 standalone_mode=False,
312 )
313 except SystemExit as e:
~/anaconda3/envs/py36/lib/python3.5/site-packages/click/core.py in main(self, args, prog_name, complete_var, standalone_mode, **extra)
780 try:
781 with self.make_context(prog_name, args, **extra) as ctx:
--> 782 rv = self.invoke(ctx)
783 if not standalone_mode:
784 return rv
~/anaconda3/envs/py36/lib/python3.5/site-packages/click/core.py in invoke(self, ctx)
1064 _maybe_show_deprecated_notice(self)
1065 if self.callback is not None:
-> 1066 return ctx.invoke(self.callback, **ctx.params)
1067
1068
~/anaconda3/envs/py36/lib/python3.5/site-packages/click/core.py in invoke(*args, **kwargs)
608 with augment_usage_errors(self):
609 with ctx:
--> 610 return callback(*args, **kwargs)
611
612 def forward(*args, **kwargs): # noqa: B902
~/anaconda3/envs/py36/lib/python3.5/site-packages/click/decorators.py in new_func(*args, **kwargs)
19
20 def new_func(*args, **kwargs):
---> 21 return f(get_current_context(), *args, **kwargs)
22
23 return update_wrapper(new_func, f)
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/__main__.py in run(ctx, algofile, algotext, define, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, blotter)
274 local_namespace=local_namespace,
275 environ=os.environ,
--> 276 blotter=blotter,
277 )
278
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
157 trading_calendar=trading_calendar,
158 trading_day=trading_calendar.day,
--> 159 trading_days=trading_calendar.schedule[start:end].index,
160 )
161 first_trading_day =\
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/finance/trading.py in __init__(self, load, bm_symbol, exchange_tz, trading_calendar, trading_day, trading_days, asset_db_path, future_chain_predicates, environ)
101 trading_day,
102 trading_days,
--> 103 self.bm_symbol,
104 )
105
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/data/loader.py in load_market_data(trading_day, trading_days, bm_symbol, environ)
147 # date so that we can compute returns for the first date.
148 trading_day,
--> 149 environ,
150 )
151 tc = ensure_treasury_data(
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/data/loader.py in ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, environ)
214
215 try:
--> 216 data = get_benchmark_returns(symbol)
217 data.to_csv(get_data_filepath(filename, environ))
218 except (OSError, IOError, HTTPError):
~/anaconda3/envs/py36/lib/python3.5/site-packages/zipline/data/benchmarks.py in get_benchmark_returns(symbol)
33 'https://api.iextrading.com/1.0/stock/{}/chart/5y'.format(symbol)
34 )
---> 35 data = r.json()
36
37 df = pd.DataFrame(data)
~/anaconda3/envs/py36/lib/python3.5/site-packages/requests/models.py in json(self, **kwargs)
894 # used.
895 pass
--> 896 return complexjson.loads(self.text, **kwargs)
897
898 @property
~/anaconda3/envs/py36/lib/python3.5/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
317 parse_int is None and parse_float is None and
318 parse_constant is None and object_pairs_hook is None and not kw):
--> 319 return _default_decoder.decode(s)
320 if cls is None:
321 cls = JSONDecoder
~/anaconda3/envs/py36/lib/python3.5/json/decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
~/anaconda3/envs/py36/lib/python3.5/json/decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
This post solved my problem; I only needed the first two steps listed in it. You need to update the benchmarks.py and loader.py files in zipline/data:
Getting JSONDecodeError: Expecting value: line 1 column 1 (char 0) with Python + Zipline + Docker + Jupyter
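For context on why this worked: the traceback bottoms out in get_benchmark_returns, where zipline downloads benchmark data from api.iextrading.com. That API was shut down, so the response body is not JSON and r.json() fails before the algorithm ever runs. The patched files replace that download; a hedged sketch of the idea (the dates and return shape here are illustrative assumptions, not zipline's real code):

import pandas as pd

def get_benchmark_returns(symbol):
    # Stand-in for the defunct IEX call in zipline/data/benchmarks.py:
    # return a flat series of zero daily returns so the backtest can proceed.
    days = pd.date_range('2015-01-01', '2020-12-31', freq='D')
    return pd.Series(0.0, index=days)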

Trying to transfer a pandas DataFrame to a MySQL database

I was using pandas to analyze the JSON file https://data.cityofnewyork.us/api/views/kpav-sd4t/rows.json?accessType=DOWNLOAD
Everything was going well until I got to the end and tried to transfer my data from pandas to SQL.
I put:
df.to_sql('table', con, chunksize=20000)
but it results in
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1399 else:
-> 1400 cur.execute(*args)
1401 return cur
/usr/lib/python3/dist-packages/MySQLdb/cursors.py in execute(self, query, args)
209 query = query.decode(db.unicode_literal.charset)
--> 210 query = query % args
211
TypeError: not all arguments converted during string formatting
During handling of the above exception, another exception occurred:
DatabaseError Traceback (most recent call last)
<ipython-input-25-a485028ed4c0> in <module>()
----> 1 df.to_sql('table', con, chunksize=20000)
/usr/local/lib/python3.5/dist-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
1199 sql.to_sql(self, name, con, flavor=flavor, schema=schema,
1200 if_exists=if_exists, index=index, index_label=index_label,
-> 1201 chunksize=chunksize, dtype=dtype)
1202
1203 def to_pickle(self, path):
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
468 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
469 index_label=index_label, schema=schema,
--> 470 chunksize=chunksize, dtype=dtype)
471
472
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1499 if_exists=if_exists, index_label=index_label,
1500 dtype=dtype)
-> 1501 table.create()
1502 table.insert(chunksize)
1503
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in create(self)
581
582 def create(self):
--> 583 if self.exists():
584 if self.if_exists == 'fail':
585 raise ValueError("Table '%s' already exists." % self.name)
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in exists(self)
569
570 def exists(self):
--> 571 return self.pd_sql.has_table(self.name, self.schema)
572
573 def sql_schema(self):
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in has_table(self, name, schema)
1511 "WHERE type='table' AND name=%s;") % wld
1512
-> 1513 return len(self.execute(query, [name, ]).fetchall()) > 0
1514
1515 def get_table(self, table_name, schema=None):
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1410 ex = DatabaseError(
1411 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1412 raise_with_traceback(ex)
1413
1414 @staticmethod
/usr/local/lib/python3.5/dist-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback)
337 if traceback == Ellipsis:
338 _, _, traceback = sys.exc_info()
--> 339 raise exc.with_traceback(traceback)
340 else:
341 # this version of raise is a syntax error in Python 3
/usr/local/lib/python3.5/dist-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1398 cur.execute(*args, **kwargs)
1399 else:
-> 1400 cur.execute(*args)
1401 return cur
1402 except Exception as exc:
/usr/lib/python3/dist-packages/MySQLdb/cursors.py in execute(self, query, args)
208 if not PY2 and isinstance(query, bytes):
209 query = query.decode(db.unicode_literal.charset)
--> 210 query = query % args
211
212 if isinstance(query, unicode):
DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
I connected to my server using
con = mdb.connect(host='localhost',
                  user='root',
                  passwd='dwdstudent2015',
                  charset='utf8', use_unicode=True)
and
engine = con
I don't understand why it doesn't work. I have seen other examples, but they don't translate to my case.
The con parameter of DataFrame.to_sql can be either a SQLAlchemy engine or
an sqlite connection.
If you are using MySQL (and the MySQLdb Python adapter), then you must connect
to it using an SQLAlchemy engine:
import sqlalchemy as SA
engine = SA.create_engine('mysql+mysqldb://{u}:{p}@{h}/{d}'.format(
    u=USER, p=PASSWORD, h=HOST, d=DATABASE))
df.to_sql('table', engine, chunksize=20000)
Notice that the error says
DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
This SQL statement references sqlite_master because pandas assumes a raw connection object is an SQLite connection and generates SQLite-specific SQL for it. Pandas only generates the correct dialect, via SQLAlchemy, when passed an SQLAlchemy engine.
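The flip side is that a plain DBAPI connection is fine when the database really is SQLite; a minimal sketch, with a hypothetical file name:

import sqlite3
import pandas as pd

df = pd.DataFrame({'borough': ['BK', 'QN'], 'count': [10, 20]})

con = sqlite3.connect('example.db')  # hypothetical SQLite file
df.to_sql('table', con, chunksize=20000, if_exists='replace')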