have no idea what the exception is here - csv

I am trying to read a CSV file using the command below:
sample = pd.read_csv("C:/kushal/DataMining/hillary/out.txt" ,header = 0, delimiter = "\t")
Unfortunately, it raises an exception and I have no idea what is causing it. Does anyone know anything about this exception?
sample["ExtractedBodyText"][1]
Traceback (most recent call last):
File "<pyshell#75>", line 1, in <module>
hillary["ExtractedBodyText"][1]
File "C:\Python34\lib\site-packages\pandas\core\frame.py", line 1914, in __getitem__
return self._getitem_column(key)
File "C:\Python34\lib\site-packages\pandas\core\frame.py", line 1921, in _getitem_column
return self._get_item_cache(key)
File "C:\Python34\lib\site-packages\pandas\core\generic.py", line 1090, in _get_item_cache
values = self._data.get(item)
File "C:\Python34\lib\site-packages\pandas\core\internals.py", line 3102, in get
loc = self.items.get_loc(item)
File "C:\Python34\lib\site-packages\pandas\core\index.py", line 1692, in get_loc
return self._engine.get_loc(_values_from_object(key))
File "pandas\index.pyx", line 137, in pandas.index.IndexEngine.get_loc (pandas\index.c:3979)
File "pandas\index.pyx", line 157, in pandas.index.IndexEngine.get_loc (pandas\index.c:3843)
File "pandas\hashtable.pyx", line 668, in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12265)
File "pandas\hashtable.pyx", line 676, in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12216)
KeyError: 'ExtractedBodyText'

Related

Document is empty ( lxml.etree.ParserError: Document is empty )

What could be the cause of this error?
I think this is due to the relevant web page not being loaded completely. Is that right?
Traceback (most recent call last):
File "/home/ubuntu/.local/share/virtualenvs/Project-RDkr7CyY/lib/python3.7/site-packages/pyquery/pyquery.py", line 57, in fromstring
result = getattr(etree, meth)(context)
File "src/lxml/etree.pyx", line 3213, in lxml.etree.fromstring
File "src/lxml/parser.pxi", line 1877, in lxml.etree._parseMemoryDocument
File "src/lxml/parser.pxi", line 1765, in lxml.etree._parseDoc
File "src/lxml/parser.pxi", line 1127, in lxml.etree._BaseParser._parseDoc
File "src/lxml/parser.pxi", line 601, in lxml.etree._ParserContext._handleParseResultDoc
File "src/lxml/parser.pxi", line 711, in lxml.etree._handleParseResult
File "src/lxml/parser.pxi", line 640, in lxml.etree._raiseParseError
File "<string>", line 1
lxml.etree.XMLSyntaxError: Document is empty, line 1, column 1
Traceback (most recent call last):
File "/home/ubuntu/services/Project/src/parser.py", line 9, in __init__
self._parser = HTML(html=text)
File "/home/ubuntu/.local/share/virtualenvs/projects-RDkr7CyY/lib/python3.7/site-packages/requests_html.py", line 421, in __init__
element=PyQuery(html)('html') or PyQuery(f'<html>{html}</html>')('html'),
File "/home/ubuntu/.local/share/virtualenvs/projects-RDkr7CyY/lib/python3.7/site-packages/pyquery/pyquery.py", line 217, in __init__
elements = fromstring(context, self.parser)
File "/home/ubuntu/.local/share/virtualenvs/projects-RDkr7CyY/lib/python3.7/site-packages/pyquery/pyquery.py", line 61, in fromstring
result = getattr(lxml.html, meth)(context)
File "/home/ubuntu/.local/share/virtualenvs/projects-RDkr7CyY/lib/python3.7/site-packages/lxml/html/__init__.py", line 876, in fromstring
doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
File "/home/ubuntu/.local/share/virtualenvs/projects-RDkr7CyY/lib/python3.7/site-packages/lxml/html/__init__.py", line 765, in document_fromstring
"Document is empty")
lxml.etree.ParserError: Document is empty
For me this was due to leading or trailing whitespace, although I did not manage to reproduce it. Calling str.strip() fixed the "Document is empty" error:
html = html.strip()
dom = fromstring(html)

'ner_ontonotes_bert_mult' model Custom train

When I try to train the 'ner_ontonotes_bert_mult' model with my custom dataset, it shows the error below. (I have saved my dataset in the ~\.deeppavlov\downloads\ontonotes folder, as mentioned in the [deeppavlov documentation][1].)
PS C:\Users\sghanta\Desktop\NER> & c:/Users/sghanta/Desktop/NER/env/Scripts/Activate.ps1
(env) PS C:\Users\sghanta\Desktop\NER> & c:/Users/sghanta/Desktop/NER/env/Scripts/python.exe c:/Users/sghanta/Desktop/NER/train_model.py
C:\Users\sghanta\Desktop\NER\env\lib\site-packages\numpy\_distributor_init.py:32: UserWarning: loaded more than 1 DLL from .libs:
C:\Users\sghanta\Desktop\NER\env\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
C:\Users\sghanta\Desktop\NER\env\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
stacklevel=1)
Traceback (most recent call last):
File "c:/Users/sghanta/Desktop/NER/train_model.py", line 12, in <module>
ner_model = train_model(configs.ner.ner_ontonotes_bert_mult)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\__init__.py", line 29, in train_model
train_evaluate_model_from_config(config, download=download, recursive=recursive)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\commands\train.py", line 92, in train_evaluate_model_from_config
data = read_data_by_config(config)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\commands\train.py", line 58, in read_data_by_config
return reader.read(data_path, **reader_config)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\dataset_readers\conll2003_reader.py", line 56, in read
dataset[name] = self.parse_ner_file(file_name)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\dataset_readers\conll2003_reader.py", line 106, in parse_ner_file
raise Exception(f"Input is not valid {line}")
Exception: Input is not valid
O
(env) PS C:\Users\sghanta\Desktop\NER>
After cleaning the dataset, the above error went away, but now there is a new error.
New Error
2021-08-12 02:43:35.335 ERROR in 'deeppavlov.core.common.params'['params'] at line 112: Exception in <class 'deeppavlov.models.bert.bert_sequence_tagger.BertSequenceTagger'>
Traceback (most recent call last):
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 1365, in _do_call
return fn(*args)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 1350, in _run_fn
target_list, run_metadata)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 1443, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [13,13] rhs shape= [37,37]
[[{{node save/Assign_76}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saver.py", line 1290, in restore
{self.saver_def.filename_tensor_name: save_path})
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 956, in run
run_metadata_ptr)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 1359, in _do_run
run_metadata)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\client\session.py", line 1384, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [13,13] rhs shape= [37,37]
[[node save/Assign_76 (defined at C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\framework\ops.py:1748) ]]
Original stack trace for 'save/Assign_76':
File "c:/Users/sghanta/Desktop/NER/train_model.py", line 12, in <module>
ner_model = train_model(configs.ner.ner_ontonotes_bert_mult)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\__init__.py", line 29, in train_model
train_evaluate_model_from_config(config, download=download, recursive=recursive)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\commands\train.py", line 121, in train_evaluate_model_from_config
trainer.train(iterator)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\trainers\nn_trainer.py", line 334, in train
self.fit_chainer(iterator)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\trainers\fit_trainer.py", line 104, in fit_chainer
component = from_params(component_config, mode='train')
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\common\params.py", line 106, in from_params
component = obj(**dict(config_params, **kwargs))
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\models\tf_backend.py", line 76, in __call__
obj.__init__(*args, **kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\models\tf_backend.py", line 28, in _wrapped
return func(*args, **kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\models\bert\bert_sequence_tagger.py", line 529, in __init__
**kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\models\bert\bert_sequence_tagger.py", line 259, in __init__
self.load()
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\models\tf_backend.py", line 28, in _wrapped
return func(*args, **kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\models\bert\bert_sequence_tagger.py", line 457, in load
return super().load(exclude_scopes=exclude_scopes, **kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\models\tf_model.py", line 251, in load
return super().load(exclude_scopes=exclude_scopes, **kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\deeppavlov\core\models\tf_model.py", line 54, in load
saver = tf.train.Saver(var_list)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saver.py", line 828, in __init__
self.build()
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saver.py", line 840, in build
self._build(self._filename, build_save=True, build_restore=True)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saver.py", line 878, in _build
build_restore=build_restore)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saver.py", line 508, in _build_internal
restore_sequentially, reshape)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saver.py", line 350, in _AddRestoreOps
assign_ops.append(saveable.restore(saveable_tensors, shapes))
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\training\saving\saveable_object_util.py", line 73, in restore
self.op.get_shape().is_fully_defined())
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\ops\state_ops.py", line 227, in assign
validate_shape=validate_shape)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\ops\gen_state_ops.py", line 66, in assign
use_locking=use_locking, name=name)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "C:\Users\sghanta\Desktop\NER\env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
Can anyone explain how to solve it?
[1]: http://docs.deeppavlov.ai/en/master/features/models/ner.html
conll2003_reader dataset reader failed to parse the following line:
O
The conll2003_reader dataset reader expects each line to be either empty or to contain both a token and a label. In your case, only the label is present.
So, I would suggest cleaning your data by removing the lines that contain a label but no token.
Sample text from DeepPavlov docs:
EU B-ORG
rejects O
the O
call O
of O
Germany B-LOC
to O
boycott O
lamb O
from O
Great B-LOC
Britain I-LOC
. O
China B-LOC

Errors when running Python: using Keras to implement a CNN

I am learning deep learning and want to use Keras in Python to implement a CNN, but when I run my script from the command line, I get some errors.
This is my source code. https://github.com/lijhong/CNN-kereas.git
And the error looks like this:
Traceback (most recent call last):
File "/home/ah0818lijhong/CNN-kereas/cnn-kereas.py", line 167, in <module>
model.fit(x_train, y_train,epochs=3)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/models.py", line 845, in fit
initial_epoch=initial_epoch)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/engine/training.py", line 1485, in fit
initial_epoch=initial_epoch)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/engine/training.py", line 1140, in _fit_loop
outs = f(ins_batch)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2073, in __call__
feed_dict=feed_dict)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 778, in run
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: indices[0,868] = 115873 is not in [0, 20001)
[[Node: embedding_1/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](embedding_1/embeddings/read, _recv_embedding_1_input_0)]]
Caused by op u'embedding_1/Gather', defined at:
File "/home/ah0818lijhong/CNN-kereas/cnn-kereas.py", line 122, in <module>
model_left.add(embedding_layer)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/models.py", line 422, in add
layer(x)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 554, in __call__
output = self.call(inputs, **kwargs)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/layers/embeddings.py", line 119, in call
out = K.gather(self.embeddings, inputs)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 966, in gather
return tf.gather(reference, indices)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1207, in gather
validate_indices=validate_indices, name=name)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/ah0818lijhong/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): indices[0,868] = 115873 is not in [0, 20001)
[[Node: embedding_1/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](embedding_1/embeddings/read, _recv_embedding_1_input_0)]]
I hope someone can help me fix it.

AWS Beanstalk integration with codecommit

I have an existing Python project on AWS Elastic Beanstalk. I decided to integrate it with CodeCommit, but I get the following error when I enter the branch name.
Enter Branch Name
***** Must have at least one commit to create a new branch with CodeCommit *****
(default is "develop"): develop
Traceback (most recent call last):
File "/home/fartash/.local/bin/eb", line 11, in <module>
sys.exit(main())
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/core/ebcore.py", line 150, in main
app.run()
File "/home/fartash/.local/lib/python3.5/site-packages/cement/core/foundation.py", line 797, in run
return_val = self.controller._dispatch()
File "/home/fartash/.local/lib/python3.5/site-packages/cement/core/controller.py", line 472, in _dispatch
return func()
File "/home/fartash/.local/lib/python3.5/site-packages/cement/core/controller.py", line 478, in _dispatch
return func()
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/core/abstractcontroller.py", line 57, in default
self.do_command()
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/controllers/initialize.py", line 172, in do_command
branch = get_branch_interactive(repository)
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/controllers/initialize.py", line 575, in get_branch_interactive
source_control.setup_new_codecommit_branch(branch_name=branch_name)
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/objects/sourcecontrol.py", line 313, in setup_new_codecommit_branch
self.fetch_remote_branches(self.codecommit_remote_name)
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/objects/sourcecontrol.py", line 397, in fetch_remote_branches
stdout, stderr, exitcode = self._run_cmd(['git', 'fetch', self.get_codecommit_presigned_remote_url(),
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/objects/sourcecontrol.py", line 434, in get_codecommit_presigned_remote_url
signed_url = codecommit.create_signed_url(remote_url)
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/lib/codecommit.py", line 138, in create_signed_url
password = _sign_codecommit_url(aws.get_region_name(), remote_url)
File "/home/fartash/.local/lib/python3.5/site-packages/ebcli/lib/codecommit.py", line 130, in _sign_codecommit_url
signature = signer.signature(string_to_sign, request)
File "/home/fartash/anaconda3/lib/python3.5/site-packages/botocore/auth.py", line 305, in signature
k_date = self._sign(('AWS4' + key).encode('utf-8'),
TypeError: Can't convert 'NoneType' object to str implicitly

Django mysql when saving a object got warning

I was just writing a little app to store words in my MySQL database using Django. I read the data from a text file which is extremely well organised.
The text file is like this:
DELUGE
DELUSION
DELVE
DEMAGOGUE
DEMANDING
DEMOLITION
DEMONSTRATE
DEMORALIZE
DEMOTIC
DEMUR
DENIGRATE
DENOUEMENT
DENOUNCE
DENT
DENUDE
DEPLETE
DEPLORE
DEPLOY
And then I read data from it using open('thefile').readlines() like this:
for line in open('/home/jacos/sorted-gre.txt').readlines():
... if line:
... p = Word(word_spelling = line)
... p.save()
The word_spelling field is the primary key.
Then came this warning:
Traceback (most recent call last):
File "<console>", line 4, in <module>
File "/usr/local/lib/python2.7/dist-packages/django/db/models/base.py", line 460, in save
self.save_base(using=using, force_insert=force_insert, force_update=force_update)
File "/usr/local/lib/python2.7/dist-packages/django/db/models/base.py", line 553, in save_base
result = manager._insert(values, return_id=update_pk, using=using)
File "/usr/local/lib/python2.7/dist-packages/django/db/models/manager.py", line 195, in _insert
return insert_query(self.model, values, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/django/db/models/query.py", line 1436, in insert_query
return query.get_compiler(using=using).execute_sql(return_id)
File "/usr/local/lib/python2.7/dist-packages/django/db/models/sql/compiler.py", line 791, in execute_sql
cursor = super(SQLInsertCompiler, self).execute_sql(None)
File "/usr/local/lib/python2.7/dist-packages/django/db/models/sql/compiler.py", line 735, in execute_sql
cursor.execute(sql, params)
File "/usr/local/lib/python2.7/dist-packages/django/db/backends/util.py", line 34, in execute
return self.cursor.execute(sql, params)
File "/usr/local/lib/python2.7/dist-packages/django/db/backends/mysql/base.py", line 86, in execute
return self.cursor.execute(query, args)
File "/usr/lib/pymodules/python2.7/MySQLdb/cursors.py", line 176, in execute
if not self._defer_warnings: self._warning_check()
File "/usr/lib/pymodules/python2.7/MySQLdb/cursors.py", line 92, in _warning_check
warn(w[-1], self.Warning, 3)
Data truncated for column 'word_spelling' at row 1
As a result, only some of these words were stored in MySQL. I'd like to know why.
CharFields have a max_length attribute. What max_length did you set on the word_spelling field
(the one read by Word.objects.get(pk=1).word_spelling) when you generated the database?
This is not related to your warning, but
it is recommended to close the file explicitly, or to open it with a with statement:
with open('/home/jacos/sorted-gre.txt') as f:
for line in f.readlines():
if line:
p = Word(word_spelling = line)
p.save()