KeyError: "text" when preprocessing for tokenizing - text

I am trying to tokenize .json files of tweets with this code:
*from nltk.corpus import brown
brown.words()
from nltk.tokenize import word_tokenize
import json
import re
.
.
.
with open('volby2018_1.json', 'r') as f:
for line in f:
tweet = json.loads(line)
tokens = preprocess(tweet['text'])*
and I am constantly getting:
**KeyError Traceback (most recent call last)
<ipython-input-2-daba85c11858> in <module>()
52 for line in f:
53 tweet = json.loads(line)
---> 54 tokens = preprocess(tweet['text'])
55
56 print(preprocess(tweet))
KeyError: 'text'**
.json looks like this:
*{"statuses":[{"created_at":"Sun Feb 04 09:26:24 +0000
2018","id":960082100341919744,"id_str":"960082100341919744","text":"#PREZIDENTmluvci
Voli\u010d\u016fm Zemana. Sd\u00edlejte. #Zeman v debate na TV den
p\u0159ed #Volby2018 potvrdil, \u017ee je\u2026
https://t.co/bZOlX2DjqK","truncated":true,"entities":{"hashtags":[{"text":"Zeman","indices":[43,49]},{"text":"Volby2018","indices":[74,84]}],"symbols":[],"user_mentions":[{"screen_name":"PREZIDENTmluvci","name":"Ji\u0159\u00ed
Ov\u010d\u00e1\u010dek","id":3055366126,"id_str":"3055366126","indices":[0,16]}],"urls":[{"url":"https://t.co/bZOlX2DjqK","expanded_url":"https://twitter.com/i/web/status/960082100341919744","display_url":"twitter.com/i/web/status/9\u2026","indices":[102,125]}]},"metadata":{"iso_language_code":"cs","result_type":"recent"},"source":"\u003ca
href=\"http://twitter.com/download/android\"
rel=\"nofollow\"\u003eTwitter for
Android\u003c/a\u003e","in_reply_to_status_id":960080132764467200,"in_reply_to_status_id_str":"960080132764467200","in_reply_to_user_id":3055366126,"in_reply_to_user_id_str":"3055366126","in_reply_to_screen_name":"PREZIDENTmluvci","user":{"id":1915891352,"id_str":"1915891352","name":"Zden\u011bk
Bub\u00e1k","screen_name":"ZdenekBubak","location":"\u0160\u00e9fredaktor
/ Editor in Chief","description":"Redaktor se specializac\u00ed na
finan\u010dn\u00ed
produkty","url":"https://t.co/nvivnZXApP","entities":{"url":{"urls":[{"url":"https://t.co/nvivnZXApP","expanded_url":"http://www.finparada.cz","display_url":"finparada.cz","indices":[0,23]}]},"description":{"urls":[]}},"protected":false,"followers_count":196,"friends_count":201,"listed_count":3,"created_at":"Sun
Sep 29 02:09:56 +0000
2013","favourites_count":782,"utc_offset":null,"time_zone":null,"geo_enabled":true,"verified":false,"statuses_count":4821,"lang":"cs","contributors_enabled":false,"is_translator":false,"is_translation_enabled":false,"profile_background_color":"C0DEED","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_image_url":"http://pbs.twimg.com/profile_images/843934466175356928/94cCpcLK_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/843934466175356928/94cCpcLK_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/1915891352/1380992989","profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"has_extended_profile":false,"default_profile":true,"default_profile_image":false,"following":false,"follow_request_sent":false,"notifications":false,"translator_type":"none"},"geo":null,"coordinates":null,"place":{"id":"018e2bf71a3ef896","url":"https://api.twitter.com/1.1/geo/id/018e2bf71a3ef896.json","place_type":"city","name":"Prague","full_name":"Prague,
Czech Republic","country_code":"CZ","country":"Czech
Republic","contained_within":[],"bounding_box":{"type":"Polygon","coordinates":[[[14.2252428,49.9419037],[14.7065078,49.9419037],[14.7065078,50.1772562],[14.2252428,50.1772562]]]},"attributes":
* ....

Related

Not able to understand why this error occurs in the for loop

import numpy as np
import csv
with open(r'D:\data\11. Lecture 11 - NumPy\11. Lecture 11 - NumPy\33 - 34\terrorismData.csv') as terr_data:
data = csv.DictReader(terr_data, skipinitialspace = True)
country = []
killed = []
for i in data:
country.append(i['Country'])
killed.append(i['Killed'])
terr_data.close()
#error coming in for loop
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
Input In [52], in <cell line: 3>()
5 country = []
6 killed = []
----> 7 for i in data:
8 country.append(i['Country'])
9 killed.append(i['Killed'])
File ~\anaconda3\lib\encodings\cp1252.py:23, in IncrementalDecoder.decode(self, input, final)
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 6235: character maps to <undefined>
I have tried many things from the internet and from many other sources.
I am hoping the problem can be resolved here.

Not able to import _check_y from sklearn.utils.validation

ImportError Traceback (most recent call last)
in
55
56 filename = '/local/home/amajum/XXX/YYY/ZZZ/' + filename
---> 57 loaded_model = pickle.load(open(filename, 'rb'))
58
59 pred = loaded_model.predict(new_sellers)
~/anaconda3/lib/python3.8/site-packages/feature_engine/encoding/init.py in
3 """
4
----> 5 from .count_frequency import CountFrequencyEncoder
6 from .decision_tree import DecisionTreeEncoder
7 from .mean_encoding import MeanEncoder
~/anaconda3/lib/python3.8/site-packages/feature_engine/encoding/count_frequency.py in
16 )
17 from feature_engine._docstrings.substitute import Substitution
---> 18 from feature_engine.dataframe_checks import check_X
19 from feature_engine.encoding._docstrings import (
20 _errors_docstring,
~/anaconda3/lib/python3.8/site-packages/feature_engine/dataframe_checks.py in
8 import pandas as pd
9 from scipy.sparse import issparse
---> 10 from sklearn.utils.validation import _check_y, check_consistent_length
11
12
ImportError: cannot import name '_check_y' from 'sklearn.utils.validation' (/home/amajum/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py)

unable to import pytorch-lightning

I installed pytorch-lightning using pip, and I'm running on Mac.
I tried:
! pip install pytorch-lightning --upgrade
! pip install pytorch-lightning-bolts
(finished successfully)
and then:
import pytorch_lightning as pl
and what I get is:
--
-------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-3-f3b4217dcea1> in <module>
7 from torchvision.datasets import MNIST
8 from torchvision import transforms
----> 9 import pytorch_lightning as pl
10 from pytorch_lightning.metrics.functional import accuracy
11 tmpdir = os.getcwd()
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/__init__.py in <module>
60 # We are not importing the rest of the lightning during the build process, as it may not be compiled yet
61 else:
---> 62 from pytorch_lightning import metrics
63 from pytorch_lightning.callbacks import Callback
64 from pytorch_lightning.core import LightningDataModule, LightningModule
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/__init__.py in <module>
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 14 from pytorch_lightning.metrics.classification import ( # noqa: F401
15 Accuracy,
16 AUC,
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/classification/__init__.py in <module>
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 14 from pytorch_lightning.metrics.classification.accuracy import Accuracy # noqa: F401
15 from pytorch_lightning.metrics.classification.auc import AUC # noqa: F401
16 from pytorch_lightning.metrics.classification.auroc import AUROC # noqa: F401
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/classification/accuracy.py in <module>
16 import torch
17
---> 18 from pytorch_lightning.metrics.functional.accuracy import _accuracy_compute, _accuracy_update
19 from pytorch_lightning.metrics.metric import Metric
20
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/functional/__init__.py in <module>
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 14 from pytorch_lightning.metrics.functional.accuracy import accuracy # noqa: F401
15 from pytorch_lightning.metrics.functional.auc import auc # noqa: F401
16 from pytorch_lightning.metrics.functional.auroc import auroc # noqa: F401
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/functional/accuracy.py in <module>
16 import torch
17
---> 18 from pytorch_lightning.metrics.classification.helpers import _input_format_classification, DataType
19
20
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/classification/helpers.py in <module>
17 import torch
18
---> 19 from pytorch_lightning.metrics.utils import select_topk, to_onehot
20 from pytorch_lightning.utilities import LightningEnum
21
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/metrics/utils.py in <module>
16 import torch
17
---> 18 from pytorch_lightning.utilities import rank_zero_warn
19
20 METRIC_EPS = 1e-6
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/utilities/__init__.py in <module>
16 import numpy
17
---> 18 from pytorch_lightning.utilities.apply_func import move_data_to_device # noqa: F401
19 from pytorch_lightning.utilities.distributed import ( # noqa: F401
20 AllGatherGrad,
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/utilities/apply_func.py in <module>
23
24 from pytorch_lightning.utilities.exceptions import MisconfigurationException
---> 25 from pytorch_lightning.utilities.imports import _TORCHTEXT_AVAILABLE
26
27 if _TORCHTEXT_AVAILABLE:
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/utilities/imports.py in <module>
54 _TORCH_GREATER_EQUAL_1_7 = _compare_version("torch", operator.ge, "1.7.0")
55 _TORCH_QUANTIZE_AVAILABLE = bool([eg for eg in torch.backends.quantized.supported_engines if eg != 'none'])
---> 56 _APEX_AVAILABLE = _module_available("apex.amp")
57 _BOLTS_AVAILABLE = _module_available('pl_bolts')
58 _DEEPSPEED_AVAILABLE = not _IS_WINDOWS and _module_available('deepspeed')
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pytorch_lightning/utilities/imports.py in _module_available(module_path)
32 """
33 try:
---> 34 return find_spec(module_path) is not None
35 except AttributeError:
36 # Python 3.6
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/importlib/util.py in find_spec(name, package)
92 parent_name = fullname.rpartition('.')[0]
93 if parent_name:
---> 94 parent = __import__(parent_name, fromlist=['__path__'])
95 try:
96 parent_path = parent.__path__
/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/apex/__init__.py in <module>
11 ISessionFactory)
12 from pyramid.security import NO_PERMISSION_REQUIRED
---> 13 from pyramid.session import UnencryptedCookieSessionFactoryConfig
14 from pyramid.settings import asbool
15
ImportError: cannot import name 'UnencryptedCookieSessionFactoryConfig' from 'pyramid.session' (unknown location)
I guess this is an outdated issue, as we have split TorchMetrics out into a standalone package. Please check out the latest PyTorch Lightning.
Try installing it from the GitHub repository first before importing it in the notebook.
Run the following command in the Notebook:
!pip install git+https://github.com/PyTorchLightning/pytorch-lightning

How can I resolve this error when reading an xlsx dataset in Python?

with open("F:\MTechProjects\FirstProject\BL.xlsx") as fin:
notes=fin.read()
print(notes)
This is what I executed, with all the required imports.
UnicodeDecodeError
Traceback (most recent call last)
<ipython-input-13-87e93887fc02> in <module>
17 #About the data
18 with open("F:\MTechProjects\FirstProject\Shashikala - Blood Deferral Dataset.xlsx") as fin:
---> 19 notes=fin.read()
20 print(notes)
~\Anaconda3\lib\encodings\cp1252.py in decode(self, input, final)
21 class IncrementalDecoder(codecs.IncrementalDecoder):
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
24
25 class StreamWriter(Codec,codecs.StreamWriter):
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 597: character maps to <undefined>
Got this error. Is there any other way to read xlsx files? Thanks in advance
Try:
file = open(filename, encoding="utf8")
If the error still persists, you can ignore decoding errors:
open(filename, errors='ignore')
You can also check which encoding Python uses to open your file (it is shown when you print the file object):
with open('file_name.csv') as f:
print(f)
With the pandas module in Python, you can read an Excel file like this:
import pandas as pd
data1=pd.read_excel('F:\MTechProjects\FirstProject\BL.xlsx')

Unable to connect py2neo v3 with neo4j 3.4.1

I have been trying to connect to my local neo4j server using py2neo v3 and neo4j version 3.4.1.
The commands I used are:-
from py2neo import Graph, Node, Relationship
graphURL='http://localhost:7474/db/data/'
graphUser = "neo4j"
graphPassphrase = "XXXX"
graph=Graph(graphURL, user=graphUser, password=graphPassphrase)
I receive the following errors on trying to use this code.
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-20-ab3844caf22c> in <module>()
3 graphPassphrase = "XXXX"
4
----> 5 graph=Graph(graphURL, user=graphUser, password=graphPassphrase)
~\Anaconda3\lib\site-packages\py2neo\graph.py in __new__(cls, *uris, **settings)
333 def __new__(cls, *uris, **settings):
334 database = settings.pop("database", "data")
--> 335 graph_service = GraphService(*uris, **settings)
336 address = graph_service.address
337 if database in graph_service:
~\Anaconda3\lib\site-packages\py2neo\graph.py in __new__(cls, *uris, **settings)
77 from py2neo.addressing import register_graph_service, get_graph_service_auth
78 from py2neo.http import register_http_driver
---> 79 from neo4j.v1 import GraphDatabase
80 register_http_driver()
81 address = register_graph_service(*uris, **settings)
~\Anaconda3\lib\site-packages\neo4j\v1\__init__.py in <module>()
20
21 from .api import *
---> 22 from .bolt import *
23 from .security import *
24 from .types import *
~\Anaconda3\lib\site-packages\neo4j\v1\bolt.py in <module>()
30 from .security import SecurityPlan, Unauthorized
31 from .summary import ResultSummary
---> 32 from .types import Record
33
34
~\Anaconda3\lib\site-packages\neo4j\v1\types\__init__.py in <module>()
31 from operator import xor as xor_operator
32
---> 33 from neo4j.packstream import Structure
34 from neo4j.compat import map_type, string, integer, ustr
35
~\Anaconda3\lib\site-packages\neo4j\packstream\__init__.py in <module>()
20
21
---> 22 from neo4j.util import import_best as _import_best
23
24 from .structure import Structure
ImportError: cannot import name 'import_best'
I have tried using the handbook https://py2neo.org/v3/database.html?highlight=relation for v3, but it did not help with my problem. Could you please help me with this issue?
The driver supports both the BOLT and HTTP protocols. It seems that you want to use HTTP here, but the driver is trying to instantiate the BOLT one.
I recommend that you use BOLT, so your code should be:
from py2neo import Graph, Node, Relationship
graphHost='localhost'
graphUser = "neo4j"
graphPassphrase = "XXXX"
graph=Graph(bolt=true, host=graphHost, user=graphUser, password=graphPassphrase)
If you really want to use HTTP:
graph=Graph(bolt=false, host=graphHost, user=graphUser, password=graphPassphrase)

Resources