I am using the following code to connect to a Google spreadsheet:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
# OAuth scopes requested for the service account.
scope = ['https://spreadsheets.google.com/feeds',
'https://www.googleapis.com/auth/drive']
# Build service-account credentials from the JSON key file (relative path).
credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
# Create the gspread client; per the traceback below, authorize() logs in and
# refreshes the token immediately, which is where the TypeError is raised.
# NOTE(review): the traceback shows oauth2client/httplib2 being loaded from the
# Google Cloud SDK "third_party" directory rather than site-packages -- likely
# the copy that passes a bytes URI to httplib2; confirm by inspecting sys.path.
gc = gspread.authorize(credentials)
I have the credentials.json file, which I exported from the Google Sheets API console.
But I am getting the following error:
TypeError: cannot use a string pattern on a bytes-like object
This is the complete error traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-25-ee731769ce7a> in <module>()
7 credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
8
----> 9 gc = gspread.authorize(credentials)
~\Anaconda3\lib\site-packages\gspread\__init__.py in authorize(credentials, client_class)
36 """
37 client = client_class(auth=credentials)
---> 38 client.login()
39 return client
~\Anaconda3\lib\site-packages\gspread\client.py in login(self)
49
50 http = httplib2.Http()
---> 51 self.auth.refresh(http)
52
53 self.session.headers.update({
c:\users\developer\appdata\local\google\cloud sdk\google-cloud-sdk\lib\third_party\oauth2client\client.py in refresh(self, http)
570 request.
571 """
--> 572 self._refresh(http.request)
573
574 def revoke(self, http):
c:\users\developer\appdata\local\google\cloud sdk\google-cloud-sdk\lib\third_party\oauth2client\client.py in _refresh(self, http_request)
780 """
781 if not self.store:
--> 782 self._do_refresh_request(http_request)
783 else:
784 self.store.acquire_lock()
c:\users\developer\appdata\local\google\cloud sdk\google-cloud-sdk\lib\third_party\oauth2client\client.py in _do_refresh_request(self, http_request, attempt)
816 resp, content = http_request(
817 self.token_uri.encode('idna'), method='POST',
--> 818 body=body, headers=headers)
819 content = _helpers._from_bytes(content)
820 if resp.status == http_client.OK:
c:\users\developer\appdata\local\google\cloud sdk\google-cloud-sdk\lib\third_party\httplib2\python3\httplib2\__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
1767 uri = iri2uri(uri)
1768
-> 1769 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
1770
1771 conn_key = scheme + ":" + authority
c:\users\developer\appdata\local\google\cloud sdk\google-cloud-sdk\lib\third_party\httplib2\python3\httplib2\__init__.py in urlnorm(uri)
218
219 def urlnorm(uri):
--> 220 (scheme, authority, path, query, fragment) = parse_uri(uri)
221 if not scheme or not authority:
222 raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
c:\users\developer\appdata\local\google\cloud sdk\google-cloud-sdk\lib\third_party\httplib2\python3\httplib2\__init__.py in parse_uri(uri)
213 (scheme, authority, path, query, fragment) = parse_uri(uri)
214 """
--> 215 groups = URI.match(uri).groups()
216 return (groups[1], groups[3], groups[4], groups[6], groups[8])
217
TypeError: cannot use a string pattern on a bytes-like object
Related
I am stuck at this point in my code. I am trying to split each STARTDATE–ENDDATE range into multiple rows, one per month, and for that I am trying to use the resample function to sample the dates on a monthly basis. The sample code looks like this:
P.S. Many of the BCA_REF, STARTDATE and ENDDATE values are repeated (not unique) because of the use case.
# Sample input: one row per record with a reference and its start/end dates.
# Duplicate rows are present on purpose (see the P.S. above).
df = pd.DataFrame(
data = [['abc','2018-08-01','2025-07-31'], ['abc','2018-08-01','2025-07-31'],['xyz','2017-04-01','2017-04-01'], ['xyz','2017-04-01','2017-04-01'], ['pqr','2016-05-16','2017-10-15']],
columns = ['BCA_REF', 'STARTDATE', 'ENDDATE']
)
# Parse the date strings into datetime values.
df['STARTDATE'] = pd.to_datetime(df['STARTDATE'])
df['ENDDATE'] = pd.to_datetime(df['ENDDATE'])
# Reshape to long form: one row per (BCA_REF, STARTDATE-or-ENDDATE) with the
# timestamp in the 'date' column and the source column name in 'variable'.
df_start_end = df.melt(id_vars=['BCA_REF'],value_vars=['STARTDATE','ENDDATE'], value_name='date')
# Per BCA_REF: drop repeated dates, index by date, upsample to month-end
# frequency with forward fill, then keep only the (BCA_REF, date) pairs.
df_new = (
df_start_end.groupby(['BCA_REF'])
# NOTE(review): nothing sorts 'date' before resample(); the pad/ffill reindex
# in the traceback below requires a monotonic index, so a group whose distinct
# dates are not already in order would raise the reported ValueError. Consider
# .sort_values('date') before .set_index('date') -- confirm on the real data.
# NOTE(review): .pad() and the 'M' alias are version-sensitive in pandas
# (newer releases use .ffill() and 'ME') -- verify against the installed version.
.apply(lambda x: x.drop_duplicates('date').set_index('date')
.resample('M').pad())
.drop(columns=['BCA_REF','variable'])
.reset_index()
)
When I run this on about 40K such rows, it gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_15069/2048245652.py in <module>
4 merged_final_new = (
5 mf_start_end.groupby(['BCA_REF'])
----> 6 .apply(lambda x: x.drop_duplicates('date').set_index('date')
7 .resample('M').pad())
8 # .drop(columns=['BCA_REF','variable'])
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
1273 with option_context("mode.chained_assignment", None):
1274 try:
-> 1275 result = self._python_apply_general(f, self._selected_obj)
1276 except TypeError:
1277 # gh-20949
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f, data)
1307 data after applying f
1308 """
-> 1309 keys, values, mutated = self.grouper.apply(f, data, self.axis)
1310
1311 return self._wrap_applied_output(
~/.local/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
850 # group might be modified
851 group_axes = group.axes
--> 852 res = f(group)
853 if not _is_indexed_like(res, group_axes, axis):
854 mutated = True
/tmp/ipykernel_15069/2048245652.py in <lambda>(x)
5 mf_start_end.groupby(['BCA_REF'])
6 .apply(lambda x: x.drop_duplicates('date').set_index('date')
----> 7 .resample('M').pad())
8 # .drop(columns=['BCA_REF','variable'])
9 # .reset_index()
~/.local/lib/python3.7/site-packages/pandas/core/resample.py in pad(self, limit)
507 DataFrame.fillna: Fill NA/NaN values using the specified method.
508 """
--> 509 return self._upsample("pad", limit=limit)
510
511 ffill = pad
~/.local/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1204 else:
1205 result = obj.reindex(
-> 1206 res_index, method=method, limit=limit, fill_value=fill_value
1207 )
1208
~/.local/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
322 @wraps(func)
323 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 324 return func(*args, **kwargs)
325
326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
~/.local/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
4770 kwargs.pop("axis", None)
4771 kwargs.pop("labels", None)
-> 4772 return super().reindex(**kwargs)
4773
4774 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
~/.local/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4817 # perform the reindex on the axes
4818 return self._reindex_axes(
-> 4819 axes, level, limit, tolerance, method, fill_value, copy
4820 ).__finalize__(self, method="reindex")
4821
~/.local/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4596 if index is not None:
4597 frame = frame._reindex_index(
-> 4598 index, method, copy, level, fill_value, limit, tolerance
4599 )
4600
~/.local/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
4612 ):
4613 new_index, indexer = self.index.reindex(
-> 4614 new_index, method=method, level=level, limit=limit, tolerance=tolerance
4615 )
4616 return self._reindex_with_indexers(
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3824 if self._index_as_unique:
3825 indexer = self.get_indexer(
-> 3826 target, method=method, limit=limit, tolerance=tolerance
3827 )
3828 else:
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
3484 )
3485
-> 3486 return self._get_indexer(target, method, limit, tolerance)
3487
3488 def _get_indexer(
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _get_indexer(self, target, method, limit, tolerance)
3506
3507 if method in ["pad", "backfill"]:
-> 3508 indexer = self._get_fill_indexer(target, method, limit, tolerance)
3509 elif method == "nearest":
3510 indexer = self._get_nearest_indexer(target, limit, tolerance)
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _get_fill_indexer(self, target, method, limit, tolerance)
3582 indexer = engine_method(target_values, limit)
3583 else:
-> 3584 indexer = self._get_fill_indexer_searchsorted(target, method, limit)
3585 if tolerance is not None and len(self):
3586 indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance)
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _get_fill_indexer_searchsorted(self, target, method, limit)
3606 indexer = self.get_indexer(target)
3607 nonexact = indexer == -1
-> 3608 indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
3609 if side == "left":
3610 # searchsorted returns "indices into a sorted array such that,
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in _searchsorted_monotonic(self, label, side)
5763 return len(self) - pos
5764
-> 5765 raise ValueError("index must be monotonic increasing or decreasing")
5766
5767 def get_slice_bound(self, label, side: str_t, kind=None) -> int:
ValueError: index must be monotonic increasing or decreasing
I looked for solutions to this error; people suggested using sort_index()/sort_values() on the 'date' column, but it still does not work. I believe the issue is with the resample function.
Any help would be appreciated. Thank you.
I am collecting historic tweets using sntwitter (ref: https://betterprogramming.pub/how-to-scrape-tweets-with-snscrape-90124ed006af). For some of the keyword searches, I am getting the error "player_stream_content_type". I found the source code for the module on GitHub (https://github.com/JustAnotherArchivist/snscrape/blob/master/snscrape/modules/twitter.py), but I am unable to figure out how to handle the error. Any suggestions on how to handle this would be highly appreciated.
import tweepy
import pandas as pd
import os
import snscrape.modules.twitter as sntwitter
from langdetect import detect, DetectorFactory
# Seed langdetect's DetectorFactory. Note that detect() is never called in
# this snippet; the language filter below uses tweet.lang instead.
DetectorFactory.seed = 0
# Accumulates one list of fields per English-language tweet.
tweets_list1 = []
# NOTE(review): the original indentation was lost in this paste, so the nesting
# of the lines below (in particular the print) cannot be determined from here.
# NOTE(review): the index `i` from enumerate() is not used.
# NOTE(review): per the traceback below, the KeyError is raised inside
# snscrape's _make_card while this generator is being advanced, so it cannot be
# skipped per tweet from inside the loop body; a try/except around the loop
# would stop at the first failing tweet. Upgrading snscrape is presumably the
# real fix -- confirm against the project's issue tracker.
for i,tweet in enumerate(sntwitter.TwitterSearchScraper('itv since:2017-03-06 until:2017-04-03').get_items()):
if tweet.lang=="en":
tweets_list1.append([tweet.date, tweet.id, tweet.rawContent, tweet.user.username,tweet.user.id,
tweet.user.followersCount, tweet.user.friendsCount,tweet.user.location,
tweet.replyCount, tweet.retweetCount,tweet.likeCount,tweet.quoteCount,
tweet.hashtags,tweet.inReplyToUser,tweet.mentionedUsers
])
print(len(tweets_list1))
# Column labels are listed in the same order as the fields appended above.
tweets_df2 = pd.DataFrame(tweets_list1, columns=['Datetime', 'Tweet Id', 'Text', 'Username',
'user_id','user_followers_count','user_friends_count',
'user_location','reply_count','retweet_count','like_count',
'quote_count','hashtags',
'is_reply_to','mentioned_users'])
Error message:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-14-716543e9be2b> in <module>
2 tweets_list1 = []
3
----> 4 for i,tweet in enumerate(sntwitter.TwitterSearchScraper('itv since:2017-03-06 until:2017-04-03').get_items()):
5 if tweet.lang=="en":
6
/opt/anaconda3/envs/p38/lib/python3.8/site-packages/snscrape/modules/twitter.py in get_items(self)
1448
1449 for obj in self._iter_api_data('https://api.twitter.com/2/search/adaptive.json', _TwitterAPIType.V2, params, paginationParams, cursor = self._cursor):
-> 1450 yield from self._v2_timeline_instructions_to_tweets(obj)
1451
1452 @classmethod
/opt/anaconda3/envs/p38/lib/python3.8/site-packages/snscrape/modules/twitter.py in _v2_timeline_instructions_to_tweets(self, obj, includeConversationThreads)
802 for entry in entries:
803 if entry['entryId'].startswith('sq-I-t-') or entry['entryId'].startswith('tweet-'):
--> 804 yield from self._v2_instruction_tweet_entry_to_tweet(entry['entryId'], entry['content'], obj)
805 elif includeConversationThreads and entry['entryId'].startswith('conversationThread-') and not entry['entryId'].endswith('-show_more_cursor'):
806 for item in entry['content']['timelineModule']['items']:
/opt/anaconda3/envs/p38/lib/python3.8/site-packages/snscrape/modules/twitter.py in _v2_instruction_tweet_entry_to_tweet(self, entryId, entry, obj)
825 else:
826 raise snscrape.base.ScraperException(f'Unable to handle entry {entryId!r}')
--> 827 yield self._tweet_to_tweet(tweet, obj)
828
829 def _get_tweet_id(self, tweet):
/opt/anaconda3/envs/p38/lib/python3.8/site-packages/snscrape/modules/twitter.py in _tweet_to_tweet(self, tweet, obj)
1267 kwargs['quotedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['quoted_status_id_str']], obj)
1268 if 'card' in tweet:
-> 1269 kwargs['card'] = self._make_card(tweet['card'], _TwitterAPIType.V2, self._get_tweet_id(tweet))
1270 return self._make_tweet(tweet, user, **kwargs)
1271
/opt/anaconda3/envs/p38/lib/python3.8/site-packages/snscrape/modules/twitter.py in _make_card(self, card, apiType, tweetId)
1113 video = Video(
1114 thumbnailUrl = bindingValues['player_image'],
-> 1115 variants = [VideoVariant(contentType = bindingValues['player_stream_content_type'], url = bindingValues['amplify_url_vmap'], bitrate = None)],
1116 ),
1117 )
KeyError: 'player_stream_content_type'
# Ask for the roll number that identifies the student's picture.
x=input('Enter Roll Number :')
# Base URL of the upload directory (host and path look like placeholders).
str1 = "http://xxxxx/xxxx/students_uploads/"
# Picture files are named "<roll number>_P.jpg".
str2 = x+"_P.jpg"
res = str1 + str2
# Per the traceback below, wget.download raises a plain
# Exception("404: Not Found") when no file exists for the roll number;
# nothing here catches it.
filename = wget.download(res)
This simple code downloads students' profile pictures from our college website. The code works well if we enter a correct roll number, i.e. when the website has a file for that roll number, but when we enter a wrong roll number it throws:
Exception Traceback (most recent call last)
<ipython-input-20-cbbda742e201> in <module>
4 str2 = x+"_P.jpg"
5 res = str1 + str2
----> 6 filename = wget.download(res)
D:\Anaconda\lib\site-packages\wget.py in download(url, out, bar)
314 callback = None
315
--> 316 (tmpfile, headers) = ThrowOnErrorOpener().retrieve(url, tmpfile, callback)
317 names["header"] = filename_from_headers(headers)
318 if os.path.isdir(names["out"]):
D:\Anaconda\lib\urllib\request.py in retrieve(self, url, filename, reporthook, data)
1822 except OSError as msg:
1823 pass
-> 1824 fp = self.open(url, data)
1825 try:
1826 headers = fp.info()
D:\Anaconda\lib\urllib\request.py in open(self, fullurl, data)
1788 try:
1789 if data is None:
-> 1790 return getattr(self, name)(url)
1791 else:
1792 return getattr(self, name)(url, data)
D:\Anaconda\lib\urllib\request.py in open_http(self, url, data)
1966 def open_http(self, url, data=None):
1967 """Use HTTP protocol."""
-> 1968 return self._open_generic_http(http.client.HTTPConnection, url, data)
1969
1970 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
D:\Anaconda\lib\urllib\request.py in _open_generic_http(self, connection_factory, url, data)
1960 response.status)
1961 else:
-> 1962 return self.http_error(
1963 url, response.fp,
1964 response.status, response.reason, response.msg, data)
D:\Anaconda\lib\urllib\request.py in http_error(self, url, fp, errcode, errmsg, headers, data)
1982 result = method(url, fp, errcode, errmsg, headers, data)
1983 if result: return result
-> 1984 return self.http_error_default(url, fp, errcode, errmsg, headers)
1985
1986 def http_error_default(self, url, fp, errcode, errmsg, headers):
D:\Anaconda\lib\site-packages\wget.py in http_error_default(self, url, fp, errcode, errmsg, headers)
285 class ThrowOnErrorOpener(urllib.request.FancyURLopener):
286 def http_error_default(self, url, fp, errcode, errmsg, headers):
--> 287 raise Exception("{0}: {1}".format(errcode, errmsg))
288
289 def download(url, out=None, bar=bar_adaptive):
Exception: 404: Not Found
Is there any way I can simply print "File not found, enter correct roll number" when someone enters a wrong roll number?
Thanks.
A simple exception catch around the wget.download call should be enough:
# wget signals a failed download (e.g. "404: Not Found") by raising a bare
# Exception, so the broad catch is required here; a narrower class would
# miss it.
try:
    filename = wget.download(res)
except Exception as exc:
    # Report the failure instead of letting the traceback propagate.
    print(f"wget failed: {exc}")
I have this code:
import praw
print('starting')
# Authenticated Reddit client; credentials are redacted in the question.
reddit = praw.Reddit(client_id='****',
client_secret='********',
user_agent='****',
username = '****',
password = '****')
# Raw POST to the submit endpoint with a hand-built form payload.
# NOTE(review): the APIException below reports INVALID_OPTION on field 'sr';
# the value passed here carries the 'r/' prefix, whereas the answer below
# recommends the bare subreddit name -- presumably that prefix is the cause.
r = reddit.post("/api/submit",data={'title':'my firts title','text':'the text of my post','sr':'r/test'})
print("finishing")
But it returns with the error:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-19-7e66ffa81635> in <module>
9 password = '*****')
10
---> 11 r = reddit.post("/api/submit",data={'title':'my firts title','text':'the text of my post','sr':'r/test'})
12
13 print("finishing")
~\AppData\Local\Continuum\anaconda3\lib\site-packages\praw\reddit.py in post(self, path, data, files, params)
481 data = self.request('POST', path, data=data or {}, files=files,
482 params=params)
--> 483 return self._objector.objectify(data)
484
485 def put(self, path, data=None):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\praw\objector.py in objectify(self, data)
148 if len(errors) == 1:
149 raise APIException(*errors[0])
--> 150 assert not errors
151
152 elif isinstance(data, dict):
AssertionError:
and in some occasions the same code returns :
---------------------------------------------------------------------------
APIException Traceback (most recent call last)
<ipython-input-27-b62f9f5f585d> in <module>
9 password = '****')
10
---> 11 r = reddit.post("/api/submit",data={'title':'my firts title','text':'the text of my post','sr':'r/test'})
12
13 print("finishing")
~\AppData\Local\Continuum\anaconda3\lib\site-packages\praw\reddit.py in post(self, path, data, files, params)
481 data = self.request('POST', path, data=data or {}, files=files,
482 params=params)
--> 483 return self._objector.objectify(data)
484
485 def put(self, path, data=None):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\praw\objector.py in objectify(self, data)
147 errors = data['json']['errors']
148 if len(errors) == 1:
--> 149 raise APIException(*errors[0])
150 assert not errors
151
APIException: INVALID_OPTION: 'opci\xf3n inv\xe1lida' on field 'sr'
To be honest, I do not know what I am doing wrong. I suppose there is a better way to simply submit a post to Reddit, but the documentation is not very helpful.
You should do:
# Submit a self (text) post through the Subreddit model rather than posting
# to the raw /api/submit endpoint.
my_post = reddit.subreddit('subreddit').submit(
    'My Title',
    selftext='Stuff you want to put in the textbox',
)
Note that the subreddit name shouldn't include the r/ prefix,
as per the PRAW documentation:
https://praw.readthedocs.io/en/latest/code_overview/models/subreddit.html#praw.models.Subreddit.submit
I don't understand why this error occurs when my code connects to a MongoDB database. Here is my code:
import Credentials
# Reuse the MongoClient created in the separate Credentials module.
client = Credentials.client
# Attribute-style access to the "VisitorBook" database.
db = client.VisitorBook
company = input('Enter Company Name: ')
# Insert one document into the "Company_name" collection. Per the traceback
# below, this call is where the ServerSelectionTimeoutError surfaces.
result=db.Company_name.insert_one({'Name':company})
# Print the id of the inserted document.
print(result.inserted_id)
And this is my Credentials file, which I have made separately; this Credentials module is imported in the code above:
from pymongo import MongoClient
from bson.objectid import ObjectId
# Shared client for the Atlas cluster. The user-info part of the URI must be
# separated from the host by "@" (a "#" would start a URI fragment instead).
# Replace Username and <Password> with real credentials.
# NOTE(review): special characters in the password presumably need
# percent-encoding -- confirm against the PyMongo connection-string docs.
client = MongoClient('mongodb+srv://Username:<Password>@basicdatabase-w4eg3.mongodb.net/test?retryWrites=true')
After entering my username and password, when I execute my code it shows this error:
---------------------------------------------------------------------------
ServerSelectionTimeoutError Traceback (most recent call last)
<ipython-input-10-754b7e794657> in <module>()
5
6 company = input('Enter Company Name: ')
----> 7 result=db.Company_name.insert_one({'Name':company})
8 print(result.inserted_id)
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\collection.py in insert_one(self, document, bypass_document_validation, session)
681 self._insert(document,
682 bypass_doc_val=bypass_document_validation,
--> 683 session=session),
684 self.write_concern.acknowledged)
685
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\collection.py in _insert(self, docs, ordered, check_keys, manipulate, write_concern, op_id, bypass_doc_val, session)
597 return self._insert_one(
598 docs, ordered, check_keys, manipulate, write_concern, op_id,
--> 599 bypass_doc_val, session)
600
601 ids = []
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\collection.py in _insert_one(self, doc, ordered, check_keys, manipulate, write_concern, op_id, bypass_doc_val, session)
577
578 result = self.__database.client._retryable_write(
--> 579 True, _insert_command, session)
580 _check_write_command_response(result)
581 else:
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in _retryable_write(self, retryable, func, session)
1099 def _retryable_write(self, retryable, func, session):
1100 """Internal retryable write helper."""
-> 1101 with self._tmp_session(session) as s:
1102 return self._retry_with_session(retryable, func, s, None)
1103
C:\ProgramData\Anaconda3\lib\contextlib.py in __enter__(self)
79 def __enter__(self):
80 try:
---> 81 return next(self.gen)
82 except StopIteration:
83 raise RuntimeError("generator didn't yield") from None
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in _tmp_session(self, session, close)
1405 return
1406
-> 1407 s = self._ensure_session(session)
1408 if s and close:
1409 with s:
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in _ensure_session(self, session)
1392 # Don't make implied sessions causally consistent. Applications
1393 # should always opt-in.
-> 1394 return self.start_session(causal_consistency=False)
1395 except (ConfigurationError, InvalidOperation):
1396 # Sessions not supported, or multiple users authenticated.
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in start_session(self, causal_consistency)
1370
1371 # Raises ConfigurationError if sessions are not supported.
-> 1372 server_session = self._get_server_session()
1373 opts = client_session.SessionOptions(
1374 causal_consistency=causal_consistency)
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\mongo_client.py in _get_server_session(self)
1378 def _get_server_session(self):
1379 """Internal: start or resume a _ServerSession."""
-> 1380 return self._topology.get_server_session()
1381
1382 def _return_server_session(self, server_session, lock):
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\topology.py in get_server_session(self)
425 any_server_selector,
426 self._settings.server_selection_timeout,
--> 427 None)
428 elif not self._description.readable_servers:
429 self._select_servers_loop(
C:\ProgramData\Anaconda3\lib\site-packages\pymongo\topology.py in _select_servers_loop(self, selector, timeout, address)
197 if timeout == 0 or now > end_time:
198 raise ServerSelectionTimeoutError(
--> 199 self._error_message(selector))
200
201 self._ensure_opened()
ServerSelectionTimeoutError: basicdatabase-w4eg3.mongodb.net:27017: [Errno 11001] getaddrinfo failed
Hi, change this portion of the code like this:
import Credentials
# Reuse the MongoClient created in the Credentials module.
client = Credentials.client
# Bracket-style access to the database and the collection.
# NOTE(review): this presumably behaves the same as the attribute-style access
# in the question, so on its own it is unlikely to change a getaddrinfo (DNS)
# failure -- confirm against the PyMongo documentation.
db = client['VisitorBook']
collection = db['Company_name']
company = input('Enter Company Name: ')
# Insert one document and print the id of the inserted document.
result = collection.insert_one({'Name':company})
print(result.inserted_id)
Alternatively, try running mongod.exe manually. Sometimes the mongod.exe console gets stuck; in that case, press Enter inside the mongod console to refresh it.