Some mistakes when I reload "torch.nn.functioanl"? - python-3.x

I want to modify some functions in "torch.nn.functional", and then I use "importlib.reload" to reload the new "torch.nn.functional".But I encounter a Runtime error:function 'avg_pool2d' already has a docstring.
I am trying to do some work on excitationbp(https://github.com/greydanus/excitationbp), and there is a operation named "eb.use_eb()" to modify "torch.nn.functional". The functiona eb.use_eb() is for modifying the "torch.nn.linear","torch.nn.functional.conv1d" and so on.I try to modify the code including many eb.use_eb() in the utils.py.When I want to run the original "torch.nn.linear",I must run the eb.use_eb(False).But the module has loaded in the head of the file(import torch.nn.funtional). So I want to reload the module "torch.nn.funtional".
When I use the "importlib.reload", I encounter a error.
eb.use_eb(False,True)
importlib.reload(torch)
importlib.reload(torch.nn)
importlib.reload(torch.nn.functional)
print("gradient_evidence:"+str(gradient_evidence))
return torch.autograd.grad(contr_h_, target_h_, grad_outputs=gradient_evidence)[0]
RuntimeError Traceback (most recent call last)
ipython-input-15-29d0fca29882 in module()
----> 1 prob_inputs_one = eb.excitation_backprop(model, inputs, prob_outputs_one, contrastive=2)
2 #pdb.set_trace()
3 prob_inputs_true = eb.excitation_backprop(model, inputs, prob_outputs_true, contrastive=2)
~/code/excitationbp/excitationbp-master/excitationbp/utils.py in excitation_backprop(model, inputs, prob_outputs, contrastive, target_layer)
75 importlib.reload(torch)
76 importlib.reload(torch.nn)
---> 77 importlib.reload(torch.nn.functional)
78
79 gradient_evidence = torch.autograd.grad(top_h_, contr_h_, grad_outputs=prob_outputs.clone())[0]
~/anaconda3/envs/pytorch0.3/lib/python3.6/importlib/__init__.py in reload(module)
164 target = module
165 spec = module.__spec__ = _bootstrap._find_spec(name, pkgpath, target)
--> 166 _bootstrap._exec(spec, module)
167 # The module may have replaced itself in sys.modules!
168 return sys.modules[name]
~/anaconda3/envs/pytorch0.3/lib/python3.6/importlib/_bootstrap.py in _exec(spec, module)
~/anaconda3/envs/pytorch0.3/lib/python3.6/importlib/_bootstrap_external.py in exec_module(self, module)
~/anaconda3/envs/pytorch0.3/lib/python3.6/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/pytorch0.3/lib/python3.6/site-packages/torch/nn/functional.py in <module>()
286 count_include_pad: when True, will include the zero-padding in th
287 averaging calculation. Default: ``True``
--> 288 """)
289
290 avg_pool3d = _add_docstr(torch._C._nn.avg_pool3d, r"""
RuntimeError: function 'avg_pool2d' already has a docstring

Related

Huggingface tokenizer not able to load model after upgrading python to 3.10

I just updated Python to version 3.10.8. Note that I use JupyterLab.
I had to re-install a lot of packages, but now I get an error when I try to load the tokenizer of an HuggingFace model
This is my code:
# Import libraries
from transformers import pipeline, AutoTokenizer
# Define checkpoint
model_checkpoint = 'deepset/xlm-roberta-large-squad2'
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
Note that version of transformers is 4.24.0.
This is the error I get:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In [3], line 2
1 # Tokenizer
----> 2 tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
File ~/.local/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:637, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
635 tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
636 if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
--> 637 return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
638 else:
639 if tokenizer_class_py is not None:
File ~/.local/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1777, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1774 else:
1775 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1777 return cls._from_pretrained(
1778 resolved_vocab_files,
1779 pretrained_model_name_or_path,
1780 init_configuration,
1781 *init_inputs,
1782 use_auth_token=use_auth_token,
1783 cache_dir=cache_dir,
1784 local_files_only=local_files_only,
1785 _commit_hash=commit_hash,
1786 **kwargs,
1787 )
File ~/.local/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1932, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
1930 # Instantiate tokenizer.
1931 try:
-> 1932 tokenizer = cls(*init_inputs, **init_kwargs)
1933 except OSError:
1934 raise OSError(
1935 "Unable to load vocabulary from file. "
1936 "Please check that the provided vocabulary is accessible and not corrupted."
1937 )
File ~/.local/lib/python3.10/site-packages/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py:155, in XLMRobertaTokenizerFast.__init__(self, vocab_file, tokenizer_file, bos_token, eos_token, sep_token, cls_token, unk_token, pad_token, mask_token, **kwargs)
139 def __init__(
140 self,
141 vocab_file=None,
(...)
151 ):
152 # Mask token behave like a normal word, i.e. include the space before it
153 mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
--> 155 super().__init__(
156 vocab_file,
157 tokenizer_file=tokenizer_file,
158 bos_token=bos_token,
159 eos_token=eos_token,
160 sep_token=sep_token,
161 cls_token=cls_token,
162 unk_token=unk_token,
163 pad_token=pad_token,
164 mask_token=mask_token,
165 **kwargs,
166 )
168 self.vocab_file = vocab_file
169 self.can_save_slow_tokenizer = False if not self.vocab_file else True
File ~/.local/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py:114, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
111 fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
112 elif slow_tokenizer is not None:
113 # We need to convert a slow tokenizer to build the backend
--> 114 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
115 elif self.slow_tokenizer_class is not None:
116 # We need to create and convert a slow tokenizer to build the backend
117 slow_tokenizer = self.slow_tokenizer_class(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/transformers/convert_slow_tokenizer.py:1162, in convert_slow_tokenizer(transformer_tokenizer)
1154 raise ValueError(
1155 f"An instance of tokenizer class {tokenizer_class_name} cannot be converted in a Fast tokenizer instance."
1156 " No converter was found. Currently available slow->fast convertors:"
1157 f" {list(SLOW_TO_FAST_CONVERTERS.keys())}"
1158 )
1160 converter_class = SLOW_TO_FAST_CONVERTERS[tokenizer_class_name]
-> 1162 return converter_class(transformer_tokenizer).converted()
File ~/.local/lib/python3.10/site-packages/transformers/convert_slow_tokenizer.py:438, in SpmConverter.__init__(self, *args)
434 requires_backends(self, "protobuf")
436 super().__init__(*args)
--> 438 from .utils import sentencepiece_model_pb2 as model_pb2
440 m = model_pb2.ModelProto()
441 with open(self.original_tokenizer.vocab_file, "rb") as f:
File ~/.local/lib/python3.10/site-packages/transformers/utils/sentencepiece_model_pb2.py:20
18 from google.protobuf import descriptor as _descriptor
19 from google.protobuf import message as _message
---> 20 from google.protobuf import reflection as _reflection
21 from google.protobuf import symbol_database as _symbol_database
24 # ##protoc_insertion_point(imports)
File /usr/lib/python3/dist-packages/google/protobuf/reflection.py:58
56 from google.protobuf.pyext import cpp_message as message_impl
57 else:
---> 58 from google.protobuf.internal import python_message as message_impl
60 # The type of all Message classes.
61 # Part of the public interface, but normally only used by message factories.
62 GeneratedProtocolMessageType = message_impl.GeneratedProtocolMessageType
File /usr/lib/python3/dist-packages/google/protobuf/internal/python_message.py:69
66 import copyreg as copyreg
68 # We use "as" to avoid name collisions with variables.
---> 69 from google.protobuf.internal import containers
70 from google.protobuf.internal import decoder
71 from google.protobuf.internal import encoder
File /usr/lib/python3/dist-packages/google/protobuf/internal/containers.py:182
177 collections.MutableMapping.register(MutableMapping)
179 else:
180 # In Python 3 we can just use MutableMapping directly, because it defines
181 # __slots__.
--> 182 MutableMapping = collections.MutableMapping
185 class BaseContainer(object):
187 """Base container class."""
AttributeError: module 'collections' has no attribute 'MutableMapping'
I tried several solutions (for example, this and this), but none seem to work.
According to this link, I should change collections.Mapping into collections.abc.Mapping, but I wouldn't knwo where to do it.
Another possible solution is downgrading Python to 3.9, but I would like to keep it as last resort.
How can I fix this?
Turned out it was a problem related to protobuf module. I updated it to the latest version to date (which is 4.21.9).
This changed the error to:
TypeError: Descriptors cannot not be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
1. Downgrade the protobuf package to 3.20.x or lower.
2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).
More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates
So I downgraded protobuf to version 3.20.0 and that worked.
For further details, look here.

STAN on Databricks - AttributeError: 'ConsoleBuffer' object has no attribute 'closed'

Running STAN (pystan) on Databricks 8.2 ML throws the following Error
To reproduce, just run the simple example from https://pystan.readthedocs.io/en/latest/
Seems like the ConsoleBuffer Class doesn't have an implementation for closed? Have others run into this issue? Any workarounds recommended? I am currently using a single node Cluster and ideally don't want to run this on a local machine.
Stack Trace
AttributeError Traceback (most recent call last)
<command-261559943577864> in <module>
3 "sigma": [15, 10, 16, 11, 9, 11, 10, 18]}
4
----> 5 posterior = stan.build(schools_code, data=schools_data)
6 fit = posterior.sample(num_chains=4, num_samples=1000)
7 eta = fit["eta"] # array with shape (8, 4000)
/databricks/python/lib/python3.8/site-packages/stan/model.py in build(program_code, data, random_seed)
468
469 try:
--> 470 return asyncio.run(go())
471 except KeyboardInterrupt:
472 return # type: ignore
/databricks/python/lib/python3.8/asyncio/runners.py in run(main, debug)
41 events.set_event_loop(loop)
42 loop.set_debug(debug)
---> 43 return loop.run_until_complete(main)
44 finally:
45 try:
/databricks/python/lib/python3.8/asyncio/base_events.py in run_until_complete(self, future)
614 raise RuntimeError('Event loop stopped before Future completed.')
615
--> 616 return future.result()
617
618 def stop(self):
/databricks/python/lib/python3.8/site-packages/stan/model.py in go()
438 async def go():
439 io = ConsoleIO()
--> 440 io.error("<info>Building...</info>")
441 async with stan.common.HttpstanClient() as client:
442 # Check to see if model is in cache.
/databricks/python/lib/python3.8/site-packages/clikit/api/io/io.py in error(self, string, flags)
84 The string is formatted before it is written to the output.
85 """
---> 86 self._error_output.write(string, flags=flags)
87
88 def error_line(self, string, flags=None): # type: (str, Optional[int]) -> None
/databricks/python/lib/python3.8/site-packages/clikit/api/io/output.py in write(self, string, flags, new_line)
59 formatted += "\n"
60
---> 61 self._stream.write(to_str(formatted))
62
63 def write_line(self, string, flags=None): # type: (str, Optional[int]) -> None
/databricks/python/lib/python3.8/site-packages/clikit/io/output_stream/stream_output_stream.py in write(self, string)
19 Writes a string to the stream.
20 """
---> 21 if self.is_closed():
22 raise io.UnsupportedOperation("Cannot write to a closed input.")
23
/databricks/python/lib/python3.8/site-packages/clikit/io/output_stream/stream_output_stream.py in is_closed(self)
114 Returns whether the stream is closed.
115 """
--> 116 return self._stream.closed
AttributeError: 'ConsoleBuffer' object has no attribute 'closed'
After trying some old clusters, I realized that pystan 3 is a complete re-write. So one workaround is to go back to pystan==2.19.1.1

Cytoscape: How do you import ABC file types with py2cytoscape's cyrest api?

I have a file of the type:
A B 0.123
A C 0.84
B D 0.52
...
Where the data are tab separated, and the first and second columns are the nodes, and the third is the associated edge weight.
When trying to import this file into cytoscape using py2cytoscape, I'm receiving an error:
from py2cytoscape import cyrest
fileName="/Users/96v/Documents/lco/lcoAllAt25/lcoAll25/lcoAll25_top0.041pct_data/lcoAll25_top0.041pct.txt"
cyclient = cyrest.cyclient()
cyclient.network.import_file(dataTypeList='string,string,double',
afile=fileName,
delimiters='\t',
indexColumnSourceInteraction="0",
indexColumnTargetInteraction="1",
verbose=True)
'http://localhost:1234/v1/commands/network/import file'
TypeError Traceback (most recent call last)
in
----> 1 cyclient.network.import_file(dataTypeList='string,string,double', afile=fileName, delimiters='\t', indexColumnSourceInteraction="0", indexColumnTargetInteraction="1", defaultInteraction="Edge Attribute",verbose=True)
2
~/opt/anaconda3/lib/python3.8/site-packages/py2cytoscape/cyrest/network.py in import_file(self, dataTypeList, defaultInteraction, delimiters, delimitersForDataList, afile, firstRowAsColumnNames, indexColumnSourceInteraction, indexColumnTargetInteraction, indexColumnTypeInteraction, NetworkViewRendererList, RootNetworkList, startLoadRow, TargetColumnList, verbose)
464 afile,firstRowAsColumnNames,indexColumnSourceInteraction,indexColumnTargetInteraction,
465 indexColumnTypeInteraction,NetworkViewRendererList,RootNetworkList,startLoadRow,TargetColumnList])
--> 466 response=api(url=self.__url+"/import file", PARAMS=PARAMS, method="POST", verbose=verbose)
467 return response
468
~/opt/anaconda3/lib/python3.8/site-packages/py2cytoscape/cyrest/base.py in api(namespace, command, PARAMS, body, host, port, version, method, verbose, url, parse_params)
139 sys.stdout.flush()
140 r = requests.post(url = baseurl, json = PARAMS)
--> 141 verbose_=checkresponse(r, verbose=verbose)
142 if (verbose) or (verbose_):
143 verbose=True
~/opt/anaconda3/lib/python3.8/site-packages/py2cytoscape/cyrest/base.py in checkresponse(r, verbose)
43 if 200 <= status < 300:
44 if verbose:
---> 45 print("response status "+status)
46 sys.stdout.flush()
47 res=None
TypeError: can only concatenate str (not "int") to str
The edge weights aren't being recognized, yet the documentation isn't as verbose for this function.
Any help would be extremely appreciated!
After looking further at the GUI, I realized:
Columns are not 0 indexed.
Verbose has an error in it.
The below code works fine:
from py2cytoscape import cyrest
fileName="pathToFile"
cyclient = cyrest.cyclient()
collection = cyclient.network.import_file(dataTypeList='string,string,double',
afile=fileName,
delimiters='\t',
indexColumnSourceInteraction="1",
indexColumnTargetInteraction="2",
defaultInteraction="interacts with")

Getting TypeError: can't pickle SSLContext objects in Using Ray

I am trying to experiment with the Ray library for parallel processing some of my functions to get output faster. In my local machine, it works ok in my cloud instance it is showing error
TypeError Traceback (most recent call last)
<ipython-input-14-1941686e1604> in <module>
4 # datalist=f1.result()
5
----> 6 datalist_rayval=Customer_Merchant_value_pass.remote(customerlist)
7 #datalist=ray.get(datalist_rayval)
8
~/anaconda3/lib/python3.7/site-packages/ray/remote_function.py in _remote_proxy(*args, **kwargs)
93 #wraps(function)
94 def _remote_proxy(*args, **kwargs):
---> 95 return self._remote(args=args, kwargs=kwargs)
96
97 self.remote = _remote_proxy
~/anaconda3/lib/python3.7/site-packages/ray/remote_function.py in _remote(self, args, kwargs, num_return_vals, is_direct_call, num_cpus, num_gpus, memory, object_store_memory, resources, max_retries)
168 # first driver. This is an argument for repickling the function,
169 # which we do here.
--> 170 self._pickled_function = pickle.dumps(self._function)
171
172 self._function_descriptor = PythonFunctionDescriptor.from_function(
~/anaconda3/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py in dumps(obj, protocol, buffer_callback)
70 cp = CloudPickler(file, protocol=protocol,
71 buffer_callback=buffer_callback)
---> 72 cp.dump(obj)
73 return file.getvalue()
74
~/anaconda3/lib/python3.7/site-packages/ray/cloudpickle/cloudpickle_fast.py in dump(self, obj)
615 def dump(self, obj):
616 try:
--> 617 return Pickler.dump(self, obj)
618 except RuntimeError as e:
619 if "recursion" in e.args[0]:
TypeError: can't pickle SSLContext objects
My Ray decorated code is
#ray.remote
def Prefer_Attachment_query2(listval):
customer_wallet=listval[0]
merchant_wallet=listval[1]
#print(x,y)
prefquery="""MATCH (p1:CUSTOMER {WALLETID: '%s'})
MATCH (p2:MERCHANT {WALLETID: '%s'})
RETURN gds.alpha.linkprediction.preferentialAttachment(p1, p2,{relationshipQuery: "PAYMENT"}) as score"""%(customer_wallet,merchant_wallet)
#print(prefquery)
return prefquery
from timeit import default_timer as timer
import itertools
#ray.remote
def Customer_Merchant_value_pass(text):
minicustomer=text
begin=timer()
sum_val=0
list_avg_score=[]
list_category_val=[]
dict_list=[]
#Avg_score=0
with graphdriver.session()as session:
for i in itertools.islice(minicustomer,len(minicustomer)):
for key in list_of_unique_merchants:
print("Here at list_of_unique_merchants customer value is ",i)
print("BMCC_Code",key)
valuelist=list_of_unique_merchants[key]
#print("Uniquelistfor:",key,valuelist)
for j in valuelist:
#print("list len",len(valuelist))
#print("Here the iner of value list ",i)
#print("--------------------------------")
#print([i,j])
pref_attach_score_rayvalue=Prefer_Attachment_query2.remote([i,j])
pref_attach_score=ray.get(pref_attach_score_rayvalue)
#print(pref_attach_score)
result=session.run(pref_attach_score)
for line in result:
#print(line["score"])
sum_val=sum_val+line["score"]
#Avg_score=sum_val/len(valuelist)
Totalsumval=sum_val
print("Totalsum",Totalsumval)
Avg_score=sum_val/len(valuelist)
print("Avg_score",Avg_score)
sum_val=0
list_avg_score.append(Avg_score)
list_category_val.append(key)
avg_score_list=list_avg_score
category_list=list_category_val
#print("sumval is now",sum_val)
#print(result)
max_dictionary =MaxValue_calc(i,category_list,avg_score_list)
#MaxValue_calc(i,category_list,avg_score_list)
print("max_dicitionary",max_dictionary)
dict_list.append(max_dictionary)
rowlist=dict_list
print('appended list',rowlist)
print('process',len(rowlist))
#dict_list=[]
list_avg_score=[]
list_category_val=[]
#print("rowlist", rowlist)
#print("list_category_val is now",list_category_val)
#print("for",i," category AVG scores is now ",category_list)
#print("list_avg_score is now",list_avg_score)
#print("for",i," category AVG scores is now ",avg_score_list)
session.close()
end=timer()
print("Total time :",(end-begin))
return rowlist
datalist_rayval=Customer_Merchant_value_pass.remote(customerlist)
datalist=ray.get(datalist_rayval)
why I am getting this error. and kindly help me to solve this

How do you use dask + distributed for NFS files?

Working from Matthew Rocklin's post on distributed data frames with Dask, I'm trying to distribute some summary statistics calculations across my cluster. Setting up the cluster with dcluster ... works fine. Inside a notebook,
import dask.dataframe as dd
from distributed import Executor, progress
e = Executor('...:8786')
df = dd.read_csv(...)
The file I'm reading is on an NFS mount that all the worker machines have access to. At this point I can look at df.head() for example and everything looks correct. From the blog post, I think I should be able to do this:
df_future = e.persist(df)
progress(df_future)
# ... wait for everything to load ...
df_future.head()
But that's an error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-26-8d59adace8bf> in <module>()
----> 1 fraudf.head()
/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/dataframe/core.py in head(self, n, compute)
358
359 if compute:
--> 360 result = result.compute()
361 return result
362
/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/base.py in compute(self, **kwargs)
35
36 def compute(self, **kwargs):
---> 37 return compute(self, **kwargs)[0]
38
39 #classmethod
/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/base.py in compute(*args, **kwargs)
108 for opt, val in groups.items()])
109 keys = [var._keys() for var in variables]
--> 110 results = get(dsk, keys, **kwargs)
111
112 results_iter = iter(results)
/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, **kwargs)
55 results = get_async(pool.apply_async, len(pool._pool), dsk, result,
56 cache=cache, queue=queue, get_id=_thread_get_id,
---> 57 **kwargs)
58
59 return results
/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/async.py in get_async(apply_async, num_workers, dsk, result, cache, queue, get_id, raise_on_exception, rerun_exceptions_locally, callbacks, **kwargs)
479 _execute_task(task, data) # Re-execute locally
480 else:
--> 481 raise(remote_exception(res, tb))
482 state['cache'][key] = res
483 finish_task(dsk, key, state, results, keyorder.get)
AttributeError: 'Future' object has no attribute 'head'
Traceback
---------
File "/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/async.py", line 264, in execute_task
result = _execute_task(task, data)
File "/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/async.py", line 246, in _execute_task
return func(*args2)
File "/work/analytics2/analytics/python/envs/analytics/lib/python3.5/site-packages/dask/dataframe/core.py", line 354, in <lambda>
dsk = {(name, 0): (lambda x, n: x.head(n=n), (self._name, 0), n)}
What's the right approach to distributing a data frame when it comes from a normal file system instead of HDFS?
Dask is trying to use the single-machine scheduler, which is the default if you create a dataframe using the normal dask library. Switch the default to use your cluster with the following lines:
import dask
dask.set_options(get=e.get)

Resources