I am trying to load an existing MODFLOW-USG model with FloPy (Windows environment). The model has a Voronoi mesh, and this seems to trip the "load" function:
m1 = flopy.modflow.Modflow.load(model_name + ".nam", model_ws=model_dir, verbose=True, check=False, exe_name="mfusg.exe", version='mfusg')
I get the following error, which appears to relate to the fact that FloPy is expecting a structured grid with rows and columns:
TypeError Traceback (most recent call last)
<ipython-input-33-62420c415719> in <module>
6 head_file = os.path.join(model_dir,model_name+'.hds')
7 print(head_file)
----> 8 m1=flopy.modflow.Modflow.load(model_name+".nam",model_ws=model_dir,verbose=True,check=False,exe_name="mfusg.exe",version='mfusg')
9 headobj = bf.HeadUFile(head_file,verbose=True,text='HEADU')
10 headobj.list_records()
~\Anaconda3\lib\site-packages\flopy\modflow\mf.py in load(f, version, exe_name, verbose, model_ws, load_only, forgive, check)
797 item.package.load(item.filehandle, ml,
798 ext_unit_dict=ext_unit_dict,
--> 799 check=False)
800 else:
801 item.package.load(item.filehandle, ml,
~\Anaconda3\lib\site-packages\flopy\modflow\mfrch.py in load(f, model, nper, ext_unit_dict, check)
408 print(txt)
409 t = Util2d.load(f, model, (nrow, ncol), np.float32, 'rech',
--> 410 ext_unit_dict)
411 else:
412 parm_dict = {}
~\Anaconda3\lib\site-packages\flopy\utils\util_array.py in load(f_handle, model, shape, dtype, name, ext_unit_dict, array_free_format, array_format)
2699
2700 elif cr_dict['type'] == 'internal':
-> 2701 data = Util2d.load_txt(shape, f_handle, dtype, cr_dict['fmtin'])
2702 u2d = Util2d(model, shape, dtype, data, name=name,
2703 iprn=cr_dict['iprn'], fmtin="(FREE)",
~\Anaconda3\lib\site-packages\flopy\utils\util_array.py in load_txt(shape, file_in, dtype, fmtin)
2376 elif len(shape) == 2:
2377 nrow, ncol = shape
-> 2378 num_items = nrow * ncol
2379 else:
2380 raise ValueError(
TypeError: unsupported operand type(s) for *: 'NoneType' and 'int'
I could not find any documentation or Jupyter notebooks with examples of loading an existing model with a Voronoi mesh, only examples of creating new triangular meshes or structured / local-grid-refined grids.
Try loading with forgive=True, which makes FloPy catch exceptions raised while loading individual packages (the RCH package here) and skip them instead of aborting the whole load:
m1 = flopy.modflow.Modflow.load(model_name + ".nam", model_ws=model_dir, verbose=True, check=False, exe_name="mfusg.exe", version='mfusg', forgive=True)
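If upgrading FloPy is an option: recent releases (3.3.5 and later) moved MODFLOW-USG support into a dedicated flopy.mfusg.MfUsg class whose package loaders understand unstructured (DISU) grids. A minimal sketch, assuming FloPy >= 3.3.5 and the same file layout as above:

import flopy

# Sketch, assuming FloPy >= 3.3.5: MfUsg replaces Modflow(version='mfusg')
# and its package loaders do not assume an nrow/ncol structured grid.
m1 = flopy.mfusg.MfUsg.load(model_name + ".nam", model_ws=model_dir,
                            verbose=True, check=False, exe_name="mfusg.exe")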
I am trying to replicate the Keras-LSTM DeepExplainer example. When computing the SHAP values, I get the following warning:
keras is no longer supported, please use tf.keras instead.
Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.
And this error:
TypeError Traceback (most recent call last)
in <module>
1 import shap
2 explainer = shap.DeepExplainer(model, x_train[:100])
----> 3 shap_values = explainer.shap_values(x_test[:10])
~/miniconda3/envs/mtq/lib/python3.8/site-packages/shap/explainers/_deep/__init__.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
122 were chosen as "top".
123 """
--> 124 return self.explainer.shap_values(X, ranked_outputs, output_rank_order, check_additivity=check_additivity)
~/miniconda3/envs/mtq/lib/python3.8/site-packages/shap/explainers/_deep/deep_tf.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
306 # run attribution computation graph
307 feature_ind = model_output_ranks[j,i]
--> 308 sample_phis = self.run(self.phi_symbolic(feature_ind), self.model_inputs,
    309                        joint_input)
    310 # assign the attributions to the right part of the output arrays
~/miniconda3/envs/mtq/lib/python3.8/site-packages/shap/explainers/_deep/deep_tf.py in run(self, out, model_inputs, X)
363
364 return final_out
--> 365 return self.execute_with_overridden_gradients(anon)
366
367 def custom_grad(self, op, *grads):
~/miniconda3/envs/mtq/lib/python3.8/site-packages/shap/explainers/_deep/deep_tf.py in execute_with_overridden_gradients(self, f)
399 # define the computation graph for the attribution values using a custom gradient-like computation
400 try:
--> 401 out = f()
402 finally:
403 # reinstate the backpropagatable check
~/miniconda3/envs/mtq/lib/python3.8/site-packages/shap/explainers/_deep/deep_tf.py in anon()
356 shape = list(self.model_inputs[i].shape)
357 shape[0] = -1
--> 358 data = X[i].reshape(shape)
359 v = tf.constant(data, dtype=self.model_inputs[i].dtype)
360 inputs.append(v)
TypeError: 'NoneType' object cannot be interpreted as an integer
I have checked out PR #1483 but could not find a relevant fix there. Please suggest which tensorflow, keras, and shap versions are needed to successfully replicate the example.
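For context on where the traceback points: at deep_tf.py line 356 the explainer takes list(self.model_inputs[i].shape), patches only the batch dimension (shape[0] = -1), and hands the result to NumPy's reshape. If the model's input signature carries a None (variable) dimension beyond the batch axis, as is common for variable-length LSTM inputs, that reshape raises exactly this TypeError. A minimal sketch of the failure mode, with the shapes chosen purely for illustration:

import numpy as np

shape = [None, None, 8]  # list(model_inputs[i].shape) for a variable-length input (assumed)
shape[0] = -1            # shap patches only the batch dimension, as in deep_tf.py:357
x = np.zeros((5, 3, 8), dtype=np.float32)
x.reshape(shape)         # TypeError: 'NoneType' object cannot be interpreted as an integer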
I have a strange problem with a topic model I am running with BERTopic. The model runs without any errors in Colab and in a VS Code venv. However, when I run the same model in Jupyter Notebook, using the same venv as in VS Code, it returns an error halfway through the run.
The error is below:
TypeError Traceback (most recent call last)
<timed exec> in <module>
c:\python\python39\lib\site-packages\bertopic\_bertopic.py in fit_transform(self, documents, embeddings, y)
285 # Reduce dimensionality with UMAP
286 if self.seed_topic_list is not None and self.embedding_model is not None:
--> 287 y, embeddings = self._guided_topic_modeling(embeddings)
288 umap_embeddings = self._reduce_dimensionality(embeddings, y)
289
c:\python\python39\lib\site-packages\bertopic\_bertopic.py in _guided_topic_modeling(self, embeddings)
1424 for seed_topic in range(len(seed_topic_list)):
1425 indices = [index for index, topic in enumerate(y) if topic == seed_topic]
-> 1426 embeddings[indices] = np.average([embeddings[indices], seed_topic_embeddings[seed_topic]], weights=[3, 1])
1427 return y, embeddings
1428
<__array_function__ internals> in average(*args, **kwargs)
c:\python\python39\lib\site-packages\numpy\lib\function_base.py in average(a, axis, weights, returned)
405 wgt = wgt.swapaxes(-1, axis)
406
--> 407 scl = wgt.sum(axis=axis, dtype=result_dtype)
408 if np.any(scl == 0.0):
409 raise ZeroDivisionError(
c:\python\python39\lib\site-packages\numpy\core\_methods.py in _sum(a, axis, dtype, out, keepdims, initial, where)
45 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
46 initial=_NoValue, where=True):
---> 47 return umr_sum(a, axis, dtype, out, keepdims, initial, where)
48
49 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,
TypeError: No loop matching the specified signature and casting was found for ufunc add
I am not sure what the source of the error could be, since the same code works in Colab and in the VS Code venv. Any pointers in the right direction would be greatly appreciated.
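Since the traceback bottoms out in NumPy's ufunc dispatch inside np.average, a first thing to check (a diagnostic sketch, not a confirmed fix) is whether the Jupyter kernel really runs the same interpreter and NumPy build as the working environments, and what dtype the embeddings have in each:

import sys
import numpy as np

print(sys.executable)    # confirm the Jupyter kernel points at the intended venv
print(np.__version__)    # ufunc signature errors often track the numpy build
# 'embeddings' stands for the array you pass to fit_transform (assumed name)
print(embeddings.dtype)  # e.g. float16 vs float32 can change ufunc dispatch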
The data looks like this:
data_clean2.head(3)
text target
0 [deed, reason, earthquak, may, allah, forgiv, u] 1
1 [forest, fire, near, la, rong, sask, canada] 1
2 [resid, ask, shelter, place, notifi, offic, evacu, shelter, place, order, expect] 1
I got this by tokenizing the sentences and then stemming and lemmatizing the tokens. (I hope that is right.)
Now I want to use:
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(data_clean2['text'])
It gives me the following error:
AttributeError Traceback (most recent call last)
<ipython-input-140-6f68d1115c5f> in <module>
1 vectorizer = TfidfVectorizer()
----> 2 vectors = vectorizer.fit_transform(data_clean2['text'])
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1650 """
1651 self._check_params()
-> 1652 X = super().fit_transform(raw_documents)
1653 self._tfidf.fit(X)
1654 # X is already a transformed view of raw_documents so
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1056
1057 vocabulary, X = self._count_vocab(raw_documents,
-> 1058 self.fixed_vocabulary_)
1059
1060 if self.binary:
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
968 for doc in raw_documents:
969 feature_counter = {}
--> 970 for feature in analyze(doc):
971 try:
972 feature_idx = vocabulary[feature]
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
350 tokenize)
351 return lambda doc: self._word_ngrams(
--> 352 tokenize(preprocess(self.decode(doc))), stop_words)
353
354 else:
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(x)
254
255 if self.lowercase:
--> 256 return lambda x: strip_accents(x.lower())
257 else:
258 return strip_accents
AttributeError: 'list' object has no attribute 'lower'
I know that I cannot use it on a list directly, so what is my play here: turning each list back into a string again?
Yes, first convert each token list back to a string using:
data_clean2['text'] = data_clean2['text'].apply(', '.join)
Then use:
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(data_clean2['text'])
v = pd.DataFrame(vectors.toarray(), columns = vectorizer.get_feature_names())
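Alternatively, if you would rather keep the token lists as they are, you can bypass sklearn's own preprocessing and tokenization by passing a callable as the analyzer (documented TfidfVectorizer behaviour), sketched here for the same dataframe:

# Each "document" is already a list of tokens, so just pass it through unchanged.
vectorizer = TfidfVectorizer(analyzer=lambda tokens: tokens)
vectors = vectorizer.fit_transform(data_clean2['text'])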
So I have a dataframe X which looks something like this:
X.head()
0 My wife took me here on my birthday for breakf...
1 I have no idea why some people give bad review...
3 Rosie, Dakota, and I LOVE Chaparral Dog Park!!...
4 General Manager Scott Petello is a good egg!!!...
6 Drop what you're doing and drive here. After I...
Name: text, dtype: object
And then,
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
X = cv.fit_transform(X)
But I get this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-61-8ff79b91e317> in <module>()
----> 1 X = cv.fit_transform(X)
~/anaconda3/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in fit_transform(self, raw_documents, y)
867
868 vocabulary, X = self._count_vocab(raw_documents,
--> 869 self.fixed_vocabulary_)
870
871 if self.binary:
~/anaconda3/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in _count_vocab(self, raw_documents, fixed_vocab)
790 for doc in raw_documents:
791 feature_counter = {}
--> 792 for feature in analyze(doc):
793 try:
794 feature_idx = vocabulary[feature]
~/anaconda3/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in <lambda>(doc)
264
265 return lambda doc: self._word_ngrams(
--> 266 tokenize(preprocess(self.decode(doc))), stop_words)
267
268 else:
~/anaconda3/lib/python3.6/site-packages/sklearn/feature_extraction/text.py in <lambda>(x)
230
231 if self.lowercase:
--> 232 return lambda x: strip_accents(x.lower())
233 else:
234 return strip_accents
~/anaconda3/lib/python3.6/site-packages/scipy/sparse/base.py in __getattr__(self, attr)
574 return self.getnnz()
575 else:
--> 576 raise AttributeError(attr + " not found")
577
578 def transpose(self, axes=None, copy=False):
AttributeError: lower not found
No idea why.
You need to specify the column name of the text data even if the dataframe has a single column:
X_countMatrix = cv.fit_transform(X['text'])
This is because CountVectorizer expects an iterable of documents as input, and when you supply a DataFrame, the only thing that gets iterated is the column names. So even if you had not hit an error, the result would have been incorrect. It is lucky that you got an error and a chance to correct it.
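A quick sketch of that iteration behaviour, using a made-up two-row frame:

import pandas as pd

df = pd.DataFrame({'text': ['first document', 'second document']})
print(list(df))          # ['text']  <- what the vectorizer would iterate over
print(list(df['text']))  # ['first document', 'second document']  <- what you want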
I am working on non-English corpus analysis but am facing several problems. One of those problems is with the tfidf_vectorizer. After importing the relevant libraries, I ran the following code to get results:
contents = [open(r"D:\test.txt", encoding='utf8').read()]  # raw string so \t is not read as a tab
# define vectorizer parameters
tfidf_vectorizer = TfidfVectorizer(max_df=0.8, max_features=200000,
                                   min_df=0.2, stop_words=stopwords,
                                   use_idf=True, tokenizer=tokenize_and_stem,
                                   ngram_range=(3,3))
%time tfidf_matrix = tfidf_vectorizer.fit_transform(contents)
print(tfidf_matrix.shape)
After running the above code I got the following error message.
ValueError Traceback (most recent call last)
<ipython-input-144-bbcec8b8c065> in <module>()
5 use_idf=True, tokenizer=tokenize_and_stem, ngram_range=(3,3))
6
----> 7 get_ipython().magic('time tfidf_matrix = tfidf_vectorizer.fit_transform(contents) #fit the vectorizer to synopses')
8
9 print(tfidf_matrix.shape)
C:\Users\mazhar\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in magic(self, arg_s)
2156 magic_name, _, magic_arg_s = arg_s.partition(' ')
2157 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2158 return self.run_line_magic(magic_name, magic_arg_s)
2159
2160 #-------------------------------------------------------------------------
C:\Users\mazhar\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_line_magic(self, magic_name, line)
2077 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2078 with self.builtin_trap:
-> 2079 result = fn(*args,**kwargs)
2080 return result
2081
<decorator-gen-60> in time(self, line, cell, local_ns)
C:\Users\mazhar\Anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
C:\Users\mazhar\Anaconda3\lib\site-packages\IPython\core\magics\execution.py in time(self, line, cell, local_ns)
1178 else:
1179 st = clock2()
-> 1180 exec(code, glob, local_ns)
1181 end = clock2()
1182 out = None
<timed exec> in <module>()
C:\Users\mazhar\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1303 Tf-idf-weighted document-term matrix.
1304 """
-> 1305 X = super(TfidfVectorizer, self).fit_transform(raw_documents)
1306 self._tfidf.fit(X)
1307 # X is already a transformed view of raw_documents so
C:\Users\mazhar\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
836 max_doc_count,
837 min_doc_count,
--> 838 max_features)
839
840 self.vocabulary_ = vocabulary
C:\Users\mazhar\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _limit_features(self, X, vocabulary, high, low, limit)
731 kept_indices = np.where(mask)[0]
732 if len(kept_indices) == 0:
--> 733 raise ValueError("After pruning, no terms remain. Try a lower"
734 " min_df or a higher max_df.")
735 return X[:, kept_indices], removed_terms
ValueError: After pruning, no terms remain. Try a lower min_df or a higher max_df.
If I change the min and max values, I get a different error.
Assuming your tokeniser works as expected, I see two problems with your code. First, TfidfVectorizer expects a list of documents, whereas you are providing a list containing a single string (the whole file read at once). Second, min_df=0.2 is quite high: to be included, a term needs to occur in at least 20% of all documents, which is very unlikely for trigram features.
The following works for me:
from sklearn.feature_extraction.text import TfidfVectorizer
with open("README.md") as infile:
    contents = infile.readlines()  # Note: readlines() instead of read()
tfidf_vectorizer = TfidfVectorizer(max_df=0.8, max_features=200000,
                                   min_df=2, use_idf=True, ngram_range=(3,3))
# note: minimum of 2 occurrences, rather than 0.2 (20% of all documents)
tfidf_matrix = tfidf_vectorizer.fit_transform(contents)
print(tfidf_matrix.shape)
outputs (155, 28)
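For reference, a short sketch of the min_df/max_df semantics the fix relies on (illustrative values, not tuned for any particular corpus):

from sklearn.feature_extraction.text import TfidfVectorizer

TfidfVectorizer(min_df=2)    # integer: keep terms occurring in at least 2 documents
TfidfVectorizer(min_df=0.2)  # float: keep terms occurring in at least 20% of documents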