I am trying to train my model using the glove package. My code is below:
#!/usr/bin/env python3
from __future__ import print_function
import argparse
import pprint
import gensim
from glove import Glove
from tensorflow.python.keras.utils.data_utils import Sequence
def read_corpus(filename):
    delchars = [chr(c) for c in range(256)]
    delchars = [x for x in delchars if not x.isalnum()]
    delchars.remove(' ')
    delchars = ''.join(delchars)

    with open(filename, 'r') as datafile:
        for line in datafile:
            # Python 3 str.translate takes a translation table, not (None, delchars)
            yield line.lower().translate(str.maketrans('', '', delchars)).split(' ')
if __name__ == '__main__':
    base_path = "/home/hunzala_awan/vocab.pubmed1.txt"
    get_data = read_corpus(base_path)

    glove = Glove(no_components=100, learning_rate=0.05)
    glove.fit(get_data, epochs=10, verbose=True)
    pprint.pprint(glove.most_similar("cancer", number=10))
When I try to run this code, I get the following error:
Traceback (most recent call last):
File "mytest3.py", line 36, in
glove.fit(get_data, epochs=10, verbose=True)
File "/usr/local/lib/python3.5/dist-packages/glove/glove.py", line 86, in fit
shape = matrix.shape
AttributeError: 'generator' object has no attribute 'shape'
What am I missing? Any help with this issue will be highly appreciated.
Thanks in advance
I'm not familiar with Glove, but it seems that it can't fit from a generator. You can try exhausting the generator ahead of time by converting it to a list (this will consume more memory):
glove.fit(list(get_data), epochs=10, verbose=True)
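If converting to a list still fails on the same line, note that the glove package here (glove-python, if that is what's installed) appears to expect a co-occurrence matrix rather than raw token lists; a minimal sketch under that assumption:

from glove import Corpus, Glove

# Build the co-occurrence matrix first; this consumes the generator.
corpus = Corpus()
corpus.fit(read_corpus(base_path), window=10)

glove = Glove(no_components=100, learning_rate=0.05)
glove.fit(corpus.matrix, epochs=10, verbose=True)
glove.add_dictionary(corpus.dictionary)  # needed before most_similar()
pprint.pprint(glove.most_similar("cancer", number=10))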
Goal: instantiate unet_learner() using weights.
weights is a str that I bring in from a user-defined .yaml file; hence eval().
file_path and training are classes that hold parameters.
Code:
import numpy as np
from fastai.vision.all import *
def train(dls, file_path, training):
    labels = np.loadtxt(file_path.labels, dtype=str)
    weights = torch.tensor(eval(training.weights))

    print('#################')
    print(weights)
    print(type(weights))
    print('#################')

    learner = unet_learner(dls, training.architecture,
                           loss_func=CrossEntropyLossFlat(axis=1, weight=weights))
    return learner.load(file_path.weights)
Wrapping weights in torch.tensor() again at the call site doesn't help; I get the same error.
Traceback:
(venv) me@ubuntu-pcs:~/PycharmProjects/project$ python pdl1_lung_train/main.py
/home/me/miniconda3/envs/venv/lib/python3.7/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at /opt/conda/conda-bld/pytorch_1607370156314/work/c10/cuda/CUDAFunctions.cpp:100.)
return torch._C._cuda_getDeviceCount() > 0
#################
tensor([0.4000, 0.9000])
<class 'torch.Tensor'>
#################
Traceback (most recent call last):
File "pdl1_lung_train/main.py", line 27, in <module>
main(ROOT)
File "pdl1_lung_train/main.py", line 19, in main
learner = train(dls, file_path, training)
File "/home/me/PycharmProjects/project/pdl1_lung_train/train.py", line 16, in train
weight=weights))
File "/home/me/miniconda3/envs/venv/lib/python3.7/site-packages/fastai/vision/learner.py", line 267, in unet_learner
model = create_unet_model(arch, n_out, img_size, pretrained=pretrained, **kwargs)
File "/home/me/miniconda3/envs/venv/lib/python3.7/site-packages/fastai/vision/learner.py", line 243, in create_unet_model
model = arch(pretrained)
TypeError: 'str' object is not callable
Please let me know if I need to add other info. to post.
I might be wrong, but I think your training.architecture is a string, while according to the unet_learner documentation the architecture argument has to be callable.
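A minimal sketch of one way to resolve the YAML string into a callable, assuming it holds a torchvision backbone name such as 'resnet34' (the ARCHITECTURES dict is hypothetical):

from fastai.vision.all import unet_learner, CrossEntropyLossFlat, resnet34, resnet50

# Hypothetical lookup table from the YAML string to a callable backbone.
ARCHITECTURES = {'resnet34': resnet34, 'resnet50': resnet50}

arch = ARCHITECTURES[training.architecture]  # e.g. 'resnet34' -> resnet34
learner = unet_learner(dls, arch,
                       loss_func=CrossEntropyLossFlat(axis=1, weight=weights))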
Recently, I tried to use Python to run some code from GitHub. The code is located at https://github.com/costapt/vess2ret and I used the following software:
TensorFlow-gpu: 2.0-beta1
Keras: 2.2.4
OS: Windows 10
Python: 3.5
CUDA: 10.0
cuDNN: 10.0
And I ran into the same problem that others have already reported online:
AttributeError: module 'tensorflow' has no attribute 'get_default_graph'
After I changed
from keras import backend
to:
from tensorflow.keras import backend
I ran into another problem:
AttributeError: module 'tensorflow.python.keras.api._v2.keras.backend' has no attribute 'set_image_dim_ordering'
and I don't know what to do now.
# Here is the code related to the problem above.
import os
import keras
from tensorflow.keras import backend as K
from keras import objectives
from keras.layers import Input, merge
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Convolution2D, Deconvolution2D
from keras.layers.core import Activation, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.optimizers import Adam
KERAS_2 = keras.__version__[0] == '2'

try:
    # keras 2 imports
    from keras.layers.convolutional import Conv2DTranspose
    from keras.layers.merge import Concatenate
except ImportError:
    print("keras 2 layers could not be imported, defaulting to keras 1")
    KERAS_2 = False

K.set_image_dim_ordering('th')  # here is where the problem occurs
#The first problem.
Traceback (most recent call last):
File "C:\zzProject_ML\vess2ret-master\train.py", line 326, in <module>
batch_size=params.batch_size, is_binary=params.is_b_binary)
File "C:\zzProject_ML\vess2ret-master\models.py", line 378, in g_unet
i = Input(shape=(in_ch, 512, 512))
File "C:\Users\10580\Anaconda3\envs\project_ML\lib\site-packages\keras\engine\input_layer.py", line 178, in Input
input_tensor=tensor)
File "C:\Users\10580\Anaconda3\envs\project_ML\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\10580\Anaconda3\envs\project_ML\lib\site-packages\keras\engine\input_layer.py", line 39, in __init__
name = prefix + '_' + str(K.get_uid(prefix))
File "C:\Users\10580\Anaconda3\envs\project_ML\lib\site-packages\keras\backend\tensorflow_backend.py", line 74, in get_uid
graph = tf.get_default_graph()
AttributeError: module 'tensorflow' has no attribute 'get_default_graph'
#The second problem.
Using TensorFlow backend.
Traceback (most recent call last):
File "C:\zzProject_ML\vess2ret-master\train.py", line 7, in <module>
import models as m
File "C:\zzProject_ML\vess2ret-master\models.py", line 25, in <module>
K.set_image_dim_ordering('th')
AttributeError: module 'tensorflow.python.keras.api._v2.keras.backend' has no attribute 'set_image_dim_ordering'
You are mixing tf.keras and keras in your imports (they aren't compatible), and keras does not currently support tensorflow 2.0 (no stable version has been released).
If you have to use tensorflow 2.0, then you have to use tf.keras included in that version. If you want to use keras, then you need to downgrade to a stable tensorflow version.
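If you do stay on TensorFlow 2.x, a minimal sketch of consistent tf.keras imports for the code above (note that the old Keras set_image_dim_ordering('th') corresponds to set_image_data_format('channels_first') in tf.keras):

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Input, Activation, Dropout, LeakyReLU,
                                     Conv2D, Conv2DTranspose, Concatenate,
                                     BatchNormalization)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# 'th' ordering in old Keras terms means channels first: (N, C, H, W)
K.set_image_data_format('channels_first')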
Instead of calling the optimizer as a function, pass it by name only (tested with tensorflow == 2.11.0). Change this call:
model.compile(loss=keras.metrics.categorical_crossentropy, optimizer=keras.optimizer.Adam(), metrics=['accuracy'])
to the following (note the failing call also misspells the module, which is keras.optimizers, plural):
model.compile(loss=keras.metrics.categorical_crossentropy, optimizer="Adam", metrics=['accuracy'])
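For context, a minimal sketch (assuming tf.keras on TensorFlow 2.x) showing that both the string form and a correctly spelled instance work:

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(10, activation='softmax')])

# By name, as suggested above:
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

# Or as an instance; note the module is keras.optimizers (plural),
# which is the typo that made the original call fail:
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])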
I'm trying to split a long-running process across multiple processes using the concurrent.futures module. The code is below.
Main function:
with concurrent.futures.ProcessPoolExecutor() as executor:
    rows = [dataframe[idx:idx + 1] for idx in range(dataframe.shape[0])]
    for idx, score in zip(range(dataframe.shape[0]), executor.map(get_max_fuzzy_score, rows)):
        print('processing ' + str(idx + 1) + ' of ' + str(dataframe.shape[0]))
        dataframe['max_row_score'].iloc[idx] = score
get_max_fuzzy_score function:
def get_max_fuzzy_score(picklepath_or_list, df):
    # Assumed imports: pickle and fuzz (e.g. from fuzzywuzzy) are used
    # below but were not imported in the original snippet.
    import pickle
    import numpy as np
    from fuzzywuzzy import fuzz

    extracted_text_columns = list(df.filter(regex='extracted_text').columns)
    data_list = [df[data].iloc[0] for data in extracted_text_columns
                 if not df[data].isnull().values.any()]

    try:
        size = len(picklepath_or_list)  # already a list?
        section_snippet_list = picklepath_or_list
    except:
        section_snippet_list = pickle.load(open(picklepath_or_list, 'rb'))

    scores = []
    for section_snippet in section_snippet_list:
        for data in data_list:
            scores.append(fuzz.partial_ratio(data, section_snippet))
    score = max(scores)
    return score
The function takes values of a few columns and returns the max fuzzy score from a list that is built previously.
Here's the error I get:
Traceback (most recent call last):
File "multiprocessing.py", line 8, in <module>
import concurrent.futures
File "/home/naveen/anaconda3/lib/python3.6/concurrent/futures/__init__.py", line 17, in <module>
from concurrent.futures.process import ProcessPoolExecutor
File "/home/naveen/anaconda3/lib/python3.6/concurrent/futures/process.py", line 53, in <module>
import multiprocessing
File "/home/naveen/Documents/pramata-ie/data-science/scripts/multiprocessing.py", line 79, in <module>
with concurrent.futures.ProcessPoolExecutor() as executor:
AttributeError: module 'concurrent' has no attribute 'futures'
You can import it this way:
import concurrent.futures
and use it this way:
executor = concurrent.futures.ThreadPoolExecutor(max_workers=num_workers)
You can also import ThreadPoolExecutor this way:
from concurrent.futures.thread import ThreadPoolExecutor
and use it this way:
executor = ThreadPoolExecutor(max_workers=num_workers)
Don't name your Python file threading.py or multiprocessing.py: the traceback shows your own script, /home/naveen/Documents/pramata-ie/data-science/scripts/multiprocessing.py, being imported in place of the standard-library multiprocessing module.
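After renaming the script (say, to fuzzy_scores.py, a hypothetical name), a minimal self-contained sketch of the executor pattern from the question:

import concurrent.futures

def square(x):
    return x * x

if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor() as executor:
        results = list(executor.map(square, range(10)))
    print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]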
Why am I getting this error? Please help; I am a newbie to machine learning.
This is my code, in which I apply lemmatization to the 20 newsgroups dataset. It aims to get the 500 words with the highest counts while applying filtering.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import fetch_20newsgroups
from nltk.corpus import names
from nltk.stem import WordNetLemmatizer
def letters_only(astr):
    return astr.isalpha()
cv = CountVectorizer(stop_words="english", max_features=500)
groups = fetch_20newsgroups()
cleaned = []
all_names = set(names.words())
lemmatizer = WordNetLemmatizer()
for post in groups.data:
    cleaned.append(' '.join([lemmatizer.lemmatize(word.lower()
                             for word in post.split()
                             if letters_only(word) and word not in all_names)]))
transformed = cv.fit_transform(cleaned)
print(cv.get_feature_names())
Error:
Traceback (most recent call last):
File "<ipython-input-91-7158a74bae71>", line 18, in <module>
for word in post.split()
File "C:\Program Files\Anaconda3\lib\site-packages\nltk\stem\wordnet.py", line 40, in lemmatize
lemmas = wordnet._morphy(word, pos)
File "C:\Program Files\Anaconda3\lib\site-packages\nltk\corpus\reader\wordnet.py", line 1712, in _morphy
forms = apply_rules([form])
File "C:\Program Files\Anaconda3\lib\site-packages\nltk\corpus\reader\wordnet.py", line 1692, in apply_rules
for form in forms
File "C:\Program Files\Anaconda3\lib\site-packages\nltk\corpus\reader\wordnet.py", line 1694, in <listcomp>
if form.endswith(old)]
AttributeError: 'generator' object has no attribute 'endswith'
I'm not sure why at first glance, but turning that one-liner for loop into a regular for loop solved the problem. (In the original, the closing parenthesis of lemmatize() is misplaced, so the whole generator expression is passed to lemmatize(), and WordNet then calls .endswith() on the generator instead of on a string.)
for post in groups.data:
    for word in post.split():
        if letters_only(word) and word not in all_names:
            cleaned.append(' '.join([lemmatizer.lemmatize(word.lower())]))
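Note that this loop appends each word to cleaned separately; if the intent was one joined string per post (as in the original), a sketch that only moves the misplaced closing parenthesis:

cleaned = []
for post in groups.data:
    # lemmatize() now receives a single word, not the generator expression
    cleaned.append(' '.join(
        lemmatizer.lemmatize(word.lower())
        for word in post.split()
        if letters_only(word) and word not in all_names))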
Hello, I am running the following experiment. First I created a vectorizer, tfidf_vectorizer:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(min_df=10, ngram_range=(1, 3), analyzer='word', max_features=500)
Then I vectorized the following list:
tfidf = tfidf_vectorizer.fit_transform(listComments)
My list of comments looks as follows:
listComments = ["hello this is a test","the car is red",...]
I tried to save the model as follows:
# Saving tfidf
with open('vectorizerTFIDF.pickle', 'wb') as idxf:
    pickle.dump(tfidf, idxf, pickle.HIGHEST_PROTOCOL)
I would like to use my vectorizer to apply the same tfidf to the following list:
lastComment = ["this is a car"]
Opening the model:
with open('vectorizerTFIDF.pickle', 'rb') as infile:
    tdf = pickle.load(infile)
vector = tdf.transform(lastComment)
However I am getting:
Traceback (most recent call last):
File "C:/Users/LDA_test/ldaTest.py", line 141, in <module>
vector = tdf.transform(lastComment)
File "C:\Program Files\Anaconda3\lib\site-packages\scipy\sparse\base.py", line 559, in __getattr__
raise AttributeError(attr + " not found")
AttributeError: transform not found
I hope someone can help me with this issue. Thanks in advance.
You've pickled the transformed array, not the transformer. You need:
pickle.dump(tfidf_vectorizer, idxf, pickle.HIGHEST_PROTOCOL)
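A minimal round-trip sketch (dropping min_df=10 and max_features=500 so it runs on a toy list):

import pickle
from sklearn.feature_extraction.text import TfidfVectorizer

listComments = ["hello this is a test", "the car is red"]
tfidf_vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 3))
tfidf_vectorizer.fit(listComments)

# Pickle the fitted transformer itself, not the transformed matrix.
with open('vectorizerTFIDF.pickle', 'wb') as idxf:
    pickle.dump(tfidf_vectorizer, idxf, pickle.HIGHEST_PROTOCOL)

with open('vectorizerTFIDF.pickle', 'rb') as infile:
    tdf = pickle.load(infile)

vector = tdf.transform(["this is a car"])  # now works: tdf has .transform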