How to save trained model (simpletransformers.ner) - nlp

This is my first time using simpletransformers.ner, and now I want to save my trained model. This is my code:
# Notebook shell command: installs the simpletransformers package.
!pip install simpletransformers
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from simpletransformers.ner import NERModel,NERArgs
from transformers import AutoConfig, AutoTokenizer, AutoModel, TFAutoModel
# Build a 'bert' NER model from the 'bert-base-cased' checkpoint.
# NOTE(review): `label` and `args` are not defined in this snippet -- presumably
# the list of tag names and an NERArgs instance; confirm against the full notebook.
model = NERModel('bert', 'bert-base-cased',labels=label,args =args)
# Train on train_data with test_data as the evaluation set.
# NOTE(review): accuracy_score is passed under the keyword `acc`, presumably to
# register it as an additional evaluation metric -- confirm.
model.train_model(train_data,eval_data = test_data,acc=accuracy_score)
# Evaluate on test_data and unpack the three values returned by eval_model.
result, model_outputs, preds_list = model.eval_model(test_data)

Related

Keras won't load json file

I ran my code and saved my model using the following code:
# Serialize the model architecture to a JSON string and write it next to the
# weights file; the weights themselves are stored separately (see below).
model_json = model.to_json()
with open(inFilePath+".json", "w") as json_file:
    # The write must be indented under the `with` so it runs while the file is open.
    json_file.write(model_json)
# Weights go to a separate HDF5 file, written by the checkpoint callback.
modWeightsFilepath=inFilePath+".weights.hdf5"
# Callback configured to monitor 'val_accuracy' and to save weights only
# (save_weights_only=True), keeping just the best result (save_best_only=True).
# NOTE(review): the callback only has an effect once it is passed to the
# training call, which is not shown in this snippet -- confirm.
checkpoint = ModelCheckpoint(modWeightsFilepath, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=True, mode='auto')
And then I wanted to load my model again to make predictions:
from keras.models import model_from_json

# Read the architecture JSON; the context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('/home/models/final_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# Rebuild the (untrained) model from its JSON description.
model = model_from_json(loaded_model_json)
#load weights into new model
model.load_weights('/home/models/final_model.weights.hdf5')
print("Loaded model from disk")
But this gives me the following error:
TypeError: __init__() got an unexpected keyword argument 'ragged'
Full traceback:
And I don't quite know what's wrong. My Keras gpu version is 2.1.6-tf.
Edit:
In order to create the model I used:
import json
import numpy as np
from generator import DataGenerator
import tensorflow
KERAS_BACKEND=tensorflow
import keras
from keras.preprocessing import sequence
from keras.models import Sequential, Model
from keras import optimizers
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Conv1D, AveragePooling1D, MaxPooling1D
from keras.layers.merge import concatenate
from keras.optimizers import SGD
import os
import sys
from itertools import chain
#import matplotlib.pyplot as plt
from functools import reduce
from keras.callbacks import EarlyStopping,ModelCheckpoint
from sklearn.utils import class_weight
And in order to load the model, I imported:
from keras.models import model_from_json
after which I got the error that I told you about. And then I changed it to:
from tensorflow.keras.models import model_from_json
And the error persisted.

ValueError: could not convert string to float: '8/20/2014' - how do I fit the training dataset to the model?

This image may be useful for your reference. Can someone please help me fix these errors? I cannot fit the training dataset to the model:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the housing data from CSV into a DataFrame.
df=pd.read_csv('./DATA/kc_house_data.csv')
# Features are every column except 'price'; the target is the 'price' column.
# NOTE(review): no column is dropped or converted besides 'price'. The reported
# error value '8/20/2014' looks like a date string, so the data presumably
# contains a non-numeric date column that reaches the scaler -- confirm.
X=df.drop('price',axis=1).values
y=df['price'].values
from sklearn.model_selection import train_test_split
# NOTE(review): for two input arrays train_test_split returns
# (X_train, X_test, y_train, y_test); the first two names below are in the
# opposite order, so X and y splits would not line up -- confirm intent.
X_test, X_train, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
# Fit the scaler on the training features only, then reuse it for the test features.
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)
# NOTE(review): ndarray.shape is an attribute (a tuple), so calling it as
# shape() would presumably raise a TypeError -- confirm.
print(X_train.shape())
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Four hidden Dense layers of 19 ReLU units followed by a single linear output unit.
model=Sequential()
model.add(Dense(19,activation='relu'))
model.add(Dense(19,activation='relu'))
model.add(Dense(19,activation='relu'))
model.add(Dense(19,activation='relu'))
model.add(Dense(1))
# Adam optimizer with mean-squared-error loss.
model.compile(optimizer='adam',loss='mse')
# Train for 400 epochs in batches of 128, validating on the test split, and
# print the object returned by fit().
print(model.fit(x=X_train,y=y_train,validation_data=(X_test,y_test),batch_size=128,epochs=400))
This image may be used for your reference

Pipe-lining Standardscaler, Recursive feature selection, and Classifier

I have a given dataset, X and Y.
I want to implement the following steps using pipeline:
- Standardscaler
- Recursive feature selection
- RandomForestClassifier
- cross-validation predict
I implemented as follows:
import numpy as np
from sklearn.feature_selection import RFE, RFECV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_iris
# Load the iris data set: X holds the features, Y the class labels.
data = load_iris()
X = data.data
Y = data.target
# Parenthesised single-argument print works on both Python 2 and Python 3;
# the bare `print X.shape` statement form is a SyntaxError on Python 3.
print(X.shape)
print(Y.shape)
clf = RandomForestClassifier(n_estimators=50,max_features=None,n_jobs=-1,random_state=0)
# Two shuffled folds with a fixed seed, shared by RFECV and cross_val_predict.
kf = KFold(n_splits=2, shuffle=True, random_state=0)
# Steps in order: standardize -> recursive feature elimination with CV -> classifier.
pipeline = Pipeline([('standardscaler', StandardScaler()),
                     ('rfecv', RFECV(estimator=clf, step=1, cv=kf, scoring='accuracy', n_jobs=7)),
                     ('clf', clf)])
pipeline.fit(X,Y)
# Predictions for every sample from the cross-validation folds, scored
# against the true labels.
ypredict = cross_val_predict(pipeline, X, Y, cv=kf)
accuracy = accuracy_score(Y, ypredict)
print (accuracy)
Please look closely at my implementation and let me know what is wrong with my code. Thank you.
This works. The final estimator in the pipeline only needs to implement fit, which RFECV does. Here's the code:
from sklearn.feature_selection import RFECV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_iris
# Iris features and class labels.
iris = load_iris()
X, Y = iris.data, iris.target

# Standardize the features, then use RFECV (wrapping a random forest) as the
# final step of the pipeline.
pipeline = Pipeline([
    ('standardize', StandardScaler()),
    ('rfecv', RFECV(estimator=RandomForestClassifier(), scoring='accuracy')),
])

# Two shuffled folds with a fixed seed drive the cross-validated predictions.
kf = KFold(n_splits=2, shuffle=True, random_state=0)
ypredict = cross_val_predict(pipeline, X, Y, cv=kf)

# Compare the cross-validated predictions against the true labels.
accuracy = accuracy_score(Y, ypredict)
print(accuracy)
'Output':
0.96

AttributeError: 'RFECV' object has no attribute 'ranking_'

I tried to get the feature ranking by using the following steps:
1. Standardscaler
2. RandomForestClassifier
3. Recursive feature selection
from sklearn.feature_selection import RFECV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_iris
# Iris features (X) and class labels (Y).
data = load_iris()
X = data.data
Y = data.target
clf = RandomForestClassifier()
# Two-step pipeline: standardize, then RFECV wrapped around the random forest.
estimators = [('standardize' , StandardScaler()),
('rfecv', RFECV(estimator=clf, scoring='accuracy'))]
pipeline = Pipeline(estimators)
# NOTE(review): nothing in this snippet calls fit() on the pipeline or on the
# RFECV step before ranking_ is read; the attribute presumably only exists
# after fitting, which would explain the reported AttributeError -- confirm.
ranking_features = pipeline.named_steps['rfecv'].ranking_
print (ranking_features)
AttributeError: 'RFECV' object has no attribute 'ranking_'
Any best practices for doing this are welcome.
We first need to fit RFECV to the data before accessing the ranking_ attribute. Try running this code:
from sklearn.feature_selection import RFECV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_iris
# Iris features (X) and class labels (Y).
data = load_iris()
X = data.data
Y = data.target
clf = RandomForestClassifier()
estimators = [('standardize' , StandardScaler()),
              ('rfecv', RFECV(estimator=clf, scoring='accuracy'))]
# create pipeline
pipeline = Pipeline(estimators)
# Fit the whole pipeline so RFECV is trained on the standardized features;
# fitting only the 'rfecv' step on raw X would bypass the scaler step.
pipeline.fit(X, Y)
# get the feature ranking from the fitted RFECV step
ranking_features = pipeline.named_steps['rfecv'].ranking_
print (ranking_features)
'Output':
[2 3 1 1]

ValueError while in SVC

This is a cancer dataset with 10 features and a class.
# Feature matrix: the columns at positions 1 through 9 (nine columns).
X=df.iloc[:,1:10].values
# Target: the last column, selected with a list so the result keeps a column axis.
y=df.iloc[:,[-1]].values
from sklearn.preprocessing import Imputer
# Replace missing ('NaN') entries with a mean value.
# NOTE(review): axis=1 presumably imputes along rows rather than along
# feature columns -- confirm that this is intended.
imputer=Imputer(missing_values='NaN',strategy='mean',axis=1)
imputer=imputer.fit(X)
X=imputer.transform(X)
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing, with a fixed seed.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)
from sklearn.svm import SVC
classifier=SVC (kernel='rbf',random_state=0)
classifier.fit(X_train,y_train)
# NOTE(review): predict() is given y_test (the labels) rather than X_test (the
# features); this matches the reported shape mismatch of 1 column versus the
# 9 features seen at training time.
y_pred=classifier.predict(y_test)
When I execute this I get
ValueError: X.shape[1] = 1 should be equal to 9, the number of features at training time
Your error was caused by the following line, where you passed y_test instead of X_test:
classifier.predict(y_test)
Full code:
# pandas is required for pd.DataFrame below; without this import the snippet
# fails with a NameError.
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from sklearn.svm import SVC
# Build a DataFrame of the breast-cancer features, labelled with their names.
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
# Use the columns at positions 1 through 9 as features and the data set's
# target array as labels.
X=df.iloc[:,1:10]
y = data.target
# NOTE(review): Imputer is not available in newer scikit-learn releases
# (SimpleImputer replaces it but has no `axis` argument) -- confirm the
# installed version before running.
imputer=Imputer(strategy='mean',axis=1)
X = imputer.fit_transform(X)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)
clf = SVC(kernel='rbf').fit(X_train, y_train)
# Predict from the held-out features (X_test), not from the labels.
y_pred=clf.predict(X_test)
# Mean accuracy on the held-out split.
print(clf.score(X_test, y_test))
yields:
0.6842105263157895

Resources