Can we save our trained dataset into xml format using pickle - python-3.x

Is it possible to store our own trained dataset in XML format using pickle?
import numpy as np
import random
import pickle
import matplotlib
# Probe every interactive matplotlib backend and report which ones can be
# activated on this machine.
gui_env = list(matplotlib.rcsetup.interactive_bk)
non_gui_backends = matplotlib.rcsetup.non_interactive_bk
print("Non Gui backends are:", non_gui_backends)
print("Gui backends I will test for", gui_env)
for gui in gui_env:
    print("testing", gui)
    try:
        # `warn` is no longer an accepted keyword of matplotlib.use(); passing
        # it raises TypeError, which the old bare `except:` reported as
        # "Not found" for every backend.
        matplotlib.use(gui, force=True)
        from matplotlib import pyplot as plt
        print(" ", gui, "Is Available")
        plt.plot([1.5, 2.0, 2.5])
        fig = plt.gcf()
        fig.suptitle(gui)
        print("Using ..... ", matplotlib.get_backend())
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit
        # still interrupt the probe.
        print(" ", gui, "Not found")
import os
import cv2
from tqdm import tqdm
DATADIR = "Datasets/PetImages"
CATEGORIES = ["Dog", "Cat"]
training_data = []
IMG_SIZE = 50


def create_training_data():
    """Load, resize, shuffle and pickle the images found under DATADIR.

    Every file in ``DATADIR/<category>`` is read as a grayscale image, resized
    to ``IMG_SIZE x IMG_SIZE`` and appended to the module-level
    ``training_data`` list together with the index of its category in
    ``CATEGORIES``. The shuffled features are then written to ``X.xml`` and
    the labels to ``y.xml``.

    NOTE: despite the ``.xml`` extension, both files contain binary pickle
    data, not XML; read them back with ``pickle.load``.
    """
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)
        for img in tqdm(os.listdir(path)):
            img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            if img_array is None:
                # cv2.imread signals an unreadable file by returning None.
                continue
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except cv2.error:
                # Skip images OpenCV cannot resize instead of hiding every
                # possible exception.
                continue
            training_data.append([new_array, class_num])

    random.shuffle(training_data)

    X = []
    y = []
    for features, label in training_data:
        X.append(features)
        y.append(label)

    if X:
        # Debug output of the first sample; guarded so that an empty dataset
        # does not raise IndexError.
        print(X[0].reshape(-1, IMG_SIZE, IMG_SIZE, 1))
    X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

    # Context managers close the files even if pickling fails.
    with open("X.xml", "wb") as pickle_out:
        pickle.dump(X, pickle_out)
    with open("y.xml", "wb") as pickle_out:
        pickle.dump(y, pickle_out)


create_training_data()
print(len(training_data))

Related

Have a pytorch class produce 2 separate datasets

Does anyone have any suggestions for creating 2 datasets at once? When I run dataset = ImbCircuitDataset('test'), I want it to save 2 separate sets, even if I have to go all the way back to self.data and split it there, I just want the class to produce 2 separate datasets.
Basically run set1, set2 = ImbCircuitDataset('test')
I started doing it at the bottom of def process(self), but I have a feeling it's wrong. I am thinking that in def processed_file_names(self) I need to return 'set1.pt', 'set2.pt'.
from scipy.io import loadmat
import pandas as pd
from torch_geometric.data import Data, InMemoryDataset
import torch
import neptune.new as neptune
import numpy as np
import torch_geometric
from tqdm import tqdm
from typing import Tuple, Union
from torch import Tensor
from collections.abc import Sequence
from torch_geometric.utils import from_networkx
import networkx as nx
import matplotlib.pyplot as plt
# Union of slice, Tensor, ndarray and Sequence; presumably meant for dataset
# indexing — it is not referenced in the visible code, confirm before relying on it.
IndexType = Union[slice, Tensor, np.ndarray, Sequence]
# Print the installed torch / torch_geometric versions and CUDA availability.
print(f"Torch version: {torch.__version__}")
print(f"Cuda available: {torch.cuda.is_available()}")
print(f"Torch geometric version: {torch_geometric.__version__}")
class ImbCircuitDataset(InMemoryDataset):
    """In-memory graph dataset built from ``shuffled_data.mat``, stored as two subsets.

    ``process`` sorts the graphs by their ``Labels`` value, cuts the sorted
    list in two and saves each part to its own processed file. An instance
    exposes one of the two parts, chosen with ``subset``. To get both at once::

        set1, set2 = ImbCircuitDataset.splits('test')

    Args:
        root: Dataset root directory (forwarded to ``InMemoryDataset``).
        transform: Forwarded to ``InMemoryDataset``.
        pre_transform: Applied to every graph in ``process`` before saving.
        pre_filter: Graphs for which it returns a falsy value are dropped in
            ``process``.
        subset: ``0`` loads ``set1.pt`` (default), ``1`` loads ``set2.pt``.

    Raises:
        ValueError: if ``subset`` is neither 0 nor 1.
    """

    def __init__(self, root, transform=None, pre_transform=None,
                 pre_filter=None, subset=0):
        if subset not in (0, 1):
            raise ValueError(f"subset must be 0 or 1, got {subset!r}")
        super().__init__(root, transform, pre_transform, pre_filter)
        self.data, self.slices = torch.load(self.processed_paths[subset])

    @classmethod
    def splits(cls, root, transform=None, pre_transform=None, pre_filter=None):
        """Return ``(set1, set2)``: one dataset instance per processed subset."""
        return tuple(
            cls(root, transform, pre_transform, pre_filter, subset=index)
            for index in (0, 1)
        )

    @property
    def raw_file_names(self):
        return 'shuffled_data.mat'

    @property
    def processed_file_names(self):
        # One file per subset; previously both halves were written to the same
        # path, so the second save overwrote the first.
        return ['set1.pt', 'set2.pt']

    def download(self):
        pass

    def process(self):
        """Convert the raw ``.mat`` file into two collated, saved subsets."""
        raw_data = loadmat(self.raw_paths[0], squeeze_me=True)
        frame = pd.DataFrame(raw_data['Graphs'])
        frame = frame.sort_values('Labels', ascending=True, ignore_index=True)
        # Map node position -> label for every graph. Whole-column assignment
        # avoids the chained `frame['Ln'][i] = ...` write, which pandas may
        # apply to a temporary copy.
        frame['Ln'] = [dict(enumerate(labels)) for labels in frame['Ln']]

        data_list = []
        for _, row in tqdm(frame.iterrows(), total=frame.shape[0]):
            nxg = nx.Graph(row['A'])
            nx.set_node_attributes(nxg, row['Ln'], 'component')
            pt_graph = self._get_graph_object(nxg)
            pt_graph.x = self._get_node_features(nxg, row['Ln'])
            pt_graph.performance = torch.tensor(row['Labels'], dtype=torch.float)
            data_list.append(pt_graph)

        if self.pre_filter is not None:
            data_list = [d for d in data_list if self.pre_filter(d)]
        if self.pre_transform is not None:
            data_list = [self.pre_transform(d) for d in data_list]

        split = 0.5
        # Slice indices must be integers; `N * split` is a float and raised
        # TypeError when used directly as a slice bound.
        cut = int(len(data_list) * split)
        subsets = (data_list[:cut], data_list[cut:])
        # NOTE(review): the list is sorted by 'Labels', so the first subset
        # holds the lower-labelled half — confirm that this is the intent.
        for path, subset in zip(self.processed_paths, subsets):
            torch.save(self.collate(subset), path)

    def _get_node_features(self, nxgraph, node_labels):
        """Return a float tensor with three feature columns per node.

        Columns: encoded component label, betweenness centrality,
        eigenvector centrality.

        Raises:
            ValueError: if a node label is not one of C, G, I, O, P, R.
                Silently skipping it (the previous behaviour) left the label
                column shorter than the centrality columns.
        """
        betweenness = list(nx.betweenness_centrality(nxgraph).values())
        eigenvector = list(nx.eigenvector_centrality(
            nxgraph, max_iter=600).values())
        mapping_dict = {'C': 0, 'G': 1, 'I': 2, 'O': 3, 'P': 4, 'R': 5}
        component_labels = []
        for value in node_labels.values():
            if value not in mapping_dict:
                raise ValueError(f"unknown component label: {value!r}")
            component_labels.append(mapping_dict[value])
        all_feats = [component_labels, betweenness, eigenvector]
        all_feats = np.asarray(all_feats).transpose()
        return torch.tensor(all_feats, dtype=torch.float)

    def _get_graph_object(self, nx_graph):
        """Convert a networkx graph into a torch_geometric ``Data`` object."""
        nxg = from_networkx(nx_graph)
        return nxg

    @property
    def num_node_features(self) -> int:
        return 3

    @property
    def num_classes(self) -> int:
        return 2

Why doesn't plotly show the graph, when I execute the same statements inside a class?

I am learning classes. The script below, shows the graph correctly using plotly express, but if I integrate it as a method in a class, it doesn't.
I show the classes below. With the first one, we import quotes from Yahoo! With the second, we try to represent a graph of the imported normalized prices.
import pandas as pd
import pandas_datareader as pdr
import datetime as dt
from datetime import date
from plotly.offline import iplot
import plotly.express as px
class ImportadorCotizaciones:
    """Downloads closing prices for a fixed set of tickers and joins them by date."""

    def __init__(self):
        # Filled by Importar_cotizaciones(); None until then.
        self.cotizaciones = None
        self.start = "2000-1-4"
        self.end = date.today()

    def Importar_cotizaciones(self):
        """Fetch the "Close" column of each ticker and merge the results on 'Date'.

        The merged frame is stored in ``self.cotizaciones``, with one column
        per ticker named after its display name.
        """
        dicc_tickers = {"IBE.MC": "Iberdrola", "TEF.MC": "Telefonica", "^IBEX": "Ibex35"}
        frames = []
        for ticker, nombre in dicc_tickers.items():
            self.cotizaciones_de_ticker = pdr.DataReader(ticker, 'yahoo', self.start, self.end)
            self.cotizaciones_de_ticker = self.cotizaciones_de_ticker[["Close"]]
            self.cotizaciones_de_ticker = self.cotizaciones_de_ticker.rename(
                columns={"Close": nombre}
            )
            frames.append(self.cotizaciones_de_ticker)

        # Start from the first frame and merge the remaining ones in order.
        pending = iter(frames)
        self.cotizaciones = next(pending)
        for frame in pending:
            self.cotizaciones = self.cotizaciones.merge(frame, on='Date')
# Keeps a reference to an importer object and builds a line chart from its quotes.
class Indicadores:
def __init__(self, importador):
self.importador = importador
def dibujar_grafico(self):
# NOTE(review): the next line reads the module-level name `importador`,
# not the `self.importador` stored by __init__.
self.aux_val_ind = importador.cotizaciones[["Iberdrola", "Ibex35"]].pct_change().dropna()
df = self.aux_val_ind.copy(deep=True)
# Extra column holding the row-wise mean of the two series.
df['Media'] = df.mean(axis = 1)
# Using plotly.express
# NOTE(review): the figure created by px.line is neither returned, stored nor shown.
px.line((df + 1).cumprod() ,y=df.columns ,title=f"\nValor de 1€ invertido desde el { importador.start} hasta el {importador.end} ")
# Driver: build the importer, download the quotes, then ask for the chart.
importador = ImportadorCotizaciones()
importador.Importar_cotizaciones()
importador.cotizaciones[:3]
indicadores = Indicadores(importador)
indicadores.dibujar_grafico()
The script that works outside the class is:
# Using plotly.express
from plotly.offline import iplot
import plotly.express as px
start = "2000-1-4"
end = date.today()
# Percentage change between consecutive rows of the two columns, NaN rows dropped.
aux_val_ind = importador.cotizaciones[["Iberdrola", "Ibex35"]].pct_change().dropna()
df = aux_val_ind.copy(deep=True)
# Extra column holding the row-wise mean of the two series.
df['Media'] = df.mean(axis = 1)
# The figure is the value of this final bare expression; presumably a notebook
# displays it automatically — confirm the execution environment.
px.line((df + 1).cumprod() ,y=df.columns ,title=f"\nValor actual de 1€ invertido el {start} ")
There is only one issue: the method does not return the figure.
class Indicadores:
    """Builds charts from the quotes held by an importer object.

    Args:
        importador: Object exposing ``cotizaciones`` (a DataFrame with at
            least the "Iberdrola" and "Ibex35" columns), ``start`` and ``end``.
    """

    def __init__(self, importador):
        self.importador = importador

    def dibujar_grafico(self):
        """Return a plotly line figure of the cumulative value of 1€ invested.

        The percentage changes used for the chart are also stored in
        ``self.aux_val_ind``.
        """
        # Use the importer given to this instance; the previous version read a
        # global named `importador` and only worked when one happened to exist.
        importador = self.importador
        self.aux_val_ind = importador.cotizaciones[["Iberdrola", "Ibex35"]].pct_change().dropna()
        df = self.aux_val_ind.copy(deep=True)
        df['Media'] = df.mean(axis=1)
        # Using plotly.express; the figure must be returned so the caller
        # (or the notebook) can display it.
        return px.line(
            (df + 1).cumprod(),
            y=df.columns,
            title=f"\nValor de 1€ invertido desde el { importador.start} hasta el {importador.end} ",
        )

How do you parse the bin file from INT8 Calibration of TensorRT?

I have created a python script for calibrating(INT8) the dynamic scales of the activation of TinyYOLO V2 using TensorRT. The script gave me a file called calibration_cache.bin. How do I parse the .bin file ? What do the values inside the .bin file mean ?
calibrator.py
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from PIL import Image
import ctypes
import tensorrt as trt
import os
# Dimensions used to size the calibration batch buffer and to resize input images.
CHANNEL = 3
HEIGHT = 416
WIDTH = 416
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator):
    """INT8 entropy calibrator that feeds TensorRT from a batch stream.

    Args:
        input_layers: Sequence of input layer names; only the first entry is
            consulted.
        stream: Batch source exposing ``calibration_data``, ``batch_size``,
            ``reset()`` and ``next_batch()``.
    """

    def __init__(self, input_layers, stream):
        trt.IInt8EntropyCalibrator.__init__(self)
        self.input_layers = input_layers
        self.stream = stream
        # Device buffer sized to hold one full batch from the stream.
        self.d_input = cuda.mem_alloc(self.stream.calibration_data.nbytes)
        stream.reset()

    def get_batch_size(self):
        return self.stream.batch_size

    def get_batch(self, bindings, names):
        """Copy the next batch to the device; return None when the stream is exhausted."""
        batch = self.stream.next_batch()
        if not batch.size:
            return None
        cuda.memcpy_htod(self.d_input, batch)
        # NOTE(review): this iterates over the characters of the first layer
        # name and compares each with names[0]; kept unchanged — confirm the
        # intended check.
        for i in self.input_layers[0]:
            assert names[0] != i
        bindings[0] = int(self.d_input)
        return bindings

    def read_calibration_cache(self, length=0):
        """Return the bytes of ``calibration_cache.bin``, or None if it does not exist."""
        # Open directly instead of checking os.path.exists first: the file
        # could disappear between the check and the open.
        try:
            with open('calibration_cache.bin', 'rb') as f:
                return f.read()
        except FileNotFoundError:
            return None

    def write_calibration_cache(self, cache, size=0):
        """Write *cache* to ``calibration_cache.bin``, replacing any existing file."""
        with open('calibration_cache.bin', 'wb') as f:
            f.write(cache)
        return None
class ImageBatchStream():
    """Serves calibration images in fixed-size batches.

    Args:
        batch_size: Number of images per batch.
        calibration_files: Sequence of image paths.
        preprocessor: Callable applied to each CHW float32 image before it is
            stored in the batch buffer.
    """

    def __init__(self, batch_size, calibration_files, preprocessor):
        self.batch_size = batch_size
        # Number of batches, counting a final partial batch.
        self.max_batches = (len(calibration_files) // batch_size) + \
                           (1 if (len(calibration_files) % batch_size)
                            else 0)
        self.files = calibration_files
        # Buffer reused for every batch.
        self.calibration_data = np.zeros((batch_size, CHANNEL, HEIGHT, WIDTH),
                                         dtype=np.float32)
        self.batch = 0
        self.preprocessor = preprocessor

    @staticmethod
    def read_image_chw(path):
        """Load *path*, resize to WIDTH x HEIGHT and return a channel-first float32 array."""
        # Force three channels so grayscale / RGBA / CMYK inputs still match
        # the (CHANNEL, HEIGHT, WIDTH) slots of the batch buffer.
        img = Image.open(path).convert('RGB').resize((WIDTH, HEIGHT), Image.NEAREST)
        im = np.array(img, dtype=np.float32, order='C')
        im = im[:, :, ::-1]          # reverse the channel axis
        im = im.transpose((2, 0, 1))  # HWC -> CHW
        return im

    def reset(self):
        """Rewind the stream to the first batch."""
        self.batch = 0

    def next_batch(self):
        """Return the next batch as a contiguous float32 array, or an empty array when done.

        NOTE(review): the buffer is reused, so when the final batch has fewer
        files than ``batch_size`` the trailing slots still hold images from
        the previous batch.
        """
        if self.batch >= self.max_batches:
            return np.array([])

        start = self.batch_size * self.batch
        files_for_batch = self.files[start:start + self.batch_size]
        for slot, f in enumerate(files_for_batch):
            print("[ImageBatchStream] Processing ", f)
            img = ImageBatchStream.read_image_chw(f)
            self.calibration_data[slot] = self.preprocessor(img)
        self.batch += 1
        return np.ascontiguousarray(self.calibration_data, dtype=np.float32)
test.py
from random import shuffle
from PIL import Image
import glob
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import os
from calibrator import *
# TensorRT logger constructed with the WARNING severity level.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# Path of the ONNX model to convert, and glob pattern used to find calibration images.
model_file = './tiny_yolov2/Model.onnx'
dataset_loc = './Dataset/*.jpg'
def normalize(data):
    """Divide *data* by 255.0 in place and return it."""
    max_pixel_value = 255.0
    data /= max_pixel_value
    return data
def create_calibration_dataset(pattern=None, max_files=20):
    """Return a random selection of calibration image paths.

    Args:
        pattern: Glob pattern to search; defaults to the module-level
            ``dataset_loc``.
        max_files: Maximum number of paths to return (default 20, the value
            that used to be hard-coded).

    Returns:
        A list of at most ``max_files`` paths in random order.
    """
    if pattern is None:
        pattern = dataset_loc
    calibration_files = glob.glob(pattern)
    shuffle(calibration_files)
    return calibration_files[:max_files]
# Pick the calibration images and wrap them in a batch stream of 5 images per batch.
calibration_files = create_calibration_dataset()
NUM_IMAGES_PER_BATCH = 5
batchstream = ImageBatchStream(NUM_IMAGES_PER_BATCH, calibration_files, normalize)
Int8_calibrator = PythonEntropyCalibrator(["conv2d_91_input"], batchstream)
# Configure the builder for INT8 with the calibrator above, and make the engine refittable.
# NOTE(review): int8_mode / int8_calibrator / refittable set directly on the builder and
# build_cuda_engine look like the legacy TensorRT builder API — verify against the installed version.
builder = trt.Builder(TRT_LOGGER)
builder.int8_calibrator = Int8_calibrator
builder.refittable = True
builder.int8_mode = True
network = builder.create_network()
parser = trt.OnnxParser(network, TRT_LOGGER)
print(builder.int8_mode, builder.platform_has_fast_int8,builder.refittable)
# Parse the ONNX model into the network; the return value of parse() is not checked here.
with open(model_file, 'rb') as model:
parser.parse(model.read())
print('Done reading ONNX File\n')
# Build the engine and write its serialized form to model.trt.
engine = builder.build_cuda_engine(network)
print(engine, TRT_LOGGER)
with open("model.trt", "wb") as f:
f.write(engine.serialize())
print("Done converting the ONNX to TRT\n")
# Create a refitter for the engine and print what it reports.
tinyolo_fitter = trt.Refitter(engine, TRT_LOGGER)
print(tinyolo_fitter.refit_cuda_engine())
print(tinyolo_fitter.get_tensors_with_dynamic_range())
calibration_cache.bin
TRT-5105-EntropyCalibration
image: 3c010a14
scalerPreprocessor_scaled: 38018ba0
image2: 38018ba0
batchnormalization_1_output: 3d07b31d
leakyrelu_1_output: 3c98a317
maxpooling2d_1_output: 3c1e5b30
batchnormalization_2_output: 3ca6aa67
leakyrelu_2_output: 3ca6aa67
maxpooling2d_2_output: 3c82cf7d
batchnormalization_3_output: 3ce07ce8
leakyrelu_3_output: 3ce52236
maxpooling2d_3_output: 3cc8ed6f
batchnormalization_4_output: 3d3df55f
leakyrelu_4_output: 3c651727
maxpooling2d_4_output: 3cec84fc
batchnormalization_5_output: 3d0f51e3
leakyrelu_5_output: 3cb52377
maxpooling2d_5_output: 3d026049
batchnormalization_6_output: 3d387291
leakyrelu_6_output: 3ccc009a
maxpooling2d_6_output: 3c8d0f0c
batchnormalization_7_output: 3e0de3d2
leakyrelu_7_output: 3d7b4397
batchnormalization_8_output: 3cc459d6
leakyrelu_8_output: 3cbd9562
grid: 3ddc32dc
def read_calibration_cache(self, length=0):
    """Return the bytes of ``calibration_cache.bin``, or None if the file does not exist.

    ``length`` is accepted for interface compatibility and is not used.
    """
    # Open directly instead of checking os.path.exists first: the file could
    # disappear between the check and the open.
    try:
        with open('calibration_cache.bin', 'rb') as f:
            return f.read()
    except FileNotFoundError:
        return None
I believe this method does the job: if a calibration_cache.bin file exists in your working directory, the calibrator reads it instead of running the calibration again.

Update plot in for loop in function

I'm trying to call a function like in the example below, and plot while running the code. The real values that I get as y-data are not really random numbers, but the point is that I would like it to get updated real-time. The plot in my example code below is just empty though and isn't getting updated.
import numpy as np
import matplotlib.pyplot as plt
import random as rnd
import time
initial_time = time.time()


def multiple_runs(number_of_runs):
    """Add one random point per run to a scatter plot, redrawing after each run.

    After every run the time elapsed since ``initial_time`` is printed.
    """
    xs, ys = [], []
    fig, ax = plt.subplots()
    points = ax.scatter(xs, ys)
    plt.draw()
    for run in range(1, number_of_runs + 1):
        xs.append(run)
        ys.append(rnd.randint(0, 100))
        # Replace the scatter's points with the full (x, y) history so far.
        points.set_offsets(np.c_[xs, ys])
        fig.canvas.draw_idle()
        plt.pause(0.1)
        elapsed = time.time() - initial_time
        print('Total time after run number ' + str(run) + ': ' + str(elapsed))


multiple_runs(100)
UPDATE:
Thanks @ImportanceOfBeingErnest, I got the code to work. However, my problem now is that the figure closes as soon as the loop finishes. Is there any way to keep it open? I tried using plt.waitforbuttonpress(), but I get a strange error from QTimer and I am not sure how or why. This is my working example code:
import numpy as np
import matplotlib.pyplot as plt
import random as rnd
import time
initial_time = time.time()


def multiple_runs(number_of_runs):
    """Add one random point per run to each of two stacked scatter plots.

    Both axes are fixed to the range 0-100 on x and y. After every run the
    figure is redrawn and the time elapsed since ``initial_time`` is printed.
    """
    top_x, top_y = [], []
    bottom_x, bottom_y = [], []
    fig, axes = plt.subplots(2, sharex=True)
    top_points = axes[0].scatter(top_x, top_y)
    bottom_points = axes[1].scatter(bottom_x, bottom_y)
    for axis in (axes[0], axes[1]):
        axis.set(xlim=(0, 100), ylim=(0, 100))
    plt.draw()
    for run in range(1, number_of_runs + 1):
        top_x.append(run)
        top_y.append(rnd.randint(0, 100))
        bottom_x.append(run)
        bottom_y.append(rnd.randint(0, 100))
        # Replace each scatter's points with its full (x, y) history so far.
        top_points.set_offsets(np.c_[top_x, top_y])
        bottom_points.set_offsets(np.c_[bottom_x, bottom_y])
        fig.canvas.draw_idle()
        plt.pause(0.1)
        elapsed = time.time() - initial_time
        print('Total time after run number ' + str(run) + ': ' + str(elapsed))


multiple_runs(100)
UPDATE2:
I tried using FuncAnimation, but getting the error TypeError: update() missing 2 required positional arguments: 'y' and 'y2'. I still need to use the for-loop because in my real code I'm using the previous values of y, to calculate the next values of y. This is my example code which is giving me the error;
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import random as rnd
import time
initial_time = time.time()
# Attempt to drive two scatter plots both from a manual loop and from FuncAnimation.
def multiple_runs(number_of_runs):
x_data, y_data = [], []
x_data2, y_data2 = [], []
fig, ax = plt.subplots(2, sharex = True)
sc = ax[0].scatter(x_data, y_data)
sc2 = ax[1].scatter(x_data2, y_data2)
ax[0].set(xlim=(0,100), ylim=(0,100))
ax[1].set(xlim=(0,100), ylim=(0,100))
# Appends one point to each series and refreshes both scatters.
def update(i, y, y2):
x_data.append(i+1)
y_data.append(y)
x_data2.append(i+1)
y_data2.append(y2)
sc.set_offsets(np.c_[x_data, y_data])
sc2.set_offsets(np.c_[x_data2, y_data2])
print ('Total time after run number ' + str(i+1) + ': ' + str(time.time() - initial_time))
# This loop calls update() directly with all three arguments, before the animation starts.
for i in range(0, number_of_runs):
y = rnd.randint(0,100)
y2 = rnd.randint(0,100)
update(i,y,y2)
# NOTE(review): no `fargs` is given, so FuncAnimation presumably calls update() with the
# frame value only — consistent with the reported "missing 2 required positional arguments".
ani = FuncAnimation(fig, update, frames=number_of_runs, interval=100, repeat=False)
plt.show()
multiple_runs(100)
As commented, I would recommend to use FuncAnimation. This would look as follows in your case. Note that in order to close the window, one would need to press q or close it with the mouse.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import random as rnd
import time
initial_time = time.time()


def multiple_runs(number_of_runs):
    """Animate two stacked scatter plots, adding one random point to each per frame.

    The animation runs ``number_of_runs`` frames at a 100 ms interval without
    repeating; ``plt.show()`` blocks until the window is closed.
    """
    top_x, top_y = [], []
    bottom_x, bottom_y = [], []
    fig, axes = plt.subplots(2, sharex=True)
    top_points = axes[0].scatter(top_x, top_y)
    bottom_points = axes[1].scatter(bottom_x, bottom_y)
    for axis in (axes[0], axes[1]):
        axis.set(xlim=(0, 100), ylim=(0, 100))

    def get_ydata(i):
        # Produces the pair of y values for frame i (random in this example).
        first = rnd.randint(0, 100)
        second = rnd.randint(0, 100)
        return first, second

    def update(i):
        # Frame callback: extend both series and refresh the scatters.
        new_top, new_bottom = get_ydata(i)
        frame_x = i + 1
        top_x.append(frame_x)
        top_y.append(new_top)
        bottom_x.append(frame_x)
        bottom_y.append(new_bottom)
        top_points.set_offsets(np.c_[top_x, top_y])
        bottom_points.set_offsets(np.c_[bottom_x, bottom_y])

    # Keep a reference to the animation while the window is shown.
    ani = FuncAnimation(fig, update, frames=number_of_runs, interval=100, repeat=False)
    plt.show()


multiple_runs(100)

Main thread not in main loop error in threading module

import time
import datetime as dt
import urllib.request
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import matplotlib.animation as Animation
from matplotlib import style
import matplotlib
import csv
import threading
# Apply the 'fivethirtyeight' style and create the module-level figure that
# animate() clears and plotting() animates.
style.use('fivethirtyeight')
fig = plt.figure()
def usd_in_bitcoin():
    """Scrape the BTC/USD quote from https://bitcoinwisdom.com/.

    Returns:
        The first child of the ``<td class="r">`` cell inside the
        ``<tr id="o_btcusd">`` row, as returned by BeautifulSoup.

    Raises:
        Exception: whatever ``urlopen`` / ``read`` raised; it is printed and
            then re-raised. Previously execution continued and failed with an
            unrelated UnboundLocalError on ``resp``.
        ValueError: if the expected row or cell is not present in the page.
    """
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen("https://bitcoinwisdom.com/") as resp:
            text = resp.read()
    except Exception as e:
        print(e)
        raise
    soup = BeautifulSoup(text, 'html.parser')
    intermediate = soup.find('tr', {"id": "o_btcusd"})
    ans = intermediate.find('td', {'class': 'r'}) if intermediate is not None else None
    if ans is None or not ans.contents:
        # soup.find returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on None.
        raise ValueError("BTC/USD quote not found in the downloaded page")
    return ans.contents[0]
def write_to_file(interval):
    """Append a ``<unix time>,<quote>`` line to bitcoin_usd.csv every *interval* seconds.

    Runs forever; each sample is also printed.
    """
    while True:
        quote = str(usd_in_bitcoin())
        stamp = str(time.time())
        print(stamp, quote)
        with open('bitcoin_usd.csv', 'a+') as file:
            # One record per line: timestamp, comma, quote.
            file.write(stamp + "," + quote + '\n')
        time.sleep(interval)
def animate(i):
    """Re-read bitcoin_usd.csv and redraw the whole series on the shared figure.

    Rows with fewer than two fields are skipped. The number of points read is
    printed on every call. *i* (the frame number) is not used.
    """
    stamps, prices = [], []
    with open('bitcoin_usd.csv') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) <= 1:
                continue
            stamp, price = (float(field) for field in row)
            stamps.append(dt.datetime.fromtimestamp(stamp))
            prices.append(price)
        print(len(stamps))
    dates = matplotlib.dates.date2num(stamps)
    # print(dates)
    fig.clear()
    plt.plot_date(dates, prices)
def plotting():
    """Start the animation that calls animate() every 1000 ms and show the figure."""
    # Bind the animation to a local name so it stays referenced while
    # plt.show() is running.
    live_chart = Animation.FuncAnimation(fig, animate, interval=1000)
    plt.show()
# Entry point: starts plotting() on a worker thread; the CSV-writer thread is commented out.
def main():
# plotting()
b = threading.Thread(name='making graph', target=plotting)
# a = threading.Thread(name='updating_csv', target=write_to_file, args=(5,))
# a.start()
# NOTE(review): plotting() calls plt.show() from a non-main thread here; GUI toolkits
# generally expect to run in the main thread — likely the source of the reported error, confirm.
b.start()
if __name__ == '__main__':
main()
In the above block of code, I am trying to plot the value of a bitcoin in usd by using scraping and then putting the value in a csv file.
Then I read the csv file to plot the graph.
Both plotting and scraping seem to work fine but if I do both of them simultaneously, I am getting an error saying main thread not in main loop. I searched a lot but was not able to solve this problem
The problem here is with the sequence of lines in main()
Try this:
def main():
    """Collect quotes in a background thread and run the plot in the main thread."""
    # Only the CSV writer goes into a worker thread. It is a daemon thread so
    # its endless loop does not keep the process alive after the plot window
    # is closed.
    a = threading.Thread(name='updating_csv', target=write_to_file, args=(5,),
                         daemon=True)
    a.start()
    # The GUI event loop must run in the main thread; starting plotting() in
    # a second thread is what triggers "main thread is not in main loop".
    plotting()

Resources