Pytorch Dataset for video - pytorch

Hi I made a video frames loader Dataset to be fed into a pytorch model. I want to sample frames from a video, but the frames should be uniformly sampled from each video. This is the class I came up with. I was wondering if there was any better method to speed up the sampling process.
Do you have any suggestions, especially for the read_video method?
Thanks
import torch
import torchvision as tv
import cv2
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from pathlib import Path
class VideoLoader(torch.utils.data.Dataset):
    """Dataset yielding a stack of uniformly sampled frames and a label per video.

    Args:
        data_path: Indexable collection of ``(video_path, label)`` pairs.
        classes: Mapping (or sequence) that translates a label into the value
            returned alongside the frames.
        transforms: Optional callable applied to every frame tensor.
        max_frames: Optional cap on the number of frames returned per video.
        frames_ratio: A ``float`` is the fraction of the video's frames to
            sample; any other truthy value is the absolute number of frames
            to sample; a falsy value means "every frame".
    """

    def __init__(self, data_path, classes, transforms=None, max_frames=None, frames_ratio=None):
        super().__init__()
        self.data_path = data_path
        self.classes = classes
        self.frames_ratio = frames_ratio
        self.transforms = transforms
        self.max_frames = max_frames

    def _frame_indices(self, total_frames):
        """Return the integer frame positions to decode, in ascending order."""
        if total_frames <= 0:
            return []
        ratio = self.frames_ratio
        if not ratio:
            frames_to_pick = total_frames
        elif isinstance(ratio, float):
            frames_to_pick = int(total_frames * ratio)
        else:
            frames_to_pick = int(ratio)
        # Frame positions must be whole numbers; linspace yields floats.
        positions = np.linspace(0, total_frames, frames_to_pick, endpoint=False)
        indices = positions.astype(int).tolist()
        if self.max_frames:
            # Same cap as before: keep the first ``max_frames`` samples.
            indices = indices[:self.max_frames]
        return indices

    def read_video(self, path):
        """Decode the sampled frames of the video at *path*.

        Returns:
            A tensor stacking one ``ToTensor``-converted (and optionally
            transformed) frame per sampled position.

        Raises:
            RuntimeError: If no frame could be decoded.
        """
        frames = []
        to_tensor = tv.transforms.ToTensor()  # built once per video, not per frame
        vc = cv2.VideoCapture(str(path))  # str() so pathlib.Path inputs work too
        try:
            total_frames = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
            next_pos = 0  # position the decoder will deliver on the next read()
            for idx in self._frame_indices(total_frames):
                # Seek BEFORE reading (the old code read first, so frame 0 was
                # duplicated and every sample lagged one index behind), and
                # only when needed: seeking is far more expensive than
                # decoding the next consecutive frame.
                if idx != next_pos:
                    vc.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ok, f = vc.read()
                if not ok:
                    break
                next_pos = idx + 1
                # NOTE(review): OpenCV delivers BGR channel order; it is passed
                # through unchanged here, exactly as before.
                f = to_tensor(f)
                if self.transforms:
                    f = self.transforms(f)
                frames.append(f)
        finally:
            # Release the capture even if decoding or a transform raises.
            vc.release()
        if not frames:
            raise RuntimeError(f"no frames could be read from {path!r}")
        return torch.stack(frames)

    def __getitem__(self, index):
        """Return ``(frames, class)`` for the video at *index*."""
        v_path, label = self.data_path[index]
        return self.read_video(v_path), self.classes[label]

    def __len__(self):
        """Return the number of videos."""
        return len(self.data_path)

Because you can't really seek through a video in parallel, there's not really any faster sampling process you can run locally. I personally had trouble with this problem which is why I started building a simple API for this called Sieve. You can literally upload data directly to Sieve (either from a cloud bucket or from local storage) and it'll quickly cut up all the frames for you and even mark them with things like motion, people, objects, and more. It parallelizes using serverless functions in the cloud which makes it really fast, even for hours or days of footage.
You can then quickly export from Sieve using the dashboard which gives you a quick curl command you can run to download the exact samples you want.
Here's a helpful repo: https://github.com/Sieve-Data/automatic-video-processing

If you are happy with extracting the frames of each video to disk beforehand, this library is exactly what you're looking for:
Video-Dataset-Loading-PyTorch on Github
https://github.com/RaivoKoot/Video-Dataset-Loading-Pytorch

Related

How to structure my own Python signal processing module?

I want to take my Python (currently version 3.9.7) programming skills to the next level. Up to now I have only written some small scripts for myself that no one had to review or reuse. Now I want to write code that can be considered "clean" and can be reused by others. For this purpose, I am writing my own signal processing module with which I can generate high- and low-pass filters in order to filter signals. I have no experience with structuring packages/modules, so I have some questions regarding code structure.
Up to now, I have a class sim_lowpass:
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 22 10:37:19 2022
#author: ilja
"""
from matplotlib import pyplot as plt
import math
class sim_lowpass:
    """First-order low-pass filter simulation.

    Args:
        cutoff: Cutoff frequency used by the transfer function.
        order: Filter order; only ``1`` is implemented.
        fs: Sampling frequency. The transfer function is evaluated on the
            integer frequencies ``0 .. int(fs / 2) - 1``.
    """

    def __init__(self, cutoff: int, order: int, fs: int) -> None:
        self.fs = fs
        self.nyq = int(0.5 * fs)
        self.cutoff = cutoff
        self.order = order

    def _check_order(self) -> None:
        """Raise ``NotImplementedError`` for filter orders other than 1."""
        if self.order != 1:
            raise NotImplementedError(
                f"filter order {self.order} is not implemented (only order 1)")

    def _transfer_func(self, f: float) -> float:
        """Return the filter gain at frequency *f*.

        Raises:
            NotImplementedError: If ``order`` is not 1 (previously this
                silently returned ``None``).
        """
        self._check_order()
        # NOTE(review): the textbook first-order magnitude is
        # 1 / sqrt(1 + (f / cutoff) ** 2); the expression below is kept as
        # written - confirm which one is intended.
        return 1 / (1 + (f / (2 * math.pi * self.cutoff)))

    def transfer_func(self) -> list[float]:
        """Return the gain for every integer frequency below ``self.nyq``.

        Raises:
            NotImplementedError: If ``order`` is not 1 (previously this
                failed with an unbound local variable).
        """
        self._check_order()
        return [self._transfer_func(f) for f in range(self.nyq)]

    def propagate(self, x_hat):
        """Weight the samples of *x_hat* element-wise with the transfer function.

        The original called ``self.impulse_response()``, which does not exist
        on this class; the transfer function is used instead. The result is
        truncated to the shorter of the two sequences.
        """
        return [x * gain for x, gain in zip(x_hat, self.transfer_func())]

    def bode_plot(self, tr_func: list[float]) -> None:
        """Draw *tr_func* over the frequencies ``0 .. self.nyq - 1``.

        Only the magnitude axis is populated; the phase axis is configured
        but left empty (still TODO).
        """
        fig, (ax1, ax2) = plt.subplots(2, 1, constrained_layout=True,
                                       figsize=(8, 5))
        ax1.plot(list(range(self.nyq)), tr_func)
        ax1.set_xscale('log')
        ax1.set_yscale('log')
        ax1.set_ylabel('Magnitude (dB)')
        ax1.grid(True)
        # TODO: plot the phase response on ax2.
        ax2.set_xscale('log')
        ax2.set_yscale('log')
        ax2.set_xlabel('Frequency (Hz)')
        ax2.set_ylabel('Phase (deg)')
        ax2.grid(True)
        fig.suptitle('Bode Plot', fontsize=16)
def main() -> None:
    """Create a first-order low-pass filter and draw its Bode plot."""
    # filter parameters
    params = {"cutoff": 100, "order": 1, "fs": 4e6}

    # build the filter, evaluate its transfer function and plot it
    lowpass = sim_lowpass(params["cutoff"], params["order"], params["fs"])
    response = lowpass.transfer_func()
    lowpass.bode_plot(response)


if __name__ == '__main__':
    main()
Questions:
First of all: Is the code up to now well structured (in terms of scalability, testability, ... what else is there?)
Second: Now I want to create the class sim_highpass. How do I continue without copy-pasting the parts I can reuse from the lowpass class?
Third: Where do I place this file (and what would be a meaningful name) inside the package hierarchy?
Last but not least: Any other tips for improvement?
I usually get inspiration for code-structure from real projects. For example, since you are using matplotlib, their github could be a place to start: https://github.com/matplotlib/matplotlib/tree/main/lib/matplotlib

Use of pytorch dataset for model inference- GPU

I am running T5-base-grammar-correction for grammar correction on the text column of my dataframe:
from happytransformer import HappyTextToText
from happytransformer import TTSettings
from tqdm.notebook import tqdm
# Needed for the ``progress_apply`` call at the bottom.
tqdm.pandas()

happy_tt = HappyTextToText("T5", "./t5-base-grammar-correction")
beam_settings = TTSettings(num_beams=5, min_length=1, max_length=30)


def grammer_pipeline(text):
    """Return the model output for *text* prefixed with ``"gec: "``."""
    prompt = "gec: " + text
    return happy_tt.generate_text(prompt, args=beam_settings).text


# One model call per row; the result column is filled row by row.
df['new_text'] = df['original_text'].progress_apply(grammer_pipeline)
The pandas apply function runs and provides the required results, but it is quite slow.
Also I get the below warning while executing the code
/home/.local/lib/python3.6/site-packages/transformers/pipelines/base.py:908: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
UserWarning,
I have access to GPU. Can somebody provide some pointers to speed up the execution and utilising full capabilities of GPU
--------------------------------EDIT---------------------------------
I tried using pytorch Dataset in the below way, but still the processing is slow:
class CustomD(Dataset):
    """Dataset whose items are the generated texts for the given inputs."""

    def __init__(self, text):
        # ``text`` must expose ``.shape`` and integer indexing.
        self.len, self.text = text.shape[0], text

    def __len__(self):
        """Return the number of input texts."""
        return self.len

    def __getitem__(self, idx):
        """Run the model on item *idx* and return the generated text."""
        # The model is invoked here, i.e. once per item.
        prompt = "gec: " + self.text[idx]
        generated = happy_tt.generate_text(prompt, args=beam_settings)
        return generated.text
import itertools

# The dataset class in this snippet is ``CustomD``; the original instantiated
# ``GramData``, which is not defined anywhere in it.
TD = CustomD(df.original_text)
final_data = DataLoader(dataset=TD,
                        batch_size=10,
                        shuffle=False
                        )

# Collect the batches in order, then flatten them into one list of texts.
list_modified = list(final_data)
flat_list = list(itertools.chain.from_iterable(list_modified))
df["new_text"] = flat_list

How to retrieve one row at a time from the csv file using generator functions

I need to take one row at a time from the CSV file to be used in the reinforcement learning environment class as an observation tuple. I tried a generator function, but first, it does not retrieve any data, and second, it would provide all the data iteratively, which doesn't match the requirements of my problem. Also, I need the currently selected observation (CSV row) to be available in multiple methods of the environment class, for instance in the reward function.
Any idea or suggestion is highly appreciated on how to do this. Thanks
class Environment1:
    """Environment that serves the rows of ``data`` one at a time.

    Args:
        data: Object exposing ``.values`` as an iterable of rows
            (presumably a pandas ``DataFrame`` loaded from the CSV file).
        max_ticks: Stored as ``MAX_TICKS``; not used by the methods below.

    Attributes:
        observation: The row tuple most recently returned by ``get_state``
            (``None`` before the first call), so other methods such as the
            reward function can read the current observation.
        episode_over: Set to ``True`` once the rows are exhausted.

    ``step`` relies on ``take_action`` and ``get_reward``, which are not
    defined in this class and must be supplied elsewhere.
    """

    def __init__(self, data, max_ticks=300):
        self.data = data
        self.application_latency = 1342
        self.reward = 0
        self.MAX_TICKS = max_ticks
        self.episode_over = False
        self.observation = None
        # Lazy iterator over the rows; advanced once per get_state() call.
        self._rows = self._iter_rows()

    def _iter_rows(self):
        """Yield every row of ``self.data`` as a tuple."""
        for row in self.data.values:
            yield tuple(row)

    def step(self, act):
        """Apply *act* and return ``(observation, reward, episode_over)``."""
        # The original passed the undefined name ``action`` here.
        self.take_action(act)
        reward = self.get_reward()
        ob = self.get_state()
        return ob, reward, self.episode_over

    def get_state(self):
        """Advance to the next row and return it as a tuple.

        When no rows are left, ``episode_over`` is set and the last
        observation is returned again.
        """
        try:
            self.observation = next(self._rows)
        except StopIteration:
            self.episode_over = True
        return self.observation
With what I gathered from your question, you want to create a generator of observation tuples from your csv data. Specifically, you want to pass each tuple with edge latency and cloud latency columns to another function. I have written some example code which will make a list of tuples for each row of your data.
import pandas as pd
import numpy as np
def createGenerator(self):
    """Yield one ``(Edge_Latency, Cloud_latency)`` tuple per row of ``self.data``."""
    latency_columns = ['Edge_Latency', 'Cloud_latency']
    rows = self.data[latency_columns].to_numpy()
    # Materialise all tuples first, then hand them out one by one.
    observations = list(map(tuple, rows))
    yield from observations

Different sessions show different streaming data with a single bokeh server, how to solve it?

I'm working on a simulated oscilloscope where the server PC collects data and ultimately will publish the streaming plot online. Below is a working script that can do the job. However, when I open multiple browsers, the streaming plots exhibit different data (although they are using the same data source). The example 'ohlc' seems to have the same problem. So, what is the right way to do this? I'm considering writing the data to a file, but that would bring some issues such as file I/O delay and disk storage limitations. Thank you for any help.
from bokeh.server.server import Server
from bokeh.models import ColumnDataSource, Label
from bokeh.plotting import figure
from bokeh.layouts import column
import numpy as np
import datetime as dt
from functools import partial
import time
# this will be replaced with the real data collector in the end
def f_emitter(p=0.1):
    """Return ``(timestamp, value)`` for one simulated sample.

    ``value`` is a uniform random draw from ``np.random.rand()`` when that
    draw does not exceed *p*, and ``0.`` otherwise.
    """
    sample = np.random.rand()
    if sample > p:
        sample = 0.
    return (dt.datetime.now(), sample)
def make_document(doc, functions, labels):
    """Fill *doc* with one streaming line plot per entry of *functions*.

    Each function is called without arguments every 100 ms and must return
    a ``(time, value)`` pair; ``labels[i]`` is the y-axis label of plot ``i``.
    Every call of this function builds its own data sources, so each
    document polls the functions independently.
    """
    channel_count = len(functions)
    sources = [ColumnDataSource(dict(time=[], data=[])) for _ in range(channel_count)]
    figs = []
    annotations = []

    for idx, source in enumerate(sources):
        fig = figure(x_axis_type='datetime',
                     y_axis_label=labels[idx], toolbar_location=None,
                     active_drag=None, active_scroll=None)
        fig.line(x='time', y='data', source=source)
        # Large on-plot readout of the latest value.
        readout = Label(x=10, y=10, text='', text_font_size='40px', text_color='black',
                        x_units='screen', y_units='screen', background_fill_color='white')
        fig.add_layout(readout)
        figs.append(fig)
        annotations.append(readout)

    def update():
        # Poll every function once and push its sample to the matching plot.
        for index, func in enumerate(functions):
            sample = func()
            point = dict(time=[sample[0]], data=[sample[1]])
            sources[index].stream(new_data=point, rollover=1000)
            annotations[index].text = f'{sample[1]: .3f}'

    doc.add_root(column(list(figs), sizing_mode='stretch_both'))
    doc.add_periodic_callback(callback=update, period_milliseconds=100)
if __name__ == '__main__':
    # list of functions and labels to feed into the scope
    functions = [f_emitter]
    labels = ['emitter']

    # Serve the scope document at the root path.
    app = partial(make_document, functions=functions, labels=labels)
    server = Server({'/': app})
    server.start()
    server.io_loop.add_callback(server.show, "/")

    # Run until interrupted from the keyboard.
    try:
        server.io_loop.start()
    except KeyboardInterrupt:
        print('keyboard interruption')
When you connect with a new client, by default Bokeh creates a new session. Each session has its own document, so the data source end up not being the same.

generate multiple plots by querying mongodb using multiprocessing

I would like to speed up a plotting function that looks up data from mongodb atlas. I used examples from online, however I'm not sure if it is the correct implementation. Using multiprocessing.Pool() seems slower than doing it without the package. What am I doing wrong? Thanks.
from pymongo import MongoClient
from matplotlib.backends.backend_svg import FigureCanvasSVG
from matplotlib.figure import Figure
import io
import multiprocessing
import time
# x-axis values shared by every plot: 220, 230, ..., 800.
lstOfwavelengths = list(range(220,810,10))
def build_graph_mongo_multiproc(pltcodeWithSuffix,wellID):
    """Render one small SVG plot and append its bytes to ``lstOfPlts``.

    Args:
        pltcodeWithSuffix: Name of the MongoDB collection to query.
        wellID: Used in the plot title only.

    NOTE(review): ``wavelength`` and ``lstOfPlts`` are not defined in this
    function or anywhere in the visible snippet - confirm where they are
    supposed to come from.
    """
    # A new client is created on every call.
    client = MongoClient()
    db = client.databasename
    img = io.BytesIO()
    fig = Figure(figsize=(0.6,0.6))
    axis = fig.add_subplot(1,1,1)
    absvals = db[pltcodeWithSuffix].find({"Wavelength":wavelength})
    # Keep the entries of the first matching document whose key is truthy.
    absvals = {k:v for k,v in absvals[0].items() if k}
    # NOTE(review): ``absvals`` is a dict at this point, not a sequence of
    # y-values - verify this plots what is intended.
    axis.plot(lstOfwavelengths,absvals)
    axis.set_title(f'{pltcodeWithSuffix}:{wellID}',fontsize=9)
    axis.title.set_position([.5, .6])
    # Hide all ticks and tick labels.
    axis.tick_params(
        which='both',
        bottom=False,
        left=False,
        labelbottom=False,
        labelleft=False)
    # Write the figure as SVG into the in-memory buffer.
    FigureCanvasSVG(fig).print_svg(img)
    lstOfPlts.append(img.getvalue() )
The only difference from the single and multiproc function is that the MongoClient is called once, outside the function.
I found this great article: The efficient way of using multiprocessing with pymongo
Using the article as a template, I was able to reduce the computation time to ~7.5 seconds instead of 21 seconds. I'm sure someone more experienced could shave off more time, but I think that suffices for my level.
# Manager-backed list: build_graph_mongo_multiproc appends its SVG bytes here.
# NOTE(review): presumably shared between worker processes - confirm how the
# workers are started.
manager = multiprocessing.Manager()
lstOfPlots = manager.list()
def chunks(l, n):
    """Yield consecutive slices of *l* holding at most *n* items each."""
    starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in starts)
def getAllWellVals(db,pltcodeWithSuffix,wellID):
    """Return the ``wellID`` value of every document in the collection.

    The query projects only the ``wellID`` field (``_id`` excluded) from
    collection ``pltcodeWithSuffix`` of *db*.
    """
    projection = {wellID: 1, '_id': 0}
    cursor = db[pltcodeWithSuffix].find({}, projection)
    return [document[wellID] for document in cursor]
def build_graph_mongo_multiproc(chunk,pltcodeWithSuffix):
    """Render one SVG plot per well id in *chunk* and append it to ``lstOfPlots``.

    Args:
        chunk: Iterable of well ids handled by this call.
        pltcodeWithSuffix: Name of the MongoDB collection holding the values.

    ``connect_string`` and ``dbname`` are expected to be defined at module
    level; they are not part of the visible snippet.
    """
    # One client per call (i.e. per chunk). It is closed when the chunk is
    # done so that repeated calls do not leak connections.
    client = MongoClient(connect_string, maxPoolSize=10000)
    try:
        db = client[dbname]
        for wid in chunk:
            img = io.BytesIO()
            fig = Figure(figsize=(0.6, 0.6))
            axis = fig.add_subplot(1, 1, 1)
            absVals = getAllWellVals(db, pltcodeWithSuffix, wid)
            axis.plot(lstOfwavelengths, absVals)
            axis.set_title(f'{wid}', fontsize=9)
            axis.title.set_position([.5, .6])
            # Hide all ticks and tick labels.
            axis.tick_params(
                which='both',
                bottom=False,
                left=False,
                labelbottom=False,
                labelleft=False)
            FigureCanvasSVG(fig).print_svg(img)
            # Appending does not rebind the name, so no ``global`` is needed.
            lstOfPlots.append(img.getvalue())
    finally:
        client.close()

Resources