I want to decompress a bunch of .nii.gz files in Python so that they can be processed in sitk later on. When I decompress a single file manually by right-clicking it and choosing 'Extract..', the file is then correctly interpreted by sitk (I do sitk.ReadImage(unzipped)). But when I try to decompress it in Python using the following code:
with gzip.open(segmentation_zipped, "rb") as f:
    bindata = f.read()
segmentation_unzipped = os.path.join(segmentation_zipped.replace(".gz", ""))
with gzip.open(segmentation_unzipped, "wb") as f:
    f.write(bindata)
I get an error when sitk tries to read the file:
RuntimeError: Exception thrown in SimpleITK ReadImage: C:\d\VS14-Win64-pkg\SimpleITK\Code\IO\src\sitkImageReaderBase.cxx:82:
sitk::ERROR: Unable to determine ImageIO reader for "E:\BraTS19_2013_10_1_seg.nii"
Also, when trying to do it a little differently:
input = gzip.GzipFile(segmentation_zipped, 'rb')
s = input.read()
input.close()
segmentation_unzipped = os.path.join(segmentation_zipped.replace(".gz", ""))
output = open(segmentation_unzipped, 'wb')
output.write(s)
output.close()
I get:
RuntimeError: Exception thrown in SimpleITK ReadImage: C:\d\VS14-Win64-pkg\SimpleITK-build\ITK\Modules\IO\PNG\src\itkPNGImageIO.cxx:101:
itk::ERROR: PNGImageIO(0000022E3AF2C0C0): PNGImageIO failed to read header for file:
Reason: fread read only 0 instead of 8
Can anyone help?
There is no need to unzip the NIfTI images; libraries such as NiBabel can handle them without decompression.
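As an aside, your first snippet fails because the output is opened with gzip.open(..., "wb"), so the data you just decompressed is re-compressed on write and the resulting .nii file is still a gzip stream. If you really want a decompressed copy on disk, a minimal sketch using a plain open() for the output would be:
import gzip
import shutil

segmentation_unzipped = segmentation_zipped.replace(".gz", "")
with gzip.open(segmentation_zipped, "rb") as f_in:
    with open(segmentation_unzipped, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)  # stream the decompressed bytes to a plain file
That said, the simplest route is to skip decompression entirely. With NiBabel: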
#==================================
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
#==================================
# load image (4D) [X, Y, Z_slice, time]
nii_img = nib.load('path_to_file.nii.gz')
nii_data = nii_img.get_fdata()
# take the grid dimensions from the volume itself
number_of_slices = nii_data.shape[2]
number_of_frames = nii_data.shape[3]
fig, ax = plt.subplots(number_of_frames, number_of_slices, constrained_layout=True)
fig.canvas.manager.set_window_title('4D Nifti Image')
fig.suptitle('4D_Nifti 10 slices 30 time Frames', fontsize=16)
#-------------------------------------------------------------------------------
mng = plt.get_current_fig_manager()
mng.full_screen_toggle()
for slice_idx in range(number_of_slices):
    # if your data is 4D; for 3D data remove the inner loop
    for frame in range(number_of_frames):
        ax[frame, slice_idx].imshow(nii_data[:, :, slice_idx, frame], cmap='gray', interpolation=None)
        ax[frame, slice_idx].set_title("layer {} / frame {}".format(slice_idx, frame))
        ax[frame, slice_idx].axis('off')
plt.show()
Or you can use SimpleITK as follows:
import SimpleITK as sitk
import numpy as np
# A path to a T1-weighted brain .nii image:
t1_fn = 'path_to_file.nii'
# Read the .nii image containing the volume with SimpleITK:
sitk_t1 = sitk.ReadImage(t1_fn)
# and access the numpy array:
t1 = sitk.GetArrayFromImage(sitk_t1)
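Note that sitk.ReadImage also handles the compressed file directly, so for your case you can skip decompression entirely; using the path from your error message:
# SimpleITK's NIfTI reader understands .nii.gz transparently
seg = sitk.ReadImage('E:/BraTS19_2013_10_1_seg.nii.gz')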
Related
I am currently doing a deep learning assignment; I downloaded the assignment files from GitHub.
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
%matplotlib inline
You are given a dataset ("data.h5") containing:
- a training set of m_train images labeled as cat (y=1) or non-cat (y=0)
- a test set of m_test images labeled as cat or non-cat
- each image is of shape (num_px, num_px, 3) where 3 is for the 3 channels (RGB). Thus, each image is square (height = num_px) and (width = num_px).
# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
I ran the setup.sh file too but the error doesn't seem to go away.
lr_utils.py file:
import numpy as np
import h5py
def load_dataset():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels
    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels
    classes = np.array(test_dataset["list_classes"][:])  # the list of classes
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
Kindly help!
I solved the issue by downloading uncorrupted .h5 files and putting them in the folder datasets/ in the same directory.
The files you downloaded are corrupted. You can visit https://github.com/abdur75648/Deep-Learning-Specialization-Coursera to download the uncorrupted files.
You can download uncorrupted files from here:
https://www.kaggle.com/datasets/muhammeddalkran/catvnoncat
and replace the corrupted files with them in the same directory.
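Incidentally, a quick way to verify that the replacement files are intact (a small sketch using the file names from lr_utils.py) is to open them with h5py and list their keys:
import h5py
# an uncorrupted file opens cleanly and lists its top-level datasets
for path in ('datasets/train_catvnoncat.h5', 'datasets/test_catvnoncat.h5'):
    with h5py.File(path, 'r') as f:
        print(path, list(f.keys()))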
I am trying to save a variable with the data type "IPython.core.display.SVG" as a PNG file in a Jupyter Notebook environment.
First I tried:
with open('./file.png','wb+') as outfile:
    outfile.write(my_svg.data)
And I got the error:
TypeError: a bytes-like object is required, not 'str'
Next, I tried:
with open('./file.png','wb+') as outfile:
    outfile.write(my_svg.data.encode('utf-8'))
But, I cannot open "file.png". The operating system gives error:
The file “file.png” could not be opened. It may be damaged or use a file format that Preview doesn’t recognize.
I can save "my_svg" with the "svg" extension as below:
with open('./file.svg','wb+') as outfile:
    outfile.write(my_svg.data.encode('utf-8'))
But when I try to convert "file.svg" into "file.png" with:
import cairosvg
cairosvg.svg2png(url="./file.svg", write_to="./file.png")
I get the error:
ValueError: unknown locale: UTF-8
This is how I get the "IPython.core.display.SVG" data type in Jupyter Notebook:
from rdkit import Chem
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG
smile_1 = 'C(C(N)=O)c(c)c'
smile_2 = 'o(cn)c(c)c'
m1 = Chem.MolFromSmiles(smile_1,sanitize=False)
Chem.SanitizeMol(m1, sanitizeOps=(Chem.SanitizeFlags.SANITIZE_ALL^Chem.SanitizeFlags.SANITIZE_KEKULIZE^Chem.SanitizeFlags.SANITIZE_SETAROMATICITY))
m2 = Chem.MolFromSmiles(smile_2,sanitize=False)
Chem.SanitizeMol(m2, sanitizeOps=(Chem.SanitizeFlags.SANITIZE_ALL^Chem.SanitizeFlags.SANITIZE_KEKULIZE^Chem.SanitizeFlags.SANITIZE_SETAROMATICITY))
mols = [m1, m2]
legends = ["smile_1", "smile_2"]
molsPerRow=2
subImgSize=(200, 200)
nRows = len(mols) // molsPerRow
if len(mols) % molsPerRow:
    nRows += 1
fullSize = (molsPerRow * subImgSize[0], nRows * subImgSize[1])
d2d = rdMolDraw2D.MolDraw2DSVG(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1])
d2d.drawOptions().prepareMolsBeforeDrawing=False
d2d.DrawMolecules(list(mols), legends=legends)
d2d.FinishDrawing()
SVG(d2d.GetDrawingText())
Environment:
macOS 11.2.3
python 3.6
RDKit version 2020.09.1
Any help is greatly appreciated.
Instead of creating an SVG with rdkit and trying to convert it to a PNG, why not just create a PNG directly?
from rdkit.Chem import Draw
from rdkit import Chem
# create rdkit mol
smile = 'CCCC'
mol = Chem.MolFromSmiles(smile)
# create png
d2d = Draw.MolDraw2DCairo(200, 200)
d2d.DrawMolecule(mol)
d2d.FinishDrawing()
png_data = d2d.GetDrawingText()
# save png to file
with open('mol_image.png', 'wb') as png_file:
    png_file.write(png_data)
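The same approach extends to your multi-molecule grid, since MolDraw2DCairo accepts the same size and panel-size arguments as MolDraw2DSVG; a sketch reusing the mols, legends, fullSize and subImgSize variables from your question:
# hedged sketch: grid version, reusing the names from the question
d2d = Draw.MolDraw2DCairo(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1])
d2d.drawOptions().prepareMolsBeforeDrawing = False
d2d.DrawMolecules(list(mols), legends=legends)
d2d.FinishDrawing()
with open('mols_grid.png', 'wb') as png_file:
    png_file.write(d2d.GetDrawingText())  # GetDrawingText() returns PNG bytes for the Cairo drawer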
I am not sure why MolDraw2DCairo is not working for you, but using the package you mention (cairosvg) you can extend your code sample quite easily:
# extra imports
import cairosvg
import tempfile
# replace molecule drawing part
d2d = rdMolDraw2D.MolDraw2DSVG(fullSize[0], fullSize[1], subImgSize[0], subImgSize[1])
d2d.drawOptions().prepareMolsBeforeDrawing=False
d2d.DrawMolecules(list(mols), legends=legends)
d2d.FinishDrawing()
svg_text = d2d.GetDrawingText()
# save to png file
with tempfile.NamedTemporaryFile(delete=True) as tmp:
    tmp.write(svg_text.encode())
    tmp.flush()
    cairosvg.svg2png(url=tmp.name, write_to="./mol_img.png")
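If you would rather skip the temporary file entirely, cairosvg can also take the SVG content directly through its bytestring parameter:
# no temp file: hand the SVG bytes straight to cairosvg
cairosvg.svg2png(bytestring=svg_text.encode(), write_to="./mol_img.png")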
I have many .csv files of NYC taxi data from nyc.gov; one .csv = one year-month. I take about 15 of the CSVs and make HDF5 files from them:
import h5py
import pandas as pd
import os
import glob
import numpy as np
import vaex
from tqdm import tqdm_notebook as tqdm
#hdf = pd.HDFStore('c:/Projekty/H5Edu/NYCTaxi/NYCTaxi.hp')
#df1 = pd.read_csv('path nejake csvcko')
#hdf.put('DF1', df1, format = 'table', data_columns = True)
csv_list = np.sort(np.array(glob.glob('G:\\NYCTaxi\\*.csv')))[::-1]
csv_list = csv_list[20:39]
output_dir = 'c:\\Datasety\\YelowTaxi\\DataH5\\'
for file in tqdm(csv_list, leave=False, desc='Converting to hdf5...'):
    # Setting up the files and directories
    #zip_file = ZipFile(file)
    output_file = file.split('\\')[-1][:-3]+'hdf5'
    output = output_dir + output_file
    #output = output_file
    # Check if a converted file already exists: if it does, skip it; otherwise read in the raw csv and convert it
    if (os.path.exists(output) and os.path.isfile(output)):
        pass
    else:
        # Importing the data into pandas
        #pandas_df = [pd.read_csv(file, index_col=None, header=0)][0]
        pandas_df = [pd.read_csv(file, index_col=None, header=0, low_memory=False)][0]
        # Rename some columns to match the more well known dataset from
        # http://stat-computing.org/dataexpo/2009/the-data.html
        # Importing the data from pandas to vaex
        vaex_df = vaex.from_pandas(pandas_df, copy_index=False)
        # Export the data with vaex to hdf5
        vaex_df.export_hdf5(path=output, progress=False)
Next I make one big HDF5:
import re
import glob
import vaex
import numpy as np
def tryint(s):
    try:
        return int(s)
    except:
        return s

def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.
        "z23a" -> ["z", 23, "a"]
    """
    return [tryint(c) for c in re.split('([0-9]+)', s)]
hdf5_list = glob.glob('c:\\Datasety\\YelowTaxi\\DataH5\\*.hdf5')
hdf5_list.sort(key=alphanum_key)
hdf5_list = np.array(hdf5_list)
#assert len(hdf5_list) == 3, "Incorrect number of files"
# This is an important step
master_df = vaex.open_many(hdf5_list)
# exporting
#master_df.export_hdf5(path='c:\\Datasety\\YelowTaxi\\DataH5\\Spojene.hd5', progress=True)
master_df.export_hdf5(path='c:\\Datasety\\YelowTaxi\\DataH5\\Spojene.hdf5', progress=True)
So far everything is OK; I can open the output file Spojene.hdf5.
Next, I append new .csv files to Spojene.hdf5:
for file in csv_list:
    #file = csv_list[0]
    df2 = pd.read_csv(file, index_col=None, header=0, low_memory=False)
    filename = 'c:\\Datasety\\YelowTaxi\\DataH5\\Spojene.hdf5'
    df2.to_hdf(filename, 'data', append=True)
But after I append new .csv files to Spojene.hdf5, I can't open it:
df = vaex.open('c:\\Datasety\\YelowTaxi\\DataH5\\Spojene.hdf5')
ValueError: First columns has length 289184484, while column table has length 60107988
Please, what can I do?
I think this is linked to how pandas creates HDF5 files. According to vaex's documentation, you can't open an HDF5 file with vaex if it has been created via the pandas to_hdf method. I assume the same applies when you append to an existing HDF5 file.
To avoid this error you can reuse your logic where you convert the pandas dataframe to a vaex dataframe, export it to HDF5, and then use open_many. Something like this should work:
main_hdf5_file_path = "c:\\Datasety\\YelowTaxi\\DataH5\\Spojene.hdf5"
hdf5_files_created = []
for file in csv_list:
    hdf5_file = file.replace(".csv", ".hdf5")
    # from_csv can take additional parameters to forward to pd.read_csv
    # You can also use convert=True to convert it automatically to hdf5 without the export_hdf5
    # Refer to https://vaex.readthedocs.io/en/docs/api.html#vaex.from_csv
    df = vaex.from_csv(file)
    df.export_hdf5(hdf5_file)
    hdf5_files_created.append(hdf5_file)

hdf5_to_read = hdf5_files_created + [main_hdf5_file_path]
final_df = vaex.open_many(hdf5_to_read)
# export to a new file name: overwriting a file that vaex currently has
# open (memory-mapped) is not safe
final_df.export_hdf5(main_hdf5_file_path.replace(".hdf5", "_new.hdf5"))
I have the Python code below. It takes a .wav file as input via Postman; it is received here as a base64 string, which is then decoded. The code then processes the .wav file and generates a .png image that I have to save to AWS S3. I am facing problems saving it to AWS S3, because the file that is saved there does not open: the photo viewer says it doesn't support this file format. Any idea how to do this?
import json
import base64
import boto3
#import scipy.io.wavfile as wav
#import scipy.signal as signal
import numpy as np
from matplotlib import pyplot as plt
from scipy import signal
import shutil
import wavio
import wave
import matplotlib.pylab as plt
from scipy.signal import butter, lfilter
from scipy.io import wavfile
import scipy.signal as sps
from io import BytesIO
def lambda_handler(event, context):
    s3 = boto3.client("s3")
    # retrieving data from the event, which is the wave audio file
    get_file_content_from_postman = event["content"]
    # decoding the data: here the wav file is converted back to binary form
    decoded_file_name = base64.b64decode(get_file_content_from_postman)
    new_rate = 2000
    # Read file
    sample_rate, clip = wavfile.read(BytesIO(decoded_file_name))
    # Resample data
    number_of_samples = round(len(clip) * float(new_rate) / sample_rate)
    clip = sps.resample(clip, number_of_samples)
    # butter_bandpass_filter is another function
    a = butter_bandpass_filter(clip, 20, 400, 2000, order=4)
    filtered = 2*((a-min(a))/(max(a)-min(a)))-1
    fig = plt.figure(figsize=[1,1])
    ax = fig.add_subplot(212)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(filtered, Fs=2000)
    # filename is referring to the AWS Lambda /tmp directory
    filename = '/tmp/' + 'image.png'
    plt.savefig(filename, dpi=400, bbox_inches='tight', pad_inches=0)
    s3_upload = s3.put_object(Bucket="aaa", Key="filename.png", Body=filename)
    return {
        'statusCode': 200,
        'body': json.dumps("Executed successfully")
    }
You are using put_object, which means that Body is not a file name:
Body (bytes or seekable file-like object) -- Object data.
If you want to keep using put_object, then it should be:
with open(filename, 'rb') as file_obj:
    s3_upload = s3.put_object(Bucket="aaa", Key="filename.png", Body=file_obj)
Or use upload_file, which is more intuitive.
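For reference, a minimal sketch of the upload_file variant, reusing the bucket and key names from your snippet:
# upload_file takes the local path directly and manages the file handle for you
s3.upload_file(Filename=filename, Bucket="aaa", Key="filename.png")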
I am training a system for texture analysis using local binary patterns. Here I am training on images, with code taken from somewhere. I am getting an error when defining the paths of the images.
# OpenCV bindings
import cv2
# To performing path manipulations
import os
# Local Binary Pattern function
from skimage.feature import local_binary_pattern
# To calculate a normalized histogram
from scipy.stats import itemfreq
from sklearn.preprocessing import normalize
# Utility package -- use pip install cvutils to install
import cvutils
# To read class from file
import csv
#Store the path of training images in train_images
train_images = cvutils.imlist("'C:\Users\Babar\MATLAB\isp\training images\fire-image1.jpg', 'C:\Users\Babar\MATLAB\isp\training images\fire-image2.jpg', 'C:\Users\Babar\MATLAB\isp\training images\fire-image3.jpg'")
# Dictionary containing image paths as keys and corresponding labels as values
train_dic = {'fire-image1':0,'fire-image2':0,'fire-image3':0}
with open('C:\Users\Babar\MATLAB\isp\class_train.txt', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ')
    for row in reader:
        train_dic[row[0]] = int(row[1])
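For what it's worth, the path setup was presumably meant to be a real Python list of separate strings rather than one big quoted string; a hedged sketch (note the raw-string prefix, since backslash sequences like \U in 'C:\Users' are invalid escapes in Python 3 string literals):
# hypothetical fix: a plain list of raw-string Windows paths
train_images = [
    r'C:\Users\Babar\MATLAB\isp\training images\fire-image1.jpg',
    r'C:\Users\Babar\MATLAB\isp\training images\fire-image2.jpg',
    r'C:\Users\Babar\MATLAB\isp\training images\fire-image3.jpg',
]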