Importing CSV file error - python-3.x

When importing a CSV file in Python, how should I write the entire path for it? For ex:"C:\Users\user\Downloads\insurance.csv" or ("C:\Users\user\Downloads\insurance.csv")? Currently, it gives me an error:
the file doesn't exist

No, no, no, it's like this.
data_folder = "source_data\\text_files\\"
file_to_open = data_folder + "raw_data.txt"
f = open(file_to_open)
print(f.read())
Or...this...
from pathlib import Path
data_folder = Path("source_data/text_files/")
file_to_open = data_folder / "raw_data.txt"
f = open(file_to_open)
print(f.read())

Related

how to rename files in a folder using pathlib in python?

I need help renaming .jpg files in my folder with the same prefix, 'cat_'. for example, 070.jpg should be renamed cat_070.jpg.
the files are located within the Cat folder:
from pathlib import Path
p = Path('C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat\\')
so I dont quite see how to do it? the below is wrong because it does not 'look into' the files in this directory.
p.rename(Path(p.parent, 'cat_' + p.suffix))
I have also unsuccessfully tried this:
import os
from os import rename
from os import listdir
# Get path
cwd = "C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat"
# Get all files in dir
onlyfiles = [f for f in listdir(cwd) if isfile(join(cwd, f))]
for file in onlyfiles:
# Get the current format
if file[-4:]==(".jpg"):
s = file[1]
# Change format and get new filename
s[1] = 'cat'
s = '_'.join(s)
# Rename file
os.rename(file, s)
print(f"Renamed {file} to {s}")
FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat\\'
how can I do it? sorry I'm really a beginner here.
How about:
from pathlib import Path
img_dir = Path('C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat\\') # path to folder with images
for img_path in img_dir.glob('*.jpg'): # iterate over all .jpg images in img_dir
new_name = f'cat_{img_path.stem}{img_path.suffix}' # or directly: f'cat_{img_path.name}'
img_path.rename(img_dir / new_name)
print(f'Renamed `{img_path.name}` to `{new_name}`')
pathlib also supports renaming files, so the os module is not even needed here.
use pathlib. Path. iterdir() to rename all files in a directory
1) for path in pathlib. Path("a_directory"). iterdir():
2) if path. is_file():
3) old_name = path. stem. original filename.
4) old_extension = path. suffix. original file extension.
5) directory = path. parent. ...
6) new_name = "text" + old_name + old_extension.
7) path. rename(pathlib.

Finding a file by extension

I am trying to find files with .desktop extension in a specific directory in Python3. I tried the code snippet below but it didn't work as I wanted. I want it to be a single string value.
import os, fnmatch
desktopfile = configparser.ConfigParser ()
def find(pattern, path):
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
script_tmp_dir = "/tmp/appiload/appinstall" # Geçici dizin (dosyalar burada ayıklanıyor)
desktopfilea=f"{script_tmp_dir}/squashfs-root/{str(find ('*.desktop', f'{script_tmp_dir}/squashfs-root/')}"
print(desktopfilea)
desktopfile.items()
Result:
/tmp/appiload/appinstall/squashfs-root/['/tmp/appiload/appinstall/squashfs-root/helloworld.desktop']
Use glob.glob instead of writing a function to do this job.
import os, glob
desktopfile = configparser.ConfigParser ()
script_tmp_dir = "/tmp/appiload/appinstall" # Geçici dizin (dosyalar burada ayıklanıyor)
desktopfilea = glob.glob(f'{script_tmp_dir}/squashfs-root/*.desktop')
# desktopfilea = " ".join(desktopfilea) # Join them in one string, using space as seperator
print(str(desktopfilea))
desktopfile.items()
I don't exactly understand what do you mean but I made a simple program that will print all the files with the .desktop extension and save them to 2 files: applications.json in an array and applications.txt just written one after another.
I also have 2 versions of the program, one that will only print and save only the file names with extensions and other one that will print and save the whole path.
File names only:
import os
import json
strApplications = ""
applications = []
for file in os.listdir(os.path.dirname(os.path.realpath(__file__))):
if file.endswith(".desktop"):
applications.append(file)
with open("applications.json", "w") as f:
json.dump(applications, f)
strApplications = strApplications + file + "\n"
with open("applications.txt", "w") as f:
f.write(strApplications)
print(strApplications)
Full file path:
import os
import json
cwd = os.getcwd()
files = [cwd + "\\" + f for f in os.listdir(cwd) if f.endswith(".desktop")]
with open("applications.json", "w") as f:
json.dump(files, f)
with open("applications.txt", "w") as f:
f.write("\n".join(files))
print("\n".join(files))

Renaming or naming a filename in python

I tried to name the file or rename a file in python, but it is showing a invalid syntax at the end of the path, before ',r'
os.rename(r 'D:\\Stackoverflow\\SC.png' ,r 'D:\\Stackoverflow\\Screen'+str(localtime)+'.png')
Try this:
from datetime import datetime
import os
src = "/tmp/test.txt"
dst = "/tmp/python_" + str(datetime.now().strftime('%Y-%m-%d-%H_%M_%S')) +".inf"
os.rename(src, dst)

Change file extension from .tar.gz to .tgz in Python3

Using pathlib, is there a simple solution to change a file extension with two suffixes like ".tar.gz" to a simple suffix like ".tgz".
Currently I tried:
import pathlib
src = pathlib.Path("path/to/archive.tar.gz")
dst = src.with_suffix("").with_suffix(".tgz")
print(dst)
I get:
path/to/archive.tgz
This question is related but not identical to Changing file extension in Python
is using path lib a requirement?
if not, the os module would work just fine:
import os
path_location = "/path/to/folder"
filename = "filename.extension"
newname = '.'.join([filename.split('.')[0], 'tgz'])
os.rename(os.path.join(path_location, filename), os.path.join(path_location, newname))
EDIT:
found this on the pathlib docs:
PurePath.with_suffix(suffix)¶
Return a new path with the suffix changed. If the original path doesn’t have a suffix, the new suffix is appended instead. If the suffix is an empty string, the original suffix is removed:
>>>
>>> p = PureWindowsPath('c:/Downloads/pathlib.tar.gz')
>>> p.with_suffix('.bz2')
PureWindowsPath('c:/Downloads/pathlib.tar.bz2')
>>> p = PureWindowsPath('README')
>>> p.with_suffix('.txt')
PureWindowsPath('README.txt')
>>> p = PureWindowsPath('README.txt')
>>> p.with_suffix('')
PureWindowsPath('README')
EDIT 2:
from pathlib import Path
p = Path('path/to/tar.gz')
new_ext = "tgz"
filename = p.stem
p.rename(Path(p.parent, "{0}.{1}".format(filename, new_ext)))
You could rename it.
import os
old_file = os.path.join("directory_where_file", "a.tar.gz")
new_file = os.path.join("directory_where_file", "b.tgz")
os.rename(old_file, new_file)
from pathlib import Path
p = Path('path/to/tar.gz')
name_without_ext = p.stem
p.rename(Path(p.parent, name_without_ext + '.' + new_ext))

PyPDF2 difference resulting in 1 character per line

im trying to create a simple script that will show me the difference (similar to github merging) by using difflib's HtmlDiff function.
so far ive gotten my pdf files together and am able to print their contents in binary using PyPDF2 functions.
import difflib
import os
import PyPDF2
os.chdir('.../MyPythonScripts/PDFtesterDifflib')
file1 = 'pdf1.pdf'
file2 = 'pdf2.pdf'
file1RL = open(file1, 'rb')
pdfreader1 = PyPDF2.PdfFileReader(file1RL)
PageOBJ1 = pdfreader1.getPage(0)
textOBJ1 = PageOBJ1.extractText()
file2RL = open(file2, 'rb')
pdfreader2 = PyPDF2.PdfFileReader(file2RL)
PageOBJ2 = pdfreader2.getPage(0)
textOBJ2 = PageOBJ2.extractText()
difference = difflib.HtmlDiff().make_file(textOBJ1,textOBJ2,file1,file2)
diff_report = open('...MyPythonScripts/PDFtesterDifflib/diff_report.html','w')
diff_report.write(difference)
diff_report.close()
the result is this:
How can i get my lines to read normally?
it should read:
1.apples
2.oranges
3. --this line should differ--
i am running python 3.6 on mac
Thanks in advance!

Resources