Dynamically create file folder based on line 1 in csv - python-3.x

This script downloads images, renames them based on line[0] adds a number to the end of the file name and saves them to a file folder. My goal here is to create the file folder name based on line[0] of my csv file, I'm new to python and need to download/sort 15000+ images. Any help would be appreciated! Using python 3.8.6
Note: 1 model may contain many images so the idea is to create the folder, place images for that model inside, move on to the next model, etc.
Csv file content
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dw0c85e188/product-mugs/cooking/ranges/mug/retail/RHV3-484-N-RHV3-484-L-external-mug-rs-84.png
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dwcbd711e5/inspire/caitlin-wilson-portland-dk-339-rs-84.png
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dw3702e52a/inspire/caitlin-wilson-portland-dk-385-rs-84.jpg
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dw0c85e188/product-mugs/cooking/ranges/mug/retail/RHV3-484-N-RHV3-484-L-external-mug-rs-84.png
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dwf99a5a9d/inspire/david-berridge-project-brooklyn-mw-6457-rs-84.jpg
Python script
import sys
import urllib
import urllib.request
from csv import reader
import os.path
import os
csv_filename = "images"
with open(csv_filename+".csv".format(csv_filename), 'r') as csv_file:
n = 1
for line in reader(csv_file):
if not os.path.exists("ImageID"):
os.makedirs("ImageID")
print("Image skipped for {0}".format(line[0]))
else:
if line[1] != '' and line[0] != "ImageID":
urllib.request.urlretrieve(line[1], "ImageID/" + line[0] + "-" + str(n) + ".jpg")
n += 1
print("Image saved for {0}".format(line[0]))
else:
print("No result for {0}".format(line[0]))

This seems to work as desired....
Couple comments in middle. Notably, you need to respect the .jpg or .png file. If you have file extensions that are longer (4 chars) you may need to split out the file name and then split by "."
Good Luck!
import sys
import urllib
import urllib.request
from csv import reader
import os.path
import os
csv_filename = "images.csv"
with open(csv_filename, 'r') as csv_file:
n = 1 # starting point
for line in reader(csv_file):
tgt_folder = line[0]
if not os.path.exists(tgt_folder):
os.makedirs(tgt_folder)
n = 1 # restart n if you find a NEW folder
# there should be no "else" clause here. Just test the folder name above, but don't waste a file
if line[1] != '' and line[0] != "ImageID": # not clear what this is for... ??
filename = ''.join([line[0], '-', str(n), line[1][-4:]])
destination = os.path.join(tgt_folder, filename)
urllib.request.urlretrieve(line[1], destination)
n += 1
print("Image saved for {0}".format(line[1]))
else:
print("No result for {0}".format(line[1]))

Related

Chose which function to run in script

I'm trying to make it so that the user chooses which function to run using if.
import os
import csv
import collections
import datetime
import pandas as pd
import time
import string
import re
import glob, os
folder_path = 'C:/ProgramData/WebPort/system/tags'
folder2_path = 'C:/ProgramData/WebPort/system'
search2_str = '"Prefix"'
print("Choices:\n 1 - Read from CSV\n 2 - Read from WPP")
x = input("Please enter your choice:\n")
x = int(x)
if x == 1:
csv_file_list = glob.glob(folder_path + '/*.csv')
with open("csv.txt", 'w') as wf:
for file in csv_file_list:
print(glob.glob(folder_path + '/*.csv'))
with open(file) as rf:
for line in rf:
if line.strip(): # if line is not empty
if not line.endswith("\n"):
line+="\n"
wf.write(line)
print('Reading from .csv')
elif x == 2:
for root, dirs, files in os.walk(folder2_path):
for file in files:
if file.endswith(".wpp"):
print(os.path.join(root, file))
with open(os.path.join(root, file), 'r') as fr, open ("wpp.txt",'a', encoding='utf-8') as fw:
for i,line in enumerate(fr):
if line.find(search2_str) != -1:
fw.write(line)
print('Reading from .wpp')
else:
print('wrong choice')
Getting Invalid syntax in line 34 using this.

How to save command line file as .txt file every time. In CMD need to display the speedtest output save the those command line as .txt file

This is the program as given below
import os
import time
index=0
file=open("out_{index}.txt", 'a')
while True:
ps=os.system(f"speedtest.exe")
file.append(ps)
index+=1
time.sleep(4)
I believe it is file.write() and not file.append().
I think you want to write to a new file every time.
I would do this.
import time
import os
index = 0
while True:
ps = os.system("speedtest.exe")
with open(f"out_{index}.txt", 'w+') as f:
f.write(ps)
index += 1
time.sleep(4)
However, if you want to write in the same file.
I would do this.
import time
import os
with open("out_file.txt", 'a+') as f:
index = 0
while True:
ps = os.system("speedtest.exe")
f.write(ps)
index += 1
time.sleep(4)

Searching a list of words from textfile and printing first three lines using python

I have a text file in that i have to access particular headings and access the first lines under the heading.I was able to do it for one heading while doing multiple heading i was facing issue.
I have successfully done for one heading. but doing to list of words i was unable to do it.
i was able to do it for one heading
Data =['work:']
i was not able to do it for this scenario.
Data =['work:','test:','ride:']
In the text file the data is like below:
work:
'500'
'ast'
'800'
test:
'tim'
'200'
'300'
ride:
'mic'
'100'
'657'
import math
import os
import glob
import re
import sys
sys.path.append('C:/Documents/tes')
def read(file,Data,res,outputline):
with open(file,'r') as f:
stc_file = os.path.basename(file)
for line in f:
if Data in line:
line = f.readlines()
return line[outputline]
fls = []
src_dir = r'C:/Documents/tes'
for root, dirs, files in os.walk(src_dir):
for filename in files:
if not filename.endswith('.txt'):
continue
filepath = os.path.join(root, filename)
fls.append(filepath)
result = []
Data = ['work:','test:','ride:']
for file in fls:
result=read(file,Data,result,0).split()+read(file,Data,result,1).split()+read(file,Data,result,2).split()
The above code is working for one heading,but for multiple headings i was not able to do.
['500','ast','800']
['tim','200','300']
['mic','100','657']
This above expected output .
This script will do what you asked, if each of the three (not sure if you wanted more, or an arbitrary number?) lines of data you are looking for are surrounded by single quotes—and if I understood your goal correctly...
import os
src_dir = os.getcwd() # or whatever path you want
keywords = ['work:', 'test:', 'ride:']
result = []
for root, dirs, files in os.walk(src_dir):
for filename in files:
if filename.endswith('.txt'):
path = os.path.join(root, filename)
try:
fh = open(path, 'r')
lines = [l.strip("'") for l in fh.read().splitlines()]
for i in range(len(lines)):
if lines[i] in keywords:
result.append(' '.join(lines[i+1:i+4]).split())
except Exception as e:
print('Something went wrong.')
print(e)
continue
print(result)

Print function outside the FOR Loop is not getting executed or called

I am trying to segregate the PDF files from the password protected and normal ones and finally I want to print all details to a log file along with total number of files scanned from the source directory.
I feel code is perfectly till before the last print() statement but the last print() function is not called by the program.
import PyPDF2
import os, sys, datetime
#import shutil
src = 'C:/Users/Balaji.B.R/Downloads/'
dst = 'C:/Users/Balaji.B.R/Downloads/success/'
op = 'C:/Users/Balaji.B.R/Desktop/op.txt'
sys.stdout = open(op,'w')
print("Date & Time :", datetime.datetime.now(), "\n")
files = os.listdir(src)
i=0
for f in files:
if (f.endswith('.pdf')):
#print("The File Name is",f)
pdffileobj = open(src+f, 'rb')
pdfReader = PyPDF2.PdfFileReader(pdffileobj)
if pdfReader.isEncrypted:
print("The File Name is",f)
print("PDF is either protected or corrupted, kindly rescan \n")
i+=1
pdffileobj.close()
else:
i+=1
#shutil.move(src+f,dst+f)
pdffileobj.close()
else:
pass
print("Total PDF files scanned are:", i)
sys.stdout.close()

Trying to download ~200 files using ThreadPoolExecutor -> some files are skipped

I need to download ~200 files. If I run code below for the 1st time, the code downloads ~100 files. Then I need to run this code few more times in order to download the rest files (ie. if I run the code 2nd time - I can get +20-30 new files, if 3rd time - again +20-30, and so on). Why does this happen\how to fix? Maybe this is important - server may generate some files up to 10 sec.
import os
import concurrent.futures
import urllib.request
import shutil
def get_cities(osm_id, file_name, place_type):
file_folder = os.path.join(os.getcwd(), place_type)
file_name = file_name + '_' + place_type
file_path = os.path.join(file_folder, file_name)
if not os.path.exists(file_folder):
os.makedirs(file_folder)
if not os.path.exists(file_path):
area_id = str(3600000000 + osm_id)
url = 'http://overpass-api.de/api/interpreter?data=(node["place"="city"](area:'+area_id+'););out;'
with urllib.request.urlopen(url) as response, open(file_path, 'wb') as out_file:
shutil.copyfileobj(response, out_file)
def cities_controller(place_type='cities'):
countries = Countries.objects.filter(status=1).exclude(osm_id=None)
en_group_inst = Languages.objects.filter(iso_code='en').first().group
with concurrent.futures.ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
for country in countries:
osm_id = country.osm_id
file_name = CountriesTranslations.objects.get(
country=country, lang_group=en_group_inst).common_name.lower().replace(' ', '_')
executor.submit(get_cities, osm_id, file_name, place_type)
cities_controller()

Resources