How to pass function in terminal with file path as an argument - python-3.x

#I am typing this in terminal or in cmd -->
#python pract33.py main "C:\Users\Sharad\Desktop\New folder (2)\GSTR3B.pdf"
#I also know many methods like using "\" or "/" or r etc etc but want to type only the above command in terminal or in cmd and want output as "1".
#this code is to find whether the PDF is proper or not**
from pdfminer import high_level
import re
import os
def main1(vPath):
CT = 0
checkPath = os.path.isfile(vPath)
try:
if checkPath:
if vPath.endswith(".pdf"):
extracted_text = high_level.extract_text(vPath, "")
l = extracted_text.split('\n')
for i in l:
i = i.rstrip()
x = re.findall('[A-Za-z0-9]', i)
if len(x) > 5:
CT += 1
if CT > 20:
print("1")
else:
print("0")
except OSError:
pass

Related

Scraping info out of pdf's using Python

I have pdf's distributed over several folders and sub folders.
I've been trying to write a short python script with the idea to search each pdf for any term i enter.
As not all pdf's are searchable, I also tried to implement a list of searchable, and non searchable pdf's with the idea to bring everything in line.
The program seems to work, up to a point. The longer it runs, the slower it goes.
At a certain moment, it just stops. I think it is a memory issue, but i can't seem to find a solution.
The script i have already:
import os
# extracting_text.py
from PyPDF2 import PdfFileReader
search_word = input("enter a word you want to search in file: ")
counter = 0
noTextCounter = 0
SolutionCounter = 0
with open("Solutions.txt", "w") as text_file:
text_file.writelines(f"List of files that contain: {search_word}")
#print(f"List of files that contain: {search_word}", file=text_file)
def text_extractor(path):
with open(path, 'rb') as f:
#variable to find pdf's that only have image. If activated countempty has to be included in the return.
countEmpty = 0
countSolution = 0
pdf = PdfFileReader(f)
# get the first page
page = pdf.getPage(0)
# print(page)
# print('Page type: {}'.format(str(type(page))))
text = page.extractText()
if text == '':
print('No text')
countEmpty = countEmpty + 1
else:
if search_word in text:
print("word found")
countSolution = countSolution + 1
else:
print("word not found")
# print(text)
#Selection of potential returns
#return countEmpty
return countSolution
root = os.getcwd()
try:
for subdir, dirs, files in os.walk(root):
for file in files:
# print os.path.join(subdir, file)
filepath = subdir + os.sep + file
if filepath.endswith(".pdf"):
print(filepath)
counter = counter + 1
print(counter)
if __name__ == '__main__':
path = filepath
indicator = text_extractor(path)
#noTextCounter = noTextCounter + indicator
SolutionCounter = SolutionCounter + indicator
print("indicator: " + str(indicator))
if indicator == 1:
with open("Solutions.txt", "a") as text_file:
text_file.writelines('\n' + path)
#below is option to give 2 lists containing all the pdf's which are images and a list of non images
# #with open("ListOfImagePdfs.txt", "a") as text_file:
# text_file.writelines('\n' + path)
#else:
#with open("ListOfDataPdfs.txt", "a") as text_file:
# text_file.writelines('\n' + path)
#print("amount of image pdf's: " + str(noTextCounter))
except:
pass
#trycatch to be added

Simple Python File I/O spell check program

For a class I have to create a simple spell checking program that takes two files as inputs, one containing correctly spelled words and one containing a paragraph with a few misspelled words. I thought I had it figured out but I am getting an error I have never seen before. When the program finishes it gives the error:
<function check_words at 0x7f99ba6c60d0>
I have never seen this nor do I know what it means, any help in getting this program working would be appreciated. Program code is below:
import os
def main():
while True:
dpath = input("Please enter the path to your dictionary:")
fpath = input("Please enter the path to the file to spell check:")
d = os.path.isfile(dpath)
f = os.path.isfile(fpath)
if d == True and f == True:
check_words(dpath, fpath)
break
print("The following words were misspelled:")
print(check_words)
def linecheck(word, dlist):
if word in dlist:
return None
else:
return word
def check_words(dictionary, file_to_check):
d = dictionary
f = file_to_check
dlist = {}
wrong = []
with open(d, 'r') as c:
for line in c:
(key) = line.strip()
dlist[key] = ''
with open(f, 'r') as i:
for line in i:
line = line.strip()
fun = linecheck(line, dlist)
if fun is not None:
wrong.append(fun)
return wrong
if __name__ == '__main__':
main()
It's not an error, it's doing exactly what you are telling it to.
This line:
print(check_words)
You are telling it to print a function. The output you are seeing is just Python printing the name of the function and it's address: "printing the function".
Yes, don't do print(check_words), do print(check_words())
Furthermore, change check_words(dpath, fpath) to misspelled_words = check_words(dpath, fpath)
And change print(check_words) to print(misspelled_words)
Final code (with a few modifications):
import os
def main():
while True:
dpath = input("Please enter the path to your dictionary: ")
fpath = input("Please enter the path to the file to spell check: ")
d = os.path.isfile(dpath)
f = os.path.isfile(fpath)
if d == True and f == True:
misspelled_words = check_words(dpath, fpath)
break
print("\nThe following words were misspelled:\n----------")
#print(misspelled_words) #comment out this line if you are using the code below
#optional, if you want a better looking output
for word in misspelled_words: # erase these lines if you don't want to use them
print(word) # erase these lines if you don't want to use them
#------------------------
def linecheck(word, dlist):
if word in dlist:
return None
else:
return word
def check_words(dictionary, file_to_check):
d = dictionary
f = file_to_check
dlist = {}
wrong = []
with open(d, 'r') as c:
for line in c:
(key) = line.strip()
dlist[key] = ''
with open(f, 'r') as i:
for line in i:
line = line.strip()
fun = linecheck(line, dlist)
if fun is not None:
wrong.append(fun)
return wrong
if __name__ == '__main__':
main()

Python 3.6 Bitonic Sort with Multiprocessing library and multiple processes

I am trying to implement bitonic with the python multiprocessing library and a shared resource array that will be sorted at the end of the program.
The problem I am running into is that when I run the program, I get an prompt that asks "Your program is still running! Are you sure you want to cancel it?" and then when I click cancel N - 1 times (where N is the amount of processes I am trying to spawn) then it just hangs.
When this is run from the command line, it just outputs the unsorted array. Of course, I expect it to be sorted at the program's finish.
I've been using this resource to try and get a firm grasp on how I can mitigate my errors but I haven't had any luck, and now I am here.
ANY help would be appreciated, as I really don't have anywhere else to turn to.
I wrote this using Python 3.6 and here is the program in its entirety:
from multiprocessing import Process, Array
import sys
from random import randint
# remember to move this to separate file
def createInputFile(n):
input_file = open("input.txt","w+")
input_file.write(str(n)+ "\n")
for i in range(n):
input_file.write(str(randint(0, 1000000)) + "\n")
def main():
# createInputFile(1024) # uncomment this to create 'input.txt'
fp = open("input.txt","r") # remember to read from sys.argv
length = int(fp.readline()) # guaranteed to be power of 2 by instructor
arr = Array('i', range(length))
nums = fp.read().split()
for i in range(len(nums)):
arr[i]= int(nums[i]) # overwrite shared resource values
num_processes = 8 # remember to read from sys.argv
process_dict = dict()
change_in_bounds = len(arr)//num_processes
low_b = 0 # lower bound
upp_b = change_in_bounds # upper bound
for i in range(num_processes):
print("Process num: " + str(i)) # are all processes being generated?
process_dict[i] = Process(target=bitonic_sort, args=(True, arr[low_b:upp_b]) )
process_dict[i].start()
low_b += change_in_bounds
upp_b += change_in_bounds
for i in range(num_processes):
process_arr[i].join()
print(arr[:]) # Print our sorted array (hopefully)
def bitonic_sort(up, x):
if len(x) <= 1:
return x
else:
first = bitonic_sort(True, x[:len(x) // 2])
second = bitonic_sort(False, x[len(x) // 2:])
return bitonic_merge(up, first + second)
def bitonic_merge(up, x):
# assume input x is bitonic, and sorted list is returned
if len(x) == 1:
return x
else:
bitonic_compare(up, x)
first = bitonic_merge(up, x[:len(x) // 2])
second = bitonic_merge(up, x[len(x) // 2:])
return first + second
def bitonic_compare(up, x):
dist = len(x) // 2
for i in range(dist):
if (x[i] > x[i + dist]) == up:
x[i], x[i + dist] = x[i + dist], x[i] #swap
main()
I won't go into all the syntax errors in your code since I am sure your IDE tells you about those. The problem that you have is that you are missing an if name==main. I changed your def main() to def sort() and wrote this:
if __name__ == '__main__':
sort()
And it worked (after solving all the syntax errors)

Python script won't run over SSH but will run locally

I am writing a small application to monitor some temperatures using a raspberry pi. I would like to be able to remotely start or restart the monitoring script remotely. I connect via SSH, cd to the .py file's directory, and then i "python temp_controller.py &". This gives me an error importing something. "from w1thermsensor import W1ThermSensor". This error does not occur when running the script from Thonny directly on the Pi.
"Main" file.
import Send_Email
import Temp_Sensor
import os
import glob
import math
import timefuncs
import apc_controls
program_start_time = timefuncs.get_time() #Used to determine how long the program has been running.
printable_start_time = timefuncs.get_printable_time()
filename = ("Temperature Data " + printable_start_time.strftime("%c") + ".txt")
state = 0 #Used to switch between activities and determine if there has been an error.
temps = [0,0,0,0,0,0,0,0],[0,0] #Holds temperature data
over_temp_counter = 0 #variable for tracking consecutive over temp values
newdir = os.getcwd() + "/temps files"; os.chdir(newdir) #Changes directory to storage location for temperature files
with open(filename, "w+") as tempsfile:
tempsfile.write("Long Term Temperature Monitor Project\r\n")
tempsfile.write("Date-Time,Sensor ID,Sensor Name,Temperature\r\n")
test = 0
while True:
if (math.floor(timefuncs.get_time())) % 30 == 0 and state == 0:
print("sample")
state = 1 #stops this from executing multiple times per second
length = Temp_Sensor.read_sensors(temps) #gets number of sensors and sensor data with IDs
#Writes data line to log file
now = timefuncs.get_printable_time()
tempsfile.write("%s"%now)
i = 0
while i < length:
print("Sensor %s has temperature %.2f" % (temps[i][0], temps[i][1]))
tempsfile.write(",%s"%temps[i][0])
tempsfile.write(",%s"%Temp_Sensor.get_sensor_name(temps[i][0]))
tempsfile.write(",%f"%temps[i][1])
i += 1
tempsfile.write("\r\n")
#Checks temperatures to see if over temp
i = 0
over = False
while i < length:
if Temp_Sensor.check_temp(temps[i][1]):#if over temp
over = True
if over_temp_counter > 1:#ensures there is not a single fluke reading that causes error
print("over temp")
tempsfile.close()#close log file
Send_Email.send_fail_email(filename)#send email with log file
apc_controls.turn_off_apc()#shut down power to test
tempsfile = open("(After error" + printable_start_time.strftime("%c") + ".txt", "w+")
else:
print("increment over")
over_temp_counter += 1
i+=1
if over == False:
over_temp_counter = 0
elif (math.floor(timefuncs.get_time())) % 30 != 0:#if not 30 second increment reset blocker used to prevent the 30 secodn operations from occuring more than once
state = 0
File with the error.
import time
import glob
from w1thermsensor import W1ThermSensor
def read_sensors(data):
i = 0
j = 0
for sensor in W1ThermSensor.get_available_sensors([W1ThermSensor.THERM_SENSOR_DS18B20]):
data[i][j] = sensor.id
j+=1
data[i][j] = sensor.get_temperature()
i+=1
j = 0
return i
def get_sensor_name(id):
if id == "000009ac911f":
return "Sensor 1"
elif id == "000009aecc36":
return "Sensor 2"
def check_temp(value):
if value > 80:
return 1
else:
return 0
I guess on your local machine you did something like pip install w1thermsensor, right? You need to install the w1thermsensor dependency on your Raspberry pi too

generating random strings and matching them in python

this is a python program to generate a random string and to match it with a user given output and to get a return on the amount of attempts by the computer but i cant get the try count
import random
class txt:
def __init__(self):
self.txt = None
trycount = 0
def maketxt(self,txt):
txt = ""
a = []
a.append(txt.split())
# return a
# def match(self):
tokenlist = ["a", "b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"]
matchlist =[]
while (len(matchlist) <=24):
x =random.choice(tokenlist)
matchlist.append(x)
if matchlist == a:
print(trycount)
else :
trycount += 1
match()
t = txt()
t.maketxt("hagjkrshgujrahg")
I keep getting the error
File "C:/Users/#####/AppData/Local/Programs/Python/Python36/test1.py", line 25, in maketxt
trycount += 1
UnboundLocalError: local variable 'trycount' referenced before assignment

Resources