#I am typing this in terminal or in cmd -->
#python pract33.py main "C:\Users\Sharad\Desktop\New folder (2)\GSTR3B.pdf"
#I also know many methods like using "\" or "/" or r etc etc but want to type only the above command in terminal or in cmd and want output as "1".
#this code is to find whether the PDF is proper or not**
from pdfminer import high_level
import re
import os
def main1(vPath):
CT = 0
checkPath = os.path.isfile(vPath)
try:
if checkPath:
if vPath.endswith(".pdf"):
extracted_text = high_level.extract_text(vPath, "")
l = extracted_text.split('\n')
for i in l:
i = i.rstrip()
x = re.findall('[A-Za-z0-9]', i)
if len(x) > 5:
CT += 1
if CT > 20:
print("1")
else:
print("0")
except OSError:
pass
Related
I have pdf's distributed over several folders and sub folders.
I've been trying to write a short python script with the idea to search each pdf for any term i enter.
As not all pdf's are searchable, I also tried to implement a list of searchable, and non searchable pdf's with the idea to bring everything in line.
The program seems to work, up to a point. The longer it runs, the slower it goes.
At a certain moment, it just stops. I think it is a memory issue, but i can't seem to find a solution.
The script i have already:
import os
# extracting_text.py
from PyPDF2 import PdfFileReader
search_word = input("enter a word you want to search in file: ")
counter = 0
noTextCounter = 0
SolutionCounter = 0
with open("Solutions.txt", "w") as text_file:
text_file.writelines(f"List of files that contain: {search_word}")
#print(f"List of files that contain: {search_word}", file=text_file)
def text_extractor(path):
with open(path, 'rb') as f:
#variable to find pdf's that only have image. If activated countempty has to be included in the return.
countEmpty = 0
countSolution = 0
pdf = PdfFileReader(f)
# get the first page
page = pdf.getPage(0)
# print(page)
# print('Page type: {}'.format(str(type(page))))
text = page.extractText()
if text == '':
print('No text')
countEmpty = countEmpty + 1
else:
if search_word in text:
print("word found")
countSolution = countSolution + 1
else:
print("word not found")
# print(text)
#Selection of potential returns
#return countEmpty
return countSolution
root = os.getcwd()
try:
for subdir, dirs, files in os.walk(root):
for file in files:
# print os.path.join(subdir, file)
filepath = subdir + os.sep + file
if filepath.endswith(".pdf"):
print(filepath)
counter = counter + 1
print(counter)
if __name__ == '__main__':
path = filepath
indicator = text_extractor(path)
#noTextCounter = noTextCounter + indicator
SolutionCounter = SolutionCounter + indicator
print("indicator: " + str(indicator))
if indicator == 1:
with open("Solutions.txt", "a") as text_file:
text_file.writelines('\n' + path)
#below is option to give 2 lists containing all the pdf's which are images and a list of non images
# #with open("ListOfImagePdfs.txt", "a") as text_file:
# text_file.writelines('\n' + path)
#else:
#with open("ListOfDataPdfs.txt", "a") as text_file:
# text_file.writelines('\n' + path)
#print("amount of image pdf's: " + str(noTextCounter))
except:
pass
#trycatch to be added
For a class I have to create a simple spell checking program that takes two files as inputs, one containing correctly spelled words and one containing a paragraph with a few misspelled words. I thought I had it figured out but I am getting an error I have never seen before. When the program finishes it gives the error:
<function check_words at 0x7f99ba6c60d0>
I have never seen this nor do I know what it means, any help in getting this program working would be appreciated. Program code is below:
import os
def main():
while True:
dpath = input("Please enter the path to your dictionary:")
fpath = input("Please enter the path to the file to spell check:")
d = os.path.isfile(dpath)
f = os.path.isfile(fpath)
if d == True and f == True:
check_words(dpath, fpath)
break
print("The following words were misspelled:")
print(check_words)
def linecheck(word, dlist):
if word in dlist:
return None
else:
return word
def check_words(dictionary, file_to_check):
d = dictionary
f = file_to_check
dlist = {}
wrong = []
with open(d, 'r') as c:
for line in c:
(key) = line.strip()
dlist[key] = ''
with open(f, 'r') as i:
for line in i:
line = line.strip()
fun = linecheck(line, dlist)
if fun is not None:
wrong.append(fun)
return wrong
if __name__ == '__main__':
main()
It's not an error, it's doing exactly what you are telling it to.
This line:
print(check_words)
You are telling it to print a function. The output you are seeing is just Python printing the name of the function and it's address: "printing the function".
Yes, don't do print(check_words), do print(check_words())
Furthermore, change check_words(dpath, fpath) to misspelled_words = check_words(dpath, fpath)
And change print(check_words) to print(misspelled_words)
Final code (with a few modifications):
import os
def main():
while True:
dpath = input("Please enter the path to your dictionary: ")
fpath = input("Please enter the path to the file to spell check: ")
d = os.path.isfile(dpath)
f = os.path.isfile(fpath)
if d == True and f == True:
misspelled_words = check_words(dpath, fpath)
break
print("\nThe following words were misspelled:\n----------")
#print(misspelled_words) #comment out this line if you are using the code below
#optional, if you want a better looking output
for word in misspelled_words: # erase these lines if you don't want to use them
print(word) # erase these lines if you don't want to use them
#------------------------
def linecheck(word, dlist):
if word in dlist:
return None
else:
return word
def check_words(dictionary, file_to_check):
d = dictionary
f = file_to_check
dlist = {}
wrong = []
with open(d, 'r') as c:
for line in c:
(key) = line.strip()
dlist[key] = ''
with open(f, 'r') as i:
for line in i:
line = line.strip()
fun = linecheck(line, dlist)
if fun is not None:
wrong.append(fun)
return wrong
if __name__ == '__main__':
main()
I am trying to implement bitonic with the python multiprocessing library and a shared resource array that will be sorted at the end of the program.
The problem I am running into is that when I run the program, I get an prompt that asks "Your program is still running! Are you sure you want to cancel it?" and then when I click cancel N - 1 times (where N is the amount of processes I am trying to spawn) then it just hangs.
When this is run from the command line, it just outputs the unsorted array. Of course, I expect it to be sorted at the program's finish.
I've been using this resource to try and get a firm grasp on how I can mitigate my errors but I haven't had any luck, and now I am here.
ANY help would be appreciated, as I really don't have anywhere else to turn to.
I wrote this using Python 3.6 and here is the program in its entirety:
from multiprocessing import Process, Array
import sys
from random import randint
# remember to move this to separate file
def createInputFile(n):
input_file = open("input.txt","w+")
input_file.write(str(n)+ "\n")
for i in range(n):
input_file.write(str(randint(0, 1000000)) + "\n")
def main():
# createInputFile(1024) # uncomment this to create 'input.txt'
fp = open("input.txt","r") # remember to read from sys.argv
length = int(fp.readline()) # guaranteed to be power of 2 by instructor
arr = Array('i', range(length))
nums = fp.read().split()
for i in range(len(nums)):
arr[i]= int(nums[i]) # overwrite shared resource values
num_processes = 8 # remember to read from sys.argv
process_dict = dict()
change_in_bounds = len(arr)//num_processes
low_b = 0 # lower bound
upp_b = change_in_bounds # upper bound
for i in range(num_processes):
print("Process num: " + str(i)) # are all processes being generated?
process_dict[i] = Process(target=bitonic_sort, args=(True, arr[low_b:upp_b]) )
process_dict[i].start()
low_b += change_in_bounds
upp_b += change_in_bounds
for i in range(num_processes):
process_arr[i].join()
print(arr[:]) # Print our sorted array (hopefully)
def bitonic_sort(up, x):
if len(x) <= 1:
return x
else:
first = bitonic_sort(True, x[:len(x) // 2])
second = bitonic_sort(False, x[len(x) // 2:])
return bitonic_merge(up, first + second)
def bitonic_merge(up, x):
# assume input x is bitonic, and sorted list is returned
if len(x) == 1:
return x
else:
bitonic_compare(up, x)
first = bitonic_merge(up, x[:len(x) // 2])
second = bitonic_merge(up, x[len(x) // 2:])
return first + second
def bitonic_compare(up, x):
dist = len(x) // 2
for i in range(dist):
if (x[i] > x[i + dist]) == up:
x[i], x[i + dist] = x[i + dist], x[i] #swap
main()
I won't go into all the syntax errors in your code since I am sure your IDE tells you about those. The problem that you have is that you are missing an if name==main. I changed your def main() to def sort() and wrote this:
if __name__ == '__main__':
sort()
And it worked (after solving all the syntax errors)
I am writing a small application to monitor some temperatures using a raspberry pi. I would like to be able to remotely start or restart the monitoring script remotely. I connect via SSH, cd to the .py file's directory, and then i "python temp_controller.py &". This gives me an error importing something. "from w1thermsensor import W1ThermSensor". This error does not occur when running the script from Thonny directly on the Pi.
"Main" file.
import Send_Email
import Temp_Sensor
import os
import glob
import math
import timefuncs
import apc_controls
program_start_time = timefuncs.get_time() #Used to determine how long the program has been running.
printable_start_time = timefuncs.get_printable_time()
filename = ("Temperature Data " + printable_start_time.strftime("%c") + ".txt")
state = 0 #Used to switch between activities and determine if there has been an error.
temps = [0,0,0,0,0,0,0,0],[0,0] #Holds temperature data
over_temp_counter = 0 #variable for tracking consecutive over temp values
newdir = os.getcwd() + "/temps files"; os.chdir(newdir) #Changes directory to storage location for temperature files
with open(filename, "w+") as tempsfile:
tempsfile.write("Long Term Temperature Monitor Project\r\n")
tempsfile.write("Date-Time,Sensor ID,Sensor Name,Temperature\r\n")
test = 0
while True:
if (math.floor(timefuncs.get_time())) % 30 == 0 and state == 0:
print("sample")
state = 1 #stops this from executing multiple times per second
length = Temp_Sensor.read_sensors(temps) #gets number of sensors and sensor data with IDs
#Writes data line to log file
now = timefuncs.get_printable_time()
tempsfile.write("%s"%now)
i = 0
while i < length:
print("Sensor %s has temperature %.2f" % (temps[i][0], temps[i][1]))
tempsfile.write(",%s"%temps[i][0])
tempsfile.write(",%s"%Temp_Sensor.get_sensor_name(temps[i][0]))
tempsfile.write(",%f"%temps[i][1])
i += 1
tempsfile.write("\r\n")
#Checks temperatures to see if over temp
i = 0
over = False
while i < length:
if Temp_Sensor.check_temp(temps[i][1]):#if over temp
over = True
if over_temp_counter > 1:#ensures there is not a single fluke reading that causes error
print("over temp")
tempsfile.close()#close log file
Send_Email.send_fail_email(filename)#send email with log file
apc_controls.turn_off_apc()#shut down power to test
tempsfile = open("(After error" + printable_start_time.strftime("%c") + ".txt", "w+")
else:
print("increment over")
over_temp_counter += 1
i+=1
if over == False:
over_temp_counter = 0
elif (math.floor(timefuncs.get_time())) % 30 != 0:#if not 30 second increment reset blocker used to prevent the 30 secodn operations from occuring more than once
state = 0
File with the error.
import time
import glob
from w1thermsensor import W1ThermSensor
def read_sensors(data):
i = 0
j = 0
for sensor in W1ThermSensor.get_available_sensors([W1ThermSensor.THERM_SENSOR_DS18B20]):
data[i][j] = sensor.id
j+=1
data[i][j] = sensor.get_temperature()
i+=1
j = 0
return i
def get_sensor_name(id):
if id == "000009ac911f":
return "Sensor 1"
elif id == "000009aecc36":
return "Sensor 2"
def check_temp(value):
if value > 80:
return 1
else:
return 0
I guess on your local machine you did something like pip install w1thermsensor, right? You need to install the w1thermsensor dependency on your Raspberry pi too
this is a python program to generate a random string and to match it with a user given output and to get a return on the amount of attempts by the computer but i cant get the try count
import random
class txt:
def __init__(self):
self.txt = None
trycount = 0
def maketxt(self,txt):
txt = ""
a = []
a.append(txt.split())
# return a
# def match(self):
tokenlist = ["a", "b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"]
matchlist =[]
while (len(matchlist) <=24):
x =random.choice(tokenlist)
matchlist.append(x)
if matchlist == a:
print(trycount)
else :
trycount += 1
match()
t = txt()
t.maketxt("hagjkrshgujrahg")
I keep getting the error
File "C:/Users/#####/AppData/Local/Programs/Python/Python36/test1.py", line 25, in maketxt
trycount += 1
UnboundLocalError: local variable 'trycount' referenced before assignment