Convert docx. to pdf in python in linux - python-3.x

It doesn't create a pdf file in the directory also it prints - None
import sys
import subprocess
import re
def convert_to(folder, source, timeout=None):
args = [libreoffice_exec(), '--headless', '--convert-to', 'pdf', '--outdir', folder, source]
process = subprocess.run(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, timeout=timeout)
filename = re.search('-> (.*?) using filter', process.stdout.decode())
return filename
def libreoffice_exec():
# TODO: Provide support for more platforms
if sys.platform == 'darwin':
return '/Applications/LibreOffice.app/Contents/MacOS/soffice'
return 'libreoffice'
result = convert_to('/directory_to_save_file', '/File_path', timeout=15)
print(result)
Could anyone give me solution where I can easily convert docx to pdf? Thanks in advance!

You can try below code in your function
import subprocess
output = subprocess.check_output(['libreoffice', '--convert-to', 'pdf' ,'demo.docx'])
print output

Related

Watchdog in Python to look for filesystem changes not working (freezing)

I'm working on a Python script to monitor a folder to check whether a new *.JPG file is added to that folder and then do some tasks. The code is working, but after some time after being started, it seems to be freezing and stops working even a new file is added to the folder.
Here is the code:
# -*- encoding: iso-8859-1 -*-
import time
import os
import flickrapi
import shutil
from PIL import Image
from PIL.ExifTags import TAGS
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
if __name__ == "__main__":
patterns = ["*.jpg"]
ignore_patterns = None
ignore_directories = False
case_sensitive = False
my_event_handler = PatternMatchingEventHandler(patterns, ignore_patterns, ignore_directories, case_sensitive)
def get_exif(img):
''' Extract Exif data from image '''
def copyright(img):
# get the path of img and create an output filename
tail = os.path.split(img)[1]
filename = 'wtmk_' + tail[:-3] + 'png'
#open the base image and get it's dimensions
while True:
try:
# read file
base_image = Image.open(img)
bw, bh = base_image.size
bw2 = bw // 2
break
except IOError:
time.sleep(5)
''' Add watermark to image '''
return waterMarkedImage
def on_created(event):
# Add the watermark
file = copyright(event.src_path)
# extract EXIF data
exifdata = get_exif(event.src_path)
''' Send the image to FLICKR '''
# Create and start the observer
my_event_handler.on_created = on_created
path = "c:\\temp"
go_recursively = False
my_observer = Observer()
my_observer.schedule(my_event_handler, path, recursive=go_recursively)
my_observer.start()
try:
while True:
time.sleep(5)
except KeyboardInterrupt:
my_observer.stop()
my_observer.join()
I'm running the above code using Python 3.8 on a Windows 10 machine. Any help would be awesome!
Marcio
It's probably an issue with your cmd.exe
To fix the script freezing open your cmd, click on your cmd icon in the top left, then go to Defaults and uncheck the QuickEdit Mode.
Then restart cmd and start your script. Hope that helps!

Best way to check the PDF file is corrupt using python

I try to check the PDF files are corrupted in windows environment and come up with following python code.
Just want to check is it the best way to check corrupted PDF files or is there any other easy way?
Note: C:\Temp\python\sample-map (1).pdf is the corrupted PDF file
Here is the sample code
import os
import subprocess
import re
from subprocess import Popen, PIPE
def checkFile(fullfile):
proc=subprocess.Popen(["file", "-b", fullfile], shell=True, stdout=PIPE, stderr=PIPE, bufsize=0)
# -b, --brief : do not prepend filenames to output lines
out, err = proc.communicate()
exitcode = proc.returncode
return exitcode, out, err
def searchFiles(dirpath):
pwdpath=os.path.dirname(os.path.realpath(__file__))
print("running path : %s" %pwdpath )
if os.access(dirpath, os.R_OK):
print("Path %s validation OK \n" %dirpath)
listfiles=os.listdir(dirpath)
for files in listfiles:
fullfile=os.path.join(dirpath, files)
if os.access(fullfile, os.R_OK):
code, out, error = checkFile(fullfile)
if str(code) !="0" or str(error, "utf-8") != "" or re.search("^(?!PDF(\s)).*", str(out,'utf-8')):
print("ERROR " + fullfile+"\n################")
else:
print("OK " + fullfile+"\n################")
else:
print("$s : File not readable" %fullfile)
else:
print("Path is not valid")
if __name__ == "__main__":
searchFiles('C:\Temp\python')
sample output :
$ "C:/Program Files (x86)/Python37-32/python.exe" c:/Users/myuser/python/check_pdf_file.py
running path : c:\Users\myuser\python
Path C:\Temp\python validation OK
OK C:\Temp\python\Induction Guide.pdf
################
ERROR C:\Temp\python\sample-map (1).pdf
################
OK C:\Temp\python\sample-map.pdf
################
I think you can use PyPDF2 module.
pip install pypdf2
The code is as follows.
from PyPDF2 import PdfFileReader
import os
def checkFile(fullfile):
with open(fullfile, 'rb') as f:
try:
pdf = PdfFileReader(f)
info = pdf.getDocumentInfo()
if info:
return True
else:
return False
except:
return False
def searchFiles(dirpath):
pwdpath = os.path.dirname(os.path.realpath(__file__))
print("running path : %s" %pwdpath )
if os.access(dirpath, os.R_OK):
print("Path %s validation OK \n" %dirpath)
listfiles = os.listdir(dirpath)
for f in listfiles:
fullfile = os.path.join(dirpath, f)
if checkFile(fullfile):
print("OK " + fullfile + "\n################")
else:
print("ERROR " + fullfile + "\n################")
else:
print("Path is not valid")
if __name__ == "__main__":
searchFiles('C:\Temp\python')
I tried to match your coding style.
I think this code can also be used on MacOS or Linux.

How to write to text file in python?

I am a beginner in using python. I have created a plain text file and have to encrypt it to output file. But I am getting an error as below and unable to write it to output file. The code is running but the output file which should be encrypted is created.
#!/usr/bin/env python3
import os
import binascii
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import argparse
def readfile_binary(file):
with open(file, 'rb') as f:
content = f.read()
return content
def writefile_binary(file, content):
with open(file, 'wb') as f:
f.write(content)
def main():
parser = argparse.ArgumentParser(description = 'Encryption and Decryption of the file')
parser.add_argument('-in', dest = 'input', required = True)
parser.add_argument('-out', dest = 'output', required = True)
parser.add_argument('-K', dest = 'key', help = 'The key to be used for encryption must be in hex')
parser.add_argument('-iv', dest = 'iv', help = 'The inintialisation vector, must be in hex')
args = parser.parse_args()
input_content = readfile_binary(args. input)
output_content = writefile_binary(args. output)
if __name__ == "__main__":
main()
The output file should be encrypted and it should be available in the directory.
These two lines:
input_content = readfile_binary(args. input)
output_content = writefile_binary(args. output)
There should not be a space in args.input. Here is an example,
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()
# using type hints can help reasoning about code
def write(filename: str, content: str) -> None:
with open(filename, 'wb') as f:
f.write(str.encode(content))
# if the filename was successfully parsed from stdin
if args.filename == 'filename.txt':
print(f"args: {args.filename}")
# write to the appropriate output file
write(filename=args.filename, content="content")
You might need to correct your code's indentation. Python requires indenting code within each function definition, loop, etc.
And as eric points out, there should be no spaces after the periods in args. input and args. output. Change those to args.input and args.output instead.
So:
#!/usr/bin/env python3
import os
import binascii
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import argparse
def readfile_binary(file):
with open(file, 'rb') as f:
content = f.read()
return content
def writefile_binary(file, content):
with open(file, 'wb') as f:
f.write(content)
def main():
parser = argparse.ArgumentParser(description = 'Encryption and Decryption of the file')
parser.add_argument('-in', dest = 'input', required = True)
parser.add_argument('-out', dest = 'output', required = True)
parser.add_argument('-K', dest = 'key', help = 'The key to be used for encryption must be in hex')
parser.add_argument('-iv', dest = 'iv', help = 'The inintialisation vector, must be in hex')
args = parser.parse_args()
input_content = readfile_binary(args.input)
output_content = writefile_binary(args.output)
if __name__ == "__main__":
main()

selenium+argparse - change browser with command line argument

I want to create a selenium script and use argparse to choose the browser from the command line. This is what I have - when I run test.py chrome, nothing happens.
test.py:
https://repl.it/repls/ProbableHeavenlySystem
from selenium import webdriver
import argparse
def main():
parser = argparse.ArgumentParser()
parser.add_argument('chrome')
parser.add_argument('firefox')
parser.parse_args()
args = parser.parse_args()
def pick_browser(args):
if args.chrome == 'chrome':
return args.webdriver.Chrome(executable_path='C:/pathto/chromedriver.exe')
elif args.firefox == 'firefox':
return args.webdriver.Firefox(
executable_path='C:/pathto/geckodriver.exe')
if __name__ == '__main__':
main()
Thanks for your help !
You can use sys.argv:
import sys
print(sys.argv)
in command line type python script.py chrome and in script file:
import sys
print(sys.argv[1]) # prints chrome
Here you can find a good tutorial.

Python3 read args from file in Linux

I have program.py that accepts only one argument as URL:
python3 program.py http://www.xxx.xxx
and the code is something like this
def Video(url)
#do something
#with that url string
def Main():
parser = argparse.ArgumentParser()
parser.add_argument("url", help="Add URL", type=str)
args = parser.parse_args()
Video(args.url) # here most the list of urls from file
if __name__ == '__main__':
Main()
But I would like my program to read the URL from a file, one after the other has ended
Something like this:
Python3 program.py < URLfile
Thanks
import sys
for url in sys.stdin.readlines():
do_something(url.strip())
I think configparser is what you need.
Flexible format:
[DEFAULT]
ServerAliveInterval = 45
Compression = yes
CompressionLevel = 9
ForwardX11 = yes
[bitbucket.org]
User = hg
done
parser = argparse.ArgumentParser()
parser.add_argument('url', nargs='+', type=str)
args = vars(parser.parse_args())
for x in args['url']:
Video(x)
if __name__ == '__main__':
Main()
and to use it:
python3 program.py $(<links)

Resources