Creating an empty folder on Dropbox with Python. Is there a simpler way? - python-3.x

Here's my sample code which works:
import os, io, dropbox
def createFolder(dropboxBaseFolder, newFolder):
# creating a temp dummy destination file path
dummyFileTo = dropboxBaseFolder + newFolder + '/' + 'temp.bin'
# creating a virtual in-memory binary file
f = io.BytesIO(b"\x00")
# uploading the dummy file in order to cause creation of the containing folder
dbx.files_upload(f.read(), dummyFileTo)
# now that the folder is created, delete the dummy file
dbx.files_delete_v2(dummyFileTo)
accessToken = '....'
dbx = dropbox.Dropbox(accessToken)
dropboxBaseDir = '/test_dropbox'
dropboxNewSubDir = '/new_empty_sub_dir'
createFolder(dropboxBaseDir, dropboxNewSubDir)
But is there a more efficient/simpler way to do the task ?

Yes, as Ronald mentioned in the comments, you can use the files_create_folder_v2 method to create a new folder.
That would look like this, modifying your code:
import dropbox
accessToken = '....'
dbx = dropbox.Dropbox(accessToken)
dropboxBaseDir = '/test_dropbox'
dropboxNewSubDir = '/new_empty_sub_dir'
res = dbx.files_create_folder_v2(dropboxBaseDir + dropboxNewSubDir)
# access the information for the newly created folder in `res`

Related

How to programmatically retrieve the workspace url and clusterOwnerUserId?

I would like to programmatically create the url to download a file.
To do this I need the workspaceUrl and clusterOwnerUserId.
How can I retrieve those in a Databricks notebook?
# how to get the `workspaceUrl` and `clusterOwnerUserId`?
tmp_file = '/tmp/output_abcd.xlsx'
filestore_file = '/FileStore/output_abcd.xlsx'
# code to create file omitted for brevity ...
dbutils.fs.cp(f'file:{tmp_file}', filestore_file)
downloadUrl = f'https://{workspaceUrl}/files/output_abcd.xlsx?o={clusterOwnerUserId}'
displayHTML(f"<a href='{downloadUrl}'>download</a>")
The variables are available in the spark conf.
E.g.
clusterOwnerUserId = spark.conf.get('spark.databricks.clusterUsageTags.orgId')
workspaceUrl = spark.conf.get('spark.databricks.workspaceUrl')
You can then use the details as follows:
tmp_file = '/tmp/output_abcd.xlsx'
filestore_file = '/FileStore/output_abcd.xlsx'
# code to create file omitted for brevity ...
dbutils.fs.cp(f'file:{tmp_file}', filestore_file)
downloadUrl = f'https://{workspaceUrl}/files/output_abcd.xlsx?o={clusterOwnerUserId}'
displayHTML(f"<a href='{downloadUrl}'>download</a>")
Databricks Files in the Filestore at
/FileStore/my-stuff/my-file.txt is accessible at:
"https://databricks-instance-name.cloud.databricks.com/files/my-stuff/my-file.txt"
I don't think you need the o=... part. That is the workspace Id btw, not the clusterOwner user id.

How to create and add files to a directory?

I'm writing a program to take large PDF's and convert each page to a .jpg, then add the .jpg's of each pdf file to their own directory (which the program needs to create).
I have completed the conversion part of the program, but I am stuck on creating a directory and adding the files to the directory.
Here's my code so far.
import glob, sys, fitz, os, shutil
zoom_x = 2.0
zoom_y = 2.0
mat = fitz.Matrix(zoom_x, zoom_y) # to get better resolution
all_files = glob.glob('/Users/homefolder/Downloads/*.pdf') # image path
print(all_files)
for filename in all_files:
doc = fitz.open(filename)
head, tail = os.path.split(doc.name)
save_file_name = tail.split('.')[0]
for page in doc: # iterate through the pages
# print(page)
pix = page.get_pixmap(matrix=mat)
# render the image
filepath_save = '/Users/homefolder/Downloads/files' + save_file_name + str(page.number) + '.jpg'
pix.save(filepath_save) # save image
sample = glob.glob('/Users/homefolder/Downloads/*.jpg')
How would I write the code to create a directory for each pdf file and add those .jpg's to the directory?
You can create directory and save to it your processed files, I also refactored your code a bit:
import glob, fitz, os
# Render every page of every PDF in the Downloads folder to a .jpg, placing
# the images of each PDF in a directory named after that PDF.
zoom_x = 2.0
zoom_y = 2.0
mat = fitz.Matrix(zoom_x, zoom_y)  # zoom matrix passed to get_pixmap() for each page
pdf_files = glob.glob('/Users/homefolder/Downloads/*.pdf')
save_to = '/Users/homefolder/Downloads/pdf_as_img/'
for path in pdf_files:
    # Open the document in a `with` block so it is closed once its pages
    # have been written, instead of staying open for the rest of the run.
    with fitz.open(path) as doc:
        base_name, _ = os.path.splitext(os.path.basename(doc.name))
        directory_to_save = os.path.join(save_to, base_name)
        # exist_ok=True creates the directory (and any missing parents) and
        # is a no-op when it already exists, so no separate exists() check
        # is needed.
        os.makedirs(directory_to_save, exist_ok=True)
        for page in doc:
            pix = page.get_pixmap(matrix=mat)
            # One image per page, named after page.number: "<number>.jpg".
            filepath_save = os.path.join(directory_to_save, str(page.number) + '.jpg')
            pix.save(filepath_save)
This script creates a directory for every pdf file and saves pages as jpg to it.

Django. TemporaryUploadedFile

I upload a file through the form, check it, and only after checking it I want to add it to my database.
form = BookForm(request.POST, request.FILES)
file = form.files
path = file.get('book_file').temporary_file_path()
in path - '/tmp/tmpbp4klqtw.upload.pdf'
But as soon as I want to transfer this file from the temporary storage to some other folder, I get the following error:
path = os.replace(path, settings.MEDIA_ROOT)
IsADirectoryError: [Errno 21] Is a directory: '/tmp/tmpbp4klqtw.upload.pdf' -> '/home/oem/bla/bla'
Can't understand why this file is not in reality? What can I do about it? Is it possible to set some special path for the "temporary file"?
UPD:
You should use path = os.replace(path, settings.MEDIA_ROOT + '/name-of-file.pdf') – Willem Van Onsem
os.replace(…) [python-doc] expects a filename as target if you specify a file as source, so you can move this to:
os.replace(path, f'{settings.MEDIA_ROOT}/name-of-file.pdf')
you can also make use of shutil.move(…) [python-doc] to specify the directory, this function will also return the filepath of the target file:
from shutil import move
target_file = move(path, settings.MEDIA_ROOT)

How to get the name of the directory from the name of the directory + the file

In an application, I can get the path to a file which resides in a directory as a string:
"/path/to/the/file.txt"
In order to write another file into that same directory, I want to change the string "/path/to/the/file.txt" and remove the part "file.txt" to finally only get
"/path/to/the/"
as a string
I could use
string = "/path/to/the/file.txt"
string.split('/')
and then glue all the term (except the last one) together with a loop
Is there an easy way to do it?
You can use os.path.basename to get the last part of the path and then remove it from the path.
import os

path = "/path/to/the/file.txt"
# Last component of the path, i.e. the file name.
delete = os.path.basename(os.path.normpath(path))
print(delete)  # prints file.txt
# Remove file.txt from the path. os.path.dirname drops only the final
# component; str.replace() would strip *every* occurrence of the name and
# corrupt paths such as "/file.txt/file.txt". Joining with '' re-appends
# the trailing separator.
path = os.path.join(os.path.dirname(path), '')
print(path)
OUTPUT :
file.txt
/path/to/the/
Let's say you have a list of txt file names. You can build the full path of each one like this:
new_path = ['file2.txt','file3.txt','file4.txt']
for get_new_path in new_path:
print(path + get_new_path)
OUTPUT :
/path/to/the/file2.txt
/path/to/the/file3.txt
/path/to/the/file4.txt
Here is what I finally used
# Rebuild the directory part of `string`: keep every "/"-separated component
# except the last one (the file name) and put a "/" after each of them,
# e.g. "/path/to/the/file.txt" -> "/path/to/the/".
# (The original loop referenced the misspelled name `srtr` and shadowed the
# builtin `iter`.)
directory_path_str = "".join(part + "/" for part in string.split('/')[:-1])

How to copy files in Groovy

I need to copy a file in Groovy and saw some ways to achieve it on the web:
1
new AntBuilder().copy( file:"$sourceFile.canonicalPath",
tofile:"$destFile.canonicalPath")
2
command = ["sh", "-c", "cp src/*.txt dst/"]
Runtime.getRuntime().exec((String[]) command.toArray())
3
destination.withDataOutputStream { os->
source.withDataInputStream { is->
os << is
}
}
4
import java.nio.file.Files
import java.nio.file.Paths
Files.copy(Paths.get(a), Paths.get(b))
The 4th way seems cleanest to me as I am not sure how good is it to use AntBuilder and how heavy it is, I saw some people reporting issues with Groovy version change.
2nd way is OS dependent, 3rd might not be efficient.
Is there something in Groovy to just copy files like in the 4th statement or should I just use Java for it?
If you have Java 7, I would definitely go with
Path source = ...
Path target = ...
Files.copy(source, target)
With the java.nio.file.Path class, it can work with symbolic and hard links. From java.nio.file.Files:
This class consists exclusively of static methods that operate on
files, directories, or other types of files. In most cases, the
methods defined here will delegate to the associated file system
provider to perform the file operations.
Just as references:
Copy files from one folder to another with Groovy
http://groovyconsole.appspot.com/view.groovy?id=8001
My second option would be the ant task with AntBuilder.
If you are doing this in code, just use something like:
new File('copy.bin').bytes = new File('orig.bin').bytes
If this is for build-related code, this would also work, or use the Ant builder.
Note, if you are sure the files are textual you can use .text rather than .bytes.
If it is a text file, I would go with:
def src = new File('src.txt')
def dst = new File('dst.txt')
dst << src.text
I prefer this way:
def file = new File("old.file")
def newFile = new File("new.file")
Files.copy(file.toPath(), newFile.toPath())
To append to existing file :
def src = new File('src.txt')
def dest = new File('dest.txt')
dest << src.text
To overwrite if file exists :
def src = new File('src.txt')
def dest = new File('dest.txt')
dest.write(src.text)
I'm using AntBuilder for such tasks. It's simple, consistent, 'battle-proven' and fun.
2nd approach is too OS-specific (Linux-only in your case)
3rd is too low-level and it eats up more resources. It's useful if you need to transform the file on the way: changing the encoding, for example.
4th looks overcomplicated to me... NIO package is relatively new in JDK.
At the end of the day, I'd go for the 1st option. There you can switch from the copy task to the scp task without re-developing the script almost from scratch.
This is the way using platform independent groovy script. If anyone has questions please ask in the comments.
// Copies args[0] to args[1] over SMB using the jCIFS library.
// The jCIFS jar is added to the root class loader at runtime, which is why
// the jCIFS classes are looked up with Class.forName instead of being imported.
def file = new File("java/jcifs-1.3.18.jar")
this.class.classLoader.rootLoader.addURL(file.toURI().toURL())
// auth_server is used for paths that already start with "\\" (UNC paths);
// auth_local is used for paths that are rewritten to go through \\localhost.
def auth_server = Class.forName("jcifs.smb.NtlmPasswordAuthentication").newInstance("domain", "username", "password")
def auth_local = Class.forName("jcifs.smb.NtlmPasswordAuthentication").newInstance(null, "local_username", "local_password")
def source_url = args[0]
def dest_url = args[1]
def auth = auth_server
//prepare source file
if(!source_url.startsWith("\\\\"))
{
    // Not a UNC path: presumably a drive path such as C:\dir\file (TODO confirm
    // against callers). Rewrite it as \\localhost\C$\dir\file. The remainder
    // must start at index 2 so the ':' after the drive letter is dropped,
    // exactly as the destination branch below does; starting at index 1
    // produced "C$:\dir\file".
    source_url = "\\\\localhost\\"+ source_url.substring(0, 1) + "\$" + source_url.substring(2, source_url.length());
    auth = auth_local
}
// Convert backslashes to forward slashes and add the smb: scheme.
source_url = "smb:"+source_url.replace("\\","/");
println("Copying from Source -> " + source_url);
println("Connecting to Source..");
def source = Class.forName("jcifs.smb.SmbFile").newInstance(source_url,auth)
println(source.canRead());
// Reset the authentication to default
auth = auth_server
//prepare destination file
if(!dest_url.startsWith("\\\\"))
{
    // Same rewrite as for the source: first character + "$" + everything
    // after the second character.
    dest_url = "\\\\localhost\\"+ dest_url.substring(0, 1) + "\$" +dest_url.substring(2, dest_url.length());
    auth = auth_local
}
def dest = null
dest_url = "smb:"+dest_url.replace("\\","/");
println("Copying To Destination-> " + dest_url);
println("Connecting to Destination..");
dest = Class.forName("jcifs.smb.SmbFile").newInstance(dest_url,auth)
println(dest.canWrite());
// Only reports an existing destination; the copy below is attempted either way.
if (dest.exists()){
    println("Destination folder already exists");
}
source.copyTo(dest);
For copying files in Jenkins Groovy
For Linux:
try {
echo 'Copying the files to the required location'
sh '''cd /install/opt/
cp /install/opt/ssl.ks /var/local/system/'''
echo 'File is copied successfully'
}
catch(Exception e) {
error 'Copying file was unsuccessful'
}
For Windows:
// Jenkins pipeline step: copy ssl.ks on a Windows agent and fail the build
// with a clear message if the copy step throws.
try {
    echo 'Copying the files to the required location'
    // "@echo off" silences command echoing ("#echo off" is not a batch command).
    // Both paths are quoted because "C:\Program Files" contains a space, which
    // would otherwise split the source path into two arguments for copy.
    bat '''@echo off
copy "C:\\Program Files\\install\\opt\\ssl.ks" "C:\\ProgramData\\install\\opt"'''
    echo 'File is copied successfully'
}
catch(Exception e) {
    error 'Copying file was unsuccessful'
}

Resources