How can I create a tarball with UID/GID of root? - python-3.x

I'm trying to amend this code so that the UID and GID of the files inserted into the tarball belong to root.
import tarfile

sources = ['test-directory', 'another-directory/file1']

with tarfile.open("/tmp/test.tar", "w") as tarball:
    for source in sources:
        tarball.add(source)
sources is a mixed list of directory and file names. With the above code, all of the files end up in the archive, but with my own UID and GID. If I were doing this on the command line, I'd prefix the call to tar with fakeroot.
In Python (3), if I try just looking at one directory:
import tarfile
import glob

with tarfile.open("/tmp/test.tar", "w") as tarball:
    for filename in glob.iglob('test-directory/**', recursive=True):
        info = tarball.gettarinfo(filename)
        info.uid = 0
        info.gid = 0
        info.uname = 'root'
        info.gname = 'root'
        tarball.addfile(info)
That gets me the proper ownership, but it misses files in the test-directory tree because I can't get the glob to work satisfactorily.
How can I do this?

Reading the source (tarfile.py), I added this function based on inspecting its add() method.
import os

def add_tarinfo(tarball, tarinfo, name, arcname, fakeroot):
    if fakeroot:
        tarinfo.uid = 0
        tarinfo.gid = 0
        tarinfo.uname = 'root'
        tarinfo.gname = 'root'
    if tarinfo.isreg():
        with open(name, "rb") as f:
            tarball.addfile(tarinfo, f)
    elif tarinfo.isdir():
        tarball.addfile(tarinfo)
        for f in os.listdir(name):
            nname = os.path.join(name, f)
            narcname = os.path.join(arcname, f)
            ntarinfo = tarball.gettarinfo(nname, narcname)
            add_tarinfo(tarball, ntarinfo, nname, narcname, fakeroot)
    else:
        tarball.addfile(tarinfo)
So the original code becomes:
with tarfile.open("/tmp/test.tar", "w") as tarball:
    for arcname in self.sources:
        name = os.path.join(self.source_path, arcname)
        tarinfo = tarball.gettarinfo(name=name, arcname=arcname)
        add_tarinfo(tarball, tarinfo, name, arcname, True)
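To sanity-check the result, here is a quick sketch (my addition) that reopens the archive and prints each member's ownership fields; every entry should now report uid/gid 0 and root/root:
import tarfile

with tarfile.open("/tmp/test.tar") as tarball:
    for member in tarball.getmembers():
        # each member carries its own ownership metadata
        print(member.name, member.uid, member.gid, member.uname, member.gname)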

I achieved this using the filter parameter (available since Python 3.2) of the TarFile.add() method (docs.python.org):
def fakeroot_filter(tarinfo):
    tarinfo.gid = 0
    tarinfo.uid = 0
    tarinfo.gname = 'root'
    tarinfo.uname = 'root'
    return tarinfo

with tarfile.open('data.tgz', 'w:gz', format=tarfile.GNU_FORMAT) as arc:
    arc.add(f'{path}/data', arcname='data', filter=fakeroot_filter)
Some additional info about this feature can be found in the related issue on bugs.python.org
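As a minimal sketch of how this filter would slot into the original question's code (assuming the same mixed sources list; recursion into directories is handled by add() itself):
import tarfile

sources = ['test-directory', 'another-directory/file1']

def fakeroot_filter(tarinfo):
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = 'root'
    return tarinfo

with tarfile.open("/tmp/test.tar", "w") as tarball:
    for source in sources:
        # the filter is applied to every member add() creates,
        # including files found while recursing into directories
        tarball.add(source, filter=fakeroot_filter)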

Related

Python Program error - The process cannot access the file because it is being used by another process

I am trying to test Python code that moves a file from a source path to a target path, using pytest in Python 3. I have hit a roadblock: at the end of the test I try to remove the source and target paths with shutil.rmtree(path) or os.rmdir(path), and this raises the error "[WinError 32] The process cannot access the file because it is being used by another process". Please help me with this. Below is the pytest code:
import pytest
import os
import shutil
import tempfile

from sample_test_module import TestCondition

object_test_condition = TestCondition()

@pytest.mark.parametrize("test_value", ['0'])
def test_condition_pass(test_value):
    temp_dir = tempfile.mkdtemp()
    temp_src_folder = 'ABC_File'
    temp_src_dir = os.path.join(temp_dir, temp_src_folder)
    temp_file_name = 'Sample_Test.txt'
    temp_file_path = os.path.join(temp_src_dir, temp_file_name)
    os.chdir(temp_dir)
    os.mkdir(temp_src_folder)
    try:
        with open(temp_file_path, "w") as tmp:
            tmp.write("Hello-World\n")
            tmp.write("Hi-All\n")
    except IOError:
        print("Error has occurred, please check it.")
    org_val = object_test_condition.sample_test(temp_dir)
    print("Temp file path is : " + temp_file_path)
    print("Temp Dir is : " + temp_dir)
    shutil.rmtree(temp_dir)
    print("The respective dir path is now removed.")
    assert org_val == test_value
When the code is executed, the following error pops up:
[WinError32] The process cannot access the file because it is being used by another process : 'C:\Users\xyz\AppData\Local\Temp\tmptryggg56'
You are getting this error because the directory you are trying to remove is the current directory of the process. If you save the current directory before calling os.chdir (using os.getcwd()), and chdir back to that directory before removing temp_dir, it should work.
Your code isn't correctly indented, so here is my best guess at what it should look like.
import pytest
import os
import shutil
import tempfile

from sample_test_module import TestCondition

object_test_condition = TestCondition()

@pytest.mark.parametrize("test_value", ['0'])
def test_condition_pass(test_value):
    temp_dir = tempfile.mkdtemp()
    temp_src_folder = 'ABC_File'
    temp_src_dir = os.path.join(temp_dir, temp_src_folder)
    temp_file_name = 'Sample_Test.txt'
    temp_file_path = os.path.join(temp_src_dir, temp_file_name)
    prev_dir = os.getcwd()
    os.chdir(temp_dir)
    os.mkdir(temp_src_folder)
    try:
        with open(temp_file_path, "w") as tmp:
            tmp.write("Hello-World\n")
            tmp.write("Hi-All\n")
    except IOError:
        print("Error has occurred, please check it.")
    org_val = object_test_condition.sample_test(temp_dir)
    print("Temp file path is : " + temp_file_path)
    print("Temp Dir is : " + temp_dir)
    os.chdir(prev_dir)
    shutil.rmtree(temp_dir)
    print("The respective dir path is now removed.")
    assert org_val == test_value
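As an aside, a small hardening sketch (my addition, not part of the answer above): wrapping the chdir and cleanup in try/finally guarantees the working directory is restored and the temp directory removed even if the assertion fails partway through.
import os
import shutil
import tempfile

def run_in_temp_dir(work):
    # hypothetical helper: run work(temp_dir) inside a fresh temp dir, then clean up
    temp_dir = tempfile.mkdtemp()
    prev_dir = os.getcwd()
    os.chdir(temp_dir)
    try:
        return work(temp_dir)
    finally:
        os.chdir(prev_dir)       # step out of the directory before deleting it
        shutil.rmtree(temp_dir)  # Windows no longer treats it as "in use"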
Can you try closing the temp file before removing the directory?
tmp.close()

Iterate through folder/sub-directories and move found regex files into new folder

I've got a folder/sub-directory structure as follows:
-main_folder
-sub_1
322.txt
024.ops
-sub_2
977.txt
004.txt
-sub_3
396.xml
059.ops
I'm trying to iterate through the folder and its sub-directories with os.walk and collect the file names inside them. When a name matches a regex rule, I want to either store the path in a list or move that file directly into a new folder (mkdir).
I've already got the regexes to find the documents I want.
For example:
find_000_099 = r'\b(0\d{2}.\w{1,4})'
find_300_399 = r'\b(3\d{2}.\w{1,4})'
find_900_999 = r'\b(9\d{2}.\w{1,4})'
I would like my expected result to look like this:
-main_folder
-sub_from_000_099
024.ops
004.txt
059.ops
-sub_from_300_399
322.txt
396.xml
-sub_from_900_999
977.txt
You can use the code below, which moves each file from its initial directory to the desired directory.
import os
import re
import shutil

find_000_099 = r'\b(0\d{2}.\w{1,4})'
find_300_399 = r'\b(3\d{2}.\w{1,4})'
find_900_999 = r'\b(9\d{2}.\w{1,4})'

count = 0
for roots, dirs, files in os.walk('Directory Path'):
    # print(roots, len(dirs), len(files))
    if count == 0:
        parent_dir = roots
        os.mkdir(parent_dir + "/sub_from_000_099")
        os.mkdir(parent_dir + "/sub_from_300_399")
        os.mkdir(parent_dir + "/sub_from_900_999")
        count += 1
    else:
        print(count)
        for file in files:
            print(file)
            if re.match(find_000_099, file):
                shutil.move(roots + "/" + file, parent_dir + "/sub_from_000_099/" + file)
            elif re.match(find_300_399, file):
                shutil.move(roots + "/" + file, parent_dir + "/sub_from_300_399/" + file)
            elif re.match(find_900_999, file):
                shutil.move(roots + "/" + file, parent_dir + "/sub_from_900_999/" + file)
It's skeleton code that fulfills your requirements.
You can add checks when creating the directories, for example by first checking whether a directory already exists, plus any other checks you need.
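For example, one way to add that existence check (a sketch, reusing the parent_dir variable from the snippet above):
import os

for sub in ("sub_from_000_099", "sub_from_300_399", "sub_from_900_999"):
    # exist_ok=True means re-running the script will not raise FileExistsError
    os.makedirs(os.path.join(parent_dir, sub), exist_ok=True)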
Here is a simpler way, using pathlib and shutil
import re
import shutil
from pathlib import Path

new_path = Path("new_folder")
if not new_path.exists():
    new_path.mkdir()

# Getting all files in the main directory
files = Path("main_folder").rglob("*.*")

regs = {
    r'\b(0\d{2}.\w{1,4})': "sub_1",  # find_000_099
    r'\b(3\d{2}.\w{1,4})': "sub_2",  # find_300_399
    r'\b(9\d{2}.\w{1,4})': "sub_3",  # find_900_999
}

for f in files:
    for reg in regs:
        if re.search(reg, f.name):
            temp_path = new_path / regs[reg]
            if not temp_path.exists():
                temp_path.mkdir()
            # Change the following method to 'move' after testing it
            shutil.copy(f, temp_path / f.name)
            break
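If you want to preview where everything would go before switching copy to move, a small dry-run sketch (reusing new_path and regs from above):
# Dry run: print the destination of each matching file without touching anything
for f in Path("main_folder").rglob("*.*"):
    for reg, sub in regs.items():
        if re.search(reg, f.name):
            print(f, '->', new_path / sub / f.name)
            break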

Python script to move oldest 1000 file into another directory

Here is my code, which reads its input from a config file, moves files to another directory based on a condition, and logs the information to a log file:
import shutil
import configparser
import logging.handlers
import os

# Reading the input configuration
config = configparser.ConfigParser()
config.read("config_input.ini")

src_filepath = config.get("Configuration Inputs", "src_filepath")
dst_filepath = config.get("Configuration Inputs", "dst_filepath")
log_file_name = config.get("Configuration Inputs", "log_file_name")
file_limit = int(config.get("Configuration Inputs", "file_limit"))

if not os.path.exists(dst_filepath):
    os.makedirs(dst_filepath)

onlyfiles_in_dst = next(os.walk(dst_filepath))[2]
file_count_indst = len(onlyfiles_in_dst)
onlyfiles_in_src = next(os.walk(src_filepath))[2]
file_count_insrc = len(onlyfiles_in_src)

def sorted_ls(src_filepath):
    mtime = lambda f: os.stat(os.path.join(src_filepath, f)).st_mtime
    return list(sorted(os.listdir(src_filepath), key=mtime))

move_list = sorted_ls(src_filepath)
# print(move_list)

if file_count_indst < file_limit:
    for mfile in move_list:
        shutil.move(src_filepath + '\\' + mfile, dst_filepath)

# Logging everything
logger = logging.getLogger()
logging.basicConfig(filename=log_file_name, format='%(asctime)s %(message)s', filemode='a')
logger.setLevel(logging.INFO)
logger.info('Number of files moved from source ' + str(len(move_list)))
But the problem is that I want to move only the oldest 1000 files from source to destination.
Something like
"ls -lrt | head -n 1000"
which I cannot do, as I am running this script on Windows.
Please suggest a proper way to do it.
Also, please suggest how I can put this under a user-defined class so that it can be reused in other programs.
Can't a simple counter be the solution?
if file_count_indst < file_limit:
    count = 0
    for mfile in move_list:
        shutil.move(src_filepath + '\\' + mfile, dst_filepath)
        count = count + 1
        if count == 1000:
            break
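Alternatively, here is a sketch of the same idea using a list slice instead of a counter, wrapped in a small user-defined class so it can be reused from other programs (the class and method names are my own, not from the original script):
import os
import shutil

class OldestFileMover:
    def __init__(self, src, dst, limit=1000):
        self.src = src
        self.dst = dst
        self.limit = limit

    def sorted_by_mtime(self):
        # oldest first, same ordering as sorted_ls() in the question
        mtime = lambda f: os.stat(os.path.join(self.src, f)).st_mtime
        return sorted(os.listdir(self.src), key=mtime)

    def move(self):
        moved = 0
        for name in self.sorted_by_mtime()[:self.limit]:  # only the oldest 'limit' files
            shutil.move(os.path.join(self.src, name), self.dst)
            moved += 1
        return moved

# Usage, with the values read from config_input.ini above:
# mover = OldestFileMover(src_filepath, dst_filepath, file_limit)
# print(mover.move(), "files moved")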

CouchDB change Database directory

I am trying to change the database directory of CouchDB. I am using a Python script to import a CSV file into CouchDB; the script runs fine. Here it is just in case:
from couchdbkit import Server, Database
from couchdbkit.loaders import FileSystemDocsLoader
from csv import DictReader
import sys, subprocess, math, os

def parseDoc(doc):
    for k, v in doc.items():
        if isinstance(v, str):
            # print(k, v, v.isdigit())
            # see if this string is really an int or a float
            if v.isdigit() == True:  # int
                doc[k] = int(v)
            else:  # try a float
                try:
                    if math.isnan(float(v)) == False:
                        doc[k] = float(v)
                except:
                    pass
    return doc

def upload(db, docs):
    db.bulk_save(docs)
    del docs
    return list()

def uploadFile(fname, dbname):
    # connect to the db
    theServer = Server()
    db = theServer.get_or_create_db(dbname)

    # loop on file for upload
    reader = DictReader(open(fname, 'rU'), dialect='excel')
    docs = list()
    checkpoint = 100
    i = 0
    for doc in reader:
        newdoc = parseDoc(doc)
        docs.append(newdoc)
        if len(docs) % checkpoint == 0:
            docs = upload(db, docs)
            i += 1
            print('Number : %d' % i)
    # don't forget the last batch
    docs = upload(db, docs)

if __name__ == '__main__':
    x = '/media/volume1/Crimes_-_2001_to_present.csv'
    filename = x
    dbname = 'test'
    uploadFile(filename, dbname)
I have seen plenty of posts on how to change the directory where the database is stored. If I leave /etc/couchdb/local.ini as it is (the original after installation), the script appends data to the default directory /var/lib/couchdb/1.0.1/. When I modify local.ini to store the database on another disk:
database_dir = /media/volume1
view_index_dir = /media/volume1
and then restart the CouchDB service, I get this error:
restkit.errors.RequestError: socket.error: [Errno 111] Connection refused
I have checked the open sockets (CouchDB uses 5984 by default) and the port is not open, yet I get no errors when I start the CouchDB service.
Any ideas how to fix it?
I think the error may be because you changed the directory location in local.ini, but when you try to make a new connection to the existing database, CouchDB cannot find it there.
So move the database_name.couch file to the new location you put in local.ini and then try to connect again. I think this should work.
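As a quick sanity check, a sketch that uses only the same couchdbkit calls as the script above to confirm the server actually came back up on port 5984 before re-running the import:
from couchdbkit import Server

try:
    db = Server().get_or_create_db('test')  # same database name as in the script
    print('CouchDB is reachable and the database is available.')
except Exception as exc:
    print('CouchDB is not reachable: %s' % exc)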

How to create a symbolic link with SCons?

I'm using SCons for building a project and need to add a symbolic link to a file it is installing via env.Install. What command(s) will make a link that's the equivalent of running ln -s on the command line?
SCons doesn't have a dedicated symbolic link command, but you can use os.symlink(src, dst) from Python's os module:
import os

env = Environment()

def SymLink(target, source, env):
    os.symlink(os.path.abspath(str(source[0])), os.path.abspath(str(target[0])))

env.Command("file.out", "file.in", SymLink)
This may not work correctly on Windows; I've only tried it on Linux.
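A slightly hardened variant of the same idea (my own sketch, not part of the answer above): removing a stale link first means a rebuild won't fail with FileExistsError.
import os

env = Environment()

def SymLink(target, source, env):
    link = os.path.abspath(str(target[0]))
    src = os.path.abspath(str(source[0]))
    if os.path.islink(link):
        os.remove(link)  # drop a stale link left over from a previous build
    os.symlink(src, link)

env.Command("file.out", "file.in", SymLink)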
There seems to be little advancement in the SCons core code for symbolic link support, and I wasn't satisfied with any one solution I found on the web. Here is a potential builder which incorporates aspects of both Nick's and richq's answers. Additionally, it will catch name changes (thanks to the emitter method) and is as platform-agnostic as I could get it.
I prefer this builder because it will make links relative to the directory in which they are installed. One could add an option to force the link to be absolute I suppose, but I have not needed or wanted that yet.
Currently, if the OS doesn't support symlinks, I just pass and do nothing, but one could use shutil.copytree() instead, for example; however, the dependency becomes messy if the source is a directory, so the emitter would need to do something fancy. I'm open to any suggestions here.
One can put the following code into the file site_scons/site_tools/symlink.py (with blank __init__.py files in the appropriate places). Then do this in the SConstruct file:
SConstruct:
env = Environment()
env.Tool('symlink')
env.SymLink('link_name.txt', 'real_file.txt')
symlink.py:
import os
from os import path

from SCons.Node import FS
from SCons.Script import Action, Builder


def generate(env):
    '''
    SymLink(link_name, source)
    env.SymLink(link_name, source)

    Makes a symbolic link named "link_name" that points to the
    real file or directory "source". The link produced is always
    relative.
    '''
    bldr = Builder(action=Action(symlink_builder, symlink_print),
                   target_factory=FS.File,
                   source_factory=FS.Entry,
                   single_target=True,
                   single_source=True,
                   emitter=symlink_emitter)
    env.Append(BUILDERS={'SymLink': bldr})


def exists(env):
    '''
    we could test if the OS supports symlinks here, or we could
    use copytree as an alternative in the builder.
    '''
    return True


def symlink_print(target, source, env):
    lnk = path.basename(target[0].abspath)
    src = path.basename(source[0].abspath)
    return 'Link: ' + lnk + ' points to ' + src


def symlink_emitter(target, source, env):
    '''
    This emitter removes the link if the source file name has changed,
    since scons does not seem to catch this case.
    '''
    lnk = target[0].abspath
    src = source[0].abspath
    lnkdir, lnkname = path.split(lnk)
    srcrel = path.relpath(src, lnkdir)

    if int(env.get('verbose', 0)) > 3:
        ldir = path.relpath(lnkdir, env.Dir('#').abspath)
        if ldir[:2] == '..':
            ldir = path.abspath(ldir)
        print(' symbolic link in directory: %s' % ldir)
        print('     %s -> %s' % (lnkname, srcrel))

    try:
        if path.exists(lnk):
            if os.readlink(lnk) != srcrel:
                os.remove(lnk)
    except AttributeError:
        # no symlink available, so we remove the whole tree? (or pass)
        # shutil.rmtree(lnk)
        print('no os.symlink capability on this system?')

    return (target, source)


def symlink_builder(target, source, env):
    lnk = target[0].abspath
    src = source[0].abspath
    lnkdir, lnkname = path.split(lnk)
    srcrel = path.relpath(src, lnkdir)

    if int(env.get('verbose', 0)) > 4:
        print('target:', target)
        print('source:', source)
        print('lnk:', lnk)
        print('src:', src)
        print('lnkdir, lnkname:', lnkdir, lnkname)
        print('srcrel:', srcrel)

    if int(env.get('verbose', 0)) > 4:
        print('in directory: %s' % path.relpath(lnkdir, env.Dir('#').abspath))
        print('  symlink: %s -> %s' % (lnkname, srcrel))

    try:
        os.symlink(srcrel, lnk)
    except AttributeError:
        # no symlink available, so we make a (deep) copy? (or pass)
        # shutil.copytree(srcrel, lnk)
        print('no os.symlink capability on this system?')

    return None
This creates a builder to perform the job:
mylib = env.SharedLibrary("foobar", SRCS)
builder = Builder(action = "ln -s ${SOURCE.file} ${TARGET.file}", chdir = True)
env.Append(BUILDERS = {"Symlink" : builder})
mylib_link = env.Symlink("_foobar.so", mylib)
env.Default(mylib)
env.Default(mylib_link)
Again, this solution is for Linux.
If you wanted to issue the command directly to the shell and know the OS, subprocess can be used as well.
E.g.: subprocess.call(['ln', '-s', '</src/path>', '</dest/path>'])
In addition to Nick's solution, you can add a directory symlink by using a file as a directory name carrier. It's not the cleanest solution, and debugging path names is a pain, but this works well:
def symlink_last(target, source, env):
    src = os.path.basename(os.path.dirname(str(source[0])))
    link = "deliverables/last"
    print("Symlinking " + src + " as " + link)
    os.symlink(src, link)

BUILD_TARGETS.append('link')
install_dir = "deliverables/subdir"
carrier_file = "filename"

builder = Builder(action=symlink_last, chdir=False)
env.Append(BUILDERS={"Symlink": builder})
env.Alias(target="link", source=env.Symlink(dir="deliverables", source=install_dir + "/" + carrier_file))
This will make a link to deliverables/subdir named deliverables/last, provided that a file deliverables/subdir/filename exists.
