CouchDB: change database directory - Linux

I am trying to change the data directory of my CouchDB database. I am using a Python script to import a CSV file into CouchDB. The script runs fine; here it is just in case:
from couchdbkit import Server, Database
from couchdbkit.loaders import FileSystemDocsLoader
from csv import DictReader
import sys, subprocess, math, os

def parseDoc(doc):
    for k, v in doc.items():
        if isinstance(v, str):
            #print k, v, v.isdigit()
            # see if this string is really an int or a float
            if v.isdigit():  # int
                doc[k] = int(v)
            else:  # try a float
                try:
                    if not math.isnan(float(v)):
                        doc[k] = float(v)
                except:
                    pass
    return doc

def upload(db, docs):
    db.bulk_save(docs)
    del docs
    return list()

def uploadFile(fname, dbname):
    # connect to the db
    theServer = Server()
    db = theServer.get_or_create_db(dbname)
    # loop on file for upload
    reader = DictReader(open(fname, 'rU'), dialect='excel')
    docs = list()
    checkpoint = 100
    i = 0
    for doc in reader:
        newdoc = parseDoc(doc)
        docs.append(newdoc)
        if len(docs) % checkpoint == 0:
            docs = upload(db, docs)
        i += 1
        print 'Number : %d' % i
    # don't forget the last batch
    docs = upload(db, docs)

if __name__ == '__main__':
    x = '/media/volume1/Crimes_-_2001_to_present.csv'
    filename = x
    dbname = 'test'
    uploadFile(filename, dbname)
I have seen plenty of posts on how to change the directory the database is written to. If I leave /etc/couchdb/local.ini as it is (original after installation), the script appends data to the default directory /var/lib/couchdb/1.0.1/. When I modify local.ini to store the database on another disk:
database_dir = /media/volume1
view_index_dir = /media/volume1
and restart the CouchDB service, I get this error:
restkit.errors.RequestError: socket.error: [Errno 111] Connection refused
I have checked the open sockets (CouchDB uses 5984 by default) and the port is not open, yet I get no errors when I start the CouchDB service.
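For reference, the kind of port check described above can also be done from Python (a minimal sketch, assuming CouchDB's default 127.0.0.1:5984):
import socket

s = socket.socket()
print s.connect_ex(('127.0.0.1', 5984)) == 0  # True if something is listening on 5984
s.close()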
Any ideas how to fix it?

I think the error occurs because you changed the directory location in local.ini, but when a new connection is made to the existing database, CouchDB cannot find it at the new location.
So move the database_name.couch file to the new location that you put in local.ini, and then try to make a connection. I think this should work.
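For example, once the .couch file has been moved and the service restarted, a quick way to confirm the server is reachable again (a minimal sketch using only the standard library, assuming the default port):
import urllib2

# Should print something like {"couchdb":"Welcome","version":"1.0.1"}
print urllib2.urlopen('http://127.0.0.1:5984/').read()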

Related

Python sqlite3: how to check if connection is an in-memory database?

From Python's sqlite3 library, how can we determine if a connection belongs to an in-memory database?
import sqlite3

conn = sqlite3.connect(':memory:')

def is_in_memory_connection(conn):
    # How to check if `conn` is an in-memory connection?
Is it possible to check the filename of an on-disk database? If so, I would presume that it would return None for an in-memory database.
This is the closest I could come up with:
import sqlite3

def is_in_memory_connection(conn):
    local_cursor = conn.cursor()
    local_cursor.execute('pragma database_list')
    rows = local_cursor.fetchall()
    print(rows[0][2])
    return rows[0][2] == ''

#database = 'test.sqlite'
database = ':memory:'
conn = sqlite3.connect(database)
result = is_in_memory_connection(conn)
print(result)
If you have an in-memory database, database_list will show the equivalent of this:
sqlite> pragma database_list;
seq  name  file
---  ----  ----
0    main
If you are opening a file that's on disk, it'll show the path of the file, the equivalent of this:
sqlite> pragma database_list;
seq  name  file
---  ----  --------------------------
0    main  /home/testing/test.sqlite
Taking advantage of this, you could call pragma database_list to show the file. If the path is empty, the database is not associated with a file.
https://sqlite.org/pragma.html#pragma_database_list
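A slightly tidier variant of the same idea (a sketch; the pragma returns (seq, name, file) rows, and the file column is empty for an in-memory database):
import sqlite3

def database_file(conn):
    # Return the path of the 'main' database, or None if it is in-memory
    for seq, name, filename in conn.execute('pragma database_list'):
        if name == 'main':
            return filename or None
    return None

print(database_file(sqlite3.connect(':memory:')))     # None
print(database_file(sqlite3.connect('test.sqlite')))  # absolute path to test.sqlite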

Why doesn't Python see my file, even though it can connect to the server via SMB?

I am trying to rename a file on the share. I can see it when I print out sharedfile.filename, but os.rename raises an error saying the file is not there.
import os
from datetime import datetime
from smb.SMBConnection import SMBConnection

userID = 'username'
password = 'mypsswrd$'
client_machine_name = 'localpcname'
server_name = 'hostname'
server_ip = '10.28.7.249'
domain_name = 'domain name'

now = datetime.now()
date_time = now.strftime("%d/%m/%y")

conn = SMBConnection(userID, password, client_machine_name, server_name,
                     domain=domain_name, use_ntlm_v2=True, is_direct_tcp=True)
conn.connect(server_ip, 445)

shares = conn.listShares()
for share in shares:
    if not share.isSpecial and share.name not in ['NETLOGON', 'SYSVOL']:
        sharedfiles = conn.listPath(share.name, '/')
        for sharedfile in sharedfiles:
            print(sharedfile.filename)

# change name
newname = 'CS_Out_Of_Country' + date_time + '.csv'
os.rename('CS_Out_Of_Country.csv', newname)
# Copy to archive
# close the connection
conn.close()
The connection works fine and I can see my CSV file, but it says the file doesn't exist. How come?
os.rename operates on the local filesystem, relative to the current working directory. It doesn't know about the SMB connection you created before. To fix this, you should either:
mount the SMB share into the local filesystem, then os.chdir() to said mountpoint and run os.rename(), OR
use SMBConnection.rename (disclaimer: I've never used pysmb before, but that's what I found in 5 seconds using Google).
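A sketch of the second option, assuming the CSV sits at the root of share.name (pysmb's SMBConnection.rename takes the share name plus the old and new paths). Note that the original "%d/%m/%y" format embeds slashes, which are path separators, so a dash-based format is used here:
date_time = datetime.now().strftime("%d-%m-%y")
newname = '/CS_Out_Of_Country' + date_time + '.csv'
conn.rename(share.name, '/CS_Out_Of_Country.csv', newname)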

Paramiko SSH connection timeout after 3 hours

I have developed two Python scripts to transfer a lot of data (~120 GB) to my VM, with Paramiko.
My VM is on an OVH server.
The first script transfers ~40 GB, and the second script ~80 GB.
Stack:
Python 3.9.1
Paramiko 2.7.2
SCP 0.13.3
In both scripts, I use this function to set up the SSH connection.
def connect():
    transport = paramiko.Transport((target_host, target_port))
    transport.connect(None, target_username, target_pwd)
    sftp_client = paramiko.SFTPClient.from_transport(transport)
    green_print("SSH connected")
    return sftp_client, transport
If I create one script which does both transfers, I get a timeout after 3 hours.
With two distinct scripts running at the same time, I get a timeout after 2h30 of transfer.
I have already read many, many posts on Paramiko, SSH connections, the timeout parameter, ClientAliveInterval, etc., but nothing works.
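For reference, the Paramiko-side keepalive usually mentioned in those posts looks like this (a sketch; per the question above, it did not solve the problem here):
def connect():
    transport = paramiko.Transport((target_host, target_port))
    transport.connect(None, target_username, target_pwd)
    # Ask Paramiko to send a keepalive packet every 30 seconds so that
    # an idle control connection is not dropped
    transport.set_keepalive(30)
    sftp_client = paramiko.SFTPClient.from_transport(transport)
    return sftp_client, transport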
After this time, I get this error:
Connection closed by remote host
The three functions of my script:
def connect():
    transport = paramiko.Transport((target_host, target_port))
    transport.connect(None, target_username, target_pwd)
    sftp_client = paramiko.SFTPClient.from_transport(transport)
    green_print("SSH connected")
    return sftp_client, transport

def transfert(sftp, vm, object_path):
    os.chdir(os.path.split(object_path)[0])
    parent = os.path.split(object_path)[1]
    try:
        sftp.mkdir(vm)
    except:
        pass  # remote directory probably exists already
    for path, _, files in os.walk(parent):
        try:
            sftp.mkdir(os.path.join(vm, path))
        except:
            pass  # remote directory probably exists already
        for filename in files:
            sftp.put(os.path.join(object_path, filename),
                     os.path.join(vm, path, filename))

def job():
    green_print("\nProcess start...")
    check_folder()
    folder = forfiles_method()
    vm, lidar, pos = name_path(folder)
    sftp, transport = connect()
    transfert(sftp, vm, pos)
    sftp.close()
    transport.close()
Minimal reproducible example:
from paramiko.sftp_client import SFTPClient
import paramiko
import os

target_host = 'xx.xx.x.xxx'
target_port = 22
target_username = "xxxxxxx"
target_pwd = 'xxxxxx'
remote_path = "e:/x/"       # => on your VM
target_folder = '/folder1'  # => on your computer

def connect():
    transport = paramiko.Transport((target_host, target_port))
    transport.connect(None, target_username, target_pwd)
    sftp_client = paramiko.SFTPClient.from_transport(transport)
    return sftp_client, transport

def transfert(sftp, remote_path, object_path):
    os.chdir(os.path.split(object_path)[0])
    parent = os.path.split(object_path)[1]
    try:
        sftp.mkdir(remote_path)
    except:
        pass
    for path, _, files in os.walk(parent):
        try:
            sftp.mkdir(os.path.join(remote_path, path))
        except:
            pass
        for filename in files:
            sftp.put(os.path.join(object_path, filename),
                     os.path.join(remote_path, path, filename))

def job():
    sftp, transport = connect()
    transfert(sftp, remote_path, target_folder)
    sftp.close()
    transport.close()
The tree structure of my files; I want to transfer only the "test" folder, which contains more than 120 GB:
folder / test
I'm new to Python development.
If someone has a solution, I'll take it!
So, the solution:
subprocess.run(["winscp.com", "/script=" + cmdFile], shell=True)
If winscp.com is not found as a command, use the full path instead, e.g. C:/Program Files (x86)/WinSCP/winscp.com.
Write your command lines in a text file, here cmdFile.
Links which can help you:
Running WinSCP command from Python
From Python run WinSCP commands in console
https://winscp.net/eng/docs/commandline
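Putting it together, a minimal end-to-end sketch (the host, credentials, paths and file names below are placeholders; the WinSCP commands are the standard open/put/exit sequence from the linked documentation):
import subprocess

cmdFile = "winscp_commands.txt"
with open(cmdFile, "w") as f:
    f.write('open sftp://user:password@xx.xx.x.xxx/\n'
            'put "C:\\folder\\test\\*" "/remote/test/"\n'
            'exit\n')

# Use the full path to winscp.com if it is not on PATH
result = subprocess.run(["winscp.com", "/script=" + cmdFile], shell=True)
print("WinSCP exit code:", result.returncode)  # 0 means success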

Error with bulk loading from CSV to Postgres

import pandas
import pygrametl
import psycopg2
from pygrametl.tables import SlowlyChangingDimension, CachedDimension, BulkDimension
from pygrametl.datasources import CSVSource

## Connection to Postgres
connection = psycopg2.connect(host="localhost", database="postgres",
                              user="postgres", password="tekihcan")
connect = pygrametl.ConnectionWrapper(connection)

def pgcopybulkloader(name, atts, fieldsep, rowsep, nullval, filehandle):
    # Here we use driver-specific code to get fast bulk loading.
    # You can change this method if you use another driver or you can
    # use the FactTable or BatchFactTable classes (which don't require
    # use of driver-specific code) instead of the BulkFactTable class.
    global connection
    curs = connect.cursor()
    try:
        curs.copy_from(file=filehandle, table=name, sep=fieldsep,
                       columns=atts, null='null')
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error %s" % error)

date_dim = BulkDimension(
    name='date_dim',
    key='d_date_sk',
    attributes=['d_date_id (B)', 'd_date', 'd_month_seq', 'd_week_seq',
                'd_quarter_seq', 'd_year', 'd_dow', 'd_moy', 'd_dom',
                'd_qoy', 'd_fy_year', 'd_fy_quarter_seq', 'd_fy_week_seq',
                'd_day_name', 'd_quarter_name', 'd_holiday', 'd_weekend',
                'd_following_holiday', 'd_first_dom', 'd_last_dom',
                'd_same_day_ly', 'd_same_day_lq', 'd_current_day',
                'd_current_week', 'd_current_month', 'd_current_quarter',
                'd_current_year'],
    lookupatts=['d_date_id (B)'],
    bulkloader=pgcopybulkloader)

date_dim_source = CSVSource(open('C:/Users/HP/Documents/v2.13.0rc1/data/date_dim.csv',
                                 'r', 16384), delimiter='|')

def main():
    for row in date_dim_source:
        date_dim.insert(row)

if __name__ == '__main__':
    main()
    connect.commit()  # commit so the bulk-loaded rows are persisted
The code is failing with an error.
As per my understanding, the error is caused because the target table is empty. The CSV source doesn't have a header either. Could this be impacting the code?
Please find the link that was used to develop the code: https://chrthomsen.github.io/pygrametl/
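If the CSV really has no header row, one possibility (a sketch, assuming pygrametl's CSVSource accepts csv.DictReader keyword arguments, which it wraps) is to pass the field names explicitly; the list below is abbreviated and must match the file's actual column order:
from pygrametl.datasources import CSVSource

fieldnames = ['d_date_sk', 'd_date_id (B)', 'd_date', 'd_month_seq']  # ...and so on
date_dim_source = CSVSource(open('C:/Users/HP/Documents/v2.13.0rc1/data/date_dim.csv',
                                 'r', 16384),
                            delimiter='|', fieldnames=fieldnames)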

RETR downloading zip file from FTP not writing

I am trying to download a huge zip file (~9 GB zipped, ~130 GB unzipped) from an FTP server with Python using the ftplib library. Unfortunately, when using the retrbinary method, it creates the file in my local directory but never writes into it, and after the code has run for a while I get a timeout error. It used to work fine before, but when I tried to go deeper into the use of sockets with this code, it stopped working. Since the files I am trying to download are huge, I want more control over the connection to prevent timeout errors during the download. I am not very familiar with sockets, so I may have misused them. I have searched online but did not find any problems like this. (I also tried with smaller files for testing, but I still have the same issues.)
Here are the functions I tried; both have problems (method 1 does not write to the file, method 2 downloads the file but I can't unzip it):
import time
import socket
import ftplib
import threading
from zipfile import ZipFile

# To complete
filename = ''
local_folder = ''
ftp_folder = ''
host = ''
user = ''
mp = ''

# timeout error in method 1
def downloadFile_method_1(filename, local_folder, ftp_folder, host, user, mp):
    try:
        ftp = ftplib.FTP(host, user, mp, timeout=1600)
        ftp.set_debuglevel(2)
    except ftplib.error_perm as error:
        print(error)
    with open(local_folder + '/' + filename, "wb") as f:
        ftp.retrbinary("RETR" + ftp_folder + '/' + filename, f.write)

# method 2 works to download the zip file, but header error when unzipping it
def downloadFile_method_2(filename, local_folder, ftp_folder, host, user, mp):
    try:
        ftp = ftplib.FTP(host, user, mp, timeout=1600)
        ftp.set_debuglevel(2)
        sock = ftp.transfercmd('RETR ' + ftp_folder + '/' + filename)
    except ftplib.error_perm as error:
        print(error)

    def background():
        f = open(local_folder + '/' + filename, 'wb')
        while True:
            block = sock.recv(1024*1024)
            if not block:
                break
            f.write(block)
        sock.close()

    t = threading.Thread(target=background)
    t.start()
    while t.is_alive():
        t.join(60)
        ftp.voidcmd('NOOP')

def unzip_file(filename, local_folder):
    local_filename = local_folder + '/' + filename
    with ZipFile(local_filename, 'r') as zipObj:
        zipObj.extractall(local_folder)
And the error I get for method 1:
ftplib.error_temp: 421 Timeout - try typing a little faster next time
And the error I get when I try to unzip after using method 2:
zipfile.BadZipFile: Bad magic number for file header
Also, regarding this code, if anyone could explain what the following setsockopt calls do, that would be helpful:
# SO_KEEPALIVE enables TCP keepalive probes on the control socket
ftp.sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
# TCP_KEEPINTVL: seconds between individual keepalive probes
ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75)
# TCP_KEEPIDLE: seconds of idle time before the first probe is sent
ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 60)
Thanks for your help.
