I'm exporting workbooks to my local drive from Tableau Server using Tableau Server Client (TSC) in Python. I'm able to export some workbooks, but there are some Tableau workbooks I'm unable to export; for those I get an error such as 400080: Bad Request.
import os
import tableauserverclient as TSC

tableau_auth = TSC.TableauAuth('****', '*****', site_id="")
server = TSC.Server('https://****.net')
server.use_server_version()
tag_to_filter = 'Summary'
new_folder_path = 'C:\\Users\\User\\Desktop\\Tableau\\TSC'

with server.auth.sign_in(tableau_auth):
    req_option = TSC.RequestOptions().page_size(300)
    all_workbooks, pagination_item = server.workbooks.get(req_option)
    print([workbook.name for workbook in all_workbooks])
    req_option.filter.add(TSC.Filter(TSC.RequestOptions.Field.Name, TSC.RequestOptions.Operator.Equals, tag_to_filter))
    for workbook in TSC.Pager(server.workbooks, req_option):
        workbook_path = new_folder_path + workbook.name
        os.makedirs(workbook_path)
        server.workbooks.populate_views(workbook)
        for view in workbook.views:
            size = TSC.PDFRequestOptions.PageType.Tabloid
            orientation = TSC.PDFRequestOptions.Orientation.Landscape
            req_option = TSC.PDFRequestOptions(size, orientation)
            server.views.populate_pdf(view, req_option)
            file_path = workbook_path + '/' + view.name + '.pdf'
            print(view.name)
            print(file_path)
            with open(file_path, 'wb') as image_file:
                image_file.write(view.pdf)
            print('\tPDF of {0} from {1} workbook'.format(view.name, workbook_path))
raise ServerResponseError.from_response(server_response.content, self.parent_srv.namespace)
tableauserverclient.server.endpoint.exceptions.ServerResponseError:
400080: Bad Request
There was a problem querying the pdf for view 'e91a2103-3347-4c91-8d51-a1ef88321492'.
Our Oracle database has been upgraded to a new server, so it has a new server name. Most of our published workbooks on Tableau Server connect to this Oracle database. The username and password remain the same, but the server address has changed. I used the following Python code. It identifies the right workbooks that need the server address update, however it produces this error:
404004: Resource Not Found
Datasource '5f125136-22da-48d0-bdc7-8e5edde8d809' could not be found.
import tableauserverclient as TSC
import re

tableau_auth = TSC.TauAuth if False else TSC.TableauAuth('site_admin_username', 'site_admin_password', site_id='default')  # site_id not needed if there is only one
search_server_regex = 'oldserver123'  # server to search
replace_server = 'newserver123'       # use if server name/address is changing - otherwise make it the same as search_server
overwrite_credentials = False         # set to False to use existing credentials
search_for_certain_users = True       # set to True if you only want to update connections for certain usernames
search_username = 'username'
replace_username = 'username'
replace_pw = 'password'

request_options = TSC.RequestOptions(pagesize=1000)  # this needs to be > # of workbooks/data connections on the site
server = TSC.Server('http://tableau_server:8000')    # Tableau Server
y = 0  # to keep track of how many are changed

try:
    with server.auth.sign_in(tableau_auth):
        all_workbooks, pagination_item = server.workbooks.get(req_options=request_options)
        print("Total Workbooks to Search: {}".format(len(all_workbooks)))
        for wb in all_workbooks:
            server.workbooks.populate_connections(wb)
            for item, conn in enumerate(wb.connections):  # make sure to iterate through all connections in the workbook
                if wb.connections[item].connection_type != 'sqlproxy':  # sqlproxy indicates a published datasource
                    if re.search(search_server_regex, wb.connections[item].server_address, re.IGNORECASE):
                        connection = wb.connections[item]
                        if search_for_certain_users and re.search(search_username, connection.username, re.IGNORECASE):
                            # print(wb.name, '-', connection.connection_type)
                            connection.server_address = replace_server
                            connection.embed_password = False
                            if overwrite_credentials:
                                connection.embed_password = True
                                connection.username = replace_username
                                connection.password = replace_pw
                            server.datasources.update_connection(wb, connection)
                            y = y + 1
                        elif not search_for_certain_users:
                            # print(wb.name, '-', connection.connection_type)
                            connection.server_address = replace_server
                            connection.embed_password = False
                            if overwrite_credentials:
                                connection.embed_password = True
                                connection.username = replace_username
                                connection.password = replace_pw
                            server.datasources.update_connection(wb, connection)
                            y = y + 1
        print("Workbook Connections Changed: {}".format(y))
except Exception as e:
    print("PW Update Failed with error: {}".format(e))
    print("Connections Updated: {}".format(y))
How do I fix this code?
You have to update the workbooks:
server.workbooks.update_connection(wb, connection)
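For context, a minimal sketch of how that swap could look inside the loop from the question, reusing the same variable names; this assumes your tableauserverclient version exposes workbooks.update_connection:

connection.server_address = replace_server
connection.embed_password = False
if overwrite_credentials:
    connection.embed_password = True
    connection.username = replace_username
    connection.password = replace_pw
# Update via the workbooks endpoint, so the first argument really is the
# workbook ("wb") that the connection belongs to.
server.workbooks.update_connection(wb, connection)
y = y + 1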
I ran into the same problem and I'm hoping my solution fixes yours. I created a "helper" class that has one attribute called "id":
class datasource_id:
    def __init__(self, id):
        self.id = id
I put the class at the top of my code. Then I replaced the lines:
if overwrite_credentials:
    connection.embed_password = True
    connection.username = replace_username
    connection.password = replace_pw
server.datasources.update_connection(wb, connection)
with the code below in both places:
if overwrite_credentials:
    connection.embed_password = True
    connection.username = replace_username
    connection.password = replace_pw
d1 = datasource_id(wb.connections[item].datasource_id)
server.datasources.update_connection(d1, connection)
The reason this works is that .update_connection takes whatever is passed in the first argument position and uses its id as the datasource id. Passing "wb" there isn't correct, because the id of the "wb" variable is the id of the workbook, not of the datasource.
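As a variation on the helper class, and assuming your tableauserverclient version provides datasources.get_by_id, you could fetch the real DatasourceItem instead of a stub object; this is only a sketch of the same idea:

if overwrite_credentials:
    connection.embed_password = True
    connection.username = replace_username
    connection.password = replace_pw
# Assumption: datasources.get_by_id is available in your TSC version.
ds = server.datasources.get_by_id(wb.connections[item].datasource_id)
server.datasources.update_connection(ds, connection)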
I'm trying to download a huge zip file (~9 GB zipped and ~130 GB unzipped) from an FTP server with Python using the ftplib library. Unfortunately, when using the retrbinary method, it does create the file in my local directory but it does not write anything into it, and after the code runs for a while I get a timeout error. It used to work fine before, but when I tried to go deeper into the use of sockets with this code it stopped working. Since the files I am trying to download are huge, I want to have more control over the connection to prevent timeout errors while downloading. I am not very familiar with sockets, so I may have misused them. I have been searching online but did not find any problems like this. (I tried with smaller files too, for testing, but I still have the same issues.)
Here are the functions I tried; both have problems (method 1 is not writing to the file, method 2 downloads the file but I can't unzip it):
import time
import socket
import ftplib
import threading
from zipfile import ZipFile  # needed by unzip_file below

# To complete
filename = ''
local_folder = ''
ftp_folder = ''
host = ''
user = ''
mp = ''

# timeout error in method 1
def downloadFile_method_1(filename, local_folder, ftp_folder, host, user, mp):
    try:
        ftp = ftplib.FTP(host, user, mp, timeout=1600)
        ftp.set_debuglevel(2)
    except ftplib.error_perm as error:
        print(error)
    with open(local_folder + '/' + filename, "wb") as f:
        ftp.retrbinary("RETR" + ftp_folder + '/' + filename, f.write)

# method 2 downloads the zip file, but there is a header error when unzipping it
def downloadFile_method_2(filename, local_folder, ftp_folder, host, user, mp):
    try:
        ftp = ftplib.FTP(host, user, mp, timeout=1600)
        ftp.set_debuglevel(2)
        sock = ftp.transfercmd('RETR ' + ftp_folder + '/' + filename)
    except ftplib.error_perm as error:
        print(error)

    def background():
        f = open(local_folder + '/' + filename, 'wb')
        while True:
            block = sock.recv(1024 * 1024)
            if not block:
                break
            f.write(block)
        sock.close()

    t = threading.Thread(target=background)
    t.start()
    while t.is_alive():
        t.join(60)
        ftp.voidcmd('NOOP')

def unzip_file(filename, local_folder):
    local_filename = local_folder + '/' + filename
    with ZipFile(local_filename, 'r') as zipObj:
        zipObj.extractall(local_folder)
And the error I get for method 1:
ftplib.error_temp: 421 Timeout - try typing a little faster next time
And the error I get when I try to unzip after using method 2:
zipfile.BadZipFile: Bad magic number for file header
Also, regarding this code, an explanation of what the setsockopt calls below do would be helpful too:
ftp.sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75)
ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 60)
Thanks for your help.
I'm working on a Python 3 win32com.client script that searches for emails from a specific sender and downloads 1 out of multiple attachments.
The issue I have is that in the instances where an email has two attachments, it tries to download and rename both, which overwrites the first file that I want with the second attachment.
The file attachment has a specific filename but there is another attachment with a similar name.
So far I have:
import win32com.client
import os

mydesktop = os.path.expanduser('~') + '/Desktop/'
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")

# Select main Inbox
inbox = outlook.GetDefaultFolder(6)
messages = inbox.Items
sender = 'mysender#domain'
MyDailyfolder = mydesktop + 'My Daily Data/'

try:
    for message in messages:
        msg_date = message.SentOn.strftime('%Y-%m-%d')
        try:
            s = message.sender
            s = str(s)
            if s == sender:
                for att in message.Attachments:
                    if "Dashboard2_dashboard2" in att.FileName:  # <--- This doesn't work.
                        outfile_name2 = 'MycustomName' + msg_date + '.csv'
                        outfile_path2 = MyDailyfolder + outfile_name2
                        if not os.path.exists(MyDailyfolder): os.makedirs(MyDailyfolder)
                        # save file
                        att.SaveASFile(outfile_path2)
                        print('Saved file:', outfile_name2)
        except:
            x = 1
except:
    x = 1
The result downloads all of the attached files in an email to a new folder on my desktop, but it overwrites each copy. I'm trying to select only the attachment that contains "Dashboard2dashboard2" in its name. I think I have to use "for part in msg.walk():" but I have never used that command before. Any ideas?
Ah, the issue was a typo in my filename search: an extra underscore. I added an extra print at each step to make sure each part is valid.
sender = 'mysender#domain'
MyDailyfolder = mydesktop + 'My Daily Data/'

try:
    for message in messages:
        msg_date = message.SentOn.strftime('%Y-%m-%d')
        try:
            s = message.sender
            s = str(s)
            if s == sender:
                print('Sender:', message.sender)
                for att in message.Attachments:
                    if "dashboard_2.csv" in att.FileName:
                        outfile_name = msg_date + att.FileName
                        print('Match search confirmed')
                        # Backup test, print filename and watch spelling
                        print(att.FileName)
                        # Create a folder and copy/paste attachment there
                        outfile_path = MyDailyfolder + outfile_name
                        if not os.path.exists(MyDailyfolder): os.makedirs(MyDailyfolder)
                        # save file
                        att.SaveASFile(outfile_path)
        except:
            pass
except:
    pass
Hi, I am trying to unload multiple tables from Redshift to a particular S3 bucket and am getting the error below:
psycopg2.InternalError: Specified unload destination on S3 is not empty. Consider using a different bucket / prefix, manually removing the target files in S3, or using the ALLOWOVERWRITE option.
If I add the 'allowoverwrite' option to the unload function, it overwrites the previous tables and only the last table ends up in S3.
This is the code I have given:
import psycopg2

def unload_data(r_conn, aws_iam_role, datastoring_path, region, table_name):
    unload = '''unload ('select * from {}')
                to '{}'
                credentials 'aws_iam_role={}'
                manifest
                gzip
                delimiter ',' addquotes escape parallel off '''.format(table_name, datastoring_path, aws_iam_role)
    print("Exporting table to datastoring_path")
    cur = r_conn.cursor()
    cur.execute(unload)
    r_conn.commit()

def main():
    host_rs = 'dataingestion.*********.us******2.redshift.amazonaws.com'
    port_rs = '5439'
    database_rs = '******'
    user_rs = '******'
    password_rs = '********'
    rs_tables = ['Employee', 'Employe_details']
    iam_role = 'arn:aws:iam::************:role/RedshiftCopyUnload'
    s3_datastoring_path = 's3://mysamplebuck/'
    s3_region = 'us_*****_2'
    print("Exporting from source")
    src_conn = psycopg2.connect(host=host_rs,
                                port=port_rs,
                                database=database_rs,
                                user=user_rs,
                                password=password_rs)
    print("Connected to RS")
    for i, tabe in enumerate(rs_tables):
        if tabe[0] == tabe[-1]:
            print("No files to read!")
        unload_data(src_conn, aws_iam_role=iam_role, datastoring_path=s3_datastoring_path, region=s3_region, table_name=rs_tables[i])
        print(rs_tables[i])

if __name__ == "__main__":
    main()
It is complaining that you are saving the data to the same destination.
This would be like copying all the files on your computer to the same directory -- there will be files overwritten.
You should change your datastoring_path to be different for each table, such as:
.format(table_name, datastoring_path + '/' + table_name, aws_iam_role)
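Applied to the unload_data function from the question, a minimal sketch (same names as above; adjust the slashes if datastoring_path already ends with '/') could look like this:

def unload_data(r_conn, aws_iam_role, datastoring_path, region, table_name):
    # Give each table its own S3 prefix so the unload destinations never collide,
    # e.g. s3://mysamplebuck/Employee and s3://mysamplebuck/Employe_details
    unload = '''unload ('select * from {}')
                to '{}'
                credentials 'aws_iam_role={}'
                manifest
                gzip
                delimiter ',' addquotes escape parallel off '''.format(
        table_name, datastoring_path + '/' + table_name, aws_iam_role)
    cur = r_conn.cursor()
    cur.execute(unload)
    r_conn.commit()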
I'm writing code to download a file from Google Drive to the local system using the Google Drive API. Below is my code. I have the following questions:
1) Is there a way to specify a sheet number so that only that sheet is downloaded from a workbook, or will the complete workbook be downloaded?
2) We have MIMETYPE 'text/csv'; is there a way to save the files using another delimiter, say a pipe?
3) Can we specify the download location? Right now it downloads to wherever the Python script is.
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
import io
from apiclient.http import MediaIoBaseDownload

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# Set the scope for authorization and specify the json file
SCOPES = 'https://www.googleapis.com/auth/drive'
CLIENT_SECRET = 'client_secret.json'

# At least once we will have to allow our program to access the document; after that it is stored in the storage.json file
store = file.Storage('storage.json')
credz = store.get()
if not credz or credz.invalid:
    flow = client.flow_from_clientsecrets(CLIENT_SECRET, SCOPES)
    credz = tools.run_flow(flow, store, flags) \
        if flags else tools.run(flow, store)

DRIVE = build('drive', 'v2', http=credz.authorize(Http()))

MIMETYPE = 'text/csv'
file_id = '1p3yRgi093TKbsBrxkUkV1cP-6h8dWUIKXycU62i9Arc'
request = DRIVE.files().export_media(fileId=file_id, mimeType=MIMETYPE)
fh = io.FileIO('Google App Scripts for beginner.csv', 'wb')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("Download %d%%." % int(status.progress() * 100))