How to use Python SQLAlchemy ORM with MonetDB and database schema - python-3.x

I tried, and sadly failed, to use the Python SQLAlchemy ORM with MonetDB and a database schema.
A minimal example demonstrating my problem is the following:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String

engine = create_engine("monetdb://monetdb:monetdb@localhost:50000/demo")
connection = engine.connect()

Session = sessionmaker(bind=engine)
session = Session()

class Template(object):
    __table_args__ = ({"schema": "test"},)

Base = declarative_base(cls=Template)

class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    name = Column(String)

schemas = [name[0] for name in connection.execute("SELECT name FROM sys.schemas")]
if "test" not in schemas:
    connection.execute("CREATE SCHEMA test")

Base.metadata.create_all(bind=engine)

session.add_all([User(name="a"), User(name="b"), User(name="c")])
session.commit()

print(session.query(User).one())
This should work with a clean/empty MonetDB database (e.g. the demo one in Windows).
If the above example is run, it throws an error similar to the following:
Traceback (most recent call last):
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1278, in _execute_context
cursor, statement, parameters, context
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py", line 593, in do_execute
cursor.execute(statement, parameters)
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\sql\cursors.py", line 165, in execute
block = self.connection.execute(query)
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\sql\connections.py", line 140, in execute
return self.command('s' + query + '\n;')
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\sql\connections.py", line 145, in command
return self.mapi.cmd(command)
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\mapi.py", line 266, in cmd
raise exception(msg)
pymonetdb.exceptions.OperationalError: 42000!TODO: column names of level >= 3
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "test.py", line 42, in <module>
print(session.query(User).one())
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\orm\query.py", line 3458, in one
ret = self.one_or_none()
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\orm\query.py", line 3427, in one_or_none
ret = list(self)
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\orm\query.py", line 3503, in __iter__
return self._execute_and_instances(context)
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\orm\query.py", line 3528, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1014, in execute
return meth(self, multiparams, params)
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py", line 298, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1133, in _execute_clauseelement
distilled_params,
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1318, in _execute_context
e, statement, parameters, cursor, context
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1512, in _handle_dbapi_exception
sqlalchemy_exception, with_traceback=exc_info[2], from_=e
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py", line 178, in raise_
raise exception
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1278, in _execute_context
cursor, statement, parameters, context
File "C:\some\path\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py", line 593, in do_execute
cursor.execute(statement, parameters)
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\sql\cursors.py", line 165, in execute
block = self.connection.execute(query)
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\sql\connections.py", line 140, in execute
return self.command('s' + query + '\n;')
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\sql\connections.py", line 145, in command
return self.mapi.cmd(command)
File "C:\some\path\Anaconda3\lib\site-packages\pymonetdb\mapi.py", line 266, in cmd
raise exception(msg)
sqlalchemy.exc.OperationalError: (pymonetdb.exceptions.OperationalError) 42000!TODO: column names of level >= 3
[SQL: SELECT test.users.id AS test_users_id, test.users."name" AS test_users_name
FROM test.users]
(Background on this error at: http://sqlalche.me/e/13/e3q8)
And here is what the log from a freshly started MonetDB server on Windows could look like in this scenario:
# MonetDB 5 server v11.37.7 (Jun2020)
# Serving database 'demo', using 8 threads
# Compiled for x86_64-pc-winnt/64bit
# Found 63.847 GiB available main-memory of which we use 52.036 GiB
# Copyright (c) 1993 - July 2008 CWI.
# Copyright (c) August 2008 - 2020 MonetDB B.V., all rights reserved
# Visit https://www.monetdb.org/ for further information
# Listening for connection requests on mapi:monetdb://127.0.0.1:50000/
# SQL catalog created, loading sql scripts once
# loading sql script: 09_like.sql
# loading sql script: 10_math.sql
# loading sql script: 12_url.sql
# loading sql script: 13_date.sql
# loading sql script: 14_inet.sql
# loading sql script: 15_querylog.sql
# loading sql script: 16_tracelog.sql
# loading sql script: 17_temporal.sql
# loading sql script: 18_index.sql
# loading sql script: 20_vacuum.sql
# loading sql script: 21_dependency_views.sql
# loading sql script: 22_clients.sql
# loading sql script: 23_skyserver.sql
# loading sql script: 25_debug.sql
# loading sql script: 26_sysmon.sql
# loading sql script: 27_rejects.sql
# loading sql script: 39_analytics.sql
# loading sql script: 40_json.sql
# loading sql script: 41_md5sum.sql
# loading sql script: 45_uuid.sql
# loading sql script: 46_profiler.sql
# loading sql script: 51_sys_schema_extension.sql
# loading sql script: 58_hot_snapshot.sql
# loading sql script: 60_wlcr.sql
# loading sql script: 61_wlcr.sql
# loading sql script: 75_storagemodel.sql
# loading sql script: 80_statistics.sql
# loading sql script: 80_udf.sql
# loading sql script: 81_tracer.sql
# loading sql script: 90_generator.sql
# loading sql script: 99_system.sql
# MonetDB/SQL module loaded
# MonetDB server is started. To stop server press Ctrl-C.
#client1: createExceptionInternal: !ERROR: ParseException:SQLparser:42000!TODO: column names of level >= 3
It seems that the query
SELECT test.users.id AS test_users_id, test.users."name" AS test_users_name FROM test.users
can't be handled correctly by the MonetDB API/driver.
Related bug reports can be found, too:
https://www.monetdb.org/bugzilla/show_bug.cgi?id=2526
https://www.monetdb.org/bugzilla/show_bug.cgi?id=2854
https://www.monetdb.org/bugzilla/show_bug.cgi?id=3062
Sadly, as the bugs were first reported around 2010, this issue probably won't be fixed soon (or ever).
And finally here is some version information:
System: Windows 10 1809
MonetDB: 20200529
python: 3.7.7
pymonetdb: 1.3.1
sqlalchemy: 1.3.18
sqlalchemy-monetdb: 1.0.0
Does anyone know a way to work around this issue, e.g. by telling the SQLAlchemy ORM to use temporary aliases, etc.?

Indeed, MonetDB still doesn't support more than two levels of naming. It's still on our list, though, and your question has just increased its position.
I don't know much about SQLAlchemy. Any chance you can find a workaround for this problem?
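One direction that might work around it from the SQLAlchemy side, sketched here under the assumption that the MonetDB dialect honors SQLAlchemy's compile-time schema translation (untested): keep the schema out of the generated SQL with schema_translate_map, and select the schema per connection instead, so the server only ever sees two-level names.

from sqlalchemy import create_engine, event
from sqlalchemy.orm import sessionmaker

engine = create_engine("monetdb://monetdb:monetdb@localhost:50000/demo")

@event.listens_for(engine, "connect")
def set_default_schema(dbapi_connection, connection_record):
    # Make "test" the current schema of every new connection.
    cursor = dbapi_connection.cursor()
    cursor.execute('SET SCHEMA "test"')
    cursor.close()

# schema_translate_map (SQLAlchemy >= 1.1) rewrites schema names at compile
# time; mapping "test" to None drops the "test." prefix, so the ORM emits
# SELECT users.id ... FROM users instead of three-level names.
schemaless_engine = engine.execution_options(schema_translate_map={"test": None})
Session = sessionmaker(bind=schemaless_engine)

DDL such as Base.metadata.create_all(bind=engine) can still run against the plain engine, since the two-level name test.users is accepted there.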

Related

Why am I not able to create a table in SQLyog using PyCharm, while PyCharm is connected to SQLyog?

I was trying to connect to a database and create tables in it using Python. I was using PyCharm for that, and I successfully connected PyCharm to the SQLyog database.
import pymysql

def CreateConn():
    return pymysql.connect(host="localhost", database="myfirstDB", user="root", password="", port=3306)

CreateConn()
But when I tried to create a table from code, it showed some error lines that I don't understand. I changed the SQL database engine to SQLite and also tried changing the IDE to Jupyter, but it still shows the error and I don't know why.
I tried the code below for table creation in SQLyog:
def CreateTable():
    conn = CreateConn()
    cursor = conn.cursor()  # helps to execute your query
    query = "create table student(sid int primary key auto_increment,name VARCHAR(50),email VARCHAR(50),city VARCHAR(50)"
    cursor.execute(query)
    conn.commit()
    print("table created")
    conn.close()

CreateTable()
I expected the table below in the SQLyog database:
(screenshot: expected result of the above code in the SQLyog database using PyCharm)
What I got as a result are the error lines below:
Traceback (most recent call last):
File "C:\Users\asus\PycharmProjects\pythonProject\Database\Database.py", line 29, in <module>
CreateTable() #CALLING CREATE TABLE FUNCTION
File "C:\Users\asus\PycharmProjects\pythonProject\Database\Database.py", line 24, in CreateTable
cursor.execute(query)
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\cursors.py", line 148, in execute
result = self._query(query)
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\cursors.py", line 310, in _query
conn.query(q)
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\connections.py", line 548, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\connections.py", line 775, in _read_query_result
result.read()
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\connections.py", line 1156, in read
first_packet = self.connection._read_packet()
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\connections.py", line 725, in _read_packet
packet.raise_for_error()
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\protocol.py", line 221, in raise_for_error
err.raise_mysql_exception(self._data)
File "C:\Users\asus\PycharmProjects\pythonProject\venv\lib\site-packages\pymysql\err.py", line 143, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '' at line 1")
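For what it's worth, a 1064 error reported near '' at the very end of the statement is the classic symptom of an unclosed parenthesis, and the CREATE TABLE query above never closes its column list. A sketch of the corrected statement (column definitions unchanged, assuming the missing ')' is the only problem):

query = ("create table student("
         "sid int primary key auto_increment,"
         "name VARCHAR(50),"
         "email VARCHAR(50),"
         "city VARCHAR(50))")  # closing parenthesis added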

Django Postgres table partitioning not working with 'pgpartition' command

I have a huge "Logs" Postgres table being used extensively in my Django project.
The Logs table has more than 20 million records, which is slowing down queries; page load time has increased as well.
I am using the Django and Postgres versions below:-
Django: 4.0
Postgres: 13
I read about table partitioning and decided to use "django-postgres-extra" as I don't want to manage migration files.
I followed all the steps mentioned in the link below, but I am not able to create partitioned tables using the "pgpartition" command.
https://django-postgres-extra.readthedocs.io/en/master/table_partitioning.html
Am I missing something here?
models.py changes:-
from django.db import models
from psqlextra.types import PostgresPartitioningMethod
from psqlextra.models import PostgresPartitionedModel
from psqlextra.manager import PostgresManager

class Logs(PostgresPartitionedModel):
    class PartitioningMeta:
        method = PostgresPartitioningMethod.RANGE
        key = ["ctime"]

    objects = PostgresManager()

    ctime = models.DateTimeField(auto_now_add=True)
    logname = models.CharField(max_length=20)
    builds = models.ForeignKey(Build, on_delete=models.CASCADE)  # on_delete is required since Django 2.0
settings.py
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.postgres',
    'psqlextra',
    'logs',
]
DATABASES = {
    'default': {
        'ENGINE': 'psqlextra.backend',
        'NAME': 'logdb',
        'USER': 'root',
        'PASSWORD': 'crypt',
        'HOST': 'localhost',
        'PORT': 5432,
    }
}
PSQLEXTRA_PARTITIONING_MANAGER = 'logs.partition.manager'
I created a file named 'partition.py' under the logs app and defined a manager object as per the doc link above.
partition.py
from dateutil.relativedelta import relativedelta
from logs.models import Logs
from psqlextra.partitioning import (
    PostgresPartitioningManager,
    PostgresCurrentTimePartitioningStrategy,
    PostgresTimePartitionSize,
    partition_by_current_time,
)
from psqlextra.partitioning.config import PostgresPartitioningConfig

manager = PostgresPartitioningManager([
    # 1 partition ahead, each partition is one month
    # delete partitions older than 1 month
    # partitions will be named `[table_name]_[year]_[3-letter month name]`.
    PostgresPartitioningConfig(
        model=Logs,
        strategy=PostgresCurrentTimePartitioningStrategy(
            size=PostgresTimePartitionSize(months=1),
            count=1,
            max_age=relativedelta(months=1),
        ),
    ),
])
When I ran the "python manage.py pgmakemigrations" command, it created a migration file as below:-
0002_alter_log_managers.py
# Generated by Django 4.0.3 on 2022-05-19 12:22
from django.db import migrations
import psqlextra.manager.manager

class Migration(migrations.Migration):

    dependencies = [
        ('logs', '0001_initial'),
    ]

    operations = [
        migrations.AlterModelManagers(
            name='logs',
            managers=[
                ('objects', psqlextra.manager.manager.PostgresManager()),
            ],
        ),
    ]
But as soon as I run the "pgpartition" command I get the error below:-
python manage.py pgpartition
Traceback (most recent call last):
File "/home/user/party/manage.py", line 22, in <module>
main()
File "/home/user/party/manage.py", line 18, in main
execute_from_command_line(sys.argv)
File "/home/user/venv/lib/python3.10/site-packages/django/core/management/__init__.py", line 446, in execute_from_command_line
utility.execute()
File "/home/user/venv/lib/python3.10/site-packages/django/core/management/__init__.py", line 440, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home/user/venv/lib/python3.10/site-packages/django/core/management/base.py", line 414, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/user/venv/lib/python3.10/site-packages/django/core/management/base.py", line 460, in execute
output = self.handle(*args, **options)
File "/home/user/venv/lib/python3.10/site-packages/psqlextra/management/commands/pgpartition.py", line 72, in handle
plan = partitioning_manager.plan(
File "/home/user/venv/lib/python3.10/site-packages/psqlextra/partitioning/manager.py", line 52, in plan
model_plan = self._plan_for_config(
File "/home/user/venv/lib/python3.10/site-packages/psqlextra/partitioning/manager.py", line 84, in _plan_for_config
table = self._get_partitioned_table(connection, config.model)
File "/home/user/venv/lib/python3.10/site-packages/psqlextra/partitioning/manager.py", line 121, in _get_partitioned_table
raise PostgresPartitioningError(
psqlextra.partitioning.error.PostgresPartitioningError: Model Logs, with table logs_logs does not exists in the database. Did you run `python manage.py migrate`?
Not sure what I am missing here...
It looks like "django-postgres-extra" is only usable for declarative partitioning, i.e. when we anticipate that a table might grow big in the future, we can use declarative partitioning while creating the table at the very beginning.
https://www.postgresql.org/docs/current/ddl-partitioning.html#DDL-PARTITIONING-DECLARATIVE
So I am dropping the idea for now and trying to improve performance by adding more indexes, using caching, etc.
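As a hedged illustration of the indexing route (plain Django rather than psqlextra; the field names come from the model in the question, and the index name is hypothetical):

from django.db import models

class Logs(models.Model):
    ctime = models.DateTimeField(auto_now_add=True)
    logname = models.CharField(max_length=20)

    class Meta:
        # Index the timestamp column that log queries typically filter on.
        indexes = [models.Index(fields=["ctime"], name="logs_ctime_idx")]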

Cannot create Jupyter Notebook in HDInsight 4.0

I'm using Azure HDInsight 4.0 (Spark 2.4). When I attempt to create a new Jupyter notebook (Spark, but I get a similar error for PySpark notebooks), I get the following error message:
Traceback (most recent call last):
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/base/handlers.py", line 457, in wrapper
    result = yield gen.maybe_future(method(self, *args, **kwargs))
  File "/usr/bin/anaconda/lib/python2.7/site-packages/tornado/gen.py", line 1015, in run
    value = future.result()
  File "/usr/bin/anaconda/lib/python2.7/site-packages/tornado/concurrent.py", line 237, in result
    raise_exc_info(self._exc_info)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/tornado/gen.py", line 1021, in run
    yielded = self.gen.throw(*exc_info)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/services/contents/handlers.py", line 216, in post
    yield self._new_untitled(path, type=type, ext=ext)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/tornado/gen.py", line 1015, in run
    value = future.result()
  File "/usr/bin/anaconda/lib/python2.7/site-packages/tornado/concurrent.py", line 237, in result
    raise_exc_info(self._exc_info)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/tornado/gen.py", line 285, in wrapper
    yielded = next(result)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/services/contents/handlers.py", line 171, in _new_untitled
    model = yield gen.maybe_future(self.contents_manager.new_untitled(path=path, type=type, ext=ext))
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/services/contents/manager.py", line 338, in new_untitled
    return self.new(model, path)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/services/contents/manager.py", line 364, in new
    model = self.save(model, path)
  File "/var/lib/.jupyter/jupyterazure/jupyterazure/httpfscontentsmanager.py", line 84, in save
    self.create_checkpoint(path)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/services/contents/manager.py", line 459, in create_checkpoint
    return self.checkpoints.create_checkpoint(self, path)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/notebook/services/contents/checkpoints.py", line 79, in create_checkpoint
    model = contents_mgr.get(path, content=True)
  File "/var/lib/.jupyter/jupyterazure/jupyterazure/httpfscontentsmanager.py", line 56, in get
    'metadata': {}})
  File "/var/lib/.jupyter/jupyterazure/jupyterazure/model.py", line 45, in create_model_from_blob
    nbformat.version_info[0])
  File "/usr/bin/anaconda/lib/python2.7/site-packages/nbformat/__init__.py", line 75, in reads
    nb = convert(nb, as_version)
  File "/usr/bin/anaconda/lib/python2.7/site-packages/nbformat/converter.py", line 54, in convert
    "version doesn't exist" % (to_version))
ValueError: Cannot convert notebook to v5 because that version doesn't exist
After this, a new notebook does appear on the home screen, but if I try to open it I get the following popup message:
An unknown error occurred while loading this notebook. This version can load notebook formats v4 or earlier. See the server log for details.
I can create a notebook just fine on an otherwise-identical HDI 3.6 cluster, but not on 4.0. (I need 4.0 because I need to use Spark 2.4.)
Has anyone experienced/resolved this before?
Recently, we have seen a couple of questions on the same issue. You may follow the steps below to resolve the issue.
Steps to resolve this issue:
Step 1: Connect to the headnode via ssh and change the content of the file /usr/bin/anaconda/lib/python2.7/site-packages/nbformat/_version.py, replacing the 5 with a 4.
Change it to:
version_info = (4, 0, 3)
Step 2: Restart the Jupyter service via Ambari.
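A convenience sketch of the Step 1 edit, assuming _version.py contains a line like version_info = (5, 0, 3) (back the file up first; the path comes from the answer above):

path = "/usr/bin/anaconda/lib/python2.7/site-packages/nbformat/_version.py"
with open(path) as f:
    src = f.read()
# Downgrade the advertised nbformat major version from 5 to 4.
with open(path, "w") as f:
    f.write(src.replace("version_info = (5,", "version_info = (4,"))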
For more details, refer to HDInsight Create not create Jupyter notebook.
Hope this helps. Do let us know if you have any further queries.

Error while initializing Ray on an EC2 master node

I am using Ray to run a parallel loop on an Ubuntu 14.04 cluster on AWS EC2. The following Python 3 script works well on my local machine with just 4 workers (imports and local initializations left out):-
ray.init()  #initialize Ray

@ray.remote
def test_loop(n):
    c = tests[n, 0]
    tout = 100
    rc = -1
    with tmp.TemporaryDirectory() as path:  #Create a temporary directory
        for files in filelist:              #then copy in all of the
            sh.copy(files, path)            #files
        txtfile = path + '/inputf.txt'      #create the external
        fileId = open(txtfile, 'w')         #data input text file,
        s = 'Number = ' + str(c) + "\n"     #write test number,
        fileId.write(s)
        fileId.close()                      #close external parameter file,
        os.chdir(path)                      #and change working directory
        try:                                #Try running simulation:
            rc = sp.call('./simulation.run', timeout=tout, stdout=sp.DEVNULL,
                         stderr=sp.DEVNULL, shell=True)  #(must use .call for timeout)
            outdat = sio.loadmat('outputf.dat')  #get the output data struct
            rt_Data = outdat.get('rt_Data')      #extract simulation output
            err = float(rt_Data[-1])             #use final value of error
        except:                             #If system fails to execute,
            err = deferr                    #use failure default
        #end try
        if (err <= 0) or (err > deferr) or (rc != 0):
            err = deferr                    #Catch other types of failure
    return err

if __name__ == '__main__':
    result = ray.get([test_loop.remote(n) for n in range(0, ntest)])
    print(result)
The unusual bit here is that simulation.run has to read a different test number from an external text file each time it runs. The file name is the same for all iterations of the loop, but the test number is different.
I launched an EC2 cluster using Ray, with the number of CPUs available equal to n (I am trusting that Ray will not default to multi-threading). Then I had to copy the filelist (which includes the Python script) from my local machine to the master node using rsync, because I couldn't do this from the config (see the recent question "Workers not being launched on EC2 by Ray"). Then I ssh'd into that node and ran the script. The result is a file-finding error:-
~$ python3 test_small.py
2019-04-29 23:39:27,065 WARNING worker.py:1337 -- WARNING: Not updating worker name since `setproctitle` is not installed. Install this with `pip install setproctitle` (or ray[debug]) to enable monitoring of worker processes.
2019-04-29 23:39:27,065 INFO node.py:469 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-04-29_23-39-27_3897/logs.
2019-04-29 23:39:27,172 INFO services.py:407 -- Waiting for redis server at 127.0.0.1:42930 to respond...
2019-04-29 23:39:27,281 INFO services.py:407 -- Waiting for redis server at 127.0.0.1:47779 to respond...
2019-04-29 23:39:27,282 INFO services.py:804 -- Starting Redis shard with 0.21 GB max memory.
2019-04-29 23:39:27,296 INFO node.py:483 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-04-29_23-39-27_3897/logs.
2019-04-29 23:39:27,296 INFO services.py:1427 -- Starting the Plasma object store with 0.31 GB memory using /dev/shm.
(pid=3917) sh: 0: getcwd() failed: No such file or directory
2019-04-29 23:39:44,960 ERROR worker.py:1672 -- Traceback (most recent call last):
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 909, in _process_task
self._store_outputs_in_object_store(return_object_ids, outputs)
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 820, in _store_outputs_in_object_store
self.put_object(object_ids[i], outputs[i])
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 375, in put_object
self.store_and_register(object_id, value)
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 309, in store_and_register
self.task_driver_id))
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 238, in get_serialization_context
_initialize_serialization(driver_id)
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 1148, in _initialize_serialization
serialization_context = pyarrow.default_serialization_context()
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/serialization.py", line 326, in default_serialization_context
register_default_serialization_handlers(context)
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/serialization.py", line 321, in register_default_serialization_handlers
_register_custom_pandas_handlers(serialization_context)
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/pyarrow_files/pyarrow/serialization.py", line 129, in _register_custom_pandas_handlers
import pandas as pd
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/__init__.py", line 42, in <module>
from pandas.core.api import *
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/core/api.py", line 10, in <module>
from pandas.core.groupby import Grouper
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/core/groupby.py", line 49, in <module>
from pandas.core.frame import DataFrame
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py", line 74, in <module>
from pandas.core.series import Series
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/core/series.py", line 3042, in <module>
import pandas.plotting._core as _gfx # noqa
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/plotting/__init__.py", line 8, in <module>
from pandas.plotting import _converter
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/pandas/plotting/_converter.py", line 7, in <module>
import matplotlib.units as units
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py", line 1060, in <module>
rcParams = rc_params()
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py", line 892, in rc_params
fname = matplotlib_fname()
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py", line 736, in matplotlib_fname
for fname in gen_candidates():
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py", line 725, in gen_candidates
yield os.path.join(six.moves.getcwd(), 'matplotlibrc')
FileNotFoundError: [Errno 2] No such file or directory
During handling of the above exception, another exception occurred:
The problem then seems to repeat for all the other workers and finally gives up:-
AttributeError: module 'pandas' has no attribute 'core'
This error is unexpected and should not have happened. Somehow a worker
crashed in an unanticipated way causing the main_loop to throw an exception,
which is being caught in "python/ray/workers/default_worker.py".
2019-04-29 23:44:08,489 ERROR worker.py:1672 -- A worker died or was killed while executing task 000000002d95245f833cdbf259672412d8455d89.
Traceback (most recent call last):
File "test_small.py", line 82, in <module>
result=ray.get([test_loop.remote(n) for n in range(0,ntest)])
File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ray/worker.py", line 2184, in get
raise value
ray.exceptions.RayWorkerError: The worker died unexpectedly while executing this task.
I suspect that I am not initializing Ray correctly. I tried ray.init(redis_address="172.31.50.149:6379"), which was the redis address given when the cluster was formed, but the error was more or less the same. I also tried starting Ray on the master (in case it needed starting):-
~$ ray start --redis-address 172.31.50.149:6379 #Start Ray
2019-04-29 23:46:20,774 INFO services.py:407 -- Waiting for redis server at 172.31.50.149:6379 to respond...
2019-04-29 23:48:29,076 INFO services.py:412 -- Failed to connect to the redis server, retrying.
....etc.
Installing pandas and matplotlib on the master node seems to have solved the problem. Ray now initializes successfully.
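One further hedged observation: the getcwd() failed line suggests each task also leaves its worker process sitting inside a deleted TemporaryDirectory, which is exactly where matplotlib's getcwd() probe later fails. A defensive sketch for the task body (run_in_dir is a hypothetical helper):

import os

def run_in_dir(path):
    # Sketch: chdir into the scratch directory and always chdir back, so
    # later imports in the same worker process don't see a deleted cwd.
    orig_cwd = os.getcwd()
    try:
        os.chdir(path)
        # ... copy files, run ./simulation.run, read outputf.dat ...
    finally:
        os.chdir(orig_cwd)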

python3 sqlalchemy pymysql connect string

Using Python 3, I can connect to MySQL using pymysql; all works as expected. The enclosed code works.
import pymysql

conn = pymysql.connect(host='127.0.0.1', unix_socket='/home/jhgong/mysql/tmp/mysql.sock',
                       user='root', passwd='my_pass', db='my_db', port='3333')
cur = conn.cursor()
cur.execute('select user from auth_users')
for i in cur:
    print(i)
Trying to get SQLAlchemy to connect with pymysql, the default example strings don't seem to work. The example above does not work unless I declare both the port number and a unix_socket.
Below is what I've been using to try to get SQLAlchemy to connect. I assume that the socket and port number are both needed, so I used connect_args to feed in a hash with the extra unix_socket location. No joy.
Enclosed is the snippet I've been using that creates the error.
conarg = {
    'unix_socket': '/home/jhgong/mysql/tmp/mysql.sock',
    'db': 'ice'
}
engine = create_engine('mysql+pymysql://root:my_pass@127.0.0.1:3333/my_db', connect_args=conarg, echo=True)
connection = engine.connect()
With or without the db in the conarg hash, I get the following error:
>>> connection = engine.connect()
2013-01-17 13:04:20,819 INFO sqlalchemy.engine.base.Engine b'SELECT DATABASE()'
2013-01-17 13:04:20,819 INFO sqlalchemy.engine.base.Engine ()
Traceback (most recent call last):
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/pool.py", line 724, in _do_get
return self._pool.get(wait, self._timeout)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/util/queue.py", line 163, in get
raise Empty
sqlalchemy.util.queue.Empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/base.py", line 1574, in connect
return self._connection_cls(self, **kwargs)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/base.py", line 58, in __init__
self.__connection = connection or engine.raw_connection()
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/base.py", line 1637, in raw_connection
return self.pool.unique_connection()
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/pool.py", line 182, in unique_connection
return _ConnectionFairy(self).checkout()
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/pool.py", line 398, in __init__
rec = self._connection_record = pool._do_get()
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/pool.py", line 744, in _do_get
con = self._create_connection()
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/pool.py", line 187, in _create_connection
return _ConnectionRecord(self)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/pool.py", line 284, in __init__
exec_once(self.connection, self)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/event.py", line 362, in exec_once
self(*args, **kw)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/event.py", line 379, in __call__
fn(*args, **kw)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/strategies.py", line 168, in first_connect
dialect.initialize(c)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/dialects/mysql/base.py", line 2005, in initialize
default.DefaultDialect.initialize(self, connection)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/default.py", line 183, in initialize
self._get_default_schema_name(connection)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/dialects/mysql/base.py", line 1970, in _get_default_schema_name
return connection.execute('SELECT DATABASE()').scalar()
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/base.py", line 645, in execute
params)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/base.py", line 791, in _execute_text
statement, parameters
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/base.py", line 854, in _execute_context
context)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/SQLAlchemy-0.8.0b1dev-py3.2.egg/sqlalchemy/engine/default.py", line 342, in do_execute
cursor.execute(statement, parameters)
File "/nfs/site/home/jhgongx/.local/lib/python3.2/site-packages/PyMySQL3-0.5-py3.2.egg/pymysql/cursors.py", line 105, in execute
query = query % escaped_args
TypeError: unsupported operand type(s) for %: 'bytes' and 'tuple'
It appears to be raising an error on an empty pool queue; setting the size or type of the pool queue has no effect.
Any suggestions on how to figure this out?
Try this very simple example:
import sqlalchemy
from sqlalchemy.sql import select
from sqlalchemy import Table, MetaData

def init():
    try:
        server = 'xx'
        db = 'xx'
        login = 'xx'
        passwd = 'xx'
        engine_str = 'mysql+mysqlconnector://{}:{}@{}/{}'.format(login, passwd, server, db)
        engine = sqlalchemy.create_engine(engine_str, echo=False, encoding='utf-8')
        connection = engine.connect()
        metadata = MetaData()
        t_servers = Table('your_table_here', metadata, autoload=True, autoload_with=engine)
        s = select([t_servers])
        result = connection.execute(s)
        for row in result:
            print(row['the_field'])
    except Exception:
        raise
    finally:
        connection.close()

if __name__ == '__main__':
    init()
MySQL connector for Python 3: download here
I know this is late, but the PyMySQL requirements mention CPython >= 2.6 or >= 3.3, and it looks like you're using CPython 3.2 (the default version of Python used), so that may be your problem.
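For completeness, the MySQL dialects also forward URL query-string parameters to the DBAPI's connect(), so the socket can live in the URL itself; a sketch reusing the socket path from the question:

from sqlalchemy import create_engine

# Query-string arguments are passed through to pymysql.connect(), so
# unix_socket can be supplied without connect_args.
engine = create_engine(
    "mysql+pymysql://root:my_pass@localhost/my_db"
    "?unix_socket=/home/jhgong/mysql/tmp/mysql.sock",
    echo=True,
)
connection = engine.connect()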
