Error creating partition key using MergeTree engine, Clickhouse - python-3.x

I've been trying to create a model using infi.clickhouse_orm, but there is an issue with the partition key.
My model:
from infi.clickhouse_orm import Model, UInt16Field, Float32Field, StringField, MergeTree,DateField
class OHLC(Model):
    """Model with id, min, max, start_date and interval columns.

    NOTE: the engine below is built with ``partition_key`` only (no
    ``order_by``); this is the declaration that the reported syntax error
    comes from.
    """

    __tablename__ = 'ohlc'

    # Columns.
    id = UInt16Field()
    min = Float32Field()
    max = Float32Field()
    start_date = DateField()
    interval = StringField()

    # Table engine, partitioned by ``id``.
    engine = MergeTree(partition_key=['id'])
I get the error:
DB::Exception: Syntax error: .. SETTINGS index_granularity=8192.
Expected one of: Arrow, token, non-empty parenthesized list of
expressions
creating my db
""" SqlAlchemy ClickHouse database session maker """
# Open the 'test' database; the connection URL and the credentials are read
# from environment variables (a missing variable raises KeyError).
db = Database('test', db_url=os.environ['TEST_CONNECTION'],
username=os.environ['CLICKHOUSE_USER'], password=os.environ['CLICKHOUSE_PASSWORD'])
# Presumably a no-op when the database already exists — confirm against the ORM docs.
db.create_database()
# Issue the CREATE TABLE for the OHLC model; this is the call that reaches the engine clause.
db.create_table(OHLC)

The MergeTree engine requires a primary key in the table declaration, which is passed via the order_by parameter:
..
engine = MergeTree(partition_key=['id'], order_by=['id'])
..
from infi.clickhouse_orm.engines import MergeTree
from infi.clickhouse_orm.fields import UInt16Field, Float32Field, StringField, DateField
from infi.clickhouse_orm.models import Model
from sqlalchemy import create_engine
class OHLC(Model):
    """Model with id, min, max, start_date and interval columns.

    The MergeTree engine is given both a partition key and an ``order_by``
    key, both set to ``id``.
    """

    __tablename__ = 'ohlc'

    # Columns.
    id = UInt16Field()
    min = Float32Field()
    max = Float32Field()
    start_date = DateField()
    interval = StringField()

    # Table engine: partitioned by ``id`` and ordered by ``id``.
    engine = MergeTree(
        partition_key=['id'],
        order_by=['id'],
    )
# User "default" with an empty password; "@" separates the credentials from
# the host in a SQLAlchemy database URL.
engine = create_engine('clickhouse://default:@localhost/test_001')
with engine.connect() as conn:
    # NOTE(review): ``conn.connection`` presumably exposes the
    # infi.clickhouse_orm Database API through sqlalchemy-clickhouse — confirm.
    conn.connection.create_database()
    conn.connection.create_table(OHLC)
requirements.txt
sqlalchemy==1.3.18
sqlalchemy-clickhouse==0.1.5.post0
infi.clickhouse_orm==1.3.0
Using id as the partition key looks pretty suspicious; consider defining it as toYYYYMM(start_date) or something similar:
class OHLC(Model):
    """Model with id, min, max, start_date and interval columns.

    The MergeTree engine is partitioned by ``toYYYYMM(start_date)`` and
    ordered by ``id``.
    """

    __tablename__ = 'ohlc'

    # Columns.
    id = UInt16Field()
    min = Float32Field()
    max = Float32Field()
    start_date = DateField()
    interval = StringField()

    # Table engine: the partition key is an expression over ``start_date``.
    engine = MergeTree(
        partition_key=['toYYYYMM(start_date)'],
        order_by=['id'],
    )

Related

SQLalchemy query not updating relationship columns in put api

I have a model.py with a class Histogram and a table histogram_axes -
# Association table with one row per (histogram, axis) pair.
histograms_axes = Table(
"histograms_axes",
_ORMBase.metadata,
# The two foreign keys together form the composite primary key.
Column("histogram_id", ForeignKey("histograms.id"), primary_key=True),
Column("axis_id", ForeignKey("axes.id"), primary_key=True),
)
class Histogram(Base):
    """Histogram row linked to ``Axis`` rows through ``histograms_axes``."""

    block = Column(Integer)
    num = Column(Integer)
    id = Column(Integer, ForeignKey("s.id"))

    # Axes are reached through the association table and ordered by axis
    # name; the backref exposes the reverse side as ``Axis.histograms``.
    axes = relationship(
        "Axis",
        secondary=histograms_axes,
        backref="histograms",
        order_by="asc(Axis.name)",
    )
I have written a PUT API in FastAPI to update rows based on id, but it only updates the columns, not the relationship in the Histogram class.
@router.put("/histograms")
def put_histograms(response: HistogramSchema, id: int, axes: List[int], db: Session = Depends(get_db)):
    """Update the histogram that has the given ``id``.

    Args:
        response: Payload with the column values to apply; only fields that
            were explicitly set are used (``exclude_unset=True``).
        id: Identifier of the histogram to update.
        axes: Ids of the ``Axis`` rows to associate with the histogram.
        db: Database session injected through ``Depends(get_db)``.

    Returns:
        The result of ``one_or_none()`` on the histogram query after commit.

    Raises:
        HTTPException: With status 404 when no histogram has the given id.
    """
    histogram = db.query(Histogram).filter(Histogram.id == id)
    if not histogram.first():
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Histogram id {id} is not found!")
    # NOTE(review): ``histogram`` is a Query, not a Histogram instance, so this
    # assignment only sets a plain attribute on the Query object; the bulk
    # ``update()`` below writes column values and does not touch ``axes``.
    histogram.axes = db.query(Axis).filter(Axis.id.in_(axes)).all()
    response = response.dict(exclude_unset=True)
    histogram.update(response)
    db.commit()
    return histogram.one_or_none()
How can I update the relationship table as well?

Inserting pandas dataframe into django model

I am having an issue writing a dataframe to my django models.py.
The file is long, but is quite simple in its methodology:
-import modules
-create django database
-requests.get necessary data
-alter the data somewhat to fit my goals, save as df
-connect to django db and insert df
My models.py is the following:
from django.db import models
import requests
import pandas as pd
from datetime import timezone
from datetime import datetime
from datetime import date
from datetime import timedelta
import time
from django.conf import settings
from sqlalchemy.engine import create_engine
class cryptoData(models.Model):
    """Django model holding one row of per-coin market and social metrics."""

    # Identification and timestamp.
    coin = models.CharField(max_length=10)
    asset_id = models.SmallIntegerField()
    time = models.DateTimeField()

    # Market metrics.
    close = models.FloatField()
    volume = models.BigIntegerField()
    market_cap = models.FloatField()

    # Social metrics.
    reddit_posts = models.IntegerField()
    reddit_comments = models.IntegerField()
    tweets = models.IntegerField()
    tweet_favorites = models.IntegerField()
    social_volume = models.IntegerField()
# API key sent as the 'key' parameter of every LunarCrush request below.
lunarcrush_key = 'fakekey1234'
def top_coins():
    """Return the ten market entries with the largest ``mc`` value.

    Queries the LunarCrush market endpoint and returns a list of
    ``[s, mc]`` pairs (presumably symbol and market cap), sorted by ``mc``
    in descending order and cut to the first ten.
    """
    market_response = requests.get(
        url = 'https://api.lunarcrush.com/v2?data=market&',
        params = {'key': lunarcrush_key},
    )
    # One [s, mc] pair per market entry, largest ``mc`` first.
    ranked = sorted(
        ([item.get('s'), item.get('mc')] for item in market_response.json().get('data')),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:10]
# Runs at import time: performs the market request and keeps the top-ten pairs.
top_coins_lst = top_coins()
# First element of each pair, later passed as the 'symbol' request parameter.
top_coin_names_lst = [x[0] for x in top_coins_lst]
def get_coin_data(key, coin, date_diff, start_date, end_date):
    """Fetch the daily time series of one coin as a DataFrame.

    Args:
        key: LunarCrush API key sent with the request.
        coin: Symbol of the coin to fetch.
        date_diff: Number of data points to request.
        start_date: Start of the window; its tzinfo is replaced with UTC.
        end_date: End of the window; its tzinfo is replaced with UTC.

    Returns:
        A DataFrame with one row per time-series entry. ``coin`` is the
        first column and ``time`` is formatted as a
        ``'%Y-%m-%d %H:%M:%S'`` UTC string.
    """
    lc = requests.get(
        url='https://api.lunarcrush.com/v2?data=assets&',
        params={
            # Use the key passed by the caller rather than the module global.
            'key': key,
            'symbol': coin,
            'interval': 'day',
            'data_points': date_diff,
            'start': int(start_date.replace(tzinfo=timezone.utc).timestamp()),
            'end': int(end_date.replace(tzinfo=timezone.utc).timestamp()),
        },
    )
    # Parse the response body once instead of once per loop.
    time_series = lc.json().get('data')[0].get('timeSeries')

    # Union of all metric names, in order of first appearance.
    metric_names = list(
        dict.fromkeys(name for entry in time_series for name in entry)
    )
    # Building the frame from the dicts keeps every value under its own
    # column even when some entries lack a metric (the gap becomes NaN).
    metrics_df = pd.DataFrame(time_series, columns=metric_names)

    metrics_df['time'] = metrics_df['time'].apply(
        lambda ts: datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    )
    metrics_df['coin'] = coin

    # Move 'coin' to the front, keeping the other columns in order.
    cols = ['coin'] + [name for name in metrics_df.columns if name != 'coin']
    return metrics_df.loc[:, cols]
def get_all_coins_data(coins_list):
    """Fetch 700 days of data for every coin and concatenate the results.

    Args:
        coins_list: Iterable of coin symbols.

    Returns:
        The per-coin DataFrames joined with ``pd.concat``.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days = 700)
    n_days = (window_end - window_start).days

    frames = []
    for symbol in coins_list:
        frame = get_coin_data(lunarcrush_key, symbol, n_days, window_start, window_end)
        frames.append(frame)
        # Short pause between consecutive requests.
        time.sleep(.1)
    return pd.concat(frames)
# Runs at import time: fetch the data for every top coin.
df = get_all_coins_data(top_coin_names_lst)
# Keep only the columns that have a same-named field on the cryptoData model.
focused_df = df[['coin', 'asset_id', 'time', 'close', 'volume', 'market_cap', 'reddit_posts', 'reddit_comments', 'tweets', 'tweet_favorites', 'social_volume']]
# Connection settings are taken from Django's default database configuration.
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
# NOTE(review): this URL carries user, password, host and port; none of the
# valid SQLite URL forms listed in the reported error include those parts.
database_url = 'sqlite://{user}:{password}#localhost:5432/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
)
engine = create_engine(database_url, echo=False)
# NOTE(review): the model class is passed as the first argument; pandas'
# to_sql presumably expects the table name as a string — confirm.
focused_df.to_sql(cryptoData, con=engine)
When I run the manage.py runserver command, I get the following error:
sqlalchemy.exc.ArgumentError: Invalid SQLite URL: sqlite://user:password#localhost:5432/C:\Users\user\Programming\django_crypto_v6\source\db.sqlite3
Valid SQLite URL forms are:
sqlite:///:memory: (or, sqlite://)
sqlite:///relative/path/to/file.db
sqlite:////absolute/path/to/file.db
I'm struggling to resolve this issue. Any thoughts?
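You are using the wrong pattern for the SQLite database_url: a SQLite URL takes no user, password, host or port, only a path to the database file (for example sqlite:///relative/path/to/file.db).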
see the docs at https://docs.sqlalchemy.org/en/14/core/engines.html#sqlite

SQLAlchemy Order joined table by field in another joined table

My project requires that Orders are split into their individual lines, which can be displayed in their own views. I want these views to order the lines by eta, which is a value in the Order table.
I have 3 tables with a 1>1 join on tables 1&2 and a many>many join on tables 2 and 3 defined by table 4 as follows:
class Order(db.Model):
    """Order with an eta and its lines, loaded ordered by ``Line.id``."""

    id = db.Column(db.Integer, primary_key=True)
    eta = db.Column(db.DateTime())
    # The backref exposes the owning order on each line as ``Line.order``.
    order_lines = db.relationship(
        'Line',
        backref='order',
        order_by=lambda: Line.id,
    )

    def __repr__(self):
        # NOTE(review): ``increment_id`` is not declared on this model in the
        # visible code — confirm it exists elsewhere.
        template = '<Order No. {}>'
        return template.format(self.increment_id)
class Line(db.Model):
    """Single line of an order, tied to an order and to a product."""

    id = db.Column(db.Integer, primary_key=True)
    line_name = db.Column(db.String())

    # Foreign keys to the owning order and to the product.
    order_id = db.Column(db.Integer, db.ForeignKey('order.id'))
    product_id = db.Column(db.String, db.ForeignKey('product.product_id'))

    def __repr__(self):
        # NOTE(review): ``line_sku`` is not declared on this model in the
        # visible code — confirm it exists elsewhere.
        template = '<Line SKU: {}>'
        return template.format(self.line_sku)
class Line_view(db.Model):
id = db.Column(db.Integer, primary_key=True)
view_name = db.Column(db.String())
view_lines = relationship('Line',
secondary='line_view_join',
backref='views',
lazy='dynamic',
order_by= ***???*** ) #ordery by eta on Order table
def __repr__(self):
return '<View: {}>'.format(self.view_name)
class Line_view_join(db.Model):
    """Join table linking lines to views."""

    __tablename__ = 'line_view_join'

    id = db.Column(db.Integer(), primary_key=True)

    # Both foreign keys cascade on delete of the referenced row.
    line_id = db.Column(
        db.Integer(),
        db.ForeignKey('line.id', ondelete='CASCADE'),
    )
    view_id = db.Column(
        db.Integer(),
        db.ForeignKey('line_view.id', ondelete='CASCADE'),
    )
I am trying to work out how to query table 3, Line_View and have the joined Lines ordered by the eta of Order table.
Such that when querying:
chosen_view = Line_view.query.filter_by(id = 1).one()
chosen_view.view_lines are ordered by Order.eta
I have tried:
class Line_view(db.Model):
    """Named view over a set of lines, linked through ``line_view_join``."""

    id = db.Column(db.Integer, primary_key=True)
    view_name = db.Column(db.String())
    # Attempted ordering by the eta of each line's order.
    # NOTE: ``Line.order`` is a class-level relationship attribute, so
    # reading ``.eta`` from it raises the AttributeError reported for this
    # attempt.
    view_lines = relationship('Line',
                              secondary='line_view_join',
                              backref='views',
                              lazy='dynamic',
                              order_by=lambda: asc(Line.order.eta))

    def __repr__(self):
        return '<View: {}>'.format(self.view_name)
But this results in the error:
AttributeError: Neither 'InstrumentedAttribute' object nor 'Comparator' object associated with Line.order has an attribute 'eta'
Do you need to store the Line_views in the database? If not, you can query the Lines sorted by the eta attribute of the related order. Below, I create two orders with one line each, and then query the lines sorted by the eta attribute of their order:
# First order, with a single line attached through the ``order`` backref.
eta = datetime(2019,10,10)
o = Order(eta = eta)
l = Line(order=o, line_name="sample")
db.session.add(o)
db.session.add(l)
# Second order with a later eta, again with a single line.
eta = datetime(2019,11,11)
o1 = Order(eta = eta)
l1 = Line(order=o1, line_name="sample1")
db.session.add(o1)
db.session.add(l1)
db.session.commit()
# Join each line to its order and sort the lines by that order's eta.
lines = Line.query.join(Order).order_by(Order.eta)

How to get a field from the last row of a change log table in SQLAlchemy

I have the following example. When calling Order.next I would expect it to return a single order with the status NEW. However, I am getting Orders with other OrderStatus values.
If it matters this is using SQLite backend.
class OrderStatus(enum.Enum):
    """Status values an order can have, numbered from 0 to 3."""

    NEW = 0
    MAKING = 1
    MADE = 2
    COLLECTED = 3
class Order(Base):
    """Order whose status is read from its most recent ``OrderUpdate``."""

    __tablename__ = 'orders'
    id = Column(Integer, primary_key=True)
    # Updates are kept ordered by their ``position`` column.
    updates = relationship("OrderUpdate", order_by = "OrderUpdate.position", collection_class=ordering_list('position'))

    @hybrid_method
    def next(self):
        """Return the first order, by id, matched by the status filter."""
        # NOTE(review): Python's ``in`` is evaluated immediately and yields a
        # plain bool, so this does not build a SQL condition on the status.
        order = Order.query \
            .filter(Order.status in [OrderStatus.NEW]) \
            .order_by(Order.id) \
            .first()
        return order

    @hybrid_property
    def status(self):
        """Return the status of the latest update, or None if there is none."""
        # The highest update id is taken to be the most recent change.
        update = OrderUpdate \
            .query.filter_by(order_id = self.id) \
            .order_by(OrderUpdate.id.desc()) \
            .first()
        if update is None:
            return None
        return update.status
class OrderUpdate(Base):
    """One change-log entry recording a status for an order."""

    __tablename__ = 'order_updates'

    id = Column(Integer, primary_key=True)
    position = Column(Integer)
    status = Column('status', Enum(OrderStatus))
    comment = Column(String)

    # Link back to the owning order.
    order_id = Column(Integer, ForeignKey("orders.id"))
    order = relationship(
        "Order",
        back_populates="updates",
    )
I didn't check, but I think the problem is with this line:
.filter(Order.status in [OrderStatus.NEW])
If you need an IN condition, you need to use SQLAlchemy's in_(). So in your case it should look like this: Order.status.in_([1, 2]). You can also do it without in_():
# filter_by takes plain keyword arguments named after the mapped attribute.
Order.query.filter_by(status=0).order_by(Order.id).first()
Note about Enum: this is not specific to Flask or Flask-SQLAlchemy. Just for your information.
# Python 3.6.1
class OrderStatus(enum.Enum):
    NEW = 0
# A raw int does not compare equal to an Enum member.
0 in [OrderStatus.NEW] # False
# Comparing against the member's underlying value does match.
0 in [OrderStatus.NEW.value] # True
# Looking a member up by value returns the member itself.
val = OrderStatus(0)
val in [OrderStatus.NEW] # True
Hope this helps.

cassandra unable to create table keyerror

I'm trying to create a table but get this error -
keyspace = cluster.metadata.keyspaces[ks_name]
KeyError: 'cqlengine'
Models:
import uuid
from cqlengine import columns
from cqlengine.models import Model
from datetime import datetime
from cqlengine.management import sync_table
class Tickets(Model):
    """cqlengine model for a ticket, keyed by a generated UUID."""

    # A fresh UUID is generated for each row because the callable is passed.
    ticket_id = columns.UUID(primary_key=True, default=uuid.uuid4)
    # Pass the callable itself: ``datetime.now()`` would be evaluated once,
    # at class definition, and every row would share that same timestamp.
    created_dt = columns.DateTime(default=datetime.now)
    division = columns.Text()
    pg = columns.Text()
    duration = columns.Text()
    error_count = columns.Text()
    outage_caused = columns.Text()
    system_caused = columns.Text()
    addt_notes = columns.Text()
    ticket_num = columns.Text()
    ticket_type = columns.Text()
    row_create_ts = columns.DateTime(default=datetime.now)
    # NOTE(review): the default is a string on a DateTime column — confirm
    # that the driver converts it as intended.
    row_end_ts = columns.DateTime(
        default='9999-12-31 00:00:00.00000-00')
#connection.setup(['127.0.0.1'], "cqlengine", protocol_version=3)
# sync_table(Tickets)
from cqlengine import connection
# Connect to the local node with "cqlengine" as the second argument (the
# keyspace name that the reported KeyError refers to).
connection.setup(["localhost"], "cqlengine")
# Sync the Tickets model to a table; this is the step that hits the KeyError.
sync_table(Tickets)
The error occurs because you have not created a keyspace called cqlengine. You need to create a keyspace before using it. In this case you need to use cqlsh to create a keyspace called cqlengine before running sync_table.

Resources