cassandra unable to create table keyerror - cassandra

Im trying to create a table but get this error -
keyspace = cluster.metadata.keyspaces[ks_name]
KeyError: 'cqlengine'
Models:
import uuid
from cqlengine import columns
from cqlengine.models import Model
from datetime import datetime
from cqlengine.management import sync_table
class Tickets(Model):
ticket_id = columns.UUID(primary_key=True, default=uuid.uuid4)
created_dt = columns.DateTime(default=datetime.now())
division = columns.Text()
pg = columns.Text()
duration = columns.Text()
error_count = columns.Text()
outage_caused = columns.Text()
system_caused = columns.Text()
addt_notes = columns.Text()
ticket_num = columns.Text()
ticket_type = columns.Text()
row_create_ts = columns.DateTime(default=datetime.now())
row_end_ts = columns.DateTime(
default='9999-12-31 00:00:00.00000-00')
#connection.setup(['127.0.0.1'], "cqlengine", protocol_version=3)
# sync_table(Tickets)
from cqlengine import connection
connection.setup(["localhost"], "cqlengine")
sync_table(Tickets)

The error is because you have not created a keyspace called cqlengine. You need to create a keyspace before using it.In this case you need to use cqlsh to create a keyspace called cqlengine, before running sync_table.

Related

Inserting pandas dataframe into django model

I am having an issue writing a dataframe to my django models.py.
The file is long, but is quite simple in its methodology:
-import modules
-create django database
-requests.get necessary data
-alter data some to fit my goals, save as df
-connect to django db and insert df
My models.py is the following:
from django.db import models
import requests
import pandas as pd
from datetime import timezone
from datetime import datetime
from datetime import date
from datetime import timedelta
import time
from django.conf import settings
from sqlalchemy.engine import create_engine
class cryptoData(models.Model):
coin = models.CharField(max_length=10)
asset_id = models.SmallIntegerField()
time = models.DateTimeField()
close = models.FloatField()
volume = models.BigIntegerField()
market_cap = models.FloatField()
reddit_posts = models.IntegerField()
reddit_comments = models.IntegerField()
tweets = models.IntegerField()
tweet_favorites = models.IntegerField()
social_volume = models.IntegerField()
lunarcrush_key = 'fakekey1234'
def top_coins():
lc_market = requests.get(
url = 'https://api.lunarcrush.com/v2?data=market&',
params = {
'key': lunarcrush_key,
}
)
all_coins = []
for entry in lc_market.json().get('data'):
coin = []
coin.append(entry.get('s'))
coin.append(entry.get('mc'))
all_coins.append(coin)
all_coins.sort(key = lambda x : x[1], reverse = True)
top_ten_coins = all_coins[:10]
return(top_ten_coins)
top_coins_lst = top_coins()
top_coin_names_lst = [x[0] for x in top_coins_lst]
def get_coin_data(key, coin, date_diff, start_date, end_date):
lc = requests.get(
url = 'https://api.lunarcrush.com/v2?data=assets&',
params = {
'key': lunarcrush_key,
'symbol': coin,
'interval': 'day',
'data_points': date_diff,
'start': int(start_date.replace(tzinfo=timezone.utc).timestamp()),
'end': int(end_date.replace(tzinfo=timezone.utc).timestamp())
}
)
metric_names = []
for entry in lc.json().get('data')[0].get('timeSeries'):
for key in entry:
metric_names.append(key) if key not in metric_names else metric_names
metrics_list = []
for entry in lc.json().get('data')[0].get('timeSeries'):
row_list = []
for key in entry:
row_list.append(entry.get(key))
metrics_list.append(row_list)
metrics_df = pd.DataFrame(metrics_list, columns = metric_names)
metrics_df['time'] = metrics_df['time'].apply(lambda x : datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S'))
metrics_df['coin'] = coin
cols = list(metrics_df)
cols.insert(0, cols.pop(cols.index('coin')))
metrics_df = metrics_df.loc[:, cols]
return(metrics_df)
def get_all_coins_data(coins_list):
appended_data = []
end_date = datetime.now()
start_date = end_date - timedelta(days = 700)
date_diff = (end_date - start_date).days
for coin in coins_list:
appended_data.append(get_coin_data(lunarcrush_key, coin, date_diff, start_date, end_date))
time.sleep(.1)
output = pd.concat(appended_data)
return(output)
df = get_all_coins_data(top_coin_names_lst)
focused_df = df[['coin', 'asset_id', 'time', 'close', 'volume', 'market_cap', 'reddit_posts', 'reddit_comments', 'tweets', 'tweet_favorites', 'social_volume']]
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
database_url = 'sqlite://{user}:{password}#localhost:5432/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
)
engine = create_engine(database_url, echo=False)
focused_df.to_sql(cryptoData, con=engine)
When I run the manage.py runserver command, I get the following error:
sqlalchemy.exc.ArgumentError: Invalid SQLite URL: sqlite://user:password#localhost:5432/C:\Users\user\Programming\django_crypto_v6\source\db.sqlite3
Valid SQLite URL forms are:
sqlite:///:memory: (or, sqlite://)
sqlite:///relative/path/to/file.db
sqlite:////absolute/path/to/file.db
I'm struggling to resolve this issue. Any thoughts?
you are using the wrong pattern for SQLite database_url
see the docs at https://docs.sqlalchemy.org/en/14/core/engines.html#sqlite

Error creating partition key using MergeTree engine, Clickhouse

I've been trying to create model using infi.clickhouse_orm but there have been an issue with partition key
My model:
from infi.clickhouse_orm import Model, UInt16Field, Float32Field, StringField, MergeTree,DateField
class OHLC(Model):
__tablename__ = 'ohlc'
id = UInt16Field()
min = Float32Field()
max = Float32Field()
start_date = DateField()
interval = StringField()
engine = MergeTree(partition_key=['id'])
I get the error:
DB::Exception: Syntax error: .. SETTINGS index_granularity=8192.
Expected one of: Arrow, token, non-empty parenthesized list of
expressions
creating my db
""" SqlAlchemy ClickHouse database session maker """
db = Database('test', db_url=os.environ['TEST_CONNECTION'],
username=os.environ['CLICKHOUSE_USER'], password=os.environ['CLICKHOUSE_PASSWORD'])
db.create_database()
db.create_table(OHLC)
The MergeTree-engine required the primary key in the table declaration that passed in order_by-parameter:
..
engine = MergeTree(partition_key=['id'], order_by=['id'])
..
from infi.clickhouse_orm.engines import MergeTree
from infi.clickhouse_orm.fields import UInt16Field, Float32Field, StringField, DateField
from infi.clickhouse_orm.models import Model
from sqlalchemy import create_engine
class OHLC(Model):
__tablename__ = 'ohlc'
id = UInt16Field()
min = Float32Field()
max = Float32Field()
start_date = DateField()
interval = StringField()
engine = MergeTree(partition_key=['id'], order_by=['id'])
engine = create_engine('clickhouse://default:#localhost/test_001')
with engine.connect() as conn:
conn.connection.create_database()
conn.connection.create_table(OHLC)
requirements.txt
sqlalchemy==1.3.18
sqlalchemy-clickhouse==0.1.5.post0
infi.clickhouse_orm==1.3.0
Using id as partition key looks pretty suspicious, consider defining it as toYYYYMM(start_date) or something like this:
class OHLC(Model):
__tablename__ = 'ohlc'
id = UInt16Field()
min = Float32Field()
max = Float32Field()
start_date = DateField()
interval = StringField()
engine = MergeTree(partition_key=['toYYYYMM(start_date)'], order_by=['id'])

How i can list aws Security Groups older than 30 days in python?

import boto3
from datetime import datetime
ec2res = boto3.resource('ec2')
ec2cli = boto3.client('ec2')
now = datetime.now()
def calculator(launch_date, object):
age = now - launch_date
intage = int(age.days)
if intage <= 30:
print(object)
list_security_groups = ec2cli.describe_security_groups()
for security_group in list_security_groups['SecurityGroups']:
launch_date = datetime.strptime(security_group['CreationDate'].strftime("%Y-%m-%d %H:%M:%S.%f"), "%Y-%m-%d %H:%M:%S.%f")
intage = calculator(launch_date,security_group)
or
list_security_groups = ec2res.security_groups.all()
for security_group in list_security_groups:
launch_date = datetime.strptime(security_group.launchtime.strftime("%Y-%m-%d %H:%M:%S.%f"), "%Y-%m-%d %H:%M:%S.%f")
intage = calculator(launch_date,security_group)
these formats are working for other aws object, but i got KeyError for security groups
There is no concept of a "Creation Date", nor any date, for Security Groups.
You could theoretically go through the security group's history in AWS Config to get this information.
Always look at the documentation to see which fields are available.

AttributeError: 'datetime.datetime' object has no attribute 'striftime'

I am currently writing a machine learning program for school to predict the weather. I have been using this article https://stackabuse.com/using-machine-learning-to-predict-the-weather-part-1/ as my main resource (I have had to adjust as wunderground is no longer free so I have instead been using openweathermap). I was writing the data collection and organization part of my code I received the following error 'AttributeError: 'datetime.datetime' object has no attribute 'striftime'. Sorry in advance for the massive block of code, I figured it would be the best way to troubleshoot the problem. Thank you for any the help. The parts with '** code **' are what I am struggling with
from datetime import datetime
from datetime import timedelta
import time
from collections import namedtuple
import pandas as pd
import requests
import matplotlib.pyplot as plt
#Data collection and Organization
url = 'http://history.openweathermap.org//storage/d12a3df743e650ba4035d2c6d42fb68f.json'
#res = requests.get(url)
#data = res.json()
target_date = datetime(2018, 4, 22)
features = ["date", "temperature", "pressure", "humidity", "maxtemperature", "mintemperature"]
DailySummary = namedtuple("DailySummary", features)
def extra_weather_data(url, target_date, days):
for _ in range(days):
**request = url.format(target_date.striftime('%Y%m%d'))**
respone = requests.get(request)
if response.status_code == 200:
data = response.json()
records.append(DailySummary(
date = target_date,
temperature = data['main']['temp'],
pressure = data['main']['pressure'],
humidity = data['main']['humidity'],
maxtemperature = data['main']['temp_max'],
mintemperature = data['main']['temp_min']))
time.sleep(6)
target_date += timedelta(days=1)
**records = extra_weather_data(url, target_date, 365)**
#Finished data collection now begin to clean and process data using Pandas
df = pd.DataFrame(records, columns=features).set_index('date')
tmp = df[['temperature','pressure','humidty', 'maxtemperature', 'mintemperature']].head(10)
def derive_nth_day_feature(df, feature, N):
rows =df.shape[0]
nth_prior_measurements = [None]*N + [df[feature][i-N] for i in range(N,rows)]
col_name = "{}_{}".format(feature, N)
df[col_name] = nth_prior_measurements
for feature in features:
if feature != 'date':
for N in range(1, 4):
derive_nth_day_feature(df, feature, N)
df.columns

DataFrame' object has no attribute 'sort'

hello~could u help me to solve the questions?
【Anaconda3-4.4.0】
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://root:123456#localhost:3306/mysql?charset=utf8')
sql = pd.read_sql('all_gzdata', engine, chunksize = 10000)
counts = [ i['fullURLId'].value_counts() for i in sql]
counts = pd.concat(counts).groupby(level=0).sum()
counts = counts.reset_index()
counts.columns = ['index', 'num']
counts['type'] = counts['index'].str.extract('(\d{3})')
counts_ = counts[['type', 'num']].groupby('type').sum()
the above codes are normal,but if I add the sentence below,python warns“'DataFrame' object has no attribute 'sort'”
counts_.sort('num', ascending = False)
...Question solved.
The last code should be "counts_.sort_values('num',ascending=False)" instead.

Resources