Motivation
Abstract out parametrized (via custom function parameters), chainable (preferably via DataFrame.prototype) Polars expressions to provide user-defined, higher-level, reusable, and chainable data analysis functions on the DataFrame.
Desired behavior and failed attempt
import pl from "nodejs-polars"
const { DataFrame, col } = pl

// user-defined, higher-level, reusable and chainable data analysis function
// with arbitrarily complex parametrized Polars expressions
DataFrame.prototype.inc = function inc(column, x = 1, alias = `${column}Inc`) {
  return this.withColumn(col(column).add(x).alias(alias))
}

const df = new DataFrame({ a: [1, 2, 3] })

// works, but implies code duplication on reuse
console.log(df.withColumn(col("a").add(1).alias("aInc")))

// desired behavior; gives TypeError: df.inc is not a function
console.log(df.inc("a").inc("a", 2, "aInc2"))
What is the recommended way to define custom functions that encapsulate Polars expressions in nodejs-polars?
A functional approach that does not require additional libraries would be to create a simple wrapper and re-export Polars with an overridden DataFrame method from your own package.
// polars_extension.js
import pl from 'nodejs-polars'

const customMethods = {
  sumAlias() {
    return this.sum();
  },
};

export default {
  ...pl,
  DataFrame(...args) {
    return Object.assign(pl.DataFrame(...args), customMethods);
  }
}
// another_file.js
import pl from './polars_extension'
pl.DataFrame({num: [1, 2, 3]}).sumAlias()
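The same pattern extends to the parametrized inc from the question. One caveat: built-in operations such as withColumn return plain DataFrames without the attached methods, so a chainable method must re-attach them. A minimal sketch under that assumption (the re-wrapping via Object.assign is mine, not part of the original answer):
// polars_extension.js, extended with a parametrized, chainable method
import pl from 'nodejs-polars'

const customMethods = {
  inc(column, x = 1, alias = `${column}Inc`) {
    // withColumn returns a plain DataFrame, so re-attach the custom
    // methods to keep further .inc(...) calls chainable
    return Object.assign(
      this.withColumn(pl.col(column).add(x).alias(alias)),
      customMethods,
    );
  },
};

export default {
  ...pl,
  DataFrame(...args) {
    return Object.assign(pl.DataFrame(...args), customMethods);
  }
}

// another_file.js
import pl from './polars_extension'
pl.DataFrame({ a: [1, 2, 3] }).inc("a").inc("a", 2, "aInc2")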
Prototype-based solution
import pl from "nodejs-polars"
const { DataFrame, col } = pl

function DF(df) { this.df = df }
DF.prototype.inc = function inc(column, x = 1, alias = `${column}Inc`) {
  this.df = this.df.withColumn(col(column).add(x).alias(alias))
  return this
}

const df = new DF(new DataFrame({ a: [1, 2, 3] }))
console.log(df.inc("a").inc("a", 2, "aInc2").df)
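The tradeoff is that the DataFrame is wrapped rather than extended, so between custom calls the built-in DataFrame API is only reachable through the .df field.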
Functional programming solution (preferred)
import pl from "nodejs-polars"
import { curry, pipe } from "rambda"
const { DataFrame, col } = pl

function inc(column, x, alias, df) {
  return df.withColumn(col(column).add(x).alias(alias))
}
const makeInc = curry(inc)
const df = new DataFrame({ a: [1, 2, 3] })
console.log(pipe(makeInc("a", 1, "aInc"), makeInc("a", 2, "aInc2"))(df))
Output
shape: (3, 3)
┌─────┬──────┬───────┐
│ a ┆ aInc ┆ aInc2 │
│ --- ┆ --- ┆ --- │
│ f64 ┆ f64 ┆ f64 │
╞═════╪══════╪═══════╡
│ 1.0 ┆ 2.0 ┆ 3.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ 2.0 ┆ 3.0 ┆ 4.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ 3.0 ┆ 4.0 ┆ 5.0 │
└─────┴──────┴───────┘
Related
My code is as follows; the custom_parameters map fails to be decoded, and I'm not sure how to reference a map variable in the template file. How can I use the map in the template file? The error is:
Invalid template interpolation value; Cannot include the given value in a string template: string required..
main.tf looks like this
resource "google_dataflow_flex_template_job" "dataflow_jobs_static" {
provider = google-beta
for_each = {
for k, v in var.dataflows : k => v
if v.type == "static"
}
parameters = merge(
yamldecode(templatefile("df/${each.key}/config.yaml", {
tf_host_project = var.host_project
tf_dataflow_subnet = var.dataflow_subnet
tf_airflow_project = local.airflow_project
tf_common_project = "np-common"
tf_dataeng_project = local.dataeng_project
tf_domain = var.domain
tf_use_case = var.use_case
tf_env = var.env
tf_region = lookup(local.regions, each.value.region, "northamerica-northeast1")
tf_short_region = each.value.region
tf_dataflow_job = each.key
tf_dataflow_job_img_tag = each.value.active_img_tag
tf_metadata_json = indent(6, file("df/${each.key}/metadata.json"))
tf_sdk_language = each.value.sdk_language
tf_custom_parameters = each.value.custom_parameters[*]
}))
)
}
terraform.tfvars looks like this
dataflows = {
  "lastflow" = {
    type           = "static"
    region         = "nane1"
    sdk_language   = "JAVA"
    active_img_tag = "0.2"
    custom_parameters = {
      bootstrapServers = "abc,gv"
    }
  },
}
vars.tf
variable "dataflows" {
type = map(object({
type = string
region = string
sdk_language = string
active_img_tag = string
custom_parameters = map(string)
}))
default = {}
}
config.yaml
custom_var: ${tf_custom_parameters}
Also, my metadata.json file looks like this
{
  "name": "Streaming Beam PubSub to Kafka Testing",
  "description": "An Apache Beam streaming pipeline that reads JSON encoded messages from Pub/Sub, uses Beam to transform the message data, and writes the results to a Kafka",
  "parameters": [
    {
      "name": "custom_var",
      "isOptional": true
    }
  ]
}
Error
Error: Error in function call
│
│ on dataflow.tf line 60, in resource "google_dataflow_flex_template_job" "dataflow_jobs_static":
│ ├────────────────
│ │ each.key is "lastflow"
│ │ each.value.active_img_tag is "0.2"
│ │ each.value.custom_parameters is map of string with 1 element
│ │ each.value.region is "nane1"
│ │ each.value.sdk_language is "JAVA"
│ │ local.airflow_project is "-01"
│ │ local.dataeng_project is "-02"
│ │ local.regions is object with 2 attributes
│ │ var.common_project_index is "01"
│ │ var.dataflow_subnet is "dev-01"
│ │ var.domain is "datapltf"
│ │ var.env is "npe"
│ │ var.host_project is "rod-01"
│ │ var.use_case is "featcusto"
│
│ Call to function "templatefile" failed: df/lastflow/config.yaml:2,15-35:
│ Invalid template interpolation value; Cannot include the given value in a
│ string template: string required..
I fixed this by referencing the following doc:
https://www.terraform.io/language/functions/templatefile#maps
Following that, my solution is as follows. Maps cannot be interpolated directly into a string template; you iterate over them with a %{ for } directive instead.
The config.yaml was changed to this:
%{ for key, value in tf_custom_parameters }
${key}: ${value}
%{ endfor ~}
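With the example tfvars above, this loop renders roughly the following YAML (a sketch; exact blank lines depend on the ~ strip markers):
bootstrapServers: abc,gv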
And the metadata.json file was changed as follows:
{
  "name": "Streaming Beam PubSub to Kafka Testing",
  "description": "An Apache Beam streaming pipeline that reads JSON encoded messages from Pub/Sub, uses Beam to transform the message data, and writes the results to a Kafka",
  "parameters": [
    {
      "name": "bootstrapServers",
      "label": "test label.",
      "isOptional": true,
      "helpText": "test help Text"
    }
  ]
}
And the one change in the main.tf file was dropping the [*] splat:
.........
.........
tf_custom_parameters = each.value.custom_parameters
.........
This is the solution that helped.
I have defined a list called data; each element of the list is a map with a single key and value. I now need to access the KEY and VALUE inside each element of the data list.
locals {
  data = [
    { "secret1" = 1 },
    { "secret2" = 1 },
    { "secret3" = 1 },
    { "secret4" = 1 },
    { "secret5" = 1 }
  ]
}
The goal is to use the KEY and VALUE inside a google_secret_manager_secret_version data source: the key should go into the secret attribute and the value into the version attribute. Something like this:
data "google_secret_manager_secret_version" "secret_datas" {
count = length(local.data)
secret = local.data[count.index].key
project = "myproject"
version = local.data[count.index].value
}
My Current Error Message
│ Error: Unsupported attribute
│
│ on dependabot.tf line 38, in data "google_secret_manager_secret_version" "secret_data":
│ 38: version = local.data[count.index].value
│ ├────────────────
│ │ count.index is 1
│ │ local.data is tuple with 5 elements
│
│ This object does not have an attribute named "value".
This would be much easier with the modern for_each meta-argument. After restructuring the data in locals into a single map:
locals {
  data = {
    "secret1" = 1,
    "secret2" = 1,
    "secret3" = 1,
    "secret4" = 1,
    "secret5" = 1
  }
}
we can use it directly in the data source:
data "google_secret_manager_secret_version" "secret_datas" {
for_each = local.data
secret = each.key
project = "myproject"
version = each.value
}
I can read a CSV file that has no column headers with the following code, using Polars in Rust:
use polars::prelude::*;

fn read_wine_data() -> Result<DataFrame> {
    let file = "datastore/wine.data";
    CsvReader::from_path(file)?
        .has_header(false)
        .finish()
}

fn main() {
    let df = read_wine_data();
    match df {
        Ok(content) => println!("{:?}", content.head(Some(10))),
        Err(error) => panic!("Problem reading file: {:?}", error),
    }
}
But now I want to add column names to the dataframe, either while reading or after reading. How can I add the column names? Here is a column-name vector:
let COLUMN_NAMES = vec![
    "Class label", "Alcohol",
    "Malic acid", "Ash",
    "Alcalinity of ash", "Magnesium",
    "Total phenols", "Flavanoids",
    "Nonflavanoid phenols",
    "Proanthocyanins",
    "Color intensity", "Hue",
    "OD280/OD315 of diluted wines",
    "Proline",
];
How can I add these names to the dataframe? The data can be downloaded with the following command:
wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data
This seemed to work, by creating a schema object and passing it in with the with_schema method on the CsvReader:
use polars::prelude::*;
use polars::datatypes::DataType;

fn read_wine_data() -> Result<DataFrame> {
    let file = "datastore/wine.data";
    let mut schema: Schema = Schema::new();
    schema.with_column("wine".to_string(), DataType::Float32);
    CsvReader::from_path(file)?
        .has_header(false)
        .with_schema(&schema)
        .finish()
}

fn main() {
    let df = read_wine_data();
    match df {
        Ok(content) => println!("{:?}", content.head(Some(10))),
        Err(error) => panic!("Problem reading file: {:?}", error),
    }
}
Granted I don't know what the column names should be, but this is the output I got when adding the one column:
shape: (10, 1)
┌──────┐
│ wine │
│ --- │
│ f32 │
╞══════╡
│ 1.0 │
├╌╌╌╌╌╌┤
│ 1.0 │
├╌╌╌╌╌╌┤
│ 1.0 │
├╌╌╌╌╌╌┤
│ 1.0 │
├╌╌╌╌╌╌┤
│ ... │
├╌╌╌╌╌╌┤
│ 1.0 │
├╌╌╌╌╌╌┤
│ 1.0 │
├╌╌╌╌╌╌┤
│ 1.0 │
├╌╌╌╌╌╌┤
│ 1.0 │
└──────┘
Here is the full solution working for me:
use std::path::PathBuf;
use polars::prelude::*;
use polars::datatypes::DataType::{Float64, Int64};

fn read_csv_into_df(path: PathBuf) -> Result<DataFrame> {
    // one Field per CSV column; the wine data has 14 columns
    let schema = Schema::from(vec![
        Field::new("class_label", Int64),
        Field::new("alcohol", Float64),
        Field::new("malic_acid", Float64),
        Field::new("ash", Float64),
        Field::new("alcalinity_of_ash", Float64),
        Field::new("magnesium", Float64),
        Field::new("total_phenols", Float64),
        Field::new("flavanoids", Float64),
        Field::new("nonflavanoid_phenols", Float64),
        Field::new("proanthocyanins", Float64),
        Field::new("color_intensity", Float64),
        Field::new("hue", Float64),
        Field::new("od280/od315_of_diluted_wines", Float64),
        Field::new("proline", Float64),
    ]);
    CsvReader::from_path(path)?
        .has_header(false)
        .with_schema(&schema)
        .finish()
}
I had to use Field and a type for each field to create a schema, then pass that schema to CsvReader to read the data.
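Renaming after reading also works and keeps the inferred dtypes; a minimal sketch, assuming a Polars version that provides DataFrame::set_column_names:
use polars::prelude::*;

fn read_wine_data() -> Result<DataFrame> {
    let mut df = CsvReader::from_path("datastore/wine.data")?
        .has_header(false)
        .finish()?;
    // replace the auto-generated column_1..column_14 names in place
    df.set_column_names(&[
        "class_label", "alcohol", "malic_acid", "ash",
        "alcalinity_of_ash", "magnesium", "total_phenols", "flavanoids",
        "nonflavanoid_phenols", "proanthocyanins", "color_intensity", "hue",
        "od280/od315_of_diluted_wines", "proline",
    ])?;
    Ok(df)
}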
I am attempting to map the result from an SQL query into module instances. This seems correct to me, but Terraform returns an error, and no IP or server name comes through. Is there a bug somewhere?
I have main.tf
/*
 * admin database
 */
module "sql-servers-admindb" {
  source = "../terraform-modules/terraform-data-db"
  dbtype = "mysql"
  connection = {
    mysql = {
      host  = local.mysql-admin_credentials.dbhost
      user  = local.mysql-admin_credentials.dbuser
      pass  = local.mysql-admin_credentials.dbpass
      dbase = local.mysql-admin_credentials.dbname
    }
    sqlite = null
  }
  debugfile = "debug.json"
  query = {
    sqlquery = <<EOT
SELECT sd.serwer_id, sd.ip_local, sd.host_mysql
FROM serwer_panel sd
JOIN serwer_serwer_grupa ssg ON ssg.serwer_id = sd.serwer_id
JOIN serwer_grupa sg ON ssg.serwer_grupa_id = sg.id
EOT
  }
}

module "terraform-sql" {
  source     = "../terraform-modules/terraform-sql"
  depends_on = [module.sql-servers-admindb]
  for_each   = { for sql in module.sql-servers-admindb.result.sqlquery : sql.serwer_id => sql }
  general = {
    zone         = "sql.${var.dns_local_name[local.environment]}."
    ptr_networks = "${var.dns_local_reverse_allowed[local.environment]}"
  }
}
terraform-modules/terraform-sql/main.tf
// IP address and A record
module "sql-nsrecord_a" {
  source       = "../terraform-modules/terraform-generic-nsrecord"
  zone         = "${var.general.zone}."
  name         = "${var.sql.serwer_id}."
  type         = "A"
  records      = [var.sql.local_ip]
  ptr_networks = var.general.ptr_networks
}

/*
 * General
 */
variable "general" {
  type = object({
    zone         = string
    ptr_networks = list(any)
  })
  default = {
    zone         = ""
    ptr_networks = []
  }
}

/*
 * SQL records
 */
variable "sql" {
  type    = map(any)
  default = {}
}
Example debug.json
Array
(
    [sqlquery] => Array
        (
            [0] => Array
                (
                    [serwer_id] => s12
                    [ip_local] => 127.0.0.1
                    [host_mysql] =>
                )
        )
)
Return error:
│ Error: Missing map element
│
│ on ../terraform-modules/terraform-sql/main.tf line 9, in module "sql-nsrecord_a":
│ 9: name = "${var.sql.serwer_id}."
│ ├────────────────
│ │ var.sql is empty map of dynamic
│
│ This map does not have an element with the key "serwer_id".
╵
╷
│ Error: Missing map element
│
│ on ../terraform-modules/terraform-sql/main.tf line 11, in module "sql-nsrecord_a":
│ 11: records = [var.sql.local_ip]
│ ├────────────────
│ │ var.sql is empty map of dynamic
│
I am asking for advice on how to improve it.
You don't seem to be passing the variable sql to the terraform-sql module and thus its default value is used:
default = {}
Hence an empty map; no wonder it does not contain the expected entries.
You need to amend your
module "terraform-sql" {
...
}
to pass the sql argument too.
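A minimal sketch of the amended call, assuming each query row should become the module's sql value:
module "terraform-sql" {
  source     = "../terraform-modules/terraform-sql"
  depends_on = [module.sql-servers-admindb]
  for_each   = { for sql in module.sql-servers-admindb.result.sqlquery : sql.serwer_id => sql }

  # pass the current row through to the module's "sql" variable
  sql = each.value

  general = {
    zone         = "sql.${var.dns_local_name[local.environment]}."
    ptr_networks = var.dns_local_reverse_allowed[local.environment]
  }
}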
Let's say I have an object:
var o = {
  i1: {
    p1: '1/1',
    p2: '1/2'
  }
};
I can address its elements like this:
console.log('i1 p2:',o['i1']['p2']);
I can set a new 'vertical' property:
o['i1']['p3']='1/3';
console.table(o);
┌─────────┬───────┬───────┬───────┐
│ (index) │ p1 │ p2 │ p3 │
├─────────┼───────┼───────┼───────┤
│ i1 │ '1/1' │ '1/2' │ '1/3' │
└─────────┴───────┴───────┴───────┘
However, I cannot add another index ('horizontal' property) the same way:
o['i2']['p3'] = '2/3';
This results in the error TypeError: Cannot set property 'p3' of undefined, because there is no 'i2' property defined yet.
Is there any syntax to solve this? Thanks a lot.
Assign an object to the i2 property, where the object has a p3 property.
o.i2 = { p3: '2/3' }
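For the general case where the outer key may or may not exist yet, one common pattern is to create the inner object on first use (a sketch; the ??= nullish assignment operator requires an ES2021-capable runtime):
// create the inner object only if it is missing, then set the property
o.i2 ??= {};
o.i2.p3 = '2/3';
console.table(o); // now shows rows i1 and i2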