Need help fixing memory leak for elixir and sweet_xml library - memory-leaks

I'm a newbie in elixir.
I've got the following lib/osm.ex file
defmodule Osm do
import SweetXml
def hello do
:world
end
def main(args) do
args |> parse_args |> process
end
defp parse_args(args) do
{options, _, _} = OptionParser.parse(args, switches: [osm_file: :string, help: :boolean])
options
end
def output_help() do
IO.puts "Usage: osm [OPTION]"
IO.puts ""
IO.puts " --osm-file an osm-file to import"
IO.puts " --help outputs this help-page"
end
def process([]) do
IO.puts "No arguments given"
end
def process(options) do
if options[:help] do
output_help()
else
case options do
[osm_file: _] ->
process_osm_file(options[:osm_file])
end
end
end
def process_osm_file(file) do
counts = %{:nodes => 0, :ways => 0, :relations => 0}
cond do
String.ends_with?(file, ".pbf") ->
IO.puts "parse osm-pbf."
String.ends_with?(file, ".osm.bz2") ->
IO.puts "extract and parse osm-xml."
String.ends_with?(file, ".osm") ->
IO.puts "parse osm-xml."
File.stream!(file)
|> stream_tags([:node, :way, :relation], discard: [:node, :way, :relation])
|> Stream.map(fn
{_, node} ->
process_element(node, counts)
end)
|> Enum.reduce(fn element, result ->
result_modified = %{result |
nodes: result[:nodes] + element[:nodes],
ways: result[:ways] + element[:ways],
relations: result[:relations] + element[:relations]
}
cond do
rem(result_modified[:nodes], 1000) == 0 ->
IO.write "\rnodes: " <> to_string(result_modified[:nodes]) <> "; ways: " <> to_string(result_modified[:ways]) <> "; relations: " <> to_string(result_modified[:relations]) <> "; mem: " <> to_string(:erlang.memory(:total))
true -> true
end
result_modified
end)
|> Stream.run
IO.puts ""
true ->
IO.puts "invalid osm-file extension."
end
end
defp process_element(doc, counts) do
case doc |> xmlElement(:name) do
:node ->
doc |> xmap(
id: ~x"./#id"i,
lat: ~x"./#lat"f,
lon: ~x"./#lon"f,
tags: [
~x"./tag"l,
key: ~x"./#k"s,
value: ~x"./#v"s
]
) |> process_node(counts)
:way ->
doc |> xmap(
id: ~x"./#id"i,
nd: [
~x"./nd"l,
ref: ~x"./#ref"i
],
tags: [
~x"./tag"l,
key: ~x"./#k"s,
value: ~x"./#v"s
]
) |> process_way(counts)
:relation ->
doc |> xmap(
id: ~x"./#id"i,
member: [
~x"./member"l,
type: ~x"./#type"s,
ref: ~x"./#ref"s,
role: ~x"./#role"s
],
tags: [
~x"./tag"l,
key: ~x"./#k"s,
value: ~x"./#v"s
]
) |> process_relation(counts)
_ ->
IO.puts "unhandled element"
end
end
defp process_node(node, counts) do
_ = node
Map.put(counts, :nodes, counts[:nodes] + 1)
end
defp process_way(way, counts) do
_ = way
Map.put(counts, :ways, counts[:ways] + 1)
end
defp process_relation(relation, counts) do
_ = relation
Map.put(counts, :relations, counts[:relations] + 1)
end
end
and the following mix.exs file
defmodule Osm.MixProject do
use Mix.Project
def project do
[
app: :osm,
version: "0.1.0",
elixir: "~> 1.7",
start_permanent: Mix.env() == :prod,
escript: [main_module: Osm],
deps: deps()
]
end
def application do
[
extra_applications: [:logger]
]
end
defp deps do
[
{:sweet_xml, github: 'kbrw/sweet_xml', app: false}
]
end
end
I compile it with mix escript.build
I've downloaded the berlin-latest.osm.bz2 file and extracted the berlin-latest.osm file.
If I call ./osm --osm-file=berlin-latest.osm
The script parses the xml-data and counts the nodes, ways and relations correctly, but the memory consumption is increasing until the end.
Is there a memory leak in the SweetXml library or am I doing something wrong?

I do not see something that would create a memory leak in your code.
I did the following test: I removed progressively all the code using SweetXml, and it is when I withdrew the first part using SweetXml (i.e: stream_tags([:node, :way, :relation], discard: [:node, :way, :relation])) that the memory leak disappeared. This clearly indicates that the memory consumption comes from SweetXml
Reading the source code of SweetXml.stream_tags/3 function, might bring you some answers. I haven't yet understood from where the leak comes from.
Edit: after a thorough inspection of the source code, I still haven't found the origin of the leak. I am starting to thing it is something even deeper, maybe linked to the way erlang VM works.

Related

How find integer in text

Help me figure out how to work with text
i have a string like: "word1 number: word2" for example : "result 0: Good" or "result 299: Bad"
i need print Undefined/Low or High
When string is null , print Undefined
When number 0-15, print Low
When number >15, print High
type GetResponse =
{
MyData: string voption
ErrorMessage: string voption }
val result: Result<GetResponse, MyError>
and then i try:
MyData =
match result with
| Ok value ->
if (value.Messages = null) then
ValueSome "result: Undefined"
else
let result =
value.Messages.FirstOrDefault(
(fun x -> x.ToUpperInvariant().Contains("result")),
"Undefined"
)
if (result <> "Undefined") then
ValueSome result
else
errors.Add("We don't have any result")
ValueNone
| Error err ->
errors.Add(err.ToErrorString)
ValueNone
ErrorMessage =
if errors.Any() then
(errors |> String.concat ", " |> ValueSome)
else
ValueNone
but i dont know gow check in string number and maybe there is some way print this without a billion if?
Parsing gets complex very quickly. I recommend using FParsec to simplify the logic and avoid errors. A basic parser that seems to meet your needs:
open System
open FParsec
let parseWord =
manySatisfy Char.IsLetter
let parseValue =
parseWord // parse any word (e.g. "result")
>>. spaces1 // skip whitespace
>>. puint32 // parse an unsigned integer value
.>> skipChar ':' // skip colon character
.>> spaces // skip whitespace
.>> parseWord // parse any word (e.g. "Good")
You can then use it like this:
type ParserResult = Undefined | Low | High
let parse str =
if isNull str then Result.Ok Undefined
else
match run parseValue str with
| Success (num, _ , _) ->
if num <= 15u then Result.Ok Low
else Result.Ok High
| Failure (errorMsg, _, _) ->
Result.Error errorMsg
parse null |> printfn "%A" // Ok Undefined
parse "result 0: Good" |> printfn "%A" // Ok Low
parse "result 299: Bad" |> printfn "%A" // Ok High
parse "invalid input" |> printfn "%A" // Error "Error in Ln: 1 Col: 9 ... Expecting: integer number"
There's definitely a learning curve with FParsec, but I think it's worth adding to your toolbelt.
I agree with Brian that parsing can become quite tricky very quickly. However if you have some well established format of the input and you're not very much into writing complex parsers, good old regular expressions can be of service ;)
Here is my take on the problem - please note that it has plenty of room to improve, this is just a proof of concept:
open System.Text.RegularExpressions
let test1 = "result 0: Good"
let test2 = "result 299: Bad"
let test3 = "some other text"
type ParserResult =
| Undefined
| Low of int
| High of int
let (|ValidNumber|_|) s =
//https://learn.microsoft.com/en-us/dotnet/api/system.text.regularexpressions.regex?view=net-6.0
let rx = new Regex("(\w\s+)(\d+)\:(\s+\w)")
let matches = rx.Matches(s)
if matches.Count > 0 then
let groups = matches.[0].Groups |> Seq.toList
match groups with
| [_; _; a; _] -> Some (int a.Value)
| _ -> None
else
None
let parseMyString str =
match str with
| ValidNumber n when n < 16 -> Low n
| ValidNumber n -> High n
| _ -> Undefined
//let r = parseMyString test1
printfn "%A" (parseMyString test1)
printfn "%A" (parseMyString test2)
printfn "%A" (parseMyString test3)
The active pattern ValidNumber returns the Some number if a match of the input string is found, otherwise it returns None. The parseMyString function uses the pattern and guards to initialise the final ParserOutput value.

F# - Error Loading Table With Empty Last Row Using HtmlProvider

When loading a Wikipedia table using HtmlProvider, I get an error message because the last row in the table is empty!
module SOQN =
open System
open FSharp.Data
let [<Literal>] wikiUrl = #"https://en.wikipedia.org/wiki/COVID-19_testing#Virus_testing_statistics_by_country"
type Covid = HtmlProvider<wikiUrl>
let main() =
printfn ""
printfn "SOQN: Error Loading Table With Empty Last Row Using HtmlProvider?"
printfn ""
let feed = Covid.Load(wikiUrl)
feed.Tables.``Virus testing statistics by country``.Rows
|> Seq.map (fun r -> r.Date)
|> printf "%A "
printfn ""
0
[<EntryPoint>]
main() |> ignore
printfn "Fini!"
printfn ""
// Actual Output:
// "Date is missing"
//
// Expected Output:
// seq [ "Albania"; "19 Apr"; "5542"; "562"; "10.1"; "1,936"; "196"; "[121]" ]
// ...
//
What am I missing?
For instance, can I preset the column types to 'string' similar to using 'schema' with CsvProvider?

Elixir - undefined function do_match/4

To refactor, I'm trying to move code out of my router into controllers.
I'm getting this error when I do so:
== Compilation error on file lib/api/controllers/product.ex ==
** (CompileError) lib/plug/router.ex:211: undefined function do_match/4
(stdlib) lists.erl:1338: :lists.foreach/2
(stdlib) erl_eval.erl:670: :erl_eval.do_apply/6
controller
defmodule Api.Controllers.Product do
use Plug.Router
import Api.ProductCategory
alias Api.ProductCategory, as: ProductCategory
import Api.Product
import Api.Shop
alias Api.Shop, as: Shop
alias Api.Product, as: Product
import Api.ProductShop
alias Api.ProductShop, as: ProductShop
import Api.Subcategory
alias Api.Subcategory, as: Subcategory
import Api.Category
alias Api.Category, as: Category
import Ecto.Query
import Api.Repo
def put_product(conn) do
errors = {}
# IO.inspect(conn.body_params)
# IO.inspect(conn.query_params["p_id"])
product = Api.Product |> Api.Repo.get(conn.query_params["p_id"])
shop = Api.Shop |> Api.Repo.get(conn.query_params["s_id"])
params = for key <- ~w(image description),
value = conn.body_params[key], into: %{},
do: {key, value}
changeset = Api.Product.changeset(product, params)
case Api.Repo.update(changeset) do
{:ok, product} ->
errors = Tuple.append(errors, "Product updated")
{:error, changeset} ->
errors = Tuple.append(errors, "Product not updated")
end
pid = conn.query_params["p_id"]
sid = conn.query_params["s_id"]
price = conn.body_params["price"]
product_shop = Api.Repo.get_by(ProductShop, s_id: sid, p_id: pid)
IO.inspect(product_shop)
changeset2 = Api.ProductShop.changeset(product_shop, %{price: price})
case Api.Repo.update(changeset2) do
{:ok, product_shop} ->
errors = Tuple.append(errors, "Price updated")
{:error, changeset2} ->
errors = Tuple.append(errors, "Price not updated")
end
IO.inspect(errors)
conn
|> put_resp_content_type("application/json")
|> send_resp(200, Poison.encode!(%{
successs: "success",
errors: Tuple.to_list(errors)
}))
end
end
router.ex
defmodule Api.Router do
use Plug.Router
import Api.ProductCategory
alias Api.ProductCategory, as: ProductCategory
import Api.Product
import Api.Shop
alias Api.Shop, as: Shop
alias Api.Product, as: Product
import Api.ProductShop
alias Api.ProductShop, as: ProductShop
import Api.Subcategory
alias Api.Subcategory, as: Subcategory
import Api.Category
alias Api.Category, as: Category
import Ecto.Query
import Api.Controllers.Product
alias Api.Controllers.Product, as: ProductController
if Mix.env == :dev do
use Plug.Debugger
end
plug :match
plug Plug.Parsers, parsers: [:json],
pass: ["application/json"],
json_decoder: Poison
plug :dispatch
get "/favicon.ico" do
# get_categories(conn)
end
get "/categories/" do
get_categories(conn)
end
options "/categories/" do
get_categories(conn)
end
....
put "/products" do
ProductController.put_product(conn)
end
...
What is causing the error?
Full error:
Benjamins-MacBook-Pro:api Ben$ iex -S mix
Erlang/OTP 19 [erts-8.2] [source] [64-bit] [smp:8:8] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]
Compiling 2 files (.ex)
warning: the variable "errors" is unsafe as it has been set inside a case/cond/receive/if/&&/||. Please explic
itly return the variable value instead. For example:
case int do
1 -> atom = :one
2 -> atom = :two
end
should be written as
atom =
case int do
1 -> :one
2 -> :two
end
Unsafe variable found at:
lib/api/controllers/product.ex:54
warning: the variable "errors" is unsafe as it has been set inside a case/cond/receive/if/&&/||. Please explic
itly return the variable value instead. For example:
case int do
1 -> atom = :one
2 -> atom = :two
end
should be written as
atom =
case int do
1 -> :one
2 -> :two
end
Unsafe variable found at:
lib/api/controllers/product.ex:56
warning: the variable "errors" is unsafe as it has been set inside a case/cond/receive/if/&&/||. Please explic
itly return the variable value instead. For example:
case int do
1 -> atom = :one
2 -> atom = :two
end
should be written as
atom =
case int do
1 -> :one
2 -> :two
end
Unsafe variable found at:
lib/api/controllers/product.ex:59
lib/api/controllers/product.ex:59
warning: the variable "errors" is unsafe as it has been set inside a case/cond/receive/if/&&/||. Please explic
itly return the variable value instead. For example:
case int do
1 -> atom = :one
2 -> atom = :two
end
should be written as
atom =
case int do
1 -> :one
2 -> :two
end
Unsafe variable found at:
lib/api/controllers/product.ex:65
warning: variable "shop" is unused
lib/api/controllers/product.ex:30
warning: variable "product" is unused
lib/api/controllers/product.ex:38
warning: variable "changeset" is unused
lib/api/controllers/product.ex:40
warning: variable "product_shop" is unused
lib/api/controllers/product.ex:53
warning: variable "changeset2" is unused
lib/api/controllers/product.ex:55
== Compilation error on file lib/api/controllers/product.ex ==
** (CompileError) lib/plug/router.ex:211: undefined function do_match/4
(stdlib) lists.erl:1338: :lists.foreach/2
(stdlib) erl_eval.erl:670: :erl_eval.do_apply/6
You're getting the do_match error because your module uses Plug.Router but does not define any route. do_match function clauses are added by the get/post/etc macros in Plug.Router. With no routes, no function clause is ever defined causing that error. Since you don't actually want to define any routes in the module, you can just remove use Plug.Router.
You're also missing an import for the put_resp_content_type/2 function. Adding import Plug.Conn should fix that.

Elixir 'if' and 'and' not working as expected

I have this code:
if Map.has_key?(dbShop, "id") && Map.has_key?(dbProduct, "id") do
case Api.Repo.insertProductShop(conn, dbShop.id, dbProduct.id) do
{:ok, productShop} ->
{:ok, productShop}
{:error, changeset} ->
Tuple.append(errors, "could not insert product in the Shop")
end
else
IO.puts("(dbShop, id) && Map.has_key?(dbProduct, id) failed")
IO.inspect(dbShop)
IO.inspect(dbProduct)
end
Code execution makes it into the the else clause and the logs this to the console:
(dbShop, id) && Map.has_key?(dbProduct, id) failed
%Api.Shop{__meta__: #Ecto.Schema.Metadata<:loaded, "shops">, id: 23,
latitude: -36.846691, longitude: 174.7745803, name: "Yard Bar & Eatery",
placeId: "ChIJp6DGbAdIDW0RcbnExyPHvCk"}
%Api.Product{__meta__: #Ecto.Schema.Metadata<:loaded, "products">,
brand: "baba", description: " zhzngshshhshs", id: 34, image: "no-image",
name: "Nsn", numberOfVotes: nil, rating: nil}
So we can see that dbShop and dbProduct both have an id and somehow the code execution never makes it into the if clause. What am I doing wrong with my if clause? I want check that they both have an id and if so, go inside the if clause.
Full function in router:
post "/products" do
errors = {}
postedProduct = conn.body_params
dbProduct = %{}
dbShop = %{}
case Api.Repo.insertProduct(conn, postedProduct) do
{:success, product} ->
dbProduct = product
case Api.Repo.insertProductCategories(conn, postedProduct, dbProduct.id) do
{:ok, categories} ->
{:ok, categories}
{:error, failed_operation, failed_value, changes_so_far} ->
Tuple.append(errors, "could not insert productCategories. Product already has that category")
end
{:error, changeset} ->
IO.puts("product not inserted")
Tuple.append(errors, "could not insert product. Product already existed")
IO.inspect(errors)
end
if Map.has_key?(postedProduct, "shop") do
case Api.Repo.insertShop(conn, postedProduct["shop"]) do
{:ok, shop} ->
dbShop = shop
if Map.has_key?(dbShop, :id) && Map.has_key?(dbProduct, :id) do
case Api.Repo.insertProductShop(conn, dbShop.id, dbProduct.id) do
{:ok, productShop} ->
{:ok, productShop}
{:error, changeset} ->
Tuple.append(errors, "could not insert product in the Shop")
end
else
IO.puts("(dbShop, id) && Map.has_key?(dbProduct, id) failed")
IO.inspect(dbShop)
IO.inspect(dbProduct)
end
{:error, changeset} -> # shop already exists
# Tuple.append(errors, "could not insert shop")
if Map.has_key?(dbShop, "id") && Map.has_key?(dbProduct, "id") do
case Api.Repo.insertProductShop(conn, dbShop.id, dbProduct.id) do
{:ok, productShop} ->
{:ok, productShop}
{:error, changeset} ->
Tuple.append(errors, "The product has already been added to the shop")
end
end
end
end
if tuple_size(errors) > 0 do
IO.puts("errors")
IO.inspect(errors)
conn
|> put_resp_content_type("application/json")
|> send_resp(200, Poison.encode!(%{
successs: "success",
errors: errors
}))
else
conn
|> put_resp_content_type("application/json")
|> send_resp(200, Poison.encode!(%{
successs: "success"
}))
end
end
Your map has an atom :id as key, not the string "id", so your if should be:
if Map.has_key?(dbShop, :id) && Map.has_key?(dbProduct, :id) do
If the value is guaranteed to be non-nil if present, you can also shorten this to:
if dbShop[:id] && dbProduct[:id] do
Accessing a value using the bracket syntax will not throw an error if the key is not present, and instead return nil, which is a falsy value in Elixir.
Using if in Elixir is a code smell and a hint that the code might be re-written in more explicit manner. Here I would go with Kernel.SpecialForms.with/1, and instead of:
if Map.has_key?(dbShop, :id) && Map.has_key?(dbProduct, :id) do
# true
else
# false
end
use:
with %{id: shopId} when not is_nil(shopId) <- dbShop,
%{id: prodId} when not is_nil(prodId) <- dbProduct do
# true
else
^dbProduct -> IO.puts "prodId is nil"
^dbShop -> IO.puts "shopId is nil"
end

F#: I cannot return unit in a do clause and still have side effects

I'm writing a simple ini file parser and I'm having a little problem with the initialization of the object in the "do" clause. It wants me to return a unit but i can't get the blankity function to do the side effects if I try to pipe into an "ignore" or if i return "()" directly.
This code works as a separate function because I can ignore the results.
#light
module Utilities.Config
open System
open System.IO
open System.Text.RegularExpressions
open System.Collections.Generic
type Config(?fileName : string) =
let fileName = defaultArg fileName #"C:\path\myConfigs.ini"
static let defaultSettings =
dict[ "Setting1", "1";
"Setting2", "2";
"Debug", "0";
"State", "Disarray";]
let settingRegex = new Regex(#"\s*(?<key>([^;#=]*[^;#= ]))\s*=\s*(?<value>([^;#]*[^;# ]))")
let fileSettings = new Dictionary<string, string>()
let addFileSetting (groups : GroupCollection) =
fileSettings.Add(groups.Item("key").Value, groups.Item("value").Value)
do File.ReadAllLines(fileName)
|> Seq.map(fun line -> settingRegex.Match(line))
|> Seq.filter(fun mtch -> mtch.Success)
|> Seq.map(fun mtch -> addFileSetting(mtch.Groups) // Does not have the correct return type
//|> ignore //#1 Does not init the dictionary
//() //#2 Does not init the dictionary
//The extra step will work
member c.ReadFile =
File.ReadAllLines(fileName)
|> Seq.map(fun line -> settingRegex.Match(line))
|> Seq.filter(fun mtch -> mtch.Success)
|> Seq.map(fun mtch -> addFileSetting(mtch.Groups))
Use Seq.iter (executing an action for each element - returning unit) instead of Seq.map (transforming elements).
The code doesn't work with ignore because Seq's are evaluated lazily and when you ignore the result, there is no need to run any code at all. Read this article

Resources