extracting a substring in a string with some conditions - string

I hope you are well !
I present my problem to you. I have a string (on a single line) and the idea is to iterate the string from a given position in the string, go left character by character and stop when we meet a separator (the separators are in the list), and do the same on the right and at the end, put the two together to form the word you are looking for. I wrote the following function, but it doesn't quite work, when I position myself at the end of a word like in position 4, it returns me a string "" instead of returning "this". When I go to position 1, it sends me "his" and not "this". When I put myself in position 44, I output "tha" instead of "that". On the other hand, in the middle of the string as for position 36 or 40, it works.
Can anyone help me fix these bugs?
(* separators *)
let sep_list = [';';',';'(';')';'.';' ';]
(* my string to test *)
let my_str = "this is my string for testing purpose, only that"
(* my function *)
let search_word_in_cursor (str:string) (char_list:char list) (pos:int) : string =
let rec foo_left (str:string) (char_list:char list) (pos:int) (ret:string) : string =
if ((List.exists (fun x -> Char.equal x str.[pos]) char_list) || (pos <= 0)) then ret
else if (pos == 0) then String.make 1 str.[pos] ^ ret
else foo_left str char_list (pos - 1) ((String.make 1 str.[pos]) ^ ret)
in
let rec foo_right (str:string) (char_list:char list) (pos:int) (ret:string) : string =
if ((List.exists (fun x -> Char.equal x str.[pos]) char_list) || (pos >= (String.length str - 1))) then ret
else foo_right str char_list (pos + 1) (ret ^ (String.make 1 str.[pos]))
in
let sl = foo_left str char_list pos "" in
let sr = foo_right str char_list pos "" in
if (sr == "" && sl == "") then ""
else if (sr == "" && sl != "") then sl
else if (sr != "" && sl == "") then sr
else (String.sub sl 0 (String.length sl - 1)) ^ sr
(* expects and results *)
let () =
print_string (search_word_in_cursor my_str sep_list 4); (* expected:"this" output:"" *)
print_string (search_word_in_cursor my_str sep_list 1);(* expected:"this" output:"his" *)
print_string (search_word_in_cursor my_str sep_list 44);(* expected:"that" output:"tha" *)
print_string (search_word_in_cursor my_str sep_list 36);(* expected:"purpose" output:"purpose" *)
print_string (search_word_in_cursor my_str sep_list 40)(* expected:"only" output:"only" *)
Thank you, goog to y'all !

Related

Ocaml - Check, given a list of transitions, the word is recognized

I created a function that returns a list of transitions. Transitions are of type int * char * int.
for example (0 'E' 1); (1 'A' 2). The valid alphabet for the transitions are A, C, G and T and the char 'E' represents epsilon.
type transition = int * char * int;;
let get_start (a,_,_) = a;;
let get_char (_,a,_) = a;;
let get_end (_,_,a) = a;;
The initial state and the final state are stored in the following variables.
...
let i_strt = ref !state_initial;;
let i_end = ref !state_end;;
exception Out_of_loop;;
let seq = read_line();;(* string to be tested *)
let len_seq = String.length seq -1;;
let lst_trs_length = List.length !aux_transitions -1;; (* aux_transitions -> all transitions*)
let i = ref 0;;
let f = ref 0;;
while !i <= len_seq do
let c_r = seq.[i]in (* c_r = 'A' seq = ACGT*)
try
while !j <= lst_trs_length do
let aux_trs = List.nth !aux_transitions !j in (* 0 'E' 1 -> 1 'A' 2 ....*)
if (get_start aux_trs) = !i_strt then (* *)
let aux_chr = get_char aux_trs in (* 'A' *)
if aux_chr = c_r then(
i_strt := get_end aux_trs; (* i_strt = 1*)
raise Out_of_loop
)
else if aux_chr = 'E' then(
i_strt := get_end aux_trs;
j := -1
);
j := !j+1
done;
with Out_of_loop ->();
i := !i +1
done;
I am trying to use these two cycles to check whether the string "seq" can be recognized or not by the list of transitions taking into account the initial state. I am having trouble writing this function ... I want a function that, given a list of transitions and a string, returns 'true' in case it is recognized or false in the negative case.

Getting all strings in a lua script

I'm trying to encode some strings in my lua script, and since that I have a lua script with over 200k characters, encrypting each string query in the script with a function such as this example below
local string = "stackoverflow"
local string = [[stackoverflow]]
local string = [==[stackoverflow]==]
local string = 'stackoverflow'
to
local string=decode("jkrtbfmviwcfn",519211)
Trying to provide all above results to thread through a gsub and have the gsub encode the string text with a random offset number.
So far, I was only capable of gsubbing full quotation marks through.
function encode(x,offset,a)
for char in string.gmatch(x, "%a") do
local encrypted = string.byte(char) + offset
while encrypted > 122 do
encrypted = encrypted - 26
end
while encrypted < 97 do
encrypted = encrypted + 26
end
a[#a+1] = string.char(encrypted)
end
return table.concat(a)
end
luacode=[==[thatstring.Value="Encryptme!" testvalue.Value=[[string with
a linebreak]] string.Text="STOP!"]==]
luacode=luacode:gsub([=["(.-)"]=],function(s)
print("Caught "..s)
local offset=math.random(1,4)
local encoded=encode(s,offset,{})
return [[decode("]]..encoded..[[",]]..offset..[[)]]
end)
print("\n"..luacode)
With its output being
Caught Encryptme!
Caught STOP!
thatstring.Value=decode("crgvctxqi",4) testvalue.Value=[[string with
a linebreak]] string.Text=decode("opkl",2)
Any better solutions?
local function strings_and_comments(lua_code, callback)
-- lua_code must be valid Lua code (an error may be raised on syntax error)
-- callback will be invoked as callback(object_type, value, start_pos, end_pos)
-- callback("comment", comment_text, start_pos, end_pos) -- for comments
-- callback("string", string_value, start_pos, end_pos) -- for string literals
local objects = {} -- possible comments and string literals in the code
-- search for all start positions of comments (with false positives)
for pos, br1, eq, br2 in lua_code:gmatch"()%-%-(%-*%[?)(=*)(%[?)" do
table.insert(objects, {start_pos = pos,
terminator = br1 == "[" and br2 == "[" and "]"..eq.."]" or "\n"})
end
-- search for all start positions of string literals (with false positives)
for pos, eq in lua_code:gmatch"()%[(=*)%[[%[=]*" do
table.insert(objects, {is_string = true, start_pos = pos,
terminator = "]"..eq.."]"})
end
for pos, quote in lua_code:gmatch"()(['\"])" do
table.insert(objects, {is_string = true, start_pos = pos, quote = quote})
end
table.sort(objects, function(a, b) return a.start_pos < b.start_pos end)
local end_pos = 0
for _, object in ipairs(objects) do
local start_pos, ok, symbol = object.start_pos
if start_pos > end_pos then
if object.terminator == "\n" then
end_pos = lua_code:find("\n", start_pos + 1, true) or #lua_code
-- exclude last spaces and newline
while lua_code:sub(end_pos, end_pos):match"%s" do
end_pos = end_pos - 1
end
elseif object.terminator then
ok, end_pos = lua_code:find(object.terminator, start_pos + 1, true)
assert(ok, "Not a valid Lua code")
else
end_pos = start_pos
repeat
ok, end_pos, symbol = lua_code:find("(\\?.)", end_pos + 1)
assert(ok, "Not a valid Lua code")
until symbol == object.quote
end
local value = lua_code:sub(start_pos, end_pos):gsub("^%-*%s*", "")
if object.terminator ~= "\n" then
value = assert((loadstring or load)("return "..value))()
end
callback(object.is_string and "string" or "comment", value, start_pos, end_pos)
end
end
end
local inv256
local function encode(str)
local seed = math.random(0x7FFFFFFF)
local result = '",'..seed..'))'
if not inv256 then
inv256 = {}
for M = 0, 127 do
local inv = -1
repeat inv = inv + 2
until inv * (2*M + 1) % 256 == 1
inv256[M] = inv
end
end
repeat
seed = seed * 3
until seed > 2^43
local K = 8186484168865098 + seed
result = '(decode("'..str:gsub('.',
function(m)
local L = K % 274877906944 -- 2^38
local H = (K - L) / 274877906944
local M = H % 128
m = m:byte()
local c = (m * inv256[M] - (H - M) / 128) % 256
K = L * 21271 + H + c + m
return ('%02x'):format(c)
end
)..result
return result
end
function hide_strings_in_lua_code(lua_code)
local text = { [[
local function decode(str, seed)
repeat
seed = seed * 3
until seed > 2^43
local K = 8186484168865098 + seed
return (str:gsub('%x%x',
function(c)
local L = K % 274877906944 -- 2^38
local H = (K - L) / 274877906944
local M = H % 128
c = tonumber(c, 16)
local m = (c + (H - M) / 128) * (2*M + 1) % 256
K = L * 21271 + H + c + m
return string.char(m)
end
))
end
]] }
local pos = 1
strings_and_comments(lua_code,
function (object_type, value, start_pos, end_pos)
if object_type == "string" then
table.insert(text, lua_code:sub(pos, start_pos - 1))
table.insert(text, encode(value))
pos = end_pos + 1
end
end)
table.insert(text, lua_code:sub(pos))
return table.concat(text)
end
Usage:
math.randomseed(os.time())
-- This is the program to be converted
local luacode = [===[
print"Hello world!"
print[[string with
a linebreak]]
local str1 = "stackoverflow"
local str2 = [[stackoverflow]]
local str3 = [==[stackoverflow]==]
local str4 = 'stackoverflow'
print(str1)
print(str2)
print(str3)
print(str4)
]===]
-- Conversion
print(hide_strings_in_lua_code(luacode))
Output (converted program)
local function decode(str, seed)
repeat
seed = seed * 3
until seed > 2^43
local K = 8186484168865098 + seed
return (str:gsub('%x%x',
function(c)
local L = K % 274877906944 -- 2^38
local H = (K - L) / 274877906944
local M = H % 128
c = tonumber(c, 16)
local m = (c + (H - M) / 128) * (2*M + 1) % 256
K = L * 21271 + H + c + m
return string.char(m)
end
))
end
print(decode("ef869b23b69b7fbc7f89bbe7",2686976))
print(decode("c2dc20f7061c452db49302f8a1d9317aad1009711e0984",1210253312))
local str1 = (decode("84854df4599affe9c894060431",415105024))
local str2 = (decode("a5d7db792f0b514417827f34e3",1736704000))
local str3 = (decode("6a61bcf9fd6f403ed1b4846e58",1256259584))
local str4 = (decode("cad56d9dea239514aca9c8b8e0",1030488064))
print(str1)
print(str2)
print(str3)
print(str4)
Output of output (output produced by the converted program)
Hello world!
string with
a linebreak
stackoverflow
stackoverflow
stackoverflow
stackoverflow

OCAML - strings and substrings

Could someone help me to write a function that checks if a string is a substring of another string?
(there can be more than only 2 strings)
Thanks
With String module:
let contains s1 s2 =
try
let len = String.length s2 in
for i = 0 to String.length s1 - len do
if String.sub s1 i len = s2 then raise Exit
done;
false
with Exit -> true
With Str module, like #barti_ddu said check this topic:
let contains s1 s2 =
let re = Str.regexp_string s2 in
try
ignore (Str.search_forward re s1 0);
true
with Not_found -> false
With Batteries, you can use String.exists. It also exists in ExtLib: String.exists.
A String-based alternative to cago's answer that might have better performance and lower memory usage:
let is_substring string substring =
let ssl = String.length substring and sl = String.length string in
if ssl = 0 || ssl > sl then false else
let max = sl - ssl and clone = String.create ssl in
let rec check pos =
pos <= max && (
String.blit string pos clone 0 ssl ; clone = substring
|| check (String.index_from string (succ pos) substring.[0])
)
in
try check (String.index string substring.[0])
with Not_found -> false
String str="hello world";
System.out.println(str.contains("world"));//true
System.out.println(str.contains("world1"));//false

better way to get the last character in a string in f#

I want the last character from a string
I've got str.[str.Length - 1], but that's ugly. There must be a better way.
There's no better way to do it - what you have is fine.
If you really plan to do it a lot, you can author an F# extension property on the string type:
let s = "food"
type System.String with
member this.Last =
this.Chars(this.Length-1) // may raise an exception
printfn "%c" s.Last
This could be also handy:
let s = "I am string"
let lastChar = s |> Seq.last
Result:
val lastChar : char = 'g'
(This is old question), someone might find this useful, orig answer from Brian.
type System.String with
member this.Last() =
if this.Length > 1 then
this.Chars(this.Length - 1).ToString()
else
this.[0].ToString()
member this.Last(n:int) =
let absn = Math.Abs(n)
if this.Length > absn then
let nn =
let a = if absn = 0 then 1 else absn
let b = this.Length - a
if b < 0 then 0 else b
this.Chars(nn).ToString()
else
this.[0].ToString()
"ABCD".Last() -> "D"
"ABCD".Last(1) -> "D"
"ABCD".Last(-1) -> "D"
"ABCD".Last(2) -> "C"
You could also treat it as a sequence, but I'm not sure if that's any more or less ugly than the solution you have:
Seq.nth (Seq.length str - 1) str

How do I strip whitespace from a string in OCaml?

To learn the basics of OCaml, I'm solving one of the easy facebook engineering puzzles using it. Essentially, I'd like to do something like the following Python code:
some_str = some_str.strip()
That is, I'd like to strip all of the whitespace from the beginning and the end. I don't see anything obvious to do this in the OCaml Str library. Is there any easy way to do this, or am I going to have to write some code to do it (which I wouldn't mind, but would prefer not to :) ).
Bear in mind that I'm limited to what's in the libraries that come with the OCaml distribution.
I know this question is uber-old, but I was just pondering the same thing and came-up with this (from toplevel):
let strip str =
let str = Str.replace_first (Str.regexp "^ +") "" str in
Str.replace_first (Str.regexp " +$") "" str;;
val strip : string -> string = <fun>
then
strip " Hello, world! ";;
- : string = "Hello, world!"
UPDATE:
As of 4.00.0, standard library includes String.trim
It is really a mistake to limit yourself to the standard library, since the standard ilbrary is missing a lot of things. If, for example, you were to use Core, you could simply do:
open Core.Std
let x = String.strip " foobar "
let () = assert (x = "foobar")
You can of course look at the sources of Core if you want to see the implementation. There is a similar function in ExtLib.
how about
let trim str =
if str = "" then "" else
let search_pos init p next =
let rec search i =
if p i then raise(Failure "empty") else
match str.[i] with
| ' ' | '\n' | '\r' | '\t' -> search (next i)
| _ -> i
in
search init
in
let len = String.length str in
try
let left = search_pos 0 (fun i -> i >= len) (succ)
and right = search_pos (len - 1) (fun i -> i < 0) (pred)
in
String.sub str left (right - left + 1)
with
| Failure "empty" -> ""
(Via Code Codex)
I believe at the point when the other answers were given, version 4.00 was not out yet. Actually, in OCaml 4.00, there is a String.trim function in the string module to trim leading and trailing white spaces.
Alternatively, if you're restricted to an older version of OCaml, you may use this function that is shamelessly copied from the source of 4.00's string module.
let trim s =
let is_space = function
| ' ' | '\012' | '\n' | '\r' | '\t' -> true
| _ -> false in
let len = String.length s in
let i = ref 0 in
while !i < len && is_space (String.get s !i) do
incr i
done;
let j = ref (len - 1) in
while !j >= !i && is_space (String.get s !j) do
decr j
done;
if !i = 0 && !j = len - 1 then
s
else if !j >= !i then
String.sub s !i (!j - !i + 1)
else
""
;;
Something simple like this should work fine:
#require "str";;
let strip_string s =
Str.global_replace (Str.regexp "[\r\n\t ]") "" s
Standard library's
String.trim
does exactly that.

Resources