How to split string in GO by array of runes? - string

If there is any way to split string into array of strings, when you have as a separator an array of runes? There is an example what I want:
seperators = {' ',')','('}
SomeFunction("my string(qq bb)zz",seperators) => {"my","string","qq","bb","zz"}

For example,
package main
import (
"fmt"
"strings"
)
func split(s string, separators []rune) []string {
f := func(r rune) bool {
for _, s := range separators {
if r == s {
return true
}
}
return false
}
return strings.FieldsFunc(s, f)
}
func main() {
separators := []rune{' ', ')', '('}
s := "my string(qq bb)zz"
ss := split(s, separators)
fmt.Printf("%q\n", s)
fmt.Printf("%q\n", ss)
}
Output:
"my string(qq bb)zz"
["my" "string" "qq" "bb" "zz"]

with regexp:
package main
import (
"fmt"
"regexp"
)
var re = regexp.MustCompile("[() ]")
func main() {
text := "my string(qq bb)zz"
splinedText := re.Split(text, -1)
fmt.Printf("%q\n", text)
fmt.Printf("%q\n", splinedText)
}
output:
"my string(qq bb)zz"
["my" "string" "qq" "bb" "zz"]

I believe that a simpler approach would be to use the function FieldsFunc. Here is an example of it's implementation:
package main
import (
"fmt"
"strings"
"unicode"
)
func main() {
f := func(c rune) bool {
return !unicode.IsLetter(c) && !unicode.IsNumber(c)
}
fmt.Printf("Fields are: %q", strings.FieldsFunc(" foo1;bar2,baz3...", f))
}
Output :
Fields are: ["foo1" "bar2" "baz3"]

Related

How to remove string pattern and all the string behind that pattern?

For Example :
package main
import "fmt"
func main() {
pattern := "helloworld."
myString := "foo.bar.helloworld.qwerty.zxc.helloworld.asd"
fmt.Println(removeFromPattern(pattern, myString))
}
func removeFromPattern(p, ms string) string {
// I confused here (in efficient way)
}
Wanted output :
qwerty.zxc.helloworld.asd
How do I get that wanted output, also how to remove the first pattern and all the strings behind that pattern from myString ?
1- Using _, after, _ = strings.Cut(ms, p), try this:
func removeFromPattern(p, ms string) (after string) {
_, after, _ = strings.Cut(ms, p) // before and after sep.
return
}
Which uses strings.Index :
// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, "", false.
func Cut(s, sep string) (before, after string, found bool) {
if i := Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, "", false
}
2- Using strings.Index, try this:
func removeFromPattern(p, ms string) string {
i := strings.Index(ms, p)
if i == -1 {
return ""
}
return ms[i+len(p):]
}
3- Using strings.Split, try this:
func removeFromPattern(p, ms string) string {
a := strings.Split(ms, p)
if len(a) != 2 {
return ""
}
return a[1]
}
4- Using regexp, try this
func removeFromPattern(p, ms string) string {
a := regexp.MustCompile(p).FindStringSubmatch(ms)
if len(a) < 2 {
return ""
}
return a[1]
}
strings.Split is enough
func main() {
pattern := "helloworld."
myString := "foo.bar.helloworld.qwerty.zxc"
res := removeFromPattern(pattern, myString)
fmt.Println(res)
}
func removeFromPattern(p, ms string) string {
parts := strings.Split(ms, p)
if len(parts) > 1 {
return parts[1]
}
return ""
}
func removeFromPattern(p, ms string) string {
return strings.ReplaceAll(ms, p, "")
}
func main() {
pattern := "helloworld."
myString := "foo.bar.helloworld.qwerty.zxc"
res := removeFromPattern(pattern, myString)
fmt.Println(res)
}

How to split string two between characters

I want to split a string up between two characters( {{ and }} ).
I have an string like {{number1}} + {{number2}} > {{number3}}
and I'm looking for something that returns:
[number1, number2, number3]
You can try it with Regex:
s := "{{number1}} + {{number2}} > {{number3}}"
// Find all substrings in form {<var name>}
re := regexp.MustCompile("{[a-z]*[0-9]*[a-z]*}")
nums := re.FindAllString(s, -1)
// Remove '{' and '}' from all substrings
for i, _ := range nums {
nums[i] = strings.TrimPrefix(nums[i], "{")
nums[i] = strings.TrimSuffix(nums[i], "}")
}
fmt.Println(nums) // output: [number1 number2 number3]
You can experiment with regex here: https://regex101.com/r/kkPWAS/1
Use the regex [A-Za-z]+[0-9] and filter the alpha numeric parts of the string as string array.
package main
import (
"fmt"
"regexp"
)
func main() {
s := `{{number1}} + {{number2}} > {{number3}}`
re := regexp.MustCompile("[A-Za-z]+[0-9]")
p := re.FindAllString(s, -1)
fmt.Println(p) //[number1 number2 number3]
}
the hard way using the template parser ^^
package main
import (
"fmt"
"strings"
"text/template/parse"
)
func main() {
input := "{{number1}} + {{number2}} > {{number3}}"
out := parseit(input)
fmt.Printf("%#v\n", out)
}
func parseit(input string) (out []string) {
input = strings.Replace(input, "{{", "{{.", -1) // Force func calls to become variables.
tree, err := parse.Parse("", input, "{{", "}}")
if err != nil {
panic(err)
}
visit(tree[""].Root, func(n parse.Node) bool {
x, ok := n.(*parse.FieldNode)
if ok {
out = append(out, strings.Join(x.Ident, "."))
}
return true
})
return
}
func visit(n parse.Node, fn func(parse.Node) bool) bool {
if n == nil {
return true
}
if !fn(n) {
return false
}
if l, ok := n.(*parse.ListNode); ok {
for _, nn := range l.Nodes {
if !visit(nn, fn) {
continue
}
}
}
if l, ok := n.(*parse.RangeNode); ok {
if !visit(l.BranchNode.Pipe, fn) {
return false
}
if l.BranchNode.List != nil {
if !visit(l.BranchNode.List, fn) {
return false
}
}
if l.BranchNode.ElseList != nil {
if !visit(l.BranchNode.ElseList, fn) {
return false
}
}
}
if l, ok := n.(*parse.ActionNode); ok {
for _, c := range l.Pipe.Decl {
if !visit(c, fn) {
continue
}
}
for _, c := range l.Pipe.Cmds {
if !visit(c, fn) {
continue
}
}
}
if l, ok := n.(*parse.CommandNode); ok {
for _, a := range l.Args {
if !visit(a, fn) {
continue
}
}
}
if l, ok := n.(*parse.PipeNode); ok {
for _, a := range l.Decl {
if !visit(a, fn) {
continue
}
}
for _, a := range l.Cmds {
if !visit(a, fn) {
continue
}
}
}
return true
}
If it happens you really were manipulating template string, but fails to do so due to function calls and that you do not want to execute this input = strings.Replace(input, "{{", "{{.", -1) // Force func calls to become variables.
You can always force load a template using functions similar to
var reMissingIdent = regexp.MustCompile(`template: :[0-9]+: function "([^"]+)" not defined`)
func ParseTextTemplateAnyway(s string) (*texttemplate.Template, texttemplate.FuncMap, error) {
fn := texttemplate.FuncMap{}
for {
t, err := texttemplate.New("").Funcs(fn).Parse(s)
if err == nil {
return t, fn, err
}
s := err.Error()
res := reMissingIdent.FindAllStringSubmatch(s, -1)
if len(res) > 0 {
fn[res[0][1]] = func(s ...interface{}) string { return "" }
} else {
return t, fn, err
}
}
// return nil, nil
}
You don't need to use libraries. You can create your own function.
package main
const r1 = '{'
const r2 = '}'
func GetStrings(in string) (out []string) {
var tren string
wr := false
f := true
for _, c := range in {
if wr && c != r2 {
tren = tren + string(c)
}
if c == r1 {
f = !f
wr = f
}
if c == r2 {
wr = false
if f {
out = append(out, tren)
tren = ""
}
f = !f
}
}
return
}

unexpected EOF with fmt.Scanner

If I want to scan through a string, I can do this:
package main
import (
"fmt"
"strings"
)
func main() {
r := strings.NewReader("west north east")
for {
var s string
_, e := fmt.Fscan(r, &s)
fmt.Printf("%q %v\n", s, e)
if e != nil { break }
}
}
Result:
"west" <nil>
"north" <nil>
"east" <nil>
"" EOF
I recently discovered fmt.Scanner [1], so I thought I would try to implement
it. I came up with this:
package main
import (
"fmt"
"strings"
)
type comma struct { tok string }
func (c *comma) Scan(state fmt.ScanState, verb rune) error {
tok, err := state.Token(false, func(r rune) bool {
return r != ','
})
if err != nil {
return err
}
if _, _, err := state.ReadRune(); err != nil {
if len(tok) == 0 {
return err
}
}
c.tok = string(tok)
return nil
}
func main() {
r := strings.NewReader("west,north,east")
for {
var c comma
_, e := fmt.Fscan(r, &c)
fmt.Printf("%q %v\n", c.tok, e)
if e != nil { break }
}
}
Result:
"west" <nil>
"north" <nil>
"east" <nil>
"" unexpected EOF
So the result is pretty close, but what bothers me is the unexpected EOF. Is
it possible to just get a regular EOF with a custom fmt.Scanner? Am I doing
something wrong here, or is this a bug?
https://golang.org/pkg/fmt#Scanner
Thanks to Ian Lance Taylor on the golang-nuts list, he suggested to panic
the error instead of return. In the Go code, Fscan calls a function
doScan, which in turn calls a function errorHandler [1]. This last function
uses recover to turn any panic into regular error. This program gives
idential output to my original example:
package main
import (
"fmt"
"strings"
)
type comma struct { tok string }
func (c *comma) Scan(state fmt.ScanState, verb rune) error {
tok, err := state.Token(false, func(r rune) bool {
return r != ','
})
if err != nil { return err }
if _, _, err := state.ReadRune(); err != nil {
if len(tok) == 0 {
panic(err)
}
}
c.tok = string(tok)
return nil
}
func main() {
r := strings.NewReader("west,north,east")
for {
var c comma
_, err := fmt.Fscan(r, &c)
fmt.Printf("%q %v\n", c.tok, err)
if err != nil { break }
}
}
https://github.com/golang/go/blob/go1.16.4/src/fmt/scan.go#L1056-L1067

How to tokenize string with alphabetical and numerical values

I have a string like "test123abc45alsdkfj", I want my scanner to behave such that it read "test" first, then 123, then "abc", then 45, then "alsdkfj". Kinda like stringstream in C++, is there a way to do this? Thanks!
I think there is a simple way like this, hope it will help you
package main
import (
"fmt"
"strings"
"text/scanner"
)
func isDigit(c byte) bool {
if c >= 48 && c <= 57 {
return true
}
return false
}
func main() {
const src = `test123abc45alsdkfj`
var s scanner.Scanner
s.Init(strings.NewReader(src))
for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
chars := s.TokenText()
temp := string(chars[0])
for i := range chars {
if i > 0 {
if isDigit(chars[i]) != isDigit(chars[i-1]) {
fmt.Println(temp)
temp = string(chars[i])
} else {
temp += string(chars[i])
}
}
}
}
}
and output will be
test
123
abc
45

How to check if a string is all upper or lower case in Go?

What is an easy way in Golang to check if all characters in a string are upper case or lower case?
Also, how to handle a case where the string has punctuation?
See these examples:
package main
import (
"fmt"
"unicode"
)
func main() {
s := "UPPERCASE"
fmt.Println(s.IsUpper()) // Should print true
s = "lowercase"
fmt.Println(s.IsUpper()) // Should print false
s = "lowercase"
fmt.Println(s.IsLower()) // Should print true
s = "I'M YELLING AT YOU!"
fmt.Println(s.IsUpper()) // Should print true
}
Note: s.IsUpper() and s.IsLower() doesn't really exist, but would be nice to find an equivalent.
You can of course compare the upper and lower cased strings in their entirety, or you can short-circuit the comparisons on the first failure, which would be more efficient when comparing long strings.
func IsUpper(s string) bool {
for _, r := range s {
if !unicode.IsUpper(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
func IsLower(s string) bool {
for _, r := range s {
if !unicode.IsLower(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
One solution is to use strings.ToUpper()/ToLower() and compare with the original string. This works for the punctuation case as well.
Here's the solution:
package main
import (
"fmt"
"strings"
)
func main() {
s := "UPPERCASE"
fmt.Println(strings.ToUpper(s) == s)
s = "lowercase"
fmt.Println(strings.ToUpper(s) == s)
s = "lowercase"
fmt.Println(strings.ToLower(s) == s)
s = "I'M YELLING AT YOU!"
fmt.Println(strings.ToUpper(s) == s)
}
A unicode.{IsUpper, Lower} and B strings.{ToUpper, Lower} both good
For the data composed of single bytes, A will be better than B
If the data byte is unsure then B is better than A: for example 中文a1
package main
import (
"strings"
"testing"
"unicode"
)
func IsUpperU(s string) bool {
for _, r := range s {
if !unicode.IsUpper(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
func IsUpper(s string) bool {
return strings.ToUpper(s) == s
}
func IsLowerU(s string) bool {
for _, r := range s {
if !unicode.IsLower(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
func IsLower(s string) bool {
return strings.ToLower(s) == s
}
func TestIsUpper(t *testing.T) {
for _, d := range []struct {
actual bool
expected bool
}{
{IsUpperU("中文A1"), false}, // be careful!
{IsUpper("中文A1"), true},
{IsUpper("中文a1"), false},
{IsUpperU("中文a1"), false},
} {
if d.actual != d.expected {
t.Fatal()
}
}
}
func TestIsLower(t *testing.T) {
for idx, d := range []struct {
actual bool
expected bool
}{
{IsLowerU("中文a1"), false}, // be careful!
{IsLower("中文a1"), true},
{IsLower("中文A1"), false},
{IsLowerU("中文A1"), false},
} {
if d.actual != d.expected {
t.Fatal(idx)
}
}
}
go playground
No need for unicode (For English letters only):
func IsUpper(s string) bool {
for _, charNumber := range s {
if charNumber > 90 || charNumber < 65 {
return false
}
}
return true
}
func IsLower(s string) bool {
for _, charNumber := range s {
if charNumber > 122 || charNumber < 97 {
return false
}
}
return true
}

Resources