How to remove string pattern and all the string behind that pattern? - string

For Example :
package main
import "fmt"
func main() {
pattern := "helloworld."
myString := "foo.bar.helloworld.qwerty.zxc.helloworld.asd"
fmt.Println(removeFromPattern(pattern, myString))
}
func removeFromPattern(p, ms string) string {
// I confused here (in efficient way)
}
Wanted output :
qwerty.zxc.helloworld.asd
How do I get that wanted output, also how to remove the first pattern and all the strings behind that pattern from myString ?

1- Using _, after, _ = strings.Cut(ms, p), try this:
func removeFromPattern(p, ms string) (after string) {
_, after, _ = strings.Cut(ms, p) // before and after sep.
return
}
Which uses strings.Index :
// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, "", false.
func Cut(s, sep string) (before, after string, found bool) {
if i := Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, "", false
}
2- Using strings.Index, try this:
func removeFromPattern(p, ms string) string {
i := strings.Index(ms, p)
if i == -1 {
return ""
}
return ms[i+len(p):]
}
3- Using strings.Split, try this:
func removeFromPattern(p, ms string) string {
a := strings.Split(ms, p)
if len(a) != 2 {
return ""
}
return a[1]
}
4- Using regexp, try this
func removeFromPattern(p, ms string) string {
a := regexp.MustCompile(p).FindStringSubmatch(ms)
if len(a) < 2 {
return ""
}
return a[1]
}

strings.Split is enough
func main() {
pattern := "helloworld."
myString := "foo.bar.helloworld.qwerty.zxc"
res := removeFromPattern(pattern, myString)
fmt.Println(res)
}
func removeFromPattern(p, ms string) string {
parts := strings.Split(ms, p)
if len(parts) > 1 {
return parts[1]
}
return ""
}

func removeFromPattern(p, ms string) string {
return strings.ReplaceAll(ms, p, "")
}
func main() {
pattern := "helloworld."
myString := "foo.bar.helloworld.qwerty.zxc"
res := removeFromPattern(pattern, myString)
fmt.Println(res)
}

Related

Splitting string in 2 parts by removing substring in golang

I'm trying to parse strings that look something like this:
abc***********xyz
into a slice (or 2 variables) of "abc" and "xyz", removing all the asterisks.
The number of * can be variable and so can the letters on each side, so it's not necessarily a fixed length. I'm wondering if go has a nice way of doing this with the strings package?
Use strings.FieldsFunc where * is a field separator.
s := "abc***********xyz"
z := strings.FieldsFunc(s, func(r rune) bool { return r == '*' })
fmt.Println(len(z), z) // prints 2 [abc xyz]
Live Example.
Split on any number of asterisks:
words := regexp.MustCompile(`\*+`).Split(str, -1)
See live demo.
For best performance, write a for loop:
func SplitAsteriks(s string) []string {
var (
in bool // true if inside a token
tokens []string // collect function result here
i int
)
for j, r := range s {
if r == '*' {
if in {
// transition from token to separator
tokens = append(tokens, s[i:j])
in = false
}
} else {
if !in {
// transition from one or more separators to token
i = j
in = true
}
}
}
if in {
tokens = append(tokens, s[i:])
}
return tokens
}
Playground.
if performance is an issue, you can use this func:
func SplitAsteriks(s string) (result []string) {
if len(s) == 0 {
return
}
i1, i2 := 0, 0
for i := 0; i < len(s); i++ {
if s[i] == '*' && i1 == 0 {
i1 = i
}
if s[len(s)-i-1] == '*' && i2 == 0 {
i2 = len(s) - i
}
if i1 > 0 && i2 > 0 {
result = append(result, s[:i1], s[i2:])
return
}
}
result = append(result, s)
return
}
playground
Use this code given that the string is specified to have two parts:
s := "abc***********xyz"
p := s[:strings.IndexByte(s, '*')]
q := s[strings.LastIndexByte(s, '*')+1:]
fmt.Println(p, q) // prints abc xyz

How to split string two between characters

I want to split a string up between two characters( {{ and }} ).
I have an string like {{number1}} + {{number2}} > {{number3}}
and I'm looking for something that returns:
[number1, number2, number3]
You can try it with Regex:
s := "{{number1}} + {{number2}} > {{number3}}"
// Find all substrings in form {<var name>}
re := regexp.MustCompile("{[a-z]*[0-9]*[a-z]*}")
nums := re.FindAllString(s, -1)
// Remove '{' and '}' from all substrings
for i, _ := range nums {
nums[i] = strings.TrimPrefix(nums[i], "{")
nums[i] = strings.TrimSuffix(nums[i], "}")
}
fmt.Println(nums) // output: [number1 number2 number3]
You can experiment with regex here: https://regex101.com/r/kkPWAS/1
Use the regex [A-Za-z]+[0-9] and filter the alpha numeric parts of the string as string array.
package main
import (
"fmt"
"regexp"
)
func main() {
s := `{{number1}} + {{number2}} > {{number3}}`
re := regexp.MustCompile("[A-Za-z]+[0-9]")
p := re.FindAllString(s, -1)
fmt.Println(p) //[number1 number2 number3]
}
the hard way using the template parser ^^
package main
import (
"fmt"
"strings"
"text/template/parse"
)
func main() {
input := "{{number1}} + {{number2}} > {{number3}}"
out := parseit(input)
fmt.Printf("%#v\n", out)
}
func parseit(input string) (out []string) {
input = strings.Replace(input, "{{", "{{.", -1) // Force func calls to become variables.
tree, err := parse.Parse("", input, "{{", "}}")
if err != nil {
panic(err)
}
visit(tree[""].Root, func(n parse.Node) bool {
x, ok := n.(*parse.FieldNode)
if ok {
out = append(out, strings.Join(x.Ident, "."))
}
return true
})
return
}
func visit(n parse.Node, fn func(parse.Node) bool) bool {
if n == nil {
return true
}
if !fn(n) {
return false
}
if l, ok := n.(*parse.ListNode); ok {
for _, nn := range l.Nodes {
if !visit(nn, fn) {
continue
}
}
}
if l, ok := n.(*parse.RangeNode); ok {
if !visit(l.BranchNode.Pipe, fn) {
return false
}
if l.BranchNode.List != nil {
if !visit(l.BranchNode.List, fn) {
return false
}
}
if l.BranchNode.ElseList != nil {
if !visit(l.BranchNode.ElseList, fn) {
return false
}
}
}
if l, ok := n.(*parse.ActionNode); ok {
for _, c := range l.Pipe.Decl {
if !visit(c, fn) {
continue
}
}
for _, c := range l.Pipe.Cmds {
if !visit(c, fn) {
continue
}
}
}
if l, ok := n.(*parse.CommandNode); ok {
for _, a := range l.Args {
if !visit(a, fn) {
continue
}
}
}
if l, ok := n.(*parse.PipeNode); ok {
for _, a := range l.Decl {
if !visit(a, fn) {
continue
}
}
for _, a := range l.Cmds {
if !visit(a, fn) {
continue
}
}
}
return true
}
If it happens you really were manipulating template string, but fails to do so due to function calls and that you do not want to execute this input = strings.Replace(input, "{{", "{{.", -1) // Force func calls to become variables.
You can always force load a template using functions similar to
var reMissingIdent = regexp.MustCompile(`template: :[0-9]+: function "([^"]+)" not defined`)
func ParseTextTemplateAnyway(s string) (*texttemplate.Template, texttemplate.FuncMap, error) {
fn := texttemplate.FuncMap{}
for {
t, err := texttemplate.New("").Funcs(fn).Parse(s)
if err == nil {
return t, fn, err
}
s := err.Error()
res := reMissingIdent.FindAllStringSubmatch(s, -1)
if len(res) > 0 {
fn[res[0][1]] = func(s ...interface{}) string { return "" }
} else {
return t, fn, err
}
}
// return nil, nil
}
You don't need to use libraries. You can create your own function.
package main
const r1 = '{'
const r2 = '}'
func GetStrings(in string) (out []string) {
var tren string
wr := false
f := true
for _, c := range in {
if wr && c != r2 {
tren = tren + string(c)
}
if c == r1 {
f = !f
wr = f
}
if c == r2 {
wr = false
if f {
out = append(out, tren)
tren = ""
}
f = !f
}
}
return
}

How to check if a string is all upper or lower case in Go?

What is an easy way in Golang to check if all characters in a string are upper case or lower case?
Also, how to handle a case where the string has punctuation?
See these examples:
package main
import (
"fmt"
"unicode"
)
func main() {
s := "UPPERCASE"
fmt.Println(s.IsUpper()) // Should print true
s = "lowercase"
fmt.Println(s.IsUpper()) // Should print false
s = "lowercase"
fmt.Println(s.IsLower()) // Should print true
s = "I'M YELLING AT YOU!"
fmt.Println(s.IsUpper()) // Should print true
}
Note: s.IsUpper() and s.IsLower() doesn't really exist, but would be nice to find an equivalent.
You can of course compare the upper and lower cased strings in their entirety, or you can short-circuit the comparisons on the first failure, which would be more efficient when comparing long strings.
func IsUpper(s string) bool {
for _, r := range s {
if !unicode.IsUpper(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
func IsLower(s string) bool {
for _, r := range s {
if !unicode.IsLower(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
One solution is to use strings.ToUpper()/ToLower() and compare with the original string. This works for the punctuation case as well.
Here's the solution:
package main
import (
"fmt"
"strings"
)
func main() {
s := "UPPERCASE"
fmt.Println(strings.ToUpper(s) == s)
s = "lowercase"
fmt.Println(strings.ToUpper(s) == s)
s = "lowercase"
fmt.Println(strings.ToLower(s) == s)
s = "I'M YELLING AT YOU!"
fmt.Println(strings.ToUpper(s) == s)
}
A unicode.{IsUpper, Lower} and B strings.{ToUpper, Lower} both good
For the data composed of single bytes, A will be better than B
If the data byte is unsure then B is better than A: for example 中文a1
package main
import (
"strings"
"testing"
"unicode"
)
func IsUpperU(s string) bool {
for _, r := range s {
if !unicode.IsUpper(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
func IsUpper(s string) bool {
return strings.ToUpper(s) == s
}
func IsLowerU(s string) bool {
for _, r := range s {
if !unicode.IsLower(r) && unicode.IsLetter(r) {
return false
}
}
return true
}
func IsLower(s string) bool {
return strings.ToLower(s) == s
}
func TestIsUpper(t *testing.T) {
for _, d := range []struct {
actual bool
expected bool
}{
{IsUpperU("中文A1"), false}, // be careful!
{IsUpper("中文A1"), true},
{IsUpper("中文a1"), false},
{IsUpperU("中文a1"), false},
} {
if d.actual != d.expected {
t.Fatal()
}
}
}
func TestIsLower(t *testing.T) {
for idx, d := range []struct {
actual bool
expected bool
}{
{IsLowerU("中文a1"), false}, // be careful!
{IsLower("中文a1"), true},
{IsLower("中文A1"), false},
{IsLowerU("中文A1"), false},
} {
if d.actual != d.expected {
t.Fatal(idx)
}
}
}
go playground
No need for unicode (For English letters only):
func IsUpper(s string) bool {
for _, charNumber := range s {
if charNumber > 90 || charNumber < 65 {
return false
}
}
return true
}
func IsLower(s string) bool {
for _, charNumber := range s {
if charNumber > 122 || charNumber < 97 {
return false
}
}
return true
}

GoLang Get String at Line N in Byte Slice

In a personal project I am implementing a function that returns a random line from a long file. For it to work I have to create a function that returns a string at line N, a second function that creates a random number between 0 and lines in file. While I was implementing those I figured it may be more efficient to store the data in byte slices by default, rather than storing them in separate files, which have to be read at run time.
Question: How would I go about implementing a function that returns a string at a random line of the []byte representation of my file.
My function for getting a string from a file:
func atLine(n int) (s string) {
f, err := os.Open("./path/to/file")
if err != nil {
panic("Could not read file.")
}
defer f.Close()
r := bufio.NewReader(f)
for i := 1; ; i++ {
line, _, err := r.ReadLine()
if err != nil {
break
}
if i == n {
s = string(line[:])
break
}
}
return s
}
Additional info:
Lines are not longer than 50 characters at most
Lines have no special characters (although a solution handling those is welcome)
Number of lines in the files is known and so the same can be applied for []byte
Dealing with just the question part (and not the sanity of this) - you have a []byte and want to get a specific string line from it - the bytes.Reader has no ReadLine method which you will have already noticed.
You can pass a bytes reader to bufio.NewReader, and gain the ReadLine functionality you are trying to access.
bytesReader := bytes.NewReader([]byte("test1\ntest2\ntest3\n"))
bufReader := bufio.NewReader(bytesReader)
value1, _, _ := bufReader.ReadLine()
value2, _, _ := bufReader.ReadLine()
value3, _, _ := bufReader.ReadLine()
fmt.Println(string(value1))
fmt.Println(string(value2))
fmt.Println(string(value3))
Obviously it is not sensible to ignore the errors, but for the purpose of brevity I do it here.
https://play.golang.org/p/fRQUfmZQke
Results:
test1
test2
test3
From here, it is straight forward to fit back into your existing code.
Here is an example of fast (in the order of nanoseconds) random access to lines of text as byte data. The data is buffered and indexed in memory.
lines.go:
package main
import (
"bytes"
"fmt"
"io/ioutil"
"os"
)
type Lines struct {
data []byte
index []int // line start, end pairs for data[start:end]
}
func NewLines(data []byte, nLines int) *Lines {
bom := []byte{0xEF, 0xBB, 0xBF}
if bytes.HasPrefix(data, bom) {
data = data[len(bom):]
}
lines := Lines{data: data, index: make([]int, 0, 2*nLines)}
for i := 0; ; {
j := bytes.IndexByte(lines.data[i:], '\n')
if j < 0 {
if len(lines.data[i:]) > 0 {
lines.index = append(lines.index, i)
lines.index = append(lines.index, len(lines.data))
}
break
}
lines.index = append(lines.index, i)
j += i
i = j + 1
if j > 0 && lines.data[j-1] == '\r' {
j--
}
lines.index = append(lines.index, j)
}
if len(lines.index) != cap(lines.index) {
lines.index = append([]int(nil), lines.index...)
}
return &lines
}
func (l *Lines) N() int {
return len(l.index) / 2
}
func (l *Lines) At(n int) (string, error) {
if 1 > n || n > l.N() {
err := fmt.Errorf(
"data has %d lines: at %d out of range",
l.N(), n,
)
return "", err
}
m := 2 * (n - 1)
return string(l.data[l.index[m]:l.index[m+1]]), nil
}
var (
// The Complete Works of William Shakespeare
// http://www.gutenberg.org/cache/epub/100/pg100.txt
fName = `/home/peter/shakespeare.pg100.txt`
nLines = 124787
)
func main() {
data, err := ioutil.ReadFile(fName)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
lines := NewLines(data, nLines)
for _, at := range []int{1 - 1, 1, 2, 12, 42, 124754, lines.N(), lines.N() + 1} {
line, err := lines.At(at)
if err != nil {
fmt.Fprintf(os.Stderr, "%d\t%v\n", at, err)
continue
}
fmt.Printf("%d\t%q\n", at, line)
}
}
Output:
0 data has 124787 lines: at 0 out of range
1 "The Project Gutenberg EBook of The Complete Works of William Shakespeare, by"
2 "William Shakespeare"
12 "Title: The Complete Works of William Shakespeare"
42 "SHAKESPEARE IS COPYRIGHT 1990-1993 BY WORLD LIBRARY, INC., AND IS"
124754 "http://www.gutenberg.org"
124787 "*** END: FULL LICENSE ***"
124788 data has 124787 lines: at 124788 out of range
lines_test.go:
package main
import (
"io/ioutil"
"math/rand"
"testing"
)
func benchData(b *testing.B) []byte {
data, err := ioutil.ReadFile(fName)
if err != nil {
b.Fatal(err)
}
return data
}
func BenchmarkNewLines(b *testing.B) {
data := benchData(b)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
lines := NewLines(data, nLines)
_ = lines
}
}
func BenchmarkLineAt(b *testing.B) {
data := benchData(b)
lines := NewLines(data, nLines)
ats := make([]int, 4*1024)
ats[0], ats[1] = 1, lines.N()
rand.Seed(42)
for i := range ats[2:] {
ats[2+i] = 1 + rand.Intn(lines.N())
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
at := ats[i%len(ats)]
line, err := lines.At(at)
if err != nil {
b.Error(err)
}
_ = line
}
}
Output
$ go test -bench=. lines.go lines_test.go
BenchmarkNewLines-8 1000 1898347 ns/op 1998898 B/op 2 allocs/op
BenchmarkLineAt-8 50000000 45.1 ns/op 49 B/op 0 allocs/op

How to split string in GO by array of runes?

If there is any way to split string into array of strings, when you have as a separator an array of runes? There is an example what I want:
seperators = {' ',')','('}
SomeFunction("my string(qq bb)zz",seperators) => {"my","string","qq","bb","zz"}
For example,
package main
import (
"fmt"
"strings"
)
func split(s string, separators []rune) []string {
f := func(r rune) bool {
for _, s := range separators {
if r == s {
return true
}
}
return false
}
return strings.FieldsFunc(s, f)
}
func main() {
separators := []rune{' ', ')', '('}
s := "my string(qq bb)zz"
ss := split(s, separators)
fmt.Printf("%q\n", s)
fmt.Printf("%q\n", ss)
}
Output:
"my string(qq bb)zz"
["my" "string" "qq" "bb" "zz"]
with regexp:
package main
import (
"fmt"
"regexp"
)
var re = regexp.MustCompile("[() ]")
func main() {
text := "my string(qq bb)zz"
splinedText := re.Split(text, -1)
fmt.Printf("%q\n", text)
fmt.Printf("%q\n", splinedText)
}
output:
"my string(qq bb)zz"
["my" "string" "qq" "bb" "zz"]
I believe that a simpler approach would be to use the function FieldsFunc. Here is an example of it's implementation:
package main
import (
"fmt"
"strings"
"unicode"
)
func main() {
f := func(c rune) bool {
return !unicode.IsLetter(c) && !unicode.IsNumber(c)
}
fmt.Printf("Fields are: %q", strings.FieldsFunc(" foo1;bar2,baz3...", f))
}
Output :
Fields are: ["foo1" "bar2" "baz3"]

Resources