How to tokenize string with alphabetical and numerical values - string

I have a string like "test123abc45alsdkfj", I want my scanner to behave such that it read "test" first, then 123, then "abc", then 45, then "alsdkfj". Kinda like stringstream in C++, is there a way to do this? Thanks!

I think there is a simple way to do this; I hope it helps:
package main
import (
"fmt"
"strings"
"text/scanner"
)
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
// main scans src with text/scanner and prints every maximal run of digits or
// non-digits found inside each token on its own line.
func main() {
	const src = `test123abc45alsdkfj`
	var s scanner.Scanner
	s.Init(strings.NewReader(src))
	for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
		chars := s.TokenText()
		// start marks the beginning of the run currently being collected.
		start := 0
		for i := 1; i < len(chars); i++ {
			// A run ends wherever the digit/non-digit class changes.
			if isDigit(chars[i]) != isDigit(chars[i-1]) {
				fmt.Println(chars[start:i])
				start = i
			}
		}
		// Flush the final run; without this the last piece of every token
		// ("alsdkfj" for src) is silently dropped.
		fmt.Println(chars[start:])
	}
}
and output will be
test
123
abc
45

Related

Splitting string in 2 parts by removing substring in golang

I'm trying to parse strings that look something like this:
abc***********xyz
into a slice (or 2 variables) of "abc" and "xyz", removing all the asterisks.
The number of * can be variable and so can the letters on each side, so it's not necessarily a fixed length. I'm wondering if go has a nice way of doing this with the strings package?
Use strings.FieldsFunc where * is a field separator.
// Treat every '*' as a field separator. Runs of separators do not produce
// empty fields, so only the two non-empty pieces remain.
s := "abc***********xyz"
z := strings.FieldsFunc(s, func(r rune) bool { return r == '*' })
fmt.Println(len(z), z) // prints 2 [abc xyz]
Live Example.
Split on any number of asterisks:
// Split str on every run of one or more '*'; the -1 asks for all pieces.
words := regexp.MustCompile(`\*+`).Split(str, -1)
See live demo.
For best performance, write a for loop:
// SplitAsteriks returns the non-empty substrings of s that are separated by
// one or more '*' characters. It returns nil when s contains no such
// substring (for example when s is empty or consists only of asterisks).
func SplitAsteriks(s string) []string {
	var parts []string
	// start is the index where the current token began, or -1 while the scan
	// is positioned on separators.
	start := -1
	for pos := 0; pos < len(s); pos++ {
		isSep := s[pos] == '*'
		switch {
		case isSep && start >= 0:
			// A token just ended.
			parts = append(parts, s[start:pos])
			start = -1
		case !isSep && start < 0:
			// A new token begins.
			start = pos
		}
	}
	if start >= 0 {
		// The string ended in the middle of a token.
		parts = append(parts, s[start:])
	}
	return parts
}
Playground.
if performance is an issue, you can use this func:
// SplitAsteriks splits s into the text before its first '*' and the text
// after its last '*', discarding everything in between.
//
// It returns nil for an empty s, a one-element slice holding s when s has no
// '*', and otherwise exactly two elements (either of which may be empty when
// s starts or ends with '*').
func SplitAsteriks(s string) (result []string) {
	if len(s) == 0 {
		return nil
	}
	// Use -1 as the "not found" marker. Index 0 is a valid position, so using
	// 0 as the marker would make a leading '*' invisible.
	first := -1
	for i := 0; i < len(s); i++ {
		if s[i] == '*' {
			first = i
			break
		}
	}
	if first < 0 {
		return append(result, s)
	}
	// A '*' exists at first, so this backwards scan always finds one.
	last := first
	for i := len(s) - 1; i > first; i-- {
		if s[i] == '*' {
			last = i
			break
		}
	}
	return append(result, s[:first], s[last+1:])
}
playground
Use this code given that the string is specified to have two parts:
// p is everything before the first '*', q is everything after the last '*'.
// NOTE: if s contains no '*', IndexByte returns -1 and the slice expression
// for p panics, so this is only safe when a separator is known to be present.
s := "abc***********xyz"
p := s[:strings.IndexByte(s, '*')]
q := s[strings.LastIndexByte(s, '*')+1:]
fmt.Println(p, q) // prints abc xyz

How to remove string pattern and all the string behind that pattern?

For Example :
package main
import "fmt"
// main prints what removeFromPattern returns for the sample pattern and
// sample input from the question.
func main() {
	const (
		pattern  = "helloworld."
		myString = "foo.bar.helloworld.qwerty.zxc.helloworld.asd"
	)
	result := removeFromPattern(pattern, myString)
	fmt.Println(result)
}
// removeFromPattern is the function the question asks for: given the wanted
// output shown below, it should return the part of ms that follows the first
// occurrence of p.
func removeFromPattern(p, ms string) string {
// TODO: this is where I am stuck; I am looking for an efficient implementation.
}
Wanted output :
qwerty.zxc.helloworld.asd
How do I get that wanted output, also how to remove the first pattern and all the strings behind that pattern from myString ?
1- Using _, after, _ = strings.Cut(ms, p), try this:
// removeFromPattern returns the part of ms that follows the first occurrence
// of p, or "" when p does not occur in ms.
func removeFromPattern(p, ms string) (after string) {
	// Only the text after the separator is of interest; the text before it
	// and the "found" flag are discarded.
	_, tail, _ := strings.Cut(ms, p)
	return tail
}
Which uses strings.Index :
// Cut splits s at the first occurrence of sep and returns the text on either
// side of it. found reports whether sep was present; when it is not, Cut
// returns s, "", false.
func Cut(s, sep string) (before, after string, found bool) {
	i := Index(s, sep)
	if i < 0 {
		return s, "", false
	}
	return s[:i], s[i+len(sep):], true
}
2- Using strings.Index, try this:
// removeFromPattern returns the part of ms that follows the first occurrence
// of p, or "" when p does not occur in ms.
func removeFromPattern(p, ms string) string {
	if at := strings.Index(ms, p); at >= 0 {
		return ms[at+len(p):]
	}
	return ""
}
3- Using strings.Split, try this:
// removeFromPattern returns the part of ms that follows the first occurrence
// of p, or "" when p does not occur in ms.
func removeFromPattern(p, ms string) string {
	// Split at most once. An unlimited Split yields more than two pieces
	// whenever p occurs repeatedly, which made the length check reject
	// perfectly valid input such as the example in the question.
	a := strings.SplitN(ms, p, 2)
	if len(a) != 2 {
		return ""
	}
	return a[1]
}
4- Using regexp, try this
// removeFromPattern returns the part of ms that follows the first occurrence
// of the literal text p, or "" when p does not occur in ms.
func removeFromPattern(p, ms string) string {
	// p is quoted so that characters such as '.' match themselves, and a
	// capture group is appended so that there is a submatch to return (a
	// pattern without a group never produces a[1]). (?s) lets the remainder
	// span newlines.
	re := regexp.MustCompile(`(?s)` + regexp.QuoteMeta(p) + `(.*)`)
	a := re.FindStringSubmatch(ms)
	if len(a) < 2 {
		return ""
	}
	return a[1]
}
strings.Split is enough
// main prints what removeFromPattern returns for a sample input in which the
// pattern occurs once.
func main() {
	const (
		pattern  = "helloworld."
		myString = "foo.bar.helloworld.qwerty.zxc"
	)
	fmt.Println(removeFromPattern(pattern, myString))
}
// removeFromPattern returns the part of ms that follows the first occurrence
// of p, or "" when p does not occur in ms.
func removeFromPattern(p, ms string) string {
	// Cut only at the first occurrence. Splitting on every occurrence and
	// taking the second piece truncates the result at the next p.
	if _, after, found := strings.Cut(ms, p); found {
		return after
	}
	return ""
}
// removeFromPattern returns the part of ms that follows the first occurrence
// of p. When p does not occur in ms, ms is returned unchanged.
func removeFromPattern(p, ms string) string {
	// Deleting every occurrence of p keeps the text in front of it, which is
	// not what was asked for; drop everything up to and including the first
	// occurrence instead.
	i := strings.Index(ms, p)
	if i < 0 {
		return ms
	}
	return ms[i+len(p):]
}
// main runs removeFromPattern on a sample input and prints the result.
func main() {
	var (
		pattern  = "helloworld."
		myString = "foo.bar.helloworld.qwerty.zxc"
	)
	fmt.Println(removeFromPattern(pattern, myString))
}

How to split string two between characters

I want to split a string up between two characters( {{ and }} ).
I have a string like {{number1}} + {{number2}} > {{number3}}
and I'm looking for something that returns:
[number1, number2, number3]
You can try it with Regex:
s := "{{number1}} + {{number2}} > {{number3}}"
// Find all substrings of the form {<var name>}: one opening brace, optional
// lowercase letters, optional digits, optional lowercase letters, and one
// closing brace. For "{{number1}}" this matches the inner "{number1}".
re := regexp.MustCompile("{[a-z]*[0-9]*[a-z]*}")
nums := re.FindAllString(s, -1)
// Strip the remaining '{' and '}' from every match.
for i, _ := range nums {
nums[i] = strings.TrimPrefix(nums[i], "{")
nums[i] = strings.TrimSuffix(nums[i], "}")
}
fmt.Println(nums) // output: [number1 number2 number3]
You can experiment with regex here: https://regex101.com/r/kkPWAS/1
Use the regex [A-Za-z]+[0-9] and filter the alpha numeric parts of the string as string array.
package main
import (
"fmt"
"regexp"
)
// main extracts every "letters followed by digits" name from s and prints
// them as a slice.
func main() {
	s := `{{number1}} + {{number2}} > {{number3}}`
	// [0-9]+ rather than a single [0-9], so that a multi-digit suffix such as
	// "number12" is kept whole instead of being cut after its first digit.
	re := regexp.MustCompile("[A-Za-z]+[0-9]+")
	p := re.FindAllString(s, -1)
	fmt.Println(p) //[number1 number2 number3]
}
the hard way using the template parser ^^
package main
import (
"fmt"
"strings"
"text/template/parse"
)
// main prints, in Go syntax, the names parseit extracts from a sample input.
func main() {
	const input = "{{number1}} + {{number2}} > {{number3}}"
	fmt.Printf("%#v\n", parseit(input))
}
// parseit parses input as a text/template body delimited by "{{" and "}}" and
// returns the dotted name of every field node found in the parse tree.
// It panics if the rewritten input fails to parse.
func parseit(input string) (out []string) {
	// Prefix every action with '.' to force func calls to become variables.
	dotted := strings.ReplaceAll(input, "{{", "{{.")
	trees, err := parse.Parse("", dotted, "{{", "}}")
	if err != nil {
		panic(err)
	}
	collect := func(n parse.Node) bool {
		if field, isField := n.(*parse.FieldNode); isField {
			out = append(out, strings.Join(field.Ident, "."))
		}
		// Always keep walking.
		return true
	}
	visit(trees[""].Root, collect)
	return out
}
// visit walks the template parse tree rooted at n, calling fn on n first and
// then on the descendants it knows how to reach (list, range, action, command
// and pipe nodes).
//
// It returns true for a nil n. If fn(n) returns false, visit returns false
// without descending into n. For a range node a false result from its pipe,
// list or else-list is propagated; for every other node kind the results of
// the child visits are ignored and visit returns true.
func visit(n parse.Node, fn func(parse.Node) bool) bool {
	if n == nil {
		return true
	}
	if !fn(n) {
		return false
	}
	switch node := n.(type) {
	case *parse.ListNode:
		for _, child := range node.Nodes {
			visit(child, fn)
		}
	case *parse.RangeNode:
		branch := node.BranchNode
		if !visit(branch.Pipe, fn) {
			return false
		}
		// The nil checks are on the concrete pointers: a nil *ListNode wrapped
		// in the Node interface would not compare equal to nil inside visit.
		if branch.List != nil && !visit(branch.List, fn) {
			return false
		}
		if branch.ElseList != nil && !visit(branch.ElseList, fn) {
			return false
		}
	case *parse.ActionNode:
		for _, decl := range node.Pipe.Decl {
			visit(decl, fn)
		}
		for _, cmd := range node.Pipe.Cmds {
			visit(cmd, fn)
		}
	case *parse.CommandNode:
		for _, arg := range node.Args {
			visit(arg, fn)
		}
	case *parse.PipeNode:
		for _, decl := range node.Decl {
			visit(decl, fn)
		}
		for _, cmd := range node.Cmds {
			visit(cmd, fn)
		}
	}
	return true
}
If you really are working with template strings, but parsing fails because of undefined function calls and you do not want to apply the input = strings.Replace(input, "{{", "{{.", -1) workaround (which forces function calls to become variables),
you can always force-load the template using a function similar to:
// reMissingIdent captures the function name from the parse error that
// text/template reports for an undefined function in an unnamed template.
var reMissingIdent = regexp.MustCompile(`template: :[0-9]+: function "([^"]+)" not defined`)

// ParseTextTemplateAnyway parses s as an unnamed text/template, registering a
// stub for every function the parser reports as undefined and retrying until
// parsing succeeds or fails for some other reason.
//
// It returns the result of the last parse attempt, the map of stubs that were
// registered (each stub ignores its arguments and returns ""), and the final
// parse error, which is nil on success.
func ParseTextTemplateAnyway(s string) (*texttemplate.Template, texttemplate.FuncMap, error) {
	stubs := texttemplate.FuncMap{}
	for {
		tmpl, err := texttemplate.New("").Funcs(stubs).Parse(s)
		if err == nil {
			return tmpl, stubs, nil
		}
		missing := reMissingIdent.FindAllStringSubmatch(err.Error(), -1)
		if len(missing) == 0 {
			// Not an "undefined function" error: nothing more to stub out.
			return tmpl, stubs, err
		}
		stubs[missing[0][1]] = func(s ...interface{}) string { return "" }
	}
}
You don't need to use libraries. You can create your own function.
package main
// r1 and r2 are the opening and closing delimiter characters GetStrings
// looks for.
const (
	r1 = '{'
	r2 = '}'
)

// GetStrings scans in one rune at a time and returns the text found between
// doubled delimiters, e.g. "number1" for "{{number1}}". Text enclosed in a
// single pair of delimiters ("{a}") is not collected.
func GetStrings(in string) (out []string) {
	var (
		current   string // text collected for the entry being read
		capturing bool   // true while runes are appended to current
		armed     = true // flipped by every delimiter seen
	)
	for _, ch := range in {
		// Collect first, so that the delimiter that switches capturing on is
		// not itself part of the entry.
		if capturing && ch != r2 {
			current += string(ch)
		}
		switch ch {
		case r1:
			armed = !armed
			capturing = armed
		case r2:
			capturing = false
			if armed {
				out = append(out, current)
				current = ""
			}
			armed = !armed
		}
	}
	return out
}

How to check if a string is all upper or lower case in Go?

What is an easy way in Golang to check if all characters in a string are upper case or lower case?
Also, how to handle a case where the string has punctuation?
See these examples:
package main
import (
"fmt"
"unicode"
)
// main illustrates the behaviour the question is asking for. It is
// pseudo-code: Go strings have no IsUpper or IsLower methods, so this does
// not compile as written.
func main() {
s := "UPPERCASE"
fmt.Println(s.IsUpper()) // Should print true
s = "lowercase"
fmt.Println(s.IsUpper()) // Should print false
s = "lowercase"
fmt.Println(s.IsLower()) // Should print true
s = "I'M YELLING AT YOU!"
fmt.Println(s.IsUpper()) // Should print true
}
Note: s.IsUpper() and s.IsLower() don't really exist, but it would be nice to find an equivalent.
You can of course compare the upper and lower cased strings in their entirety, or you can short-circuit the comparisons on the first failure, which would be more efficient when comparing long strings.
// IsUpper reports whether every letter in s is upper case. Runes that are not
// letters (digits, punctuation, spaces) are ignored, so a string without any
// letters, including the empty string, yields true.
func IsUpper(s string) bool {
	for _, ch := range s {
		if !unicode.IsLetter(ch) {
			continue
		}
		if !unicode.IsUpper(ch) {
			return false
		}
	}
	return true
}
// IsLower reports whether every letter in s is lower case. Runes that are not
// letters (digits, punctuation, spaces) are ignored, so a string without any
// letters, including the empty string, yields true.
func IsLower(s string) bool {
	for _, ch := range s {
		if !unicode.IsLetter(ch) {
			continue
		}
		if !unicode.IsLower(ch) {
			return false
		}
	}
	return true
}
One solution is to use strings.ToUpper()/ToLower() and compare with the original string. This works for the punctuation case as well.
Here's the solution:
package main
import (
"fmt"
"strings"
)
// main prints, one per line, whether each sample string is unchanged by the
// relevant case conversion (true, false, true, true).
func main() {
	results := []bool{
		strings.ToUpper("UPPERCASE") == "UPPERCASE",
		strings.ToUpper("lowercase") == "lowercase",
		strings.ToLower("lowercase") == "lowercase",
		strings.ToUpper("I'M YELLING AT YOU!") == "I'M YELLING AT YOU!",
	}
	for _, unchanged := range results {
		fmt.Println(unchanged)
	}
}
Both approach A, unicode.{IsUpper, IsLower}, and approach B, strings.{ToUpper, ToLower}, are good.
For data composed only of single-byte characters, A is better than B.
If you are not sure the data is single-byte, B is better than A, for example with 中文a1:
package main
import (
"strings"
"testing"
"unicode"
)
// IsUpperU reports whether s contains no letter that is not upper case,
// judged rune by rune with the unicode package. Non-letters are ignored;
// letters without case, such as CJK characters, make it return false.
func IsUpperU(s string) bool {
	for _, ch := range s {
		if unicode.IsLetter(ch) && !unicode.IsUpper(ch) {
			return false
		}
	}
	return true
}
// IsUpper reports whether converting s to upper case leaves it unchanged.
func IsUpper(s string) bool {
	upper := strings.ToUpper(s)
	return upper == s
}
// IsLowerU reports whether s contains no letter that is not lower case,
// judged rune by rune with the unicode package. Non-letters are ignored;
// letters without case, such as CJK characters, make it return false.
func IsLowerU(s string) bool {
	for _, ch := range s {
		if unicode.IsLetter(ch) && !unicode.IsLower(ch) {
			return false
		}
	}
	return true
}
// IsLower reports whether converting s to lower case leaves it unchanged.
func IsLower(s string) bool {
	lowered := strings.ToLower(s)
	return lowered == s
}
// TestIsUpper contrasts the unicode-based IsUpperU with the strings-based
// IsUpper on input that mixes CJK characters with ASCII letters.
func TestIsUpper(t *testing.T) {
	for idx, d := range []struct {
		actual   bool
		expected bool
	}{
		{IsUpperU("中文A1"), false}, // be careful!
		{IsUpper("中文A1"), true},
		{IsUpper("中文a1"), false},
		{IsUpperU("中文a1"), false},
	} {
		if d.actual != d.expected {
			// Identify the failing case; a bare t.Fatal() gives no clue which
			// row of the table went wrong.
			t.Fatalf("case %d: got %v, want %v", idx, d.actual, d.expected)
		}
	}
}
// TestIsLower contrasts the unicode-based IsLowerU with the strings-based
// IsLower on input that mixes CJK characters with ASCII letters. On a
// mismatch it fails with the index of the offending case.
func TestIsLower(t *testing.T) {
	type check struct {
		actual   bool
		expected bool
	}
	checks := []check{
		{IsLowerU("中文a1"), false}, // be careful!
		{IsLower("中文a1"), true},
		{IsLower("中文A1"), false},
		{IsLowerU("中文A1"), false},
	}
	for idx := range checks {
		if checks[idx].actual == checks[idx].expected {
			continue
		}
		t.Fatal(idx)
	}
}
go playground
No need for unicode (For English letters only):
// IsUpper reports whether s consists solely of the ASCII letters 'A'-'Z'.
// Any other rune, including digits, spaces and punctuation, yields false.
// The empty string yields true.
func IsUpper(s string) bool {
	for _, r := range s {
		if r < 'A' || r > 'Z' {
			return false
		}
	}
	return true
}
// IsLower reports whether s consists solely of the ASCII letters 'a'-'z'.
// Any other rune, including digits, spaces and punctuation, yields false.
// The empty string yields true.
func IsLower(s string) bool {
	for _, r := range s {
		if r < 'a' || r > 'z' {
			return false
		}
	}
	return true
}

function returns empty string even though it shouldn't in golang

I'm having a strange issue.
I have a package named tools that holds various helper files for my main package. One of them generates a pseudorandom string that should contain uppercase, lowercase, numerical and certain special characters. To make sure I never get a string that is missing one of those types I added some validation, and yet I seem to be missing something, because I get an error every now and then.
This is my main file:
package main
import (
"../tools"
"fmt"
"strings"
)
// Character classes that a generated string is checked against.
const (
	// lower lists all 26 lowercase letters. The earlier literal had 'z' in
	// place of 'j', so a string whose only lowercase letter was 'j' was
	// wrongly reported as having none.
	lower    = "abcdefghijklmnopqrstuvwxyz"
	upper    = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	numrical = "0123456789"
	special  = "!#$^*"
)
// main generates ten 15-character strings with tools.GenSpecial and prints
// each one next to the result of validating it.
func main() {
	for remaining := 10; remaining > 0; remaining-- {
		generated := tools.GenSpecial(15)
		fmt.Println(generated, validate(generated))
	}
}
// haslower reports whether s contains at least one character from lower.
func haslower(s string) bool {
	return strings.IndexAny(s, lower) >= 0
}

// hasupper reports whether s contains at least one character from upper.
func hasupper(s string) bool {
	return strings.IndexAny(s, upper) >= 0
}

// hasnumrical reports whether s contains at least one character from numrical.
func hasnumrical(s string) bool {
	return strings.IndexAny(s, numrical) >= 0
}

// hasspecial reports whether s contains at least one character from special.
func hasspecial(s string) bool {
	return strings.IndexAny(s, special) >= 0
}

// validate reports whether s contains at least one character from each of the
// four classes: lower, upper, numrical and special.
func validate(s string) bool {
	if !haslower(s) {
		return false
	}
	if !hasupper(s) {
		return false
	}
	if !hasnumrical(s) {
		return false
	}
	return hasspecial(s)
}
and this is the relevant parts from my tools file:
package tools
import (
"math/rand"
"time"
"strings"
)
// Character sets used for generating and validating strings. The composite
// sets are derived from the basic classes so that they cannot drift apart:
// the hand-written literals had lost the 'v' in specialchars and had a 'z' in
// place of 'j' in lower.
const (
	lower    = "abcdefghijklmnopqrstuvwxyz"
	upper    = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	numrical = "0123456789"
	special  = "!#$^*"

	alphabet     = lower + upper
	alphanum     = alphabet + numrical
	specialchars = alphanum + special
)
// randomize seeds the shared math/rand source with the current time in
// nanoseconds.
func randomize() {
	now := time.Now()
	rand.Seed(now.UnixNano())
}
func GenSpecial(n int) string { //function to generate a psuedorandom
alphabetical string with special characters
rstring := make([]byte, n)
for i := range rstring {
randomize()
rstring[i] = specialchars[rand.Intn(len(specialchars))]
}
if validate(string(rstring))&& string(rstring)!=""{
return string(rstring)
} else {
GenSpecial(n)
}
return "abc"
}
// haslower reports whether s contains any character listed in lower.
func haslower(s string) bool {
	found := strings.ContainsAny(s, lower)
	return found
}

// hasupper reports whether s contains any character listed in upper.
func hasupper(s string) bool {
	found := strings.ContainsAny(s, upper)
	return found
}

// hasnumrical reports whether s contains any character listed in numrical.
func hasnumrical(s string) bool {
	found := strings.ContainsAny(s, numrical)
	return found
}

// hasspecial reports whether s contains any character listed in special.
func hasspecial(s string) bool {
	found := strings.ContainsAny(s, special)
	return found
}

// validate reports whether s passes all four checks: haslower, hasupper,
// hasnumrical and hasspecial. The checks run in that order and stop at the
// first one that fails.
func validate(s string) bool {
	checks := []func(string) bool{haslower, hasupper, hasnumrical, hasspecial}
	for _, check := range checks {
		if !check(s) {
			return false
		}
	}
	return true
}
When I run my main file, i get some values that return the "abc" value, and I don't understand how or why.
Any ideas?
You are missing a return statement in your else case. If validate returns false, you call GenSpecial and then it returns "abc".
You want to say:
// Return the recursive call's result, so that a failed validation hands back
// the retried string instead of falling through to the function's final
// return statement.
if validate(string(rstring))&& string(rstring)!=""{
return string(rstring)
} else {
return GenSpecial(n) // return here!
}

Resources