Related
I'm trying to parse strings that look something like this:
abc***********xyz
into a slice (or 2 variables) of "abc" and "xyz", removing all the asterisks.
The number of * can be variable and so can the letters on each side, so it's not necessarily a fixed length. I'm wondering if go has a nice way of doing this with the strings package?
Use strings.FieldsFunc where * is a field separator.
// Treat every '*' as a field separator. FieldsFunc splits at each run of
// separator runes, so consecutive asterisks produce no empty fields.
s := "abc***********xyz"
z := strings.FieldsFunc(s, func(r rune) bool { return r == '*' })
fmt.Println(len(z), z) // prints 2 [abc xyz]
Live Example.
Split on any number of asterisks:
// `\*+` matches a run of one or more asterisks; n = -1 asks Split for every substring.
words := regexp.MustCompile(`\*+`).Split(str, -1)
See live demo.
For best performance, write a for loop:
// SplitAsteriks returns the maximal runs of non-'*' characters in s, in
// order. Runs of asterisks act as a single separator, so the result never
// contains empty strings; it is nil when s holds no token at all.
func SplitAsteriks(s string) []string {
	var parts []string
	start := -1 // byte offset where the current token began; -1 while between tokens
	for pos, ch := range s {
		switch {
		case ch != '*' && start < 0:
			// first character after a separator run (or at the start of s)
			start = pos
		case ch == '*' && start >= 0:
			// a separator closes the token that was in progress
			parts = append(parts, s[start:pos])
			start = -1
		}
	}
	if start >= 0 {
		// s ended while a token was still open
		parts = append(parts, s[start:])
	}
	return parts
}
Playground.
if performance is an issue, you can use this func:
// SplitAsteriks splits s into the part before its first '*' and the part
// after its last '*'.
//
// It returns nil for the empty string, a one-element slice holding s when s
// contains no asterisk, and otherwise exactly two elements (either of which
// may be empty when s starts or ends with an asterisk).
func SplitAsteriks(s string) (result []string) {
	if len(s) == 0 {
		return
	}
	// -1 means "not found". Index 0 is a legitimate position for an
	// asterisk, so it cannot double as the sentinel.
	first := -1
	for i := 0; i < len(s); i++ {
		if s[i] == '*' {
			first = i
			break
		}
	}
	if first < 0 {
		return append(result, s)
	}
	// At least one asterisk exists, so scanning backwards always stops at or
	// after first.
	last := first
	for i := len(s) - 1; i > first; i-- {
		if s[i] == '*' {
			last = i
			break
		}
	}
	return append(result, s[:first], s[last+1:])
}
playground
Use this code given that the string is specified to have two parts:
// NOTE: assumes s contains at least one '*'. IndexByte returns -1 when the
// byte is absent, and slicing s[:-1] would then panic.
s := "abc***********xyz"
p := s[:strings.IndexByte(s, '*')]       // everything before the first '*'
q := s[strings.LastIndexByte(s, '*')+1:] // everything after the last '*'
fmt.Println(p, q) // prints abc xyz
I want to split a string up between two characters( {{ and }} ).
I have a string like {{number1}} + {{number2}} > {{number3}}
and I'm looking for something that returns:
[number1, number2, number3]
You can try it with Regex:
s := "{{number1}} + {{number2}} > {{number3}}"
// Find all substrings of the form {<var name>}. The pattern uses single
// braces, so for "{{number1}}" it matches the inner "{number1}".
re := regexp.MustCompile("{[a-z]*[0-9]*[a-z]*}")
nums := re.FindAllString(s, -1)
// Remove the remaining '{' and '}' from each match
for i, _ := range nums {
nums[i] = strings.TrimPrefix(nums[i], "{")
nums[i] = strings.TrimSuffix(nums[i], "}")
}
fmt.Println(nums) // output: [number1 number2 number3]
You can experiment with regex here: https://regex101.com/r/kkPWAS/1
Use the regex [A-Za-z]+[0-9] and filter the alpha numeric parts of the string as string array.
package main
import (
"fmt"
"regexp"
)
// main prints every run of ASCII letters followed by a single digit that
// occurs in the sample expression.
func main() {
	const expr = `{{number1}} + {{number2}} > {{number3}}`
	names := regexp.MustCompile("[A-Za-z]+[0-9]").FindAllString(expr, -1)
	fmt.Println(names) //[number1 number2 number3]
}
the hard way using the template parser ^^
package main
import (
"fmt"
"strings"
"text/template/parse"
)
// main runs parseit on a sample expression and prints the result in
// Go-syntax form.
func main() {
	fmt.Printf("%#v\n", parseit("{{number1}} + {{number2}} > {{number3}}"))
}
// parseit parses input as a text/template body delimited by "{{" and "}}"
// and returns the dotted name of every field node found by visit.
//
// Every "{{" is first rewritten to "{{." so that bare identifiers parse as
// fields rather than as calls to (undefined) functions. It panics if the
// rewritten input does not parse.
func parseit(input string) (out []string) {
	dotted := strings.Replace(input, "{{", "{{.", -1)
	trees, err := parse.Parse("", dotted, "{{", "}}")
	if err != nil {
		panic(err)
	}
	collect := func(n parse.Node) bool {
		if field, isField := n.(*parse.FieldNode); isField {
			out = append(out, strings.Join(field.Ident, "."))
		}
		return true
	}
	visit(trees[""].Root, collect)
	return out
}
// visit walks the template parse tree rooted at n depth-first, calling fn on
// each node it reaches before descending into that node's children.
//
// It returns false when fn rejects n itself, or when fn rejects the pipeline
// or one of the lists of a branch node ({{if}}, {{range}}, {{with}}). A false
// result from an element of a list, pipeline or command does not stop the
// walk: the remaining siblings are still visited.
//
// For an action node, fn is called on the pipeline's declarations and
// commands but not on the pipeline node itself.
func visit(n parse.Node, fn func(parse.Node) bool) bool {
	if n == nil {
		return true
	}
	if !fn(n) {
		return false
	}
	switch l := n.(type) {
	case *parse.ListNode:
		for _, nn := range l.Nodes {
			visit(nn, fn)
		}
	case *parse.IfNode:
		return visitBranch(&l.BranchNode, fn)
	case *parse.RangeNode:
		return visitBranch(&l.BranchNode, fn)
	case *parse.WithNode:
		return visitBranch(&l.BranchNode, fn)
	case *parse.ActionNode:
		visitPipeChildren(l.Pipe, fn)
	case *parse.CommandNode:
		for _, a := range l.Args {
			visit(a, fn)
		}
	case *parse.PipeNode:
		visitPipeChildren(l, fn)
	}
	return true
}

// visitBranch visits the pipeline, the list and the optional else-list shared
// by if, range and with nodes, stopping at the first one that is rejected.
func visitBranch(b *parse.BranchNode, fn func(parse.Node) bool) bool {
	if !visit(b.Pipe, fn) {
		return false
	}
	if b.List != nil && !visit(b.List, fn) {
		return false
	}
	if b.ElseList != nil && !visit(b.ElseList, fn) {
		return false
	}
	return true
}

// visitPipeChildren visits the variable declarations and commands of p
// without calling fn on p itself.
func visitPipeChildren(p *parse.PipeNode, fn func(parse.Node) bool) {
	for _, d := range p.Decl {
		visit(d, fn)
	}
	for _, c := range p.Cmds {
		visit(c, fn)
	}
}
If you really are manipulating template strings, parsing fails because of calls to undefined functions, and you do not want to apply the rewrite input = strings.Replace(input, "{{", "{{.", -1) // Force func calls to become variables.
then you can always force a template to load by using a function similar to
// reMissingIdent captures the function name from the error text/template
// reports when an unnamed template calls an undefined function.
var reMissingIdent = regexp.MustCompile(`template: :[0-9]+: function "([^"]+)" not defined`)

// ParseTextTemplateAnyway parses s as an unnamed text template, defining a
// stub for each function the parser reports as undefined and retrying until
// the parse succeeds or fails for another reason.
//
// It returns the parsed template, the map of stubs that had to be defined
// (each stub accepts any arguments and returns ""), and the final parse
// error, if any.
func ParseTextTemplateAnyway(s string) (*texttemplate.Template, texttemplate.FuncMap, error) {
	stubs := texttemplate.FuncMap{}
	for {
		tmpl, err := texttemplate.New("").Funcs(stubs).Parse(s)
		if err == nil {
			return tmpl, stubs, err
		}
		matches := reMissingIdent.FindAllStringSubmatch(err.Error(), -1)
		if len(matches) == 0 {
			// Not a missing-function error: nothing more can be stubbed out.
			return tmpl, stubs, err
		}
		stubs[matches[0][1]] = func(s ...interface{}) string { return "" }
	}
}
You don't need to use libraries. You can create your own function.
package main
// r1 and r2 are the opening and closing brace runes GetStrings looks for.
const r1 = '{'
const r2 = '}'

// GetStrings scans in once and returns the text found between doubled
// braces, e.g. "{{a}} + {{b}}" yields ["a", "b"].
//
// A toggle flips on every brace. Capturing starts on an opening brace that
// flips the toggle to true, and the captured text is emitted on a closing
// brace seen while the toggle is true.
func GetStrings(in string) (out []string) {
	var (
		token     []rune // characters captured for the current token
		capturing bool   // true while runes are being appended to token
		toggle    = true // flipped by every opening and closing brace
	)
	for _, ch := range in {
		// The capture decision uses the state left by the previous rune.
		if capturing && ch != r2 {
			token = append(token, ch)
		}
		switch ch {
		case r1:
			toggle = !toggle
			capturing = toggle
		case r2:
			capturing = false
			if toggle {
				out = append(out, string(token))
				token = token[:0]
			}
			toggle = !toggle
		}
	}
	return
}
In a personal project I am implementing a function that returns a random line from a long file. For it to work I have to create a function that returns a string at line N, a second function that creates a random number between 0 and lines in file. While I was implementing those I figured it may be more efficient to store the data in byte slices by default, rather than storing them in separate files, which have to be read at run time.
Question: How would I go about implementing a function that returns a string at a random line of the []byte representation of my file.
My function for getting a string from a file:
// atLine returns line n (1-based) of the file at "./path/to/file".
//
// It returns "" when the file has fewer than n lines or a read error occurs
// first, and panics if the file cannot be opened. The isPrefix result of
// ReadLine is ignored.
func atLine(n int) (s string) {
	file, err := os.Open("./path/to/file")
	if err != nil {
		panic("Could not read file.")
	}
	defer file.Close()

	reader := bufio.NewReader(file)
	lineNo := 0
	for {
		chunk, _, readErr := reader.ReadLine()
		if readErr != nil {
			return s
		}
		lineNo++
		if lineNo == n {
			return string(chunk)
		}
	}
}
Additional info:
Lines are not longer than 50 characters at most
Lines have no special characters (although a solution handling those is welcome)
Number of lines in the files is known and so the same can be applied for []byte
Dealing with just the question part (and not the sanity of this) - you have a []byte and want to get a specific string line from it - the bytes.Reader has no ReadLine method which you will have already noticed.
You can pass a bytes reader to bufio.NewReader, and gain the ReadLine functionality you are trying to access.
// Wrap the in-memory bytes in a bufio.Reader to gain ReadLine.
bytesReader := bytes.NewReader([]byte("test1\ntest2\ntest3\n"))
bufReader := bufio.NewReader(bytesReader)
// ReadLine also returns isPrefix and an error; both are discarded here for brevity.
value1, _, _ := bufReader.ReadLine()
value2, _, _ := bufReader.ReadLine()
value3, _, _ := bufReader.ReadLine()
fmt.Println(string(value1))
fmt.Println(string(value2))
fmt.Println(string(value3))
Obviously it is not sensible to ignore the errors, but for the purpose of brevity I do it here.
https://play.golang.org/p/fRQUfmZQke
Results:
test1
test2
test3
From here, it is straightforward to fit this back into your existing code.
Here is an example of fast (in the order of nanoseconds) random access to lines of text as byte data. The data is buffered and indexed in memory.
lines.go:
package main
import (
"bytes"
"fmt"
"io/ioutil"
"os"
)
// Lines provides random access to the lines of a text held in memory.
type Lines struct {
	data  []byte
	index []int // flattened (start, end) pairs: line k is data[index[2k]:index[2k+1]]
}

// NewLines indexes the lines of data. A leading UTF-8 byte order mark is
// skipped. Lines end at '\n'; a '\r' directly before the '\n' is excluded
// from the line. A final line without a trailing newline is indexed when it
// is non-empty. nLines is a capacity hint for the expected number of lines.
func NewLines(data []byte, nLines int) *Lines {
	data = bytes.TrimPrefix(data, []byte{0xEF, 0xBB, 0xBF})
	l := &Lines{data: data, index: make([]int, 0, 2*nLines)}

	start := 0
	for {
		nl := bytes.IndexByte(data[start:], '\n')
		if nl < 0 {
			break
		}
		end := start + nl
		next := end + 1
		if end > 0 && data[end-1] == '\r' {
			end--
		}
		l.index = append(l.index, start, end)
		start = next
	}
	if start < len(data) {
		// trailing text that is not newline-terminated
		l.index = append(l.index, start, len(data))
	}

	// Re-copy the index when the hint did not match the number of entries,
	// instead of keeping the over- or re-grown backing array.
	if len(l.index) != cap(l.index) {
		l.index = append([]int(nil), l.index...)
	}
	return l
}

// N returns the number of indexed lines.
func (l *Lines) N() int {
	return len(l.index) / 2
}

// At returns line n, counting from 1. It returns an error when n is outside
// the range [1, l.N()].
func (l *Lines) At(n int) (string, error) {
	if n < 1 || n > l.N() {
		return "", fmt.Errorf(
			"data has %d lines: at %d out of range",
			l.N(), n,
		)
	}
	pair := 2 * (n - 1)
	start, end := l.index[pair], l.index[pair+1]
	return string(l.data[start:end]), nil
}
var (
// The Complete Works of William Shakespeare
// http://www.gutenberg.org/cache/epub/100/pg100.txt
fName = `/home/peter/shakespeare.pg100.txt`
nLines = 124787
)
// main loads fName, indexes its lines and prints a handful of probe lines,
// including the out-of-range positions 0 and N()+1, which are reported on
// standard error.
func main() {
	data, err := ioutil.ReadFile(fName)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	lines := NewLines(data, nLines)
	probes := []int{1 - 1, 1, 2, 12, 42, 124754, lines.N(), lines.N() + 1}
	for _, at := range probes {
		if line, err := lines.At(at); err != nil {
			fmt.Fprintf(os.Stderr, "%d\t%v\n", at, err)
		} else {
			fmt.Printf("%d\t%q\n", at, line)
		}
	}
}
Output:
0 data has 124787 lines: at 0 out of range
1 "The Project Gutenberg EBook of The Complete Works of William Shakespeare, by"
2 "William Shakespeare"
12 "Title: The Complete Works of William Shakespeare"
42 "SHAKESPEARE IS COPYRIGHT 1990-1993 BY WORLD LIBRARY, INC., AND IS"
124754 "http://www.gutenberg.org"
124787 "*** END: FULL LICENSE ***"
124788 data has 124787 lines: at 124788 out of range
lines_test.go:
package main
import (
"io/ioutil"
"math/rand"
"testing"
)
// benchData reads the benchmark input file fName, failing the benchmark
// immediately if it cannot be read.
func benchData(b *testing.B) []byte {
	contents, readErr := ioutil.ReadFile(fName)
	if readErr != nil {
		b.Fatal(readErr)
	}
	return contents
}
// BenchmarkNewLines measures building the line index over the whole input.
// Reading the file happens before the timer is reset.
func BenchmarkNewLines(b *testing.B) {
	data := benchData(b)
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = NewLines(data, nLines)
	}
}
func BenchmarkLineAt(b *testing.B) {
data := benchData(b)
lines := NewLines(data, nLines)
ats := make([]int, 4*1024)
ats[0], ats[1] = 1, lines.N()
rand.Seed(42)
for i := range ats[2:] {
ats[2+i] = 1 + rand.Intn(lines.N())
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
at := ats[i%len(ats)]
line, err := lines.At(at)
if err != nil {
b.Error(err)
}
_ = line
}
}
Output
$ go test -bench=. lines.go lines_test.go
BenchmarkNewLines-8 1000 1898347 ns/op 1998898 B/op 2 allocs/op
BenchmarkLineAt-8 50000000 45.1 ns/op 49 B/op 0 allocs/op
The string will only contain 0's or 4's. The string will start with 4. example: 444, 44, 40, 4400, 4440, etc. These all are valid strings but 404 is not valid.
Currently, I am checking whether a 4 is present immediately after a 0. I am not sure that this is an efficient approach.
If you mean leading 4 and following 0.
use regexp
package main
import (
"regexp"
)
// fourZeroRE matches one or more '4' characters followed by zero or more '0'
// characters. It is compiled once at package initialization rather than on
// every call to check.
var fourZeroRE = regexp.MustCompile(`^4+0*$`)

// check reports whether s consists of one or more '4' characters followed
// only by '0' characters.
func check(s string) bool {
	return fourZeroRE.MatchString(s)
}
// main is a self-check: it panics on the first sample string that check
// classifies incorrectly.
func main() {
	cases := []struct {
		in   string
		want bool
	}{
		{"444", true}, {"44", true}, {"40", true}, {"4400", true}, {"4440", true},
		{"404", false}, {"040", false},
	}
	for _, c := range cases {
		got := check(c.in)
		switch {
		case c.want && !got:
			panic("want true: " + c.in)
		case !c.want && got:
			panic("want false: " + c.in)
		}
	}
}
non-regexp
package main
// check reports whether s is one or more '4' runes followed only by '0'
// runes. The string is converted to a rune slice before scanning.
func check(s string) bool {
	runes := []rune(s)
	n := 0
	for n < len(runes) && runes[n] == '4' {
		n++
	}
	if n == 0 {
		// empty, or does not start with '4'
		return false
	}
	for _, r := range runes[n:] {
		if r != '0' {
			return false
		}
	}
	return true
}
// main is a self-check: it panics on the first sample string that check
// classifies incorrectly.
func main() {
	valid := [...]string{"444", "44", "40", "4400", "4440"}
	invalid := [...]string{"404", "040"}
	for i := 0; i < len(valid); i++ {
		if check(valid[i]) {
			continue
		}
		panic("want true: " + valid[i])
	}
	for i := 0; i < len(invalid); i++ {
		if !check(invalid[i]) {
			continue
		}
		panic("want false: " + invalid[i])
	}
}
faster version
// check reports whether s is one or more '4' bytes followed only by '0'
// bytes. It scans the string's bytes directly, without converting to runes.
func check(s string) bool {
	fours := 0
	for fours < len(s) && s[fours] == '4' {
		fours++
	}
	if fours == 0 {
		// empty, or does not start with '4'
		return false
	}
	for k := fours; k < len(s); k++ {
		if s[k] != '0' {
			return false
		}
	}
	return true
}
For example,
package main
import "fmt"
// isFourZero reports whether s is a non-empty run of '4' bytes optionally
// followed by a run of '0' bytes, with nothing else after it.
func isFourZero(s string) bool {
	n := len(s)
	fours := 0
	for fours < n && s[fours] == '4' {
		fours++
	}
	if fours == 0 {
		return false
	}
	end := fours
	for end < n && s[end] == '0' {
		end++
	}
	// Valid only if the two runs together consumed the whole string.
	return end == n
}
// main prints each sample string, quoted, next to the result of isFourZero.
func main() {
	for _, in := range []string{"444", "44", "40", "4400", "4440", "404", "004"} {
		fmt.Printf("%q \t %t\n", in, isFourZero(in))
	}
}
Output:
"444" true
"44" true
"40" true
"4400" true
"4440" true
"404" false
"004" false
Since we care about speed, let's look at some benchmarks:
BenchmarkIsFourZeroPeterSO-4 10000000 201 ns/op
BenchmarkValidateYogeshDesai-4 5000000 347 ns/op
BenchmarkCheckMattn-4 2000000 602 ns/op
fourzero_test.go:
package main
import (
"strings"
"testing"
)
var tests = []struct{ s string }{
{"444"}, {"44"}, {"40"}, {"4400"}, {"4440"}, {"404"}, {"004"},
}
// BenchmarkIsFourZeroPeterSO runs isFourZero over every entry of tests once
// per benchmark iteration.
func BenchmarkIsFourZeroPeterSO(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for k := range tests {
			isFourZero(tests[k].s)
		}
	}
}
// BenchmarkValidateYogeshDesai runs validate over every entry of tests once
// per benchmark iteration.
func BenchmarkValidateYogeshDesai(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for k := range tests {
			validate(tests[k].s)
		}
	}
}
// BenchmarkCheckMattn runs check over every entry of tests once per
// benchmark iteration.
func BenchmarkCheckMattn(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for k := range tests {
			check(tests[k].s)
		}
	}
}
// isFourZero reports whether s is a non-empty run of '4' bytes optionally
// followed by a run of '0' bytes, with nothing else after it.
func isFourZero(s string) bool {
	pos := 0
	for pos < len(s) && s[pos] == '4' {
		pos++
	}
	if pos == 0 {
		// no leading '4'
		return false
	}
	for pos < len(s) && s[pos] == '0' {
		pos++
	}
	// Valid only if the two runs together consumed the whole string.
	return pos == len(s)
}
// validate reports whether str starts with '4' and never has a '4' directly
// after a '0'. Characters other than '0' and '4' are not rejected.
func validate(str string) bool {
	if !strings.HasPrefix(str, "4") {
		return false
	}
	return !strings.Contains(str, "04")
}
// check reports whether s is one or more '4' runes followed only by '0'
// runes. The string is converted to a rune slice before scanning.
func check(s string) bool {
	runes := []rune(s)
	lead := 0
	for lead < len(runes) && runes[lead] == '4' {
		lead++
	}
	if lead == 0 {
		// empty, or does not start with '4'
		return false
	}
	for _, r := range runes[lead:] {
		if r != '0' {
			return false
		}
	}
	return true
}
No RegExp
package main
import (
"fmt"
"strings"
)
// validate reports whether str starts with '4' and never has a '4' directly
// after a '0'. Characters other than '0' and '4' are not rejected.
func validate(str string) bool {
	return strings.HasPrefix(str, "4") && !strings.Contains(str, "04")
}
// main prints the result of validate for each sample string, one per line.
func main() {
	samples := [...]string{"4", "44", "4400", "4440", "404", "004"}
	for i := range samples {
		fmt.Println(validate(samples[i]))
	}
}
Output:
true
true
true
true
false
false
The following is another implementation using only a single loop:
// yetAnotherValidation reports whether s starts with '4' and never has a '4'
// directly after a '0'. For input made only of '0' and '4' this is exactly
// "one or more 4s followed by zero or more 0s".
//
// It does not check that s contains only '0' and '4'; the result for other
// characters depends on where they occur.
func yetAnotherValidation(s string) bool {
	//INVALID: if empty OR not started with '4'
	if len(s) == 0 || s[0] != '4' {
		return false
	}
	//INVALID: any '4' that directly follows a '0'. Testing the pair "04"
	//rather than the triple "404" also rejects inputs such as "4004".
	for k := 1; k < len(s); k++ {
		if s[k] == '4' && s[k-1] == '0' {
			return false
		}
	}
	return true
}
Note:
*404* (e.g. 404, 4404, 4040, ...) is INVALID.
If s contains a character other than 0 or 4, the result will be undefined (depending on the position of that character). If you need to ensure whether the input only contains 0 or 4, then:
// yetAnotherValidation2 reports whether s consists of one or more '4'
// characters followed by zero or more '0' characters. Any other character,
// and any '4' that follows a '0', makes s invalid.
func yetAnotherValidation2(s string) bool {
	//INVALID: if empty OR not started with '4'
	if len(s) == 0 || s[0] != '4' {
		return false
	}
	for k := 1; k < len(s); k++ {
		switch s[k] {
		case '0':
			// a '0' is always acceptable after the leading '4'
		case '4':
			//INVALID: a '4' directly after a '0'. Checking the pair "04"
			//rather than the triple "404" also rejects "4004".
			if s[k-1] == '0' {
				return false
			}
		default:
			//Neither 0 nor 4
			return false
		}
	}
	return true
}
UPDATE:
Test and benchmark result:
=== RUN TestValidate
444 true
44 true
40 true
4400 true
4440 true
404 false
004 false
--- PASS: TestValidate (0.00s)
BenchmarkYetAnotherValidation-4 50000000 38.5 ns/op
BenchmarkYetAnotherValidation2-4 30000000 45.6 ns/op
BenchmarkIsFourZero-4 20000000 54.5 ns/op
BenchmarkCheckMattn-4 10000000 144 ns/op
BenchmarkCheckMattnFast-4 30000000 50.2 ns/op
I'm sure that I'm doing something wrong, I have a Go program that parses in 3D models in OBJ format and outputs a json object. When I run it without adding in goroutines I get the following output:
$ go run objParser.go ak47.obj extincteur_obj.obj
--Creating ak47.json3d from ak47.obj
--Exported 85772 faces with 89088 verticies
--Creating extincteur_obj.json3d from extincteur_obj.obj
--Exported 150316 faces with 151425 verticies
Parsed 2 files in 8.4963s
Then I added in the goroutines and I get this output:
$ go run objParser.go ak47.obj extincteur_obj.obj
--Creating ak47.json3d from ak47.obj
--Creating extincteur_obj.json3d from extincteur_obj.obj
--Exported 85772 faces with 89088 verticies
--Exported 150316 faces with 151425 verticies
Parsed 2 files in 10.23137s
The order of how it's printed is what I expected given the interlacing of the parsing but I have no idea why it actually takes longer! The code is pretty long, I snipped what I could but it's still pretty long, sorry about that!
package main
// parseFile reads the OBJ file called name, collects its vertices, texture
// coordinates and faces, and writes them to a ".json3d" file whose name is
// derived from name by replacing ".obj". If the OBJ file references a
// material library that can be opened, its "map_Kd" and "Kd" entries are
// appended to the output as well.
//
// Exactly one value (-1) is sent on finished when the function returns,
// whether it succeeded or gave up early, so a caller that receives once per
// started parseFile never blocks forever.
func parseFile(name string, finished chan int) {
	// Signal completion on every return path, not only on success.
	defer func() { finished <- -1 }()

	var Verts []*Vertex
	var Texs []*TexCoord
	var Faces []*Face
	var objFile, mtlFile, jsonFile *os.File
	var parseMaterial bool

	// Set up files and i/o
	inName := name
	outName := strings.Replace(inName, ".obj", ".json3d", -1)
	parseMaterial = false
	fmt.Printf("--"+FgGreen+"Creating"+Reset+" %s from %s\n", outName, inName)

	var err error
	var part []byte
	var prefix bool

	if objFile, err = os.Open(inName); err != nil {
		fmt.Println(FgRed + "!!Failed to open input file!!" + Reset)
		return
	}
	defer objFile.Close()

	if jsonFile, err = os.Create(outName); err != nil {
		fmt.Println(FgRed + "!!Failed to create output file!!" + Reset)
		return
	}
	defer jsonFile.Close()

	reader := bufio.NewReader(objFile)
	writer := bufio.NewWriter(jsonFile)
	// Length 0, capacity 1024: bytes.NewBuffer treats the slice's length as
	// initial content, so a non-zero length would prepend NUL bytes to the
	// first line.
	buffer := bytes.NewBuffer(make([]byte, 0, 1024))

	// Read the file in and parse out what we need
	for {
		if part, prefix, err = reader.ReadLine(); err != nil {
			break
		}
		buffer.Write(part)
		// prefix is true while a long line is still being delivered in pieces.
		if !prefix {
			line := buffer.String()
			if strings.Contains(line, "v ") {
				Verts = append(Verts, parseVertex(line))
			} else if strings.Contains(line, "vt ") {
				Texs = append(Texs, parseTexCoord(line))
			} else if strings.Contains(line, "f ") {
				Faces = append(Faces, parseFace(line, Verts, Texs))
			} else if strings.Contains(line, "mtllib ") {
				mtlName := strings.Split(line, " ")[1]
				if mtlFile, err = os.Open(mtlName); err != nil {
					fmt.Printf("--"+FgRed+"Failed to find material file: %s\n"+Reset, mtlName)
					parseMaterial = false
				} else {
					defer mtlFile.Close()
					parseMaterial = true
				}
			}
			buffer.Reset()
		}
	}
	if err == io.EOF {
		err = nil
	}

	// Write out the data
	writer.WriteString("{\"obj\":[\n")

	// Write out the verts
	writer.WriteString("{\"vrt\":[\n")
	for i, vert := range Verts {
		writer.WriteString(vert.String())
		if i < len(Verts)-1 {
			writer.WriteString(",")
		}
		writer.WriteString("\n")
	}

	// Write out the faces
	writer.WriteString("],\"fac\":[\n")
	for i, face := range Faces {
		writer.WriteString(face.String(true))
		if i < len(Faces)-1 {
			writer.WriteString(",")
		}
		writer.WriteString("\n")
	}

	// Write out the normals
	writer.WriteString("],\"nrm\":[")
	for i, face := range Faces {
		writer.WriteString("[")
		for j, vert := range face.verts {
			// Each "normal" is the vertex position scaled to unit length.
			length := math.Sqrt((vert.X * vert.X) + (vert.Y * vert.Y) + (vert.Z * vert.Z))
			x := vert.X / length
			y := vert.Y / length
			z := vert.Z / length
			normal := fmt.Sprintf("[%f,%f,%f]", x, y, z)
			writer.WriteString(normal)
			if j < len(face.verts)-1 {
				writer.WriteString(",")
			}
		}
		writer.WriteString("]")
		//writer.WriteString("[0, 1, 0]")
		if i < len(Faces)-1 {
			writer.WriteString(",")
		}
		writer.WriteString("\n")
	}

	// Write out the tex coords
	writer.WriteString("],\"tex\":[")
	for i, face := range Faces {
		writer.WriteString("[")
		writer.WriteString(face.tex[0].String())
		writer.WriteString(",")
		writer.WriteString(face.tex[1].String())
		writer.WriteString(",")
		writer.WriteString(face.tex[2].String())
		writer.WriteString("]")
		if i < len(Faces)-1 {
			writer.WriteString(",")
		}
		writer.WriteString("\n")
	}

	// Close obj block
	writer.WriteString("]}]")

	if parseMaterial {
		// NOTE(review): the "mat" key is emitted without quotes, which is not
		// valid JSON; left unchanged in case consumers depend on it.
		writer.WriteString(",mat:[{")
		reader := bufio.NewReader(mtlFile)
		// Read the file in and parse out what we need
		for {
			if part, prefix, err = reader.ReadLine(); err != nil {
				break
			}
			buffer.Write(part)
			if !prefix {
				line := buffer.String()
				if strings.Contains(line, "map_Kd ") {
					parts := strings.Split(line, " ")
					entry := fmt.Sprintf("\"t\":\"%s\",", parts[1])
					writer.WriteString(entry)
					width, height := 256, 256
					var imageFile *os.File
					if imageFile, err = os.Open(parts[1]); err != nil {
						// Keep the 256x256 default and carry on, as the
						// message promises, instead of abandoning the export
						// half-written.
						fmt.Printf("--"+FgRed+"Failed to find %s, defaulting to 256x256"+Reset+"\n", parts[1])
					} else {
						var config image.Config
						imageReader := bufio.NewReader(imageFile)
						config, err = jpeg.DecodeConfig(imageReader)
						imageFile.Close()
						if err == nil {
							// Only trust the decoded size when decoding worked.
							width, height = config.Width, config.Height
						}
						fmt.Printf("--"+FgGreen+"Verifing"+Reset+" that %s is %dpx x %dpx\n", parts[1], width, height)
					}
					size := fmt.Sprintf("\"w\":%d,\"h\":%d,", width, height)
					writer.WriteString(size)
				} else if strings.Contains(line, "Kd ") {
					parts := strings.Split(line, " ")
					entry := fmt.Sprintf("\"r\":%s, \"g\":%s, \"b\":%s,", parts[1], parts[2], parts[3])
					writer.WriteString(entry)
				}
				buffer.Reset()
			}
		}
		if err == io.EOF {
			err = nil
		}
		writer.WriteString("\"res\":100,\"uv\":true}]")
	}

	// Close json
	writer.WriteString("}")
	writer.Flush()

	fmt.Printf("--"+FgGreen+"Exported"+Reset+" %d faces with %d verticies\n", len(Faces), len(Verts))
}
// main starts one parseFile goroutine per command-line argument, waits for
// each of them to report on the finished channel, and prints the number of
// files and the elapsed time.
func main() {
	// Verify we were called correctly
	if len(os.Args) < 2 {
		fmt.Println("Usage: go run objParser.go <OBJ File>")
		return
	}

	inputs := os.Args[1:]
	finished := make(chan int)
	now := time.Now()

	for _, name := range inputs {
		go parseFile(name, finished)
	}
	// One receive per goroutine started above.
	for range inputs {
		<-finished
	}

	fmt.Printf("Parsed %d files in %s\n", len(inputs), time.Since(now))
}
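You should set the GOMAXPROCS environment variable to the maximum number of usable processors, or call runtime.GOMAXPROCS at run time. (Since Go 1.5 the runtime already defaults GOMAXPROCS to the number of available CPUs, so this is only needed on older versions.)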