-
Notifications
You must be signed in to change notification settings - Fork 0
/
fixedwidth.go
96 lines (69 loc) · 2.05 KB
/
fixedwidth.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*
Copyright 2019 The David Buril Cardozo de Oliveira
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package fixedwith infers the column boundaries of fixed-width text files.
package fixedwith
import (
"bufio"
"errors"
"io"
"regexp"
)
// InferColumnsIndex infers where the columns of a fixed-width text file start
// by sampling its lines.
//
// It reads up to sampleValue lines from reader; when sampleValue is 0 it reads
// every line until EOF. For each line, every byte offset that lies inside a
// run of non-whitespace characters, except the first byte of that run, is
// marked. Marks accumulate across all sampled lines and are then converted to
// column offsets by parseColumnIndex. Because the first byte of a run is never
// marked, a run that is a single byte long marks nothing.
//
// A line longer than the reader's buffer is delivered by ReadLine in several
// fragments; those fragments are reassembled so the line is analysed (and
// counted against sampleValue) exactly once.
//
// It returns an error when reader is nil or when reading fails with anything
// other than io.EOF.
func InferColumnsIndex(reader *bufio.Reader, sampleValue uint) (columnsIndex []uint, err error) {
	if reader == nil {
		return nil, errors.New("the reader parameter is required")
	}

	var columns []bool
	for i := uint(0); i < sampleValue || sampleValue == 0; i++ {
		line, isPrefix, readErr := reader.ReadLine()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return nil, readErr
		}

		// reachedEOF records that the input ended while a long line was still
		// being reassembled; the partial line is processed, then the loop stops.
		reachedEOF := false
		if isPrefix {
			// The slice returned by ReadLine is only valid until the next
			// read, so copy it before fetching the remaining fragments.
			line = append([]byte(nil), line...)
			for isPrefix {
				var rest []byte
				rest, isPrefix, readErr = reader.ReadLine()
				if readErr == io.EOF {
					reachedEOF = true
					break
				}
				if readErr != nil {
					return nil, readErr
				}
				line = append(line, rest...)
			}
		}

		// Grow the marker slice so it covers the longest line seen so far.
		if missing := len(line) - len(columns); missing > 0 {
			columns = append(columns, make([]bool, missing)...)
		}

		for _, span := range findAllStringIndex(line) {
			// Mark the interior of the run, skipping its first byte.
			for j := span[0] + 1; j < span[1]; j++ {
				columns[j] = true
			}
		}

		if reachedEOF {
			break
		}
	}

	return parseColumnIndex(columns), nil
}
// parseColumnIndex converts a slice of position markers into column offsets.
//
// For every position where the marker switches from false to true, the offset
// of the position immediately before it is emitted; when the very first
// marker is already true, offset 0 is emitted. The returned slice is never
// nil, even when no offsets are found.
func parseColumnIndex(columns []bool) []uint {
	starts := []uint{}
	for pos := range columns {
		if !columns[pos] {
			continue
		}
		if pos == 0 {
			// No preceding position exists, so the column starts at 0.
			starts = append(starts, 0)
			continue
		}
		if !columns[pos-1] {
			starts = append(starts, uint(pos-1))
		}
	}
	return starts
}
// nonSpaceRun matches a maximal run of non-whitespace characters. It is
// compiled once at package initialization rather than on every call.
var nonSpaceRun = regexp.MustCompile(`[^\s]+`)

// findAllStringIndex returns the [start, end) byte offsets of every run of
// non-whitespace characters in text, in order of appearance. It returns nil
// when text contains no such run.
func findAllStringIndex(text []byte) [][]int {
	return nonSpaceRun.FindAllIndex(text, -1)
}