forked from tealeg/xlsx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stream_file_builder.go
417 lines (382 loc) · 16.3 KB
/
stream_file_builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
// Authors: Ryan Hollis (ryanh@)
// The purpose of StreamFileBuilder and StreamFile is to allow streamed writing of XLSX files.
// Directions:
// 1. Create a StreamFileBuilder with NewStreamFileBuilder() or NewStreamFileBuilderForPath().
// 2. Add the sheets and their first row of data by calling AddSheet().
// 3. Call Build() to get a StreamFile. Once built, all functions on the builder will return an error.
// 4. Write to the StreamFile with Write(). Writes begin on the first sheet. New rows are always written and flushed
// to the io. All rows written to the same sheet must have the same number of cells as the header provided when the sheet
// was created or an error will be returned.
// 5. Call NextSheet() to proceed to the next sheet. Once NextSheet() is called, the previous sheet can not be edited.
// 6. Call Close() to finish.
// Directions for using custom styles and different data types:
// 1. Create a StreamFileBuilder with NewStreamFileBuilder() or NewStreamFileBuilderForPath().
// 2. Use MakeStyle() to create the styles you want yo use in your document. Keep a list of these styles.
// 3. Add all the styles you created by using AddStreamStyle() or AddStreamStyleList().
// 4. Add the sheets and their column styles of data by calling AddSheetS().
// 5. Call Build() to get a StreamFile. Once built, all functions on the builder will return an error.
// 6. Write to the StreamFile with WriteS(). Writes begin on the first sheet. New rows are always written and flushed
// to the io. All rows written to the same sheet must have the same number of cells as the number of column styles
// provided when adding the sheet with AddSheetS() or an error will be returned.
// 5. Call NextSheet() to proceed to the next sheet. Once NextSheet() is called, the previous sheet can not be edited.
// 6. Call Close() to finish.
// Future work suggestions:
// The current default style uses fonts that are not on Macs by default so opening the XLSX files in Numbers causes a
// pop up that says there are missing fonts. The font could be changed to something that is usually found on Mac and PC.
// Extend support for Formulas and Shared Strings.
package xlsx
import (
"archive/zip"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
)
type StreamFileBuilder struct {
built bool
firstSheetAdded bool
customStylesAdded bool
xlsxFile *File
zipWriter *zip.Writer
cellTypeToStyleIds map[CellType]int
maxStyleId int
styleIds [][]int
customStreamStyles map[StreamStyle]struct{}
styleIdMap map[StreamStyle]int
defaultColumnCellMetadataAdded bool
}
const (
sheetFilePathPrefix = "xl/worksheets/sheet"
sheetFilePathSuffix = ".xml"
endSheetDataTag = "</sheetData>"
dimensionTag = `<dimension ref="%s"></dimension>`
// This is the index of the max style that this library will insert into XLSX sheets by default.
// This allows us to predict what the style id of styles that we add will be.
// TestXlsxStyleBehavior tests that this behavior continues to be what we expect.
initMaxStyleId = 1
)
var BuiltStreamFileBuilderError = errors.New("StreamFileBuilder has already been built, functions may no longer be used")
// NewStreamFileBuilder creates an StreamFileBuilder that will write to the the provided io.writer
func NewStreamFileBuilder(writer io.Writer) *StreamFileBuilder {
return &StreamFileBuilder{
zipWriter: zip.NewWriter(writer),
xlsxFile: NewFile(),
cellTypeToStyleIds: make(map[CellType]int),
maxStyleId: initMaxStyleId,
customStreamStyles: make(map[StreamStyle]struct{}),
styleIdMap: make(map[StreamStyle]int),
}
}
// NewStreamFileBuilderForPath takes the name of an XLSX file and returns a builder for it.
// The file will be created if it does not exist, or truncated if it does.
func NewStreamFileBuilderForPath(path string) (*StreamFileBuilder, error) {
file, err := os.Create(path)
if err != nil {
return nil, err
}
return NewStreamFileBuilder(file), nil
}
// AddSheet will add sheets with the given name with the provided headers. The headers cannot be edited later, and all
// rows written to the sheet must contain the same number of cells as the header. Sheet names must be unique, or an
// error will be thrown.
func (sb *StreamFileBuilder) AddSheet(name string, headers []string, cellTypes []*CellType) error {
if sb.built {
return BuiltStreamFileBuilderError
}
if len(cellTypes) > len(headers) {
return errors.New("cellTypes is longer than headers")
}
sheet, err := sb.xlsxFile.AddSheet(name)
if err != nil {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return err
}
sb.styleIds = append(sb.styleIds, []int{})
row := sheet.AddRow()
if count := row.WriteSlice(&headers, -1); count != len(headers) {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return errors.New("failed to write headers")
}
for i, cellType := range cellTypes {
var cellStyleIndex int
var ok bool
if cellType != nil {
// The cell type is one of the attributes of a Style.
// Since it is the only attribute of Style that we use, we can assume that cell types
// map one to one with Styles and their Style ID.
// If a new cell type is used, a new style gets created with an increased id, if an existing cell type is
// used, the pre-existing style will also be used.
cellStyleIndex, ok = sb.cellTypeToStyleIds[*cellType]
if !ok {
sb.maxStyleId++
cellStyleIndex = sb.maxStyleId
sb.cellTypeToStyleIds[*cellType] = sb.maxStyleId
}
sheet.Cols[i].SetType(*cellType)
}
sb.styleIds[len(sb.styleIds)-1] = append(sb.styleIds[len(sb.styleIds)-1], cellStyleIndex)
}
return nil
}
func (sb *StreamFileBuilder) AddSheetWithDefaultColumnMetadata(name string, headers []string, columnsDefaultCellMetadata []*CellMetadata) error {
if sb.built {
return BuiltStreamFileBuilderError
}
if len(columnsDefaultCellMetadata) > len(headers) {
return errors.New("columnsDefaultCellMetadata is longer than headers")
}
sheet, err := sb.xlsxFile.AddSheet(name)
if err != nil {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return err
}
sb.styleIds = append(sb.styleIds, []int{})
row := sheet.AddRow()
if count := row.WriteSlice(&headers, -1); count != len(headers) {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return errors.New("failed to write headers")
}
for i, cellMetadata := range columnsDefaultCellMetadata {
var cellStyleIndex int
var ok bool
if cellMetadata != nil {
// Exact same logic as `AddSheet` to ensure compatibility as much as possible
// with the `AddSheet` + `StreamFile.Write` code path
cellStyleIndex, ok = sb.cellTypeToStyleIds[cellMetadata.cellType]
if !ok {
sb.maxStyleId++
cellStyleIndex = sb.maxStyleId
sb.cellTypeToStyleIds[cellMetadata.cellType] = sb.maxStyleId
}
// Add streamStyle and set default cell metadata on col
sb.customStreamStyles[cellMetadata.streamStyle] = struct{}{}
sheet.Cols[i].SetCellMetadata(*cellMetadata)
}
sb.styleIds[len(sb.styleIds)-1] = append(sb.styleIds[len(sb.styleIds)-1], cellStyleIndex)
}
// Add fall back streamStyle
sb.customStreamStyles[StreamStyleDefaultString] = struct{}{}
// Toggle to true to ensure `styleIdMap` is constructed from `customStreamStyles` on `Build`
sb.customStylesAdded = true
// Hack to ensure the `dimension` tag on each `worksheet` xml is stripped. Otherwise only the first
// row of each worksheet will be read back rather than all rows
sb.defaultColumnCellMetadataAdded = true
return nil
}
// AddSheetS will add a sheet with the given name and column styles. The number of column styles given
// is the number of columns that will be created, and thus the number of cells each row has to have.
// columnStyles[0] becomes the style of the first column, columnStyles[1] the style of the second column etc.
// All the styles in columnStyles have to have been added or an error will be returned.
// Sheet names must be unique, or an error will be returned.
func (sb *StreamFileBuilder) AddSheetS(name string, columnStyles []StreamStyle) error {
if sb.built {
return BuiltStreamFileBuilderError
}
sheet, err := sb.xlsxFile.AddSheet(name)
if err != nil {
// Set built on error so that all subsequent calls to the builder will also fail.
sb.built = true
return err
}
// To make sure no new styles can be added after adding a sheet
sb.firstSheetAdded = true
// Check if all styles that will be used for columns have been created
for _, colStyle := range columnStyles {
if _, ok := sb.customStreamStyles[colStyle]; !ok {
return errors.New("trying to make use of a style that has not been added")
}
}
// Is needed for stream file to work but is not needed for streaming with styles
sb.styleIds = append(sb.styleIds, []int{})
sheet.maybeAddCol(len(columnStyles))
// Set default column styles based on the cel styles in the first row
// Set the default column width to 11. This makes enough places for the
// default date style cells to display the dates correctly
for i, colStyle := range columnStyles {
sheet.Cols[i].SetStreamStyle(colStyle)
sheet.Cols[i].Width = 11
}
return nil
}
// AddValidation will add a validation to a specific column.
func (sb *StreamFileBuilder) AddValidation(sheetIndex, colIndex, rowStartIndex int, validation *xlsxCellDataValidation) {
sheet := sb.xlsxFile.Sheets[sheetIndex]
column := sheet.Col(colIndex)
column.SetDataValidationWithStart(validation, rowStartIndex)
}
// Build begins streaming the XLSX file to the io, by writing all the XLSX metadata. It creates a StreamFile struct
// that can be used to write the rows to the sheets.
func (sb *StreamFileBuilder) Build() (*StreamFile, error) {
if sb.built {
return nil, BuiltStreamFileBuilderError
}
sb.built = true
parts, err := sb.xlsxFile.MarshallParts()
if err != nil {
return nil, err
}
if sb.customStylesAdded {
parts["xl/styles.xml"], err = sb.marshalStyles()
if err != nil {
return nil, err
}
}
es := &StreamFile{
zipWriter: sb.zipWriter,
xlsxFile: sb.xlsxFile,
sheetXmlPrefix: make([]string, len(sb.xlsxFile.Sheets)),
sheetXmlSuffix: make([]string, len(sb.xlsxFile.Sheets)),
styleIds: sb.styleIds,
styleIdMap: sb.styleIdMap,
}
for path, data := range parts {
// If the part is a sheet, don't write it yet. We only want to write the XLSX metadata files, since at this
// point the sheets are still empty. The sheet files will be written later as their rows come in.
if strings.HasPrefix(path, sheetFilePathPrefix) {
// sb.defaultColumnCellMetadataAdded is a hack because neither the `AddSheet` nor `AddSheetS` codepaths
// actually encode a valid worksheet dimension. `AddSheet` encodes an empty one: "" and `AddSheetS` encodes
// an effectively empty one: "A1". `AddSheetWithDefaultColumnMetadata` uses logic from both paths which results
// in an effectively invalid dimension being encoded which, upon read, results in only reading in the header of
// a given worksheet and non of the rows that follow
if err := sb.processEmptySheetXML(es, path, data, !sb.customStylesAdded || sb.defaultColumnCellMetadataAdded); err != nil {
return nil, err
}
continue
}
metadataFile, err := sb.zipWriter.Create(path)
if err != nil {
return nil, err
}
_, err = metadataFile.Write([]byte(data))
if err != nil {
return nil, err
}
}
if err := es.NextSheet(); err != nil {
return nil, err
}
return es, nil
}
func (sb *StreamFileBuilder) marshalStyles() (string, error) {
for streamStyle, _ := range sb.customStreamStyles {
XfId := handleStyleForXLSX(streamStyle.style, streamStyle.xNumFmtId, sb.xlsxFile.styles)
sb.styleIdMap[streamStyle] = XfId
}
styleSheetXMLString, err := sb.xlsxFile.styles.Marshal()
if err != nil {
return "", err
}
return styleSheetXMLString, nil
}
// AddStreamStyle adds a new style to the style sheet.
// Only Styles that have been added through this function will be usable.
// This function cannot be used after AddSheetS or Build has been called, and if it is
// called after AddSheetS or Buildit will return an error.
func (sb *StreamFileBuilder) AddStreamStyle(streamStyle StreamStyle) error {
if sb.firstSheetAdded {
return errors.New("at least one sheet has been added, cannot add new styles anymore")
}
if sb.built {
return errors.New("file has been build, cannot add new styles anymore")
}
sb.customStreamStyles[streamStyle] = struct{}{}
sb.customStylesAdded = true
return nil
}
// AddStreamStyleList adds a list of new styles to the style sheet.
// Only Styles that have been added through either this function or AddStreamStyle will be usable.
// This function cannot be used after AddSheetS and Build has been called, and if it is
// called after AddSheetS and Build it will return an error.
func (sb *StreamFileBuilder) AddStreamStyleList(streamStyles []StreamStyle) error {
for _, streamStyle := range streamStyles {
err := sb.AddStreamStyle(streamStyle)
if err != nil {
return err
}
}
return nil
}
// processEmptySheetXML will take in the path and XML data of an empty sheet, and will save the beginning and end of the
// XML file so that these can be written at the right time.
func (sb *StreamFileBuilder) processEmptySheetXML(sf *StreamFile, path, data string, removeDimensionTagFlag bool) error {
// Get the sheet index from the path
sheetIndex, err := getSheetIndex(sf, path)
if err != nil {
return err
}
// Remove the Dimension tag. Since more rows are going to be written to the sheet, it will be wrong.
// It is valid to for a sheet to be missing a Dimension tag, but it is not valid for it to be wrong.
if removeDimensionTagFlag {
data, err = removeDimensionTag(data, sf.xlsxFile.Sheets[sheetIndex])
if err != nil {
return err
}
}
// Split the sheet at the end of its SheetData tag so that more rows can be added inside.
prefix, suffix, err := splitSheetIntoPrefixAndSuffix(data)
if err != nil {
return err
}
sf.sheetXmlPrefix[sheetIndex] = prefix
sf.sheetXmlSuffix[sheetIndex] = suffix
return nil
}
// getSheetIndex parses the path to the XLSX sheet data and returns the index
// The files that store the data for each sheet must have the format:
// xl/worksheets/sheet123.xml
// where 123 is the index of the sheet. This file path format is part of the XLSX file standard.
func getSheetIndex(sf *StreamFile, path string) (int, error) {
indexString := path[len(sheetFilePathPrefix) : len(path)-len(sheetFilePathSuffix)]
sheetXLSXIndex, err := strconv.Atoi(indexString)
if err != nil {
return -1, errors.New("unexpected sheet file name from xlsx package")
}
if sheetXLSXIndex < 1 || len(sf.sheetXmlPrefix) < sheetXLSXIndex ||
len(sf.sheetXmlSuffix) < sheetXLSXIndex || len(sf.xlsxFile.Sheets) < sheetXLSXIndex {
return -1, errors.New("unexpected sheet index")
}
sheetArrayIndex := sheetXLSXIndex - 1
return sheetArrayIndex, nil
}
// removeDimensionTag will return the passed in XLSX Spreadsheet XML with the dimension tag removed.
// data is the XML data for the sheet
// sheet is the Sheet struct that the XML was created from.
// Can return an error if the XML's dimension tag does not match what is expected based on the provided Sheet
func removeDimensionTag(data string, sheet *Sheet) (string, error) {
x := len(sheet.Cols) - 1
y := len(sheet.Rows) - 1
if x < 0 {
x = 0
}
if y < 0 {
y = 0
}
var dimensionRef string
if x == 0 && y == 0 {
dimensionRef = "A1"
} else {
endCoordinate := GetCellIDStringFromCoords(x, y)
dimensionRef = "A1:" + endCoordinate
}
dataParts := strings.Split(data, fmt.Sprintf(dimensionTag, dimensionRef))
if len(dataParts) != 2 {
return "", errors.New("unexpected Sheet XML: dimension tag not found")
}
return dataParts[0] + dataParts[1], nil
}
// splitSheetIntoPrefixAndSuffix will split the provided XML sheet into a prefix and a suffix so that
// more spreadsheet rows can be inserted in between.
func splitSheetIntoPrefixAndSuffix(data string) (string, string, error) {
// Split the sheet at the end of its SheetData tag so that more rows can be added inside.
sheetParts := strings.Split(data, endSheetDataTag)
if len(sheetParts) != 2 {
return "", "", errors.New("unexpected Sheet XML: SheetData close tag not found")
}
return sheetParts[0], sheetParts[1], nil
}