forked from joncrlsn/dque
-
Notifications
You must be signed in to change notification settings - Fork 0
/
segment.go
430 lines (350 loc) · 11.7 KB
/
segment.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
package dque
//
// Copyright (c) 2018 Jon Carlson. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
//
//
// This is a segment of a memory-efficient FIFO durable queue. Items in the queue must be of the same type.
//
// Each qSegment instance corresponds to a file on disk.
//
// This segment is both persistent and in-memory so there is a memory limit to the size
// (which is why it is just a segment instead of being used for the entire queue).
//
import (
"bytes"
"encoding/binary"
"encoding/gob"
"fmt"
"io"
"os"
"path"
"sync"
"github.com/pkg/errors"
)
// ErrCorruptedSegment is returned when a segment file cannot be opened due to inconsistent formatting.
// Recovery may be possible by clearing or deleting the file, then reloading using dque.New().
type ErrCorruptedSegment struct {
Path string
Err error
}
// Error returns a string describing ErrCorruptedSegment
func (e ErrCorruptedSegment) Error() string {
return fmt.Sprintf("segment file %s is corrupted: %s", e.Path, e.Err)
}
// Unwrap returns the wrapped error
func (e ErrCorruptedSegment) Unwrap() error {
return e.Err
}
// ErrUnableToDecode is returned when an object cannot be decoded.
type ErrUnableToDecode struct {
Path string
Err error
}
// Error returns a string describing ErrUnableToDecode error
func (e ErrUnableToDecode) Error() string {
return fmt.Sprintf("object in segment file %s cannot be decoded: %s", e.Path, e.Err)
}
// Unwrap returns the wrapped error
func (e ErrUnableToDecode) Unwrap() error {
return e.Err
}
var (
errEmptySegment = errors.New("Segment is empty")
)
// qSegment represents a portion (segment) of a persistent queue
type qSegment struct {
dirPath string
number int
objects []interface{}
objectBuilder func() interface{}
file *os.File
mutex sync.Mutex
removeCount int
turbo bool
maybeDirty bool // filesystem changes may not have been flushed to disk
syncCount int64 // for testing
}
// load reads all objects from the queue file into a slice
// returns ErrCorruptedSegment or ErrUnableToDecode for errors pertaining to file contents.
func (seg *qSegment) load() error {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
// Open the file in read mode
f, err := os.OpenFile(seg.filePath(), os.O_RDONLY, 0644)
if err != nil {
return errors.Wrap(err, "error opening file: "+seg.filePath())
}
defer f.Close()
seg.file = f
// Loop until we can load no more
for {
// Read the 4 byte length of the gob
lenBytes := make([]byte, 4)
if n, err := io.ReadFull(seg.file, lenBytes); err != nil {
if err == io.EOF {
return nil
}
return ErrCorruptedSegment{
Path: seg.filePath(),
Err: errors.Wrapf(err, "error reading object length (read %d/4 bytes)", n),
}
}
// Convert the bytes into a 32-bit unsigned int
gobLen := binary.LittleEndian.Uint32(lenBytes)
if gobLen == 0 {
// Remove the first item from the in-memory queue
if len(seg.objects) == 0 {
return ErrCorruptedSegment{
Path: seg.filePath(),
Err: fmt.Errorf("excess deletion records (%d)", seg.removeCount+1),
}
}
seg.objects = seg.objects[1:]
// log.Println("TEMP: Detected delete in load()")
seg.removeCount++
continue
}
data := make([]byte, int(gobLen))
if _, err := io.ReadFull(seg.file, data); err != nil {
return ErrCorruptedSegment{
Path: seg.filePath(),
Err: errors.Wrap(err, "error reading gob data from file"),
}
}
// Decode the bytes into an object
object := seg.objectBuilder()
if err := gob.NewDecoder(bytes.NewReader(data)).Decode(object); err != nil {
return ErrUnableToDecode{
Path: seg.filePath(),
Err: errors.Wrapf(err, "failed to decode %T", object),
}
}
// Add item to the objects slice
seg.objects = append(seg.objects, object)
// log.Printf("TEMP: Loaded: %#v\n", object)
}
}
// peek returns the first item in the segment without removing it.
// If the queue is already empty, the emptySegment error will be returned.
func (seg *qSegment) peek() (interface{}, error) {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
if len(seg.objects) == 0 {
// Queue is empty so return nil object (and emptySegment error)
return nil, errEmptySegment
}
// Save a reference to the first item in the in-memory queue
object := seg.objects[0]
return object, nil
}
// remove removes and returns the first item in the segment and adds
// a zero length marker to the end of the queue file to signify a removal.
// If the queue is already empty, the emptySegment error will be returned.
func (seg *qSegment) remove() (interface{}, error) {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
if len(seg.objects) == 0 {
// Queue is empty so return nil object (and empty_segment error)
return nil, errEmptySegment
}
// Create a 4-byte length of value zero (this signifies a removal)
deleteLen := 0
deleteLenBytes := make([]byte, 4)
binary.LittleEndian.PutUint32(deleteLenBytes, uint32(deleteLen))
// Write the 4-byte length (of zero) first
if _, err := seg.file.Write(deleteLenBytes); err != nil {
return nil, errors.Wrapf(err, "failed to remove item from segment %d", seg.number)
}
// Save a reference to the first item in the in-memory queue
object := seg.objects[0]
// Remove the first item from the in-memory queue
seg.objects = seg.objects[1:]
// Increment the delete count
seg.removeCount++
// Possibly force writes to disk
if err := seg._sync(); err != nil {
return nil, err
}
return object, nil
}
// Add adds an item to the in-memory queue segment and appends it to the persistent file
func (seg *qSegment) add(object interface{}) error {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
// Encode the struct to a byte buffer
var buff bytes.Buffer
enc := gob.NewEncoder(&buff)
if err := enc.Encode(object); err != nil {
return errors.Wrap(err, "error gob encoding object")
}
// Count the bytes stored in the byte buffer
// and store the count into a 4-byte byte array
buffLen := len(buff.Bytes())
buffLenBytes := make([]byte, 4)
binary.LittleEndian.PutUint32(buffLenBytes, uint32(buffLen))
// Write the 4-byte buffer length first
if _, err := seg.file.Write(buffLenBytes); err != nil {
return errors.Wrapf(err, "failed to write object length to segment %d", seg.number)
}
// Then write the buffer bytes
if _, err := seg.file.Write(buff.Bytes()); err != nil {
return errors.Wrapf(err, "failed to write object to segment %d", seg.number)
}
// Rebuild object from buffer to prevent underlying data conflict
objectDup := seg.objectBuilder()
if err := gob.NewDecoder(&buff).Decode(objectDup); err != nil {
return errors.Wrap(err, "error gob decode object")
}
seg.objects = append(seg.objects, objectDup)
// Possibly force writes to disk
return seg._sync()
}
// size returns the number of objects in this segment.
// The size does not include items that have been removed.
func (seg *qSegment) size() int {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
return len(seg.objects)
}
// sizeOnDisk returns the number of objects in memory plus removed objects. This
// number will match the number of objects still on disk.
// This number is used to keep the file from growing forever when items are
// removed about as fast as they are added.
func (seg *qSegment) sizeOnDisk() int {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
return len(seg.objects) + seg.removeCount
}
// delete wipes out the queue and its persistent state
func (seg *qSegment) delete() error {
// This is heavy-handed but its safe
seg.mutex.Lock()
defer seg.mutex.Unlock()
if err := seg.file.Close(); err != nil {
return errors.Wrap(err, "unable to close the segment file before deleting")
}
// Delete the storage for this queue
err := os.Remove(seg.filePath())
if err != nil {
return errors.Wrap(err, "error deleting file: "+seg.filePath())
}
// Empty the in-memory slice of objects
seg.objects = seg.objects[:0]
seg.file = nil
return nil
}
func (seg *qSegment) fileName() string {
return fmt.Sprintf("%013d.dque", seg.number)
}
func (seg *qSegment) filePath() string {
return path.Join(seg.dirPath, seg.fileName())
}
// turboOn allows the filesystem to decide when to sync file changes to disk
// Speed is be greatly increased by turning turbo on, however there is some
// risk of losing data should a power-loss occur.
func (seg *qSegment) turboOn() {
seg.turbo = true
}
// turboOff re-enables the "safety" mode that syncs every file change to disk as
// they happen.
func (seg *qSegment) turboOff() error {
if !seg.turbo {
// turboOff is know to be called twice when the first and last ssegments
// are the same.
return nil
}
if err := seg.turboSync(); err != nil {
return err
}
seg.turbo = false
return nil
}
// turboSync does an fsync to disk if turbo is on.
func (seg *qSegment) turboSync() error {
if !seg.turbo {
// When the first and last segments are the same, this method
// will be called twice.
return nil
}
if seg.maybeDirty {
if err := seg.file.Sync(); err != nil {
return errors.Wrap(err, "unable to sync file changes.")
}
seg.syncCount++
seg.maybeDirty = false
}
return nil
}
// _sync must only be called by the add and remove methods on qSegment.
// Only syncs if turbo is off
func (seg *qSegment) _sync() error {
if seg.turbo {
// We do *not* force a sync if turbo is on
// We just mark it maybe dirty
seg.maybeDirty = true
return nil
}
if err := seg.file.Sync(); err != nil {
return errors.Wrap(err, "unable to sync file changes in _sync method.")
}
seg.syncCount++
seg.maybeDirty = false
return nil
}
// close is used when this is the last segment, but is now full, so we are
// creating a new last segment.
// This should only be called if this segment is not also the first segment.
func (seg *qSegment) close() error {
if err := seg.file.Close(); err != nil {
return errors.Wrapf(err, "unable to close segment file %s.", seg.fileName())
}
return nil
}
// newQueueSegment creates a new, persistent segment of the queue
func newQueueSegment(dirPath string, number int, turbo bool, builder func() interface{}) (*qSegment, error) {
seg := qSegment{dirPath: dirPath, number: number, turbo: turbo, objectBuilder: builder}
if !dirExists(seg.dirPath) {
return nil, errors.New("dirPath is not a valid directory: " + seg.dirPath)
}
if fileExists(seg.filePath()) {
return nil, errors.New("file already exists: " + seg.filePath())
}
// Create the file in append mode
var err error
seg.file, err = os.OpenFile(seg.filePath(), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return nil, errors.Wrapf(err, "error creating file: %s.", seg.filePath())
}
// Leave the file open for future writes
return &seg, nil
}
// openQueueSegment reads an existing persistent segment of the queue into memory
func openQueueSegment(dirPath string, number int, turbo bool, builder func() interface{}) (*qSegment, error) {
seg := qSegment{dirPath: dirPath, number: number, turbo: turbo, objectBuilder: builder}
if !dirExists(seg.dirPath) {
return nil, errors.New("dirPath is not a valid directory: " + seg.dirPath)
}
if !fileExists(seg.filePath()) {
return nil, errors.New("file does not exist: " + seg.filePath())
}
// Load the items into memory
if err := seg.load(); err != nil {
return nil, errors.Wrap(err, "unable to load queue segment in "+dirPath)
}
// Re-open the file in append mode
var err error
seg.file, err = os.OpenFile(seg.filePath(), os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return nil, errors.Wrap(err, "error opening file: "+seg.filePath())
}
// Leave the file open for future writes
return &seg, nil
}