Skip to content

Commit

Permalink
handle failed checksum in various parts of file
Browse files Browse the repository at this point in the history
  • Loading branch information
l-k- committed Dec 4, 2023
1 parent 12b343b commit 9e6ed53
Showing 1 changed file with 145 additions and 31 deletions.
176 changes: 145 additions & 31 deletions R/readAxivity.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
if (length(configtz) == 0) configtz = desiredtz
blockBytes = 512
headerBytes = 1024
maxAllowedCorruptBlocks = 20 # max number consecutive blocks with a failed checksum that we'll tolerate

# Credits: The original version of this code developed outside GitHub was
# contributed by Dr. Evgeny Mirkes (Leicester University, UK)
#========================================================================
Expand Down Expand Up @@ -75,6 +77,28 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
return(NULL)
}

# sampling rate in one of file format U8 at offset 24
samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)

if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
checksum = sum(readBin(block, n = 256,
integer(),
size = 2,
signed = FALSE,
endian = "little"))
checksum = checksum %% 65536 # 65536 = 2^16; the checksum is calculated as a 16-bit integer
if (checksum != 0) {
# Checksum doesn't match. This means some bits in this block got corrupted.
# We don't know which, so we can't trust this block. We skip it, and impute it later.
rawdata_list = list(
struc = struc,
parameters = parameters,
checksum_pass = FALSE
)
return(invisible(rawdata_list))
}
}

idstr = readChar(block, 2, useBytes = TRUE)
if (idstr != "AX") {
stop("Packet header is incorrect. First two characters must be AX.")
Expand Down Expand Up @@ -114,22 +138,6 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
} else {
battery = 0
}
# sampling rate in one of file format U8 at offset 24
samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)

checksum_pass = TRUE
if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
# Perform checksum
checksum = sum(readBin(block, n = 256,
integer(),
size = 2,
signed = FALSE,
endian = "little"))
checksum = checksum %% 65536 # equals 2^16 the checksum is calculated on a 16bit integer
if (checksum != 0) {
checksum_pass = FALSE
}
}

# offset 25, per documentation:
# "top nibble: number of axes, 3=Axyz, 6=Gxyz/Axyz, 9=Gxyz/Axyz/Mxyz;
Expand Down Expand Up @@ -244,7 +252,7 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
length = blockLength,
struc = struc,
parameters = parameters,
checksum_pass = checksum_pass,
checksum_pass = TRUE,
blockID = blockID
)
if (complete) {
Expand Down Expand Up @@ -302,14 +310,30 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
accrange = bitwShiftR(16, (bitwShiftR(samplerate_dynrange, 6)))
version = readBin(block[42], integer(), size = 1, signed = FALSE) #offset 41

# Read the first data block without data
datas = readDataBlock(fid, complete = FALSE)
if (is.null(datas)) {
stop("Error reading the first data block.")
# Read the first data block without data.
# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in 1:maxAllowedCorruptBlocks+1) {
datas = readDataBlock(fid, complete = FALSE)

if (is.null(datas)) {
stop("Error reading the first data block.")
}

if (datas$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt block #", ii)
}
if (is_corrupt) {
stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")
}

if (frequency_header != datas$frequency) {
warning("Inconsistent value of measurement frequency: there is ",
frequency_header, " in header and ", datas$frequency, " in the first data block ")
frequency_header, " in header and ", datas$frequency, " in the first data block. ")
}

blockLength = datas$length # number of samples in a block
Expand Down Expand Up @@ -340,6 +364,9 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
on.exit({
close(fid)
})

QClog = NULL # Initialise log of data quality issues

#############################################################################
# read header
if (is.null(header)) {
Expand Down Expand Up @@ -368,14 +395,59 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
struc = list(0,0L,0)
if (end < numDBlocks) { # the end block isn't part of the data we'll read, but its start will be our ending timestamp
seek(fid, headerBytes + blockBytes * end, origin = 'start')
endBlock = readDataBlock(fid, struc = struc)

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
# Skip forward, so we'll end up reading more blocks than requested.
# This is important because when corrupt blocks are in the beginning of the requested interval,
# we also skip forward, ending up with fewer blocks than requested.
# for example, if we have a file with corrupt blocks 8-12, and we read the file 10 blocks at a time,
# then the 1st request will end up with blocks 1-12, and the 2nd request with blocks 13-20,
# and between the two requests, all 20 blocks will be accounted for.
is_corrupt = TRUE
for (ii in end : min(numDBlocks-1, end+maxAllowedCorruptBlocks+1)) {
endBlock = readDataBlock(fid, struc = struc)

if (endBlock$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt end block #", ii)
end = end + 1
}
if (is_corrupt && ii == end+maxAllowedCorruptBlocks+1) {
stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")
}
endTimestamp = as.numeric(endBlock$start)
} else {
}

if (end == numDBlocks) {
# end == numDBlocks, meaning we'll be reading all the remaining blocks.
# There is no block #numDBlocks, so we can't get the ending timestamp from the start of that block.
# Instead read the very last block of the file, and project what the ending timestamp should be.
seek(fid, headerBytes + blockBytes * (end-1), origin = 'start')
lastBlock = readDataBlock(fid, struc = struc)
# Instead read the very last block of the file (if the last block is corrupt, fing the last non-corrupt one),
# then project what the ending timestamp should be.

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
# SInce we are at the last block, we have to go backwards.
is_corrupt = TRUE
for (ii in (end-1) : max(start, end-maxAllowedCorruptBlocks-1)) {
seek(fid, headerBytes + blockBytes * ii, origin = 'start')
lastBlock = readDataBlock(fid, struc = struc)

if (lastBlock$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt end block #", ii)
end = end - 1
}
if (is_corrupt) {
if (start == end) {
stop("Error reading file. All requested blocks are corrupt.")
}
stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")
}
# the end timestamp should fall right after the actual very last timestamp of the file
endTimestamp = as.numeric(lastBlock$start) + blockLength * step
# now pad it generously in case there are gaps in the last block
Expand All @@ -386,7 +458,36 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
# Reinitiate file and skip header as well as the initial start-1 blocks
seek(fid, headerBytes + blockBytes * start, origin = 'start')
pos = 1 # position of the first element to complete in data
prevRaw = readDataBlock(fid, struc = struc)

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in 1:min((end-start),maxAllowedCorruptBlocks+1)) {
prevRaw = readDataBlock(fid, struc = struc)

if (prevRaw$checksum_pass) {
is_corrupt = FALSE
break
}
QClog = rbind(QClog, data.frame(checksum_pass = FALSE,
blockID_current = start, # the actual block ID wasn't read b/c the block is corrupt
blockID_next = start+1,
start = 0,
end = 0,
blockLengthSeconds = 0,
frequency_blockheader = 0,
frequency_observed = 0,
imputed = FALSE))

warning("Skipping corrupt start block #", start)
start = start + 1
}
if (is_corrupt) {
if (start == end) {
stop("Error reading file. All requested blocks are corrupt.")
}
stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt.")
}

if (is.null(prevRaw)) {
return(invisible(list(header = header, data = NULL)))
}
Expand Down Expand Up @@ -414,8 +515,6 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
rawTime = vector(mode = "numeric", blockLength + 2)
rawPos = 1

QClog = NULL # Initialise log of data quality issues

# Read the data
for (ii in (start+1):end) {
if (ii == numDBlocks) {
Expand All @@ -431,6 +530,22 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
}
else { # read a new block
raw = readDataBlock(fid, struc = struc, parameters = prevRaw$parameters)
if (!raw$checksum_pass) {
# If the checksum doesn't match, we can't trust any of this block's data,
# so we have to completely skip the block.
# Depending on the nature of the faulty block, the data for the time period it represented
# will probably get imputed later, once we encounter a block with a valid checksum.
QClog = rbind(QClog, data.frame(checksum_pass = FALSE,
blockID_current = ii, # the actual block ID wasn't read b/c the block is corrupt
blockID_next = ii + 1,
start = 0,
end = 0,
blockLengthSeconds = 0,
frequency_blockheader = 0,
frequency_observed = 0,
imputed = FALSE))
next
}

if (is.null(raw)) {
# this shouldn't happen
Expand Down Expand Up @@ -472,7 +587,6 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
if ((ii < numDBlocks &&
raw$blockID != 0 &&
raw$blockID - prevRaw$blockID != 1) ||
prevRaw$checksum_pass == FALSE ||
frequency_bias > frequency_tol) {
# Log and impute this event
doQClog = TRUE
Expand Down

0 comments on commit 9e6ed53

Please sign in to comment.