Skip to content

Commit

Permalink
Ignore bits that were introduced as padding in transformUnitSize (f…
Browse files Browse the repository at this point in the history
…ixes wierkstudio#40)

Base64 packs bytes into 6-bit units, so it takes two units (12 bits) to encode a single byte (8 bits). When you decode this, you can conclude that the 4 additional bits contain no information (unless the input violates our assumption that it is a Base64 encoding).

More generally, if $x$ units of size $s_{input}$ are encoded into $y = \lceil x s_{input} / s_{output} \rceil$ units of size $s_{output}$, with $x$ an integer, then $x \le \lfloor y s_{output} / s_{input} \rfloor$. This commit checks, for every unit added to the output, whether it is the last one, and does not add the trailing null byte if it is. It might be more efficient to just use the larger array and slice off the null byte when it exists.

It can be impossible to distinguish a true zero byte from padding if `inputSize < outputSize`, but for binary-to-text that would require an alphabet of more than 256 characters.

Signed-off-by: anderium <33520919+anderium@users.noreply.github.com>
  • Loading branch information
anderium committed Oct 15, 2024
1 parent 534ff1a commit 9faecd3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
4 changes: 2 additions & 2 deletions extensions/essentials/src/binary-slice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ const execute: OperationExecuteExport = (request) => {

// Transform the array of bytes to an array of bits and slice them
// precisely to create the bit slice
const bitSlice = transformUnitSize(byteSlice, 8, 1).slice(
const bitSlice = transformUnitSize(byteSlice, 8, 1, true).slice(
bitSliceArgs.start - byteSliceStart * 8,
bitSliceArgs.end - byteSliceStart * 8
)

// Fill up the first byte with zero bits and turn the bits back into bytes
const paddingBits = bitSlice.length % 8 > 0 ? 8 - bitSlice.length % 8 : 0
const sliceBitsPadded = new Array(paddingBits).fill(0).concat(bitSlice)
const sliceBytes = transformUnitSize(sliceBitsPadded, 1, 8)
const sliceBytes = transformUnitSize(sliceBitsPadded, 8, 1, false)
slice = new Uint8Array(sliceBytes).buffer
break
}
Expand Down
4 changes: 2 additions & 2 deletions extensions/essentials/src/binary-to-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ const execute: OperationExecuteExport = (request) => {
))

// Transform unit size (encode)
const encodedUnits = transformUnitSize(units, inUnitSize, outUnitSize)
const encodedUnits = transformUnitSize(units, inUnitSize, outUnitSize, true)
const encodedCodePoints = encodedUnits.map(unit => alphabet[unit])

// Apply padding
Expand Down Expand Up @@ -195,7 +195,7 @@ const execute: OperationExecuteExport = (request) => {
encodedUnits = encodedUnits.slice(0, j)

// Transform unit size (decode)
const units = transformUnitSize(encodedUnits, outUnitSize, inUnitSize)
const units = transformUnitSize(encodedUnits, inUnitSize, outUnitSize, false)
const buffer = Uint8Array.from(units).buffer

// Add an issue if foreign characters were encountered
Expand Down
18 changes: 16 additions & 2 deletions extensions/essentials/src/lib/binary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ export const isBigEndianEnvironment = (): boolean =>
export const transformUnitSize = (
inputUnits: number[],
inUnitSize: number,
outUnitSize: number
outUnitSize: number,
forward: boolean
): number[] => {
if (inUnitSize === outUnitSize) {
return inputUnits
Expand All @@ -84,11 +85,18 @@ export const transformUnitSize = (
return []
}

// Some bits may not hold any information when decoding, see issue #40
if (!forward) {
[inUnitSize, outUnitSize] = [outUnitSize, inUnitSize]
}

const commonSize = lcm(inUnitSize, outUnitSize)
const inUnits = commonSize / inUnitSize
const outUnits = commonSize / outUnitSize

const outputUnitsLength = Math.ceil((outUnits * inputUnits.length) / inUnits)
const outputUnitsLength = forward
? Math.ceil((outUnits * inputUnits.length) / inUnits)
: Math.floor((outUnits * inputUnits.length) / inUnits)
const outputUnits = new Array<number>(outputUnitsLength)

let remainingInBits, inUnit, moveBits
Expand All @@ -104,6 +112,12 @@ export const transformUnitSize = (
// If the current out unit is full, move to the next one
if (remainingOutBits === 0) {
outputUnits[o++] = outUnit
// If the remaining input bits contain no information, don't add a null unit (only occurs when decoding)
// TODO: This works, but I, anderium, don't like that this `if` executes on each iteration.
if (o === outputUnitsLength) {
o--
break
}
outUnit = 0
remainingOutBits = outUnitSize
}
Expand Down

0 comments on commit 9faecd3

Please sign in to comment.