Skip to content

Commit

Permalink
Misc
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Oct 26, 2023
1 parent 20bae27 commit 377568e
Show file tree
Hide file tree
Showing 14 changed files with 1,431 additions and 1,216 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

## [1.1.17](https://github.com/GMOD/bam-js/compare/v1.1.16...v1.1.17) (2022-07-18)

- Bump devDeps and generic-filehandle to 3.0.0
- Bump devDeps and generic-filehandle2 to 3.0.0

## [1.1.16](https://github.com/GMOD/bam-js/compare/v1.1.15...v1.1.16) (2022-03-30)

Expand Down Expand Up @@ -289,7 +289,7 @@

## [1.0.18](https://github.com/GMOD/bam-js/compare/v1.0.17...v1.0.18) (2019-05-01)

- Bump generic-filehandle to 1.0.9 to fix error with using native fetch (global
- Bump generic-filehandle2 to 1.0.9 to fix error with using native fetch (global
fetch needed to be bound)
- Bump abortable-promise-cache to 1.0.1 version to fix error with using native
fetch and abort signals
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ var records = await t.getRecordsForRange('ctgA', 0, 50000)
```

The `bamPath` argument only works on nodejs. In the browser, you should pass
`bamFilehandle` with a generic-filehandle e.g. `RemoteFile`
`bamFilehandle` with a generic-filehandle2 e.g. `RemoteFile`

```typescript
const { RemoteFile } = require('generic-filehandle')
const { RemoteFile } = require('generic-filehandle2')
const bam = new BamFile({
bamFilehandle: new RemoteFile('yourfile.bam'), // or a full http url
baiFilehandle: new RemoteFile('yourfile.bam.bai'), // or a full http url
Expand Down Expand Up @@ -76,7 +76,7 @@ The BAM class constructor accepts arguments
yielding

Note: filehandles implement the Filehandle interface from
https://www.npmjs.com/package/generic-filehandle. This module offers the path
https://www.npmjs.com/package/generic-filehandle2. This module offers the path
and url arguments as convenience methods for supplying the LocalFile and
RemoteFile

Expand Down Expand Up @@ -112,7 +112,7 @@ for await (const chunk of file.streamRecordsForRange(
The `getRecordsForRange` method simply wraps this process by concatenating the
chunks into an array.

### async getHeader(opts: {....anything to pass to generic-filehandle opts})
### async getHeader(opts: {....anything to pass to generic-filehandle2 opts})

This obtains the header from `HtsgetFile` or `BamFile`. It retrieves the BAM
file and BAI/CSI header if applicable, or makes an API request for refnames
from htsget.
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
"@gmod/bgzf-filehandle": "^1.4.4",
"abortable-promise-cache": "^1.5.0",
"buffer-crc32": "^0.2.13",
"generic-filehandle": "^3.0.0",
"long": "^4.0.0",
"quick-lru": "^4.0.0"
},
Expand Down
75 changes: 30 additions & 45 deletions src/bamFile.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Buffer } from 'buffer'
import crc32 from 'buffer-crc32'
import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle2'
import AbortablePromiseCache from 'abortable-promise-cache'
import QuickLRU from 'quick-lru'

Expand Down Expand Up @@ -148,23 +148,21 @@ export default class BamFile {
let buffer
if (ret) {
const s = ret + blockLen
const res = await this.bam.read(Buffer.alloc(s), 0, s, 0, opts)
if (!res.bytesRead) {
throw new Error('Error reading header')
}
buffer = res.buffer.subarray(0, Math.min(res.bytesRead, ret))
buffer = await this.bam.read(s, 0, opts)
} else {
buffer = (await this.bam.readFile(opts)) as Buffer
}

const uncba = await unzip(buffer)
const dv = new DataView(uncba.buffer, uncba.byteOffset, uncba.byteLength)

if (uncba.readInt32LE(0) !== BAM_MAGIC) {
if (dv.getInt32(0, true) !== BAM_MAGIC) {
throw new Error('Not a BAM file')
}
const headLen = uncba.readInt32LE(4)
const headLen = dv.getInt32(4, true)
const decoder = new TextDecoder('utf8')

this.header = uncba.toString('utf8', 8, 8 + headLen)
this.header = decoder.decode(uncba.subarray(8, 8 + headLen))
const { chrToIndex, indexToChr } = await this._readRefSeqs(
headLen + 8,
65535,
Expand Down Expand Up @@ -204,30 +202,21 @@ export default class BamFile {
if (start > refSeqBytes) {
return this._readRefSeqs(start, refSeqBytes * 2, opts)
}
const size = refSeqBytes + blockLen
const { bytesRead, buffer } = await this.bam.read(
Buffer.alloc(size),
0,
refSeqBytes,
0,
opts,
)
if (!bytesRead) {
throw new Error('Error reading refseqs from header')
}
const uncba = await unzip(
buffer.subarray(0, Math.min(bytesRead, refSeqBytes)),
)
const nRef = uncba.readInt32LE(start)
const buffer = await this.bam.read(refSeqBytes, 0, opts)
const uncba = await unzip(buffer)
const dv = new DataView(uncba.buffer, uncba.byteOffset, uncba.byteLength)
const nRef = dv.getInt32(start, true)
const decoder = new TextDecoder('utf8')
let p = start + 4
const chrToIndex: { [key: string]: number } = {}
const indexToChr: { refName: string; length: number }[] = []
for (let i = 0; i < nRef; i += 1) {
const lName = uncba.readInt32LE(p)
const lName = dv.getInt32(p, true)
const refName = this.renameRefSeq(
uncba.toString('utf8', p + 4, p + 4 + lName - 1),
decoder.decode(uncba.subarray(p + 4, p + 4 + lName - 1)),
)
const lRef = uncba.readInt32LE(p + lName + 4)

const lRef = dv.getInt32(p + lName + 4, true)

chrToIndex[refName] = i
indexToChr.push({ refName, length: lRef })
Expand Down Expand Up @@ -387,24 +376,18 @@ export default class BamFile {
return mateFeatPromises.flat()
}

/**
 * Reads a byte range from the BAM filehandle (`this.bam`).
 *
 * @param position - passed to `this.bam.read` as the position to read from
 * @param size - number of bytes requested; also the size of the freshly
 *   allocated destination buffer
 * @param opts - forwarded unchanged to `this.bam.read`
 * @returns the destination buffer, trimmed to at most `bytesRead` bytes
 */
async _readRegion(position: number, size: number, opts: BaseOpts = {}) {
const { bytesRead, buffer } = await this.bam.read(
Buffer.alloc(size),
0,
size,
position,
opts,
)

// trim to the byte count reported by the read, never exceeding `size`
return buffer.subarray(0, Math.min(bytesRead, size))
}

async _readChunk({ chunk, opts }: { chunk: Chunk; opts: BaseOpts }) {
const buffer = await this._readRegion(
chunk.minv.blockPosition,
const buffer = await this.bam.read(
chunk.fetchedSize(),
chunk.minv.blockPosition,
opts,
)
console.log(
'bamFile',
buffer.length,
buffer.byteLength,
chunk.fetchedSize(),
)

const {
buffer: data,
Expand All @@ -415,7 +398,7 @@ export default class BamFile {
}

async readBamFeatures(
ba: Buffer,
ba: Uint8Array,
cpositions: number[],
dpositions: number[],
chunk: Chunk,
Expand All @@ -424,9 +407,10 @@ export default class BamFile {
const sink = [] as BAMFeature[]
let pos = 0
let last = +Date.now()
const dv = new DataView(ba.buffer, ba.byteOffset, ba.byteLength)

while (blockStart + 4 < ba.length) {
const blockSize = ba.readInt32LE(blockStart)
while (blockStart + 4 < dv.byteLength) {
const blockSize = dv.getInt32(blockStart, true)
const blockEnd = blockStart + 4 + blockSize - 1

// increment position to the current decompressed status
Expand Down Expand Up @@ -470,7 +454,8 @@ export default class BamFile {
chunk.minv.dataPosition +
1
: // must be slice, not subarray for buffer polyfill on web
crc32.signed(ba.slice(blockStart, blockEnd)),
// @ts-expect-error
crc32.signed(ba.subarray(blockStart, blockEnd)),
})

sink.push(feature)
Expand Down
38 changes: 20 additions & 18 deletions src/csi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ export default class CSI extends IndexFile {
return []
}

parseAuxData(bytes: Buffer, offset: number) {
const formatFlags = bytes.readInt32LE(offset)
parseAuxData(bytes: Uint8Array, offset: number) {
const dv = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)
const formatFlags = dv.getInt32(offset, true)
const coordinateType =
formatFlags & 0x10000 ? 'zero-based-half-open' : '1-based-closed'
const format = (
Expand All @@ -50,14 +51,14 @@ export default class CSI extends IndexFile {
throw new Error(`invalid Tabix preset format flags ${formatFlags}`)
}
const columnNumbers = {
ref: bytes.readInt32LE(offset + 4),
start: bytes.readInt32LE(offset + 8),
end: bytes.readInt32LE(offset + 12),
ref: dv.getInt32(offset + 4, true),
start: dv.getInt32(offset + 8, true),
end: dv.getInt32(offset + 12, true),
}
const metaValue = bytes.readInt32LE(offset + 16)
const metaValue = dv.getInt32(offset + 16, true)
const metaChar = metaValue ? String.fromCharCode(metaValue) : ''
const skipLines = bytes.readInt32LE(offset + 20)
const nameSectionLength = bytes.readInt32LE(offset + 24)
const skipLines = dv.getInt32(offset + 20, true)
const nameSectionLength = dv.getInt32(offset + 24, true)

return {
columnNumbers,
Expand All @@ -78,24 +79,25 @@ export default class CSI extends IndexFile {
async _parse(opts: { signal?: AbortSignal }) {
const buffer = await this.filehandle.readFile(opts)
const bytes = await unzip(buffer)
const dv = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)

let csiVersion
const m = dv.getUint32(0, true)
// check CSI magic numbers
if (bytes.readUInt32LE(0) === CSI1_MAGIC) {
if (m === CSI1_MAGIC) {
csiVersion = 1
} else if (bytes.readUInt32LE(0) === CSI2_MAGIC) {
} else if (m === CSI2_MAGIC) {
csiVersion = 2
} else {
throw new Error('Not a CSI file')
// TODO: do we need to support big-endian CSI files?
}

this.minShift = bytes.readInt32LE(4)
this.depth = bytes.readInt32LE(8)
this.minShift = dv.getInt32(4, true)
this.depth = dv.getInt32(8, true)
this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7
const auxLength = bytes.readInt32LE(12)
const auxLength = dv.getInt32(12, true)
const aux = auxLength >= 30 ? this.parseAuxData(bytes, 16) : undefined
const refCount = bytes.readInt32LE(16 + auxLength)
const refCount = dv.getInt32(16 + auxLength, true)

type BinIndex = { [key: string]: Chunk[] }

Expand All @@ -108,20 +110,20 @@ export default class CSI extends IndexFile {
}>(refCount)
for (let i = 0; i < refCount; i++) {
// the binning index
const binCount = bytes.readInt32LE(curr)
const binCount = dv.getInt32(curr, true)
curr += 4
const binIndex: { [key: string]: Chunk[] } = {}
let stats // < provided by parsing a pseudo-bin, if present
for (let j = 0; j < binCount; j++) {
const bin = bytes.readUInt32LE(curr)
const bin = dv.getUint32(curr, true)
curr += 4
if (bin > this.maxBinNumber) {
stats = parsePseudoBin(bytes, curr + 28)
curr += 28 + 16
} else {
firstDataLine = findFirstData(firstDataLine, fromBytes(bytes, curr))
curr += 8
const chunkCount = bytes.readInt32LE(curr)
const chunkCount = dv.getInt32(curr, true)
curr += 4
const chunks = new Array<Chunk>(chunkCount)
for (let k = 0; k < chunkCount; k += 1) {
Expand Down
2 changes: 1 addition & 1 deletion src/indexFile.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { GenericFilehandle } from 'generic-filehandle'
import { GenericFilehandle } from 'generic-filehandle2'
import Chunk from './chunk'
import { BaseOpts } from './util'

Expand Down
Loading

0 comments on commit 377568e

Please sign in to comment.