Skip to content

Commit

Permalink
Attempt to infer file format from contents when extension isn't recognized
Browse files Browse the repository at this point in the history
  • Loading branch information
jrobinso committed Jul 19, 2024
1 parent 28d1ca8 commit b73cc78
Show file tree
Hide file tree
Showing 15 changed files with 521 additions and 236 deletions.
3 changes: 1 addition & 2 deletions js/bam/bamSource.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import BamWebserviceReader from "./bamWebserviceReader.js"
import HtsgetBamReader from "../htsget/htsgetBamReader.js"
import CramReader from "../cram/cramReader.js"
import {isDataURL} from "../util/igvUtils.js"
import * as TrackUtils from "../util/trackUtils.js"
import {StringUtils} from "../../node_modules/igv-utils/src/index.js"

class BamSource {
Expand Down Expand Up @@ -59,7 +58,7 @@ class BamSource {
} else {
if (!this.config.indexURL && config.indexed !== false) {
if (StringUtils.isString(this.config.url)) {
const inferIndexPath = TrackUtils.inferIndexPath(this.config.url, "bai")
const inferIndexPath = inferIndexPath(this.config.url, "bai")
if (inferIndexPath) {
console.error(`Warning: no indexURL specified for ${this.config.url}. Guessing ${inferIndexPath}`)
this.config.indexURL = inferIndexPath
Expand Down
15 changes: 4 additions & 11 deletions js/browser.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import GenomeUtils from "./genome/genomeUtils.js"
import ReferenceFrame, {createReferenceFrameList} from "./referenceFrame.js"
import {createColumn, doAutoscale, getElementAbsoluteHeight, getFilename} from "./util/igvUtils.js"
import {createViewport} from "./util/viewportUtils.js"
import {defaultSequenceTrackOrder} from './sequenceTrack.js'
import {bppSequenceThreshold, defaultSequenceTrackOrder} from './sequenceTrack.js'
import version from "./version.js"
import FeatureSource from "./feature/featureSource.js"
import {defaultNucleotideColors} from "./util/nucleotideColors.js"
Expand Down Expand Up @@ -59,11 +59,11 @@ import Genome from "./genome/genome.js"
import {setDefaults} from "./igv-create.js"
import {trackViewportPopoverList} from './trackViewport.js'
import TrackBase from "./trackBase.js"
import {bppSequenceThreshold} from "./sequenceTrack.js"
import {loadGenbank} from "./gbk/genbankParser.js"
import igvCss from "./embedCss.js"
import {sampleInfoTileWidth, sampleInfoTileXShim} from "./sample/sampleInfoConstants.js"
import QTLSelections from "./qtl/qtlSelections.js"
import {inferFileFormat, inferFileFormatFromContents} from "./util/fileFormatUtils.js"


// css - $igv-scrollbar-outer-width: 14px;
Expand Down Expand Up @@ -1206,15 +1206,8 @@ class Browser {
} else if (config.fastaURL) {
config.format = "fasta" // by definition
} else {
let filename = config.filename
if (!filename) {
filename = await getFilename(url)
}

const format = TrackUtils.inferFileFormat(filename)
if ("tsv" === format) {
config.format = await TrackUtils.inferFileFormatFromHeader(config)
} else if (format) {
const format = await inferFileFormat(config)
if (format) {
config.format = format
} else {
if (config.sourceType === "htsget") {
Expand Down
17 changes: 14 additions & 3 deletions js/feature/featureFileReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,27 @@ class FeatureFileReader {
}

} else {
// If this is a non-indexed file we will load all features in advance
const options = buildOptions(this.config)
let data = await igvxhr.loadByteArray(this.config.url, options)

let data

if (this.config._filecontents) {
// In rare instances the entire file must be read and decoded to determine the file format.
// When this occurs the file contents are temporarily stashed to prevent needing to read the file twice
data = this.config._filecontents
delete this.config._filecontents
} else {
// If this is a non-indexed file we will load all features in advance
const options = buildOptions(this.config)
data = await igvxhr.loadByteArray(this.config.url, options)
}

// If the data size is < max string length decode entire string with TextDecoder. This is much faster
// than decoding by line
if (data.length < MAX_STRING_LENGTH) {
data = new TextDecoder().decode(data)
}


let dataWrapper = getDataWrapper(data)
this.header = await this.parser.parseHeader(dataWrapper)

Expand Down
13 changes: 7 additions & 6 deletions js/feature/featureParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,21 @@
* THE SOFTWARE.
*/

import * as TrackUtils from '../util/trackUtils.js'
import {decodeBedpe, decodeBedpeDomain, fixBedPE} from './decode/bedpe.js'
import {decodeInteract} from "./decode/interact.js"
import {
decodeBed,
decodeBedGraph,
decodeBedmethyl,
decodeGappedPeak,
decodeGenePred,
decodeGenePredExt,
decodeNarrowPeak,
decodePeak,
decodeReflat,
decodeRepeatMasker,
decodeSNP,
decodeWig,
decodeBedmethyl,
decodeGappedPeak,
decodeNarrowPeak
decodeWig
} from "./decode/ucsc.js"
import {decodeGFF3, decodeGTF} from "./gff/gff.js"
import {decodeFusionJuncSpan} from "./decode/fusionJuncSpan.js"
Expand All @@ -48,6 +47,8 @@ import {decodeGcnv} from "../gcnv/gcnvDecoder.js"
import DecodeError from "./decode/decodeError.js"
import GFFHelper from "./gff/gffHelper.js"

import {getFormat} from "../util/fileFormats.js"

/**
* Parser for column style (tab delimited, etc) text file formats (bed, gff, vcf, etc).
*
Expand Down Expand Up @@ -319,7 +320,7 @@ class FeatureParser {
this.delimiter = "\t"
break
default:
const customFormat = TrackUtils.getFormat(format)
const customFormat = getFormat(format)
if (customFormat !== undefined) {
this.decode = decodeCustom
this.header.customFormat = customFormat
Expand Down
23 changes: 23 additions & 0 deletions js/gwas/gwasParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ class GWASParser {
}
return allFeatures
}

/**
* Test first line to see if this is a GWAS file. Used to determine file format for the case of generic
* extensions such as "tsv"
* @param firstLine
*/
static isGWAS(firstLine) {
const tokens = firstLine.split('\t')
if (tokens.length < 5) {
return false
}
const requiredHeaders =
[
['chr', 'chromosome', 'chr_id', 'chrom'],
['bp', 'pos', 'position', 'chr_pos', 'chromEnd'],
['p', 'pval', 'p-value', 'p.value']
]
for (let h of requiredHeaders) {
if (!tokens.some(t => h.includes(t.toLowerCase()))) return false
}
return true
}

}

class GWASFeature {
Expand Down
3 changes: 2 additions & 1 deletion js/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ import {createBrowser, createTrack, removeAllBrowsers, removeBrowser, visibility
import embedCss from "./embedCss.js"
import version from "./version.js"
import * as TrackUtils from "./util/trackUtils.js"
import {registerFileFormats} from "./util/trackUtils.js"
import {igvxhr} from "../node_modules/igv-utils/src/index.js"
import {registerTrackClass, registerTrackCreatorFunction} from "./trackFactory.js"
import TrackBase from "./trackBase.js"
import Hub from "./ucsc/ucscHub.js"
import Browser from "./browser.js"

import {registerFileFormats} from "./util/fileFormats.js"

const setApiKey = igvxhr.setApiKey

function setGoogleOauthToken(accessToken) {
Expand Down
28 changes: 27 additions & 1 deletion js/qtl/qtlParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class QTLParser {
async parseHeader(dataWrapper) {

const config = this.config
if(config.delimiter) this.delimiter = config.delimiter
if (config.delimiter) this.delimiter = config.delimiter

const headerLine = await dataWrapper.nextLine()
const columns = this.parseHeaderLine(headerLine)
Expand Down Expand Up @@ -180,6 +180,32 @@ class QTLParser {
}
return allFeatures
}


/**
* Test first line to see if this is a QTL file. Used to determine file format for the case of generic
* extensions such as "tsv"
* @param firstLine
*/
static isQTL(firstLine) {
const tokens = firstLine.split('\t')
if (tokens.length < 5) {
return false
}
const requiredHeaders =
[
['chr', 'chromosome', 'chr_id', 'chrom'],
['bp', 'pos', 'position', 'chr_pos', 'chromEnd'],
['p', 'pval', 'p-value', 'p.value'],
['rsid', 'variant', 'snp'],
['phenotype', 'gene', 'gene_id', 'molecular_trait_id']
]
for (let h of requiredHeaders) {
if (!tokens.some(t => h.includes(t.toLowerCase()))) return false
}
return true
}

}


Expand Down
4 changes: 2 additions & 2 deletions js/roi/ROISet.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {FileUtils, StringUtils} from '../../node_modules/igv-utils/src/index.js'
import FeatureSource from '../feature/featureSource.js'
import {appleCrayonRGBA} from '../util/colorPalletes.js'
import {computeWGFeatures} from "../feature/featureUtils.js"
import * as TrackUtils from "../util/trackUtils.js"
import {inferFileFormatFromName} from "../util/fileFormatUtils.js"


const appleCrayonColorName = 'nickel'
Expand Down Expand Up @@ -38,7 +38,7 @@ class ROISet {
config.format = config.format.toLowerCase()
} else {
const filename = FileUtils.getFilename(config.url)
config.format = TrackUtils.inferFileFormat(filename)
config.format = inferFileFormatFromName(filename)
}
this.featureSource = config.featureSource || FeatureSource(config, genome)
}
Expand Down
Loading

0 comments on commit b73cc78

Please sign in to comment.