我们从 Python 开源项目中提取了以下 8 个代码示例，用于说明如何使用 pysam.TabixFile()。
def __init__(self, stream, path=None, tabix_path=None, record_checks=None, parsed_samples=None):
    """Store the reader configuration and parse the header from ``stream``.

    :param stream: ``file``-like object to read from
    :param str path: optional path to the stream
    :param str tabix_path: optional path to the tabix file
    :param record_checks: optional iterable of checks to perform on
        records, can contain 'FORMAT' and 'INFO'
    :param parsed_samples: if set, list of samples to parse for
    """
    #: ``file``-like object the records are read from
    self.stream = stream
    #: path to the stream as a ``str``, if known
    self.path = path
    #: path to the tabix file as a ``str``, if known
    self.tabix_path = tabix_path
    #: samples to restrict parsing to, if set
    self.parsed_samples = parsed_samples
    #: record checks as an immutable tuple ('FORMAT' and/or 'INFO');
    #: a missing or empty argument becomes the empty tuple
    self.record_checks = tuple(record_checks) if record_checks else ()
    #: ``pysam.TabixFile`` for reading from an indexed bgzip-ed VCF;
    #: constructed on the fly, hence ``None`` here
    self.tabix_file = None
    # iterator through the Tabix file; also unset until it is needed
    self.tabix_iter = None
    #: parser that reads from ``stream``
    self.parser = parser.Parser(stream, self.path, self.record_checks)
    #: header obtained from the parser
    self.header = self.parser.parse_header(parsed_samples)
def __init__(self, filename):
    """Remember ``filename`` and open it as a ``pysam.TabixFile``.

    :param filename: value passed unchanged to ``pysam.TabixFile``
    """
    self.tabix_file_name = filename
    # TODO: catch TABIX exceptions
    tabix_handle = pysam.TabixFile(filename)
    self.tabix = tabix_handle
def IndexedVariantFileReader(phenocode):
    """Yield an ``_ivfr`` reader over the tabix-indexed file for ``phenocode``.

    The header row of the gzipped file is read first to build a mapping from
    column name to column index; the same file is then opened with
    ``pysam.TabixFile`` and stays open for as long as the generator is
    suspended at its single ``yield``.

    NOTE(review): this generator is presumably wrapped by
    ``contextlib.contextmanager`` where it is defined; no decorator is visible
    here - confirm.

    :param phenocode: key handed to ``common_filepaths['pheno_gz']``
    :raises AssertionError: if the first header field does not start with
        ``#`` or a column is not a known per-variant / per-assoc field
    """
    gz_path = common_filepaths['pheno_gz'](phenocode)

    # Only the header row is needed from the plain gzip stream.
    with read_gzip(gz_path) as handle:
        rows = csv.reader(handle, dialect='pheweb-internal-dialect')
        header = next(rows)

    # The header line carries a leading '#', which is not part of the name.
    assert header[0].startswith('#')
    header[0] = header[0][1:]

    for field in header:
        assert field in conf.parse.per_variant_fields or field in conf.parse.per_assoc_fields, (field)

    colidxs = dict((name, position) for position, name in enumerate(header))

    with pysam.TabixFile(gz_path, parser=None) as tabix_file:
        reader = _ivfr(tabix_file, colidxs)
        yield reader
def context(self):
    """Yield an ``_mr`` reader backed by an open ``pysam.TabixFile``.

    The tabix file at ``self._filepath`` is opened with ``parser=None`` and is
    closed by the ``with`` statement once the generator resumes after its
    single ``yield`` (or is closed).
    """
    with pysam.TabixFile(self._filepath, parser=None) as tabix_file:
        reader = _mr(
            tabix_file,
            self._colidxs,
            self._colidxs_for_pheno,
            self._info_for_pheno,
        )
        yield reader
def fetch(self, chrom_or_region, begin=None, end=None):
    """Jump to the start position of the given chromosomal position
    and limit iteration to the end position

    :param str chrom_or_region: name of the chromosome to jump to if
        begin and end are given and a samtools region string otherwise
        (e.g. "chr1:123,456-123,900").
    :param int begin: 0-based begin position (inclusive)
    :param int end: 0-based end position (exclusive)
    :returns: ``self``, with ``self.tabix_iter`` set to the iterator for
        the requested region
    :raises ValueError: if exactly one of ``begin`` and ``end`` is given
    """
    # Reject *either* half-specified interval.  Checking only
    # "begin given, end missing" would let a lone ``end`` fall through to
    # the region-string branch below, where it is silently ignored.
    if (begin is None) != (end is None):
        raise ValueError('begin and end must both be None or neither')
    # close tabix file if any and is open
    if self.tabix_file and not self.tabix_file.closed:
        self.tabix_file.close()
    # open tabix file if not yet open
    if not self.tabix_file or self.tabix_file.closed:
        self.tabix_file = pysam.TabixFile(
            filename=self.path, index=self.tabix_path)
    # jump to the next position
    if begin is None:
        # no coordinates: ``chrom_or_region`` is treated as a region string
        self.tabix_iter = self.tabix_file.fetch(region=chrom_or_region)
    else:
        self.tabix_iter = self.tabix_file.fetch(
            reference=chrom_or_region, start=begin, end=end)
    return self
def read_tabix(fp, chrom=None, start=None, end=None):
    """Read a region of a tabix-indexed file into a ``pandas.DataFrame``.

    The entries of the tabix file's ``header`` are passed to pandas as the
    column names; when the header is empty, ``names=None`` is passed instead.
    The tabix file is closed before the frame is returned.

    :param fp: value passed to ``pysam.TabixFile``
    :param chrom: forwarded to ``TabixFile.fetch`` as its first argument
    :param start: forwarded to ``TabixFile.fetch`` as its second argument
    :param end: forwarded to ``TabixFile.fetch`` as its third argument
    :returns: the parsed tab-separated records
    """
    tabix = pysam.TabixFile(fp)
    with closing(tabix):
        column_names = list(tabix.header)
        if not column_names:
            column_names = None
        # Fetched records are joined into one in-memory text buffer
        # so pandas can parse them in a single call.
        text = '\n'.join(tabix.fetch(chrom, start, end))
        frame = pd.read_csv(
            io.StringIO(text), sep='\t', header=None, names=column_names)
    return frame
def load_fragments(options, sample, dataset, chrom=None, start=None, end=None, usecols=None, min_reads_per_frag=1):
    """Load read clouds for ``sample``/``dataset`` from the tabix-indexed
    ``readclouds.<sample.name>.<dataset.id>.tsv.gz`` file under
    ``<options.results_dir>/CombineReadcloudsStep``.

    :param options: object providing ``results_dir``
    :param sample: object providing ``name``
    :param dataset: object providing ``id``
    :param chrom: optional contig to restrict to; if it is not among the
        tabix file's contigs an empty frame is returned
    :param start: optional start coordinate, must not be negative
    :param end: optional end coordinate, must be greater than ``start``
        when both are given
    :param usecols: optional list of columns to load; ``"num_reads"`` is
        always added because the filter below needs it (the caller's list
        is left untouched)
    :param min_reads_per_frag: when greater than 0, only rows whose
        ``num_reads`` is strictly greater than this value are kept
    :returns: ``pandas.DataFrame`` of read clouds
    :raises Exception: on a negative ``start`` or when ``start >= end``
    """
    if start is not None and start < 0:
        raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    # Compare the bounds only when both are present; comparing ``None``
    # with an int raises TypeError on Python 3.
    if start is not None and end is not None and start >= end:
        raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    readclouds_path = os.path.join(
        options.results_dir,
        "CombineReadcloudsStep",
        "readclouds.{}.{}.tsv.gz".format(sample.name, dataset.id))

    tabix = pysam.TabixFile(readclouds_path)
    try:
        if chrom is not None and chrom not in tabix.contigs:
            print("MISSING:", chrom)
            return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())
        # Materialise the records while the handle is still open.
        text = "\n".join(tabix.fetch(chrom, start, end))
    finally:
        # Release the file handle on every exit path.
        tabix.close()

    if usecols is not None and "num_reads" not in usecols:
        # Work on a copy so the caller's list is not modified.
        usecols = list(usecols) + ["num_reads"]

    s = StringIO.StringIO(text)
    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)
    readclouds["chrom"] = readclouds["chrom"].astype("string")

    if min_reads_per_frag > 0:
        readclouds = readclouds.loc[readclouds["num_reads"] > min_reads_per_frag]

    return readclouds
def validate(self):
    """Assert that the BAM file at ``self.bam`` exists.

    :raises AssertionError: if ``self.bam`` does not exist; the message names
        the bam path, ``self.sample.name`` and ``self.id``
    """
    bam_present = os.path.exists(self.bam)
    # The message is only formatted when the assertion actually fails.
    assert bam_present, (
        "missing bam file '{}' for sample '{}' and dataset '{}'".format(
            self.bam, self.sample.name, self.id))


# --- disabled legacy code, kept for reference ---

# @staticmethod
# def from_longranger_dir(self, longranger_dir):
#     fragments = os.path.join(longranger_dir,
#         "PHASER_SVCALLER_CS/PHASER_SVCALLER/_REPORTER/"
#         "REPORT_SINGLE_PARTITION/fork0/files/fragments.h5")

#     bam = os.path.join(longranger_dir,
#         "PHASER_SVCALLER_CS/PHASER_SVCALLER/ATTACH_PHASING/"
#         "fork0/files/phased_possorted_bam.bam")

#     phased_fragments = os.path.join(longranger_dir,
#         "10XSARCOMAC1/PHASER_SVCALLER_CS/PHASER_SVCALLER/"
#         "_SNPINDEL_PHASER/PHASE_SNPINDELS/fork0/files/"
#         "fragment_phasing.tsv.gz")

#     self.validate()

#     return TenXDataset(bam, fragments, phased_fragments)

# def load_phased_fragments(self, chrom=None, start=None, end=None):
#     columns = ["chrom", "start_pos", "end_pos", "phase_set", "ps_start",
#                "ps_end", "bc", "h0", "h1", "hmix", "unkn"]

#     try:
#         tabix = pysam.TabixFile(self.phased_fragments)
#         s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
#         frags = pandas.read_table(s)
#         frags.columns = columns
#     except (IOError, ValueError):
#         frags = pandas.DataFrame(columns=columns)

#     return frags

# def load_fragments(self, chrom=None, start=None, end=None):
#     tabix = pysam.TabixFile()

#     try:
#         fragments = utilities.read_data_frame(self.fragments)
#         goodbcs = utilities.get_good_barcodes(fragments)
#         fragments = fragments.loc[fragments["bc"].isin(goodbcs)]
#         # fragments = fragments.loc[fragments["num_reads"]>5]
#         if chrom is not None:
#             fragments = fragments.loc[fragments["chrom"]==chrom]

#         return fragments
#     except:
#         logging.exception("Unable to load fragments from fragments file "
#                           "'{}'".format(self.fragments))
#         raise