我们从Python开源项目中,提取了以下8个代码示例,用于说明如何使用pysam.faidx()。
def check_fasta(fa_f, pysam_flag=True): if not os.path.isfile(fa_f + '.fai'): pysam.faidx(fa_f) if pysam_flag: # return pysam FastaFile object fa = pysam.FastaFile(fa_f) return fa else: # return fasta file path return fa_f
def chrom_length(fasta_in): """ Compute chromosome lengths of fasta file and store them into a file. More about the .fai file format can be found here: http://www.htslib.org/doc/faidx.html Parameters ---------- fasta_in : str Path to genome FASTA file (can be .gz). Returns ------- str Absolute path to output file. """ iCount.log_inputs(LOGGER, level=logging.INFO) temp = iCount.files.decompress_to_tempfile(fasta_in) pysam.faidx(temp) # pylint: disable=no-member fai_file = os.path.abspath(fasta_in + '.fai') shutil.move(temp + '.fai', fai_file) LOGGER.info('Fai file saved to : %s', fai_file) return fai_file
def get_sequence(self, locus): if self.genome_path is None: tprint('MSILocusLoader error: Can not use .get_sequence() without' + ' specifying the path to the reference genome!') exit(1) sequence = [] for subseq in pysam.faidx(self.genome_path, locus)[1:]: sequence.append(subseq.strip()) return ''.join(sequence).upper() # end .get_sequence() # end MSILocusLoader class definition.
def index_fasta(infile): if os.path.isfile(infile): pass else: print >>sys.stderr, "Indexing " + infile + ' ...', pysam.faidx(infile) print >>sys.stderr, "Done!" #============================================================================== # transfer the .bed file into a .fasta file #==============================================================================
def bed_to_fasta(inbed,refgenome): '''extract features of sequence from bed line''' transtab = string.maketrans("ACGTNX","TGCANX") mRNA_seq = '' if inbed.strip(): try: fields = inbed.split() chrom = fields[0] tx_start = int( fields[1] ) geneName = fields[3] strand = fields[5].replace(" ","_") exon_starts = map(int, fields[11].rstrip( ',\n' ).split( ',' ) ) exon_starts = map((lambda x: x + tx_start ), exon_starts) exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) ) exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends); except: print >>sys.stderr,"Wrong format!" + inbed return None for st,end in zip(exon_starts, exon_ends): exon_coord = chrom + ':' + str(st +1) + '-' + str(end) tmp = pysam.faidx(refgenome,exon_coord) mRNA_seq += ''.join([i.rstrip('\n\r') for i in tmp[1:]]) if strand =='-': mRNA_seq = mRNA_seq.upper().translate(transtab)[::-1] return (geneName, mRNA_seq) #============================================================================== # transfer multiple-line fasta format into two-line fasta format #==============================================================================
def run(referenceset, fastq, gff, fasta, contigset, alignmentset, options, log_level): #'--log-file foo.log', #'--verbose', #'--debug', # requires 'ipdb' #'-j NWORKERS', #'--algorithm quiver', #'--diploid', # binary #'--minConfidence 40', #'--minCoverage 5', #'--alignmentSetRefWindows', cmd = "variantCaller --log-level {log_level} {options} --referenceFilename {referenceset} -o {fastq} -o {gff} -o {fasta} {alignmentset}" system(cmd.format(**locals())) try: say('Converting fasta {!r} to contigset {!r}'.format(fasta, contigset)) # Convert to contigset.xml import pysam pysam.faidx(fasta) # pylint: disable=no-member # I do not know why pylint does not see this defined. ds = ContigSet(fasta, strict=True) ds.write(contigset, relPaths=True) say('Successfully wrapped fasta {!r} in contigset {!r}'.format(fasta, contigset)) except Exception: say(traceback.format_exc()) say('Skipping conversion to contigset.')