我们从Python开源项目中,提取了以下10个代码示例,用于说明如何使用pysam.tabix_index()。
def run(argv):
    """Rebuild the bgzipped matrix and its tabix index when it is out of date.

    `argv` is accepted for interface compatibility but is not read here.
    When `should_run()` is false nothing is rebuilt and a short message is
    printed instead.
    """
    if not should_run():
        print('matrix is up-to-date!')
        return

    # Each cffi buffer is bound to a name so it stays alive across the native call.
    sites_arg = ffi.new('char[]', sites_filepath.encode('utf8'))
    pheno_glob_arg = ffi.new('char[]', common_filepaths['pheno']('*').encode('utf8'))
    tmp_out_arg = ffi.new('char[]', matrix_gz_tmp_filepath.encode('utf8'))
    lib.cffi_make_matrix(sites_arg, pheno_glob_arg, tmp_out_arg)

    # Presumably the native code writes to the temporary path; move it into
    # place only after that call has returned.
    os.rename(matrix_gz_tmp_filepath, matrix_gz_filepath)

    # note: these are 0-based, but `/usr/bin/tabix` is 1-based
    pysam.tabix_index(
        filename=matrix_gz_filepath,
        force=True,
        seq_col=0,
        start_col=1,
        end_col=1,
    )
def convert_VariantFile_to_IndexedVariantFile(vf_path, ivf_path):
    """Bgzip `vf_path` into `ivf_path` and build a tabix index for the result.

    The compressed output is first written to a temporary path obtained from
    `get_tmp_path(ivf_path)` and renamed to `ivf_path` afterwards, then
    indexed with column 0 as the sequence name and column 1 as the position.
    """
    from .load.cffi._x import ffi, lib

    make_basedir(ivf_path)
    staging_path = get_tmp_path(ivf_path)

    # Each cffi buffer is bound to a name so it stays alive across the native call.
    src_arg = ffi.new('char[]', vf_path.encode('utf8'))
    dst_arg = ffi.new('char[]', staging_path.encode('utf8'))
    marker_arg = ffi.new('char[]', b'#')
    lib.cffi_bgzip_file(src_arg, dst_arg, marker_arg)

    os.rename(staging_path, ivf_path)

    # note: these are 0-based, but `/usr/bin/tabix` is 1-based
    pysam.tabix_index(
        filename=ivf_path,
        force=True,
        seq_col=0,
        start_col=1,
        end_col=1,
    )
def main(opts):
    """Split INDEL mutations by whether they hit a tabix-indexed blacklist.

    `opts` is a mapping with the keys:
      * 'input'            -- tab-separated file of INDELs; must provide the
                              columns 'Chromosome', 'Start_Position' and
                              'End_Position'.
      * 'blacklist'        -- file to (re)index with tabix and query.
      * 'output'           -- destination for rows with no blacklist hit.
      * 'blacklist_output' -- destination for rows with at least one hit.
    """
    # read in INDEL mutations
    indels = pd.read_csv(opts['input'], sep='\t')

    # Column indices passed to pysam are 0-based; positions inside the file are
    # interpreted as 1-based because `zerobased` is left at its default.
    pysam.tabix_index(opts['blacklist'], force=True,
                      seq_col=0, start_col=1, end_col=2)

    # query black list to find INDELs with no hits
    non_coding_ixs, coding_ixs = [], []
    black_list = pysam.Tabixfile(opts['blacklist'])
    try:
        for i, row in indels.iterrows():
            # NOTE(review): pysam's fetch() documents start/end as 0-based,
            # half-open; if the input positions are 1-based inclusive the first
            # base of each INDEL is not queried -- confirm this is intended.
            result = black_list.fetch(reference=row['Chromosome'],
                                      start=row['Start_Position'],
                                      end=row['End_Position'])
            if not list(result):
                non_coding_ixs.append(i)
            else:
                coding_ixs.append(i)
    finally:
        # release the index handle even when a lookup raises
        black_list.close()

    # `i` values are index labels from iterrows(), so label-based `.loc` is the
    # direct replacement for the removed `.ix` accessor.
    # save non-coding indels
    indels.loc[non_coding_ixs, :].to_csv(opts['output'], sep='\t', index=False)
    indels.loc[coding_ixs, :].to_csv(opts['blacklist_output'], sep='\t', index=False)
def indexFile(options):
    """Compress `options.output` to `<output>.gz` and build its tabix index.

    The column layout used for the index and the style of the progress
    messages depend on whether `options.ensembl` is set:
      * set     -- sequence column 2, start column 4, end column 5.
      * not set -- sequence column 1, start and end both column 2.
    Existing `.gz` and index files are overwritten (`force=True`).
    """
    filename = options.output
    gz_filename = filename + '.gz'

    if options.ensembl is not None:
        sys.stdout.write('Compressing output file... ')
        sys.stdout.flush()
        pysam.tabix_compress(filename, gz_filename, force=True)
        sys.stdout.write('OK\n')

        sys.stdout.write('Indexing output file... ')
        sys.stdout.flush()
        pysam.tabix_index(gz_filename, seq_col=2, start_col=4, end_col=5,
                          meta_char='#', force=True)
        sys.stdout.write('OK\n')
    else:
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print('Compressing file...')
        pysam.tabix_compress(filename, gz_filename, force=True)
        print('Indexing file...')
        pysam.tabix_index(gz_filename, seq_col=1, start_col=2, end_col=2,
                          meta_char='#', force=True)


# Sort records in file
def index_vcf(filename):
    """Build a tabix index for `filename` using pysam's 'vcf' preset.

    An existing index is overwritten because `force=True` is passed.
    """
    pysam.tabix_index(
        filename,
        force=True,
        preset='vcf',
    )
def batchTestHelper(self, modFile, pool, refLens):
    """Annotate every read described in `pool` and compare against expectations.

    modFile -- open, iterable file object whose lines are copied into a
               temporary .tsv that is then tabix-indexed; it is closed here.
    pool    -- sequence of tuples. Items 0..2 build a `Read` (item 1 is
               passed as item + 1); items 3..7 are the expected values for
               `polish(res[0])`, `res[1]`, `res[2]`, `res[3]` and `res[4]`
               of the matching annotation result.
    refLens -- mapping from chromosome id to the value handed to
               `annot.Annotator` as its second argument.
    """
    # mkstemp() returns an already-open descriptor; wrap it so it is closed
    # instead of being leaked.
    fd, tmpName = tempfile.mkstemp('.tsv')
    with os.fdopen(fd, 'wb') as tmpfp:
        for line in modFile:
            tmpfp.write(line)

    pysam.tabix_index(tmpName, force=True, seq_col=1, start_col=2, end_col=2,
                      meta_char='#', zerobased=True)
    tmpName += '.gz'
    modFile.close()

    try:
        self.chromoID = '1'
        self.modobj = mod.Mod(tmpName)
        self.modobj.load(self.chromoID)

        # Build the read list once; this also works when `pool` is empty.
        bamIter = [Read(tup[0], tup[1] + 1, tup[2]) for tup in pool]

        a = annot.Annotator(self.chromoID, refLens[self.chromoID],
                            self.modobj, bamIter)
        results = a.execute()

        for i, res in enumerate(results):
            self.assertEqual(polish(res[0]), pool[i][3])
            self.assertEqual(res[1], pool[i][4])
            self.assertEqual(res[2], pool[i][5])
            self.assertEqual(res[3], pool[i][6])
            self.assertEqual(res[4], pool[i][7])
    finally:
        # Remove the compressed file and its index even if an assertion fails.
        os.remove(tmpName)
        os.remove(tmpName + '.tbi')
def indexFile(options):
    """Compress `options.output` to `<output>.gz` and tabix-index the result.

    Progress is reported on standard output. The index uses column 1 as the
    sequence name and column 2 as both start and end; existing files are
    overwritten (`force=True`).
    """
    plain_path = options.output
    gz_path = plain_path + '.gz'
    out = sys.stdout

    out.write('Compressing output file ... ')
    out.flush()
    pysam.tabix_compress(plain_path, gz_path, force=True)
    out.write('OK\n')

    out.write('Indexing output file ... ')
    out.flush()
    pysam.tabix_index(
        gz_path,
        seq_col=1,
        start_col=2,
        end_col=2,
        meta_char='#',
        force=True,
    )
    out.write('OK\n')


# Read records from file as a list
def indexFile(options):
    """Compress `options.output` to `<output>.gz` and tabix-index the result.

    Progress is reported on standard output. The index uses column 4 as the
    sequence name, column 6 as start and column 7 as end; existing files are
    overwritten (`force=True`).
    """
    source_path = options.output
    compressed_path = source_path + '.gz'
    stream = sys.stdout

    stream.write('Compressing output file... ')
    stream.flush()
    pysam.tabix_compress(source_path, compressed_path, force=True)
    stream.write('OK\n')

    stream.write('Indexing output file... ')
    stream.flush()
    pysam.tabix_index(
        compressed_path,
        seq_col=4,
        start_col=6,
        end_col=7,
        meta_char='#',
        force=True,
    )
    stream.write('OK\n')


# Check if string is a number (integer)
def __init__(self, vcffile=None):
    """Index the per-tool annotation VCFs and open a tabix handle on each.

    Side effects visible in this method: a `merge` directory is created in
    the current working directory if missing, the process then changes into
    it, and every annotation VCF (addressed relative to `merge`) is
    tabix-indexed with the 'vcf' preset before being opened.

    vcffile -- path of the VCF being processed; its basename without the
               last extension is stored as `self.filename`.
    """
    self.vcffile = vcffile
    base_name = os.path.basename(str(vcffile))
    self.filename = os.path.splitext(base_name)[0]

    # create folder merge if it doesn't exist, then enter it
    if not os.path.exists('merge'):
        os.makedirs('merge')
    os.chdir('merge')

    def open_tabix(path):
        # Every annotation source is opened read-only as UTF-8 text.
        return pysam.Tabixfile(path, 'r', encoding="utf-8")

    # INFO tags read from the functional-prediction VCF.
    dbnsfp_info = 'dbNSFP_SIFT_score,dbNSFP_SIFT_converted_rankscore,dbNSFP_SIFT_pred,dbNSFP_Uniprot_acc_Polyphen2,dbNSFP_Uniprot_id_Polyphen2,dbNSFP_Uniprot_aapos_Polyphen2,dbNSFP_Polyphen2_HDIV_score,dbNSFP_Polyphen2_HDIV_rankscore,dbNSFP_Polyphen2_HDIV_pred,dbNSFP_Polyphen2_HVAR_score,dbNSFP_Polyphen2_HVAR_rankscore,dbNSFP_Polyphen2_HVAR_pred,dbNSFP_LRT_score,dbNSFP_LRT_converted_rankscore,dbNSFP_LRT_pred,dbNSFP_LRT_Omega,dbNSFP_MutationTaster_score,dbNSFP_MutationTaster_converted_rankscore,dbNSFP_MutationTaster_pred,dbNSFP_MutationTaster_model,dbNSFP_MutationTaster_AAE,dbNSFP_MutationAssessor_UniprotID,dbNSFP_MutationAssessor_variant,dbNSFP_MutationAssessor_score,dbNSFP_MutationAssessor_rankscore,dbNSFP_MutationAssessor_pred,dbNSFP_FATHMM_score,dbNSFP_FATHMM_converted_rankscore,dbNSFP_FATHMM_pred,dbNSFP_PROVEAN_score,dbNSFP_PROVEAN_converted_rankscore,dbNSFP_PROVEAN_pred,dbNSFP_Transcript_id_VEST3,dbNSFP_Transcript_var_VEST3,dbNSFP_VEST3_score,dbNSFP_VEST3_rankscore,dbNSFP_MetaSVM_score,dbNSFP_MetaSVM_rankscore,dbNSFP_MetaSVM_pred,dbNSFP_MetaLR_score,dbNSFP_MetaLR_rankscore,dbNSFP_MetaLR_pred,dbNSFP_Reliability_index,dbNSFP_M-CAP_score,dbNSFP_M-CAP_rankscore,dbNSFP_M-CAP_pred,dbNSFP_REVEL_score,dbNSFP_REVEL_rankscore,dbNSFP_MutPred_score,dbNSFP_MutPred_rankscore,dbNSFP_MutPred_protID,dbNSFP_MutPred_AAchange,dbNSFP_MutPred_Top5features,dbNSFP_CADD_raw,dbNSFP_CADD_raw_rankscore,dbNSFP_CADD_phred,dbNSFP_DANN_score,dbNSFP_DANN_rankscore,dbNSFP_fathmm-MKL_coding_score,dbNSFP_fathmm-MKL_coding_rankscore,dbNSFP_fathmm-MKL_coding_pred,dbNSFP_fathmm-MKL_coding_group,dbNSFP_Eigen_coding_or_noncoding,dbNSFP_Eigen-raw,dbNSFP_Eigen-phred,dbNSFP_Eigen-PC-raw,dbNSFP_Eigen-PC-phred,dbNSFP_Eigen-PC-raw_rankscore,dbNSFP_GenoCanyon_score,dbNSFP_GenoCanyon_score_rankscore,dbNSFP_integrated_fitCons_score,dbNSFP_integrated_fitCons_rankscore,dbNSFP_integrated_confidence_value,dbNSFP_GM12878_fitCons_score,dbNSFP_GM12878_fitCons_rankscore,dbNSFP_GM12878_confidence_value,dbNSFP_H1-hESC_fitCons_score,dbNSFP_H1-hESC_fitCons_rankscore,dbNSFP_H1-hESC_confidence_value,dbNSFP_HUVEC_fitCons_score,dbNSFP_HUVEC_fitCons_rankscore,dbNSFP_clinvar_rs,dbNSFP_clinvar_clnsig,dbNSFP_clinvar_trait,dbNSFP_clinvar_golden_stars'

    # Insertion order is preserved by the OrderedDict; entries are added in
    # the same sequence as the sources are indexed below.
    self.annotation_files = OrderedDict()
    sources = self.annotation_files

    pysam.tabix_index('../snpeff/snpeff.output.vcf', preset='vcf')
    sources['snpeff'] = {
        'info': 'EFF',
        'file': open_tabix('../snpeff/snpeff.output.vcf.gz'),
    }

    pysam.tabix_index('../vep/vep.output.sorted.vcf', preset='vcf')
    sources['vep'] = {
        'info': 'CSQ',
        'file': open_tabix('../vep/vep.output.sorted.vcf.gz'),
    }

    pysam.tabix_index('../snpsift/snpsift.final.vcf', preset='vcf')
    sources['vartype'] = {
        'info': 'VARTYPE,SNP,MNP,INS,DEL,MIXED,HOM,HET',
        'file': open_tabix('../snpsift/snpsift.final.vcf.gz'),
    }

    pysam.tabix_index('../decipher/hi_predictions.vcf', preset='vcf')
    sources['decipher'] = {
        'info': 'HI_PREDICTIONS',
        'file': open_tabix('../decipher/hi_predictions.vcf.gz'),
    }

    pysam.tabix_index('../pynnotator/pynnotator.vcf', preset='vcf')
    # genomes1k dbsnp clinvar esp6500 ensembl_phen ensembl_clin
    self.pynnotator_tags = ['genomes1k', 'dbsnp', 'clinvar', 'esp6500', 'ensembl_phen', 'ensembl_clin']
    sources['pynnotator'] = {
        'info': 'ALL',
        'file': open_tabix('../pynnotator/pynnotator.vcf.gz'),
    }

    pysam.tabix_index('../func_pred/func_pred_sorted.vcf', preset='vcf')
    sources['dbnfsp'] = {
        'info': dbnsfp_info,
        'file': open_tabix('../func_pred/func_pred_sorted.vcf.gz'),
    }

    self.dbsnp = open_tabix(settings.dbsnp)