The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.loadtxt().
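Before the project-extracted examples, here is a minimal, self-contained sketch of the basic numpy.loadtxt() call patterns most of the snippets below rely on: reading delimited text, skipping a header row, selecting columns, and supplying a structured dtype. The in-memory CSV contents and column names are made up for illustration.

import io
import numpy as np

# A small in-memory "file" standing in for a real CSV on disk (hypothetical data).
csv_text = io.StringIO("id,x,y\n1,0.5,2.0\n2,1.5,4.0\n3,2.5,6.0\n")

# Skip the header row, split on commas, and keep only the x and y columns.
xy = np.loadtxt(csv_text, delimiter=',', skiprows=1, usecols=(1, 2))
print(xy.shape)  # (3, 2)

# Rewind and read the same text with a structured dtype, one named field per column.
csv_text.seek(0)
rows = np.loadtxt(csv_text, delimiter=',', skiprows=1,
                  dtype=[('id', 'i4'), ('x', 'f4'), ('y', 'f4')])
print(rows['id'])  # [1 2 3]

The project examples that follow apply the same keyword arguments (delimiter, skiprows, usecols, dtype, unpack) to real data files.
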
def calculate_loss_mix2(self, predictions, predictions_class, predictions_encoder, labels, **unused_params):
    with tf.name_scope("loss_mix2"):
        float_labels = tf.cast(labels, tf.float32)
        float_encoders = float_labels
        for i in range(FLAGS.encoder_layers):
            var_i = np.loadtxt(FLAGS.autoencoder_dir+'autoencoder_layer%d.model' % i)
            weight_i = tf.constant(var_i[:-1,:],dtype=tf.float32)
            bias_i = tf.reshape(tf.constant(var_i[-1,:],dtype=tf.float32),[-1])
            float_encoders = tf.nn.xw_plus_b(float_encoders,weight_i,bias_i)
            if i<FLAGS.encoder_layers-1:
                float_encoders = tf.nn.relu(float_encoders)
            else:
                hidden_mean = tf.reduce_mean(float_encoders,axis=1,keep_dims=True)
                hidden_std = tf.sqrt(tf.reduce_mean(tf.square(float_encoders-hidden_mean),axis=1,keep_dims=True))
                float_encoders = (float_encoders-hidden_mean)/(hidden_std+1e-6)
                #float_encoders = tf.nn.sigmoid(float_encoders)
        cross_entropy_encoder = 0.1*self.calculate_mseloss(predictions_encoder,float_encoders)
        cross_entropy_loss = self.calculate_loss(predictions,labels)
        return cross_entropy_encoder+cross_entropy_loss, float_encoders
        #return cross_entropy_encoder, float_encoders

def normalizeSNPs(normMethod, X, y, prev=None, frqFile=None):
    if (normMethod == 'frq'):
        print 'flipping SNPs for standardization...'
        empMean = X.mean(axis=0) / 2.0
        X[:, empMean>0.5] = 2 - X[:, empMean>0.5]
        mafs = np.loadtxt(frqFile, usecols=[1,2]).mean(axis=1)
        snpsMean = 2*mafs
        snpsStd = np.sqrt(2*mafs*(1-mafs))
    elif (normMethod == 'controls'):
        controls = (y<y.mean())
        cases = ~controls
        snpsMeanControls, snpsStdControls = X[controls, :].mean(axis=0), X[controls, :].std(axis=0)
        snpsMeanCases, snpsStdCases = X[cases, :].mean(axis=0), X[cases, :].std(axis=0)
        snpsMean = (1-prev)*snpsMeanControls + prev*snpsMeanCases
        snpsStd = (1-prev)*snpsStdControls + prev*snpsStdCases
    elif (normMethod is None):
        snpsMean, snpsStd = X.mean(axis=0), X.std(axis=0)
    else:
        raise Exception('Unrecognized normalization method: ' + normMethod)

    return snpsMean, snpsStd

def predict():
    fnn = joblib.load(PKL)
    dir = 'E:/????/??????/1 ???/captcha_master1/captcha_master/worddata/'
    predictValue = []
    for fr in os.listdir(dir):
        dataset = []
        f = dir + fr
        if f.rfind(u'.DS_Store') == -1 and f.rfind(u'Thumbs.db') == -1:
            data = np.loadtxt(f, delimiter=',')
            #data.reshape((1,2500))
            for item in data:
                dataset.append(int(item))
            #print(len(dataset))
            out = fnn.activate(dataset)
            out = out.argmax()
            iconset = ['3', 'c', 'd', 'e', 'f', 'h', 'j', 'k', 'l', 'm', 'n', 'w', 'x', 'y']
            for y, word in enumerate(iconset):
                if out == y:
                    print(word)
                    predictValue.append(word)
    print(u'????%s' % (''.join(predictValue)))

def _read(self, stream, text, byte_order):
    '''
    Read the actual data from a PLY file.
    '''
    if self._have_list:
        # There are list properties, so a simple load is
        # impossible.
        if text:
            self._read_txt(stream)
        else:
            self._read_bin(stream, byte_order)
    else:
        # There are no list properties, so loading the data is
        # much more straightforward.
        if text:
            self.data = _np.loadtxt(
                _islice(iter(stream.readline, ''), self.count),
                self.dtype())
        else:
            self.data = _np.fromfile(
                stream, self.dtype(byte_order), self.count)

def __init__(self, target, instance, files):
    self.target = target
    self.instance = instance
    mask_files = natural_sort(filter(lambda fn: '_maskcrop.png' in fn, files))
    depth_files = natural_sort(filter(lambda fn: '_depthcrop.png' in fn, files))
    rgb_files = natural_sort(list(set(files) - set(mask_files) - set(depth_files)))
    loc_files = natural_sort(map(lambda fn: fn.replace('_crop.png', '_loc.txt'), rgb_files))

    # Ensure all have equal number of files (Hack! doesn't ensure filename consistency)
    nfiles = np.min([len(loc_files), len(mask_files), len(depth_files), len(rgb_files)])
    mask_files, depth_files, rgb_files, loc_files = mask_files[:nfiles], depth_files[:nfiles], \
        rgb_files[:nfiles], loc_files[:nfiles]

    # print target, instance, len(loc_files), len(mask_files), len(depth_files), len(rgb_files)
    assert(len(mask_files) == len(depth_files) == len(rgb_files) == len(loc_files))

    # Read images
    self.rgb = ImageDatasetReader.from_filenames(rgb_files)
    self.depth = ImageDatasetReader.from_filenames(depth_files)
    self.mask = ImageDatasetReader.from_filenames(mask_files)

    # Read top-left locations of bounding box
    self.locations = np.vstack([np.loadtxt(loc, delimiter=',', dtype=np.int32)
                                for loc in loc_files])

def load_ply(fn, version):
    """ Retrieve aligned point cloud for each scene """
    if version == 'v1':
        raise ValueError('''Version %s not supported. '''
                         '''Check dataset and choose either v1 or v2 scene dataset''' % version)
        # P = np.loadtxt(os.path.expanduser(fn), usecols=(2,3,4,5,6,7,8), dtype=np.float64)
        # return map(lambda p: RigidTransform(Quaternion.from_wxyz(p[:4]), p[4:]), P)
    elif version == 'v2':
        ply = PlyData.read(os.path.expanduser(fn))
        xyz = np.vstack([ply['vertex'].data['x'],
                         ply['vertex'].data['y'],
                         ply['vertex'].data['z']]).T
        rgb = np.vstack([ply['vertex'].data['diffuse_red'],
                         ply['vertex'].data['diffuse_green'],
                         ply['vertex'].data['diffuse_blue']]).T
        return xyz, rgb
    else:
        raise ValueError('''Version %s not supported. '''
                         '''Check dataset and choose either v1 or v2 scene dataset''' % version)

def tsukuba_load_poses(fn):
    """
    Retrieve poses
    X Y Z R P Y - > X -Y -Z R -P -Y

    np.deg2rad(p[3]),-np.deg2rad(p[4]),-np.deg2rad(p[5]),
    p[0]*.01,-p[1]*.01,-p[2]*.01, axes='sxyz') for p in P ]
    """
    P = np.loadtxt(os.path.expanduser(fn), dtype=np.float64, delimiter=',')
    return [ RigidTransform.from_rpyxyz(np.pi, 0, 0, 0, 0, 0) * \
             RigidTransform.from_rpyxyz(
                 np.deg2rad(p[3]), np.deg2rad(p[4]), np.deg2rad(p[5]),
                 p[0]*.01, p[1]*.01, p[2]*.01, axes='sxyz') * \
             RigidTransform.from_rpyxyz(np.pi, 0, 0, 0, 0, 0) for p in P ]

    # return [ RigidTransform.from_rpyxyz(
    #     np.deg2rad(p[3]),-np.deg2rad(p[4]),-np.deg2rad(p[5]),
    #     p[0]*.01,-p[1]*.01,-p[2]*.01, axes='sxyz') for p in P ]

def test_values(self):
    """
    Tests if the function returns the correct values.
    """
    filename = get_test_file_full_path(
        ioclass=NestIO,
        filename='0gid-1time-2gex-3Vm-1261-0.dat',
        directory=self.local_test_dir, clean=False)
    id_to_test = 1
    r = NestIO(filenames=filename)
    seg = r.read_segment(gid_list=[id_to_test],
                         t_stop=1000. * pq.ms,
                         sampling_period=pq.ms, lazy=False,
                         id_column_dat=0, time_column_dat=1,
                         value_columns_dat=2, value_types='V_m')

    dat = np.loadtxt(filename)
    target_data = dat[:, 2][np.where(dat[:, 0] == id_to_test)]
    target_data = target_data[:, None]

    st = seg.analogsignals[0]
    np.testing.assert_array_equal(st.magnitude, target_data)

def test_values(self):
    """
    Tests if the routine loads the correct numbers from the file.
    """
    id_to_test = 1
    filename = get_test_file_full_path(
        ioclass=NestIO,
        filename='0gid-1time-1256-0.gdf',
        directory=self.local_test_dir, clean=False)
    r = NestIO(filenames=filename)
    seg = r.read_segment(gid_list=[id_to_test],
                         t_start=400. * pq.ms,
                         t_stop=500. * pq.ms, lazy=False,
                         id_column_gdf=0, time_column_gdf=1)

    dat = np.loadtxt(filename)
    target_data = dat[:, 1][np.where(dat[:, 0] == id_to_test)]

    st = seg.spiketrains[0]
    np.testing.assert_array_equal(st.magnitude, target_data)

def __init__(self, filename):
    """
    filename: string, path to ASCII file to read.
    """
    self.filename = filename

    # read the first line to check the data type (int or float) of the data
    f = open(self.filename)
    line = f.readline()

    additional_parameters = {}
    if '.' not in line:
        additional_parameters['dtype'] = np.int32

    self.data = np.loadtxt(self.filename, **additional_parameters)

    if len(self.data.shape) == 1:
        self.data = self.data[:, np.newaxis]

def load_data(filename, use_labels=True):
    """
    Load data from CSV files and return them as numpy arrays
    The use_labels parameter indicates whether one should
    read the first column (containing class labels). If false,
    return all 0s.
    """

    # load column 1 to 8 (ignore last one)
    data = np.loadtxt(open(filename), delimiter=',',
                      usecols=range(1, 9), skiprows=1)
    if use_labels:
        labels = np.loadtxt(open(filename), delimiter=',',
                            usecols=[0], skiprows=1)
    else:
        labels = np.zeros(data.shape[0])
    return labels, data

def launch():
    opts, h5_files, motifs_fn = __parseArgs()

    __initLog(opts)

    motifs = np.loadtxt(motifs_fn, dtype="str", ndmin=1)
    motifs, not_found = find_motifs_in_control(opts, motifs)
    if len(not_found) > 0:
        logging.warning("")
        logging.warning("  ******************** Important *********************")
        logging.warning("  Did not find %s motifs in %s:" % (len(not_found), opts.control_pkl_name))
        for nf in not_found:
            logging.warning("       %s" % nf)
        logging.warning("  These motif(s) will be removed from further analysis.")
        logging.warning("  These %s motifs will be kept:" % len(motifs))
        for m in motifs:
            logging.warning("       %s" % m)
        logging.warning("  ****************************************************")
        logging.warning("")
    else:
        logging.info("Found entries for all %s motifs in %s" % (len(motifs), opts.control_pkl_name))

    build_profiles(opts, h5_files, motifs, motifs_fn)

    print >> sys.stderr, "mBin methylation profiling has finished running. See log for details."

def parse_fields( self ):
    """
    Read in the fields contained in the output files from methylprofiles.
    """
    m = np.loadtxt(self.mfn, dtype="str", skiprows=1)
    o = np.loadtxt(self.ofn, dtype="str", skiprows=1)

    # Optional flags
    m = self.length_filter(m)
    o = self.length_filter(o)
    if self.opts.n_seqs != None:
        m = self.subsample_seqs( m )
        o = self.subsample_seqs( o )

    # Pull out values
    self.ids = m[:,0].astype("str")
    self.lens = m[:,1].astype("int")
    self.mscores = m[:,2:].astype("float")
    if self.opts.seq_type == "contig":
        self.covs = o[:,2].astype("float")
        self.covcomps = o[:,2:].astype("float")
        self.comps = o[:,3:].astype("float")
    else:
        self.comps = o[:,2:].astype("float")

def transpose_contig_matrix( args ):
    contig = args[0]
    opts = args[1]
    logging.info("  Transposing %s" % contig)
    contig_ipds_fn = os.path.join( opts.tmp, "%s_ipds.tmp" % contig)
    contig_ipds_kmers_fn = os.path.join( opts.tmp, "%s_ipdskmers.tmp" % contig)
    contig_ipds_N_fn = os.path.join( opts.tmp, "%s_ipdsN.tmp" % contig)
    contig_ipds = np.loadtxt(contig_ipds_fn, dtype="float")
    contig_ipds_kmers = np.loadtxt(contig_ipds_kmers_fn, dtype="str")
    contig_ipds_N = np.loadtxt(contig_ipds_N_fn, dtype="int")
    if len(contig_ipds.shape) == 1:
        contig_ipds = contig_ipds.reshape(1, contig_ipds.shape[0])
        contig_ipds_N = contig_ipds_N.reshape(1, contig_ipds_N.shape[0])

    contig_ipds = contig_ipds.T
    contig_ipds_N = contig_ipds_N.T
    np.savetxt(contig_ipds_fn+".trans", contig_ipds, fmt="%.4f", delimiter="\t")
    np.savetxt(contig_ipds_N_fn+".trans", contig_ipds_N, fmt="%s", delimiter="\t")
    return None

def __init__(self, name, number_of_classes, number_of_transformations,
             loaded_size, desired_size, max_size=None):
    loaded = np.loadtxt(name)
    if max_size is not None:
        subset = np.random.choice(loaded.shape[0], max_size, replace=False)
        loaded = loaded[subset, :]
    padded_x = self._pad(loaded[:, :-1], loaded_size, desired_size)
    self._x = self._transform(padded_x, number_of_transformations)
    self._y = self._int_labels_to_one_hot(loaded[:, -1], number_of_classes)
    self._completed_epochs = -1
    self._new_epoch = False
    self._start_new_epoch()

def get_data(filename, headers, ph_units):
    # Import the .DAT data file
    dat_file = np.loadtxt("%s"%(filename), skiprows=headers, delimiter=',')
    labels = ["freq", "amp", "pha", "amp_err", "pha_err"]
    data = {l: dat_file[:,i] for (i,l) in enumerate(labels)}
    if ph_units == "mrad":
        data["pha"] = data["pha"]/1000                  # mrad to rad
        data["pha_err"] = data["pha_err"]/1000          # mrad to rad
    if ph_units == "deg":
        data["pha"] = np.radians(data["pha"])           # deg to rad
        data["pha_err"] = np.radians(data["pha_err"])   # deg to rad
    data["phase_range"] = abs(max(data["pha"])-min(data["pha"]))  # Range of phase measurements (used in NRMS error calculation)
    data["Z"] = data["amp"]*(np.cos(data["pha"]) + 1j*np.sin(data["pha"]))
    EI = np.sqrt(((data["amp"]*np.cos(data["pha"])*data["pha_err"])**2)+(np.sin(data["pha"])*data["amp_err"])**2)
    ER = np.sqrt(((data["amp"]*np.sin(data["pha"])*data["pha_err"])**2)+(np.cos(data["pha"])*data["amp_err"])**2)
    data["Z_err"] = ER + 1j*EI
    # Normalization of amplitude
    data["Z_max"] = max(abs(data["Z"]))  # Maximum amplitude
    zn, zn_e = data["Z"]/data["Z_max"], data["Z_err"]/data["Z_max"]  # Normalization of impedance by max amplitude
    data["zn"] = np.array([zn.real, zn.imag])        # 2D array with first column = real values, second column = imag values
    data["zn_err"] = np.array([zn_e.real, zn_e.imag])  # 2D array with first column = real values, second column = imag values
    return data

def do_annual_parallax_test(filename):
    """testing functions called by a few unit tests"""
    with open(filename) as data_file:
        lines = data_file.readlines()
    ulens_params = lines[3].split()
    event_params = lines[4].split()
    data = np.loadtxt(filename, dtype=None)
    model = Model({
        't_0': float(ulens_params[1]) + 2450000.,
        'u_0': float(ulens_params[3]),
        't_E': float(ulens_params[4]),
        'pi_E_N': float(ulens_params[5]),
        'pi_E_E': float(ulens_params[6])},
        coords=SkyCoord(
            event_params[1] + ' ' + event_params[2], unit=(u.deg, u.deg)))
    model.parameters.t_0_par = float(ulens_params[2]) + 2450000.

    time = data[:, 0]
    dataset = MulensData([time, 20. + time * 0., 0.1 + time * 0.],
                         add_2450000=True)
    model.set_datasets([dataset])
    model.parallax(satellite=False, earth_orbital=True, topocentric=False)
    return np.testing.assert_almost_equal(
        model.data_magnification[0] / data[:, 1], 1.0, decimal=4)

def test_satellite_and_annual_parallax_calculation():
    """test parallax calculation with Spitzer data"""
    model_with_par = Model({'t_0': 2457181.93930, 'u_0': 0.08858,
                            't_E': 20.23090, 'pi_E_N': -0.05413,
                            'pi_E_E': -0.16434},
                           coords="18:17:54.74 -22:59:33.4")
    model_with_par.parallax(satellite=True, earth_orbital=True,
                            topocentric=False)
    model_with_par.parameters.t_0_par = 2457181.9

    data_OGLE = MulensData(file_name=SAMPLE_FILE_02, add_2450000=True)
    data_Spitzer = MulensData(
        file_name=SAMPLE_FILE_03, ephemerides_file=SAMPLE_FILE_03_EPH,
        add_2450000=True)
    model_with_par.set_datasets([data_OGLE, data_Spitzer])

    ref_OGLE = np.loadtxt(SAMPLE_FILE_02_REF, unpack=True, usecols=[5])
    ref_Spitzer = np.loadtxt(SAMPLE_FILE_03_REF, unpack=True, usecols=[5])

    np.testing.assert_almost_equal(model_with_par.data_magnification[0],
                                   ref_OGLE, decimal=2)
    ratio = model_with_par.data_magnification[1] / ref_Spitzer
    np.testing.assert_almost_equal(ratio, [1.]*len(ratio), decimal=3)

def replace_coord(self, coordfile):
    """
    Replace the coordinates with the data from the given
    coordinate file.
    """
    try:
        coord_new = np.loadtxt(coordfile)
    except ValueError:
        coord_new = np.loadtxt(coordfile, delimiter=",")
    ra_new = coord_new[:, 0]
    dec_new = coord_new[:, 1]
    if self.number != len(ra_new):
        raise RuntimeError("invalid coordinate file: %s" % coordfile)
    self.ra = ra_new
    self.dec = dec_new
    print("Replaced coordinates")

def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats, dtype=str, skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5', 'r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    ind1 = np.in1d(sf[:,1], rf[:,2])
    ind2 = np.in1d(rf[:,2], sf[:,1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:,1] == rf1[:,2]) == len(rf1[:,2]):
        print 'Good!'
    else:
        print 'Shit happens, sorting sf1 to have the same order as rf1'
        O1 = np.argsort(sf1[:,1])
        O2 = np.argsort(rf1[:,2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p'+'\n']
    for i in range(len(sf1[:,1])):
        out.append(sf1[:,0][i]+' '+sf1[:,1][i]+' '+sf1[:,2][i]+' '+sf1[:,3][i]+' '+rf1[:,1][i]+' '+sf1[:,5][i]+' '+sf1[:,6][i]+'\n')
    ff = open(out_file, "w")
    ff.writelines(out)
    ff.close()

def build_w2v_matrix(vocab_processor, w2v_path, vector_path, dim_size):
    w2v_dict = {}
    f = open(vector_path, 'r')
    for line in f.readlines():
        word, vec = line.strip().split(' ', 1)
        w2v_dict[word] = np.loadtxt([vec], dtype='float32')

    vocab_list = vocab_processor._reverse_mapping
    w2v_W = np.zeros(shape=(len(vocab_list), dim_size), dtype='float32')

    for i, vocab in enumerate(vocab_list):
        # unknown vocab
        if i == 0:
            continue
        else:
            if vocab in w2v_dict:
                w2v_W[i] = w2v_dict[vocab]
            else:
                w2v_W[i] = get_unknown_word_vec(dim_size)

    cPickle.dump(w2v_W, open(w2v_path, 'wb'))

    return w2v_W

def readModelUBC(mesh, fileName):
    """Read UBC OcTree model and get vector

    :param string fileName: path to the UBC GIF model file to read
    :rtype: numpy.ndarray
    :return: OcTree model
    """

    if type(fileName) is list:
        out = {}
        for f in fileName:
            out[f] = mesh.readModelUBC(f)
        return out

    assert hasattr(mesh, '_simpegReorderUBC'), 'The file must have been loaded from a UBC format.'
    assert mesh.dim == 3

    modList = []
    modArr = np.loadtxt(fileName)
    if len(modArr.shape) == 1:
        modList.append(modArr[mesh._simpegReorderUBC])
    else:
        modList.append(modArr[mesh._simpegReorderUBC, :])
    return modList

def load_nodeIDs_coords(nodefile="nodes.dyn"):
    """load in node IDs and coordinates

    Exclude '*' keyword lines

    :param nodefile: node filename (nodes.dyn)
    :returns: nodeIDcoords (numpy array)
    """
    from numpy import loadtxt
    header_comment_skips = count_header_comment_skips(nodefile)
    nodeIDcoords = loadtxt(nodefile,
                           delimiter=',',
                           comments='*',
                           skiprows=header_comment_skips,
                           dtype=[('id', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')])
    return nodeIDcoords

def load_elems(elefile="elems.dyn"):
    """
    :param elefile: elems.dyn
    :return: elems
    """
    from numpy import loadtxt
    header_comment_skips = count_header_comment_skips(elefile)
    elems = loadtxt(elefile,
                    delimiter=',',
                    comments='*',
                    skiprows=header_comment_skips,
                    dtype=[('id', 'i4'), ('pid', 'i4'),
                           ('n1', 'i4'), ('n2', 'i4'), ('n3', 'i4'), ('n4', 'i4'),
                           ('n5', 'i4'), ('n6', 'i4'), ('n7', 'i4'), ('n8', 'i4')])
    return elems

def test_loadtxt_fields_subarrays(self):
    # For ticket #1936
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    dt = [("a", 'u1', 2), ("b", 'u1', 2)]
    x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt)
    assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))

    dt = [("a", [("a", 'u1', (1, 3)), ("b", 'u1')])]
    x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt)
    assert_equal(x, np.array([(((0, 1, 2), 3),)], dtype=dt))

    dt = [("a", 'u1', (2, 2))]
    x = np.loadtxt(StringIO("0 1 2 3"), dtype=dt)
    assert_equal(x, np.array([(((0, 1), (2, 3)),)], dtype=dt))

    dt = [("a", 'u1', (2, 3, 2))]
    x = np.loadtxt(StringIO("0 1 2 3 4 5 6 7 8 9 10 11"), dtype=dt)
    data = [((((0, 1), (2, 3), (4, 5)), ((6, 7), (8, 9), (10, 11))),)]
    assert_equal(x, np.array(data, dtype=dt))

def test_record(self):
    c = TextIO()
    c.write('1 2\n3 4')
    c.seek(0)
    x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)])
    a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
    assert_array_equal(x, a)

    d = TextIO()
    d.write('M 64.0 75.0\nF 25.0 60.0')
    d.seek(0)
    mydescriptor = {'names': ('gender', 'age', 'weight'),
                    'formats': ('S1', 'i4', 'f4')}
    b = np.array([('M', 64.0, 75.0),
                  ('F', 25.0, 60.0)], dtype=mydescriptor)
    y = np.loadtxt(d, dtype=mydescriptor)
    assert_array_equal(y, b)

def test_skiprows(self):
    c = TextIO()
    c.write('comment\n1,2,3,5\n')
    c.seek(0)
    x = np.loadtxt(c, dtype=int, delimiter=',',
                   skiprows=1)
    a = np.array([1, 2, 3, 5], int)
    assert_array_equal(x, a)

    c = TextIO()
    c.write('# comment\n1,2,3,5\n')
    c.seek(0)
    x = np.loadtxt(c, dtype=int, delimiter=',',
                   skiprows=1)
    a = np.array([1, 2, 3, 5], int)
    assert_array_equal(x, a)

def test_gzip_loadtxt():
    # Thanks to another windows brokeness, we can't use
    # NamedTemporaryFile: a file created from this function cannot be
    # reopened by another open call. So we first put the gzipped string
    # of the test reference array, write it to a securely opened file,
    # which is then read from by the loadtxt function
    s = BytesIO()
    g = gzip.GzipFile(fileobj=s, mode='w')
    g.write(b'1 2 3\n')
    g.close()

    s.seek(0)
    with temppath(suffix='.gz') as name:
        with open(name, 'wb') as f:
            f.write(s.read())
        res = np.loadtxt(name)
    s.close()

    assert_array_equal(res, [1, 2, 3])

def knn_masked_data(trX, trY, missing_data_dir, input_shape, k):

    raw_im_data = np.loadtxt(join(script_dir, missing_data_dir, 'index.txt'), delimiter=' ', dtype=str)
    raw_mask_data = np.loadtxt(join(script_dir, missing_data_dir, 'index_mask.txt'), delimiter=' ', dtype=str)
    # Using 'brute' method since we only want to do one query per classifier
    # so this will be quicker as it avoids overhead of creating a search tree
    knn_m = KNeighborsClassifier(algorithm='brute', n_neighbors=k)
    prob_Y_hat = np.zeros((raw_im_data.shape[0], int(np.max(trY)+1)))
    total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()],
        maxval=total_images, term_width=50).start()
    for i in range(total_images):
        mask_im = load_image(join(script_dir, missing_data_dir, raw_mask_data[i][0]), input_shape, 1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps)  # since mask is 1 at missing locations
        v_im = load_image(join(script_dir, missing_data_dir, raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
        rep_mask = np.tile(mask, (trX.shape[0], 1))
        # Corrupt whole training set according to the current mask
        corr_trX = np.multiply(trX, rep_mask)
        knn_m.fit(corr_trX, trY)
        prob_Y_hat[i, :] = knn_m.predict_proba(v_im.reshape(1, -1))
        pbar.update(i)
    pbar.finish()
    return prob_Y_hat

def quiz19_20():
    gamma_l = [32, 2, 0.125]
    lamb_l = [0.001, 1, 1000]
    data = np.loadtxt("hw2_lssvm_all.dat")
    x_train = data[:400, :-1]
    y_train = data[:400, -1].astype(int)
    x_test = data[400:, :-1]
    y_test = data[400:, -1].astype(int)
    n = len(y_train)
    print("gamma lamb e_in e_out")
    for gamma in gamma_l:
        for lamb in lamb_l:
            w = np.array(KRG(x_train, y_train, gamma, lamb, n)).flatten()
            e_in = err(x_train, y_train, (w, gamma, x_train))
            e_out = err(x_test, y_test, (w, gamma, x_train))
            print(gamma, " ", lamb, " ", e_in, " ", e_out)


# quiz19-20

def _fileToMatrix(file_name):
    """rudimentary method to read in data from a file"""
    # TODO: np.loadtxt() might be an alternative
    #     try:
    if 1 < 3:
        lres = []
        for line in open(file_name, 'r').readlines():
            if len(line) > 0 and line[0] not in ('%', '#'):
                lres.append(list(map(float, line.split())))
        res = lres
    while res != [] and res[0] == []:  # remove further leading empty lines
        del res[0]
    return res
    #     except:
    print('could not read file ' + file_name)


# ____________________________________________________________
# ____________________________________________________________

def __init__(self, dictionary=None, topic_data=None, topic_file=None, style=None):
    if dictionary is None:
        raise ValueError('no dictionary!')
    if topic_data is not None:
        topics = topic_data
    elif topic_file is not None:
        topics = np.loadtxt('%s' % topic_file)
    else:
        raise ValueError('no topic data!')

    # sort topics
    topics_sums = np.sum(topics, axis=1)
    idx = np.argsort(topics_sums)[::-1]
    self.data = topics[idx]

    self.dictionary = dictionary

    if style is None:
        style = self.STYLE_GENSIM
    self.style = style

def GetTransitTimes(file = 'ttv_kruse.dat'):
    '''

    '''

    planet, _, time, dtime = np.loadtxt(os.path.join(TRAPPIST_DAT, file), unpack = True)
    transit_times = [None for i in range(7)]
    if file == 'ttv_kruse.dat':
        for i in range(7):
            inds = np.where(planet == i + 1)[0]
            transit_times[i] = time[inds] + (2455000 - 2454833)
    elif file == 'ttv_agol.dat':
        for i in range(6):
            inds = np.where(planet == i + 1)[0]
            transit_times[i] = time[inds] + (2450000 - 2454833)
            # Append a few extra for padding
            pad = [transit_times[i][-1] + np.median(np.diff(transit_times[i])),
                   transit_times[i][-1] + 2 * np.median(np.diff(transit_times[i])),
                   transit_times[i][-1] + 3 * np.median(np.diff(transit_times[i]))]
            transit_times[i] = np.append(transit_times[i], pad)
    return PlanetProperty(transit_times)

def load_uci_german_credits(path, n_train):
    if not os.path.isfile(path):
        data_dir = os.path.dirname(path)
        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(data_dir)
        download_dataset('https://archive.ics.uci.edu/ml/'
                         'machine-learning-databases/statlog/'
                         'german/german.data-numeric', path)

    n_dims = 24
    data = np.loadtxt(path)

    x_train = data[:n_train, :n_dims]
    y_train = data[:n_train, n_dims] - 1
    x_test = data[n_train:, :n_dims]
    y_test = data[n_train:, n_dims] - 1

    return x_train, y_train, x_test, y_test

def load_uci_boston_housing(path, dtype=np.float32):
    if not os.path.isfile(path):
        data_dir = os.path.dirname(path)
        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(data_dir)
        download_dataset('http://archive.ics.uci.edu/ml/'
                         'machine-learning-databases/housing/housing.data',
                         path)

    data = np.loadtxt(path)
    data = data.astype(dtype)
    permutation = np.random.choice(np.arange(data.shape[0]),
                                   data.shape[0], replace=False)
    size_train = int(np.round(data.shape[0] * 0.8))
    size_test = int(np.round(data.shape[0] * 0.9))
    index_train = permutation[0: size_train]
    index_test = permutation[size_train:size_test]
    index_val = permutation[size_test:]

    x_train, y_train = data[index_train, :-1], data[index_train, -1]
    x_val, y_val = data[index_val, :-1], data[index_val, -1]
    x_test, y_test = data[index_test, :-1], data[index_test, -1]

    return x_train, y_train, x_val, y_val, x_test, y_test

def applyTexture(x, y, texture = texture_input):
    text = imread(texture_input)
    height, width = text.shape[:2]
    xmin, ymin = amin(x), amin(y)
    xmax, ymax = amax(x), amax(y)
    scale = max(((xmax - xmin + 2)/height), ((ymax - ymin + 2)/width))
    text = imresize(text, scale)
    # print text.shape[:2]
    # print xmax - xmin +2, ymax - ymin+2
    X = (x-xmin).astype(int)
    Y = (y-ymin).astype(int)
    val1 = color.rgb2lab((text[X, Y]/255.).reshape(len(X), 1, 3)).reshape(len(X), 3)
    val2 = color.rgb2lab((im[x, y]/255.).reshape(len(x), 1, 3)).reshape(len(x), 3)
    L, A, B = mean(val2[:,0]), mean(val2[:,1]), mean(val2[:,2])
    val2[:, 0] = np.clip(val2[:, 0] - L + val1[:,0], 0, 100)
    val2[:, 1] = np.clip(val2[:, 1] - A + val1[:,1], -127, 128)
    val2[:, 2] = np.clip(val2[:, 2] - B + val1[:,2], -127, 128)
    im[x, y] = color.lab2rgb(val2.reshape(len(x), 1, 3)).reshape(len(x), 3)*255

# points = np.loadtxt('nailpoint_5')

def read_data_tri(filename):
    data = numpy.loadtxt(filename)
    if len(data.shape) == 1:
        data = numpy.array([data])
    points = data[:, :2]
    weights = data[:, 2]

    # The reference triangle is (-1, -1), (1, -1), (-1, 1). Transform the
    # points to barycentric coordinates.
    points += 1.0
    points *= 0.5
    points = numpy.array([
        points[:, 0],
        points[:, 1],
        1.0 - numpy.sum(points, axis=1)
        ]).T

    return points, weights * 0.5

def convert_data_to_timeseries(input_file, column, verbose=False):
    # Load the input file
    data = np.loadtxt(input_file, delimiter=',')

    # Extract the start and end dates
    start_date = str(int(data[0,0])) + '-' + str(int(data[0,1]))
    end_date = str(int(data[-1,0] + 1)) + '-' + str(int(data[-1,1] % 12 + 1))

    if verbose:
        print "\nStart date =", start_date
        print "End date =", end_date

    # Create a date sequence with monthly intervals
    dates = pd.date_range(start_date, end_date, freq='M')

    # Convert the data into time series data
    data_timeseries = pd.Series(data[:,column], index=dates)

    if verbose:
        print "\nTime series data:\n", data_timeseries[:10]

    return data_timeseries

def test_data_sizes(self):
    """Test that different number of bits give correct throughput size"""
    for iterate in range(5):
        nbit = 2**iterate
        if nbit == 8:
            continue
        self.blocks[0] = (
            SigprocReadBlock(
                './data/2chan' + str(nbit) + 'bitNoDM.fil'),
            [], [0])
        open(self.logfile, 'w').close()
        Pipeline(self.blocks).main()
        number_fftd = np.loadtxt(self.logfile).astype(np.float32).view(np.complex64).size
        # Compare with simple copy
        self.blocks[1] = (CopyBlock(), [0], [1])
        open(self.logfile, 'w').close()
        Pipeline(self.blocks).main()
        number_copied = np.loadtxt(self.logfile).size
        self.assertEqual(number_fftd, number_copied)
        # Go back to FFT
        self.blocks[1] = (FFTBlock(gulp_size=4096 * 8 * 8 * 8), [0], [1])

def test_equivalent_data_to_copy(self):
    """Test that the data coming out of this pipeline is equivalent
    the initial read data"""
    self.logfile = '.log.txt'
    self.blocks = []
    self.blocks.append((
        SigprocReadBlock(
            './data/1chan8bitNoDM.fil'),
        [], [0]))
    self.blocks.append((FFTBlock(gulp_size=4096 * 8 * 8 * 8 * 8), [0], [1]))
    self.blocks.append((IFFTBlock(gulp_size=4096 * 8 * 8 * 8 * 8), [1], [2]))
    self.blocks.append((WriteAsciiBlock(self.logfile), [2], []))
    open(self.logfile, 'w').close()
    Pipeline(self.blocks).main()
    unfft_result = np.loadtxt(self.logfile).astype(np.float32).view(np.complex64)
    self.blocks[1] = (CopyBlock(), [0], [1])
    self.blocks[2] = (WriteAsciiBlock(self.logfile), [1], [])
    del self.blocks[3]
    open(self.logfile, 'w').close()
    Pipeline(self.blocks).main()
    untouched_result = np.loadtxt(self.logfile).astype(np.float32)
    np.testing.assert_almost_equal(unfft_result, untouched_result, 2)

def load_single_voxel_grid(self, path):
    temp = re.split('_', path.split('.')[-2])
    x_d = int(temp[len(temp) - 3])
    y_d = int(temp[len(temp) - 2])
    z_d = int(temp[len(temp) - 1])

    a = np.loadtxt(path)
    if len(a) <= 0:
        print " load_single_voxel_grid error: ", path
        exit()

    voxel_grid = np.zeros((x_d, y_d, z_d, 1))
    for i in a:
        voxel_grid[int(i[0]), int(i[1]), int(i[2]), 0] = 1  # occupied

    #Data.plotFromVoxels(voxel_grid)
    voxel_grid = self.voxel_grid_padding(voxel_grid)
    return voxel_grid

def calculate_loss_distill(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill"):
        print("loss_distill")
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        float_labels_distill = tf.cast(labels_distill, tf.float32)
        embedding_mat = np.loadtxt("./resources/embedding_matrix.model")
        vocab_size = embedding_mat.shape[1]
        labels_size = float_labels.get_shape().as_list()[1]
        embedding_mat = tf.cast(embedding_mat,dtype=tf.float32)
        cross_entropy_loss_1 = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        float_labels_1 = float_labels[:,:vocab_size]
        labels_smooth = tf.matmul(float_labels_1,embedding_mat)/tf.reduce_sum(float_labels_1,axis=1,keep_dims=True)
        float_classes = labels_smooth
        for i in range(labels_size//vocab_size-1):
            float_classes = tf.concat((float_classes,labels_smooth),axis=1)
        cross_entropy_loss_2 = float_classes * tf.log(predictions + epsilon) + (
            1 - float_classes) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss_3 = float_labels_distill * tf.log(predictions + epsilon) + (
            1 - float_labels_distill) * tf.log(1 - predictions + epsilon)

        cross_entropy_loss = cross_entropy_loss_1*0.5 + cross_entropy_loss_2*0.5 + cross_entropy_loss_3*0.5
        cross_entropy_loss = tf.negative(cross_entropy_loss)

        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))

def calculate_loss_negative(self, predictions_pos, predictions_neg, labels, **unused_params):
    with tf.name_scope("loss_negative"):
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        weight_pos = np.loadtxt(FLAGS.autoencoder_dir+"labels_uni.out")
        weight_pos = tf.reshape(tf.cast(weight_pos,dtype=tf.float32),[1,-1])
        weight_pos = tf.log(tf.reduce_max(weight_pos)/weight_pos)+1
        cross_entropy_loss_1 = float_labels * tf.log(predictions_pos + epsilon)*weight_pos + (
            1 - float_labels) * tf.log(1 - predictions_pos + epsilon)
        cross_entropy_loss_2 = (1-float_labels) * tf.log(predictions_neg + epsilon) + \
            float_labels * tf.log(1 - predictions_neg + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss_1+cross_entropy_loss_2)
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))

def calculate_loss_mix(self, predictions, predictions_class, labels, **unused_params):
    with tf.name_scope("loss_mix"):
        float_labels = tf.cast(labels, tf.float32)
        if FLAGS.support_type=="class":
            seq = np.loadtxt(FLAGS.class_file)
            tf_seq = tf.one_hot(tf.constant(seq,dtype=tf.int32),FLAGS.encoder_size)
            float_classes_org = tf.matmul(float_labels,tf_seq)
            class_true = tf.ones(tf.shape(float_classes_org))
            class_false = tf.zeros(tf.shape(float_classes_org))
            float_classes = tf.where(tf.greater(float_classes_org, class_false), class_true, class_false)
            cross_entropy_class = self.calculate_loss(predictions_class,float_classes)
        elif FLAGS.support_type=="frequent":
            float_classes = float_labels[:,0:FLAGS.encoder_size]
            cross_entropy_class = self.calculate_loss(predictions_class,float_classes)
        elif FLAGS.support_type=="encoder":
            float_classes = float_labels
            for i in range(FLAGS.encoder_layers):
                var_i = np.loadtxt(FLAGS.autoencoder_dir+'autoencoder_layer%d.model' % i)
                weight_i = tf.constant(var_i[:-1,:],dtype=tf.float32)
                bias_i = tf.reshape(tf.constant(var_i[-1,:],dtype=tf.float32),[-1])
                float_classes = tf.nn.xw_plus_b(float_classes,weight_i,bias_i)
                if i<FLAGS.encoder_layers-1:
                    float_classes = tf.nn.relu(float_classes)
                else:
                    float_classes = tf.nn.sigmoid(float_classes)
                    #float_classes = tf.nn.relu(tf.sign(float_classes - 0.5))
            cross_entropy_class = self.calculate_mseloss(predictions_class,float_classes)
        else:
            float_classes = float_labels
            for i in range(FLAGS.moe_layers-1):
                float_classes = tf.concat((float_classes,float_labels),axis=1)
            cross_entropy_class = self.calculate_loss(predictions_class,float_classes)
        cross_entropy_loss = self.calculate_loss(predictions,labels)
        return cross_entropy_loss + 0.1*cross_entropy_class