The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.str().
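Before diving in, a minimal sketch of our own (not taken from any of the projects below) to show what numpy.str actually is: it is simply an alias for Python's built-in str, usable anywhere a string type is expected, most commonly as a dtype. Note that the alias was deprecated in NumPy 1.20 and later removed, so the idiom in these examples only works on older NumPy versions.

import numpy as np

# np.str is just the built-in str, so these two arrays are identical;
# on NumPy >= 1.20 the first line emits a DeprecationWarning, and on
# >= 1.24 the alias is gone entirely.
assert np.str is str
a = np.array(['a', 'bc'], dtype=np.str)
b = np.array(['a', 'bc'], dtype=str)
assert a.dtype == b.dtype  # both '<U2'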
def test_leak_in_structured_dtype_comparison(self):
    # gh-6250
    recordtype = np.dtype([('a', np.float64),
                           ('b', np.int32),
                           ('d', (np.str, 5))])

    # Simple case
    a = np.zeros(2, dtype=recordtype)
    for i in range(100):
        a == a
    assert_(sys.getrefcount(a) < 10)

    # The case in the bug report.
    before = sys.getrefcount(a)
    u, v = a[0], a[1]
    u == v
    del u, v
    gc.collect()
    after = sys.getrefcount(a)
    assert_equal(before, after)
def add_node_attribute(inFile, pedgraph, animal=1, atCol=4, atName="attr1"):
    """
    inFile - pedigree as .txt file
    pedgraph - Pedigree as a networkX graph object
    animal - column for the animal ID
    atCol - column for the attribute
    atName - name for the attribute
    """
    ped_df = pd.read_table(inFile, header=None, delim_whitespace=True)
    #print ped_df
    dic_ped = dict(zip(ped_df[animal - 1], ped_df[atCol - 1]))
    #print dic_ped
    correct_dic_ped = {str(k): int(v) for k, v in dic_ped.items()}
    #print correct_dic_ped
    for node, value in dic_ped.items():
        pedgraph.node[str(node)]["EBV"] = value
    return correct_dic_ped
def add_ebv_attribute(inFile, pedgraph, animal=1, atCol=4, atName="attr1"):
    """
    inFile - pedigree as .txt file
    pedgraph - Pedigree as a networkX graph object
    animal - column for the animal ID
    atCol - column for the attribute
    atName - name for the attribute
    """
    ped_df = pd.read_table(inFile, header=None, delim_whitespace=True)
    #print ped_df
    dic_ped = dict(zip(ped_df[animal - 1], ped_df[atCol - 1]))
    #print dic_ped
    correct_dic_ped = {str(k): int(-v) for k, v in dic_ped.items()}
    #print correct_dic_ped
    for node, value in dic_ped.items():
        pedgraph.node[str(node)]["EBV"] = value
    return correct_dic_ped
def __init__(self, filename='word2vec.pklz'):
    """
    Py Word2vec??
    """
    super().__init__()
    self.name = 'word2vec'
    self.load(filename)
    self.vocab_cnt = len(self)
    self.dims = self[list(self.keys())[0]].shape[0]
    # the original print labels were Chinese (garbled to '?' in the source)
    print('vocab size: ' + str(self.vocab_cnt))
    print('dims: ' + str(self.dims))
    self.word2idx = {w: i for i, w in enumerate(self.keys())}
    self.idx2word = {i: w for i, w in enumerate(self.keys())}
    self._matrix = np.array(list(self.values()))
    print(self._matrix.shape)
def get_antonyms(self, wordA: str, topk: int = 10, ispositive: bool = True):
    # seed antonym pairs (the original Chinese words were lost to encoding)
    seed = [['??', '??'], ['??', '??'], ['??', '??'], ['??', '??'], ['??', '??']]
    proposal = {}
    for pair in seed:
        if ispositive:
            result = self.analogy(pair[0], pair[1], wordA, topk)
            print(w2v.find_nearest_word((self[pair[0]] + self[pair[1]]) / 2, 3))
        else:
            result = self.analogy(pair[1], pair[0], wordA, topk)
            print(w2v.find_nearest_word((self[pair[0]] + self[pair[1]]) / 2, 3))
        for item in result:
            term_products = np.argwhere(self[wordA] * self[item[0]] < 0)
            #print(item[0] + ':' + wordA + str(term_products))
            #print(item[0] + ':' + wordA + '(' + str(pair) + ') ' + str(len(term_products)))
            if len(term_products) >= self.dims / 4:
                if item[0] not in proposal:
                    proposal[item[0]] = item[1]
                elif item[1] > proposal[item[0]]:
                    proposal[item[0]] += item[1]
    for k, v in proposal.items():
        proposal[k] = v / len(seed)
    sortitems = sorted(proposal.items(), key=lambda d: d[1], reverse=True)
    return [sortitems[i] for i in range(min(topk, len(sortitems)))]
def __init__(self):
    # create a 'data' folder under the current directory if needed, then cd into it
    current = os.getcwd()
    folder = os.path.join(current, 'data')
    if os.path.exists(folder) == False:
        os.mkdir(folder)
    os.chdir(folder)
    # basics of all A-share stocks (originally fetched via tushare)
    #df0 = ts.get_stock_basics()
    df0 = pd.read_csv('bases.csv', dtype={'code': np.str})
    self.bases = df0.sort_values('timeToMarket', ascending=False)
    # recently listed stocks: listed between 2017-01-01 and 2017-04-01
    self.cxg = self.bases[(self.bases['timeToMarket'] > 20170101) &
                          (self.bases['timeToMarket'] < 20170401)]
    self.codes = self.cxg['code'].values
def getBigDeal(self, code, vol):
    df = ts.get_today_ticks(code)
    t = df[df['volume'] > vol]
    s = df[df['amount'] > 100000000]
    print('\n')
    if t.size != 0:
        print("Big volume")
        print(self.base[self.base['code'] == str(code)]['name'].values[0])
        print(t)
    if s.size != 0:
        print("Big amount: ")
        print(self.base[self.base['code'] == str(code)]['name'].values[0])
        print(s)
    r = df[df['volume'] > vol * 10]
    if r.size != 0:
        print("Super amount:")
        print(self.base[self.base['code'] == str(code)]['name'].values[0])
        print(r)
def years(self):
    df_list = []
    k = [str(i) for i in range(1, 13)]
    print(k)
    j = [i for i in range(1, 13)]
    result = []
    for i in range(1, 13):
        filename = '2016-%s.xls' % str(i).zfill(2)
        #print filename
        # the dtype key and the fee columns below are Chinese spreadsheet
        # headers that were garbled to '?' in the source
        t = pd.read_table(filename, encoding='gbk', dtype={u'????': np.str})
        fee = t[u'???'].sum() + t[u'???'].sum() + t[u'????'].sum()
        print(i, " fee: ")
        print(fee)
        df_list.append(t)
        result.append(fee)
    df = pd.concat(df_list, keys=k)
    #print df
    #df.to_excel('2016_delivery_order.xls')
    self.caculation(df)
    plt.plot(j, result)
    plt.show()
def getTotal():
    path = os.path.join(os.getcwd(), 'data')
    os.chdir(path)
    all = pd.read_csv('bases.csv', dtype={'code': np.str})
    #print all
    all_code = all['code'].values
    #print all_code
    lists = []
    for i in all_code:
        df = ts.get_k_data(i, start='2017-07-17', end='2017-07-17')
        lists.append(df)
    all_df = pd.DataFrame(lists)
    print(all_df)
    all_df.to_csv('2017-all.csv', encoding='gbk')
    all_df.to_excel('2017-excel.xls')
def add_code_redis():
    rds = redis.StrictRedis(REDIS_HOST, 6379, db=0)
    rds_1 = redis.StrictRedis(REDIS_HOST, 6379, db=1)
    df = ts.get_stock_basics()
    df = df.reset_index()
    # drop any stale entries before re-populating
    if rds.dbsize() != 0:
        rds.flushdb()
    if rds_1.dbsize() != 0:
        rds_1.flushdb()
    for i in range(len(df)):
        code, name, timeToMarket = df.loc[i]['code'], df.loc[i]['name'], df.loc[i]['timeToMarket']
        # print str(timeToMarket)
        d = dict({code: ':'.join([name, str(timeToMarket)])})
        # print d
        rds.set(code, name)
        rds_1.lpush('codes', d)
def read_cufflinks(sample_path, isoforms=False):
    '''
    Function for reading a Cufflinks quantification result.

    Returns
    -------
    A pandas.Series with the expression values in the sample.
    '''
    if isoforms:
        quant_file = sample_path + '/isoforms.fpkm_tracking'
    else:
        quant_file = sample_path + '/genes.fpkm_tracking'
    df = pd.read_table(quant_file, engine='c',
                       usecols=['tracking_id', 'FPKM'],
                       index_col=0,
                       dtype={'tracking_id': np.str, 'FPKM': np.float64})
    df['tracking_id'] = df.index
    df = df.groupby('tracking_id').sum()
    df['TPM'] = df['FPKM'] / df['FPKM'].sum() * 1e6
    df = df.rename(columns={'tracking_id': 'target_id'})
    return df['TPM']
def tensor2state(tensor_frd, tensor_emy):
    '''
    transform tensor 2 state
    tensor_frd, tensor_emy: ndarray [9,10,16]
    return state: ndarray [10,9]
    '''
    assert tensor_frd.shape == tensor_emy.shape
    state = np.zeros((10, 9), dtype=np.str)
    chessfrdplayer = 'KAABBNNRRCCPPPPP'
    chessemyplayer = 'kaabbnnrrccppppp'
    for i in range(tensor_frd.shape[0]):
        for j in range(tensor_frd.shape[1]):
            if ~(tensor_frd[i][j] == 0).all():
                layer = np.argmax(tensor_frd[i][j])
                state[j][i] = chessfrdplayer[layer]
            elif ~(tensor_emy[i][j] == 0).all():
                layer = np.argmax(tensor_emy[i][j])
                state[j][i] = chessemyplayer[layer]
            else:
                state[j][i] = ' '
    return state
def _get_value(self, var: str):
    """
    Utility method to return the value of the specified variable for this instance in the backing xarray data set.

    Parameters
    ----------
    var: str
        Name of the variable. There should be no reason to pass a str directly. Instead, the names defined in
        the _DataVar class should be used.

    Returns
    -------
    depending on variable
        The value of the specified variable for this instance
    """
    return self._data[var][dict(instance=self._instance)]
def contains(self, filename: str, chunk_nr: int) -> bool:
    """
    Check whether this data set contains an instance with the specified filename and chunk number.

    Parameters
    ----------
    filename: str
        The filename of the instance
    chunk_nr: int
        The chunk number of the instance

    Returns
    -------
    bool
        True, if this data set contains an instance with the specified filename and chunk number, False otherwise
    """
    if filename not in self._data[_DataVar.FILENAME].values:
        return False
    instances_with_filename = self._data.where(self._data[_DataVar.FILENAME] == filename)
    return chunk_nr in instances_with_filename[_DataVar.CHUNK_NR].values
def labels_nominal(self) -> np.ndarray:
    """
    Returns the nominal labels of all instances in this data set as a NumPy array. The order of labels in the
    returned array matches the order in which instances are stored in this data set.

    Returns
    -------
    numpy.ndarray
        The nominal labels of the instances in this data set

    Raises
    ------
    AttributeError
        If the data set is not fully labeled
    """
    if not self.is_fully_labeled:
        raise AttributeError("data set does not have label information")
    return self._data[_DataVar.LABEL_NOMINAL].values.astype(np.str)
def save(self, path: Path):
    """
    Writes this data set to the specified path. Any directories in the path that do not exist are automatically
    created.

    Parameters
    ----------
    path: pathlib.Path
    """
    if not path.parent.exists():
        path.parent.mkdir(parents=True)
    self.log.info("writing data set as netCDF4 to %s", path)
    self._data.to_netcdf(path=str(path), engine="netcdf4", format="NETCDF4")
def _read_xz(self, filepath):
    dtype = {
        'applicant_id': np.str,
        'batch_number': np.str,
        'cnpj_cpf': np.str,
        'congressperson_document': np.str,
        'congressperson_id': np.str,
        'document_id': np.str,
        'document_number': np.str,
        'document_type': np.str,
        'leg_of_the_trip': np.str,
        'passenger': np.str,
        'reimbursement_number': np.str,
        'subquota_group_description': np.str,
        'subquota_group_id': np.str,
        'subquota_number': np.str,
        'term_id': np.str,
    }
    return pd.read_csv(filepath, dtype=dtype)
def read_csv(self, name):
    filepath = os.path.join(self.path, name)
    log.info('Loading {}…'.format(name))
    dtype = {
        'applicant_id': np.str,
        'batch_number': np.str,
        'cnpj_cpf': np.str,
        'congressperson_document': np.str,
        'congressperson_id': np.str,
        'document_id': np.str,
        'document_number': np.str,
        'document_type': np.str,
        'leg_of_the_trip': np.str,
        'passenger': np.str,
        'reimbursement_number': np.str,
        'subquota_group_description': np.str,
        'subquota_group_id': np.str,
        'subquota_number': np.str,
        'term_id': np.str,
    }
    return pd.read_csv(filepath, dtype=dtype)
def pcaCreate(image_files, dir, name_num, dir_list):
    image_list = []
    new_file_name = dir
    save_dir = dir_list + new_file_name
    save_dir_tt = save_dir + "\\"
    for image_file in image_files:
        image_list.append(misc.imread(image_file))
    for image in image_list:
        img = np.asarray(image, dtype='float32')
        img = img / 255.
        img_size = img.size // 3  # integer division so reshape gets an int
        img1 = img.reshape(img_size, 3)
        img1 = np.transpose(img1)
        img_cov = np.cov([img1[0], img1[1], img1[2]])
        lamda, p = np.linalg.eig(img_cov)
        p = np.transpose(p)
        alpha1 = random.normalvariate(0, 0.3)
        alpha2 = random.normalvariate(0, 0.3)
        alpha3 = random.normalvariate(0, 0.3)
        v = np.transpose((alpha1 * lamda[0], alpha2 * lamda[1], alpha3 * lamda[2]))
        add_num = np.dot(p, v)
        img2 = np.array([img[:, :, 0] + add_num[0],
                         img[:, :, 1] + add_num[1],
                         img[:, :, 2] + add_num[2]])
        img2 = np.swapaxes(img2, 0, 2)
        img2 = np.swapaxes(img2, 0, 1)
        misc.imsave(save_dir_tt + np.str(name_num) + '.jpg', img2)
        name_num += 1
    return image_list
def dataset(self):
    path = self.update_datasets()
    self._dataset = pd.read_csv(path, dtype={'cnpj_cpf': np.str}, encoding='utf-8')
    self.prepare_dataset()
    return self._dataset
def setUp(self):
    self.dataset = pd.read_csv(
        'rosie/core/tests/fixtures/invalid_cnpj_cpf_classifier.csv',
        dtype={'recipient_id': np.str})
    self.subject = InvalidCnpjCpfClassifier()
def setUp(self):
    self.full_dataset = pd.read_csv(
        self.MONTHLY_SUBQUOTA_LIMIT_FIXTURE_FILE,
        dtype={'subquota_number': np.str})
    self.dataset = self.full_dataset[
        ['applicant_id', 'subquota_number', 'issue_date', 'year', 'month', 'net_value']]
    self.test_result_dataset = self.full_dataset[['expected_prediction', 'test_case_description']]
    self.subject = MonthlySubquotaLimitClassifier()
    self.subject.fit_transform(self.dataset)
    self.prediction = self.subject.predict(self.dataset)
def setUp(self):
    self.dataset = pd.read_csv(
        'rosie/chamber_of_deputies/tests/fixtures/meal_price_outlier_classifier.csv',
        dtype={'recipient_id': np.str})
    self.subject = MealPriceOutlierClassifier()
    self.subject.fit(self.dataset)
def setUp(self):
    self.dataset = pd.read_csv(
        'rosie/chamber_of_deputies/tests/fixtures/traveled_speeds_classifier.csv',
        dtype={'recipient_id': np.str})
    self.subject = TraveledSpeedsClassifier()
    self.subject.fit(self.dataset)
def get_companies(self):
    path = os.path.join(self.path, self.COMPANIES_DATASET)
    dataset = pd.read_csv(path, dtype={'cnpj': np.str}, low_memory=False)
    dataset['cnpj'] = dataset['cnpj'].str.replace(r'\D', '')
    dataset['situation_date'] = pd.to_datetime(
        dataset['situation_date'], errors='coerce')
    return dataset
def load_test_data(ticker='000001'):
    '''
    Load test data for development.
    :param ticker:
    :return: ticker  tradeDate  turnoverVol  closePrice  highestPrice  lowestPrice  openPrice
    '''
    return pd.read_csv(BASE_DIR + '/tests/test_data/' + ticker + '.csv',
                       dtype={"ticker": np.str}, index_col=0)
def load_mesh(filename):
    """Open a json file and load the mesh into the target class

    As long as there are no namespace conflicts, the target __class__
    will be stored on the properties.HasProperties registry and may be
    fetched from there.

    :param str filename: name of file to read in
    """
    with open(filename, 'r') as outfile:
        jsondict = json.load(outfile)
    data = BaseMesh.deserialize(jsondict, trusted=True)
    return data
def _readUBC_3DMesh(TensorMesh, fileName):
    """Read UBC GIF 3D tensor mesh and generate same dimension TensorMesh.

    :param string fileName: path to the UBC GIF mesh file
    :rtype: TensorMesh
    :return: The tensor mesh for the fileName.
    """
    # Internal function to read cell size lines for the UBC mesh files.
    def readCellLine(line):
        line_list = []
        for seg in line.split():
            if '*' in seg:
                sp = seg.split('*')
                seg_arr = np.ones((int(sp[0]),)) * float(sp[1])
            else:
                seg_arr = np.array([float(seg)], float)
            line_list.append(seg_arr)
        return np.concatenate(line_list)

    # Read the file as line strings, remove lines with comment = !
    msh = np.genfromtxt(fileName, delimiter='\n', dtype=np.str, comments='!')
    # First line is the size of the model
    sizeM = np.array(msh[0].split(), dtype=float)
    # Second line is the South-West-Top corner coordinates.
    x0 = np.array(msh[1].split(), dtype=float)
    # Read the cell sizes
    h1 = readCellLine(msh[2])
    h2 = readCellLine(msh[3])
    h3temp = readCellLine(msh[4])
    # Invert the indexing of the vector to start from the bottom.
    h3 = h3temp[::-1]
    # Adjust the reference point to the bottom south west corner
    x0[2] = x0[2] - np.sum(h3)
    # Make the mesh
    tensMsh = TensorMesh([h1, h2, h3], x0=x0)
    return tensMsh
def readUBC(TensorMesh, fileName, meshdim=None):
    """Wrapper to read UBC GIF 2D and 3D tensor mesh and generate same dimension TensorMesh.

    :param string fileName: path to the UBC GIF mesh file
    :param int meshdim: expected dimension of the mesh, if unknown the default argument is None
    :rtype: TensorMesh
    :return: The tensor mesh for the fileName.
    """
    # Check the expected mesh dimensions
    if meshdim is None:
        # Read the file as line strings, remove lines with comment = !
        msh = np.genfromtxt(fileName, delimiter='\n', dtype=np.str,
                            comments='!', max_rows=1)
        # First line is the size of the model
        sizeM = np.array(msh.ravel()[0].split(), dtype=float)
        # Check if the mesh is a UBC 2D mesh
        if sizeM.shape[0] == 1:
            Tnsmsh = TensorMesh._readUBC_2DMesh(fileName)
        # Check if the mesh is a UBC 3D mesh
        elif sizeM.shape[0] == 3:
            Tnsmsh = TensorMesh._readUBC_3DMesh(fileName)
        else:
            raise Exception('File format not recognized')
    # expected dimension is 2
    elif meshdim == 2:
        Tnsmsh = TensorMesh._readUBC_2DMesh(fileName)
    # expected dimension is 3
    elif meshdim == 3:
        Tnsmsh = TensorMesh._readUBC_3DMesh(fileName)
    return Tnsmsh
def writeUBC(mesh, fileName, models=None):
    """Writes a TensorMesh to a UBC-GIF format mesh file.

    :param string fileName: File to write to
    :param dict models: A dictionary of the models
    """
    assert mesh.dim == 3
    s = ''
    s += '{0:d} {1:d} {2:d}\n'.format(*tuple(mesh.vnC))
    # Have to do it in the same operation or use mesh.x0.copy(),
    # otherwise the mesh.x0 is updated.
    origin = mesh.x0 + np.array([0, 0, mesh.hz.sum()])
    origin.dtype = float
    s += '{0:.6f} {1:.6f} {2:.6f}\n'.format(*tuple(origin))
    s += ('%.6f ' * mesh.nCx + '\n') % tuple(mesh.hx)
    s += ('%.6f ' * mesh.nCy + '\n') % tuple(mesh.hy)
    s += ('%.6f ' * mesh.nCz + '\n') % tuple(mesh.hz[::-1])
    f = open(fileName, 'w')
    f.write(s)
    f.close()

    if models is None:
        return
    assert type(models) is dict, 'models must be a dict'
    for key in models:
        assert type(key) is str, 'The dict key is a file name'
        mesh.writeModelUBC(key, models[key])
def test_pickle_py2_bytes_encoding(self):
    # Check that arrays and scalars pickled on Py2 are
    # unpickleable on Py3 using encoding='bytes'

    test_data = [
        # (original, py2_pickle)
        (np.unicode_('\u6f2c'),
         asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                 "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\n"
                 "I0\ntp6\nbS',o\\x00\\x00'\np7\ntp8\nRp9\n.")),

        (np.array([9e123], dtype=np.float64),
         asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\n"
                 "p1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\n"
                 "p7\n(S'f8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'<'\np11\nNNNI-1\nI-1\n"
                 "I0\ntp12\nbI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np13\ntp14\nb.")),

        (np.array([(9e123,)], dtype=[('name', float)]),
         asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n"
                 "(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n"
                 "(S'V8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'name'\np12\ntp13\n"
                 "(dp14\ng12\n(g7\n(S'f8'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'<'\np18\nNNNI-1\n"
                 "I-1\nI0\ntp19\nbI0\ntp20\nsI8\nI1\nI0\ntp21\n"
                 "bI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np22\ntp23\nb.")),
    ]

    if sys.version_info[:2] >= (3, 4):
        # encoding='bytes' was added in Py3.4
        for original, data in test_data:
            result = pickle.loads(data, encoding='bytes')
            assert_equal(result, original)

            if isinstance(result, np.ndarray) and result.dtype.names:
                for name in result.dtype.names:
                    assert_(isinstance(name, str))
def test_mem_on_invalid_dtype(self):
    "Ticket #583"
    self.assertRaises(ValueError, np.fromiter,
                      [['12', ''], ['13', '']], str)
def test_sign_bit(self, level=rlevel):
    x = np.array([0, -0.0, 0])
    assert_equal(str(np.abs(x)), '[ 0.  0.  0.]')
def test_unaligned_unicode_access(self, level=rlevel):
    # Ticket #825
    for i in range(1, 9):
        msg = 'unicode offset: %d chars' % i
        t = np.dtype([('a', 'S%d' % i), ('b', 'U2')])
        x = np.array([(asbytes('a'), sixu('b'))], dtype=t)
        if sys.version_info[0] >= 3:
            assert_equal(str(x), "[(b'a', 'b')]", err_msg=msg)
        else:
            assert_equal(str(x), "[('a', u'b')]", err_msg=msg)
def test_zeros(self):
    # Regression test for #1061.
    # Set a size which cannot fit into a 64 bits signed integer
    sz = 2 ** 64
    good = 'Maximum allowed dimension exceeded'
    try:
        np.empty(sz)
    except ValueError as e:
        if not str(e) == good:
            self.fail("Got msg '%s', expected '%s'" % (e, good))
    except Exception as e:
        self.fail("Got exception of type %s instead of ValueError" % type(e))
def test_eq_string_and_object_array(self):
    # From e-mail thread "__eq__ with str and object" (Keith Goodman)
    a1 = np.array(['a', 'b'], dtype=object)
    a2 = np.array(['a', 'c'])
    assert_array_equal(a1 == a2, [True, False])
    assert_array_equal(a2 == a1, [True, False])
def test_refcount_error_in_clip(self):
    # Ticket #1588
    a = np.zeros((2,), dtype='>i2').clip(min=0)
    x = a + a
    # This used to segfault:
    y = str(x)
    # Check the final string:
    assert_(y == "[0 0]")
def test_format_on_flex_array_element(self):
    # Ticket #4369.
    dt = np.dtype([('date', '<M8[D]'), ('val', '<f8')])
    arr = np.array([('2000-01-01', 1)], dt)
    formatted = '{0}'.format(arr[0])
    assert_equal(formatted, str(arr[0]))
def test_run(self):
    """Only test hash runs at all."""
    for t in [np.int, np.float, np.complex, np.int32,
              np.str, np.object, np.unicode]:
        dt = np.dtype(t)
        hash(dt)
def test_dtypeattr(self):
    assert_equal(self.one.dtype, np.dtype(np.int_))
    assert_equal(self.three.dtype, np.dtype(np.float_))
    assert_equal(self.one.dtype.char, 'l')
    assert_equal(self.three.dtype.char, 'd')
    self.assertTrue(self.three.dtype.str[0] in '<>')
    assert_equal(self.one.dtype.str[1], 'i')
    assert_equal(self.three.dtype.str[1], 'f')
def test_empty_subscript(self):
    a, b = self.d
    self.assertEqual(a[()], 0)
    self.assertEqual(b[()], 'x')
    self.assertTrue(type(a[()]) is a.dtype.type)
    self.assertTrue(type(b[()]) is str)
def test_empty_unicode(self):
    # don't throw decode errors on garbage memory
    for i in range(5, 100, 5):
        d = np.empty(i, dtype='U')
        str(d)
def test_swapaxes(self):
    a = np.arange(1*2*3*4).reshape(1, 2, 3, 4).copy()
    idx = np.indices(a.shape)
    assert_(a.flags['OWNDATA'])
    b = a.copy()

    # check exceptions
    assert_raises(ValueError, a.swapaxes, -5, 0)
    assert_raises(ValueError, a.swapaxes, 4, 0)
    assert_raises(ValueError, a.swapaxes, 0, -5)
    assert_raises(ValueError, a.swapaxes, 0, 4)

    for i in range(-4, 4):
        for j in range(-4, 4):
            for k, src in enumerate((a, b)):
                c = src.swapaxes(i, j)
                # check shape
                shape = list(src.shape)
                shape[i] = src.shape[j]
                shape[j] = src.shape[i]
                assert_equal(c.shape, shape, str((i, j, k)))
                # check array contents
                i0, i1, i2, i3 = [dim - 1 for dim in c.shape]
                j0, j1, j2, j3 = [dim - 1 for dim in src.shape]
                assert_equal(src[idx[j0], idx[j1], idx[j2], idx[j3]],
                             c[idx[i0], idx[i1], idx[i2], idx[i3]],
                             str((i, j, k)))
                # check a view is always returned, gh-5260
                assert_(not c.flags['OWNDATA'], str((i, j, k)))
                # check on non-contiguous input array
                if k == 1:
                    b = c