The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.NaN.
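Note before the examples: numpy.NaN is not a callable — it is a plain float constant, an alias of float('nan') (current NumPy spells it np.nan; the capitalized alias was removed in NumPy 2.0). A minimal, self-contained sketch of the idioms the examples below rely on — NaN placeholders, NaN's self-inequality, and nan-aware reductions:

import numpy as np

# np.NaN is an alias of float('nan'); NaN compares unequal to everything,
# including itself, so test for it with np.isnan() rather than ==.
x = np.NaN
print(x == np.NaN)   # False: NaN never equals anything
print(np.isnan(x))   # True: the correct membership test
print(x != x)        # True: the self-inequality trick used in some examples

# In float arrays, NaN is the conventional "missing value" marker.
a = np.array([1.0, np.NaN, 3.0])
print(a.sum())       # nan: NaN propagates through ordinary reductions
print(np.nansum(a))  # 4.0: nan-aware reductions skip missing entries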
def FORCAST(self, param):
    class Context:
        def __init__(self, N):
            self.N = N
            self.q = deque([], self.N)
            self.x = [i for i in range(self.N)]

        def handleInput(self, value):
            # Until the rolling window is full, return NaN as a placeholder.
            if len(self.q) < self.N:
                self.q.append(value)
                return np.NaN
            # Fit a degree-1 polynomial to the window and extrapolate one step ahead.
            z1 = np.polyfit(self.x, self.q, 1)
            fn = np.poly1d(z1)
            y = fn(self.N + 1)
            self.q.append(value)
            return y

    ctx = Context(param[1])
    result = param[0].apply(ctx.handleInput)
    return result
def Rstr(self):
    array2 = []
    prixe = math.log(0.03637 / float(252) + 1)
    ret = self.sharedf
    ret['change'] = ret['change'] - prixe
    rstr = []
    print(1)  # leftover debug output from the original source
    if len(ret) > 525:
        # Exponentially decaying weights with a 126-day half-life.
        for z in range(0, 504):
            array2.append(math.pow(math.pow(float(1) / 2, float(1 / float(126))), (503 - z)))
        # The first 525 rows lack a full lookback window; pad them with NaN.
        for h in range(0, 525):
            rstr.append(numpy.NaN)
        for c in range(525, len(ret)):
            rett = 0
            # NOTE: `duan` is referenced but never defined in this excerpt.
            for f in range(0, len(duan) - 21):
                rett = rett + duan.iloc[f, 16] * array2[f]
            rstr.append(rett)
    print(rstr)  # leftover debug output from the original source
    ret['rstr'] = rstr
    return ret[['date', 'rstr']]
def Cetop(self):
    tdate = []
    Cetop = []
    dfxjllb = Tools().ReadSqlData(self.name + "_sina", "xjllbdata")
    dfgg = self.sharedf
    # NOTE: the original Chinese column name was lost to encoding and
    # appears here as u'????'.
    # Map each fiscal-quarter end to the corresponding disclosure deadline.
    for x in range(0, len(dfxjllb.index)):
        if dfxjllb.loc[x, u'????'][4:] == "0331":
            tdate.append(int(dfxjllb.loc[x, u'????'][:4] + "0430"))
        elif dfxjllb.loc[x, u'????'][4:] == "0630":
            tdate.append(int(dfxjllb.loc[x, u'????'][:4] + "0831"))
        elif dfxjllb.loc[x, u'????'][4:] == "0930":
            tdate.append(int(dfxjllb.loc[x, u'????'][:4] + "1031"))
        elif dfxjllb.loc[x, u'????'][4:] == "1231":
            tdate.append(int(str(int(dfxjllb.loc[x, u'????'][:4]) + 1) + "0430"))
        else:
            tdate.append(numpy.NaN)
    dfxjllb['tdate'] = tdate
    for x in range(1, len(dfgg.index) + 1):
        Cetop.append(float(dfxjllb[dfxjllb[u'tdate'] < int(str(dfgg.loc[x, u'date']).replace('-', ''))].iloc[0, 15:16]) / dfgg.loc[x, u'traded_market_value'])
    dfgg['Cetop'] = Cetop
    return dfgg[['date', 'Cetop']]
def _compute_rarefaction_data(feature_table, min_depth, max_depth, steps,
                              iterations, phylogeny, metrics):
    depth_range = np.linspace(min_depth, max_depth, num=steps, dtype=int)
    iter_range = range(1, iterations + 1)
    rows = feature_table.ids(axis='sample')
    cols = pd.MultiIndex.from_product([list(depth_range), list(iter_range)],
                                      names=['depth', 'iter'])
    # Pre-fill every (depth, iteration) cell with NaN, then overwrite as computed.
    data = {k: pd.DataFrame(np.NaN, index=rows, columns=cols) for k in metrics}
    for d, i in itertools.product(depth_range, iter_range):
        rt = rarefy(feature_table, d)
        for m in metrics:
            if m in phylogenetic_metrics():
                vector = alpha_phylogenetic(table=rt, metric=m,
                                            phylogeny=phylogeny)
            else:
                vector = alpha(table=rt, metric=m)
            data[m][(d, i)] = vector
    return data
def test_remove_missing():
    df = pd.DataFrame({'a': [1.0, np.NaN, 3, np.inf],
                       'b': [1, 2, 3, 4]})
    df2 = pd.DataFrame({'a': [1.0, 3, np.inf],
                        'b': [1, 3, 4]})
    df3 = pd.DataFrame({'a': [1.0, 3],
                        'b': [1, 3]})

    with warnings.catch_warnings(record=True) as w:
        res = remove_missing(df, na_rm=True, vars=['b'])
        assert res.equals(df)

        res = remove_missing(df)
        assert res.equals(df2)

        res = remove_missing(df, na_rm=True, finite=True)
        assert res.equals(df3)

        assert len(w) == 1
def limits(self):
    if self.is_empty():
        return (0, 1)

    # Fall back to the range if the limits
    # are not set or if any is None or NaN
    if self._limits is not None and self.range.range is not None:
        limits = []
        if len(self._limits) == len(self.range.range):
            for l, r in zip(self._limits, self.range.range):
                value = r if pd.isnull(l) else l
                limits.append(value)
        else:
            limits = self._limits
        return tuple(limits)
    return self.range.range
def describe_numeric_1d(series, **kwargs):
    stats = {'mean': series.mean(),
             'std': series.std(),
             'variance': series.var(),
             'min': series.min(),
             'max': series.max()}
    stats['range'] = stats['max'] - stats['min']

    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        # The dropna() is a workaround for https://github.com/pydata/pandas/issues/13098
        stats[pretty_name(x)] = series.dropna().quantile(x)
    stats['iqr'] = stats['75%'] - stats['25%']
    stats['kurtosis'] = series.kurt()
    stats['skewness'] = series.skew()
    stats['sum'] = series.sum()
    stats['mad'] = series.mad()
    stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.NaN
    stats['type'] = "NUM"
    stats['n_zeros'] = (len(series) - np.count_nonzero(series))
    stats['p_zeros'] = stats['n_zeros'] / len(series)

    # Histograms
    stats['histogram'] = histogram(series, **kwargs)
    stats['mini_histogram'] = mini_histogram(series, **kwargs)
    return pd.Series(stats, name=series.name)
def get_extents(self, element, ranges):
    """
    Subclasses the get_extents method using the GeoAxes
    set_extent method to project the extents to the
    Elements coordinate reference system.
    """
    extents = super(GeoPlot, self).get_extents(element, ranges)
    if not getattr(element, 'crs', None) or not self.geographic:
        return extents
    elif any(e is None or not np.isfinite(e) for e in extents):
        extents = None
    else:
        try:
            extents = project_extents(extents, element.crs, DEFAULT_PROJ)
        except:
            extents = None
    return (np.NaN,) * 4 if not extents else extents
def values(cls, dataset, dimension, expanded, flat):
    dimension = dataset.get_dimension(dimension)
    idx = dataset.get_dimension_index(dimension)
    data = dataset.data
    if idx not in [0, 1] and not expanded:
        return data[dimension.name].values
    values = []
    columns = list(data.columns)
    arr = geom_to_array(data.geometry.iloc[0])
    ds = dataset.clone(arr, datatype=cls.subtypes, vdims=[])
    for i, d in enumerate(data.geometry):
        arr = geom_to_array(d)
        if idx in [0, 1]:
            ds.data = arr
            values.append(ds.interface.values(ds, dimension))
        else:
            arr = np.full(len(arr), data.iloc[i, columns.index(dimension.name)])
            values.append(arr)
        # A NaN entry separates consecutive geometries once concatenated.
        values.append([np.NaN])
    return np.concatenate(values[:-1]) if values else np.array([])
def setUp(self):
    data = {"response": [0, 1, 1],
            "var_a": [21, 32, 10],
            "cyl": [4, 6, 4]}
    df = pd.DataFrame(data, index=[0, 1, 2])
    priors_data = {
        "grp": ["cyl", "cyl", "cyl"],
        "var1": ["intercept", "intercept", "var_a"],
        "var2": [np.NaN, "var_a", np.NaN],
        "vcov": [0.123, -1.42, 0.998]
    }
    priors_df = pd.DataFrame(priors_data, index=[0, 1, 2])
    self.formula = "response ~ 1 + var_a + (1 + var_a | cyl)"
    self.model = LogisticRegression(train_df=df, priors_df=priors_df,
                                    test_df=None)
def test_input_with_missing_values(self):
    gdf = GeoDataFrame.from_file("misc/nuts3_data.geojson")
    gdf.loc[12:18, "gdppps2008"] = np.NaN
    StePot = SmoothStewart(gdf, "gdppps2008",
                           span=65000, beta=2,
                           resolution=100000, mask=gdf)
    result = StePot.render(9, "equal_interval", output="Geodataframe")
    self.assertIsInstance(result, GeoDataFrame)
    self.assertEqual(len(result), 9)

    gdf2 = GeoDataFrame.from_file('misc/nuts3_data.geojson').to_crs({"init": "epsg:3035"})
    gdf2.loc[:, 'gdppps2008'] = gdf2['gdppps2008'].astype(object)
    gdf2.loc[15:20, 'gdppps2008'] = ""
    gdf2.loc[75:78, 'gdppps2008'] = ""
    StePot = SmoothStewart(gdf2, 'gdppps2008',
                           span=65000, beta=2,
                           resolution=48000, mask=gdf2)
    result = StePot.render(9, 'equal_interval', output="GeoDataFrame")
    self.assertIsInstance(result, GeoDataFrame)
    self.assertEqual(len(result), 9)
def test_wrong_dtype_missing_values(self):
    gdf = GeoDataFrame.from_file("misc/nuts3_data.geojson")
    gdf.loc[12:18, "gdppps2008"] = np.NaN
    gdf.loc[25:35, "pop2008"] = np.NaN
    gdf.loc[0:len(gdf) - 1, "pop2008"] = gdf["pop2008"].astype(str)
    StePot = SmoothStewart(gdf, "gdppps2008",
                           span=65000, beta=2,
                           resolution=100000, mask="misc/nuts3_data.geojson")
    result = StePot.render(9, "equal_interval", output="Geodataframe")
    self.assertIsInstance(result, GeoDataFrame)
    self.assertEqual(len(result), 9)

    StePot = SmoothStewart(gdf, "gdppps2008", variable_name2="pop2008",
                           span=65000, beta=2,
                           resolution=100000, mask="misc/nuts3_data.geojson")
    result = StePot.render(9, "equal_interval", output="Geodataframe")
    self.assertIsInstance(result, GeoDataFrame)
    self.assertEqual(len(result), 9)
def test_run_twice(self):
    algo1 = TestRegisterTransformAlgorithm(
        sim_params=self.sim_params,
        sids=[0, 1],
        env=self.env,
    )
    res1 = algo1.run(self.data_portal)

    # Create a new trading algorithm, which will
    # use the newly instantiated environment.
    algo2 = TestRegisterTransformAlgorithm(
        sim_params=self.sim_params,
        sids=[0, 1],
        env=self.env,
    )
    res2 = algo2.run(self.data_portal)

    # There are some np.NaN values in the first row because there is not
    # enough data to calculate the metric, e.g. beta.
    res1 = res1.fillna(value=0)
    res2 = res2.fillna(value=0)

    np.testing.assert_array_equal(res1, res2)
def make_expected_timelines_2q_out(cls):
    return {
        pd.Timestamp('2015-01-06', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] * 3)
        },
        pd.Timestamp('2015-01-07', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] * 3)
        },
        pd.Timestamp('2015-01-08', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] * 3)
        },
        pd.Timestamp('2015-01-09', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] * 3)
        },
        pd.Timestamp('2015-01-12', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] * 2 +
                                  [[2100 * 3., 2110. * 4]])
        }
    }
def make_expected_timelines_1q_out(cls):
    return {
        pd.Timestamp('2015-01-06', tz='utc'): {
            'estimate1': np.array([[np.NaN, np.NaN]] +
                                  [[1100. * 1/.3, 1110. * 1/.4]] * 2),
            'estimate2': np.array([[np.NaN, np.NaN]] +
                                  [[2100. * 1/.3, 2110. * 1/.4]] * 2),
        },
        pd.Timestamp('2015-01-07', tz='utc'): {
            'estimate1': np.array([[1100., 1110.]] * 3),
            'estimate2': np.array([[2100., 2110.]] * 3)
        },
        pd.Timestamp('2015-01-08', tz='utc'): {
            'estimate1': np.array([[1100., 1110.]] * 3),
            'estimate2': np.array([[2100., 2110.]] * 3)
        },
        pd.Timestamp('2015-01-09', tz='utc'): {
            'estimate1': np.array([[1100 * 3., 1210. * 4]] * 3),
            'estimate2': np.array([[2100 * 3., 2210. * 4]] * 3)
        },
        pd.Timestamp('2015-01-12', tz='utc'): {
            'estimate1': np.array([[1200 * 3., np.NaN]] * 3),
            'estimate2': np.array([[2200 * 3., np.NaN]] * 3)
        }
    }
def make_expected_timelines_2q_out(cls):
    return {
        pd.Timestamp('2015-01-06', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] +
                                  [[2200 * 1/.3, 2210. * 1/.4]] * 2)
        },
        pd.Timestamp('2015-01-07', tz='utc'): {
            'estimate2': np.array([[2200., 2210.]] * 3)
        },
        pd.Timestamp('2015-01-08', tz='utc'): {
            'estimate2': np.array([[2200, 2210.]] * 3)
        },
        pd.Timestamp('2015-01-09', tz='utc'): {
            'estimate2': np.array([[2200 * 3., np.NaN]] * 3)
        },
        pd.Timestamp('2015-01-12', tz='utc'): {
            'estimate2': np.array([[np.NaN, np.NaN]] * 3)
        }
    }
def _calculate(self, X, y, categorical, metafeatures, helpers):
    import sklearn.lda
    if len(y.shape) == 1 or y.shape[1] == 1:
        kf = cross_validation.StratifiedKFold(y, n_folds=10)
    else:
        kf = cross_validation.KFold(y.shape[0], n_folds=10)

    accuracy = 0.
    try:
        for train, test in kf:
            lda = sklearn.lda.LDA()

            if len(y.shape) == 1 or y.shape[1] == 1:
                lda.fit(X[train], y[train])
            else:
                lda = OneVsRestClassifier(lda)
                lda.fit(X[train], y[train])

            predictions = lda.predict(X[test])
            accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
        return accuracy / 10
    except LinAlgError as e:
        self.logger.warning("LDA failed: %s Returned NaN instead!" % e)
        return np.NaN
    except ValueError as e:
        self.logger.warning("LDA failed: %s Returned NaN instead!" % e)
        return np.NaN
def stratified_folds(self, nfolds=5, sortby=None):
    # Create an entry in the data frame that holds the folds
    self.df[('meta', 'Folds')] = np.NaN
    # Sort the data frame by the column of interest
    self.df.sort_values(by=sortby, inplace=True)
    # Get the unique values from the column of interest
    uniqvals = np.unique(self.df[sortby])

    # Assign folds by stepping through the unique values
    fold_num = 1
    for i in uniqvals:
        # Find where the data frame matches the unique value
        ind = self.df[sortby] == i
        self.df.set_value(self.df.index[ind], ('meta', 'Folds'), fold_num)
        # Increment the fold number, reset to 1 if it is greater than the desired number of folds
        fold_num = fold_num + 1
        if fold_num > nfolds:
            fold_num = 1

    # Sort by index to return the df to its original order
    self.df.sort_index(inplace=True)
    self.folds_hist(sortby, 50)
def test_invalid_edge_range():
    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=-1)

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=[])

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=[1, ])

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=[1, 2, 3])

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=(1, np.NaN))

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=(2, 1))
def test_invalid_nbins():
    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, num_bins=np.NaN)

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, num_bins=np.Inf)

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, num_bins=2)

# test_multi_edge()
# test_multi_edge_CLI()
# test_empty_subject_list()
# test_run_no_IO()
# test_run_roi_stats_via_API()
# test_run_roi_stats_via_CLI()
# test_CLI_only_weight_or_stats()
def predict(self, da):
    '''xarray.DataArray version of sklearn.cluster.KMeans.predict.'''
    # Fall back to the plain sklearn.cluster.KMeans predict method
    # when the input data is not a DataArray.
    if not isinstance(da, xr.DataArray):
        return super().predict(da)

    # retrieve parameters
    n_samples = da.shape[0]
    features_shape = da.shape[1:]
    n_features = np.prod(features_shape)
    X = da.data.reshape(n_samples, n_features)  # 'data' might be replaced with 'values'

    # remove NaN values if present in X
    try:
        X_valid = X[:, self.valid_features_index_]
    except:
        X_valid = X

    samples_dim = da.dims[0]
    samples_coord = {samples_dim: da.coords[samples_dim]}
    labels = xr.DataArray(super().predict(X_valid),
                          dims=samples_dim, coords=samples_coord)
    return labels
def create_trial(self):
    trial = dict()
    # Status of the trial object
    trial['status'] = 0
    trial['params'] = dict()
    # Stores the validation error
    trial['result'] = np.NaN
    trial['test_error'] = np.NaN
    # Validation error for every instance
    trial['instance_results'] = np.ones((self.folds)) * np.NaN
    # Status for every instance
    trial['instance_status'] = np.zeros((self.folds), dtype=int)
    # Contains the standard deviation in case of cross validation
    trial['std'] = np.NaN
    # Accumulated duration over all instances
    trial['duration'] = np.NaN
    # Stores the duration for every instance
    trial['instance_durations'] = np.ones((self.folds)) * np.NaN
    return trial
def get_arg_best(self):
    best_idx = -1
    best_value = float('inf')  # `sys.maxint` in the original Python 2 source
    for i, trial in enumerate(self.trials):
        tmp_res = np.NaN
        if np.isfinite(trial['result']):
            tmp_res = trial['result']
        elif np.isfinite(trial['instance_results']).any():
            tmp_res = wrapping_util.nan_mean(trial['instance_results'])
            # np.nanmean is not available in older numpy versions
            # tmp_res = scipy.nanmean(trial['instance_results'])
        else:
            continue
        if tmp_res < best_value:
            best_idx = i
            best_value = tmp_res
    if best_idx == -1:
        raise ValueError("No best value found.")
    return best_idx

# Get the best value so far, for more documentation see get_arg_best
def save_har6(params, **kwargs):
    if "x" not in params or "y" not in params or "z" not in params \
            or "a" not in params or "b" not in params or "c" not in params:
        sys.stderr.write("No params found ['x', 'y', 'z', 'a', 'b', 'c']\n")
        return np.NaN
    x = float(params["x"])
    y = float(params["y"])
    z = float(params["z"])
    a = float(params["a"])
    b = float(params["b"])
    c = float(params["c"])

    if type(x) == np.ndarray:
        x = x[0]
        y = y[0]
        z = z[0]
        a = a[0]
        b = b[0]
        c = c[0]
    return har6(x, y, z, a, b, c)
def get_trial_index(experiment, fold, params):
    # Check whether we are in a new configuration; This has to check whether
    # the params were already inserted but also whether the fold already run
    # This is checked twice; the instance_result has to be not NaN and the
    # entry in instance_order has to exist
    new = True
    trial_index = np.NaN
    for idx, trial in enumerate(experiment.trials):
        exp = trial['params']
        # The final `x != x` comparison is true only for NaN, i.e. when
        # the fold has no result yet.
        if exp == params and (idx, fold) not in experiment.instance_order and \
                (experiment.get_trial_from_id(idx)['instance_results'][fold] == np.NaN or
                 experiment.get_trial_from_id(idx)['instance_results'][fold] !=
                 experiment.get_trial_from_id(idx)['instance_results'][fold]):
            new = False
            trial_index = idx
            break
    if new:
        trial_index = experiment.add_job(params)
    return trial_index
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.
    """
    from sklearn.datasets import load_boston

    scikit_data = load_boston()
    scikit_model = Imputer(strategy='most_frequent', axis=0)
    scikit_data['data'][1, 8] = np.NaN

    input_data = scikit_data['data'][:, 8].reshape(-1, 1)
    scikit_model.fit(input_data, scikit_data['target'])

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
def calculateDeltas(data, n=10):
    """Create the distances travelled since the last n time steps"""
    dx = np.diff(data[:, 0])
    dy = np.diff(data[:, 1])

    dx0 = dx.copy()
    dy0 = dy.copy()

    delta = np.zeros((dx.shape[0], n))
    for i in range(n):
        delta[:, i] = np.sqrt(dx * dx + dy * dy)
        if i < n - 1:
            # Shift the per-step differences by one, padding with NaN,
            # and accumulate to obtain displacements over longer lags.
            dx0 = np.concatenate([[np.NaN], dx0[:-1]])
            dy0 = np.concatenate([[np.NaN], dy0[:-1]])
            dx += dx0
            dy += dy0

    return delta
def _rolling_rank(self):
    dates = self._index
    window = self._window

    ranks = np.empty(len(dates), dtype=float)
    ranks[:] = np.NaN
    for i, date in enumerate(dates):
        if self._is_rolling and i >= window:
            prior_date = dates[i - window + 1]
        else:
            prior_date = dates[0]

        x_slice = self._x.truncate(before=prior_date, after=date).values

        if len(x_slice) == 0:
            continue

        ranks[i] = math.rank(x_slice)

    return ranks
def str_endswith(arr, pat, na=np.nan):
    """
    Return boolean Series indicating whether each string in the
    Series/Index ends with passed pattern. Equivalent to
    :meth:`str.endswith`.

    Parameters
    ----------
    pat : string
        Character sequence
    na : bool, default NaN

    Returns
    -------
    endswith : Series/array of boolean values
    """
    f = lambda x: x.endswith(pat)
    return _na_map(f, arr, na, dtype=bool)
def nsmallest(arr, n, keep='first'):
    """
    Find the indices of the n smallest values of a numpy array.

    Note: Fails silently with NaN.
    """
    if keep == 'last':
        arr = arr[::-1]

    narr = len(arr)
    n = min(n, narr)

    sdtype = str(arr.dtype)
    arr = arr.view(_dtype_map.get(sdtype, sdtype))

    kth_val = algos.kth_smallest(arr.copy(), n - 1)
    return _finalize_nsmallest(arr, kth_val, n, keep, narr)
def test_resample_consistency(self):
    # GH 6418
    # resample with bfill / limit / reindex consistency

    i30 = pd.date_range('2002-02-02', periods=4, freq='30T')
    s = pd.Series(np.arange(4.), index=i30)
    s[2] = np.NaN

    # Upsample by factor 3 with reindex() and resample() methods:
    i10 = pd.date_range(i30[0], i30[-1], freq='10T')

    s10 = s.reindex(index=i10, method='bfill')
    s10_2 = s.reindex(index=i10, method='bfill', limit=2)
    rl = s.reindex_like(s10, method='bfill', limit=2)
    r10_2 = s.resample('10Min').bfill(limit=2)
    r10 = s.resample('10Min').bfill()

    # s10_2, r10, r10_2, rl should all be equal
    assert_series_equal(s10_2, r10)
    assert_series_equal(s10_2, r10_2)
    assert_series_equal(s10_2, rl)
def test_skew(self):
    tm._skip_if_no_scipy()

    from scipy.stats import skew
    alt = lambda x: skew(x, bias=False)
    self._check_stat_op('skew', alt)

    # test corner cases, skew() returns NaN unless there's at least 3
    # values
    min_N = 3
    for i in range(1, min_N + 1):
        s = Series(np.ones(i))
        df = DataFrame(np.ones((i, i)))
        if i < min_N:
            self.assertTrue(np.isnan(s.skew()))
            self.assertTrue(np.isnan(df.skew()).all())
        else:
            self.assertEqual(0, s.skew())
            self.assertTrue((df.skew() == 0).all())
def test_kurt(self):
    tm._skip_if_no_scipy()

    from scipy.stats import kurtosis
    alt = lambda x: kurtosis(x, bias=False)
    self._check_stat_op('kurt', alt)

    index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                       labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                               [0, 1, 0, 1, 0, 1]])
    s = Series(np.random.randn(6), index=index)
    self.assertAlmostEqual(s.kurt(), s.kurt(level=0)['bar'])

    # test corner cases, kurt() returns NaN unless there's at least 4
    # values
    min_N = 4
    for i in range(1, min_N + 1):
        s = Series(np.ones(i))
        df = DataFrame(np.ones((i, i)))
        if i < min_N:
            self.assertTrue(np.isnan(s.kurt()))
            self.assertTrue(np.isnan(df.kurt()).all())
        else:
            self.assertEqual(0, s.kurt())
            self.assertTrue((df.kurt() == 0).all())
def test_count(self):
    self.assertEqual(self.ts.count(), len(self.ts))

    self.ts[::2] = np.NaN

    self.assertEqual(self.ts.count(), np.isfinite(self.ts).sum())

    mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]])
    ts = Series(np.arange(len(mi)), index=mi)

    left = ts.count(level=1)
    right = Series([2, 3, 1], index=[1, 2, nan])
    assert_series_equal(left, right)

    ts.iloc[[0, 3, 5]] = nan
    assert_series_equal(ts.count(level=1), right - 1)
def test_first_last_valid(self):
    ts = self.ts.copy()
    ts[:5] = np.NaN

    index = ts.first_valid_index()
    self.assertEqual(index, ts.index[5])

    ts[-5:] = np.NaN
    index = ts.last_valid_index()
    self.assertEqual(index, ts.index[-6])

    ts[:] = np.nan
    self.assertIsNone(ts.last_valid_index())
    self.assertIsNone(ts.first_valid_index())

    ser = Series([], index=[])
    self.assertIsNone(ser.last_valid_index())
    self.assertIsNone(ser.first_valid_index())
def test_constructor_dict_timedelta_index(self):
    # GH #12169 : Resample category data with timedelta index
    # construct Series from dict as data and TimedeltaIndex as index
    # will result NaN in result Series data
    expected = Series(
        data=['A', 'B', 'C'],
        index=pd.to_timedelta([0, 10, 20], unit='s')
    )

    result = Series(
        data={pd.to_timedelta(0, unit='s'): 'A',
              pd.to_timedelta(10, unit='s'): 'B',
              pd.to_timedelta(20, unit='s'): 'C'},
        index=pd.to_timedelta([0, 10, 20], unit='s')
    )
    # this should work
    assert_series_equal(result, expected)
def test_fromValue(self):
    nans = Series(np.NaN, index=self.ts.index)
    self.assertEqual(nans.dtype, np.float_)
    self.assertEqual(len(nans), len(self.ts))

    strings = Series('foo', index=self.ts.index)
    self.assertEqual(strings.dtype, np.object_)
    self.assertEqual(len(strings), len(self.ts))

    d = datetime.now()
    dates = Series(d, index=self.ts.index)
    self.assertEqual(dates.dtype, 'M8[ns]')
    self.assertEqual(len(dates), len(self.ts))

    # GH12336
    # Test construction of categorical series from value
    categorical = Series(0, index=self.ts.index, dtype="category")
    expected = Series(0, index=self.ts.index).astype("category")
    self.assertEqual(categorical.dtype, 'category')
    self.assertEqual(len(categorical), len(self.ts))
    tm.assert_series_equal(categorical, expected)
def test_to_string_format_na(self):
    self.reset_display_options()
    df = DataFrame({'A': [np.nan, -1, -2.1234, 3, 4],
                    'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
    result = df.to_string()

    expected = ('        A       B\n'
                '0     NaN     NaN\n'
                '1 -1.0000     foo\n'
                '2 -2.1234   foooo\n'
                '3  3.0000  fooooo\n'
                '4  4.0000     bar')
    self.assertEqual(result, expected)

    df = DataFrame({'A': [np.nan, -1., -2., 3., 4.],
                    'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
    result = df.to_string()

    expected = ('     A       B\n'
                '0  NaN     NaN\n'
                '1 -1.0     foo\n'
                '2 -2.0   foooo\n'
                '3  3.0  fooooo\n'
                '4  4.0     bar')
    self.assertEqual(result, expected)
def test_to_csv_na_rep(self):
    # testing if NaN values are correctly represented in the index
    # GH 11553
    df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]})
    expected = "a,b,c\n0.0,0,2\n_,1,3\n"
    self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected)
    self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected)

    # now with an index containing only NaNs
    df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]})
    expected = "a,b,c\n_,0,2\n_,1,3\n"
    self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected)
    self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected)

    # check if na_rep parameter does not break anything when no NaN
    df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]})
    expected = "a,b,c\n0,0,2\n0,1,3\n"
    self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected)
    self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected)
def test_to_string_mixed(self):
    s = Series(['foo', np.nan, -1.23, 4.56])
    result = s.to_string()
    expected = (u('0     foo\n') + u('1     NaN\n') + u('2   -1.23\n') +
                u('3    4.56'))
    self.assertEqual(result, expected)

    # but don't count NAs as floats
    s = Series(['foo', np.nan, 'bar', 'baz'])
    result = s.to_string()
    expected = (u('0   foo\n') + '1   NaN\n' + '2   bar\n' + '3   baz')
    self.assertEqual(result, expected)

    s = Series(['foo', 5, 'bar', 'baz'])
    result = s.to_string()
    expected = (u('0   foo\n') + '1     5\n' + '2   bar\n' + '3   baz')
    self.assertEqual(result, expected)
def test_unstack_to_series(self):
    # check reversibility
    data = self.frame.unstack()

    self.assertTrue(isinstance(data, Series))
    undo = data.unstack().T
    assert_frame_equal(undo, self.frame)

    # check NA handling
    data = DataFrame({'x': [1, 2, np.NaN], 'y': [3.0, 4, np.NaN]})
    data.index = Index(['a', 'b', 'c'])
    result = data.unstack()

    midx = MultiIndex(levels=[['x', 'y'], ['a', 'b', 'c']],
                      labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
    expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx)

    assert_series_equal(result, expected)

    # check composability of unstack
    old_data = data.copy()
    for _ in range(4):
        data = data.unstack()
    assert_frame_equal(old_data, data)
def test_rolling_skew_edge_cases(self):
    all_nan = Series([np.NaN] * 5)

    # yields all NaN (0 variance)
    d = Series([1] * 5)
    x = d.rolling(window=5).skew()
    assert_series_equal(all_nan, x)

    # yields all NaN (window too small)
    d = Series(np.random.randn(5))
    x = d.rolling(window=2).skew()
    assert_series_equal(all_nan, x)

    # yields [NaN, NaN, NaN, 0.177994, 1.548824]
    d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
    expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])
    x = d.rolling(window=4).skew()
    assert_series_equal(expected, x)
def test_rolling_kurt_edge_cases(self):
    all_nan = Series([np.NaN] * 5)

    # yields all NaN (0 variance)
    d = Series([1] * 5)
    x = d.rolling(window=5).kurt()
    assert_series_equal(all_nan, x)

    # yields all NaN (window too small)
    d = Series(np.random.randn(5))
    x = d.rolling(window=3).kurt()
    assert_series_equal(all_nan, x)

    # yields [NaN, NaN, NaN, 1.224307, 2.671499]
    d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
    expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])
    x = d.rolling(window=4).kurt()
    assert_series_equal(expected, x)
def test_interpolate(self):
    ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index)

    ts_copy = ts.copy()
    ts_copy[5:10] = np.NaN

    linear_interp = ts_copy.interpolate(method='linear')
    self.assert_numpy_array_equal(linear_interp, ts)

    ord_ts = Series([d.toordinal() for d in self.ts.index],
                    index=self.ts.index).astype(float)

    ord_ts_copy = ord_ts.copy()
    ord_ts_copy[5:10] = np.NaN

    time_interp = ord_ts_copy.interpolate(method='time')
    self.assert_numpy_array_equal(time_interp, ord_ts)

    # try time interpolation on a non-TimeSeries
    # Only raises ValueError if there are NaNs.
    non_ts = self.series.copy()
    non_ts[0] = np.NaN
    self.assertRaises(ValueError, non_ts.interpolate, method='time')
def test_homogenize(self):
    def _check_matches(indices, expected):
        data = {}
        for i, idx in enumerate(indices):
            data[i] = SparseSeries(idx.to_int_index().indices,
                                   sparse_index=idx)
        homogenized = spf.homogenize(data)

        for k, v in compat.iteritems(homogenized):
            assert (v.sp_index.equals(expected))

    indices1 = [BlockIndex(10, [2], [7]), BlockIndex(10, [1, 6], [3, 4]),
                BlockIndex(10, [0], [10])]
    expected1 = BlockIndex(10, [2, 6], [2, 3])
    _check_matches(indices1, expected1)

    indices2 = [BlockIndex(10, [2], [7]), BlockIndex(10, [2], [7])]
    expected2 = indices2[0]
    _check_matches(indices2, expected2)

    # must have NaN fill value
    data = {'a': SparseSeries(np.arange(7), sparse_index=expected2,
                              fill_value=0)}
    assertRaisesRegexp(TypeError, "NaN fill value", spf.homogenize, data)
def somenan(self, x, fun, p=0.1):
    """returns sometimes np.NaN, otherwise fun(x)"""
    if np.random.rand(1) < p:
        return np.NaN
    else:
        return fun(x)
def cornersphere(self, x):
    """Sphere (squared norm) test objective function constraint to the corner"""
    nconstr = len(x) - 0
    if any(x[:nconstr] < 1):
        return np.NaN
    return sum(x**2) - nconstr
def cornerelli(self, x):
    """ """
    if any(x < 1):
        return np.NaN
    return self.elli(x) - self.elli(np.ones(len(x)))
def cornerellirot(self, x):
    """ """
    if any(x < 1):
        return np.NaN
    return self.ellirot(x)
def lincon(self, x, theta=0.01):
    """ridge like linear function with one linear constraint"""
    if x[0] < 0:
        return np.NaN
    return theta * x[1] + x[0]