The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.timedelta64().
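As a quick orientation, here is a minimal sketch (not taken from any of the projects below) of the two patterns that recur throughout these examples: constructing a duration with an explicit unit, and dividing a duration by np.timedelta64(1, unit) to convert it to a float in that unit.

import numpy as np

# Minimal illustration of the recurring numpy.timedelta64() idioms
# (illustrative sketch only; not extracted from the projects below).
one_day = np.timedelta64(1, 'D')        # a fixed-unit duration: 1 day
ninety_sec = np.timedelta64(90, 's')    # 90 seconds

start = np.datetime64('2014-01-01T12:00:00')
end = start + ninety_sec                # datetime64 arithmetic

# Dividing by a unit-sized timedelta64 converts a duration to a float
# in that unit; this is the idiom used in most examples below.
elapsed_minutes = (end - start) / np.timedelta64(1, 'm')   # 1.5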
def fill_nans(df, delta=None):
    """
    """
    if not delta:
        dt_diff = NP.diff(df.index.values)
        delta_timedelta64 = min(dt_diff)
        delta_seconds = delta_timedelta64 / NP.timedelta64(1, 's')
        delta = timedelta(seconds=delta_seconds)
        logger.info('Using delta = {} (s)'.format(delta.total_seconds()))
    index_new = PD.date_range(start=df.index[0],
                              end=df.index[-1],
                              freq=delta)
    missing = sorted(set(index_new) - set(df.index))
    if missing:
        logger.warning('Missing time indices (filled by NaNs):')
        for x in missing:
            logger.warning(x)
    return df.reindex(index_new, copy=False), delta
def apply_emtf(df_E, df_B, emtf_key, index, extrapolate0=True):
    """
    Apply the EMTF associated with *emtf_key* to magnetometer data found in
    *df_B* and store the result in *df_E*. Use USArray .xml repository
    information :class:`Index` to process the 3-D EMTFs.
    """
    logger.info('applying transfer function {}'.format(emtf_key))
    interval = NP.diff(df_B.index.values[:2])[0] / NP.timedelta64(1, 's')
    Bx = df_B.B_X.values
    By = df_B.B_Y.values
    if emtf_key.startswith('USArray'):
        xml_fname = index[emtf_key][1]
        Ex, Ey = tf_3D(Bx, By, interval, xml_fname,
                       extrapolate0=extrapolate0)
    else:
        Ex, Ey = tf_1D(Bx, By, interval, emtf_key)
    df_E[emtf_key + '_X'] = Ex
    df_E[emtf_key + '_Y'] = Ey
    return df_E
def enrich(self, column1, column2):
    """
    This method calculates the difference in seconds between the two
    columns (column2 - column1).

    The result may contain negative values depending on the values of
    column1 and column2.

    :param column1: first column. Values in column1 must be datetime type
    :param column2: second column. Values in column2 must be datetime type
    :type column1: string
    :type column2: string
    :return: original dataframe with a new column with the difference
        between column2 - column1
    :rtype: pandas.DataFrame
    """
    if column1 not in self.data.columns or \
       column2 not in self.data.columns:
        return self.data

    self.data["timedifference"] = \
        (self.data[column2] - self.data[column1]) / np.timedelta64(1, 's')
    return self.data
def _make_time_value_array(bg_df, start_index, end_index):
    min_in_hour = 60
    hour_in_day = 24

    array_len = int((bg_df.iloc[end_index]['created_at'] -
                     bg_df.iloc[start_index]['created_at']) / np.timedelta64(1, 'm')) + 1
    time_value_array = np.zeros(array_len)

    curr_minute = bg_df.iloc[start_index]['created_at'].minute
    curr_hour = bg_df.iloc[start_index]['created_at'].hour

    for array_index in range(array_len):
        time_value_array[array_index] = _get_hours_from_midnight(curr_hour)

        curr_minute += 1
        if curr_minute >= min_in_hour:
            curr_minute = curr_minute % min_in_hour
            curr_hour = (curr_hour + 1) % hour_in_day

    return time_value_array


# Function that adds data to fill in the gaps of the original data before the
# Lomb-Scargle is applied. It helps make Lomb-Scargle more accurate.
# Essentially, if there is a gap bigger than MAX_TIME_GAP, this recursive
# function adds a data point between the two time points, creating two more
# gaps. It recursively calls on both of these gaps until the gap size is less
# than or equal to MAX_TIME_GAP.
# To add data, this function takes the mean of the old and new time, and sets
# the value at this middle time to the mean of the values between the old and
# new time. It updates the array accordingly to make sure the time points are
# still in order and the indices are correct.
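The gap-filling helper described in the comment above is not part of this extract; the following is a hedged sketch of that recursive step, assuming a module-level MAX_TIME_GAP constant and preallocated time_array/value_array buffers with curr as the next free slot (the real _fill_data_gaps in the source project may differ).

# Hypothetical sketch of the recursive gap filling described above
# (assumptions: MAX_TIME_GAP is defined at module level; time_array and
# value_array are preallocated numpy arrays; curr is the next free slot).
def _fill_data_gaps(old_time, new_time, old_value, new_value,
                    time_array, value_array, curr):
    # Base case: the gap is small enough, so just record the new point.
    if new_time - old_time <= MAX_TIME_GAP:
        time_array[curr] = new_time
        value_array[curr] = new_value
        return curr + 1

    # Otherwise insert a synthetic midpoint (mean time, mean value)
    # and recurse on both resulting gaps.
    mid_time = (old_time + new_time) / 2
    mid_value = (old_value + new_value) / 2
    curr = _fill_data_gaps(old_time, mid_time, old_value, mid_value,
                           time_array, value_array, curr)
    return _fill_data_gaps(mid_time, new_time, mid_value, new_value,
                           time_array, value_array, curr)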
def _make_data_array_helper(bg_df, time_array, value_array, data_gap_start_time,
                            data_gap_end_time, start_index, index, curr, last,
                            num_extra_added, col_one_name, col_two_name, item_str):
    new_time = int((bg_df.iloc[index]['created_at'] -
                    bg_df.iloc[start_index]['created_at']) / np.timedelta64(1, 'm'))
    new_value = bg_df.iloc[index][col_one_name][col_two_name][item_str]
    old_time = time_array[last]
    old_value = value_array[last]

    # If it is a data gap, store the start and stop time for later removal
    if new_time - old_time > MAX_DATA_GAP_MINUTES:
        data_gap_start_time.append(old_time)
        data_gap_end_time.append(new_time)

    # Keep track of the curr value before passing into _fill_data_gaps
    start_curr = curr
    curr = _fill_data_gaps(old_time, new_time, old_value, new_value,
                           time_array, value_array, curr)

    # Find the number of extra entries num_extra_added
    num_extra_added += curr - start_curr - 1
    last = curr - 1

    return time_array, value_array, data_gap_start_time, data_gap_end_time, curr, last, num_extra_added


# Function to make the data array for Lomb-Scargle given the bg_df dataframe,
# the start_index, the end_index, and the item_str, which is the column that
# you want to get. Can put any start and end index as a parameter.
def _get_other_bg(bg_df, pred_array, pred_time_array, curr, miss, start_index, data_index, bg_str):
    pred_time_array[curr] = (bg_df.iloc[data_index]['created_at'] -
                             bg_df.iloc[start_index]['created_at']) / np.timedelta64(1, 'm')

    try:
        pred_array[curr] = bg_df.iloc[data_index]['openaps']['enacted'][bg_str]
        curr, miss = _check_pred_nan(pred_array, curr, miss)
    except:
        try:
            pred_array[curr] = bg_df.iloc[data_index]['openaps']['suggested'][bg_str]
            curr, miss = _check_pred_nan(pred_array, curr, miss)
        except:
            miss += 1

    return pred_array, pred_time_array, curr, miss


# Function to get the predicted bg for the IOB, COB, and aCOB predictions.
# Looks at the enacted directory first before looking at the suggested directory.
# If there is no data, then it increases the miss count by 1.
def __init__(self, x, y, t):
    try:
        if len(x) != len(y) or len(y) != len(t):
            raise Exception('The arrays x, y and t must have the same length')
    except TypeError:
        # len() failed, so at least one attribute is not array-like
        raise Exception('The attributes x, y and t must be arrays')

    self.unit = 's'
    self.x = np.array(x, dtype='f8')
    self.y = np.array(y, dtype='f8')
    self.t = np.array(t, dtype='datetime64[{}]'.format(self.unit))
    self.seconds = (self.t - np.datetime64("1970-01-01T00:00:00")) / np.timedelta64(1, 's')
    self._t = {str(v): i for i, v in enumerate(t)}
def test_ticket_1539(self):
    dtypes = [x for x in np.typeDict.values()
              if (issubclass(x, np.number)
                  and not issubclass(x, np.timedelta64))]
    a = np.array([], dtypes[0])
    failures = []
    # ignore complex warnings
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', np.ComplexWarning)
        for x in dtypes:
            b = a.astype(x)
            for y in dtypes:
                c = a.astype(y)
                try:
                    np.dot(b, c)
                except TypeError:
                    failures.append((x, y))
    if failures:
        raise AssertionError("Failures: %r" % failures)
def test_timedelta_arange(self):
    a = np.arange(3, 10, dtype='m8')
    assert_equal(a.dtype, np.dtype('m8'))
    assert_equal(a, np.timedelta64(0) + np.arange(3, 10))

    a = np.arange(np.timedelta64(3, 's'), 10, 2, dtype='m8')
    assert_equal(a.dtype, np.dtype('m8[s]'))
    assert_equal(a, np.timedelta64(0, 's') + np.arange(3, 10, 2))

    # Step of 0 is disallowed
    assert_raises(ValueError, np.arange, np.timedelta64(0),
                  np.timedelta64(5), 0)
    # Promotion across nonlinear unit boundaries is disallowed
    assert_raises(TypeError, np.arange, np.timedelta64(0, 'D'),
                  np.timedelta64(5, 'M'))
    assert_raises(TypeError, np.arange, np.timedelta64(0, 'Y'),
                  np.timedelta64(5, 'D'))
def format_time(x):
    """Formats date values

    This function formats :class:`datetime.datetime` and
    :class:`datetime.timedelta` objects (and the corresponding numpy objects)
    using the :func:`xarray.core.formatting.format_timestamp` and the
    :func:`xarray.core.formatting.format_timedelta` functions.

    Parameters
    ----------
    x: object
        The value to format. If not a time object, the value is returned

    Returns
    -------
    str or `x`
        Either the formatted time object or the initial `x`"""
    if isinstance(x, (datetime64, datetime)):
        return format_timestamp(x)
    elif isinstance(x, (timedelta64, timedelta)):
        return format_timedelta(x)
    elif isinstance(x, ndarray):
        return list(x) if x.ndim else x[()]
    return x
def add_all_columns(group):
    group = group.sort_values(timestamp_col, ascending=True)
    group["event_nr"] = range(1, group.shape[0] + 1)

    group["last"] = "false"
    group["last"].iloc[-1] = "true"

    start_date = group[timestamp_col].iloc[0]
    elapsed = group[timestamp_col] - start_date
    elapsed = elapsed.fillna(0)
    group["elapsed"] = elapsed.apply(lambda x: float(x / np.timedelta64(1, 's')))  # s is for seconds

    end_date = group[timestamp_col].iloc[-1]
    tmp = end_date - group[timestamp_col]
    tmp = tmp.fillna(0)
    group["remtime"] = tmp.apply(lambda x: float(x / np.timedelta64(1, 's')))  # s is for seconds

    dur = group[timestamp_col] - group[timestamp_col].shift(1)
    dur = dur.fillna(0)
    group["duration"] = dur.apply(lambda x: float(x / np.timedelta64(1, 's')))  # s is for seconds

    group["weekday"] = group[timestamp_col].dt.weekday
    group["hour"] = group[timestamp_col].dt.hour

    return group
def unique1d(values):
    """
    Hash table-based unique
    """
    if np.issubdtype(values.dtype, np.floating):
        table = _hash.Float64HashTable(len(values))
        uniques = np.array(table.unique(_ensure_float64(values)),
                           dtype=np.float64)
    elif np.issubdtype(values.dtype, np.datetime64):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
        uniques = uniques.view('M8[ns]')
    elif np.issubdtype(values.dtype, np.timedelta64):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
        uniques = uniques.view('m8[ns]')
    elif np.issubdtype(values.dtype, np.integer):
        table = _hash.Int64HashTable(len(values))
        uniques = table.unique(_ensure_int64(values))
    else:
        table = _hash.PyObjectHashTable(len(values))
        uniques = table.unique(_ensure_object(values))
    return uniques
def _infer_fill_value(val):
    """
    infer the fill value for the nan/NaT from the provided
    scalar/ndarray/list-like if we are a NaT, return the correct dtyped
    element to provide proper block construction
    """
    if not is_list_like(val):
        val = [val]
    val = np.array(val, copy=False)
    if is_datetimelike(val):
        return np.array('NaT', dtype=val.dtype)
    elif is_object_dtype(val.dtype):
        dtype = lib.infer_dtype(_ensure_object(val))
        if dtype in ['datetime', 'datetime64']:
            return np.array('NaT', dtype=_NS_DTYPE)
        elif dtype in ['timedelta', 'timedelta64']:
            return np.array('NaT', dtype=_TD_DTYPE)
    return np.nan
def test_timedelta(self):
    # this is valid too
    index = date_range('1/1/2000', periods=50, freq='B')
    shifted = index + timedelta(1)
    back = shifted + timedelta(-1)
    self.assertTrue(tm.equalContents(index, back))
    self.assertEqual(shifted.freq, index.freq)
    self.assertEqual(shifted.freq, back.freq)

    result = index - timedelta(1)
    expected = index + timedelta(-1)
    self.assertTrue(result.equals(expected))

    # GH4134, buggy with timedeltas
    rng = date_range('2013', '2014')
    s = Series(rng)
    result1 = rng - pd.offsets.Hour(1)
    result2 = DatetimeIndex(s - np.timedelta64(100000000))
    result3 = rng - np.timedelta64(100000000)
    result4 = DatetimeIndex(s - pd.offsets.Hour(1))
    self.assertTrue(result1.equals(result4))
    self.assertTrue(result2.equals(result3))
def test_add_iadd(self):
    # only test adding/sub offsets as + is now numeric

    # offset
    offsets = [pd.offsets.Hour(2), timedelta(hours=2),
               np.timedelta64(2, 'h'), Timedelta(hours=2)]

    for delta in offsets:
        rng = timedelta_range('1 days', '10 days')
        result = rng + delta
        expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
                                   freq='D')
        tm.assert_index_equal(result, expected)
        rng += delta
        tm.assert_index_equal(rng, expected)

    # int
    rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
    result = rng + 1
    expected = timedelta_range('1 days 10:00:00', freq='H', periods=10)
    tm.assert_index_equal(result, expected)
    rng += 1
    tm.assert_index_equal(rng, expected)
def test_barely_oob_dts(self):
    one_us = np.timedelta64(1).astype('timedelta64[us]')

    # By definition we can't go out of bounds in [ns], so we
    # convert the datetime64s to [us] so we can go out of bounds
    min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]')
    max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]')

    # No error for the min/max datetimes
    Timestamp(min_ts_us)
    Timestamp(max_ts_us)

    # One us less than the minimum is an error
    self.assertRaises(ValueError, Timestamp, min_ts_us - one_us)

    # One us more than the maximum is an error
    self.assertRaises(ValueError, Timestamp, max_ts_us + one_us)
def test_addition_subtraction_preserve_frequency(self):
    timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0]
    timedelta_instance = datetime.timedelta(days=1)
    original_freq = timestamp_instance.freq

    self.assertEqual((timestamp_instance + 1).freq, original_freq)
    self.assertEqual((timestamp_instance - 1).freq, original_freq)
    self.assertEqual(
        (timestamp_instance + timedelta_instance).freq, original_freq)
    self.assertEqual(
        (timestamp_instance - timedelta_instance).freq, original_freq)

    timedelta64_instance = np.timedelta64(1, 'D')
    self.assertEqual(
        (timestamp_instance + timedelta64_instance).freq, original_freq)
    self.assertEqual(
        (timestamp_instance - timedelta64_instance).freq, original_freq)
def test_conversion(self):
    for td in [Timedelta(10, unit='d'),
               Timedelta('1 days, 10:11:12.012345')]:
        pydt = td.to_pytimedelta()
        self.assertTrue(td == Timedelta(pydt))
        self.assertEqual(td, pydt)
        self.assertTrue(isinstance(pydt, timedelta) and not isinstance(
            pydt, Timedelta))

        self.assertEqual(td, np.timedelta64(td.value, 'ns'))
        td64 = td.to_timedelta64()
        self.assertEqual(td64, np.timedelta64(td.value, 'ns'))
        self.assertEqual(td, td64)
        self.assertTrue(isinstance(td64, np.timedelta64))

    # this is NOT equal and cannot be roundtripped (because of the nanos)
    td = Timedelta('1 days, 10:11:12.012345678')
    self.assertTrue(td != td.to_pytimedelta())
def test_total_seconds(self):
    # GH 10939
    # test index
    rng = timedelta_range('1 days, 10:11:12.100123456', periods=2,
                          freq='s')
    expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9,
            1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9]
    assert_allclose(rng.total_seconds(), expt, atol=1e-10, rtol=0)

    # test Series
    s = Series(rng)
    s_expt = Series(expt, index=[0, 1])
    tm.assert_series_equal(s.dt.total_seconds(), s_expt)

    # with nat
    s[1] = np.nan
    s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + 12 +
                     100123456. / 1e9, np.nan], index=[0, 1])
    tm.assert_series_equal(s.dt.total_seconds(), s_expt)

    # with both nat
    s = Series([np.nan, np.nan], dtype='timedelta64[ns]')
    tm.assert_series_equal(s.dt.total_seconds(),
                           Series([np.nan, np.nan], index=[0, 1]))
def test_constructor(self):
    expected = TimedeltaIndex(['1 days', '1 days 00:00:05', '2 days',
                               '2 days 00:00:02', '0 days 00:00:03'])
    result = TimedeltaIndex(['1 days', '1 days, 00:00:05',
                             np.timedelta64(2, 'D'),
                             timedelta(days=2, seconds=2),
                             pd.offsets.Second(3)])
    tm.assert_index_equal(result, expected)

    # unicode
    result = TimedeltaIndex([u'1 days', '1 days, 00:00:05',
                             np.timedelta64(2, 'D'),
                             timedelta(days=2, seconds=2),
                             pd.offsets.Second(3)])

    expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01',
                               '0 days 00:00:02'])
    tm.assert_index_equal(TimedeltaIndex(range(3), unit='s'), expected)

    expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:05',
                               '0 days 00:00:09'])
    tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit='s'), expected)

    expected = TimedeltaIndex(['0 days 00:00:00.400',
                               '0 days 00:00:00.450',
                               '0 days 00:00:01.200'])
    tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit='ms'),
                          expected)
def _add_delta(self, delta):
    from pandas import TimedeltaIndex
    name = self.name

    if isinstance(delta, (Tick, timedelta, np.timedelta64)):
        new_values = self._add_delta_td(delta)
    elif isinstance(delta, TimedeltaIndex):
        new_values = self._add_delta_tdi(delta)
        # update name when delta is Index
        name = com._maybe_match_name(self, delta)
    elif isinstance(delta, DateOffset):
        new_values = self._add_offset(delta).asi8
    else:
        new_values = self.astype('O') + delta

    tz = 'UTC' if self.tz is not None else None
    result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer')
    utc = _utc()
    if self.tz is not None and self.tz is not utc:
        result = result.tz_convert(self.tz)
    return result
def test_ops_consistency_on_empty(self):
    # GH 7869
    # consistency on empty

    # float
    result = Series(dtype=float).sum()
    self.assertEqual(result, 0)

    result = Series(dtype=float).mean()
    self.assertTrue(isnull(result))

    result = Series(dtype=float).median()
    self.assertTrue(isnull(result))

    # timedelta64[ns]
    result = Series(dtype='m8[ns]').sum()
    self.assertEqual(result, Timedelta(0))

    result = Series(dtype='m8[ns]').mean()
    self.assertTrue(result is pd.NaT)

    result = Series(dtype='m8[ns]').median()
    self.assertTrue(result is pd.NaT)
def test_datetimelike_setitem_with_inference(self):
    # GH 7592
    # assignment of timedeltas with NaT

    one_hour = timedelta(hours=1)
    df = DataFrame(index=date_range('20130101', periods=4))
    df['A'] = np.array([1 * one_hour] * 4, dtype='m8[ns]')
    df.loc[:, 'B'] = np.array([2 * one_hour] * 4, dtype='m8[ns]')
    df.loc[:3, 'C'] = np.array([3 * one_hour] * 3, dtype='m8[ns]')
    df.ix[:, 'D'] = np.array([4 * one_hour] * 4, dtype='m8[ns]')
    df.ix[:3, 'E'] = np.array([5 * one_hour] * 3, dtype='m8[ns]')
    df['F'] = np.timedelta64('NaT')
    df.ix[:-1, 'F'] = np.array([6 * one_hour] * 3, dtype='m8[ns]')
    df.ix[-3:, 'G'] = date_range('20130101', periods=3)
    df['H'] = np.datetime64('NaT')

    result = df.dtypes
    expected = Series([np.dtype('timedelta64[ns]')] * 6 +
                      [np.dtype('datetime64[ns]')] * 2,
                      index=list('ABCDEFGH'))
    assert_series_equal(result, expected)
def test_constructor_dict_timedelta64_index(self):
    # GH 10160
    td_as_int = [1, 2, 3, 4]

    def create_data(constructor):
        return dict((i, {constructor(s): 2 * i})
                    for i, s in enumerate(td_as_int))

    data_timedelta64 = create_data(lambda x: np.timedelta64(x, 'D'))
    data_timedelta = create_data(lambda x: timedelta(days=x))
    data_Timedelta = create_data(lambda x: Timedelta(x, 'D'))

    expected = DataFrame([{0: 0, 1: None, 2: None, 3: None},
                          {0: None, 1: 2, 2: None, 3: None},
                          {0: None, 1: None, 2: 4, 3: None},
                          {0: None, 1: None, 2: None, 3: 6}],
                         index=[Timedelta(td, 'D') for td in td_as_int])

    result_timedelta64 = DataFrame(data_timedelta64)
    result_timedelta = DataFrame(data_timedelta)
    result_Timedelta = DataFrame(data_Timedelta)
    assert_frame_equal(result_timedelta64, expected)
    assert_frame_equal(result_timedelta, expected)
    assert_frame_equal(result_Timedelta, expected)
def check_times(times, min_delta=np.timedelta64(1, 's'),
                max_delta=np.timedelta64(49, 'h'), f=None):
    '''QC time variable from a netcdf file.

    Warn if a check is violated.

    Current checks:
        1) Timestamps must be monotonically increasing
        2) Maximum timestep size must be less than a certain threshold
           (max_delta)
    '''
    diffs = np.diff(times)
    negs = np.nonzero(diffs < min_delta)[0]
    too_big = np.nonzero(diffs > max_delta)[0]

    if len(negs) > 0:
        datestamps = pd.to_datetime(times[negs[0] - 1: negs[0] + 2])
        warnings.warn('%s: times are not monotonically increasing. '
                      'Found timestamp < %s at %s, first example: '
                      '%s' % (f, min_delta, negs, datestamps))
    if len(too_big) > 0:
        datestamps = pd.to_datetime(times[too_big[0] - 1: too_big[0] + 2])
        warnings.warn('%s: found a timestep where its delta is too '
                      'large (greater than %s) at %s, first example: '
                      '%s' % (f, max_delta, too_big, datestamps))
def time_seconds(tc_array, year):
    """Return the time object from the timecodes
    """
    tc_array = np.array(tc_array, copy=True)
    word = tc_array[:, 0]
    day = word >> 1
    word = tc_array[:, 1].astype(np.uint64)
    msecs = ((127) & word) * 1024
    word = tc_array[:, 2]
    msecs += word & 1023
    msecs *= 1024
    word = tc_array[:, 3]
    msecs += word & 1023
    return (np.datetime64(str(year) + '-01-01T00:00:00Z', 's') +
            msecs[:].astype('timedelta64[ms]') +
            (day - 1)[:].astype('timedelta64[D]'))
def test_corrupted_data(self):
    self.incoming.add_measures(self.metric.id, [
        incoming.Measure(datetime64(2014, 1, 1, 12, 0, 1), 69),
    ])
    self.trigger_processing()

    self.incoming.add_measures(self.metric.id, [
        incoming.Measure(datetime64(2014, 1, 1, 13, 0, 1), 1),
    ])

    with mock.patch('gnocchi.carbonara.AggregatedTimeSerie.unserialize',
                    side_effect=carbonara.InvalidData()):
        with mock.patch('gnocchi.carbonara.BoundTimeSerie.unserialize',
                        side_effect=carbonara.InvalidData()):
            self.trigger_processing()

    m = self.storage.get_measures(self.metric)
    self.assertIn((datetime64(2014, 1, 1),
                   numpy.timedelta64(1, 'D'), 1), m)
    self.assertIn((datetime64(2014, 1, 1, 13),
                   numpy.timedelta64(1, 'h'), 1), m)
    self.assertIn((datetime64(2014, 1, 1, 13),
                   numpy.timedelta64(5, 'm'), 1), m)
def test_aborted_initial_processing(self):
    self.incoming.add_measures(self.metric.id, [
        incoming.Measure(datetime64(2014, 1, 1, 12, 0, 1), 5),
    ])
    with mock.patch.object(self.storage, '_store_unaggregated_timeserie',
                           side_effect=Exception):
        try:
            self.trigger_processing()
        except Exception:
            pass

    with mock.patch('gnocchi.storage.LOG') as LOG:
        self.trigger_processing()
        self.assertFalse(LOG.error.called)

    m = self.storage.get_measures(self.metric)
    self.assertIn((datetime64(2014, 1, 1),
                   numpy.timedelta64(1, 'D'), 5.0), m)
    self.assertIn((datetime64(2014, 1, 1, 12),
                   numpy.timedelta64(1, 'h'), 5.0), m)
    self.assertIn((datetime64(2014, 1, 1, 12),
                   numpy.timedelta64(5, 'm'), 5.0), m)
def test_add_measures_update_subset_split(self):
    m, m_sql = self._create_metric('medium')
    measures = [
        incoming.Measure(datetime64(2014, 1, 6, i, j, 0), 100)
        for i in six.moves.range(2) for j in six.moves.range(0, 60, 2)]
    self.incoming.add_measures(m.id, measures)
    self.trigger_processing([str(m.id)])

    # add measure to end, in same aggregate time as last point.
    self.incoming.add_measures(m.id, [
        incoming.Measure(datetime64(2014, 1, 6, 1, 58, 1), 100)])

    with mock.patch.object(self.storage, '_store_metric_measures') as c:
        # should only resample last aggregate
        self.trigger_processing([str(m.id)])
    count = 0
    for call in c.mock_calls:
        # policy is 60 points and split is 48. should only update 2nd half
        args = call[1]
        if (args[0] == m_sql
                and args[2] == 'mean'
                and args[1].sampling == numpy.timedelta64(1, 'm')):
            count += 1
    self.assertEqual(1, count)
def test_74_percentile_serialized(self):
    ts = carbonara.TimeSerie.from_tuples(
        [(datetime64(2014, 1, 1, 12, 0, 0), 3),
         (datetime64(2014, 1, 1, 12, 0, 4), 5),
         (datetime64(2014, 1, 1, 12, 0, 9), 6)])
    ts = self._resample(ts, numpy.timedelta64(60, 's'), '74pct')

    self.assertEqual(1, len(ts))
    self.assertEqual(5.48, ts[datetime64(2014, 1, 1, 12, 0, 0)][1])

    # Serialize and unserialize
    key = ts.get_split_key()
    o, s = ts.serialize(key)
    saved_ts = carbonara.AggregatedTimeSerie.unserialize(s, key, '74pct')

    ts = carbonara.TimeSerie.from_tuples(
        [(datetime64(2014, 1, 1, 12, 0, 0), 3),
         (datetime64(2014, 1, 1, 12, 0, 4), 5),
         (datetime64(2014, 1, 1, 12, 0, 9), 6)])
    ts = self._resample(ts, numpy.timedelta64(60, 's'), '74pct')

    saved_ts.merge(ts)

    self.assertEqual(1, len(ts))
    self.assertEqual(5.48, ts[datetime64(2014, 1, 1, 12, 0, 0)][1])
def test_aggregation_std_with_unique(self):
    ts = carbonara.TimeSerie.from_tuples(
        [(datetime64(2014, 1, 1, 12, 0, 0), 3)])
    ts = self._resample(ts, numpy.timedelta64(60, 's'), 'std')
    self.assertEqual(0, len(ts), ts.values)

    ts = carbonara.TimeSerie.from_tuples(
        [(datetime64(2014, 1, 1, 12, 0, 0), 3),
         (datetime64(2014, 1, 1, 12, 0, 4), 6),
         (datetime64(2014, 1, 1, 12, 0, 9), 5),
         (datetime64(2014, 1, 1, 12, 1, 6), 9)])
    ts = self._resample(ts, numpy.timedelta64(60, 's'), "std")

    self.assertEqual(1, len(ts))
    self.assertEqual(1.5275252316519465,
                     ts[datetime64(2014, 1, 1, 12, 0, 0)][1])
def test_serialize(self):
    ts = {'sampling': numpy.timedelta64(500, 'ms'), 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 12, 0, 0, 1234), 3),
        (datetime64(2014, 1, 1, 12, 0, 0, 321), 6),
        (datetime64(2014, 1, 1, 12, 1, 4, 234), 5),
        (datetime64(2014, 1, 1, 12, 1, 9, 32), 7),
        (datetime64(2014, 1, 1, 12, 2, 12, 532), 1)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    key = ts['return'].get_split_key()
    o, s = ts['return'].serialize(key)
    self.assertEqual(ts['return'],
                     carbonara.AggregatedTimeSerie.unserialize(
                         s, key, 'mean'))
def test_no_truncation(self):
    ts = {'sampling': numpy.timedelta64(60, 's'), 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie()

    for i in six.moves.range(1, 11):
        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, i, i), float(i))],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))
        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, i, i + 1), float(i + 1))],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))
        self.assertEqual(i, len(list(ts['return'].fetch())))
def test_split_key(self):
    self.assertEqual(
        numpy.datetime64("2014-10-07"),
        carbonara.SplitKey.from_timestamp_and_sampling(
            numpy.datetime64("2015-01-01T15:03"),
            numpy.timedelta64(3600, 's')))
    self.assertEqual(
        numpy.datetime64("2014-12-31 18:00"),
        carbonara.SplitKey.from_timestamp_and_sampling(
            numpy.datetime64("2015-01-01 15:03:58"),
            numpy.timedelta64(58, 's')))

    key = carbonara.SplitKey.from_timestamp_and_sampling(
        numpy.datetime64("2015-01-01 15:03"),
        numpy.timedelta64(3600, 's'))

    self.assertGreater(key, numpy.datetime64("1970"))
    self.assertGreaterEqual(key, numpy.datetime64("1970"))
def test_split(self):
    sampling = numpy.timedelta64(5, 's')
    points = 100000
    ts = carbonara.TimeSerie.from_data(
        timestamps=list(map(datetime.datetime.utcfromtimestamp,
                            six.moves.range(points))),
        values=list(six.moves.range(points)))
    agg = self._resample(ts, sampling, 'mean')

    grouped_points = list(agg.split())

    self.assertEqual(
        math.ceil((points / sampling.astype(float))
                  / carbonara.SplitKey.POINTS_PER_SPLIT),
        len(grouped_points))
    self.assertEqual("0.0",
                     str(carbonara.SplitKey(grouped_points[0][0], 0)))
    # 3600 × 5s = 5 hours
    self.assertEqual(datetime64(1970, 1, 1, 5),
                     grouped_points[1][0])
    self.assertEqual(carbonara.SplitKey.POINTS_PER_SPLIT,
                     len(grouped_points[0][1]))
def test_from_timeseries(self):
    sampling = numpy.timedelta64(5, 's')
    points = 100000
    ts = carbonara.TimeSerie.from_data(
        timestamps=list(map(datetime.datetime.utcfromtimestamp,
                            six.moves.range(points))),
        values=list(six.moves.range(points)))
    agg = self._resample(ts, sampling, 'mean')

    split = [t[1] for t in list(agg.split())]

    self.assertEqual(agg,
                     carbonara.AggregatedTimeSerie.from_timeseries(
                         split,
                         sampling=agg.sampling,
                         max_size=agg.max_size,
                         aggregation_method=agg.aggregation_method))
def add_hours_elpased_to_events(events, dt, remove_charttime=True):
    events['HOURS'] = (events.CHARTTIME - dt).apply(
        lambda s: s / np.timedelta64(1, 's')) / 60. / 60
    if remove_charttime:
        del events['CHARTTIME']
    return events
def add_age_to_icustays(stays):
    stays['AGE'] = (stays.INTIME - stays.DOB).apply(
        lambda s: s / np.timedelta64(1, 's')) / 60. / 60 / 24 / 365
    stays.AGE.ix[stays.AGE < 0] = 90
    return stays
def _make_actual_bg_array(bg_df, start_index, end_index, prediction_start_time):
    total_len = start_index - end_index + 1
    time_bg_array = np.zeros(total_len)
    actual_bg_array = np.zeros(total_len)
    array_index = 0
    miss = 0

    for df_index in range(start_index, end_index - 1, -1):
        # Keep track of the time starting at 0 at the start_index
        time = (bg_df.iloc[df_index]['created_at'] -
                bg_df.iloc[start_index]['created_at']) / np.timedelta64(1, 'm')

        if time > prediction_start_time:
            time_bg_array[array_index] = time

            try:
                actual_bg_array[array_index] = bg_df.iloc[df_index]['openaps']['enacted']['bg']
                array_index += 1
                last_time = time
            except:
                try:
                    actual_bg_array[array_index] = bg_df.iloc[df_index]['openaps']['suggested']['bg']
                    array_index += 1
                    last_time = time
                except:
                    # If a miss, don't move to the next index and instead
                    # add one to the number missed
                    miss += 1
        else:
            miss += 1

    # Remove the number of missed data
    time_bg_array = np.resize(time_bg_array, total_len - miss)
    actual_bg_array = np.resize(actual_bg_array, total_len - miss)

    return time_bg_array, actual_bg_array


# Returns true if the data lies in a data gap, so it will not be used
def add_pricing_date(i=0, in_place=True):
    if in_place:
        Pricing_Database.pricing_date += np.timedelta64(i, 'D')
        return None
    else:
        return Pricing_Database.pricing_date + np.timedelta64(i, 'D')
def add_date(npdate, i=0):
    return npdate + np.timedelta64(i, 'D')
def date_diff(dt1, dt2):
    if isinstance(dt1, int):
        dt1 = add_pricing_date(dt1, in_place=False)
    if isinstance(dt2, int):
        dt2 = add_pricing_date(dt2, in_place=False)
    return (dt2 - dt1) / np.timedelta64(1, 'D')
def __to_Timestamp__(self, time):
    return time * np.timedelta64(1, 's') + np.datetime64("1970-01-01 00:00:00")
def test_intersection_sem_mock_do_test_2(self):
    poly = Polygon([(1, 1), (1, 3), (4, 3), (4, 1), (1, 1)])
    response = self.traj2.intersection_shapely(poly)

    traj = self.traj2.to_Trajectory(response)

    time = np.datetime64('2000-02-01T00:01:00')
    seconds = (time - np.datetime64("1970-01-01 00:00:00")) / np.timedelta64(1, 's')

    assert (np.array_equal(traj.getTime()[0], seconds))
    assert (np.array_equal(traj.getTime()[1], seconds))
    assert (np.array_equal(traj.getTime()[2], seconds))