Commits (2)
...@@ -149,7 +149,10 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met ...@@ -149,7 +149,10 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met
window_breaks[:] = np.nan window_breaks[:] = np.nan
# calculate the average number of hours in the resampling periods # calculate the average number of hours in the resampling periods
averaging_hours = (window_breaks.index.to_series().diff() / pd.Timedelta(1,'hour')).mean() if len(window_breaks) > 1:
averaging_hours = (window_breaks.index.to_series().diff() / pd.Timedelta(1,'hour')).mean()
else:
averaging_hours = 24
interp_limit = int(24 / averaging_hours) + 1 # limit interpolation to 24 hours or 1 averaging period interp_limit = int(24 / averaging_hours) + 1 # limit interpolation to 24 hours or 1 averaging period
# also create breaks that are shifted 1 day forward # also create breaks that are shifted 1 day forward
...@@ -160,7 +163,10 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met ...@@ -160,7 +163,10 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met
df = pandas_dataframe.append(window_breaks[~window_breaks.index.isin(pandas_dataframe.index)]).sort_index() df = pandas_dataframe.append(window_breaks[~window_breaks.index.isin(pandas_dataframe.index)]).sort_index()
# interpolate values # interpolate values
df = df.interpolate(method=interp_method,limit=interp_limit) if interp_method in ['backfill', 'bfill', 'pad', 'ffill']:
df = df.fillna(method=interp_method,limit=interp_limit)
else:
df = df.interpolate(method=interp_method,limit=interp_limit)
# calculate the 'duration' weights for each row and for each time period (based on difference from previous timestamp) # calculate the 'duration' weights for each row and for each time period (based on difference from previous timestamp)
value_duration = (df.index.to_series() - df.index.to_series().shift(1)) / pd.Timedelta(1,'hour') value_duration = (df.index.to_series() - df.index.to_series().shift(1)) / pd.Timedelta(1,'hour')
...@@ -177,7 +183,7 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met ...@@ -177,7 +183,7 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met
df = df[:-1] df = df[:-1]
# resample and calculate the weighted average for each time period # resample and calculate the weighted average for each time period
dfResampled = df.resample(rule=averaging, closed='right', label='left').sum() dfResampled = df.resample(rule=averaging, closed='right', label='left').agg(lambda x: np.sum(x.values))
dfResampled = dfResampled[pandas_dataframe.columns].div(dfResampled['value_duration_weight'],axis='index') dfResampled = dfResampled[pandas_dataframe.columns].div(dfResampled['value_duration_weight'],axis='index')
if offset: if offset:
...@@ -199,11 +205,13 @@ def decimate_timeseries(df,bin_count=1000,col=None): ...@@ -199,11 +205,13 @@ def decimate_timeseries(df,bin_count=1000,col=None):
# bin the index values # bin the index values
bins = df.groupby(pd.cut(df.index,bins=bin_count,labels=np.arange(0,bin_count)).astype(int)) bins = df.groupby(pd.cut(df.index,bins=bin_count,labels=np.arange(0,bin_count)).astype(int))
# keep the max and min value in each bin # get the indices for the max and min value in each bin
maximums = df.loc[bins[col].idxmax()] max_indices = bins[col].idxmax()
minimums = df.loc[bins[col].idxmin()] max_indices = max_indices.where(~max_indices.isna(),bins.apply(lambda x: x.index[0]))
min_indices = bins[col].idxmin()
keep_indices = pd.concat([max_indices,min_indices]).drop_duplicates().sort_index()
return pd.concat([maximums,minimums]).drop_duplicates().sort_index() return df.loc[keep_indices]
else: else:
return df return df
......
...@@ -61,16 +61,16 @@ class TimeSeries(basechart.BaseChart): ...@@ -61,16 +61,16 @@ class TimeSeries(basechart.BaseChart):
interp_method = 'pad' interp_method = 'pad'
else: else:
interp_method = 'linear' interp_method = 'linear'
df = bmsapp.data_util.resample_timeseries(df,averaging_hours,use_rolling_averaging,interp_method=interp_method) df = bmsapp.data_util.resample_timeseries(df,averaging_hours,use_rolling_averaging,drop_na=False,interp_method=interp_method)
# limit the number of points to plot # limit the number of points to plot
df = bmsapp.data_util.decimate_timeseries(df, bin_count=1000,col='val') df = bmsapp.data_util.decimate_timeseries(df, bin_count=1000,col='val')
# create lists for plotly # create lists for plotly
if np.absolute(df.val.values).max() < 10000: if np.absolute(df.val.values).max() < 10000:
values = np.char.mod('%.4g',df.val.values).astype(float).tolist() values = df.val.round(4).where(df.val.notnull(),None).values.tolist()
else: else:
values = np.round(df.val.values).tolist() values = df.val.round(0).where(df.val.notnull(),None).values.tolist()
times = df.index.strftime('%Y-%m-%d %H:%M:%S').tolist() times = df.index.strftime('%Y-%m-%d %H:%M:%S').tolist()
else: else:
times = [] times = []
......