Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
energy
bmon
Compare Revisions
095b88e589e2c87c77c735d6e0adc30c5a419578...b5dff7a7aeba55fbd25cb20c22047877ee939013
Commits (2)
Handle NaN when resampling and plotting timeseries
· 2268ac15
alaskamapscience
authored
Aug 18, 2021
2268ac15
Improvements to averaging code
· b5dff7a7
alaskamapscience
authored
Aug 19, 2021
b5dff7a7
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
10 deletions
+18
-10
bmsapp/data_util.py
bmsapp/data_util.py
+15
-7
bmsapp/reports/timeseries.py
bmsapp/reports/timeseries.py
+3
-3
No files found.
bmsapp/data_util.py
View file @
b5dff7a7
...
...
@@ -149,7 +149,10 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met
window_breaks
[:]
=
np
.
nan
# calculate the average number of hours in the resampling periods
averaging_hours
=
(
window_breaks
.
index
.
to_series
().
diff
()
/
pd
.
Timedelta
(
1
,
'hour'
)).
mean
()
if
len
(
window_breaks
)
>
1
:
averaging_hours
=
(
window_breaks
.
index
.
to_series
().
diff
()
/
pd
.
Timedelta
(
1
,
'hour'
)).
mean
()
else
:
averaging_hours
=
24
interp_limit
=
int
(
24
/
averaging_hours
)
+
1
# limit interpolation to 24 hours or 1 averaging period
# also create breaks that are shifted 1 day forward
...
...
@@ -160,7 +163,10 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met
df
=
pandas_dataframe
.
append
(
window_breaks
[
~
window_breaks
.
index
.
isin
(
pandas_dataframe
.
index
)]).
sort_index
()
# interpolate values
df
=
df
.
interpolate
(
method
=
interp_method
,
limit
=
interp_limit
)
if
interp_method
in
[
'backfill'
,
'bfill'
,
'pad'
,
'ffill'
]:
df
=
df
.
fillna
(
method
=
interp_method
,
limit
=
interp_limit
)
else
:
df
=
df
.
interpolate
(
method
=
interp_method
,
limit
=
interp_limit
)
# calculate the 'duration' weights for each row and for each time period (based on difference from previous timestamp)
value_duration
=
(
df
.
index
.
to_series
()
-
df
.
index
.
to_series
().
shift
(
1
))
/
pd
.
Timedelta
(
1
,
'hour'
)
...
...
@@ -177,7 +183,7 @@ def weighted_resample_timeseries(pandas_dataframe, averaging, offset, interp_met
df
=
df
[:
-
1
]
# resample and calculate the weighted average for each time period
dfResampled
=
df
.
resample
(
rule
=
averaging
,
closed
=
'right'
,
label
=
'left'
).
sum
(
)
dfResampled
=
df
.
resample
(
rule
=
averaging
,
closed
=
'right'
,
label
=
'left'
).
agg
(
lambda
x
:
np
.
sum
(
x
.
values
)
)
dfResampled
=
dfResampled
[
pandas_dataframe
.
columns
].
div
(
dfResampled
[
'value_duration_weight'
],
axis
=
'index'
)
if
offset
:
...
...
@@ -199,11 +205,13 @@ def decimate_timeseries(df,bin_count=1000,col=None):
# bin the index values
bins
=
df
.
groupby
(
pd
.
cut
(
df
.
index
,
bins
=
bin_count
,
labels
=
np
.
arange
(
0
,
bin_count
)).
astype
(
int
))
# keep the max and min value in each bin
maximums
=
df
.
loc
[
bins
[
col
].
idxmax
()]
minimums
=
df
.
loc
[
bins
[
col
].
idxmin
()]
# get the indices for the max and min value in each bin
max_indices
=
bins
[
col
].
idxmax
()
max_indices
=
max_indices
.
where
(
~
max_indices
.
isna
(),
bins
.
apply
(
lambda
x
:
x
.
index
[
0
]))
min_indices
=
bins
[
col
].
idxmin
()
keep_indices
=
pd
.
concat
([
max_indices
,
min_indices
]).
drop_duplicates
().
sort_index
()
return
pd
.
concat
([
maximums
,
minimums
]).
drop_duplicates
().
sort_index
()
return
df
.
loc
[
keep_indices
]
else
:
return
df
...
...
bmsapp/reports/timeseries.py
View file @
b5dff7a7
...
...
@@ -61,16 +61,16 @@ class TimeSeries(basechart.BaseChart):
interp_method
=
'pad'
else
:
interp_method
=
'linear'
df
=
bmsapp
.
data_util
.
resample_timeseries
(
df
,
averaging_hours
,
use_rolling_averaging
,
interp_method
=
interp_method
)
df
=
bmsapp
.
data_util
.
resample_timeseries
(
df
,
averaging_hours
,
use_rolling_averaging
,
drop_na
=
False
,
interp_method
=
interp_method
)
# limit the number of points to plot
df
=
bmsapp
.
data_util
.
decimate_timeseries
(
df
,
bin_count
=
1000
,
col
=
'val'
)
# create lists for plotly
if
np
.
absolute
(
df
.
val
.
values
).
max
()
<
10000
:
values
=
np
.
char
.
mod
(
'%.4g'
,
df
.
val
.
values
).
astype
(
float
)
.
tolist
()
values
=
df
.
val
.
round
(
4
).
where
(
df
.
val
.
notnull
(),
None
).
values
.
tolist
()
else
:
values
=
np
.
round
(
df
.
val
.
values
)
.
tolist
()
values
=
df
.
val
.
round
(
0
).
where
(
df
.
val
.
notnull
(),
None
)
.
values
.
tolist
()
times
=
df
.
index
.
strftime
(
'%Y-%m-%d %H:%M:%S'
).
tolist
()
else
:
times
=
[]
...
...