Data Settings¶

In this lesson, we'll consider what it means for a dataset to have a data setting: the technical and the human processes that affect what information is captured in the data collection process and how the data are then structured. In our upcoming assessment, we'll examine an administrative dataset on educational attainment for people ages 25 to 29 in the United States. The assessment serves as an opportunity to reflect not only on the challenges of data visualization, but also on the challenges inherent in working with real-world data.

By the end of this lesson, students will be able to:

  • Create visualizations involving time series data.
  • Compare and contrast statistical, coded, and structural bias.
  • Identify questions about the data setting for a given dataset.
In [1]:
import pandas as pd
import seaborn as sns

sns.set_theme()

Time series data¶

Seattleites often look forward to summer months for beautiful weather and outdoor activities, but in recent years summer wildfires have had significant impacts on air quality. Are there particular times during the summer months when air quality is most concerning? Let's investigate air quality data captured by the Puget Sound Clean Air Agency's Seattle-Duwamish sensor between April 2017 and April 2022. Current sensor readings can be found on Washington's Air Monitoring Network Map.

The air quality sensor data is recorded at hourly intervals, making it a time series: time-indexed data with a consistent interval between observations.

In [2]:
seattle_air = pd.read_csv("seattle_air.csv", index_col="Time", parse_dates=True)
seattle_air
Out[2]:
PM2.5
Time
2017-04-06 00:00:00 6.8
2017-04-06 01:00:00 5.3
2017-04-06 02:00:00 5.3
2017-04-06 03:00:00 5.6
2017-04-06 04:00:00 5.9
... ...
2022-04-06 19:00:00 5.1
2022-04-06 20:00:00 5.0
2022-04-06 21:00:00 5.3
2022-04-06 22:00:00 5.2
2022-04-06 23:00:00 5.2

43848 rows × 1 columns

Time series data use a special type of index called a DatetimeIndex that stores datetime values. Each datetime value in the index below consists of a YEAR-MONTH-DAY and HOUR:MINUTE:SECOND.

In [3]:
seattle_air.index
Out[3]:
DatetimeIndex(['2017-04-06 00:00:00', '2017-04-06 01:00:00',
               '2017-04-06 02:00:00', '2017-04-06 03:00:00',
               '2017-04-06 04:00:00', '2017-04-06 05:00:00',
               '2017-04-06 06:00:00', '2017-04-06 07:00:00',
               '2017-04-06 08:00:00', '2017-04-06 09:00:00',
               ...
               '2022-04-06 14:00:00', '2022-04-06 15:00:00',
               '2022-04-06 16:00:00', '2022-04-06 17:00:00',
               '2022-04-06 18:00:00', '2022-04-06 19:00:00',
               '2022-04-06 20:00:00', '2022-04-06 21:00:00',
               '2022-04-06 22:00:00', '2022-04-06 23:00:00'],
              dtype='datetime64[ns]', name='Time', length=43848, freq=None)
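
Notice freq=None at the end of the output: pandas did not attach a fixed frequency to the index when reading the CSV, even though the readings are hourly. As a quick sanity check (a sketch on our part, not a step from the original lesson), we can ask pandas to infer the frequency from the spacing of the index values:

# infer_freq inspects the spacing of the index values and returns a
# frequency alias; an hourly index should yield "h" ("H" in older
# pandas versions)
pd.infer_freq(seattle_air.index)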

Pandas provides convenient string-based syntax for slicing a datetime index.

In [4]:
# All the data in 2022 and all the columns
seattle_air.loc["2022", :]
Out[4]:
PM2.5
Time
2022-01-01 00:00:00 27.2
2022-01-01 01:00:00 25.1
2022-01-01 02:00:00 23.9
2022-01-01 03:00:00 21.0
2022-01-01 04:00:00 16.7
... ...
2022-04-06 19:00:00 5.1
2022-04-06 20:00:00 5.0
2022-04-06 21:00:00 5.3
2022-04-06 22:00:00 5.2
2022-04-06 23:00:00 5.2

2304 rows × 1 columns

In [5]:
seattle_air.loc["2022-04", :]
Out[5]:
PM2.5
Time
2022-04-01 00:00:00 5.2
2022-04-01 01:00:00 5.1
2022-04-01 02:00:00 5.4
2022-04-01 03:00:00 5.4
2022-04-01 04:00:00 6.3
... ...
2022-04-06 19:00:00 5.1
2022-04-06 20:00:00 5.0
2022-04-06 21:00:00 5.3
2022-04-06 22:00:00 5.2
2022-04-06 23:00:00 5.2

144 rows × 1 columns

How do we slice the air quality data for the summer months June 1, 2021 through August 31, 2021? Can we get only the summer months across all the years in the dataset? How does this compare against the MultiIndex slicing that we've learned in the past?

In [6]:
# Why is the colon between the two dates okay? Slice syntax is allowed
# directly inside square brackets, so no tuple is written out. Remember that
# we only need slice(...) when the slice appears inside an explicit tuple,
# as with MultiIndex slicing.
seattle_air.loc["2021-06-01":"2021-08-31", :]
Out[6]:
PM2.5
Time
2021-06-01 00:00:00 6.0
2021-06-01 01:00:00 6.1
2021-06-01 02:00:00 6.0
2021-06-01 03:00:00 6.6
2021-06-01 04:00:00 7.7
... ...
2021-08-31 19:00:00 5.9
2021-08-31 20:00:00 6.4
2021-08-31 21:00:00 6.7
2021-08-31 22:00:00 7.2
2021-08-31 23:00:00 6.5

2208 rows × 1 columns

In [7]:
# Pandas will not infer the year for us, so this slice fails with an error.
# There are ways around this: see the workaround sketched after the
# traceback below.
seattle_air.loc["06-01":"08-31", :]
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
File period.pyx:1169, in pandas._libs.tslibs.period.period_ordinal_to_dt64()

OverflowError: Overflow occurred in npy_datetimestruct_to_datetime

The above exception was the direct cause of the following exception:

OutOfBoundsDatetime                       Traceback (most recent call last)
Cell In[7], line 1
----> 1 seattle_air.loc["06-01":"08-31", :]

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexing.py:1184, in _LocationIndexer.__getitem__(self, key)
   1182     if self._is_scalar_access(key):
   1183         return self.obj._get_value(*key, takeable=self._takeable)
-> 1184     return self._getitem_tuple(key)
   1185 else:
   1186     # we by definition only have the 0th axis
   1187     axis = self.axis or 0

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexing.py:1377, in _LocIndexer._getitem_tuple(self, tup)
   1374 if self._multi_take_opportunity(tup):
   1375     return self._multi_take(tup)
-> 1377 return self._getitem_tuple_same_dim(tup)

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexing.py:1020, in _LocationIndexer._getitem_tuple_same_dim(self, tup)
   1017 if com.is_null_slice(key):
   1018     continue
-> 1020 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
   1021 # We should never have retval.ndim < self.ndim, as that should
   1022 #  be handled by the _getitem_lowerdim call above.
   1023 assert retval.ndim == self.ndim

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexing.py:1411, in _LocIndexer._getitem_axis(self, key, axis)
   1409 if isinstance(key, slice):
   1410     self._validate_key(key, axis)
-> 1411     return self._get_slice_axis(key, axis=axis)
   1412 elif com.is_bool_indexer(key):
   1413     return self._getbool_axis(key, axis=axis)

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexing.py:1443, in _LocIndexer._get_slice_axis(self, slice_obj, axis)
   1440     return obj.copy(deep=False)
   1442 labels = obj._get_axis(axis)
-> 1443 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
   1445 if isinstance(indexer, slice):
   1446     return self.obj._slice(indexer, axis=axis)

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:682, in DatetimeIndex.slice_indexer(self, start, end, step)
    674 # GH#33146 if start and end are combinations of str and None and Index is not
    675 # monotonic, we can not use Index.slice_indexer because it does not honor the
    676 # actual elements, is only searching for start and end
    677 if (
    678     check_str_or_none(start)
    679     or check_str_or_none(end)
    680     or self.is_monotonic_increasing
    681 ):
--> 682     return Index.slice_indexer(self, start, end, step)
    684 mask = np.array(True)
    685 in_index = True

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/base.py:6662, in Index.slice_indexer(self, start, end, step)
   6618 def slice_indexer(
   6619     self,
   6620     start: Hashable | None = None,
   6621     end: Hashable | None = None,
   6622     step: int | None = None,
   6623 ) -> slice:
   6624     """
   6625     Compute the slice indexer for input labels and step.
   6626 
   (...)
   6660     slice(1, 3, None)
   6661     """
-> 6662     start_slice, end_slice = self.slice_locs(start, end, step=step)
   6664     # return a slice
   6665     if not is_scalar(start_slice):

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/base.py:6879, in Index.slice_locs(self, start, end, step)
   6877 start_slice = None
   6878 if start is not None:
-> 6879     start_slice = self.get_slice_bound(start, "left")
   6880 if start_slice is None:
   6881     start_slice = 0

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/base.py:6794, in Index.get_slice_bound(self, label, side)
   6790 original_label = label
   6792 # For datetime indices label may be a string that has to be converted
   6793 # to datetime boundary according to its resolution.
-> 6794 label = self._maybe_cast_slice_bound(label, side)
   6796 # we need to look up the label
   6797 try:

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:642, in DatetimeIndex._maybe_cast_slice_bound(self, label, side)
    637 if isinstance(label, dt.date) and not isinstance(label, dt.datetime):
    638     # Pandas supports slicing with dates, treated as datetimes at midnight.
    639     # https://github.com/pandas-dev/pandas/issues/31501
    640     label = Timestamp(label).to_pydatetime()
--> 642 label = super()._maybe_cast_slice_bound(label, side)
    643 self._data._assert_tzawareness_compat(label)
    644 return Timestamp(label)

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/datetimelike.py:375, in DatetimeIndexOpsMixin._maybe_cast_slice_bound(self, label, side)
    369     except ValueError as err:
    370         # DTI -> parsing.DateParseError
    371         # TDI -> 'unit abbreviation w/o a number'
    372         # PI -> string cannot be parsed as datetime-like
    373         self._raise_invalid_indexer("slice", label, err)
--> 375     lower, upper = self._parsed_string_to_bounds(reso, parsed)
    376     return lower if side == "left" else upper
    377 elif not isinstance(label, self._data._recognized_scalars):

File /opt/conda/lib/python3.11/site-packages/pandas/core/indexes/datetimes.py:538, in DatetimeIndex._parsed_string_to_bounds(self, reso, parsed)
    536 freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
    537 per = Period(parsed, freq=freq)
--> 538 start, end = per.start_time, per.end_time
    540 # GH 24076
    541 # If an incoming date string contained a UTC offset, need to localize
    542 # the parsed date to this offset first before aligning with the index's
    543 # timezone
    544 start = start.tz_localize(parsed.tzinfo)

File period.pyx:1666, in pandas._libs.tslibs.period.PeriodMixin.start_time.__get__()

File period.pyx:1992, in pandas._libs.tslibs.period._Period.to_timestamp()

File period.pyx:1172, in pandas._libs.tslibs.period.period_ordinal_to_dt64()

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 1-06-01 00:00:00
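
The error message shows that pandas parsed "06-01" as a date in year 1 (note the 1-06-01 00:00:00), which falls outside the range of nanosecond timestamps. One workaround, sketched here as a suggestion rather than the lesson's official solution, is to boolean mask on the month accessor, which selects the summer months across every year at once. The variable name summer_months is our own choice.

# Keep only the rows whose index month is June, July, or August,
# across all years in the dataset
summer_months = seattle_air[seattle_air.index.month.isin([6, 7, 8])]
summer_months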

Visualizations with DatetimeIndex¶

What would this data look like if we plotted the values?

In [8]:
sns.relplot(seattle_air, x="Time", y="PM2.5", kind="line")
Out[8]:
<seaborn.axisgrid.FacetGrid at 0x7a0f661be0d0>
[Line plot of PM2.5 over Time from April 2017 to April 2022]

This is a good start, but not so helpful for answering our research question about summer air quality. We can try a groupby on each year to produce a plot for each unique year, but it would be really nice if we could see all the years in a single plot.

In [12]:
seattle_air.index.year
Out[12]:
Index([2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
       ...
       2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022],
      dtype='int32', name='Time', length=43848)
In [13]:
# Instead of supplying a column name as a string, groupby can accept
# an equal-length series whose values indicate each row's group
seattle_air.groupby(seattle_air.index.year).plot()
Out[13]:
Time
2017    Axes(0.125,0.11;0.775x0.77)
2018    Axes(0.125,0.11;0.775x0.77)
2019    Axes(0.125,0.11;0.775x0.77)
2020    Axes(0.125,0.11;0.775x0.77)
2021    Axes(0.125,0.11;0.775x0.77)
2022    Axes(0.125,0.11;0.775x0.77)
dtype: object
[Six separate line plots of PM2.5 over Time, one for each year from 2017 to 2022]

Ideally, we would like to see all 6 line plots together on the same axes. However, notice that the plots all maintain their original datetime information: each plot is labeled with a different year because the datetime values record YEAR-MONTH-DAY. Without a common or shared x-axis, it is difficult to combine the 6 plots into one.

DatetimeIndex provides helpful accessors, including day_of_year. Because the day of the year is just a number, it offers a way to align the x-axis across different years.

In [14]:
seattle_air.index.day_of_year
Out[14]:
Index([96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
       ...
       96, 96, 96, 96, 96, 96, 96, 96, 96, 96],
      dtype='int32', name='Time', length=43848)

By combining these accessors, we can use seaborn to generate a line plot that combines each year of air quality data. Just as groupby can accept a series to determine the groups, seaborn plotting functions also accept a series as input whose values are used directly.

Based on the principles of visualization that we learned in the last lesson, what else can we improve about this line plot?

In [15]:
grid = sns.relplot(
    seattle_air,
    x=seattle_air.index.day_of_year,
    y="PM2.5",
    hue=seattle_air.index.year,
    kind="line",
    errorbar=None, # Much faster when we don't generate error bars
)
# When a column name is not given, seaborn uses the series name "Time"
# as the axis label, so we override it
grid.set(xlabel="Day of Year")
grid.legend.set(title="Year");
[Line plot of PM2.5 by Day of Year, with one colored line per year]

What's in a NaN?¶

If you look closely at the 6 plots of each year's data, you'll notice some gaps: the dataset has missing values, marked NaN. Let's replace the missing values using linear interpolation, which examines neighboring values to replace each NaN with a best estimate.
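
Before applying interpolation to the real data, here is a minimal illustration with made-up values showing what linear interpolation does:

# A tiny example series: linear interpolation fills each NaN with a value
# evenly spaced between its known neighbors
example = pd.Series([1.0, None, None, 4.0])
example.interpolate()  # 1.0, 2.0, 3.0, 4.0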

In [16]:
missing_values = seattle_air["PM2.5"].isna()
# Show the missing values
seattle_air[missing_values]
Out[16]:
PM2.5
Time
2017-04-07 07:00:00 NaN
2017-04-17 06:00:00 NaN
2017-04-17 07:00:00 NaN
2017-04-17 09:00:00 NaN
2017-04-28 09:00:00 NaN
... ...
2022-02-28 05:00:00 NaN
2022-03-14 05:00:00 NaN
2022-03-15 12:00:00 NaN
2022-03-15 13:00:00 NaN
2022-03-28 05:00:00 NaN

789 rows × 1 columns

In [17]:
seattle_air = seattle_air.interpolate()
# Show only the previously-missing values
seattle_air[missing_values]
Out[17]:
PM2.5
Time
2017-04-07 07:00:00 10.950000
2017-04-17 06:00:00 9.466667
2017-04-17 07:00:00 8.633333
2017-04-17 09:00:00 6.800000
2017-04-28 09:00:00 6.000000
... ...
2022-02-28 05:00:00 4.750000
2022-03-14 05:00:00 5.300000
2022-03-15 12:00:00 5.100000
2022-03-15 13:00:00 4.400000
2022-03-28 05:00:00 7.600000

789 rows × 1 columns

But why were these values NaN in the first place? A few years ago, I called the Puget Sound Clean Air Agency and waited on the line to speak to a data analyst. They provided several potential reasons why a row might be NaN.

  • Regular, biweekly maintenance
  • Break-in and vandalism issues
  • Internet connectivity issues
  • Regulatory calibration requirements
  • Equipment relocation, changes, or upgrades

Furthermore, they pointed out that the air quality sensors are calibrated for lower concentrations, so sensors may underreport values during times when there are higher concentrations of particulate matter.

These stories, and the context that situates our data, make up its data setting: the technical and the human processes that affect what information is captured in the data collection process and how the data are then structured. Sometimes, the creators of a dataset share parts of its data setting in the form of a datasheet. In Datasheets for Datasets, Timnit Gebru et al. (2018) propose many questions that should be answered when describing a dataset, which they categorized into questions about:

  • Motivation: why the dataset was created
  • Composition: what the data represents and how values relate to each other
  • Collection process: how the data was collected
  • Preprocessing/cleaning/labeling: how the data was converted into its current form
  • Uses: what the data should and should not be used for
  • Distribution: how the data will be shared with other parties
  • Maintenance: how the data will be maintained, hosted, and updated over time

Even when datasets are documented, there may yet be stories behind each and every value in the dataset that might only be surfaced through discussion with the dataset creators or subject matter experts. Data are local, even when they don't seem to be, because they are shaped by the practices of the people who created them.

Consider context¶

How do we put data locality and data setting into practice? Chapter 6 of Data Feminism by Catherine D'Ignazio and Lauren Klein, titled "The Numbers Don't Speak for Themselves," offers a call to action to consider context in our work.

Instead of taking data at face value and looking toward future insights, data scientists can first interrogate the context, limitations, and validity of the data under use. In other words: consider the cooking process that produces "raw" data.

How do we communicate this context—the underlying data setting—to readers? Consider these two plots, which only differ in their titles and subtitles.

[Bar plot titled "Mental Health in Jail" and subtitled "Rate of mental health diagnosis of inmates"]
[The same bar plot titled "Racism in Jail" and subtitled "People of color less likely to get mental health diagnosis"]

To explain the difference between the two visualizations, Catherine and Lauren write:

Which one of these graphics would you create? Which one should you create? The first—Mental Health in Jail—represents the typical way that the results of a data analysis are communicated. The title appears to be neutral and free of bias. This is a graphic about rates of mental illness diagnosis of incarcerated people broken down by race and ethnicity. The people are referred to as inmates, the language that the study used. The title does not mention race or ethnicity, or racism or health inequities, nor does the title point to what the data mean. But this is where additional questions about context come in. Are you representing only the four numbers that we see in the chart? Or are you representing the context from which they emerged?

The study that produced these numbers contains convincing evidence that we should distrust diagnosis numbers due to racial and ethnic discrimination. The first chart does not simply fail to communicate that but also actively undermines that main finding of the research. Moreover, the language used to refer to people in jail as inmates is dehumanizing, particularly in the context of the epidemic of mass incarceration in the United States. So, consider the second chart: Racism in Jail: People of Color Less Likely to Get Mental Health Diagnosis. This title offers a frame for how to interpret the numbers along the lines of the study from which they emerged. The research study was about racial disparities, so the title and content of this chart are about racial disparities. The people behind the numbers are people, not inmates. In addition, and crucially, the second chart names the forces of oppression that are at work: racism in prison.

Close reading¶

Often, we think of statistical data analysis and visualization as a type of distant reading of the data: one that aims to abstract and zoom out to the furthest (and, some might argue, most complete) view of the data. Yet, Yanni Loukissas argues for a close reading of the data in his final take-home message at the end of his book All Data Are Local:

Treat data as a point of contact, a landing, an opportunity to get closer, to learn to care about a subject, or the people and places beyond data. Do not mistake the availability of data as permission to remain at a distance.

In "this is an teenager", an interactive visualization by data journalist and professor Alvin Chang, we follow the lives of teenagers starting from 1997 through 2021 to see how adverse childhood experiences affect their life outcomes. How does Alvin take a closer reading of the National Longitudinal Survey of Youth dataset?

In [18]:
%%html
<iframe width="640" height="360" src="https://www.youtube-nocookie.com/embed/fKv1Mixv0Hk" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>