import pandas as pd

# Load the CSV, using the "Time" column as the index and parsing it as dates.
seattle_air = pd.read_csv("seattle_air.csv", index_col="Time", parse_dates=True)
# Group the rows by the year of their index timestamp and count the non-missing values per group.
seattle_air.groupby(seattle_air.index.year).count()

seattle_air.groupby(seattle_air.index.year)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7bcd743ca3d0>

seattle_air

data = seattle_air.dropna()

                     PM2.5
Time                      
2017-04-06 00:00:00    6.8
2017-04-06 01:00:00    5.3
2017-04-06 02:00:00    5.3
2017-04-06 03:00:00    5.6
2017-04-06 04:00:00    5.9
...                    ...
2025-01-27 19:00:00    9.0
2025-01-27 20:00:00    9.0
2025-01-27 21:00:00    9.0
2025-01-27 22:00:00   11.0
2025-01-27 23:00:00   13.0

[67002 rows x 1 columns]

seattle_air

# With inplace=True, dropna modifies seattle_air itself and returns None, so data is None here.
data = seattle_air.dropna(inplace=True)
data

seattle_air

another_seattle_air = pd.read_csv("seattle_air.csv", index_col="Time", parse_dates=True)

seattle_air = pd.read_csv("seattle_air.csv", index_col="Time", parse_dates=True)

l1 = [1, 2, 3]
l2 = [1, 2, 3]

l1[0] = 6

l1

[6, 2, 3]

l2

[1, 2, 3]

l1 == l2

True

l1 is l2

False

l1[0] = 6

l1 == l2

False

# Rebinds the name l1 to the very same list object as l2 (no copy is made).
l1 = l2

l2

[1, 2, 3]

l1[0] = 6
l1

[6, 2, 3]

l2

[6, 2, 3]

my_list = [1, 2, 3]

my_list = "hello world"

class DataFrame:
    """Toy stand-in for a DataFrame, used to show how methods act on ``self``."""

    def __init__(self, x=0):
        """Store ``x`` (0 when omitted) on the newly created instance."""
        self.x = x

    def add_value(self, x):
        """Print the runtime type of ``self``, then overwrite ``self.x`` with ``x``."""
        print(type(self))
        self.x = x

    def dropna(self):
        """Attach a ``columns`` attribute to this instance and return ``Ellipsis``."""
        self.columns = ["a", "b", "c"]
        return Ellipsis

df = DataFrame()

df.add_value(42)

<class '__main__.DataFrame'>

type(df)

__main__.DataFrame

type(seattle_air)

pandas.core.frame.DataFrame

df.dropna()
# Sort of but not really:
#DataFrame.dropna(df)

Ellipsis

df.add_value(42)
df.x

42

df2 = DataFrame()
df2.add_value(84)
df2.x

84

df.x

42

df3 = DataFrame()

df3.dropna()

Ellipsis

df3.columns

['a', 'b', 'c']

class DataFrame:
    """Represents two-dimensional tabular data structured around an index and column names."""

    def __init__(self, index, columns, data):
        """Initializes a new DataFrame object from the given index, columns, and tabular data."""
        print("Initializing DataFrame")
        self.index = index
        self.columns = columns
        self.data = data

    def dropna(self, inplace=False):
        """
        Drops all rows containing NaN from this DataFrame. If inplace, returns None and modifies
        self. If not inplace, returns a new DataFrame without modifying self.

        The [...] values below are placeholders standing in for the filtered index, columns, and
        data; this sketch does not implement any real NaN filtering.
        """
        print("Calling dropna")
        if inplace:
            # Rebind the attributes on self: assigning to bare local names here
            # would leave this object untouched, contradicting the contract above.
            self.columns = [...]
            self.index = [...]
            self.data = [...]
            return None
        return DataFrame([...], [...], [...])

    def __getitem__(self, column_or_indexer):
        """Given a column or indexer, returns the selection as a new Series or DataFrame object."""
        print("Calling __getitem__")
        if column_or_indexer in self.columns:
            return "Series" # placeholder for a Series
        return DataFrame([...], [...], [...])

    def __repr__(self):
        """Return a Python-interpretable string representation of this DataFrame."""
        # !r formats each field with repr() rather than str(), so that values such
        # as strings keep their quotes and the result can be pasted back into Python.
        return f"DataFrame({self.index!r}, {self.columns!r}, {self.data!r})"

    def __str__(self):
        """Return a human-readable string representation of this DataFrame."""
        return "My favorite DataFrame"

example = DataFrame([0, 1, 2], ["PM2.5"], [10, 20, 30])
example # display(example.__repr__())

Initializing DataFrame

DataFrame([0, 1, 2], ['PM2.5'], [10, 20, 30])

print(example) # print calls the .__str__ dunder method

My favorite DataFrame

print(repr(example))

# 1. repr(example) -> str
# 2. print(... some str... ) by calling that string's .__str__()

DataFrame([0, 1, 2], ['PM2.5'], [10, 20, 30])

example["PM2.5"]

Calling __getitem__

'Series'

example.__getitem__("PM2.5")

Calling __getitem__

'Series'

example["PM2.5"][0]

Calling __getitem__

'S'

example.__getitem__("PM2.5").__getitem__(0)

Calling __getitem__

'S'

staff[self, "Hours", "Iris"]
# becomes
staff.__getitem__(self, "Hours", "Iris")

# The translation above is slightly incorrect (but a useful mental model to keep in mind).
# The correct translation is actually:
staff[(self, "Hours", "Iris")]
# becomes
staff.__getitem__((self, "Hours", "Iris"))

seattle_air.loc[:, ["PM2.5"]]
# becomes
seattle_air.loc.__getitem__( (slice(None), ["PM2.5"]) )

# Why might the Pandas developers be concerned here with this assignment statement?
# Ambiguity here: What are we actually reassigning?
  # Are we changing the original DataFrame? Or the intermediate Series?

dfmi['one']['second'] = value
# becomes
dfmi.__getitem__('one').__setitem__('second', value)

example

<__main__.DataFrame at 0x7c05a32f5a50>

"Hello world"

'Hello world'

repr('Hello world')

"'Hello world'"

'Hello world'.__repr__()

"'Hello world'"

len('Hello world')

11

'Hello world'.__len__()

11

class Student:
    """..."""

    # In Java, there are things called access control modifieres like `private`.
    # We don't have this in Python. In fact, Python has no access control modifiers.
    # In Python, we have a convention to prefix variables names with an underscore
    # to indicate that they should not be modified or accessed by any code outside
    # the current class.
    # In your assessments, be sure to use private fields whenever possible.

    def __init__(self, student_number: int, filename: str) -> None:
        """..."""
        self._name: str = filename[:-4] # trimming-out the .txt part
        self._number: int = student_number
        self._courses: dict[str, str] = {}
        with open(filename) as f:
            for line in f.readlines():
                course, credits = line.split()
                # list[str]
                self._courses[course] = credits

    def __getitem__(self, course_name: str) -> str | None:
        """..."""
        # if course_name in self.courses:
        #     return self.courses[course_name]
        # by default, Python functions return None
        return self._courses.get(course_name) # number of credits

    def get_courses(self) -> list[str]:
        """..."""
        return list(self._courses) # by default, dictionaries loop over only keys

    def get_name(self) -> str:
        return self._name

    def get_number(self) -> int:
        return self._number


nicole = Student(1234567, "nicole.txt")
for course in nicole.get_courses():
    print(course, nicole[course])

CSE163 4
PHIL100 4
CSE390HA 1

!pip install -q nb_mypy
%reload_ext nb_mypy
%nb_mypy mypy-options --strict

12332.09s - pydevd: Sending message related to process being replaced timed-out after 5 seconds

Version 1.0.5

class University:
    """Represents a university by its name and the list of students enrolled in it."""

    # What does it mean to "enroll students"? Your answer is a class invariant.
    #   In this implementation, it means to add them to the self.students list.

    # Problem: A default of `students=[]` is evaluated only once, when the function is
    #   defined, so that one list would be shared amongst all instances of the University class.
    # Solution: Default to None and create a fresh list inside __init__.
    def __init__(self, name: str, students: list[Student] | None = None) -> None:
        """
        Initializes a new University with the given name and, optionally, a list of students.

        If students is omitted (or None), the university starts with its own empty list.
        A list that is passed in is stored as-is rather than copied.
        """
        if students is None:
            students = []
        self.name = name
        self.students = students

    def enrollments(self) -> list[Student]:
        """Returns a new list of the enrolled students, sorted by student name."""
        return sorted(self.students, key=Student.get_name)

    def enroll(self, student: Student) -> None:
        """Enrolls the given student by appending them to this university's student list."""
        self.students.append(student)


uw = University("Udub", [nicole])
uw.enrollments()

[<__main__.Student at 0x7da680af1fd0>]

sorted([5, 4, 3, 2, 1])

[1, 2, 3, 4, 5]

# Need to define some way of sorting (comparing pairwise) Student objects
sorted([nicole, Student(123, "nicole.txt")])

<cell>1: error: Name "nicole" is not defined  [name-defined]
<cell>1: error: Name "Student" is not defined  [name-defined]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[32], line 1
----> 1 sorted([nicole, Student(123, "nicole.txt")])

TypeError: '<' not supported between instances of 'Student' and 'Student'

# key parameter allows us to define how we want to sort the list
#   Specifically, give it a 1-argument function that returns something sortable
#     The 1-argument function takes an element from your list.

def sorting_key(student: "Student") -> int:
    """Returns the given student's number, for use as the `key` function when sorting Students."""
    # Student keeps its number in the private _number field, so go through the accessor.
    return student.get_number()

sorted([nicole, Student(123, "nicole.txt")], key=sorting_key)

<cell>5: error: Function is missing a type annotation  [no-untyped-def]

[<__main__.Student at 0x7da69a3b5810>, <__main__.Student at 0x7da69536cdd0>]

sorted([nicole, Student(123, "nicole.txt")], key=lambda student: student.number)

[<__main__.Student at 0x7da65d119ed0>, <__main__.Student at 0x7da69536cdd0>]

sorted([nicole, Student(123, "nicole.txt")], key=Student.get_number)

[<__main__.Student at 0x7da69b394b50>, <__main__.Student at 0x7da680af1fd0>]

sorted([nicole, Student(123, "nicole.txt")], key=student.number)

<cell>1: error: Name "student" is not defined  [name-defined]

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[45], line 1
----> 1 sorted([nicole, Student(123, "nicole.txt")], key=student.number)

NameError: name 'student' is not defined

sorted([nicole, Student(123, "nicole.txt")], key=Student.number)

<cell>1: error: No overload variant of "sorted" matches argument types "list[Student]", "int"  [call-overload]
<cell>1: note: Possible overload variants:
<cell>1: note:     def [SupportsRichComparisonT: SupportsDunderLT[Any] | SupportsDunderGT[Any]] sorted(Iterable[SupportsRichComparisonT], /, *, key: None = ..., reverse: bool = ...) -> list[SupportsRichComparisonT]
<cell>1: note:     def [_T] sorted(Iterable[_T], /, *, key: Callable[[_T], SupportsDunderLT[Any] | SupportsDunderGT[Any]], reverse: bool = ...) -> list[_T]

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[49], line 1
----> 1 sorted([nicole, Student(123, "nicole.txt")], key=Student.number)

AttributeError: type object 'Student' has no attribute 'number'

wsu = University("Wazzu")
wsu.enrollments()

[]

seattle_u = University("SeattleU")
seattle_u.enrollments()

[]

seattle_u.enroll(nicole)
seattle_u.enrollments()

[<__main__.Student at 0x7da680af1fd0>]

wsu.enrollments()

[<__main__.Student at 0x7da680af1fd0>]

	PM2.5
Time
2017	6283
2018	8540
2019	8597
2020	8683
2021	8664
2022	8625
2023	8409
2024	8558
2025	643

Objects¶

Reference semantics¶

Defining classes¶

DataFrame "Solution"¶

Using the Custom DataFrame¶

Practice: `Student` class¶

Type annotations¶

Practice: `University` class¶

Mutable default parameters¶

	PM2.5
Time
2017-04-06 00:00:00	6.8
2017-04-06 01:00:00	5.3
2017-04-06 02:00:00	5.3
2017-04-06 03:00:00	5.6
2017-04-06 04:00:00	5.9
...	...
2025-01-27 19:00:00	9.0
2025-01-27 20:00:00	9.0
2025-01-27 21:00:00	9.0
2025-01-27 22:00:00	11.0
2025-01-27 23:00:00	13.0

Objects¶

Reference semantics¶

Defining classes¶

DataFrame "Solution"¶

Using the Custom DataFrame¶

Practice: Student class¶

Type annotations¶

Practice: University class¶

Mutable default parameters¶

Practice: `Student` class¶

Practice: `University` class¶