import doctest

# Build the list 0..9 the long way: start with an empty list and append
# one value per loop iteration.
nums = []
for i in range(10):
    nums.append(i)
nums  # evaluates to the finished list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

[i for i in range(10)]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

[i ** 2 for i in range(10)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

[i ** 2 for i in range(10) if i % 2 == 0]

[0, 4, 16, 36, 64]

words = "I saw a dog today".split()
[word[0] for word in words if len(word) >= 2]

['s', 'd', 't']

def count_odd(path):
    """
    Prints one row per line of the file at path: the 1-based line number,
    then how many whitespace-separated tokens on that line have odd length.

    Nothing is returned; the rows go to standard output.

    >>> count_odd("poem.txt")
    1 2
    2 1
    3 0
    4 3
    """
    with open(path) as f:
        # enumerate(..., start=1) supplies the line number alongside each line.
        for number, text in enumerate(f.readlines(), start=1):
            odd_tokens = [word for word in text.split() if len(word) % 2 != 0]
            print(number, len(odd_tokens))

doctest.run_docstring_examples(count_odd, globals())

def fun_numbers(start, stop):
    """
    Returns an increasing list of all fun numbers between start (inclusive) and stop (exclusive).
    A fun number is defined as a number that is either divisible by 2 or divisible by 5.

    Returns an empty list when the range contains no fun numbers (including
    when start >= stop).

    >>> fun_numbers(2, 16)
    [2, 4, 5, 6, 8, 10, 12, 14, 15]
    >>> fun_numbers(7, 8)
    []
    """
    # range() already yields the candidates in increasing order, so filtering
    # it keeps the result sorted without an explicit sort.
    return [n for n in range(start, stop) if n % 2 == 0 or n % 5 == 0]

doctest.run_docstring_examples(fun_numbers, globals())

1, 2, 3

(1, 2, 3)

a = tuple([1, 2, 3])
a

(1, 2, 3)

a[2]

3

a[2] = 0 # tuples are immutable!

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[11], line 1
----> 1 a[2] = 0 # tuples are immutable!

TypeError: 'tuple' object does not support item assignment

len(a)

3

for value in a:
    print(value)

1
2
3

def first_two_letters(word):
    """
    Returns a 2-tuple holding the items at index 0 and index 1 of word.

    Raises IndexError when word has fewer than two items.
    """
    first = word[0]
    second = word[1]
    return (first, second)

a, b = first_two_letters('goodbye')
a

'g'

returnvalue = first_two_letters('goodbye')
returnvalue[0]

'g'

# Rebind nums to an empty set and add values one at a time; a set keeps at
# most one copy of each value.
nums = set()
nums.add(1)
nums.add(2)
nums.add(3)
nums.add(2) # duplicate ignored
nums.add(-1)
nums  # evaluates to the set's current contents

{-1, 1, 2, 3}

def count_unique(path):
    """
    Returns the number of distinct whitespace-separated tokens in the file at path.

    Distinct tokens are collected in a list, so every membership test is a
    linear scan over the tokens seen so far.
    """
    # NOTE(review): looks like the list-based baseline that the set-based
    # count_unique later in this file is timed against -- confirm before
    # optimizing it.
    seen = []
    with open(path) as f:
        lines = f.readlines()
    for line in lines:
        for word in line.split():
            if word in seen:
                continue
            seen.append(word)
    return len(seen)


%time count_unique("moby-dick.txt")

CPU times: user 7.53 s, sys: 13.3 ms, total: 7.54 s
Wall time: 7.54 s

32553

def count_unique(path):
    """
    Returns the number of distinct whitespace-separated tokens in the file at path.

    Tokens are gathered into a set, which stores each distinct token once.
    """
    with open(path) as f:
        # A set comprehension over the file iterator avoids building the
        # intermediate list of lines and the intermediate list of tokens.
        return len({token for line in f for token in line.split()})

%time count_unique("moby-dick.txt")

CPU times: user 29.7 ms, sys: 13.1 ms, total: 42.8 ms
Wall time: 41.1 ms

32553

def area_codes(phone_numbers):
    """
    Returns the number of unique area codes in the given sequence.

    Each phone number is treated as a dash-separated string; the area code
    is the text before the first dash. An empty sequence gives 0.

    >>> area_codes([
    ...     '123-456-7890',
    ...     '206-123-4567',
    ...     '123-000-0000',
    ...     '425-999-9999'
    ... ])
    3
    """
    # The set keeps one copy of each area code, so its size is the answer.
    return len({number.split("-")[0] for number in phone_numbers})

doctest.run_docstring_examples(area_codes, globals())

empty_dictionary = dict()
empty_dictionary

{}

# Fill a dictionary one entry at a time with dict[key] = value assignments.
lecture_schedule = dict()
lecture_schedule["06/17"] = "welcome-and-control-structures"
lecture_schedule["06/19"] = "holiday: Juneteenth"
lecture_schedule["06/21"] = "files-and-data-structures"
lecture_schedule  # evaluates to the dictionary's current contents

{'06/17': 'welcome-and-control-structures',
 '06/19': 'holiday: Juneteenth',
 '06/21': 'files-and-data-structures'}

lecture_schedule.keys()

dict_keys(['06/17', '06/19', '06/21'])

# This raises TypeError (see the traceback that follows): the dict_keys view
# returned by .keys() has a length but cannot be indexed with [i].
for i in range(len(lecture_schedule.keys())):
    print(lecture_schedule.keys()[i])

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[23], line 2
      1 for i in range(len(lecture_schedule.keys())):
----> 2     print(lecture_schedule.keys()[i])

TypeError: 'dict_keys' object is not subscriptable

for k in lecture_schedule.keys():
    print(k)

06/17
06/19
06/21

lecture_schedule.values()

dict_values(['welcome-and-control-structures', 'holiday: Juneteenth', 'files-and-data-structures'])

lecture_schedule.items()

dict_items([('06/17', 'welcome-and-control-structures'), ('06/19', 'holiday: Juneteenth'), ('06/21', 'files-and-data-structures')])

for k, v in lecture_schedule.items():
    print(k, v)

06/17 welcome-and-control-structures
06/19 holiday: Juneteenth
06/21 files-and-data-structures

def count_tokens(path):
    """
    Returns a dict mapping each whitespace-separated token in the file at
    path to the number of times it occurs.

    Keys appear in order of each token's first occurrence.
    """
    tally = {}
    with open(path) as f:
        text = f.read()
    for word in text.split():
        # .get supplies 0 the first time a token is seen.
        tally[word] = tally.get(word, 0) + 1
    return tally

%time count_tokens("moby-dick.txt")

def count_tokens(path):
    """
    Returns a collections.Counter mapping each whitespace-separated token in
    the file at path to the number of times it occurs.
    """
    from collections import Counter

    with open(path) as f:
        tokens = f.read().split()
    return Counter(tokens)

%time count_tokens("moby-dick.txt")

def count_by_first_letter(words):
    """
    Returns a dict mapping each first letter to the number of words in
    words that start with it.

    Keys appear in order of first occurrence. An empty sequence gives an
    empty dict. Raises IndexError if any word is empty, since it has no
    first letter.

    >>> count_by_first_letter(['cats', 'dogs', 'deers'])
    {'c': 1, 'd': 2}
    """
    counts = {}
    for word in words:
        first_letter = word[0]
        # A bare `counts[first_letter] += 1` raises KeyError the first time a
        # letter is seen; .get starts each new letter at 0 instead.
        counts[first_letter] = counts.get(first_letter, 0) + 1
    return counts

count_by_first_letter(['cats', 'dogs', 'deers'])

with open(...) as f:
    f.read()
    f.readlines()
    f.readline()

[item for .. in ... if .. ]

tuple() [] len

set()

dict() .keys() .values() .items()

Data Structures

List comprehensions

Practice: Fun numbers

Tuples

Sets

Practice: Area codes

Dictionaries

Practice: Count words by first letters

Review of today's lecture