import doctest

# What, if any, is the difference between these two: the dict `d` and the list `l`?

d = {0: 'a', 1: 'b', 2: 'c'}
l = ['a', 'b', 'c']

len(d)

3

len(l)

3

d[0]

'a'

l[0]

'a'

l[4] = 'aardvark'
l[0]

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[13], line 1
----> 1 l[4] = 'aardvark'
      2 l[0]

IndexError: list assignment index out of range

d1 = {0: 'a', 1: 'b', 2: 'c'}
d2 = {1: 'b', 0: 'a', 2: 'c'}
d1 == d2

True

l[0] += 'a'

l[0]

'aa'

d1[4] = 'hello'
d1[4]

'hello'

d.keys()

dict_keys([0, 1, 2])

d[120398] = "Hello"

d["Hello"] = "World"

def count_tokens(path):
    """
    Returns a dictionary mapping each whitespace-separated token in the file
    at the given path to the number of times that token occurs.

    Tokens are case-sensitive and appear in the dictionary in order of first
    occurrence. An empty file produces an empty dictionary.
    """
    tally = {}
    with open(path) as source:
        tokens = source.read().split()
    for word in tokens:
        if word in tally:
            tally[word] += 1
        else:
            # First sighting of this token.
            tally[word] = 1
    return tally


%time count_tokens("moby-dick.txt")['Moby']

CPU times: user 44.9 ms, sys: 4.64 ms, total: 49.5 ms
Wall time: 48.2 ms

76

l[123]

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[31], line 1
----> 1 l[123]

IndexError: list index out of range

d['doesnt exist'] = "exists"

def count_tokens(path):
    """
    Returns a collections.Counter mapping each whitespace-separated token in
    the file at the given path to the number of times that token occurs.
    """
    from collections import Counter

    with open(path) as source:
        text = source.read()
    # Counter tallies every element of the iterable it is given.
    return Counter(text.split())


%time count_tokens("moby-dick.txt")

dictionary = {"a": 1, "b": 2, "c": 3}
for key in dictionary:
    print(key, dictionary[key])

a 1
b 2
c 3

def multi():
    """Returns two values at once, packed into a single tuple: (5, 7)."""
    pair = (5, 7)
    return pair

a, b = multi()
print(a, b)

5 7

for i in dictionary.items():
    print(i)

('a', 1)
('b', 2)
('c', 3)

dictionary = {"a": 1, "b": 2, "c": 3}
for key, value in dictionary.items():
    print(key, value)

a 1
b 2
c 3

def max_item(nums):
    """
    Returns the largest item in the given non-empty list of numbers.

    Raises IndexError if nums is empty, since there is no first item to
    start the comparison from.

    >>> max_item([12, 23, 1287675476834])
    1287675476834
    >>> max_item([-3, -1, -2])
    -1
    >>> max_item([7])
    7
    """
    # Start from the first item so lists of all-negative numbers work too.
    max_n = nums[0]
    for n in nums:
        if n > max_n:
            max_n = n
    return max_n

doctest.run_docstring_examples(max_item, globals())

words = {"green": 2, "eggs": 6, "and": 3, "yam": 2}
words[None]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[57], line 2
      1 words = {"green": 2, "eggs": 6, "and": 3, "yam": 2}
----> 2 words[None]

KeyError: None

words['yam']

2

def most_frequent(counts):
    """
    Returns the token in the given dictionary with the highest count, or None if empty.

    If several tokens share the highest count, the one that appears first in
    the dictionary is returned. Counts may be zero or negative.

    >>> most_frequent({"green": 2, "eggs": 6, "and": 3, "yam": 2})
    'eggs'
    >>> most_frequent({"zero": 0, "negative": -3})
    'zero'
    >>> most_frequent({}) # None is not displayed as output

    """
    # max() compares the tokens by their count and falls back to None only
    # when the dictionary is empty; no artificial starting threshold of 0 is
    # involved, so zero and negative counts are handled correctly.
    return max(counts, key=counts.get, default=None)


doctest.run_docstring_examples(most_frequent, globals())

dictionary = {"a": 1, "b": 2, "c": 3}
for key, value in dictionary.items():
    print(key, value)

# Print every line of the poem prefixed with its 0-based line number.
with open("poem.txt") as f:
    # Iterating over the file object yields one line at a time, so there is
    # no need to build the full list with readlines() first.
    for i, line in enumerate(f):
        # Remove only the trailing newline. Slicing with line[:-1] would chop
        # a real character off a final line that does not end in a newline.
        print(i, line.rstrip("\n"))

0 she sells
1 sea
2 shells by
3 the sea shore

# Three parallel lists of different lengths (5, 5 and 2 items).
arabic_nums = [  1,    2,     3,    4,   5]
alpha_nums  = ["a",  "b",   "c",  "d", "e"]
roman_nums  = ["i", "ii"]

# zip pairs up the items at the same position in each list and stops as soon
# as the shortest list (roman_nums) runs out, so this loop body runs only twice.
# Each zipped tuple is unpacked straight into three loop variables.
for arabic, alpha, roman in zip(arabic_nums, alpha_nums, roman_nums):
    print(arabic, alpha, roman)
    # print(items)

1 a i
2 b ii

import csv

# Read staff.csv and print each data row.
# newline='' hands newline handling over to the csv module, as its
# documentation requires; without it, newlines embedded in quoted fields are
# not interpreted correctly.
with open('staff.csv', newline='') as f:
    # DictReader uses the first row as the header and yields one dictionary
    # per remaining row, mapping column name to the cell's text (a string).
    reader = csv.DictReader(f)
    for r in reader:
        print(r)

{'Name': 'Anna', 'Hours': '20'}
{'Name': 'Iris', 'Hours': '15'}
{'Name': 'Abiy', 'Hours': '10'}
{'Name': 'Gege', 'Hours': '12'}

staff = [
    {"Name": "Anna", "Hours": 20},
    {"Name": "Iris", "Hours": 15},
    {"Name": "Abiy", "Hours": 10},
    {"Name": "Gege", "Hours": 12},
]
type(staff)

list

type(staff[0])

dict

staff[1]['Hours']

15

total_hours = 0
for ta in staff:
    total_hours += ta["Hours"]
total_hours

57

for ta in staff:
    if ta["Name"] == "Iris":
        print(ta["Hours"])

15

def largest_earthquake_place(path):
    """
    Returns the name of the place with the largest-magnitude earthquake in the specified CSV file.

    Rows with a missing magnitude are ignored. If several earthquakes share the
    largest magnitude, the place of the first such row is returned. Returns
    None if the file has no rows with a magnitude.

    >>> largest_earthquake_place("earthquakes.csv")
    'Northern Mariana Islands'
    """
    import pandas as pd
    # One dictionary per CSV row, keyed by column name.
    earthquakes = pd.read_csv(path).to_dict("records")

    # NOTE(review): the "name" and "magnitude" column names are taken from the
    # data preview shown in this notebook -- confirm against earthquakes.csv.
    max_name = None
    max_magnitude = None
    for earthquake in earthquakes:
        magnitude = earthquake["magnitude"]
        if pd.isna(magnitude):
            # A missing value compares False against everything and would
            # otherwise get stuck as the running maximum.
            continue
        if max_magnitude is None or magnitude > max_magnitude:
            max_magnitude = magnitude
            max_name = earthquake["name"]
    return max_name


doctest.run_docstring_examples(largest_earthquake_place, globals())

id	year	month	day	latitude	longitude	name	magnitude
nc72666881	2016	7	27	37.672	-121.619	California	1.43
us20006i0y	2016	7	27	21.515	94.572	Burma	4.9
nc72666891	2016	7	27	37.577	-118.859	California	0.06
nc72666896	2016	7	27	37.596	-118.995	California	0.4
nn00553447	2016	7	27	39.378	-119.845	Nevada	0.3

CSV Data (& Dictionaries)

Dictionaries

Dictionary functions

None in Python

Loop unpacking

Comma-separated values

Practice: Largest earthquake place

Name	Hours
Anna	20
Iris	15
Abiy	10
Gege	12