File Processing¶

In this lesson, we'll introduce two ways to process files and synthesize what we've learned about debugging. By the end of this lesson, students will be able to:

  • Read text files line-by-line (line processing).
  • Read text files token-by-token (token processing).
  • Write doctests and debug programs.
In [1]:
import doctest

Opening files in Python¶

In computers, data is stored in files that can represent text documents, pictures, structured spreadsheet-like data, etc. For now, we'll focus on files that represent text data, which we indicate with the .txt file extension.

We can open and read files in Python using the built-in open function and specifying the path to the file. We will talk about file paths in a bit, but think of it as the full name of a file on a computer. The following code snippet opens the file path poem.txt and reads the text into the Python variable content.

In [2]:
with open("poem.txt") as f:
    content = f.read()
    if content.split()[0] == "she":
        # Note that indentation is how Python tells apart different blocks of code
        ...
    print(content)
she sells
sea
shells by
the sea shore

The with open(...) as f syntax negotiates access to the file with the computer's operating system by maintaining a file handle, which is assigned to the variable f. (You can use any variable name instead of f.) All the code contained in the with block has access to the file handle f. f.read() returns all the contents of the file as a single string.

Line processing¶

It's often useful to read a text file line-by-line so that you can process each line separately. We could accomplish this by calling split on the content of the file, but Python conveniently provides an f.readlines() method that returns the text of the file as a list of lines.
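The two approaches produce nearly the same result; a quick sketch using the string that f.read() would return for poem.txt:

```python
# The string that f.read() returns for poem.txt
content = "she sells\nsea\nshells by\nthe sea shore\n"

# Splitting on the newline character works, but the trailing newline
# produces an extra empty string at the end
print(content.split("\n"))    # ['she sells', 'sea', 'shells by', 'the sea shore', '']

# splitlines() handles the trailing newline for us
print(content.splitlines())   # ['she sells', 'sea', 'shells by', 'the sea shore']
```

Note that both of these drop the '\n' characters, whereas f.readlines() keeps them at the end of each line.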

The following code snippet prints out the file with a line number in front of each line. In this example, lines stores a list of the lines in the file, and the loop keeps a counter that it prints before each line.

In [7]:
# Q: What is the purpose of with?
with open("poem.txt") as f:
    lines = f.readlines()

# lines is a list of each line in the poem.txt file
line_num = 1
for line in lines:
    print(line_num, line[:-1]) # Slice off the newline character at the end
    line_num += 1
1 she sells
2 sea
3 shells by
4 the sea shore
In [12]:
first_line = lines[0]
first_line
Out[12]:
'she sells\n'
In [13]:
first_line[-1]
Out[13]:
'\n'
In [8]:
lines
Out[8]:
['she sells\n', 'sea\n', 'shells by\n', 'the sea shore\n']
In [14]:
# Do we have access to f after closing the file?
f
Out[14]:
<_io.TextIOWrapper name='poem.txt' mode='r' encoding='UTF-8'>
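The variable f still refers to the file handle, but the with block closed the underlying file on exit. A minimal, self-contained sketch (it writes its own copy of poem.txt so it can be run anywhere):

```python
# Write a small copy of poem.txt so this snippet stands on its own
with open("poem.txt", "w") as f:
    f.write("she sells\nsea\nshells by\nthe sea shore\n")

with open("poem.txt") as f:
    content = f.read()

print(f.closed)  # True: exiting the with block closed the file

try:
    f.read()  # reading from a closed handle fails
except ValueError as error:
    print(type(error).__name__)  # ValueError
```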

Token processing¶

It's also often useful to process each line of text token-by-token. A token is a generalization of the idea of a "word": a sequence of characters separated by whitespace. For example, the string 'I really <3 dogs' has 4 tokens in it: 'I', 'really', '<3', and 'dogs'.

Token processing extends the idea of line processing by splitting each line on whitespace using the split function. In this course, we will use "word" and "token" interchangeably.
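The example above can be checked directly with split, which splits on any run of whitespace:

```python
line = "I really <3 dogs"
tokens = line.split()  # split on whitespace
print(tokens)          # ['I', 'really', '<3', 'dogs']
print(len(tokens))     # 4

# Runs of spaces collapse: split() never produces empty tokens
print("the   sea    shore".split())  # ['the', 'sea', 'shore']
```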

In [17]:
with open("poem.txt") as f:
    lines = f.readlines()
    line_num = 1
    for line in lines:
        # Token processing: Splits each string (line) into separate tokens
        tokens = line.split()
        print(line_num, tokens)
        line_num += 1
1 ['she', 'sells']
2 ['sea']
3 ['shells', 'by']
4 ['the', 'sea', 'shore']
In [22]:
with open("poem.txt") as f:
    # f.readlines() -> list
    # Does a list have a split method? Python says no.
    # Instead, strings have a split method!
    result = f.readlines().split()

result
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[22], line 2
      1 with open("poem.txt") as f:
----> 2     result = f.readlines().split()
      4 result

AttributeError: 'list' object has no attribute 'split'
In [20]:
with open("poem.txt") as f:
    result = f.read()

result
Out[20]:
'she sells\nsea\nshells by\nthe sea shore\n'
In [21]:
# Whitespace characters, including newlines, are candidates for split points
result.split()
Out[21]:
['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']

Practice: Count odd-length tokens¶

How might we write a Python code snippet that takes the poem.txt file and prints the number of odd-length tokens per line?

In [23]:
def count_odd(path):
    """
    For the file path, prints out each line number followed by the number of odd-length tokens.

    >>> count_odd("poem.txt")
    1 2
    2 1
    3 0
    4 3
    """
    # How many odd-length tokens are on each line?
    # Need to loop over all the lines, and split each line so we can inspect each token
    with open(path) as f:
        lines = f.readlines()
        line_num = 1
        for line in lines:
            num_odd = 0
            tokens = line.split()
            # A line can contain several tokens, so inspect each one
            for token in tokens:
                if len(token) % 2 == 1: # Token is odd-length
                    num_odd += 1
            print(line_num, num_odd)
            line_num += 1


doctest.run_docstring_examples(count_odd, globals())
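As a sketch of an alternative, the manual line_num counter can also be replaced with Python's built-in enumerate, which pairs each line with its index (start=1 makes the numbering begin at 1):

```python
def count_odd_enumerate(path):
    """
    Same behavior as count_odd, but enumerate supplies the line numbers
    so we don't maintain a counter by hand.
    """
    with open(path) as f:
        for line_num, line in enumerate(f.readlines(), start=1):
            num_odd = 0
            for token in line.split():
                if len(token) % 2 == 1:  # token has an odd number of characters
                    num_odd += 1
            print(line_num, num_odd)
```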
In [24]:
len(first_line) # "she sells\n" is 10 characters if the newline is one character
Out[24]:
10
In [25]:
len(lines)
Out[25]:
4
In [26]:
len(1)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[26], line 1
----> 1 len(1)

TypeError: object of type 'int' has no len()
In [27]:
len("1")
Out[27]:
1

Practice: Debugging first tokens¶

Let's help your coworker debug a function first_tokens, which should return a list containing the first token from each line in the specified file path. Unfortunately, your coworker only provided their current code without any information about the error message, sample inputs to reproduce the problem, or a description of what they already tried.

Let's practice debugging this together and compose a list of recommendations for how to ask questions and get help.

In [37]:
def first_tokens(path):
    """
    >>> first_tokens("poem.txt")
    ['she', 'sea', 'shells', 'the']
    """
    result = []
    with open(path) as f:
        for line in f.readlines():
            # Bug fix: line.split() returns a new list, so assign it to a
            # variable before indexing into it
            tokens = line.split()
            result.append(tokens[0]) # the first token of the line, not the first character
    return result


doctest.run_docstring_examples(first_tokens, globals())
In [33]:
result = []
result += tokens[0]
result
Out[33]:
['t', 'h', 'e']
In [35]:
result = []
# result += tokens[0] behaves like an .extend call
result.extend(tokens[0])
# .extend loops over all the elements of the thing you want to add, and 
#  adds each element one at a time to the current list
result
Out[35]:
['t', 'h', 'e']
In [36]:
result = []
result.append(tokens[0])
# .append adds exactly the one item you give it to the end of the list
result
Out[36]:
['the']
In [34]:
[] + "the"
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[34], line 1
----> 1 [] + "the"

TypeError: can only concatenate list (not "str") to list
In [38]:
result = []
result.extend(tokens)
# .extend loops over all the elements of the thing you want to add, and 
#  adds each element one at a time to the current list
result
Out[38]:
['the', 'sea', 'shore']
In [40]:
tokens
Out[40]:
['the', 'sea', 'shore']
In [39]:
result = []
result += [tokens[0]]
result
Out[39]:
['the']
In [41]:
result = []
result += tokens[0:1] # A slice returns a new list!
result
Out[41]:
['the']
In [42]:
result = []
result += [ [ tokens[0] ] ]
result
Out[42]:
[['the']]
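To summarize the experiments above in one place (a sketch reusing the tokens list from the last cells):

```python
tokens = ["the", "sea", "shore"]

appended = []
appended.append(tokens[0])   # adds the one item as-is
# appended == ['the']

extended = []
extended.extend(tokens[0])   # iterates over the string, one character at a time
# extended == ['t', 'h', 'e']

concatenated = []
concatenated += [tokens[0]]  # += on a list behaves like extend; wrap the item in a list
# concatenated == ['the']
```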