import doctest

def words_in_common(file1, file2):
    """
    Returns a set of words that are in both of the inputted files.

    A "word" is any whitespace-separated token. Each file's text is
    lowercased before it is split, so the comparison ignores case and the
    returned words are all lowercase. Punctuation is not stripped.

    Args:
        file1: Path of the first file to read.
        file2: Path of the second file to read.

    Returns:
        A set holding every token that occurs in both files; an empty set
        if they share none (or if either file is empty).

    >>> words_in_common("twister.txt", "simple.txt")
    {'the'}
    >>> sorted(words_in_common("twister.txt", "pepper.txt"))
    ['peppers', 'peter', 'pickled', 'the']
    >>> words_in_common("pepper.txt", "empty.txt")
    set()
    """
    with open(file1) as f:
        # Lowercase the whole text *before* splitting: split() returns a
        # list, and a list has no lower() method.
        first_words = set(f.read().lower().split())

    with open(file2) as f:
        second_words = set(f.read().lower().split())

    # Both operands must be sets for the & (intersection) operator.
    return first_words & second_words



# Run only the doctest examples found in words_in_common's docstring.
# doctest prints a report for failing examples and prints nothing when all pass.
doctest.run_docstring_examples(words_in_common, globals())

def longest_word_by_letter(path):
    """
    Maps each lowercased first character of the words in a file to the length
    of the longest word that begins with that character.

    Words are whitespace-separated tokens. Keys appear in the order in which
    each starting character is first seen in the file. An empty file yields
    an empty dictionary.

    >>> longest_word_by_letter("simple.txt")
    {'t': 5, 's': 8, 'i': 2}
    >>> longest_word_by_letter("twister.txt")
    {'p': 7, 'a': 1, 'o': 2, 'i': 2, 'w': 7, 't': 3}
    >>> longest_word_by_letter("empty.txt")
    {}
    """
    with open(path) as source:
        tokens = source.read().split()

    longest = {}
    for token in tokens:
        initial = token[0].lower()
        size = len(token)
        # split() never yields an empty token, so size >= 1 and an unseen
        # initial is always recorded on its first occurrence.
        if size > longest.get(initial, 0):
            longest[initial] = size
    return longest
    
    


# Run only the doctest examples found in longest_word_by_letter's docstring.
# doctest prints a report for failing examples and prints nothing when all pass.
doctest.run_docstring_examples(longest_word_by_letter, globals())

def dna_match_score(seq1, seq2):
    """
    Returns the alignment score of two DNA sequences of equal length.

    The sequences are compared position by position:
      * identical characters score +2 (this check comes first, so a gap
        aligned with a gap also scores +2);
      * otherwise, if either character is the gap marker "-", score -2;
      * otherwise (two different bases), score -1.

    Positions are taken from seq1, so seq2 must be at least as long as seq1;
    a shorter seq2 raises IndexError and any extra tail of seq2 is ignored.

    >>> dna_match_score("-ATGC", "CATGT")
    3
    >>> dna_match_score("ATGC", "ATGC")
    8
    >>> dna_match_score("-AT", "C-T")
    -2
    """
    total = 0
    for position, base1 in enumerate(seq1):
        base2 = seq2[position]
        if base1 == base2:
            total += 2
            continue
        # The characters differ: a gap on either side costs more than a
        # plain mismatch.
        has_gap = base1 == "-" or base2 == "-"
        total -= 2 if has_gap else 1
    return total
    



# Run only the doctest examples found in dna_match_score's docstring.
# doctest prints a report for failing examples and prints nothing when all pass.
doctest.run_docstring_examples(dna_match_score, globals())

# Run every doctest in this module at once. testmod() returns a TestResults
# summary with the number of failed and attempted examples.
doctest.testmod()

# Output of doctest.testmod(): TestResults(failed=0, attempted=9)

# Data Structures and Files

## Group Activity: Debug words in common

## Group Activity: Longest word by letter

## Group Activity: DNA match score

## Testing

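# Trace of dna_match_score("-ATGC", "CATGT"); the per-position scores sum to 3:
#
# | i | `seq1` | `seq2` | score |
# |---|--------|--------|-------|
# | 0 | -      | C      | -2    |
# | 1 | A      | A      | +2    |
# | 2 | T      | T      | +2    |
# | 3 | G      | G      | +2    |
# | 4 | C      | T      | -1    |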