"""Word-level minimum edit distance between two SemCor sentences.

The script loads two sentences from the NLTK SemCor corpus, fills a cost
matrix and a matching edit-operation matrix with dynamic programming, and
prints the minimum cost followed by the sequence of operations that turns
sentence1 into sentence2.
"""
import argparse

from nltk.corpus import wordnet  # noqa: F401  (kept: imported by the original skeleton)
from nltk.corpus import semcor


# Debug function to print out the matrix.
# Taken from https://stackoverflow.com/questions/13214809/pretty-print-2d-python-list
def print_matrix(matrix):
    """Pretty-print a rectangular 2D list with tab-separated, padded columns."""
    s = [[str(e) for e in row] for row in matrix]
    lens = [max(map(len, col)) for col in zip(*s)]
    fmt = '\t'.join('{{:{}}}'.format(x) for x in lens)
    table = [fmt.format(*row) for row in s]
    print('\n'.join(table))


def ins_cost(word):
    """Return the cost of inserting ``word``.

    Unit cost, as in the Levenshtein distance used by the example at the
    bottom of this file.
    """
    return 1


def del_cost(word):
    """Return the cost of deleting ``word`` (unit cost, Levenshtein)."""
    return 1


def sub_cost(word1, word2):
    """Return the cost of substituting ``word1`` with ``word2``.

    Identical words cost 0; any other substitution costs 1 (Levenshtein).
    NOTE(review): ``wordnet`` is imported by the skeleton, presumably for a
    similarity-based substitution cost -- confirm against the assignment.
    """
    return 0 if word1 == word2 else 1


def build_matrices(sentence1, sentence2):
    """Fill the cost and edit-operation matrices for the two sentences.

    Both matrices have ``len(sentence1) + 1`` rows and ``len(sentence2) + 1``
    columns.  ``cmatrix[i][j]`` is the minimum cost of turning the first ``i``
    words of ``sentence1`` into the first ``j`` words of ``sentence2``;
    ``ematrix[i][j]`` is the last operation on that cheapest path, one of
    '=' (words match), 'INS', 'DEL' or 'SUB'.  ``ematrix[0][0]`` stays ''.

    Returns the pair ``(cmatrix, ematrix)``.
    """
    n = len(sentence1)
    m = len(sentence2)
    cmatrix = [[0] * (m + 1) for _ in range(n + 1)]
    ematrix = [[''] * (m + 1) for _ in range(n + 1)]

    # Column 0: reaching the empty target means deleting every source word.
    for i in range(1, n + 1):
        cmatrix[i][0] = cmatrix[i - 1][0] + del_cost(sentence1[i - 1])
        ematrix[i][0] = 'DEL'
    # Row 0: starting from the empty source means inserting every target word.
    for j in range(1, m + 1):
        cmatrix[0][j] = cmatrix[0][j - 1] + ins_cost(sentence2[j - 1])
        ematrix[0][j] = 'INS'

    # Populate the matrices with dynamic programming.
    for i in range(1, n + 1):
        word1 = sentence1[i - 1]
        for j in range(1, m + 1):
            word2 = sentence2[j - 1]
            # min() keeps the first minimum, so ties are broken in the order
            # DEL, INS, diagonal.
            candidates = [
                (cmatrix[i - 1][j] + del_cost(word1), 'DEL'),
                (cmatrix[i][j - 1] + ins_cost(word2), 'INS'),
                (cmatrix[i - 1][j - 1] + sub_cost(word1, word2),
                 '=' if word1 == word2 else 'SUB'),
            ]
            cmatrix[i][j], ematrix[i][j] = min(candidates, key=lambda c: c[0])

    return cmatrix, ematrix


def trace_operations(cmatrix, ematrix):
    """Walk back from the bottom-right cell and list the chosen operations.

    Returns a list of ``(operation, cost)`` pairs in left-to-right order,
    where ``cost`` is the amount that single operation added to the total.
    """
    i = len(cmatrix) - 1
    j = len(cmatrix[0]) - 1
    operations = []
    while i > 0 or j > 0:
        op = ematrix[i][j]
        if op == 'DEL':
            prev_i, prev_j = i - 1, j
        elif op == 'INS':
            prev_i, prev_j = i, j - 1
        else:  # '=' or 'SUB' both consume one word from each sentence
            prev_i, prev_j = i - 1, j - 1
        operations.append((op, cmatrix[i][j] - cmatrix[prev_i][prev_j]))
        i, j = prev_i, prev_j
    operations.reverse()
    return operations


def main():
    """Parse arguments, load the two sentences, and print the results."""
    # NOTE(review): the skeleton does not say what the arguments are; this
    # assumes two integer indices into semcor.sents() -- TODO confirm.
    parser = argparse.ArgumentParser(
        description='Minimum edit distance between two SemCor sentences.')
    parser.add_argument('index1', type=int,
                        help='index of the source sentence in semcor.sents()')
    parser.add_argument('index2', type=int,
                        help='index of the target sentence in semcor.sents()')
    args = parser.parse_args()

    sentences = semcor.sents()
    sentence1 = list(sentences[args.index1])
    sentence2 = list(sentences[args.index2])

    print(' '.join(sentence1))
    print(' '.join(sentence2))

    cmatrix, ematrix = build_matrices(sentence1, sentence2)

    # Output the minimum cost computed by the edit distance algorithm.
    print(cmatrix[-1][-1])

    # Output the sequence of operation types to be performed on sentence1 that
    # transform it to sentence2 with minimum cost. Each operation is followed
    # by its individual cost.
    # E.g., '= 0 = 0 INS 1 = 0 = 0 SUB 1 DEL 1' for the Levenshtein distance
    # if sentence1 is 'A Z Q R X A' and sentence2 is 'A Z J Q R Y'.
    print(' '.join('{} {}'.format(op, cost)
                   for op, cost in trace_operations(cmatrix, ematrix)))


if __name__ == '__main__':
    main()