import codecs
import random

import numpy as np
from sklearn.preprocessing import normalize

from generate import GENERATE

# Build the word -> index dictionary from the vocabulary file.
# One word per line; its line number becomes its row/column index in the
# bigram count matrix.
word_index_dict = {}
vocab = codecs.open("brown_vocab_100.txt", "r", encoding="utf-16")
for i, line in enumerate(vocab):
    word_index_dict[line.rstrip()] = i
vocab.close()

f = codecs.open("brown_100.txt", encoding="utf-16")

# counts[i][j] = number of times vocabulary word j immediately follows word i.
vocab_size = len(word_index_dict)
counts = np.zeros((vocab_size, vocab_size))

# Iterate through the corpus and update bigram counts.
# NOTE(review): assumes each corpus line is whitespace-tokenized and every
# lowercased token appears in the vocabulary — confirm against the data files.
for line in f:
    words = line.lower().split()
    for prev_word, curr_word in zip(words, words[1:]):
        counts[word_index_dict[prev_word]][word_index_dict[curr_word]] += 1
f.close()

# Row-normalize counts into conditional probabilities:
# probs[i][j] = P(word_j | word_i).  L1 norm makes each nonzero row sum to 1.
probs = normalize(counts, norm="l1", axis=1)

# Write out the bigram probabilities, one nonzero entry per line as
# "prev_word curr_word probability".
index_to_word = {i: w for w, i in word_index_dict.items()}
with open("bigram_probs.txt", "w", encoding="utf-8") as out:
    rows, cols = np.nonzero(probs)
    for r, c in zip(rows, cols):
        out.write(f"{index_to_word[r]} {index_to_word[c]} {probs[r][c]}\n")