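Update textClassifierHATT.py: skip words whose index is >= MAX_NB_WORDS when filling the data tensor, so they no longer consume a word slot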

richliao · web-flow · commit 249fd7c17ef1 · 2017-01-11T10:49:17.000-05:00
diff --git a/textClassifierHATT.py b/textClassifierHATT.py
@@ -69,9 +69,11 @@ def clean_str(string):
     for j, sent in enumerate(sentences):
         if j< MAX_SENTS:
             wordTokens = text_to_word_sequence(sent)
-            for k, word in enumerate(wordTokens):
-                if k<MAX_SENT_LENGTH:
+            k=0
+            for _, word in enumerate(wordTokens):
+                if k<MAX_SENT_LENGTH and tokenizer.word_index[word]<MAX_NB_WORDS:
                     data[i,j,k] = tokenizer.word_index[word]
+                    k=k+1                    
                     
 word_index = tokenizer.word_index
 print('Total %s unique tokens.' % len(word_index))