@@ -343,6 +343,7 @@ def update_word_parameters(self, X, words):
343
343
def preprocess_anchors (self , anchors ):
344
344
"""Preprocess anchors so that it is a list of column indices if not already"""
345
345
if anchors is not None :
346
+ processed_anchors = list ()
346
347
for n , anchor_list in enumerate (anchors ):
347
348
# Check if list of anchors or a single str or int anchor
348
349
if type (anchor_list ) is not list :
@@ -356,19 +357,21 @@ def preprocess_anchors(self, anchors):
356
357
if anchor in self .word2col_index :
357
358
new_anchor_list .append (self .word2col_index [anchor ])
358
359
else :
359
- w = 'Anchor word not in word column labels provided to CorEx: {}' .format (anchor )
360
- warnings . warn (w )
360
+ w = 'WARNING: Anchor word not in word column labels provided to CorEx: {}' .format (anchor )
361
+ print (w )
361
362
else :
362
363
raise NameError ("Provided non-index anchors to CorEx without also providing 'words'" )
363
364
else :
364
365
new_anchor_list .append (anchor )
365
366
# Update anchors with new anchor list
367
+ if len (new_anchor_list ) == 0 :
368
+ continue
366
369
if len (new_anchor_list ) == 1 :
367
- anchors [ n ] = new_anchor_list [0 ]
370
+ processed_anchors . append ( new_anchor_list [0 ])
368
371
else :
369
- anchors [ n ] = new_anchor_list
372
+ processed_anchors . append ( new_anchor_list )
370
373
371
- return anchors
374
+ return processed_anchors
372
375
373
376
def calculate_p_y (self , p_y_given_x ):
374
377
"""Estimate log p(y_j=1)."""
@@ -478,21 +481,21 @@ def __getstate__(self):
478
481
return self_dict
479
482
480
483
def save (self , filename , ensure_compatibility = True ):
481
- """
482
- Pickle a class instance. E.g., corex.save('saved.pkl')
484
+ """
485
+ Pickle a class instance. E.g., corex.save('saved.pkl')
483
486
When set to True, ensure_compatibility resets self.words before saving
484
487
a pickle to avoid Unicode loading issues usually seen when trying to load
485
488
the pickle from a Python 2 implementation.
486
489
It is recommended to set it to False if you know you are going to load the
487
- model in an all Python 3 implementation as self.words is required for fetching
490
+ model in an all Python 3 implementation as self.words is required for fetching
488
491
the topics via get_topics().
489
492
"""
490
493
# Avoid saving words with object.
491
494
#TODO: figure out why Unicode sometimes causes an issue with loading after pickling
492
495
temp_words = self .words
493
496
if ensure_compatibility and (self .words is not None ):
494
497
self .words = None
495
-
498
+
496
499
# Save CorEx object
497
500
import pickle
498
501
if path .dirname (filename ) and not path .exists (path .dirname (filename )):
0 commit comments