@@ -94,34 +94,39 @@ async def run_extraction(task_description: str) -> KeywordPhrase | None:
94
94
95
95
def _normalize_spaces (text : str ) -> str :
96
96
"""Replace uncommon whitespace characters with regular spaces."""
97
- for ch in ("\u00A0 " , "\u200B " , "\u200C " , "\u200D " , "\uFEFF " ):
97
+ for ch in ("\u00a0 " , "\u200b " , "\u200c " , "\u200d " , "\ufeff " ):
98
98
text = text .replace (ch , " " )
99
99
return text
100
100
101
101
102
102
def create_gist (keyword_phrase_obj : KeywordPhrase ) -> str :
103
103
"""Creates a hyphenated gist from the KeywordPhrase object."""
104
- action_verb = _normalize_spaces (keyword_phrase_obj .actionVerb ).strip ().lower ()
105
- phrase_elements = [
106
- _normalize_spaces (elem ).strip ().lower ()
107
- for elem in keyword_phrase_obj .phrase
104
+ # Ensure actionVerb is explicitly converted to a Python string before processing
105
+ action_verb_str = str (keyword_phrase_obj .actionVerb )
106
+ action_verb = _normalize_spaces (action_verb_str ).strip ().lower ()
107
+
108
+ # Ensure each element from the phrase is explicitly converted to a Python string
109
+ # before processing, then store them as actual strings.
110
+ phrase_elements_as_strings = [
111
+ _normalize_spaces (str (elem_obj )).strip ().lower ()
112
+ for elem_obj in keyword_phrase_obj .phrase
108
113
]
109
114
110
115
final_gist_parts = []
111
- if action_verb : # Only add if action_verb is not empty
116
+ if action_verb :
112
117
final_gist_parts .extend (action_verb .split ())
113
118
114
- if phrase_elements :
119
+ if phrase_elements_as_strings :
120
+ relevant_phrase_elements = phrase_elements_as_strings # Default assignment
115
121
# Verification: if the first phrase element is the same as action_verb,
116
122
# it means the model might have just repeated it from the explicit field.
117
123
# The instruction is to ignore the first element of phrase in this case.
118
- if phrase_elements [0 ] == action_verb :
119
- relevant_phrase_elements = phrase_elements [1 :]
120
- else :
121
- relevant_phrase_elements = phrase_elements
124
+ if phrase_elements_as_strings [0 ] == action_verb :
125
+ relevant_phrase_elements = phrase_elements_as_strings [1 :]
126
+ # No else needed, relevant_phrase_elements is already phrase_elements_as_strings
122
127
123
128
for elem in relevant_phrase_elements :
124
- final_gist_parts .extend (elem .split ())
129
+ final_gist_parts .extend (elem .split ()) # This will use Python's str.split()
125
130
126
131
# Remove duplicates while preserving order and filter out empty strings
127
132
seen = set ()
@@ -140,10 +145,10 @@ def main_cli():
140
145
formatter_class = argparse .RawTextHelpFormatter ,
141
146
)
142
147
143
- if ' --version' in sys .argv :
148
+ if " --version" in sys .argv :
144
149
print (f"{ parser .prog } { __version__ } " )
145
150
return
146
-
151
+
147
152
parser .add_argument (
148
153
"--version" ,
149
154
action = "version" ,
0 commit comments