@@ -114,11 +114,7 @@ def gen_nan_data(nrows: int, ncols: int) -> dict:
114
114
115
115
def gen_int_data (nrows : int , ncols : int , rand_low : int , rand_high : int ) -> dict :
116
116
"""
117
- Generate int data with caching.
118
-
119
- The generated data are saved in the dictionary and on a subsequent call,
120
- if the keys match, saved data will be returned. Therefore, we need
121
- to carefully monitor the changing of saved data and make its copy if needed.
117
+ Generate int data.
122
118
123
119
Parameters
124
120
----------
@@ -136,30 +132,16 @@ def gen_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> dict:
136
132
dict
137
133
Number of keys - `ncols`, each of them stores an np.ndarray of `nrows` length.
138
134
"""
139
- cache_key = ("int" , nrows , ncols , rand_low , rand_high )
140
- if cache_key in data_cache :
141
- return data_cache [cache_key ]
142
-
143
- logging .info (
144
- "Generating int data {} rows and {} columns [{}-{}]" .format (
145
- nrows , ncols , rand_low , rand_high
146
- )
147
- )
148
135
data = {
149
136
"col{}" .format (i ): np .random .randint (rand_low , rand_high , size = (nrows ))
150
137
for i in range (ncols )
151
138
}
152
- data_cache [cache_key ] = weakdict (data )
153
139
return data
154
140
155
141
156
142
def gen_str_int_data (nrows : int , ncols : int , rand_low : int , rand_high : int ) -> dict :
157
143
"""
158
- Generate int data and string data with caching.
159
-
160
- The generated data are saved in the dictionary and on a subsequent call,
161
- if the keys match, saved data will be returned. Therefore, we need
162
- to carefully monitor the changing of saved data and make its copy if needed.
144
+ Generate int data and string data.
163
145
164
146
Parameters
165
147
----------
@@ -178,30 +160,16 @@ def gen_str_int_data(nrows: int, ncols: int, rand_low: int, rand_high: int) -> d
178
160
Number of keys - `ncols`, each of them stores an np.ndarray of `nrows` length.
179
161
One of the columns with string values.
180
162
"""
181
- cache_key = ("str_int" , nrows , ncols , rand_low , rand_high )
182
- if cache_key in data_cache :
183
- return data_cache [cache_key ]
184
-
185
- logging .info (
186
- "Generating str_int data {} rows and {} columns [{}-{}]" .format (
187
- nrows , ncols , rand_low , rand_high
188
- )
189
- )
190
163
data = gen_int_data (nrows , ncols , rand_low , rand_high ).copy ()
191
164
# convert values in an arbitrary column to string type
192
165
key = list (data .keys ())[0 ]
193
166
data [key ] = [f"str_{ x } " for x in data [key ]]
194
- data_cache [cache_key ] = weakdict (data )
195
167
return data
196
168
197
169
198
170
def gen_true_false_int_data (nrows , ncols , rand_low , rand_high ):
199
171
"""
200
- Generate int data and string data "true" and "false" values with caching.
201
-
202
- The generated data are saved in the dictionary and on a subsequent call,
203
- if the keys match, saved data will be returned. Therefore, we need
204
- to carefully monitor the changing of saved data and make its copy if needed.
172
+ Generate int data and string data "true" and "false" values.
205
173
206
174
Parameters
207
175
----------
@@ -221,15 +189,6 @@ def gen_true_false_int_data(nrows, ncols, rand_low, rand_high):
221
189
One half of the columns with integer values, another half - with "true" and
222
190
"false" string values.
223
191
"""
224
- cache_key = ("true_false_int" , nrows , ncols , rand_low , rand_high )
225
- if cache_key in data_cache :
226
- return data_cache [cache_key ]
227
-
228
- logging .info (
229
- "Generating true_false_int data {} rows and {} columns [{}-{}]" .format (
230
- nrows , ncols , rand_low , rand_high
231
- )
232
- )
233
192
data = gen_int_data (nrows // 2 , ncols // 2 , rand_low , rand_high )
234
193
235
194
data_true_false = {
@@ -239,7 +198,6 @@ def gen_true_false_int_data(nrows, ncols, rand_low, rand_high):
239
198
for i in range (ncols - ncols // 2 )
240
199
}
241
200
data .update (data_true_false )
242
- data_cache [cache_key ] = weakdict (data )
243
201
return data
244
202
245
203
@@ -289,10 +247,20 @@ def gen_data(
289
247
"str_int" : gen_str_int_data ,
290
248
"true_false_int" : gen_true_false_int_data ,
291
249
}
250
+ cache_key = (data_type , nrows , ncols , rand_low , rand_high )
251
+ if cache_key in data_cache :
252
+ return data_cache [cache_key ]
253
+
254
+ logging .info (
255
+ "Generating {} data {} rows and {} columns [{}-{}]" .format (
256
+ data_type , nrows , ncols , rand_low , rand_high
257
+ )
258
+ )
292
259
assert data_type in type_to_generator
293
260
data_generator = type_to_generator [data_type ]
294
261
295
262
data = data_generator (nrows , ncols , rand_low , rand_high )
263
+ data_cache [cache_key ] = weakdict (data )
296
264
297
265
return data
298
266
0 commit comments