@@ -36,11 +36,15 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         - *default_data*: The default data of the data node. It is used at the data node instantiation
           to write the data to the CSV file.
         - *has_header* (`bool`): If True, indicates that the CSV file has a header.
+        - *separator* (`str`): The separator used in the CSV file. The default value is `,`.
         - *exposed_type*: The exposed type of the data read from CSV file. The default value is `pandas`.
     """

     __STORAGE_TYPE = "csv"
-    __ENCODING_KEY = "encoding"
+    _ENCODING_KEY = "encoding"
+    _DEFAULT_ENCODING_VALUE = "utf-8"
+    _SEPARATOR_KEY = "separator"
+    _DEFAULT_SEPARATOR_VALUE = ","

     _REQUIRED_PROPERTIES: List[str] = []
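Assuming the public `Config.configure_csv_data_node` entry point forwards extra keyword arguments as data node properties, the new `separator` property could be used alongside `encoding` and `has_header` roughly as sketched below (node id and file path are made up for illustration):

    from taipy import Config

    # Hypothetical config: a semicolon-separated file exposed as a pandas DataFrame.
    sales_cfg = Config.configure_csv_data_node(
        id="sales",
        default_path="sales.csv",
        has_header=True,
        separator=";",      # property added by this change
        encoding="utf-8",
        exposed_type="pandas",
    )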
@@ -65,12 +69,15 @@ def __init__(
         if properties is None:
             properties = {}

-        if self.__ENCODING_KEY not in properties.keys():
-            properties[self.__ENCODING_KEY] = "utf-8"
+        if self._ENCODING_KEY not in properties.keys():
+            properties[self._ENCODING_KEY] = self._DEFAULT_ENCODING_VALUE

         if self._HAS_HEADER_PROPERTY not in properties.keys():
             properties[self._HAS_HEADER_PROPERTY] = True

+        if self._SEPARATOR_KEY not in properties.keys():
+            properties[self._SEPARATOR_KEY] = self._DEFAULT_SEPARATOR_VALUE
+
         properties[self._EXPOSED_TYPE_PROPERTY] = _TabularDataNodeMixin._get_valid_exposed_type(properties)
         self._check_exposed_type(properties[self._EXPOSED_TYPE_PROPERTY])
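The three `if ... not in properties` blocks above fill defaults in the same way `dict.setdefault` would; a small standalone sketch of that behavior (plain dict, no Taipy objects involved):

    _DEFAULTS = {"encoding": "utf-8", "has_header": True, "separator": ","}

    def _apply_defaults(properties: dict) -> dict:
        for key, value in _DEFAULTS.items():
            properties.setdefault(key, value)  # only fill keys the caller did not set
        return properties

    print(_apply_defaults({"separator": ";"}))
    # {'separator': ';', 'encoding': 'utf-8', 'has_header': True}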
@@ -106,7 +113,8 @@ def __init__(
                 self._IS_GENERATED_KEY,
                 self._HAS_HEADER_PROPERTY,
                 self._EXPOSED_TYPE_PROPERTY,
-                self.__ENCODING_KEY,
+                self._ENCODING_KEY,
+                self._SEPARATOR_KEY,
             }
         )
@@ -141,12 +149,12 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
     def _read_as(self, path: str):
         properties = self.properties
-        with open(path, encoding=properties[self.__ENCODING_KEY]) as csvFile:
+        with open(path, encoding=properties[self._ENCODING_KEY]) as csvFile:
             if properties[self._HAS_HEADER_PROPERTY]:
-                reader_with_header = csv.DictReader(csvFile)
+                reader_with_header = csv.DictReader(csvFile, delimiter=properties[self._SEPARATOR_KEY])
                 return [self._decoder(line) for line in reader_with_header]

-            reader_without_header = csv.reader(csvFile)
+            reader_without_header = csv.reader(csvFile, delimiter=properties[self._SEPARATOR_KEY])
             return [self._decoder(line) for line in reader_without_header]

     def _read_as_numpy(self, path: str) -> np.ndarray:
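Both `csv.DictReader` and `csv.reader` accept a `delimiter` keyword, which is what the `separator` property is forwarded to here; a minimal standalone sketch with made-up data:

    import csv
    import io

    raw = "name;age\nada;36\ngrace;45\n"
    rows = list(csv.DictReader(io.StringIO(raw), delimiter=";"))
    print(rows)  # [{'name': 'ada', 'age': '36'}, {'name': 'grace', 'age': '45'}]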
@@ -162,20 +170,37 @@ def _read_as_pandas_dataframe(
             properties = self.properties
             if properties[self._HAS_HEADER_PROPERTY]:
                 if column_names:
-                    return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY])[column_names]
-                return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY])
+                    return pd.read_csv(
+                        path, encoding=properties[self._ENCODING_KEY], sep=properties[self._SEPARATOR_KEY]
+                    )[column_names]
+                return pd.read_csv(path, encoding=properties[self._ENCODING_KEY], sep=properties[self._SEPARATOR_KEY])
             else:
                 if usecols:
-                    return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY], header=None, usecols=usecols)
-                return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY], header=None)
+                    return pd.read_csv(
+                        path,
+                        encoding=properties[self._ENCODING_KEY],
+                        sep=properties[self._SEPARATOR_KEY],
+                        header=None,
+                        usecols=usecols,
+                    )
+                return pd.read_csv(
+                    path, encoding=properties[self._ENCODING_KEY], header=None, sep=properties[self._SEPARATOR_KEY]
+                )
         except pd.errors.EmptyDataError:
             return pd.DataFrame()

     def _append(self, data: Any):
         properties = self.properties
         exposed_type = properties[self._EXPOSED_TYPE_PROPERTY]
         data = self._convert_data_to_dataframe(exposed_type, data)
-        data.to_csv(self._path, mode="a", index=False, encoding=properties[self.__ENCODING_KEY], header=False)
+        data.to_csv(
+            self._path,
+            mode="a",
+            index=False,
+            encoding=properties[self._ENCODING_KEY],
+            sep=properties[self._SEPARATOR_KEY],
+            header=False,
+        )

     def _write(self, data: Any, columns: Optional[List[str]] = None):
         self._write_to_path(self._path, data, columns)
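On the pandas paths, `pd.read_csv` and `DataFrame.to_csv` must agree on the separator; a hedged round-trip sketch with made-up data and a throwaway file name:

    import pandas as pd

    df = pd.DataFrame({"name": ["ada"], "age": [36]})
    df.to_csv("demo.csv", index=False, sep=";", encoding="utf-8")
    pd.DataFrame({"name": ["grace"], "age": [45]}).to_csv(
        "demo.csv", mode="a", index=False, header=False, sep=";", encoding="utf-8"
    )
    print(pd.read_csv("demo.csv", sep=";", encoding="utf-8"))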
@@ -191,6 +216,7 @@ def _write_to_path(self, path: str, data: Any, columns: Optional[List[str]] = No
         data.to_csv(
             path,
             index=False,
-            encoding=properties[self.__ENCODING_KEY],
+            encoding=properties[self._ENCODING_KEY],
+            sep=properties[self._SEPARATOR_KEY],
             header=properties[self._HAS_HEADER_PROPERTY],
         )
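A quick sanity check of why the separator must reach both the reader and the writer: parsing a semicolon-separated file with the default `,` leaves a single unsplit column (made-up data):

    import io
    import pandas as pd

    raw = "name;age\nada;36\n"
    print(pd.read_csv(io.StringIO(raw)).columns.tolist())           # ['name;age']
    print(pd.read_csv(io.StringIO(raw), sep=";").columns.tolist())  # ['name', 'age']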