Commit bfdd492

Merge pull request #2652 from Avaiga/feature/2603-separator-in-csv-dn
Feature/#2603 - Add separator property to CSVDataNode
1 parent 260af4c commit bfdd492

10 files changed: +116 -20 lines

taipy/common/config/config.pyi

Lines changed: 2 additions & 0 deletions

@@ -424,6 +424,7 @@ class Config:
     default_path: Optional[str] = None,
     encoding: Optional[str] = None,
     has_header: Optional[bool] = None,
+    separator: Optional[str] = None,
     exposed_type: Optional[str] = None,
     scope: Optional[Scope] = None,
     validity_period: Optional[timedelta] = None,
@@ -436,6 +437,7 @@ class Config:
     default_path (Optional[str]): The default path of the CSV file.
     encoding (Optional[str]): The encoding of the CSV file.
     has_header (Optional[bool]): If True, indicates that the CSV file has a header.
+    separator (Optional[str]): The character used to separate values in the CSV file.
     exposed_type (Optional[str]): The exposed type of the data read from CSV file.<br/>
         The default value is `pandas`.
     scope (Optional[Scope^]): The scope of the CSV data node configuration.<br/>

taipy/core/config/data_node_config.py

Lines changed: 7 additions & 0 deletions

@@ -97,6 +97,7 @@ class DataNodeConfig(Section):
 _OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY = "exposed_type"
 _OPTIONAL_DEFAULT_PATH_CSV_PROPERTY = "default_path"
 _OPTIONAL_HAS_HEADER_CSV_PROPERTY = "has_header"
+_OPTIONAL_SEPARATOR_CSV_PROPERTY = "separator"
 # Excel
 _OPTIONAL_EXPOSED_TYPE_EXCEL_PROPERTY = "exposed_type"
 _OPTIONAL_DEFAULT_PATH_EXCEL_PROPERTY = "default_path"
@@ -198,6 +199,7 @@ class DataNodeConfig(Section):
     _OPTIONAL_DEFAULT_PATH_CSV_PROPERTY: str,
     _OPTIONAL_ENCODING_PROPERTY: str,
     _OPTIONAL_HAS_HEADER_CSV_PROPERTY: bool,
+    _OPTIONAL_SEPARATOR_CSV_PROPERTY: str,
     _OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY: (str, Callable),
 },
 _STORAGE_TYPE_VALUE_EXCEL: {
@@ -290,6 +292,7 @@ class DataNodeConfig(Section):
     _OPTIONAL_DEFAULT_PATH_CSV_PROPERTY: None,
     _OPTIONAL_ENCODING_PROPERTY: _DEFAULT_ENCODING_VALUE,
     _OPTIONAL_HAS_HEADER_CSV_PROPERTY: True,
+    _OPTIONAL_SEPARATOR_CSV_PROPERTY: ",",
     _OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY: _DEFAULT_EXPOSED_TYPE,
 },
 _STORAGE_TYPE_VALUE_EXCEL: {
@@ -622,6 +625,7 @@ def _configure_csv(
     default_path: Optional[str] = None,
     encoding: Optional[str] = None,
     has_header: Optional[bool] = None,
+    separator: Optional[str] = None,
     exposed_type: Optional[str] = None,
     scope: Optional[Scope] = None,
     validity_period: Optional[timedelta] = None,
@@ -634,6 +638,7 @@ def _configure_csv(
     default_path (Optional[str]): The default path of the CSV file.
     encoding (Optional[str]): The encoding of the CSV file.
     has_header (Optional[bool]): If True, indicates that the CSV file has a header.
+    separator (Optional[str]): The character used to separate values in the CSV file.
     exposed_type (Optional[str]): The exposed type of the data read from CSV file.<br/>
         The default value is `pandas`.
     scope (Optional[Scope^]): The scope of the CSV data node configuration.<br/>
@@ -655,6 +660,8 @@ def _configure_csv(
     properties[cls._OPTIONAL_ENCODING_PROPERTY] = encoding
 if has_header is not None:
     properties[cls._OPTIONAL_HAS_HEADER_CSV_PROPERTY] = has_header
+if separator is not None:
+    properties[cls._OPTIONAL_SEPARATOR_CSV_PROPERTY] = separator
 if exposed_type is not None:
     properties[cls._OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY] = exposed_type

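Taken together, the stub and the config implementation expose `separator` as an optional CSV property that defaults to ",". A minimal usage sketch, modeled on the call shape used in the tests further down; the config id and path are illustrative, and the top-level `Config` import is assumed:

from taipy import Config

# Illustrative id/path; separator falls back to "," when omitted.
sales_history_cfg = Config.configure_csv_data_node(
    id="sales_history",
    default_path="path/sales.csv",
    encoding="utf-8",
    has_header=True,
    separator=";",
)
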
taipy/core/data/csv.py

Lines changed: 39 additions & 13 deletions

@@ -36,11 +36,15 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 - *default_data*: The default data of the data node. It is used at the data node instantiation
   to write the data to the CSV file.
 - *has_header* (`bool`): If True, indicates that the CSV file has a header.
+- *separator* (`str`): The separator used in the CSV file. The default value is `,`.
 - *exposed_type*: The exposed type of the data read from CSV file. The default value is `pandas`.
 """

 __STORAGE_TYPE = "csv"
-__ENCODING_KEY = "encoding"
+_ENCODING_KEY = "encoding"
+_DEFAULT_ENCODING_VALUE = "utf-8"
+_SEPARATOR_KEY = "separator"
+_DEFAULT_SEPARATOR_VALUE = ","

 _REQUIRED_PROPERTIES: List[str] = []

@@ -65,12 +69,15 @@ def __init__(
 if properties is None:
     properties = {}

-if self.__ENCODING_KEY not in properties.keys():
-    properties[self.__ENCODING_KEY] = "utf-8"
+if self._ENCODING_KEY not in properties.keys():
+    properties[self._ENCODING_KEY] = self._DEFAULT_ENCODING_VALUE

 if self._HAS_HEADER_PROPERTY not in properties.keys():
     properties[self._HAS_HEADER_PROPERTY] = True

+if self._SEPARATOR_KEY not in properties.keys():
+    properties[self._SEPARATOR_KEY] = self._DEFAULT_SEPARATOR_VALUE
+
 properties[self._EXPOSED_TYPE_PROPERTY] = _TabularDataNodeMixin._get_valid_exposed_type(properties)
 self._check_exposed_type(properties[self._EXPOSED_TYPE_PROPERTY])

@@ -106,7 +113,8 @@ def __init__(
     self._IS_GENERATED_KEY,
     self._HAS_HEADER_PROPERTY,
     self._EXPOSED_TYPE_PROPERTY,
-    self.__ENCODING_KEY,
+    self._ENCODING_KEY,
+    self._SEPARATOR_KEY,
 }
 )

@@ -141,12 +149,12 @@ def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:

 def _read_as(self, path: str):
     properties = self.properties
-    with open(path, encoding=properties[self.__ENCODING_KEY]) as csvFile:
+    with open(path, encoding=properties[self._ENCODING_KEY]) as csvFile:
         if properties[self._HAS_HEADER_PROPERTY]:
-            reader_with_header = csv.DictReader(csvFile)
+            reader_with_header = csv.DictReader(csvFile, delimiter=properties[self._SEPARATOR_KEY])
             return [self._decoder(line) for line in reader_with_header]

-        reader_without_header = csv.reader(csvFile)
+        reader_without_header = csv.reader(csvFile, delimiter=properties[self._SEPARATOR_KEY])
         return [self._decoder(line) for line in reader_without_header]

 def _read_as_numpy(self, path: str) -> np.ndarray:
@@ -162,20 +170,37 @@ def _read_as_pandas_dataframe(
 properties = self.properties
 if properties[self._HAS_HEADER_PROPERTY]:
     if column_names:
-        return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY])[column_names]
-    return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY])
+        return pd.read_csv(
+            path, encoding=properties[self._ENCODING_KEY], sep=properties[self._SEPARATOR_KEY]
+        )[column_names]
+    return pd.read_csv(path, encoding=properties[self._ENCODING_KEY], sep=properties[self._SEPARATOR_KEY])
 else:
     if usecols:
-        return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY], header=None, usecols=usecols)
-    return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY], header=None)
+        return pd.read_csv(
+            path,
+            encoding=properties[self._ENCODING_KEY],
+            sep=properties[self._SEPARATOR_KEY],
+            header=None,
+            usecols=usecols,
+        )
+    return pd.read_csv(
+        path, encoding=properties[self._ENCODING_KEY], header=None, sep=properties[self._SEPARATOR_KEY]
+    )
 except pd.errors.EmptyDataError:
     return pd.DataFrame()

 def _append(self, data: Any):
     properties = self.properties
     exposed_type = properties[self._EXPOSED_TYPE_PROPERTY]
     data = self._convert_data_to_dataframe(exposed_type, data)
-    data.to_csv(self._path, mode="a", index=False, encoding=properties[self.__ENCODING_KEY], header=False)
+    data.to_csv(
+        self._path,
+        mode="a",
+        index=False,
+        encoding=properties[self._ENCODING_KEY],
+        sep=properties[self._SEPARATOR_KEY],
+        header=False,
+    )

 def _write(self, data: Any, columns: Optional[List[str]] = None):
     self._write_to_path(self._path, data, columns)
@@ -191,6 +216,7 @@ def _write_to_path(self, path: str, data: Any, columns: Optional[List[str]] = No
 data.to_csv(
     path,
     index=False,
-    encoding=properties[self.__ENCODING_KEY],
+    encoding=properties[self._ENCODING_KEY],
+    sep=properties[self._SEPARATOR_KEY],
     header=properties[self._HAS_HEADER_PROPERTY],
 )

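As the hunks above show, the data node stores the separator as a plain property and forwards it to `csv.DictReader`/`csv.reader` as `delimiter` and to `pandas.read_csv`/`DataFrame.to_csv` as `sep`. A minimal sketch of exercising it directly, following the constructor usage in the new tests; the path is illustrative and the import locations are assumed from this repository layout and Taipy's top-level exports:

from taipy import Scope
from taipy.core.data.csv import CSVDataNode

# Illustrative ";"-separated file with a header row.
dn = CSVDataNode(
    "semicolon_dn",
    Scope.SCENARIO,
    properties={"path": "data_sample/example_2.csv", "has_header": True, "separator": ";"},
)

df = dn.read()  # exposed_type defaults to "pandas", so this goes through pd.read_csv(..., sep=";")
print(dn.properties["separator"])  # ";" here; falls back to "," when the property is omitted
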
tests/core/config/test_config_serialization.py

Lines changed: 3 additions & 1 deletion

@@ -135,6 +135,7 @@ def test_read_write_toml_configuration_file():
 exposed_type = "tests.core.config.test_config_serialization.CustomClass:class"
 encoding = "utf-8"
 has_header = "True:bool"
+separator = ","

 [DATA_NODE.test_json_dn]
 storage_type = "json"
@@ -305,7 +306,8 @@ def test_read_write_json_configuration_file():
 "path": "./test.csv",
 "exposed_type": "tests.core.config.test_config_serialization.CustomClass:class",
 "encoding": "utf-8",
-"has_header": "True:bool"
+"has_header": "True:bool",
+"separator": ","
 },
 "test_json_dn": {
 "storage_type": "json",

tests/core/config/test_configure_default_config.py

Lines changed: 2 additions & 2 deletions

@@ -66,7 +66,7 @@ def test_set_default_data_node_configuration_replace_old_default_config():
 )
 dn2 = Config.configure_data_node(id="dn2")
 assert dn2.storage_type == "csv"
-assert len(dn2.properties) == 6 # encoding, exposed_type, and has_header too
+assert len(dn2.properties) == 7 # encoding, separator, exposed_type, and has_header
 assert dn2.prop4 == "4"
 assert dn2.prop5 == "5"
 assert dn2.prop6 == "6"
@@ -85,7 +85,7 @@ def test_config_storage_type_different_from_default_data_node():
 # Config a datanode with specific "storage_type" different than "pickle"
 # should ignore the default datanode
 csv_dn = Config.configure_data_node(id="csv_dn", storage_type="csv")
-assert len(csv_dn.properties) == 3 # encoding, exposed_type, and has_header
+assert len(csv_dn.properties) == 4 # encoding, separator, exposed_type, and has_header
 assert csv_dn.properties.get("custom_property") is None
 assert csv_dn.scope == Scope.SCENARIO

tests/core/data/data_sample/example_2.csv (new file)

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+id;integer;text
+Ibelfu5;584;This is the first row
+h89653fu5;7;This is the second row
+hdds23;275;This is the third row
+q68423;754;This is the fourth row
+qqf8;10;This is the fifth row
+5sqf8;11778;This is the sixth row
+5458;95;This is the seventh row
+569ggg8;466;This is the 8th row
+kus458;635;This is the 9th row
+5kuds458;9;This is the last row
+jEn4a;1001;1st appended line
+4ajeQ;1002;2nd appended line

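For reference, this semicolon-separated fixture is what the new read and append tests below load; it can be sanity-checked with plain pandas (the relative path assumes the tests/core/data working directory):

import pandas as pd

# The same parse the read test relies on: pandas with sep=";".
df = pd.read_csv("data_sample/example_2.csv", sep=";")
assert list(df.columns) == ["id", "integer", "text"]
assert len(df) == 12  # 10 original rows plus the 2 appended lines committed with the fixture
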
tests/core/data/test_csv_data_node.py

Lines changed: 3 additions & 1 deletion

@@ -54,7 +54,7 @@ class TestCSVDataNode:
 def test_create(self):
     default_path = "data/node/path"
     csv_dn_config = Config.configure_csv_data_node(
-        id="foo_bar", default_path=default_path, has_header=False, name="super name"
+        id="foo_bar", default_path=default_path, has_header=False, name="super name", separator=";"
     )
     dn = _DataManagerFactory._build_manager()._create(csv_dn_config, None, None)
     assert isinstance(dn, CSVDataNode)
@@ -70,6 +70,7 @@ def test_create(self):
 assert dn.path == default_path
 assert dn.properties["has_header"] is False
 assert dn.properties["exposed_type"] == "pandas"
+assert dn.properties["separator"] == ";"

 csv_dn_config = Config.configure_csv_data_node(
     id="foo", default_path=default_path, has_header=True, exposed_type=MyCustomObject
@@ -79,6 +80,7 @@ def test_create(self):
 assert dn.config_id == "foo"
 assert dn.properties["has_header"] is True
 assert dn.properties["exposed_type"] == MyCustomObject
+assert dn.properties["separator"] == ","

 with pytest.raises(InvalidConfigurationId):
     CSVDataNode(

tests/core/data/test_data_manager.py

Lines changed: 4 additions & 3 deletions

@@ -109,10 +109,11 @@ def test_create_and_get_csv_data_node(self):
 assert not _DataManager._get(csv_dn.id).is_ready_for_reading
 assert _DataManager._get(csv_dn.id).is_ready_for_reading == csv_dn.is_ready_for_reading
 assert (
-    len(_DataManager._get(csv_dn.id).properties) == 5
-) # path, encoding, has_header, exposed_type, is_generated
+    len(_DataManager._get(csv_dn.id).properties) == 6
+) # path, encoding, separator, has_header, exposed_type, is_generated
 assert _DataManager._get(csv_dn.id).properties.get("path") == "bar"
 assert _DataManager._get(csv_dn.id).properties.get("encoding") == "utf-8"
+assert _DataManager._get(csv_dn.id).properties.get("separator") == ","
 assert _DataManager._get(csv_dn.id).properties.get("has_header") is True
 assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
 assert _DataManager._get(csv_dn.id).properties.get("is_generated") is False
@@ -137,7 +138,7 @@ def test_create_and_get_csv_data_node(self):
 assert _DataManager._get(csv_dn).job_ids == csv_dn.job_ids
 assert not _DataManager._get(csv_dn).is_ready_for_reading
 assert _DataManager._get(csv_dn).is_ready_for_reading == csv_dn.is_ready_for_reading
-assert len(_DataManager._get(csv_dn).properties) == 5 # path, encoding, has_header, exposed_type, is_generated
+assert len(_DataManager._get(csv_dn).properties) == 6
 assert _DataManager._get(csv_dn).properties.get("path") == "bar"
 assert _DataManager._get(csv_dn).properties.get("encoding") == "utf-8"
 assert _DataManager._get(csv_dn).properties.get("has_header") is True

tests/core/data/test_read_csv_data_node.py

Lines changed: 9 additions & 0 deletions

@@ -23,6 +23,7 @@
 from taipy.core.exceptions.exceptions import NoData

 csv_file_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+csv_2_file_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_2.csv")


 @dataclasses.dataclass
@@ -117,3 +118,11 @@ def test_read_without_header_custom_exposed_type():
 assert row_pandas[0] == row_custom.id
 assert str(row_pandas[1]) == row_custom.integer
 assert row_pandas[2] == row_custom.text
+
+
+def test_read_with_different_separator():
+    csv_data_node_as_pandas = CSVDataNode(
+        "bar", Scope.SCENARIO, properties={"path": csv_2_file_path, "has_header": True, "separator": ";"}
+    )
+    data_pandas = csv_data_node_as_pandas.read()
+    assert pd.DataFrame.equals(data_pandas, pd.read_csv(csv_2_file_path, sep=";"))

tests/core/data/test_write_csv_data_node.py

Lines changed: 34 additions & 0 deletions

@@ -71,6 +71,29 @@ def test_append(csv_file, default_data_frame, content):
 )


+def test_append_with_different_separator():
+    path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_2.csv")
+
+    original_content = pd.read_csv(path, sep=";")
+    content = pd.DataFrame(
+        [
+            {"id": "jEn4a", "integer": 1001, "text": "1st appended line"},
+            {"id": "4ajeQ", "integer": 1002, "text": "2nd appended line"},
+        ],
+    )
+
+    csv_dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "separator": ";"})
+    _DataManagerFactory._build_manager()._repository._save(csv_dn)
+
+    csv_dn.append(content)
+    assert_frame_equal(
+        csv_dn.read(),
+        pd.concat([original_content, pd.DataFrame(content, columns=["id", "integer", "text"])]).reset_index(drop=True),
+    )
+    # Reset the file to its original content
+    csv_dn.write(original_content)
+
+
 def test_write_with_header_pandas(tmp_csv_file):
     csv_dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": tmp_csv_file})
     _DataManagerFactory._build_manager()._repository._save(csv_dn)
@@ -199,3 +222,14 @@ def test_write_with_column_names(tmp_csv_file):
 csv_dn.write_with_column_names(data, columns)
 df = pd.DataFrame(data, columns=columns)
 assert pd.DataFrame.equals(df, csv_dn.read())
+
+
+def test_write_with_different_separator(tmp_csv_file):
+    data = [[11, 22, 33], [44, 55, 66]]
+    columns = ["e", "f", "g"]
+
+    csv_dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": tmp_csv_file, "separator": ";"})
+    _DataManagerFactory._build_manager()._repository._save(csv_dn)
+    csv_dn.write_with_column_names(data, columns)
+    df = pd.DataFrame(data, columns=columns)
+    assert pd.DataFrame.equals(df, csv_dn.read())
