Skip to content

Commit 3d11f21

Browse files
committed
Added support for metadata and features for ARFF
1 parent 4cf3188 commit 3d11f21

1 file changed

Lines changed: 14 additions & 13 deletions

File tree

src/dsff/arff.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ def from_arff(dsff, path=None, target=TARGET_NAME, missing=MISSING_TOKEN):
99
""" Populate the DSFF file from an ARFF file. """
1010
path = fix_path(dsff, path, ".arff")
1111
dsff.logger.debug(f"creating DSFF from {path}...")
12-
d = []
12+
d, features = [], {}
1313
with open(path) as f:
1414
relation, attributes, data = False, [False, False], False
1515
for n, l in enumerate(f, 1):
1616
l = l.strip()
17-
# ignore comments before @RELATION
18-
if l.startswith("#"):
19-
continue
17+
# get metadata and feature descriptions from comments
18+
if l.startswith("%"):
19+
if (m := re.match(r"^\%\s+(.*?)\s*\:\s*(.*?)$", l)):
20+
name, descr = m.groups()
21+
features[name] = descr
22+
elif re.match(r"^\%\d+metadata\s*\:\s*\{.*\}$"):
23+
dsff.write(metadata=literal_eval(l.split(":", 1)))
2024
if not relation:
2125
if l.startswith("@RELATION "):
2226
relation = True
@@ -73,12 +77,7 @@ def from_arff(dsff, path=None, target=TARGET_NAME, missing=MISSING_TOKEN):
7377
for j, row in enumerate(d):
7478
if j > 0:
7579
row[i] = {'0': "False", '1': "True"}[row[i]]
76-
dsff.write(d)
77-
features = {}
78-
for headers in dsff['data'].rows:
79-
for header in headers:
80-
features[header.value] = ""
81-
break
80+
dsff.write(data=d)
8281
dsff.write(features=features)
8382

8483

@@ -144,9 +143,11 @@ def to_arff(dsff, path=None, target=TARGET_NAME, exclude=DEFAULT_EXCL, missing=M
144143
mlen_c = [max(x, len(row[k]) if types[k] == "NUMERIC" else len(row[k])+2) for k, x in enumerate(mlen_c)]
145144
d.append(row)
146145
# format the resulting data and output the ARFF
147-
d = "\n".join(" ".join(("{: <%s}" % (mlen_c[k]+1)).format((x if types[k] == "NUMERIC" or x == MISSING_TOKEN else \
148-
"'%s'" % x) + ",") for k, x in enumerate(row)).rstrip(" ,") for row in d)
149-
arff = "@RELATION \"{}\"\n\n{}\n\n@DATA\n{}".format(name, "\n".join(a), d)
146+
d = (nl := "\n").join(" ".join(("{: <%s}" % (mlen_c[k]+1)).format((x if types[k] == "NUMERIC" or \
147+
x == MISSING_TOKEN else "'%s'" % x) + ",") for k, x in enumerate(row)).rstrip(" ,") for row in d)
148+
arff = f"@RELATION \"{name}\"\n\n{nl.join(a)}\n\n@DATA\n{d}\n\n" \
149+
f"{['', f'% metadata: {json.dumps(dsff.metadata)}'][len(dsff.metadata) > 0]}\n\n" \
150+
f"{nl.join(f'% {name}: {descr}' for name, descr in dsff.features.items())}"
150151
if text:
151152
return arff
152153
with open(path, 'w+') as f:

0 commit comments

Comments
 (0)