Skip to content

Commit 69849f4

Browse files
tabulate: fix categorical and string edge cases
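- Fix division by zero when all categorical values are undefined (percentages are now returned as zeros instead of NaN)
- Remove the duplicated code block in the string array handling
- Fix the empty string array case (including arrays left empty after missing values are removed) so that it returns a proper empty table, cell (0, 3)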
1 parent 7cf774b commit 69849f4

1 file changed

Lines changed: 39 additions & 29 deletions

File tree

inst/tabulate.m

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@
8484
endif
8585

8686
total = sum (counts);
87-
percents = 100 * counts ./ total;
87+
if (total == 0)
88+
percents = zeros (size (counts));
89+
else
90+
percents = 100 * counts ./ total;
91+
endif
8892

8993
## Output format: Cell array
9094
out = cell (length (vals), 3);
@@ -96,40 +100,21 @@
96100
## Handle string arrays
97101
x(ismissing (x)) = [];
98102

99-
## Convert to cellstr and use grp2idx which is robust
100-
[idx, vals] = grp2idx (cellstr (x));
101-
102-
if (isempty (idx))
103-
counts = [];
104-
percents = [];
103+
if (isempty (x))
104+
out = cell (0, 3);
105105
else
106-
counts = accumarray (idx, 1);
107-
total = sum (counts);
108-
percents = 100 * counts ./ total;
109-
endif
106+
## Convert to cellstr and use grp2idx which is robust
107+
[idx, vals] = grp2idx (cellstr (x));
110108

111-
## Output format: Cell array
112-
vals_cell = vals;
113-
out = cell (length (vals_cell), 3);
114-
out(:,1) = vals_cell;
115-
out(:,2) = num2cell (counts);
116-
out(:,3) = num2cell (percents);
117-
118-
if (isempty (idx))
119-
counts = [];
120-
percents = [];
121-
else
122109
counts = accumarray (idx, 1);
123110
total = sum (counts);
124111
percents = 100 * counts ./ total;
125-
endif
126112

127-
## Output format: Cell array
128-
vals_cell = vals;
129-
out = cell (length (vals_cell), 3);
130-
out(:,1) = vals_cell;
131-
out(:,2) = num2cell (counts);
132-
out(:,3) = num2cell (percents);
113+
out = cell (length (vals), 3);
114+
out(:,1) = vals;
115+
out(:,2) = num2cell (counts);
116+
out(:,3) = num2cell (percents);
117+
endif
133118

134119
elseif (islogical (x))
135120
## Handle logical arrays
@@ -407,3 +392,28 @@
407392
%!error<tabulate: X must be either a numeric vector> tabulate ({1, 2, 3, 4})
408393
%!error<tabulate: X must be either a numeric vector> ...
409394
%! tabulate ({"a", "b"; "a", "c"})
395+
396+
## Test categorical with all undefined values (should return zero counts/percents)
397+
%!test
398+
%! x = categorical ({'a','b','c'});
399+
%! x(:) = categorical (missing);
400+
%! tbl = tabulate (x);
401+
%! assert (iscell (tbl));
402+
%! assert ([tbl{:,2}]', [0; 0; 0]);
403+
%! assert ([tbl{:,3}]', [0; 0; 0]);
404+
405+
## Test categorical with defined categories but no data
406+
%!test
407+
%! x = categorical ({}, {'low','med','high'});
408+
%! tbl = tabulate (x);
409+
%! assert (iscell (tbl));
410+
%! assert ([tbl{:,2}]', [0; 0; 0]);
411+
%! assert ([tbl{:,3}]', [0; 0; 0]);
412+
413+
## Test string array with all missing values (should return empty table)
414+
%!test
415+
%! x = string ({'a','b'});
416+
%! x(:) = missing;
417+
%! tbl = tabulate (x);
418+
%! assert (iscell (tbl));
419+
%! assert (isempty (tbl));

0 commit comments

Comments
 (0)