Skip to content

Commit 89ddfe4

Browse files
authored
Merge pull request #30 from JuliaAI/type-tree
Add functionality to print the scientific type hierarchy
2 parents d2ffbbc + 4e24418 commit 89ddfe4

8 files changed

Lines changed: 237 additions & 37 deletions

File tree

.github/codecov.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
coverage:
2+
status:
3+
project:
4+
default:
5+
threshold: 0.5%
6+
removed_code_behavior: fully_covered_patch
7+
patch:
8+
default:
9+
target: 80

.github/workflows/ci.yml

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,9 @@
11
name: CI
22
on:
3-
pull_request:
4-
branches:
5-
- master
6-
- dev
73
push:
8-
branches:
9-
- master
10-
- dev
11-
tags: '*'
4+
branches: [master, dev]
5+
tags: ["*"]
6+
pull_request:
127
jobs:
138
test:
149
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
@@ -17,31 +12,29 @@ jobs:
1712
fail-fast: false
1813
matrix:
1914
version:
20-
- '1.0'
21-
- '1'
15+
- '1' # automatically expands to the latest stable 1.x release of Julia
16+
- '1.6'
17+
- '1.10'
2218
os:
2319
- ubuntu-latest
20+
- windows-latest
2421
arch:
2522
- x64
23+
include:
24+
- os: macOS-latest
25+
arch: aarch64
26+
version: 1
2627
steps:
27-
- uses: actions/checkout@v2
28-
- uses: julia-actions/setup-julia@v1
28+
- uses: actions/checkout@v4
29+
- uses: julia-actions/setup-julia@v2
2930
with:
3031
version: ${{ matrix.version }}
3132
arch: ${{ matrix.arch }}
32-
- uses: actions/cache@v1
33-
env:
34-
cache-name: cache-artifacts
35-
with:
36-
path: ~/.julia/artifacts
37-
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
38-
restore-keys: |
39-
${{ runner.os }}-test-${{ env.cache-name }}-
40-
${{ runner.os }}-test-
41-
${{ runner.os }}-
33+
- uses: julia-actions/cache@v2
4234
- uses: julia-actions/julia-buildpkg@v1
4335
- uses: julia-actions/julia-runtest@v1
4436
- uses: julia-actions/julia-processcoverage@v1
45-
- uses: codecov/codecov-action@v1
37+
- uses: codecov/codecov-action@v5
4638
with:
47-
file: lcov.info
39+
files: lcov.info
40+
token: ${{ secrets.CODECOV_TOKEN }}

Project.toml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
name = "ScientificTypesBase"
22
uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
33
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>", "Thibaut Lienart <thibaut.lienart@gmail.com>"]
4-
version = "3.0.0"
4+
version = "3.1.0"
5+
6+
[deps]
7+
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
58

69
[compat]
10+
InteractiveUtils = "1"
711
julia = "1"
812

913
[extras]
14+
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
1015
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1116
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1217

1318
[targets]
14-
test = ["Test", "Tables"]
19+
test = ["Suppressor", "Test", "Tables"]

src/ScientificTypesBase.jl

Lines changed: 159 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
module ScientificTypesBase
22

3+
using InteractiveUtils # needed for displaying the type hierarchy with `scitype()`
4+
35
# Type exports
46
export Convention
57

@@ -31,28 +33,184 @@ abstract type Multiset{S} <: Known end
3133
# AbstractVector scitype:
3234
abstract type Iterator{Ω} end
3335

36+
"""
37+
Infinite
38+
39+
Scientific type for scalar data with an intrinsic order, but of unbounded nature, either
40+
discrete or continuous.
41+
42+
Subtypes: [`Continuous`](@ref), [`Count`](@ref)
43+
44+
See also `scitype`.
45+
46+
"""
3447
abstract type Infinite <: Known end
48+
"""
49+
Finite{N}
50+
51+
Scientific type for scalar, categorical data taking on one of `N` possible discrete values,
52+
which may or may not have a natural ordering.
53+
54+
Subtypes: [`Multiclass{N}`](@ref), [`OrderedFactor{N}`](@ref)
55+
56+
Aliases: `Binary==Finite{2}`. Binary data can be unordered (`Multiclass{2}`) or ordered
57+
(`OrderedFactor{2}`).
58+
59+
See also `scitype`.
60+
61+
"""
3562
abstract type Finite{N} <: Known end
63+
"""
64+
Image{W,H}
65+
66+
Scientific type for image data, where `W` is the width and `H` the height.
67+
68+
Subtypes: [`GrayImage{W,H}`](@ref), [`ColorImage{W,H}`](@ref)
69+
70+
See also `scitype`.
71+
72+
"""
3673
abstract type Image{W,H} <: Known end
3774
abstract type ScientificTimeType <: Known end
75+
"""
76+
Textual
77+
78+
Scientific type for text data playing some linguistic role, for example in sentiment
79+
analysis. This is to be contrasted with text used simply to label classes of a categorical
80+
variable; see instead [`Finite`](@ref).
81+
82+
Examples: survey questions with discursive answers, text to be translated into a new
83+
language, vocabularies, email messages.
84+
85+
See also `scitype`.
86+
87+
"""
3888
abstract type Textual <: Known end
89+
"""
90+
Table{K}
91+
92+
Scientific type for tabular data. Here `K` will be a union of the scitypes of the columns
93+
(not the union of the *element* scitype of the columns).
94+
95+
See also `scitype`.
96+
97+
"""
3998
abstract type Table{K} <: Known end
99+
"""
100+
Continuous
101+
102+
Scientific type for continuous scalar data.
103+
104+
Examples: height, age, blood-pressure, weight, temperature.
105+
106+
Supertype: [`Infinite`](@ref)
40107
108+
See also `scitype`.
109+
110+
"""
41111
abstract type Continuous <: Infinite end
112+
"""
113+
Count
114+
115+
Scientific type for discrete, ordered data, of unbounded nature.
116+
117+
Examples: number of phone calls per hour, number of building occupants, number of
118+
earthquakes per year over 6 on the Richter scale, number of unsaturated carbon-carbon
119+
bonds in a molecule.
120+
121+
Supertype: [`Infinite`](@ref)
122+
123+
See also `scitype`.
124+
125+
"""
42126
abstract type Count <: Infinite end
127+
"""
128+
Multiclass{N}
43129
130+
Scientific type for scalar, categorical data with `N` possible values but no natural
131+
ordering for those classes (nominal data).
132+
133+
Examples: gender, team member, model number, product color, ethnicity, zipcode
134+
135+
Supertype: [`Finite{N}`](@ref)
136+
137+
See also `scitype`.
138+
139+
"""
44140
abstract type Multiclass{N} <: Finite{N} end
141+
"""
142+
OrderedFactor{N}
143+
144+
Scientific type for scalar, categorical data with `N` possible values with a natural
145+
ordering (ordinal data).
146+
147+
Includes the binary data scientific type `OrderedFactor{2}`, applying whenever it is
148+
natural to assign a "positive" class, for example, by a standard convention (e.g., "is
149+
toxic", "is an anomaly", "has the disease"). The "positive" class is the maximal class
150+
under the ordering. The distinction is important to disambiguate statistical metrics such
151+
as "number of true positives", "recall", etc.
152+
153+
Examples: letter grade in an exam, education level, number of stars in a review,
154+
safe/toxic, inlier/outlier, rejected/accepted.
155+
156+
Supertype: [`Finite{N}`](@ref)
157+
158+
See also `scitype`.
159+
160+
"""
45161
abstract type OrderedFactor{N} <: Finite{N} end
46162

47163
abstract type ScientificDate <: ScientificTimeType end
48164
abstract type ScientificTime <: ScientificTimeType end
49165
abstract type ScientificDateTime <: ScientificTimeType end
166+
"""
167+
GrayImage{W,H}
168+
169+
Scientific type for a grey-scale image, where `W` is the width and `H` the height.
50170
171+
Supertype: [`Image{W,H}`](@ref)
172+
173+
See also `scitype`.
174+
175+
"""
51176
abstract type GrayImage{W,H} <: Image{W,H} end
177+
"""
178+
ColorImage{W,H}
179+
180+
Scientific type for a color image, where `W` is the width and `H` the height.
181+
182+
Supertype: [`Image{W,H}`](@ref)
183+
184+
See also `scitype`.
185+
186+
"""
52187
abstract type ColorImage{W,H} <: Image{W,H} end
188+
"""
189+
Sampleable{Ω}
53190
54-
# when sampled, objects with these scitypes return objects of scitype Ω:
191+
Scientific type for an object, such as a probability distribution, that can be sampled. Each
192+
individual sample `x` will satisfy `scitype(x) <: Ω`.
193+
194+
Subtype: [`Density{Ω}`](@ref)
195+
196+
See also `scitype`.
197+
198+
"""
55199
abstract type Sampleable{Ω} end
200+
"""
201+
Density{Ω}
202+
203+
Scientific type for an object representing a probability density function or probability
204+
mass function, and more generally, for any probability measure that is absolutely
205+
continuous with respect to some standard measure on the sample space. Elements `x` of the
206+
sample space will satisfy `scitype(x) <: Ω`. Objects of this type can, at least in
207+
principle, be sampled.
208+
209+
Supertype: [`Sampleable{Ω}`](@ref)
210+
211+
See also `scitype`.
212+
213+
"""
56214
abstract type Density{Ω} <: Sampleable{Ω} end
57215

58216
abstract type ManifoldPoint{M} <: Known end

src/scitype.jl

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,31 @@
11
# -----------------------------------------------------------------------------------------
2-
# This file introduces `scitype`, `Scitype` methods and associated fallbacks methods.
2+
# This file introduces `scitype`, `Scitype` methods and associated fallback methods.
33
# It also defines some convenience methods.
44
# -----------------------------------------------------------------------------------------
55

66
# -----------------------------------------------------------------------------------------
77
# scitype function (generic) with fallbacks.
8+
9+
10+
# helper to print a type hierarchy:
11+
function print_type_tree(io, T, level=0)
12+
println(io, " " ^ level, T)
13+
for S in subtypes(T)
14+
print_type_tree(io, S, level + 1)
15+
end
16+
end
17+
18+
"""
19+
scitype(;io=stdout)
20+
21+
Print to `io` the scitype hierarchy, beginning at `Found` (and so excluding `Missing` and
22+
`Nothing`).
23+
24+
Note that third party packages can extend the hierarchy, so output is not static.
25+
26+
"""
27+
scitype(; io=stdout) = print_type_tree(io, Found)
28+
829
"""
930
scitype(X, C::Convention)
1031
@@ -34,7 +55,7 @@ In general, one cannot infer the scitype of an object of type
3455
Nevertheless, for some *restricted* machine types `U`, the statement
3556
`type(X) == AbstractArray{T, N}` for some `T<:U` already allows one to
3657
deduce that `scitype(X, C) = AbstractArray{S, N}`, where `S` is determined
37-
by `U`, and convention `C` alone. This is the case in the `DefaultConvention` which is
58+
by `U`, and convention `C` alone. This is the case in the `DefaultConvention` which is
3859
used by *ScientificTypes.jl*, where for example, if `U = Integer`, then `S = Count`.
3960
4061
Such shortcuts are specified as follows:
@@ -67,7 +88,7 @@ function Fallback_Scitype(::Type{Union{T, Missing}}, C) where T
6788
return Union{Scitype(Missing, C), Scitype(T, C)}
6889
end
6990

70-
# For the case `Missing` and `Nothing`,
91+
# For the case `Missing` and `Nothing`,
7192
# we return `Missing` and `Nothing` respectively.
7293
Fallback_Scitype(::Type{Missing}, C) = Missing
7394
Fallback_Scitype(::Type{Nothing}, C) = Nothing
@@ -115,7 +136,7 @@ explicit `Scitype` correspondence exist mapping `T` to `S`.
115136
return Arr{scitype_union(A, C), N}
116137
elseif S === Union{Scitype(Missing, C), Unknown}
117138
return Arr{Union{Scitype(Missing, C), scitype_union(A, C)}, N}
118-
else
139+
else
119140
return Arr{S, N}
120141
end
121142
end

test/convention.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
using Test
2+
import ScientificTypesBase: scitype
3+
using ScientificTypesBase
4+
15
@testset "nonmissing" begin
26
U = Union{Missing,Int}
37
@test nonmissing(U) == Int
@@ -7,7 +11,8 @@ end
711
T0 = Table(Continuous)
812
@test T0 == Table{K} where K<:AbstractVector{<:Continuous}
913
T1 = Table(Continuous, Count)
10-
@test T1 == Table{K} where K<:Union{AbstractVector{<:Continuous}, AbstractVector{<:Count}}
14+
@test T1 ==
15+
Table{K} where K<:Union{AbstractVector{<:Continuous}, AbstractVector{<:Count}}
1116
T2 = Table(Continuous, Union{Missing,Continuous})
1217
@test T2 == Table{K} where K<:Union{AbstractVector{<:Union{Missing,Continuous}}}
1318
end

test/runtests.jl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,2 @@
1-
using Test, ScientificTypesBase, Tables
2-
import ScientificTypesBase: scitype
3-
const ST = ScientificTypesBase
4-
51
include("convention.jl")
6-
72
include("scitype.jl")

test/scitype.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
1+
using Test
2+
using ScientificTypesBase
3+
import ScientificTypesBase: scitype
4+
using Suppressor
5+
16
struct MockMLJ <: Convention end
27

8+
@testset "type hierarch display" begin
9+
str = sprint(io->scitype(; io))
10+
# only smoke tests, to save maintenance:
11+
@test contains(str, "Found\n Known\n")
12+
@test contains(str, "Multiclass")
13+
@test contains(str, "Textual")
14+
@suppress scitype()
15+
end
16+
317
@testset "void types" begin
418
@test scitype(nothing, MockMLJ()) == Nothing
519
@test scitype(missing, MockMLJ()) == Missing

0 commit comments

Comments
 (0)