Skip to content
Merged
9 changes: 9 additions & 0 deletions .github/codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
coverage:
status:
project:
default:
threshold: 0.5%
removed_code_behavior: fully_covered_patch
patch:
default:
target: 80
41 changes: 17 additions & 24 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
name: CI
on:
pull_request:
branches:
- master
- dev
push:
branches:
- master
- dev
tags: '*'
branches: [master, dev]
tags: ["*"]
pull_request:
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
Expand All @@ -17,31 +12,29 @@ jobs:
fail-fast: false
matrix:
version:
- '1.0'
- '1'
- '1' # automatically expands to the latest stable 1.x release of Julia
- '1.6'
- '1.10'
os:
- ubuntu-latest
- windows-latest
arch:
- x64
include:
- os: macOS-latest
arch: aarch64
version: 1
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v5
with:
file: lcov.info
files: lcov.info
token: ${{ secrets.CODECOV_TOKEN }}
9 changes: 7 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
name = "ScientificTypesBase"
uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>", "Thibaut Lienart <thibaut.lienart@gmail.com>"]
version = "3.0.0"
version = "3.1.0"

[deps]
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[compat]
InteractiveUtils = "1"
julia = "1"

[extras]
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Tables"]
test = ["Suppressor", "Test", "Tables"]
160 changes: 159 additions & 1 deletion src/ScientificTypesBase.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module ScientificTypesBase

using InteractiveUtils # needed for displaying the type hierarchy with `scitype()`

# Type exports
export Convention

Expand Down Expand Up @@ -31,28 +33,184 @@ abstract type Multiset{S} <: Known end
# AbstractVector scitype:
abstract type Iterator{Ω} end

"""
    Infinite

Scientific type for scalar data with an intrinsic order, but of unbounded nature, either
discrete or continuous.

Subtypes: [`Continuous`](@ref), [`Count`](@ref)

See also `scitype`.

"""
abstract type Infinite <: Known end
"""
Finite{N}

Scientific type for scalar, categorical data taking on one of `N` possible discrete values,
which may or may not have a natural ordering.

Subtypes: [`Multiclass{N}`](@ref), [`OrderedFactor{N}`](@ref)

Aliases: `Binary==Finite{2}`. Binary data can be unordered (`Multiclass{2}`) or ordered
(`OrderedFactor{2}`).

See also `scitype`.

"""
abstract type Finite{N} <: Known end
"""
Image{W,H}

Scientific type for image data, where `W` is the width and `H` the height.

Subtypes: [`GrayImage{W,H}`](@ref), [`ColorImage{W,H}`](@ref)

See also `scitype`.

"""
abstract type Image{W,H} <: Known end
# Common supertype of the scitypes `ScientificDate`, `ScientificTime` and
# `ScientificDateTime`.
abstract type ScientificTimeType <: Known end
"""
Textual

Scientific type for text data playing some linguistic role, for example in sentiment
analysis. This is to be contrasted with text used simply to label classes of a categorical
variable; see instead [`Finite`](@ref).

Examples: survey questions with discursive answers, text to be translated into a new
language, vocabularies, email messages.

See also `scitype`.

"""
abstract type Textual <: Known end
"""
Table{K}

Scientific type for tabular data. Here `K` will be a union of the scitypes of the columns
(not the union of the *element* scitypes of the columns).

See also `scitype`.

"""
abstract type Table{K} <: Known end
"""
Continuous

Scientific type for continuous scalar data.

Examples: height, age, blood-pressure, weight, temperature.

Supertype: [`Infinite`](@ref)

See also `scitype`.

"""
abstract type Continuous <: Infinite end
"""
Count

Scientific type for discrete, ordered data, of unbounded nature.

Examples: number of phone calls per hour, number of building occupants, number of
earthquakes per year over 6 on the Richter scale, number of unsaturated carbon-carbon
bonds in a molecule.

Supertype: [`Infinite`](@ref)

See also `scitype`.

"""
abstract type Count <: Infinite end
"""
Multiclass{N}

Scientific type for scalar, categorical data with `N` possible values but no natural
ordering for those classes (nominal data).

Examples: gender, team member, model number, product color, ethnicity, zipcode

Supertype: [`Finite{N}`](@ref)

See also `scitype`.

"""
abstract type Multiclass{N} <: Finite{N} end
"""
OrderedFactor{N}

Scientific type for scalar, categorical data with `N` possible values with a natural
ordering (ordinal data).

Includes the binary data scientific type `OrderedFactor{2}`, applying whenever it is
natural to assign a "positive" class, for example, by a standard convention (e.g., "is
toxic", "is an anomaly", "has the disease"). The "positive" class is the maximal class
under the ordering. The distinction is important to disambiguate statistical metrics such
as "number of true positives", "recall", etc.

Examples: letter grade in an exam, education level, number of stars in a review,
safe/toxic, inlier/outlier, rejected/accepted.

Supertype: [`Finite{N}`](@ref)

See also `scitype`.

"""
abstract type OrderedFactor{N} <: Finite{N} end

abstract type ScientificDate <: ScientificTimeType end
abstract type ScientificTime <: ScientificTimeType end
abstract type ScientificDateTime <: ScientificTimeType end
"""
GrayImage{W,H}

Scientific type for a grey-scale image, where `W` is the width and `H` the height.

Supertype: [`Image{W,H}`](@ref)

See also `scitype`.

"""
abstract type GrayImage{W,H} <: Image{W,H} end
"""
ColorImage{W,H}

Scientific type for a color image, where `W` is the width and `H` the height.

Supertype: [`Image{W,H}`](@ref)

See also `scitype`.

"""
abstract type ColorImage{W,H} <: Image{W,H} end
"""
    Sampleable{Ω}

Scientific type for an object, such as a probability distribution, that can be sampled.
When sampled, such an object returns objects of scitype `Ω`; that is, each individual
sample `x` will satisfy `scitype(x) <: Ω`.

Subtype: [`Density{Ω}`](@ref)

See also `scitype`.

"""
abstract type Sampleable{Ω} end
"""
    Density{Ω}

Scientific type for an object representing a probability density function or probability
mass function, and more generally, for any probability measure that is absolutely
continuous with respect to some standard measure on the sample space. Elements `x` of the
sample space will satisfy `scitype(x) <: Ω`. Objects of this type can, at least in
principle, be sampled.

Supertype: [`Sampleable{Ω}`](@ref)

See also `scitype`.

"""
abstract type Density{Ω} <: Sampleable{Ω} end

abstract type ManifoldPoint{M} <: Known end
Expand Down
29 changes: 25 additions & 4 deletions src/scitype.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,31 @@
# -----------------------------------------------------------------------------------------
# This file introduces `scitype`, `Scitype` methods and associated fallbacks methods.
# This file introduces `scitype`, `Scitype` methods and associated fallback methods.
# It also defines some convenience methods.
# -----------------------------------------------------------------------------------------

# -----------------------------------------------------------------------------------------
# scitype function (generic) with fallbacks.


# Recursively write the subtype tree rooted at `T` to `io`, one type per line. Each line
# is prefixed with `level` copies of the indent string, and the direct subtypes reported
# by `subtypes` are printed one level deeper than `T` itself.
function print_type_tree(io, T, level=0)
    indent = " " ^ level
    println(io, indent, T)
    foreach(S -> print_type_tree(io, S, level + 1), subtypes(T))
    return nothing
end

"""
    scitype(; io=stdout)

Display the hierarchy of scientific types by printing it to `io`. The tree is rooted at
`Found`, so `Missing` and `Nothing` do not appear in it.

Third party packages are free to extend the hierarchy, so the output is not fixed.

"""
function scitype(; io=stdout)
    return print_type_tree(io, Found)
end

"""
scitype(X, C::Convention)

Expand Down Expand Up @@ -34,7 +55,7 @@ In general, one cannot infer the scitype of an object of type
Nevertheless, for some *restricted* machine types `U`, the statement
`type(X) == AbstractArray{T, N}` for some `T<:U` already allows one
to deduce that `scitype(X, C) = AbstractArray{S, N}`, where `S` is determined
by `U`, and convention `C` alone. This is the case in the `DefaultConvention` which is
by `U`, and convention `C` alone. This is the case in the `DefaultConvention` which is
used by *ScientificTypes.jl* , where for example, if `U = Integer`, then `S = Count`.

Such shortcuts are specified as follows:
Expand Down Expand Up @@ -67,7 +88,7 @@ function Fallback_Scitype(::Type{Union{T, Missing}}, C) where T
return Union{Scitype(Missing, C), Scitype(T, C)}
end

# For the case `Missing` and `Nothing`,
# For the case `Missing` and `Nothing`,
# we return `Missing` and `Nothing` respectively.
Fallback_Scitype(::Type{Missing}, C) = Missing
Fallback_Scitype(::Type{Nothing}, C) = Nothing
Expand Down Expand Up @@ -115,7 +136,7 @@ explicit `Scitype` correspondence exist mapping `T` to `S`.
return Arr{scitype_union(A, C), N}
elseif S === Union{Scitype(Missing, C), Unknown}
return Arr{Union{Scitype(Missing, C), scitype_union(A, C)}, N}
else
else
return Arr{S, N}
end
end
Expand Down
7 changes: 6 additions & 1 deletion test/convention.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
using Test
import ScientificTypesBase: scitype
using ScientificTypesBase

@testset "nonmissing" begin
U = Union{Missing,Int}
@test nonmissing(U) == Int
Expand All @@ -7,7 +11,8 @@ end
T0 = Table(Continuous)
@test T0 == Table{K} where K<:AbstractVector{<:Continuous}
T1 = Table(Continuous, Count)
@test T1 == Table{K} where K<:Union{AbstractVector{<:Continuous}, AbstractVector{<:Count}}
@test T1 ==
Table{K} where K<:Union{AbstractVector{<:Continuous}, AbstractVector{<:Count}}
T2 = Table(Continuous, Union{Missing,Continuous})
@test T2 == Table{K} where K<:Union{AbstractVector{<:Union{Missing,Continuous}}}
end
5 changes: 0 additions & 5 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,2 @@
using Test, ScientificTypesBase, Tables
import ScientificTypesBase: scitype
const ST = ScientificTypesBase

include("convention.jl")

include("scitype.jl")
14 changes: 14 additions & 0 deletions test/scitype.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
using Test
using ScientificTypesBase
import ScientificTypesBase: scitype
using Suppressor

# Field-less `Convention` subtype; instances are passed as the convention argument to
# `scitype` in the tests below.
struct MockMLJ <: Convention end

@testset "type hierarchy display" begin
    # Capture what `scitype(; io)` prints so it can be inspected as a string.
    str = sprint(io -> scitype(; io))
    # only smoke tests, to save maintenance:
    @test contains(str, "Found\n Known\n")
    @test contains(str, "Multiclass")
    @test contains(str, "Textual")
    # Also exercise the default-`io` method, wrapped in `@suppress`.
    @suppress scitype()
end

@testset "void types" begin
@test scitype(nothing, MockMLJ()) == Nothing
@test scitype(missing, MockMLJ()) == Missing
Expand Down
Loading