diff --git a/Export.lean b/Export.lean index 2054d47..775323a 100644 --- a/Export.lean +++ b/Export.lean @@ -1,4 +1,5 @@ import Lean +import Export.Flags import Std.Data.HashMap.Basic open Lean @@ -30,6 +31,7 @@ def Lean.KVMap.toJson (kvs: Lean.KVMap) : Json := .mkObj <| kvs.entries.map (fun (k, v) => (k.toString, reprStr v)) structure Context where + flags : Flags env : Environment structure State where @@ -38,17 +40,15 @@ structure State where visitedExprs : HashMap Expr Nat := HashMap.emptyWithCapacity 10000000 visitedConstants : NameHashSet := {} noMDataExprs : HashMap Expr Expr := HashMap.emptyWithCapacity 100000 - exportMData : Bool := false - exportUnsafe : Bool := false /-- Maps the name of an inductive type to a list of names of corresponding recursors. This is used to facilitate exporting of related inductives, constructors, and recursors as a unit. -/ recursorMap : NameMap NameSet := {} abbrev M := ReaderT Context <| StateT State IO -def M.run (env : Environment) (act : M α) : IO α := +def M.run (flags : Flags) (env : Environment) (act : M α) : IO α := StateT.run' (s := {}) do - ReaderT.run (r := { env }) do + ReaderT.run (r := { env, flags }) do act /-- @@ -58,7 +58,7 @@ of relevant recursors, which is used to ensure that for any inductive declaration, the inductives, constructors, and recursors are exported together in that order. -/ -def initState (env : Environment) (cliOptions : List String := []) : M Unit := do +def initState (env : Environment) : M Unit := do let mut recursorMap : NameMap NameSet := {} for (_, constInfo) in env.constants do if let .recInfo recVal := constInfo then @@ -67,11 +67,7 @@ def initState (env : Environment) (cliOptions : List String := []) : M Unit := d fun | none => some <| NameSet.empty.insert recVal.name | some recNames => some <| recNames.insert recVal.name - modify fun st => { st with - exportMData := cliOptions.any (· == "--export-mdata") - exportUnsafe := cliOptions.any (· == "--export-unsafe") - recursorMap - } + modify fun st => { st with recursorMap } /-- For a given primitive (name, level, expr) to be exported: @@ -228,12 +224,12 @@ partial def dumpExpr (e : Expr) : M Nat := do let aux (e : Expr) : M Expr := do modify (fun st => { st with noMDataExprs := HashMap.emptyWithCapacity }) removeMData e - dumpExprAux <| ← if (← get).exportMData then pure e else aux e + dumpExprAux <| ← if (← read).flags.exportMData then pure e else aux e partial def dumpConstant (c : Name) : M Unit := do let some declar := (← read).env.find? c | panic! s!"Constant {c} not found in environment." - if (declar.isUnsafe && !(← get).exportUnsafe) || (← get).visitedConstants.contains c then + if (← get).visitedConstants.contains c then return modify fun st => { st with visitedConstants := st.visitedConstants.insert c } match declar with @@ -307,12 +303,12 @@ partial def dumpConstant (c : Name) : M Unit := do let mut recursorNames := NameSet.empty for indName in baseIndVal.all do let val := ((← read).env.find? indName |>.get!).inductiveVal! - assert! ((!val.isUnsafe) || (← get).exportUnsafe) + assert! ((!val.isUnsafe) || (← read).flags.exportUnsafe) indVals := indVals.push val for ctor in val.ctors do match ((← read).env.find? ctor |>.get!) with | .ctorInfo ctorVal => - assert! ((!ctorVal.isUnsafe) || (← get).exportUnsafe) + assert! ((!ctorVal.isUnsafe) || (← read).flags.exportUnsafe) ctorVals := ctorVals.push ctorVal | _ => panic! "Expected a `ConstantInfo.ctorInfo`." modify fun st => { st with visitedConstants:= st.visitedConstants.insert indName } @@ -332,7 +328,7 @@ partial def dumpConstant (c : Name) : M Unit := do for recursorName in recursorNames do match ((← read).env.find? recursorName |>.get!) with | .recInfo recVal => - assert! ((!recVal.isUnsafe) || (← get).exportUnsafe) + assert! ((!recVal.isUnsafe) || (← read).flags.exportUnsafe) recursorVals := recursorVals.push recVal | _ => panic! "expected a `constantinfo.recinfo`." for recursorVal in recursorVals do diff --git a/Export/CLIArgs.lean b/Export/CLIArgs.lean new file mode 100644 index 0000000..77f2f0a --- /dev/null +++ b/Export/CLIArgs.lean @@ -0,0 +1,84 @@ +module + +public import Lean +public import Export.Flags +open Lean + +/-- +A command-line mention of a module can instruct lean4export to load a module, +or it can add all of the module's theorems to the root set of constants to +export. +-/ +public structure Include where + name : Name + includeAllTheorems : Bool +deriving Repr + +public structure LeanExportOpts extends Flags where + modules : Array Include + constants : List Name +deriving Repr + +public def parseOpts : List String → Except String LeanExportOpts := go {} #[] +where + go (flags : Flags) (modules : Array Include) : List String → Except String LeanExportOpts + | "--" :: [] => + throw "The argument `--` must be followed by at least one constant" + | "--" :: rest => do + let constants ← rest.mapM fun constant => do + let .some name := Syntax.decodeNameLit ("`" ++ constant) + | throw s!"Could not turn constant `{constant}` into an identifier" + return name + return ⟨flags, modules, constants⟩ + | [] => + if modules.size = 0 && !flags.printHelp then + throw "At least one module must be specified" + else + return ⟨flags, modules, []⟩ + | "-h" :: rest => go { flags with printHelp := true } modules rest + | "--help" :: rest => go { flags with printHelp := true } modules rest + | "--export-mdata" :: rest => go { flags with exportMData := true } modules rest + | "--export-unsafe" :: rest => go { flags with exportUnsafe := true } modules rest + | "--all-theorems" :: mod :: rest => do + let .some name := Syntax.decodeNameLit ("`" ++ mod) + | throw s!"Could not turn module name `{mod}` to an identifier" + go flags (modules.push ⟨name, true⟩) rest + | mod :: rest => do + let .some name := Syntax.decodeNameLit ("`" ++ mod) + | throw s!"Could not turn module name `{mod}` to an identifier" + go flags (modules.push ⟨name, false⟩) rest + +public def LeanExportOpts.shouldExportEverything (opts : LeanExportOpts) : Bool := + !opts.printHelp && opts.constants.length = 0 && opts.modules.all (not ·.includeAllTheorems) + +/-- +From a command line configuration, gets the root set of constants that the +exported environment must support. +-/ +public def getRootConstants (env : Environment) (opts : LeanExportOpts) : List Name := + if opts.shouldExportEverything then + -- Export "everything" for some value of everything + env.constants.toList.filterMap fun const => + if const.fst.isInternal then + .none + else if !opts.exportUnsafe && const.snd.isUnsafe then + .none + else + .some const.fst + + else + -- Export selected constants + opts.modules.filter (·.includeAllTheorems) + |>.map (·.name) + |>.foldl (β := NameSet) (init := NameSet.ofList opts.constants) (fun set mod => + -- Get module data from environment + let moduleIdx := env.getModuleIdx? mod |>.get! + let moduleData := env.header.moduleData[moduleIdx]! + + -- Read all theorems (NOTE: includes private/internal theorems) + let moduleTheorems := moduleData.constants + |>.filterMap fun + | .thmInfo thm => .some thm.name + | _ => .none + moduleTheorems.foldl (init := set) (fun set name => NameSet.insert set name)) + |>.toList diff --git a/Export/Flags.lean b/Export/Flags.lean new file mode 100644 index 0000000..4525438 --- /dev/null +++ b/Export/Flags.lean @@ -0,0 +1,7 @@ +module + +public structure Flags where + printHelp : Bool := false + exportMData : Bool := false + exportUnsafe : Bool := false +deriving Repr diff --git a/Main.lean b/Main.lean index 806fcf7..d3fa89d 100644 --- a/Main.lean +++ b/Main.lean @@ -1,18 +1,40 @@ import Export +import Export.CLIArgs open Lean -def main (args : List String) : IO Unit := do +def usage := "Usage: + lean4export (-h | --help) + lean4export [--export-mdata] [--export-unsafe] + [MODULE_NAME...] [(--all-theorems MODULE_NAME)...] + [-- CONSTANT...]" + +def main (args : List String) : IO UInt32 := do initSearchPath (← findSysroot) - let (opts, args) := args.partition (fun s => s.startsWith "--" && s.length ≥ 3) - let (imports, constants) := args.span (· != "--") - let imports := imports.toArray.map fun mod => { module := Syntax.decodeNameLit ("`" ++ mod) |>.get! } - let env ← importModules imports {} - let constants := match constants.tail? with - | some cs => cs.map fun c => Syntax.decodeNameLit ("`" ++ c) |>.get! - | none => env.constants.toList.map Prod.fst |>.filter (!·.isInternal) - M.run env do - let _ ← initState env opts - dumpMetadata - for c in constants do - modify (fun st => { st with noMDataExprs := {} }) - dumpConstant c + match parseOpts args with + | .error msg => do + IO.eprintln s!"{msg}\n\n{usage}\n" + return 1 + + | .ok opts => do + if opts.printHelp then + IO.eprintln usage + return 0 + + let env ← importModules (opts.modules.map ({ module := ·.name })) {} + let missingConstants ← opts.constants.filterM fun name => do + if env.constants.contains name then + return false + IO.eprintln s!"Required constant {name} not found in environment" + return true + if missingConstants.length > 0 then + return 1 + + let constants := getRootConstants env opts + + M.run opts.toFlags env do + initState env + dumpMetadata + for c in constants do + modify (fun st => { st with noMDataExprs := {} }) + dumpConstant c + return 0 diff --git a/README.md b/README.md index e692109..5e259f2 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,25 @@ We can invoke the exporter on the "top level" mathlib file and export mathlib wi ```sh lake env ../.lake/build/bin/lean4export Mathlib > out.ndjson ``` -This exports the contents of the given Lean module (here just the top level `Mathlib` module in the `Mathlib.lean` file), and its transitive dependencies. A specific list of declarations to be exported from these modules can be given after a separating `--`, and more than one module can be passed to the initial invocation by including more than one name (separted with a space). +This exports all contents of the given Lean module (here just the top level `Mathlib` module in the `Mathlib.lean` file), and all the contents of that module's transitive dependencies. + +More than one module can be passed to the initial invocation by including more than one name (separated with a space). ### Options The option `--export-unsafe` can be used to include unsafe declarations in the export file. This may be useful for testing and debugging other tools, where unsafe declarations can serve as negative examples. The option `--export-mdata"` can be used to include `Expr.mdata` items in the export file, which are removed by default as they should not have an effect on type checking. + +The option `--help` prints usage information and then exits. + +The default behavior of exporting all constants can be modified by describing specific constants to export. Lean4export will import only those constants and their transitive dependencies. + + * The argument `--all-theorems ` will include all theorems in a specific module. + * The argument `--` can be followed by a list of specific constants. + +As an example, this command will export only the constants necessary to support the theorem `EuclideanGeometry.dist_inversion_inversion` as well all the theorems in Mathlib's `Mathlib/Algebra/Module/NatInt.lean` module. +```sh +lake env ../.lake/build/bin/lean4export Mathlib --theorems-from Mathlib.Algebra.Module.NatInt -- EuclideanGeometry.dist_inversion_inversion +``` + diff --git a/Test.lean b/Test.lean index 0394cfb..8b4d295 100644 --- a/Test.lean +++ b/Test.lean @@ -1,14 +1,15 @@ import Export +import Export.CLIArgs import Export.Parse open Lean def run (act : M α) : MetaM Unit := do let env ← getEnv - let _ ← M.run env (do initState env; act) + let _ ← M.run {} env (do initState env; act) def runEmpty (act : M α) : MetaM Unit := do let env ← Lean.mkEmptyEnvironment - let _ ← M.run env (do initState env; act) + let _ ← M.run {} env (do initState env; act) /-- info: {"in":1,"str":{"pre":0,"str":"foo"}} @@ -721,3 +722,129 @@ literals #guard_msgs in #eval runParserTest do dumpConstant ``literals + +def runOpts (args : List String) : MetaM (Option (List Name) × LeanExportOpts) := do + let env ← getEnv + match parseOpts args with + | .error msg => throwError msg + | .ok opts => + if opts.shouldExportEverything then + return ⟨.none, opts⟩ + else + return ⟨.some <| getRootConstants env opts, opts⟩ + +/-- error: Could not turn constant `bad const` into an identifier -/ +#guard_msgs in +#eval runOpts ["--", "bad const"] + +/-- error: Could not turn constant `bad module` into an identifier -/ +#guard_msgs in +#eval runOpts ["--", "bad module"] + +/-- error: At least one module must be specified -/ +#guard_msgs in +#eval runOpts [] + +/-- +info: (some [], + { toFlags := { printHelp := true, exportMData := false, exportUnsafe := false }, modules := #[], constants := [] }) +-/ +#guard_msgs in +#eval runOpts ["-h"] + +/-- +info: (some [`PUnit], + { toFlags := { printHelp := false, exportMData := false, exportUnsafe := false }, + modules := #[], + constants := [`PUnit] }) +-/ +#guard_msgs in +#eval runOpts ["--", "PUnit"] + +/-- +info: (some [`bar, `baz, `foo], + { toFlags := { printHelp := false, exportMData := false, exportUnsafe := false }, + modules := #[], + constants := [`foo, `bar, `baz] }) +-/ +#guard_msgs in +#eval runOpts ["--", "foo", "bar", "baz"] + +/-- +info: (some [`PUnit], + { toFlags := { printHelp := false, exportMData := false, exportUnsafe := false }, + modules := #[{ name := `Lean, includeAllTheorems := false }, { name := `Prelude, includeAllTheorems := false }], + constants := [`PUnit] }) +-/ +#guard_msgs in +#eval runOpts ["Lean", "Prelude", "--", "PUnit"] + +/-- +info: (some [], + { toFlags := { printHelp := false, exportMData := false, exportUnsafe := false }, + modules := #[{ name := `Lean, includeAllTheorems := true }], + constants := [] }) +-/ +#guard_msgs in +#eval runOpts ["--all-theorems", "Lean"] + +/-- +info: (some [`Lean.DataValue.ofString.injEq, + `Lean.DataValue.ofInt.inj, + `Lean.DataValue.ofSyntax.sizeOf_spec, + `Lean.KVMap.mk.inj, + `Lean.DataValue.ofBool.sizeOf_spec, + `Lean.KVMap.mk.injEq, + `Lean.DataValue.ofSyntax.injEq, + `Lean.DataValue.ofNat.injEq, + `Lean.DataValue.ofName.injEq, + `Lean.DataValue.ofNat.sizeOf_spec, + `Lean.DataValue.ofInt.sizeOf_spec, + `Lean.DataValue.ofBool.inj, + `Lean.DataValue.ofNat.inj, + `Lean.DataValue.ofBool.injEq, + `Lean.DataValue.ofString.inj, + `Lean.DataValue.ofInt.injEq, + `Lean.KVMap.mk.sizeOf_spec, + `Lean.DataValue.ofSyntax.inj, + `Lean.DataValue.ofName.inj, + `Lean.DataValue.ofString.sizeOf_spec, + `Lean.DataValue.ofName.sizeOf_spec], + { toFlags := { printHelp := false, exportMData := false, exportUnsafe := false }, + modules := #[{ name := `Lean, includeAllTheorems := false }, + { name := `Lean.Data.KVMap, includeAllTheorems := true }], + constants := [] }) +-/ +#guard_msgs in +#eval runOpts ["Lean", "--all-theorems", "Lean.Data.KVMap"] + +/-- +info: (some [`Lean.DataValue.ofString.injEq, + `Lean.DataValue.ofInt.inj, + `Lean.DataValue.ofSyntax.sizeOf_spec, + `Lean.KVMap.mk.inj, + `Lean.DataValue.ofBool.sizeOf_spec, + `Lean.KVMap.mk.injEq, + `Lean.DataValue.ofSyntax.injEq, + `Lean.DataValue.ofNat.injEq, + `Lean.DataValue.ofName.injEq, + `Lean.DataValue.ofNat.sizeOf_spec, + `Lean.DataValue.ofInt.sizeOf_spec, + `Lean.DataValue.ofBool.inj, + `Lean.DataValue.ofNat.inj, + `Lean.DataValue.ofBool.injEq, + `Lean.DataValue.ofString.inj, + `Lean.DataValue.ofInt.injEq, + `Lean.KVMap.mk.sizeOf_spec, + `Lean.DataValue.ofSyntax.inj, + `Lean.DataValue.ofName.inj, + `Lean.DataValue.ofString.sizeOf_spec, + `additionalConst, + `Lean.DataValue.ofName.sizeOf_spec], + { toFlags := { printHelp := false, exportMData := false, exportUnsafe := false }, + modules := #[{ name := `Lean, includeAllTheorems := false }, + { name := `Lean.Data.KVMap, includeAllTheorems := true }], + constants := [`additionalConst] }) +-/ +#guard_msgs in +#eval runOpts ["Lean", "--all-theorems", "Lean.Data.KVMap", "--", "additionalConst"]