diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 59fae41dd..875e9304d 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -25,7 +25,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install OS dependencies run: | - brew install llvm@14 tesseract remake + brew install llvm@14 tesseract remake libmagic - name: Install Mathics3 dependencies run: | # We can comment out after next Mathics3-Scanner release diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 820be57ec..5fca8651c 100755 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -29,7 +29,7 @@ jobs: # use --force because llvm may already exist, but it also may not exist. # so we will be safe here. Another possibility would be check and install # conditionally. - choco install --force llvm + # choco install --force llvm # choco install tesseract set LLVM_DIR="C:\Program Files\LLVM" - name: Install Mathics3 with Python dependencies diff --git a/mathics/builtin/files_io/importexport.py b/mathics/builtin/files_io/importexport.py index e7d8c5634..cfa0fb2da 100644 --- a/mathics/builtin/files_io/importexport.py +++ b/mathics/builtin/files_io/importexport.py @@ -3,7 +3,7 @@ r""" Importing and Exporting -Many kinds data formats can be read into \\Mathics. Variable +Many kinds data formats can be read into \Mathics. 
Variable :\$ExportFormats: /doc/reference-of-built-in-symbols/inputoutput-files-and-filesystem/importing-and-exporting/\$exportformats \ contains a list of file formats that are supported by @@ -25,37 +25,40 @@ from itertools import chain from urllib.error import HTTPError, URLError -from mathics.builtin.pymimesniffer import magic from mathics.core.atoms import ByteArray from mathics.core.attributes import A_NO_ATTRIBUTES, A_PROTECTED, A_READ_PROTECTED -from mathics.core.builtin import Builtin, Integer, Predefined, String, get_option +from mathics.core.builtin import Builtin, Integer, Predefined, String from mathics.core.convert.expression import to_mathics_list from mathics.core.convert.python import from_python from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression from mathics.core.list import ListExpression from mathics.core.streams import stream_manager -from mathics.core.symbols import Symbol, SymbolNull, SymbolTrue, strip_context +from mathics.core.symbols import Symbol, SymbolNull, SymbolTrue from mathics.core.systemsymbols import ( SymbolByteArray, SymbolFailed, - SymbolRule, + SymbolFileExtension, + SymbolFileFormat, + SymbolFindFile, + SymbolOpenWrite, + SymbolOutputStream, SymbolToString, ) -from mathics.eval.files_io.files import eval_Close, eval_Open +from mathics.eval.files_io.files import eval_Close +from mathics.eval.files_io.importexport import ( + IMPORTERS, + MIMETYPE_TO_SHORTNAME, + eval_Import, + filetype_from_MIME_content, + filetype_from_path, + importer_exporter_options, +) # This tells documentation how to sort this module # Here we are also hiding "file_io" since this can erroneously appear at the top level. 
sort_order = "mathics.builtin.importing-and-exporting" -mimetypes.add_type("application/vnd.wolfram.mathematica.package", ".m") - -SymbolDeleteFile = Symbol("DeleteFile") -SymbolFileExtension = Symbol("FileExtension") -SymbolFileFormat = Symbol("FileFormat") -SymbolFindFile = Symbol("FindFile") -SymbolOpenWrite = Symbol("OpenWrite") -SymbolOutputStream = Symbol("OutputStream") SymbolStringToStream = Symbol("StringToStream") SymbolWriteString = Symbol("WriteString") @@ -65,126 +68,6 @@ # TODO: Add more file formats -mimetype_dict = { - "application/dbase": "DBF", - "application/dbf": "DBF", - "application/dicom": "DICOM", - "application/eps": "EPS", - "application/fits": "FITS", - "application/json": "JSON", - "application/mathematica": "NB", - "application/mbox": "MBOX", - "application/mdb": "MDB", - "application/msaccess": "MDB", - "application/octet-stream": "OBJ", - "application/pcx": "PCX", - "application/pdf": "PDF", - "application/postscript": "EPS", - "application/rss+xml": "RSS", - "application/rtf": "RTF", - "application/sla": "STL", - "application/tga": "TGA", - "application/vnd.google-earth.kml+xml": "KML", - "application/vnd.ms-excel": "XLS", - "application/vnd.ms-pki.stl": "STL", - "application/vnd.msaccess": "MDB", - "application/vnd.oasis.opendocument.spreadsheet": "ODS", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "XLSX", # nopep8 - "application/vnd.sun.xml.calc": "SXC", - "application/vnd.wolfram.cdf": "CDF", - "application/vnd.wolfram.cdf.text": "CDF", - "application/vnd.wolfram.mathematica.package": "Package", - "application/x-3ds": "3DS", - "application/x-cdf": "NASACDF", - "application/x-eps": "EPS", - "application/x-flac": "FLAC", - "application/x-font-bdf": "BDF", - "application/x-hdf": "HDF", - "application/x-msaccess": "MDB", - "application/x-netcdf": "NetCDF", - "application/x-shockwave-flash": "SWF", - "application/x-tex": "TeX", # Also TeX - "application/xhtml+xml": "XHTML", - "application/xml": "XML", - 
"audio/aiff": "AIFF", - "audio/basic": "AU", # Also SND - "audio/midi": "MIDI", - "audio/x-aifc": "AIFF", - "audio/x-aiff": "AIFF", - "audio/x-flac": "FLAC", - "audio/x-wav": "WAV", - "chemical/seq-aa-fasta": "FASTA", - "chemical/seq-na-fasta": "FASTA", - "chemical/seq-na-fastq": "FASTQ", - "chemical/seq-na-genbank": "GenBank", - "chemical/seq-na-sff": "SFF", - "chemical/x-cif": "CIF", - "chemical/x-daylight-smiles": "SMILES", - "chemical/x-hin": "HIN", - "chemical/x-jcamp-dx": "JCAMP-DX", - "chemical/x-mdl-molfile": "MOL", - "chemical/x-mdl-sdf": "SDF", - "chemical/x-mdl-sdfile": "SDF", - "chemical/x-mdl-tgf": "TGF", - "chemical/x-mmcif": "CIF", - "chemical/x-mol2": "MOL2", - "chemical/x-mopac-input": "Table", - "chemical/x-pdb": "PDB", - "chemical/x-xyz": "XYZ", - "image/bmp": "BMP", - "image/eps": "EPS", - "image/fits": "FITS", - "image/gif": "GIF", - "image/jp2": "JPEG2000", - "image/jpeg": "JPEG", - "image/pbm": "PNM", - "image/pcx": "PCX", - "image/pict": "PICT", - "image/png": "PNG", - "image/svg+xml": "SVG", - "image/tga": "TGA", - "image/tiff": "TIFF", - "image/vnd.dxf": "DXF", - "image/vnd.microsoft.icon": "ICO", - "image/x-3ds": "3DS", - "image/x-dxf": "DXF", - "image/x-exr": "OpenEXR", - "image/x-icon": "ICO", - "image/x-ms-bmp": "BMP", - "image/x-pcx": "PCX", - "image/x-portable-anymap": "PNM", - "image/x-portable-bitmap": "PBM", - "image/x-portable-graymap": "PGM", - "image/x-portable-pixmap": "PPM", - "image/x-xbitmap": "XBM", - "model/vrml": "VRML", - "model/x-lwo": "LWO", - "model/x-pov": "POV", - "model/x3d+xml": "X3D", - "text/calendar": "ICS", - "text/comma-separated-values": "CSV", - "text/csv": "CSV", - "text/html": "HTML", - "text/mathml": "MathML", - "text/plain": "Text", - "text/rtf": "RTF", - "text/scriptlet": "SCT", - "text/tab-separated-values": "TSV", - "text/texmacs": "Text", - "text/vnd.graphviz": "DOT", - "text/x-comma-separated-values": "CSV", - "text/x-csrc": "C", - "text/x-tex": "TeX", - "text/x-vcalendar": "VCS", - 
"text/x-vcard": "VCF", - "text/xml": "XML", - "video/avi": "AVI", - "video/quicktime": "QuickTime", - "video/x-flv": "FLV", - # None: 'Binary', -} - -IMPORTERS = {} EXPORTERS = {} EXTENSIONMAPPINGS = { "*.3ds": "3DS", @@ -902,45 +785,6 @@ } -def _importer_exporter_options( - available_options, options, builtin_name: str, evaluation -): - stream_options = [] - custom_options = [] - remaining_options = options.copy() - - if available_options and available_options.has_form("List", None): - for name in available_options.elements: - if isinstance(name, String): - py_name = name.get_string_value() - elif isinstance(name, Symbol): - py_name = strip_context(name.get_name()) - else: - py_name = None - - if py_name: - option = get_option(remaining_options, py_name, evaluation, pop=True) - if option is not None: - expr = Expression(SymbolRule, String(py_name), option) - if py_name == "CharacterEncoding": - stream_options.append(expr) - else: - custom_options.append(expr) - - syntax_option = remaining_options.get("System`$OptionSyntax", None) - if syntax_option and syntax_option != Symbol("System`Ignore"): - # warn about unsupported options. - for name, value in remaining_options.items(): - evaluation.message( - builtin_name, - "optx", - Expression(SymbolRule, strip_context(name), value), - strip_context(builtin_name), - ) - - return stream_options, custom_options - - class ConverterDumpsExtensionMappings(Predefined): r""" ## :internal native symbol: @@ -1269,14 +1113,19 @@ def eval(self, url: String, elements, evaluation: Evaluation, options={}): f.close() # on some OS (e.g. Windows) all writers need to be closed before another - # reader (e.g. Import._import) can access it. so close the file here. + # reader (e.g. Import) can access it. so close the file here. 
os.close(temp_handle) - def determine_filetype(): - return mimetype_dict.get(content_type) + def determine_filetype(content_type: str) -> str: + return MIMETYPE_TO_SHORTNAME.get(content_type, "Text") - result = Import._import( - temp_path, determine_filetype, elements, evaluation, options + result = eval_Import( + temp_path, + determine_filetype, + elements, + evaluation, + options, + data=content_type, ) except HTTPError as e: evaluation.message( @@ -1313,16 +1162,16 @@ class Import(Builtin): :WMA link:https://reference.wolfram.com/language/ref/Import.html
-
'Import'["$file$"] -
imports data from a file. +
'Import'["$source$"] +
imports data from $source$. -
'Import'["$file$", "$fmt$"] +
'Import'["$source$", "$fmt$"]
imports file assuming the specified file format. -
'Import'["$file$", $elements$] +
'Import'["$source$", $elements$]
imports the specified elements from a file. -
'Import'["$file$", {"$fmt$", $elements$}] +
'Import'["$source$", {"$fmt$", $elements$}]
imports the specified elements from a file assuming the specified file format.
'Import'["http://$url$", ...] and 'Import'["ftp://$url$", ...] @@ -1362,242 +1211,44 @@ class Import(Builtin): summary_text = "import elements from a file" - def eval(self, filename, evaluation, options={}): - "Import[filename_, OptionsPattern[]]" - return self.eval_elements(filename, ListExpression(), evaluation, options) + def eval(self, source, evaluation, options={}): + "Import[source_, OptionsPattern[]]" + return self.eval_elements(source, ListExpression(), evaluation, options) - def eval_element(self, filename, element: String, evaluation, options={}): - "Import[filename_, element_String, OptionsPattern[]]" - return self.eval_elements( - filename, ListExpression(element), evaluation, options - ) + def eval_element(self, source, element: String, evaluation, options={}): + "Import[source_, element_String, OptionsPattern[]]" + return self.eval_elements(source, ListExpression(element), evaluation, options) - def eval_elements(self, filename, elements, evaluation, options={}): - "Import[filename_, elements_List?(AllTrue[#, NotOptionQ]&), OptionsPattern[]]" + def eval_elements(self, source, elements, evaluation, options={}): + "Import[source_, elements_List?(AllTrue[#, NotOptionQ]&), OptionsPattern[]]" # Check filename - path = filename.to_python() + path = source.to_python() if not (isinstance(path, str) and path[0] == path[-1] == '"'): - evaluation.message("Import", "chtype", filename) + evaluation.message("Import", "chtype", source) return SymbolFailed # Load local file - findfile = Expression(SymbolFindFile, filename).evaluate(evaluation) + findfile = Expression(SymbolFindFile, source).evaluate(evaluation) if findfile is SymbolFailed: evaluation.message("Import", "nffil") return findfile - def determine_filetype(): - return ( - Expression(SymbolFileFormat, findfile) - .evaluate(evaluation=evaluation) - .get_string_value() - ) - - return self._import(findfile, determine_filetype, elements, evaluation, options) - - @staticmethod - def _import(findfile, 
determine_filetype, elements, evaluation, options, data=None): - current_predetermined_out = evaluation.predetermined_out - # Check elements - if elements.has_form("List", None): - elements = elements.get_elements() - else: - elements = [elements] - - for el in elements: - if not isinstance(el, String): - evaluation.message("Import", "noelem", el) - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - - elements = [el.get_string_value() for el in elements] - - # Determine file type - for el in elements: - if el in IMPORTERS.keys(): - filetype = el - elements.remove(el) - break - else: - filetype = determine_filetype() - - if filetype not in IMPORTERS.keys(): - evaluation.message("Import", "fmtnosup", filetype) - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - - # Load the importer - conditionals, default_function, posts, importer_options = IMPORTERS[filetype] - - stream_options, custom_options = _importer_exporter_options( - importer_options.get("System`Options"), options, "System`Import", evaluation + data = ( + Expression(SymbolFileFormat, findfile) + .evaluate(evaluation=evaluation) + .get_string_value() ) - function_channels = importer_options.get("System`FunctionChannels") + def determine_filetype(data: str) -> str: + return data - if function_channels is None: - # TODO message - if data is None: - evaluation.message("Import", "emptyfch") - else: - evaluation.message("ImportString", "emptyfch") - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - - default_element = importer_options.get("System`DefaultElement") - if default_element is None: - # TODO message - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - - def get_results(tmp_function, findfile): - if function_channels == ListExpression(String("FileNames")): - joined_options = list(chain(stream_options, custom_options)) - tmpfile = False - if findfile is None: - tmpfile = True - stream 
= Expression(SymbolOpenWrite).evaluate(evaluation) - findfile = stream.elements[0] - if data is not None: - Expression(SymbolWriteString, data).evaluate(evaluation) - else: - Expression(SymbolWriteString, String("")).evaluate(evaluation) - eval_Close(stream, evaluation) - stream = None - import_expression = Expression(tmp_function, findfile, *joined_options) - tmp = import_expression.evaluate(evaluation) - if tmp is SymbolFailed: - return SymbolFailed - if tmpfile: - Expression(SymbolDeleteFile, findfile).evaluate(evaluation) - elif function_channels == ListExpression(String("Streams")): - if findfile is None: - stream = Expression(SymbolStringToStream, data).evaluate(evaluation) - else: - mode = "r" - if options.get("System`BinaryFormat") is SymbolTrue: - if not mode.endswith("b"): - mode += "b" - - encoding_option = options.get("System`CharacterEncoding") - encoding = ( - encoding_option.value - if isinstance(encoding_option, String) - else None - ) - - stream = eval_Open( - name=findfile, - mode=mode, - stream_type="InputStream", - encoding=encoding, - evaluation=evaluation, - ) - if stream is None: - return - if stream.get_head_name() != "System`InputStream": - evaluation.message("Import", "nffil") - evaluation.predetermined_out = current_predetermined_out - return None - tmp = Expression(tmp_function, stream, *custom_options).evaluate( - evaluation - ) - eval_Close(stream, evaluation) - else: - # TODO message - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - tmp = tmp.get_elements() - if not all(expr.has_form("Rule", None) for expr in tmp): - evaluation.predetermined_out = current_predetermined_out - return None - - # return {a.get_string_value() : b for a,b in map(lambda x: - # x.get_elements(), tmp)} - evaluation.predetermined_out = current_predetermined_out - return {a.get_string_value(): b for a, b in (x.get_elements() for x in tmp)} - - # Perform the import - defaults = None - - if not elements: - defaults = 
get_results(default_function, findfile) - if defaults is None: - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - elif defaults is SymbolFailed: - return SymbolFailed - if default_element is Symbol("Automatic"): - evaluation.predetermined_out = current_predetermined_out - return ListExpression( - *( - Expression(SymbolRule, String(key), defaults[key]) - for key in defaults.keys() - ) - ) - else: - result = defaults.get(default_element.get_string_value()) - if result is None: - evaluation.message( - "Import", "noelem", default_element, String(filetype) - ) - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - evaluation.predetermined_out = current_predetermined_out - return result - else: - assert len(elements) >= 1 - el = elements[0] - if el == "Elements": - defaults = get_results(default_function, findfile) - if defaults is None: - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - # Use set() to remove duplicates - evaluation.predetermined_out = current_predetermined_out - return from_python( - sorted( - set( - list(conditionals.keys()) - + list(defaults.keys()) - + list(posts.keys()) - ) - ) - ) - else: - if el in conditionals.keys(): - result = get_results(conditionals[el], findfile) - if result is None: - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - if len(list(result.keys())) == 1 and list(result.keys())[0] == el: - evaluation.predetermined_out = current_predetermined_out - return list(result.values())[0] - elif el in posts.keys(): - # TODO: allow use of conditionals - result = get_results(posts[el]) - if result is None: - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - else: - if defaults is None: - defaults = get_results(default_function, findfile) - if defaults is None: - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - if el in defaults.keys(): - 
evaluation.predetermined_out = current_predetermined_out - return defaults[el] - else: - evaluation.message( - "Import", "noelem", from_python(el), String(filetype) - ) - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed + return eval_Import( + findfile, determine_filetype, elements, evaluation, options, data=data + ) -class ImportString(Import): +class ImportString(Builtin): """ :WMA link: @@ -1608,7 +1259,7 @@ class ImportString(Import):
imports data in the specified format from a string.
'ImportString'["$file$", $elements$] -
imports the specified elements from a string. +
imports the specified elements from the string $file$.
'ImportString'["$data$"]
attempts to determine the format of the string from its content. @@ -1647,40 +1298,12 @@ def eval_elements(self, data, elements, evaluation, options={}): if not (isinstance(data, String)): evaluation.message("ImportString", "string", data) return SymbolFailed - path = data.value - - def determine_filetype(): - if not FileFormat.detector: - loader = magic.MagicLoader() - loader.load() - FileFormat.detector = magic.MagicDetector(loader.mimetypes) - mime = set(FileFormat.detector.match("", data=data.to_python())) - - result = [] - for key in mimetype_dict.keys(): - if key in mime: - result.append(mimetype_dict[key]) - - # The following fixes an extremely annoying behaviour on some (not all) - # installations of Windows, where we end up classifying .csv files als XLS. - if ( - len(result) == 1 - and result[0] == "XLS" - and path.lower().endswith(".csv") - ): - return String("CSV") - - if len(result) == 0: - result = "Binary" - elif len(result) == 1: - result = result[0] - else: - return None - return result + def determine_filetype(py_data: str) -> str: + return filetype_from_MIME_content(py_data) - return self._import( - None, determine_filetype, elements, evaluation, options, data=data + return eval_Import( + None, determine_filetype, elements, evaluation, options, data=data.value ) @@ -1689,11 +1312,11 @@ class Export(Builtin): :WMA link:https://reference.wolfram.com/language/ref/Export.html
-
'Export'["$file$.$ext$", $expr$] +
'Export'["$dest$.$ext$", $expr$]
exports $expr$ to a file, using the extension $ext$ to determine the format. -
'Export'["$file$", $expr$, "$format$"] -
exports $expr$ to a file in the specified format. +
'Export'["$dest$", $expr$, "$fmt$"] +
exports data $expr$ to a file in the specified format, $fmt$.
'Export'["$file$", $exprs$, $elems$]
exports $exprs$ to a file as elements specified by $elems$. @@ -1754,33 +1377,33 @@ def _infer_form(self, filename, evaluation: Evaluation): # to allow defining specific converters return self._extdict.get(ext) - def eval(self, filename, expr, evaluation, options={}): - "Export[filename_, expr_, OptionsPattern[Export]]" + def eval(self, dest, expr, evaluation, options={}): + "Export[dest_, expr_, OptionsPattern[Export]]" - # Check filename - if not self._check_filename(filename, evaluation): + # Check dest + if not self._check_filename(dest, evaluation): return SymbolFailed # Determine Format - form = self._infer_form(filename, evaluation) + form = self._infer_form(dest, evaluation) if form is None: - evaluation.message("Export", "infer", filename) + evaluation.message("Export", "infer", dest) return SymbolFailed else: - return self.eval_elements(filename, expr, String(form), evaluation, options) + return self.eval_elements(dest, expr, String(form), evaluation, options) - def eval_element(self, filename, expr, element: String, evaluation, options={}): - "Export[filename_, expr_, element_String, OptionsPattern[]]" + def eval_element(self, dest, expr, element: String, evaluation, options={}): + "Export[dest_, expr_, element_String, OptionsPattern[]]" return self.eval_elements( - filename, expr, ListExpression(element), evaluation, options + dest, expr, ListExpression(element), evaluation, options ) - def eval_elements(self, filename, expr, elems, evaluation, options={}): - "Export[filename_, expr_, elems_List?(AllTrue[#, NotOptionQ]&), OptionsPattern[]]" + def eval_elements(self, dest, expr, elems, evaluation, options={}): + "Export[dest_, expr_, elems_List?(AllTrue[#, NotOptionQ]&), OptionsPattern[]]" # Check filename - if not self._check_filename(filename, evaluation): + if not self._check_filename(dest, evaluation): return SymbolFailed # Process elems {comp* format?, elem1*} @@ -1804,9 +1427,9 @@ def eval_elements(self, filename, expr, elems, evaluation, 
options={}): # Infer format if not present if not found_form: assert format_spec == [] - format_spec = self._infer_form(filename, evaluation) + format_spec = self._infer_form(dest, evaluation) if format_spec is None: - evaluation.message("Export", "infer", filename) + evaluation.message("Export", "infer", dest) evaluation.predetermined_out = current_predetermined_out return SymbolFailed format_spec = [format_spec] @@ -1824,7 +1447,7 @@ def eval_elements(self, filename, expr, elems, evaluation, options={}): # Load the exporter exporter_symbol, exporter_options = EXPORTERS[format_spec[0]] function_channels = exporter_options.get("System`FunctionChannels") - stream_options, custom_options = _importer_exporter_options( + stream_options, custom_options = importer_exporter_options( exporter_options.get("System`Options"), options, "System`Export", evaluation ) @@ -1835,16 +1458,16 @@ def eval_elements(self, filename, expr, elems, evaluation, options={}): elif function_channels == ListExpression(String("FileNames")): exporter_function = Expression( exporter_symbol, - filename, + dest, expr, *list(chain(stream_options, custom_options)), ) res = exporter_function.evaluate(evaluation) elif function_channels == ListExpression(String("Streams")): - stream = Expression(SymbolOpenWrite, filename, *stream_options).evaluate( + stream = Expression(SymbolOpenWrite, dest, *stream_options).evaluate( evaluation ) - if stream.get_head_name() != "System`OutputStream": + if stream.head not in (SymbolOutputStream, SymbolOpenWrite): evaluation.message("Export", "nffil") evaluation.predetermined_out = current_predetermined_out return SymbolFailed @@ -1858,7 +1481,7 @@ def eval_elements(self, filename, expr, elems, evaluation, options={}): eval_Close(stream, evaluation) if res is SymbolNull: evaluation.predetermined_out = current_predetermined_out - return filename + return dest evaluation.predetermined_out = current_predetermined_out return SymbolFailed @@ -1928,12 +1551,6 @@ def 
eval_elements(self, expr, elems, evaluation: Evaluation, **options): # Just to be sure that the following evaluations do not change the value of this property current_predetermined_out = evaluation.predetermined_out - # Infer format if not present - if format_spec is None: - # evaluation.message("ExportString", "infer", filename) - evaluation.predetermined_out = current_predetermined_out - return SymbolFailed - # First item in format_spec is the explicit format. # The other elements (if present) are compression formats @@ -1951,7 +1568,7 @@ def eval_elements(self, expr, elems, evaluation: Evaluation, **options): exporter_symbol, exporter_options = EXPORTERS[format_spec[0]] function_channels = exporter_options.get("System`FunctionChannels") - stream_options, custom_options = _importer_exporter_options( + stream_options, custom_options = importer_exporter_options( exporter_options.get("System`Options"), options, "System Options", @@ -1994,7 +1611,7 @@ def eval_elements(self, expr, elems, evaluation: Evaluation, **options): res = tmpstream.read() tmpstream.close() if sys.platform not in ("win32",): - # On Windows unlink make the second NamedTemporaryFIle + # On Windows unlink make the second NamedTemporaryFile # fail giving something like: # [WinError 32] The process cannot access the file because it is being used by another process: ... 
# \\AppData\\Local\\Temp\\Mathics3-ExportString35eo_rih.svg' @@ -2067,8 +1684,6 @@ class FileFormat(Builtin): summary_text = "determine the file format of a file" - detector = None - def eval(self, filename: String, evaluation: Evaluation): "FileFormat[filename_String]" @@ -2079,39 +1694,7 @@ def eval(self, filename: String, evaluation: Evaluation): ) return findfile - path = findfile.value - if not FileFormat.detector: - loader = magic.MagicLoader() - loader.load() - FileFormat.detector = magic.MagicDetector(loader.mimetypes) - - mime = set(FileFormat.detector.match(path)) - - # If match fails match on extension only - if mime == set(): - mime, encoding = mimetypes.guess_type(path) - if mime is None: - mime = set() - else: - mime = set([mime]) - result = [] - for key in mimetype_dict.keys(): - if key in mime: - result.append(mimetype_dict[key]) - - # the following fixes an extremely annoying behaviour on some (not all) - # installations of Windows, where we end up classifying .csv files as XLS. 
- if len(result) == 1 and result[0] == "XLS" and path.lower().endswith(".csv"): - return String("CSV") - - if len(result) == 0: - result = "Binary" - elif len(result) == 1: - result = result[0] - else: - return None - - return from_python(result) + return String(filetype_from_path(findfile.value)) class B64Decode(Builtin): diff --git a/mathics/builtin/pymimesniffer/__init__.py b/mathics/builtin/pymimesniffer/__init__.py deleted file mode 100644 index 56fafa58b..000000000 --- a/mathics/builtin/pymimesniffer/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- diff --git a/mathics/builtin/pymimesniffer/magic.py b/mathics/builtin/pymimesniffer/magic.py deleted file mode 100644 index 38d63b35f..000000000 --- a/mathics/builtin/pymimesniffer/magic.py +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import logging -import os.path -import sys - - -class MagicRule: - def __init__( - self, - mimeType, - parentType, - extensions, - allowsLeadingWhiteSpace, - magicNumbers, - magicStrings, - ): - self.mimeType = mimeType - self.parentType = parentType - self.extensions = extensions - self.allowsLeadingWhiteSpace = allowsLeadingWhiteSpace - self.magicNumbers = magicNumbers - self.magicStrings = magicStrings - - def __repr__(self): - return "" % self.mimeType - - -class MagicDetector: - def __init__(self, mimetypes): - self.mimetypes = mimetypes - - def match(self, filename, data=None): - matches = {} - - if not data: - file = open(filename, "rb") - buf = b"" - elif isinstance(data, str): - from io import StringIO - - file = StringIO(data) - matches["text/plain"] = self.mimetypes["text/plain"] - buf = "" - elif hasattr(data, "read"): - buf = b"" - file = data - else: - from io import BytesIO - - file = BytesIO(data) - buf = b"" - - ext = os.path.splitext(filename)[1] - - if ext: - ext = ext[1:] - - for mimetype, rules in self.mimetypes.items(): - for rule in rules: - if rule.parentType and rule.parentType 
not in list(matches.keys()): - continue - - if rule.extensions and ext != "" and ext not in rule.extensions: - continue - - for offset, value in rule.magicNumbers: - if offset + len(value) > len(buf): - buf += file.read(offset + len(value) - len(buf)) - - if buf[offset : offset + len(value)] == value: - matches[mimetype] = rule - break - - for caseSensitive, value in rule.magicStrings: - if len(value) > len(buf): - buf += file.read(len(value) - len(buf)) - - if buf[: len(value)] == value: - matches[mimetype] = rule - break - - return list(matches.keys()) - - -class MagicLoader: - def __init__(self, filename=None): - if not filename: - filename = os.path.join(os.path.dirname(__file__), "mimetypes.xml") - - if not os.path.isfile(filename): - raise IOError("magic mime type database '%s' doesn't exists" % filename) - - self.filename = filename - self.mimetypes = {} - - def getText(self, node, name=None): - text = b"" - - if name: - for child in node.getElementsByTagName(name): - text += self.getText(child).encode("utf-8", "ignore") - else: - for child in node.childNodes: - if child.nodeType == child.TEXT_NODE: - text += child.data.encode("utf-8", "ignore") - - return text.decode("utf-8") - - def getAttr(self, node, attr, default=""): - if not node.hasAttribute(attr): - return default - - return type(default)(node.getAttribute(attr)) - - def load(self, filename=None): - from binascii import unhexlify - from xml.dom.minidom import parse - - dom = parse(filename or self.filename) - - logging.info("loading magic database from %s", filename or self.filename) - - descriptions = dom.getElementsByTagName("description") - - for desc in descriptions: - mimeType = self.getText(desc, "mimeType") - parentType = self.getText(desc, "parentType") - extensions = self.getText(desc, "extensions").split(",") - allowsLeadingWhiteSpace = ( - self.getText(desc, "allowsLeadingWhiteSpace") == "true" - ) - - magicNumbers = [] - - for magicNumber in desc.getElementsByTagName("magicNumber"): - 
encoding = self.getAttr(magicNumber, "encoding", "string") - offset = self.getAttr(magicNumber, "offset", 0) - value = self.getText(magicNumber) - - if encoding == "hex": - value = unhexlify(value.replace(" ", "").encode("ascii")) - - magicNumbers.append((offset, value)) - - magicStrings = [] - - for magicString in desc.getElementsByTagName("magicString"): - caseSensitive = not ( - self.getAttr(magicString, "caseSensitive") == "false" - ) - value = self.getText(magicString) - - magicStrings.append((caseSensitive, value)) - - self.mimetypes.setdefault(mimeType, []).append( - MagicRule( - mimeType, - parentType, - extensions, - allowsLeadingWhiteSpace, - magicNumbers, - magicStrings, - ) - ) - - logging.info( - "loaded %d rules for %d MIME types from magic database", - len(descriptions), - len(self.mimetypes), - ) - - return len(descriptions) - - def reload(self): - self.mimetypes = {} - self.load() - - -import unittest - - -class TestDetector(unittest.TestCase): - detector = None - - def setUp(self): - if not self.detector: - loader = MagicLoader() - loader.load() - self.detector = MagicDetector(loader.mimetypes) - - def testMagicNumber(self): - self.assertEqual(["application/zip"], self.detector.match("test.zip", "PKtest")) - self.assertEqual([], self.detector.match("test.zip", "_PKtest")) - self.assertEqual([], self.detector.match("test.zip1", "PKtest")) - - self.assertEqual( - ["application/gzip"], self.detector.match("test.gz", "\x1f\x8b\x08test") - ) - self.assertEqual( - ["application/gzip"], self.detector.match("test.tgz", "\x1f\x8b\x08test") - ) - self.assertEqual([], self.detector.match("test.gz1", "\x1f\x8b\x08test")) - self.assertEqual([], self.detector.match("test.gz", "\x1f \x8b\x08test")) - - padding = "".join([" " for _ in range(257)]) - - self.assertEqual( - ["application/x-tar"], - self.detector.match("test.tar", padding + "ustartest"), - ) - self.assertEqual([], self.detector.match("test.tar1", padding + "ustartest")) - self.assertEqual([], 
self.detector.match("test.tar", padding + "ust artest")) - - -class TestLoader(unittest.TestCase): - def testInit(self): - self.assertRaises(IOError, MagicLoader, "not_exists_file") - - self.assert_(MagicLoader().filename) - - def testLoad(self): - loader = MagicLoader() - - self.assertFalse(loader.mimetypes) - - self.assert_(loader.load() > 0) - - self.assert_(loader.mimetypes) - - -def dump(mimetypes): - for type, rules in mimetypes.items(): - print(type) - - for rule in rules: - print(("\textenions = %s" % rule.extensions)) - print(("\tmagic num = %s" % rule.magicNumbers)) - print(("\tmagic str = %s" % rule.magicStrings)) - - -if __name__ == "__main__": - logging.basicConfig( - level=logging.DEBUG if "-v" in sys.argv else logging.WARN, - format="%(asctime)s %(levelname)-8s %(message)s", - ) - - unittest.main() diff --git a/mathics/builtin/pymimesniffer/mimetypes.xml b/mathics/builtin/pymimesniffer/mimetypes.xml deleted file mode 100644 index acbd0fc6e..000000000 --- a/mathics/builtin/pymimesniffer/mimetypes.xml +++ /dev/null @@ -1,1182 +0,0 @@ - - - - - - - - - - application/zip - zip - PK - - - - application/gzip - gz,tgz - 1f 8b 08 - - - - application/x-compress - z - 1f 9d 90 - - - - application/bzip2 - bz2,tbz2 - 42 5a 68 39 31 - - - - application/x-tar - ustar - tar - - - - application/x-rar-compressed - rar - 52 61 72 21 1a - - - - application/stuffit - sit - SIT! 
- - - - application/binhex - hqx - - - - application/vnd.ms-cab-compressed - cab - MSCF - - - - application/x-installshield-compressedfile - ISc( - - - - - - text/html - html,htm,htc,shtml,jsp,jspf,php,asp,xhtml - true - <HTML - <HEAD - <BODY - <!DOCTYPE HTML - <!-- - <TITLE - <H1> - - - - text/xml - xml - <?xml - - - - - application/xhtml+xml - html,htm,htc,shtml,jsp,jspf,php,asp,xhtml - text/xml - - - - application/xslt+xml - xsl,xslt - text/xml - - - - text/vnd.wap.wml - wml - text/xml - - - - application/rdf+xml - rdf,rdfs - text/xml - - - - application/owl+xml - owl - text/xml - - - - application/trix - trix - text/xml - - - - application/x-turtle - ttl - - - - text/rdf+n3 - n3 - - - - text/css - css - - - - text/javascript - js - - - - application/json - json - - - - application/java-archive - jar - application/zip - - - - application/x-java-webarchive - war - application/zip - - - - application/x-java-enterprisearchive - ear - application/zip - - - - application/x-url - [InternetShortcut] - url - - - - application/vnd.adobe.air-application-installer-package+zip - air - application/zip - - - - - - application/vnd.sun.xml.calc - application/zip - sxc - - - - application/vnd.sun.xml.draw - application/zip - sxd - - - - application/vnd.sun.xml.impress - application/zip - sxi - - - - application/vnd.sun.xml.writer - application/zip - sxw - - - - application/vnd.sun.xml.math - application/zip - sxm - - - - application/vnd.sun.xml.calc.template - application/zip - stc - - - - application/vnd.sun.xml.draw.template - application/zip - std - - - - application/vnd.sun.xml.impress.template - application/zip - sti - - - - application/vnd.sun.xml.writer.template - application/zip - stw - - - - - - application/vnd.oasis.opendocument.spreadsheet - application/zip - ods - - - - application/vnd.oasis.opendocument.graphics - application/zip - odg - - - - application/vnd.oasis.opendocument.presentation - application/zip - odp - - - - application/vnd.oasis.opendocument.text - 
application/zip - odt - - - - application/vnd.oasis.opendocument.formula - application/zip - odf - - - - application/vnd.oasis.opendocument.spreadsheet-template - application/zip - ots - - - - application/vnd.oasis.opendocument.graphics-template - application/zip - otg - - - - application/vnd.oasis.opendocument.presentation-template - application/zip - otp - - - - application/vnd.oasis.opendocument.text-template - application/zip - ott - - - - - - - - application/vnd.ms-office - d0 cf 11 e0 a1 b1 1a e1 00 00 00 00 00 00 00 00 - - - - application/vnd.ms-word - application/vnd.ms-office - doc,dot - - - - application/vnd.ms-excel - application/vnd.ms-office - xls,xlt - - - - application/vnd.ms-powerpoint - application/vnd.ms-office - ppt,pot,pps - - - - application/vnd.visio - application/vnd.ms-office - vsd,vst,vss - - - - - - application/x-mspublisher - application/vnd.ms-office - pub - - - - application/x-slk - slk,sylk - - - - - - application/vnd.openxmlformats-officedocument.wordprocessingml - application/zip - docx,docm,dotx,dotm - - - - application/vnd.openxmlformats-officedocument.spreadsheetml - application/zip - xlsx,xlsm,xltx,xltm,xlsb,xlam - - - - application/vnd.openxmlformats-officedocument.presentationml - application/zip - pptx,pptm,potx,potm,ppam,ppsx,ppsm - - - - application/vnd.ms-xpsdocument - application/zip - xps - - - - - - - application/vnd.stardivision.impress - application/vnd.ms-office - sdd - - - - application/vnd.stardivision.draw - application/vnd.ms-office - sda - - - - application/vnd.stardivision.writer - application/vnd.ms-office - sdw - - - - application/vnd.stardivision.calc - application/vnd.ms-office - sdc - - - - - - - application/vnd.ms-works - application/vnd.ms-office - wps,xlr - - - - application/vnd.ms-works - wks - ff 00 02 00 04 04 05 54 02 00 - - - - application/vnd.ms-works-db - application/vnd.ms-office - wdb - - - - - - application/vnd.wordperfect - wp,wpd,wpf,wpt,wpw,wp5,wp51,wp6,w60,w61 - ff 57 50 43 - - - - 
application/x-quattropro - application/vnd.ms-office - qpw,wb3 - - - - application/wb2 - wb2 - 00 00 02 00 - - - - - application/presentations - application/vnd.ms-office - shw - - - - application/presentations - application/vnd.wordperfect - shw - - - - - - message/rfc822 - eml,mht,mhtml - - - Return-Path: - From: - Date: - Forward to - Pipe to - Relay-Version: - #! rnews - N#! rnews - - - Path: - Xref: - Article - - - - application/vnd.ms-outlook - pst - 21 42 44 4e - - - - application/vnd.ms-outlookexpress - dbx - 4a 4d 46 36 03 00 10 00 - cf ad 12 fe c5 fd 74 6f 66 e3 d1 11 9a 4e 00 c0 - - - - - - text/plain - txt,1st,me,text,ans,asc,csv,tsv,faq,c,h,tex,latex,pv,log,nt - - - - text/java - java - - - - application/x-java-manifest - Manifest-Version: - - - - text/rtf - rtf - {\rtf - - - - application/pdf - pdf - %PDF- - - - - application/x-framemaker - book,fm,mif,mf - <MakerFile - <MIFFile - <MakerDictionary - <MakerScreenFont - <MML - <BookFile - <Maker - - - - application/postscript - ps - %! 
- - - - application/winhlp - hlp - ?_ - - - - application/x-chm - chm - ITSF - - - - application/x-freemind - mm - <map version - - - - - - application/x-ms-dos-executable - exe - MZ - - - - application/x-ms-scr - application/x-ms-dos-executable - scr - - - - application/x-ms-shortcut - lnk - 4c 00 00 00 01 14 02 00 00 00 00 00 c0 00 00 00 00 00 00 46 - - - - application/bat - bat - - - - application/x-java-class - class - ca fe ba be - - - - application/x-sh - sh - #!/bin/sh - #!/usr/bin/sh - - - - application/x-csh - csh - #!/bin/csh - #!/usr/bin/csh - - - - application/x-bash - bash - #!/bin/bash - #!/usr/bin/bash - - - - application/x-ksh - ksh - #!/bin/ksh - #!/usr/bin/ksh - - - - application/x-tsh - tsh - #!/bin/tsh - #!/usr/bin/tsh - - - - application/x-applescript - scpt - - - - - - image/bmp - bmp - BM - - - - image/gif - gif - GIF8 - - - - image/jpeg - jpg,jpeg - ff d8 ff - - - - image/png - png - 89 50 4e 47 0d 0a 1a - - - - image/svg+xml - svg - text/xml - - - - image/x-icon - ico - 00 00 01 00 - - - - image/x-raw - raw - - - - image/x-tga - tga - - - - image/x-portable-bitmap - pbm - P1 - P4 - - - - image/x-portable-greymap - pgm - P2 - P5 - - - - image/x-portable-pixmap - ppm - P3 - P6 - - - - image/tiff - tif,tiff - 4d 4d 00 2a - 49 49 2a 00 - - - - image/dng - dng - image/tiff - - - - image/x-paintshoppro - psp - Paint Shop Pro Image File - - - - image/xcf - xcf - 67 69 6d 70 20 78 63 66 20 - - - - application/vnd.corel-draw - cdr - CDRA - - - - image/x-xfig - fig - #FIG - - - - image/wmf - wmf - d7 cd c6 9a 00 00 - 01 00 09 00 00 03 - - - - image/x-xbitmap - xbm - - - - image/xpm - xpm - 2f 2a 20 58 50 4d 20 2a 2f 0a - - - - image/x-dwf - dwf - (DWF - - - - image/x-dwg - dwg - AC - - - - image/x-dxf - dxf - - - - image/x-itunes-albumartwork - itc - itch - - - - - - video/x-ms-asf - asf - 30 26 b2 75 8e 66 cf 11 a6 d9 00 aa 00 62 ce 6c - - - - video/x-ms-asx - asx - <asx - <ASX - - - - audio/x-ms-wax - wax - - - - video/x-ms-wvx - wvx - - - - 
video/x-ms-wmx - wmx - - - - video/x-msvideo - avi - 41 56 49 20 - - - - - - application/x-ms-wm - 30 26 b2 75 8e 66 cf 11 a6 d9 00 aa 00 62 ce 6c - - - - audio/x-ms-wma - application/x-ms-wm - wma - - - - video/x-ms-wmv - application/x-ms-wm - wmv,wm - - - - video/quicktime - mov - moov - - - - video/mpeg - mpg,mpeg - 00 00 01 b3 - 00 00 01 ba - - - - application/x-shockwave-flash - swf - 46 57 53 - - - - application/x-ogg - ogg - OggS - - - - application/vnd.rn-realmedia - rm,ram - .RMF - rtsp:// - - - - audio/x-wav - wav - WAVE - - - - audio/mpeg - mp3,mp2 - ID3 - - - - audio/midi - mid,midi,rmi - MThd - RMI - - - - video/x-msvideo - avi - 41 56 49 20 - - - - video/mp4 - mp4,mpg4,m4v,mp4v,divx,xvid,264 - - - - audio/mp4 - m4a,m4p - - - - video/3gpp - 3gp,3g2 - - - - audio/x-aiff - aiff - FORM - - - - application/x-ms-wmd - wmd - application/zip - - - - video/x-flv - flv - FLV - - - - audio/flac - flac - 66 4c 61 43 00 00 00 22 - - - - application/smil - smi,smil - - - - - - application/x-winamp-playlist - m3u - #EXTM3U - - - - audio/x-b4s - text/xml - b4s - - - - application/xspf+xml - text/xml - xspf - - - - audio/x-scpls - pls - [playlist] - - - - audio/x-kpl - kpl - [Metadata] artist= - - - - audio/x-kapsule - text/xml - p2p - - - - audio/x-magma - magma - #MAGMA - - - - vnd.ms-wpl - wpl - <?wpl - - - - - - application/pgp-signature - -----BEGIN PGP SIGNATURE----- - - - - application/x-md5 - md5 - MD5 - - - - application/x-sha - sha,sha0,sha1,sha2,sha256,sha512 - - - - application/x-axcrypt - axx - c0 b9 07 2e 4f 93 f1 46 a0 15 79 2c a1 d9 e8 21 15 00 00 00 02 - - - - - - text/calendar - ics - BEGIN:VCALENDAR - - - - application/x-mozilla-addressbook - mab - - - - application/x-ms-registry - reg - regf - - - - application/x-bittorrent - torrent - d8:announce - - - - application/x-pom - pom - <project> - - - - application/x-ms-wmz - wmz - application/zip - - - - text/x-vcard - vcf,vcard - BEGIN:VCARD - - - diff --git a/mathics/core/systemsymbols.py 
b/mathics/core/systemsymbols.py index 0b707705b..eea8861ee 100644 --- a/mathics/core/systemsymbols.py +++ b/mathics/core/systemsymbols.py @@ -98,6 +98,7 @@ SymbolD = Symbol("System`D") SymbolDefault = Symbol("System`Default") SymbolDefinition = Symbol("System`Definition") +SymbolDeleteFile = Symbol("System`DeleteFile") SymbolDerivative = Symbol("System`Derivative") SymbolDigitCharacter = Symbol("System`DigitCharacter") SymbolDirectedInfinity = Symbol("System`DirectedInfinity") @@ -128,7 +129,10 @@ SymbolFaceGridsStyle = Symbol("System`FaceGridsStyle") SymbolFactorial = Symbol("System`Factorial") SymbolFailed = Symbol("System`$Failed") +SymbolFileExtension = Symbol("System`FileExtension") +SymbolFileFormat = Symbol("System`FileFormat") SymbolFindClusters = Symbol("System`FindClusters") +SymbolFindFile = Symbol("System`FindFile") SymbolFirst = Symbol("System`First") SymbolFloor = Symbol("System`Floor") SymbolFormBox = Symbol("System`FormBox") @@ -235,6 +239,7 @@ SymbolNumericQ = Symbol("System`NumericQ") SymbolO = Symbol("System`O") SymbolOpacity = Symbol("System`Opacity") +SymbolOpenWrite = Symbol("System`OpenWrite") SymbolOperate = Symbol("System`Operate") SymbolOptionValue = Symbol("System`OptionValue") SymbolOptional = Symbol("System`Optional") @@ -337,6 +342,7 @@ SymbolStringQ = Symbol("System`StringQ") SymbolStringRiffle = Symbol("System`StringRiffle") SymbolStringSplit = Symbol("System`StringSplit") +SymbolStringToStream = Symbol("System`StringToStream") SymbolStyle = Symbol("System`Style") SymbolStyleBox = Symbol("System`StyleBox") SymbolSubValues = Symbol("System`SubValues") @@ -386,4 +392,5 @@ SymbolWord = Symbol("System`Word") SymbolWordBoundary = Symbol("System`WordBoundary") SymbolWordCharacter = Symbol("System`WordCharacter") +SymbolWriteString = Symbol("System`WriteString") SymbolXor = Symbol("System`Xor") diff --git a/mathics/data/ExampleData/Einstein.txt b/mathics/data/ExampleData/Einstein.txt new file mode 100644 index 000000000..3da318776 Binary 
files /dev/null and b/mathics/data/ExampleData/Einstein.txt differ diff --git a/mathics/data/ExampleData/PacletServer-Install.mx b/mathics/data/ExampleData/PacletServer-Install.mx new file mode 100644 index 000000000..4c04ea3c6 Binary files /dev/null and b/mathics/data/ExampleData/PacletServer-Install.mx differ diff --git a/mathics/eval/files_io/importexport.py b/mathics/eval/files_io/importexport.py new file mode 100644 index 000000000..516376c22 --- /dev/null +++ b/mathics/eval/files_io/importexport.py @@ -0,0 +1,546 @@ +""" +Functions for figuring out a filetype or MIME type a given +file path. +""" + +import mimetypes +import os.path as osp +from itertools import chain +from typing import Dict, Final, Optional + +from mathics.core.builtin import String, get_option +from mathics.core.convert.python import from_python +from mathics.core.expression import Expression +from mathics.core.list import ListExpression +from mathics.core.symbols import Symbol, SymbolTrue, strip_context +from mathics.core.systemsymbols import ( + SymbolDeleteFile, + SymbolFailed, + SymbolInputStream, + SymbolOpenWrite, + SymbolRule, + SymbolStringToStream, + SymbolWriteString, +) +from mathics.eval.files_io.files import eval_Close, eval_Open + +IMPORTERS = {} + +try: + from magic import from_file +except ImportError: + + def from_file(path: str, mime: bool = False) -> str: + """ + Standard library implementation mimicking magic.from_file. + + Args: + path: Path to the file. + mime: If True, returns MIME type. If False, returns a description. + """ + # Guess the MIME type based on the file extension + # Example: 'image/jpeg' or 'text/x-python'. + mime_type, encoding = mimetypes.guess_type(path) + + # Handle cases where the extension is unknown. + if mime_type is None: + # Fallback to binary or plain text if the extension is missing. 
+ mime_type = "application/octet-stream" + + if mime: + return mime_type + + # Mimic the 'description' behavior of libmagic. Since mimetypes + # doesn't provide descriptions, we provide a clean label. + description = mime_type.split("/")[-1].replace("x-", "").upper() + + if encoding: + return f"{description} ({encoding} compressed)" + return f"{description} data" + + +# Note: Matlab and Objective C also use the ".m" extension! +mimetypes.add_type("application/vnd.wolfram.mathematica.package", ".m") + +# Do we need the below? +# mimetypes.add_type("application/vnd.wolfram.mathematica.package", ".wl") + +# MIMETYPE_TO_SHORTNAME is a mapping from MIME type names to short common names. +# The short common names are typically used as a file extension. + +# Here we should have *only* the entries for which +# mimetypes.guess_extension(mimetype).upper() gives a name different +# from what we have here. This happens for lowercase or mixed-case names. + +# TODO: go over to remove names that do not need to be on this list. 
MIMETYPE_TO_SHORTNAME: Final[Dict[str, str]] = {
    "application/dbase": "DBF",
    "application/dbf": "DBF",
    "application/dicom": "DICOM",
    "application/eps": "EPS",
    "application/fits": "FITS",
    "application/json": "JSON",
    "application/mathematica": "NB",
    "application/mbox": "MBOX",
    "application/mdb": "MDB",
    "application/msaccess": "MDB",
    "application/octet-stream": "OBJ",
    "application/pcx": "PCX",
    "application/pdf": "PDF",
    "application/postscript": "EPS",
    "application/rss+xml": "RSS",
    "application/rtf": "RTF",
    "application/sla": "STL",
    "application/tga": "TGA",
    "application/vnd.google-earth.kml+xml": "KML",
    "application/vnd.ms-excel": "XLS",
    "application/vnd.ms-pki.stl": "STL",
    "application/vnd.msaccess": "MDB",
    "application/vnd.oasis.opendocument.spreadsheet": "ODS",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "XLSX",  # nopep8
    "application/vnd.sun.xml.calc": "SXC",
    "application/vnd.wolfram.cdf": "CDF",
    "application/vnd.wolfram.cdf.text": "CDF",
    "application/vnd.wolfram.mathematica.package": "Package",
    "application/x-3ds": "3DS",
    "application/x-cdf": "NASACDF",
    "application/x-eps": "EPS",
    "application/x-flac": "FLAC",
    "application/x-font-bdf": "BDF",
    "application/x-hdf": "HDF",
    "application/x-msaccess": "MDB",
    "application/x-netcdf": "NetCDF",
    "application/x-shockwave-flash": "SWF",
    "application/x-tex": "TeX",  # Also TeX
    "application/xhtml+xml": "XHTML",
    "application/xml": "XML",
    "application/zip": "ZIP",
    "audio/aiff": "AIFF",
    "audio/basic": "AU",  # Also SND
    "audio/midi": "MIDI",
    "audio/x-aifc": "AIFF",
    "audio/x-aiff": "AIFF",
    "audio/x-flac": "FLAC",
    "audio/x-wav": "WAV",
    "chemical/seq-aa-fasta": "FASTA",
    "chemical/seq-na-fasta": "FASTA",
    "chemical/seq-na-fastq": "FASTQ",
    "chemical/seq-na-genbank": "GenBank",
    "chemical/seq-na-sff": "SFF",
    "chemical/x-cif": "CIF",
    "chemical/x-daylight-smiles": "SMILES",
    "chemical/x-hin": "HIN",
    "chemical/x-jcamp-dx": "JCAMP-DX",
    "chemical/x-mdl-molfile": "MOL",
    "chemical/x-mdl-sdf": "SDF",
    "chemical/x-mdl-sdfile": "SDF",
    "chemical/x-mdl-tgf": "TGF",
    "chemical/x-mmcif": "CIF",
    "chemical/x-mol2": "MOL2",
    "chemical/x-mopac-input": "Table",
    "chemical/x-pdb": "PDB",
    "chemical/x-xyz": "XYZ",
    "image/bmp": "BMP",
    "image/eps": "EPS",
    "image/fits": "FITS",
    "image/gif": "GIF",
    "image/jp2": "JPEG2000",
    "image/jpeg": "JPEG",
    "image/pbm": "PNM",
    "image/pcx": "PCX",
    "image/pict": "PICT",
    "image/png": "PNG",
    "image/svg+xml": "SVG",
    "image/tga": "TGA",
    "image/tiff": "TIFF",
    "image/vnd.dxf": "DXF",
    "image/vnd.microsoft.icon": "ICO",
    "image/x-3ds": "3DS",
    "image/x-dxf": "DXF",
    "image/x-exr": "OpenEXR",
    "image/x-icon": "ICO",
    "image/x-ms-bmp": "BMP",
    "image/x-pcx": "PCX",
    "image/x-portable-anymap": "PNM",
    "image/x-portable-bitmap": "PBM",
    "image/x-portable-graymap": "PGM",
    "image/x-portable-pixmap": "PPM",
    "image/x-xbitmap": "XBM",
    "model/vrml": "VRML",
    "model/x-lwo": "LWO",
    "model/x-pov": "POV",
    "model/x3d+xml": "X3D",
    "text/calendar": "ICS",
    "text/comma-separated-values": "CSV",
    "text/csv": "CSV",
    "text/html": "HTML",
    "text/mathml": "MathML",
    "text/plain": "Text",
    "text/rtf": "RTF",
    "text/scriptlet": "SCT",
    "text/tab-separated-values": "TSV",
    "text/texmacs": "Text",
    "text/vnd.graphviz": "DOT",
    "text/x-comma-separated-values": "CSV",
    "text/x-csrc": "C",
    "text/x-tex": "TeX",
    "text/x-vcalendar": "VCS",
    "text/x-vcard": "VCF",
    "text/xml": "XML",
    "video/avi": "AVI",
    "video/quicktime": "QuickTime",
    "video/x-flv": "FLV",
    # None: 'Binary',
}


def filetype_from_path(path: str) -> Optional[str]:
    """Classify the file at ``path`` and return its short format name.

    The MIME type of the file is obtained with ``from_file(path, mime=True)``
    and translated with :func:`filetype_from_MIME_content`.  A MIME type is
    used even though the path does not have to be something received or
    transmitted over HTTP: MIME types are standardized and do not change,
    while file descriptions or WL's codes are not and can change.

    Args:
        path: filesystem path of the file to classify.

    Returns:
        The short format name as a plain Python ``str`` (for example
        ``"ZIP"``), or ``None`` if ``path`` does not exist or the detection
        raised.

    NOTE(review): the previous annotation promised a Mathics3 ``String``, but
    the value returned has always been the ``str`` produced by
    ``filetype_from_MIME_content``; presumably callers do the wrapping --
    confirm against the callers.
    """
    if not osp.exists(path):
        return None

    try:
        mime_content_type = from_file(path, mime=True)
        return filetype_from_MIME_content(mime_content_type)
    except Exception:
        # Detection is deliberately best-effort: whatever the MIME sniffer
        # raises is reported to the caller as "could not classify".
        return None


def filetype_from_MIME_content(mime_content_name: str) -> str:
    """Translate a MIME type name into a short format name.

    ``MIMETYPE_TO_SHORTNAME`` is consulted first.  For a MIME type that is
    not listed there, the extension suggested by
    ``mimetypes.guess_extension`` is used, without its leading dot and in
    upper case.  When no extension is known either, ``"Text"`` is returned.

    Args:
        mime_content_name: a MIME type such as ``"application/zip"``.

    Returns:
        The short format name, e.g. ``"ZIP"``.
    """
    short_name = MIMETYPE_TO_SHORTNAME.get(mime_content_name)
    if short_name is not None:
        return short_name

    # mimetypes.guess_extension returns things like '.zip' or '.py'.
    file_extension = mimetypes.guess_extension(mime_content_name)
    if not file_extension:
        return "Text"

    # The dot is at the *front* of the extension, so it has to be stripped
    # from the left; stripping from the right left results like ".ZIP".
    return file_extension.lstrip(".").upper()


def importer_exporter_options(
    available_options, options, builtin_name: str, evaluation
):
    """Split ``options`` into stream options and importer/exporter options.

    Every option named in ``available_options`` (a Mathics3 list of strings
    or symbols) that is present in ``options`` is turned into a
    ``name -> value`` rule.  ``CharacterEncoding`` goes to the stream
    options, every other one to the custom options.  ``options`` itself is
    not modified; a copy is consumed instead.

    If the options left over contain ``System`$OptionSyntax`` with a value
    other than ``System`Ignore``, an ``optx`` message is issued under
    ``builtin_name`` for each leftover option.

    Returns:
        A ``(stream_options, custom_options)`` pair of lists of rules.
    """
    stream_options = []
    custom_options = []
    remaining_options = options.copy()

    if available_options and available_options.has_form("List", None):
        for name in available_options.elements:
            if isinstance(name, String):
                py_name = name.get_string_value()
            elif isinstance(name, Symbol):
                py_name = strip_context(name.get_name())
            else:
                py_name = None

            if not py_name:
                continue

            option = get_option(remaining_options, py_name, evaluation, pop=True)
            if option is None:
                continue

            rule = Expression(SymbolRule, String(py_name), option)
            if py_name == "CharacterEncoding":
                stream_options.append(rule)
            else:
                custom_options.append(rule)

    syntax_option = remaining_options.get("System`$OptionSyntax", None)
    if syntax_option and syntax_option != Symbol("System`Ignore"):
        # warn about unsupported options.
        for name, value in remaining_options.items():
            evaluation.message(
                builtin_name,
                "optx",
                Expression(SymbolRule, String(strip_context(name)), value),
                strip_context(builtin_name),
            )

    return stream_options, custom_options


def eval_Import(
    findfile: "Optional[String]",
    determine_filetype,
    elements,
    evaluation,
    options,
    data: Optional[str],
):
    """Import the requested ``elements`` from a file or from string data.

    Args:
        findfile: the file to import from, or ``None`` when importing from
            ``data``.
        determine_filetype: callable invoked as ``determine_filetype(data)``
            to get the format name when ``elements`` does not name a
            registered format.
        elements: a Mathics3 string, or list of strings, naming the format
            and/or the elements to import.
        evaluation: the current evaluation; used for messages and for
            evaluating the importer functions.
        options: the options given to the calling builtin.
        data: the text to import when there is no file, else ``None``.

    Returns:
        The imported expression, ``SymbolFailed`` on failure, or ``None`` in
        the cases noted in ``_import_elements``.

    ``evaluation.predetermined_out`` is restored to the value it had on
    entry on every way out of this function, including failures and
    exceptions.
    """
    saved_predetermined_out = evaluation.predetermined_out
    try:
        return _import_elements(
            findfile, determine_filetype, elements, evaluation, options, data
        )
    finally:
        evaluation.predetermined_out = saved_predetermined_out


def _import_elements(
    findfile, determine_filetype, elements, evaluation, options, data
):
    """Do the work of eval_Import; the caller restores predetermined_out."""
    # Check elements
    if elements.has_form("List", None):
        elements = elements.get_elements()
    else:
        elements = [elements]

    for element in elements:
        if not isinstance(element, String):
            evaluation.message("Import", "noelem", element)
            return SymbolFailed

    names = [element.get_string_value() for element in elements]

    # Determine file type: the first element that names a registered importer
    # selects the format and is not treated as an element to import.
    for name in names:
        if name in IMPORTERS:
            filetype = name
            names.remove(name)  # safe: we leave the loop right away
            break
    else:
        filetype = determine_filetype(data)

    if filetype not in IMPORTERS:
        evaluation.message("Import", "fmtnosup", filetype)
        return SymbolFailed

    # Load the importer
    conditionals, default_function, posts, importer_options = IMPORTERS[filetype]

    stream_options, custom_options = importer_exporter_options(
        importer_options.get("System`Options"), options, "System`Import", evaluation
    )

    function_channels = importer_options.get("System`FunctionChannels")
    if function_channels is None:
        # TODO message
        evaluation.message("Import" if data is None else "ImportString", "emptyfch")
        return SymbolFailed

    default_element = importer_options.get("System`DefaultElement")
    if default_element is None:
        # TODO message
        return SymbolFailed

    def run(function):
        """Run one importer function; None means that the import failed."""
        results = get_results(
            function,
            findfile,
            function_channels,
            stream_options,
            custom_options,
            evaluation,
            options,
            data=data,
        )
        # get_results reports failure either as None or as $Failed; treat
        # both alike so that no branch below calls dict methods on a Symbol.
        return None if results is SymbolFailed else results

    # Perform the import
    if not names:
        defaults = run(default_function)
        if defaults is None:
            return SymbolFailed
        if default_element is Symbol("Automatic"):
            return ListExpression(
                *(
                    Expression(SymbolRule, String(key), value)
                    for key, value in defaults.items()
                )
            )
        result = defaults.get(default_element.get_string_value())
        if result is None:
            evaluation.message("Import", "noelem", default_element, String(filetype))
            return SymbolFailed
        return result

    requested = names[0]

    if requested == "Elements":
        defaults = run(default_function)
        if defaults is None:
            return SymbolFailed
        # A set removes the names that appear in more than one table.
        return from_python(sorted(set(conditionals) | set(defaults) | set(posts)))

    if requested in conditionals:
        result = run(conditionals[requested])
        if result is None:
            return SymbolFailed
        if list(result) == [requested]:
            return result[requested]
        # NOTE(review): any other result shape yields None, exactly as
        # before -- confirm that this is what callers expect.
        return None

    if requested in posts:
        # TODO: allow use of conditionals
        result = run(posts[requested])
        if result is None:
            return SymbolFailed
        # NOTE(review): the result of a post-importer is not returned; this
        # keeps the previous behaviour (None) -- confirm that it is intended.
        return None

    defaults = run(default_function)
    if defaults is None:
        return SymbolFailed
    if requested in defaults:
        return defaults[requested]

    evaluation.message("Import", "noelem", from_python(requested), String(filetype))
    return SymbolFailed


def get_results(
    tmp_function,
    findfile: "Optional[String]",
    function_channels,
    stream_options,
    custom_options,
    evaluation,
    options,
    data: Optional[str],
):
    """Evaluate the importer function ``tmp_function`` and collect its rules.

    The importer is handed a file name when ``function_channels`` is
    ``{"FileNames"}`` and an input stream when it is ``{"Streams"}``.  When
    ``findfile`` is ``None`` the input is ``data``: it is written to a
    temporary file, or turned into a string stream, respectively.

    Returns:
        A dict mapping element names to imported expressions; ``None`` if
        the input could not be opened or the importer did not return a list
        of rules; ``SymbolFailed`` if the importer returned ``$Failed`` (file
        name channel) or the channel is not supported.

    ``evaluation.predetermined_out`` is restored to the value it had on
    entry on every way out of this function.
    """
    saved_predetermined_out = evaluation.predetermined_out
    try:
        return _collect_results(
            tmp_function,
            findfile,
            function_channels,
            stream_options,
            custom_options,
            evaluation,
            options,
            data,
        )
    finally:
        evaluation.predetermined_out = saved_predetermined_out


def _collect_results(
    tmp_function,
    findfile,
    function_channels,
    stream_options,
    custom_options,
    evaluation,
    options,
    data,
):
    """Do the work of get_results; the caller restores predetermined_out."""
    if function_channels == ListExpression(String("FileNames")):
        joined_options = list(chain(stream_options, custom_options))
        is_tmpfile = findfile is None
        if is_tmpfile:
            stream = Expression(SymbolOpenWrite).evaluate(evaluation)
            findfile = stream.elements[0]
            # WriteString needs the stream as its first argument; without it
            # the data never reached the temporary file.
            Expression(
                SymbolWriteString, stream, String(data if data is not None else "")
            ).evaluate(evaluation)
            eval_Close(stream, evaluation)

        imported = Expression(tmp_function, findfile, *joined_options).evaluate(
            evaluation
        )

        # Remove the temporary file before looking at the outcome, so that a
        # failed import does not leave it behind.
        if is_tmpfile:
            Expression(SymbolDeleteFile, findfile).evaluate(evaluation)
        if imported is SymbolFailed:
            return SymbolFailed

    elif function_channels == ListExpression(String("Streams")):
        if findfile is None:
            stream = Expression(SymbolStringToStream, String(data)).evaluate(evaluation)
        else:
            binary = options.get("System`BinaryFormat") is SymbolTrue
            encoding_option = options.get("System`CharacterEncoding")
            encoding = (
                encoding_option.value if isinstance(encoding_option, String) else None
            )
            stream = eval_Open(
                name=findfile,
                mode="rb" if binary else "r",
                stream_type="InputStream",
                encoding=encoding,
                evaluation=evaluation,
            )
        if stream is None:
            return None
        if stream.head is not SymbolInputStream:
            evaluation.message("Import", "nffil")
            return None
        imported = Expression(tmp_function, stream, *custom_options).evaluate(
            evaluation
        )
        eval_Close(stream, evaluation)

    else:
        # TODO message
        return SymbolFailed

    rules = imported.get_elements()
    if not all(rule.has_form("Rule", None) for rule in rules):
        return None

    return {
        key.get_string_value(): value
        for key, value in (rule.get_elements() for rule in rules)
    }


# NOTE(review): the remainder of the original text is not part of this
# module.  It is the start of the pyproject.toml hunk of the same patch and is
# kept verbatim (as comments) so that no patch content is lost:
#
#   diff --git a/pyproject.toml b/pyproject.toml
#   index c91bff36d..72f949b9a 100644
#   --- a/pyproject.toml
#   +++ b/pyproject.toml
#   @@ -28,6 +28,7 @@ dependencies = [
#        "pillow >= 9.2",
#        "pint >=0.24", # Earlier pint has problems with numpy 2.2.6
#        "python-dateutil",
#   +    "python-magic",
#        # Pympler is used in ByteCount[] and MemoryInUse[].
"Pympler", "requests", diff --git a/test/builtin/files_io/test_importexport.py b/test/builtin/files_io/test_importexport.py index f4ce7a1e7..12884644e 100644 --- a/test/builtin/files_io/test_importexport.py +++ b/test/builtin/files_io/test_importexport.py @@ -263,6 +263,18 @@ def test_export(): ('FileFormat["ExampleData/Testosterone.svg"]', None, "SVG", None), ('FileFormat["ExampleData/colors.json"]', None, "JSON", None), ('FileFormat["ExampleData/InventionNo1.xml"]', None, "XML", None), + ( + 'FileFormat["ExampleData/PacletServer-Install.mx"]', + None, + "ZIP", + "Detect ZIP files", + ), + ( + 'FileFormat["ExampleData/Einstein.txt"]', + None, + "JPEG", + "JPEG stored as with .txt exension", + ), ], ) def test_importexport(str_expr, msgs, str_expected, fail_msg):