diff --git a/CHANGES.rst b/CHANGES.rst index f2af153..b349aea 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,23 @@ Changelog ========= +Unreleased +---------- + +**Changes:** + +- Added reading and writing of GP6 (``.gpx``) and GP7 (``.gp``) files. + ``guitarpro.parse`` detects the container format and maps the embedded + ``score.gpif`` document into the ``Song`` model; ``guitarpro.write`` rebuilds + the container when given ``version=(6, 0, 0)`` / ``(7, 0, 0)`` or a ``.gpx`` / + ``.gp`` extension. Coverage is the core musical content (song info, tracks, + tunings, measures, voices, beats, durations and notes). +- Added GP6/GP7 note effects: hammer-on/pull-off, slides, harmonics (natural, + artificial, pinch, tap, semi), left/right-hand fingering, accentuation + (accent, heavy accent, staccato) and beat text. Bends, grace notes and + chord diagrams are not yet translated. + + Version 0.11 ------------- diff --git a/README.rst b/README.rst index 3e3b290..8293a3c 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,8 @@ PyGuitarPro Introduction ------------ -PyGuitarPro is a package to read, write and manipulate GP3, GP4 and GP5 files. Initially PyGuitarPro is a Python port +PyGuitarPro is a package to read, write and manipulate GP3, GP4 and GP5 files. GP6 (``.gpx``) and GP7 (``.gp``) +files can also be read and written. Initially PyGuitarPro is a Python port of `AlphaTab `_ which originally was a Haxe port of `TuxGuitar `_. diff --git a/docs/index.rst b/docs/index.rst index df10109..290e18a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,8 @@ PyGuitarPro =========== -PyGuitarPro is a package to read, write and manipulate GP3, GP4 and GP5 files. Initially PyGuitarPro is a Python port +PyGuitarPro is a package to read, write and manipulate GP3, GP4 and GP5 files. GP6 (``.gpx``) and GP7 (``.gp``) files +can also be read and written. Initially PyGuitarPro is a Python port of `AlphaTab `_ which is a Haxe port of `TuxGuitar `_. To anyone wanting to create their own the best guitar tablature editor in Python this package will be the good thing to diff --git a/docs/pyguitarpro/quickstart.rst b/docs/pyguitarpro/quickstart.rst index 171e93d..4a37c50 100644 --- a/docs/pyguitarpro/quickstart.rst +++ b/docs/pyguitarpro/quickstart.rst @@ -36,7 +36,9 @@ Functions :func:`guitarpro.parse` and :func:`guitarpro.write` support not only f .. note:: - PyGuitarPro supports only GP3, GP4 and GP5 files. Support for GPX (Guitar Pro 6) files is out of scope of the - project. + PyGuitarPro reads and writes GP3, GP4, GP5 files, as well as GP6 (``.gpx``) and GP7 (``.gp``) files. GP6/GP7 + support covers the core musical content (song info, tracks, tunings, measures, voices, beats, durations and + notes) but not yet every advanced effect. Write the GP6/GP7 formats by passing ``version=(6, 0, 0)`` or + ``version=(7, 0, 0)`` to :func:`guitarpro.write`, or by using a ``.gpx`` / ``.gp`` file extension. .. vim: tw=120 cc=121 diff --git a/src/guitarpro/gpif.py b/src/guitarpro/gpif.py new file mode 100644 index 0000000..3a3dacf --- /dev/null +++ b/src/guitarpro/gpif.py @@ -0,0 +1,564 @@ +"""Maps a Guitar Pro ``score.gpif`` XML document into the :class:`Song` model. + +The GPIF format (used by GP6 and GP7) stores the score as a set of +cross-referenced lists -- ``MasterBars`` point at ``Bars`` by id, ``Bars`` +point at ``Voices``, ``Voices`` point at ``Beats``, ``Beats`` point at +``Notes`` and ``Rhythms``. This parser resolves those references and builds +the same object tree the binary readers produce. + +The mapping covers the core musical content: song info, tracks and tunings, +master bars and time signatures, voices, beats, durations and notes. +Advanced effects are not yet translated. +""" +import xml.etree.ElementTree as ET + +from . import models as gp + +__all__ = ('GPIFParser', 'GPIFWriter') + +# GPIF NoteValue -> Duration.value +_NOTE_VALUES = { + 'Whole': gp.Duration.whole, + 'Half': gp.Duration.half, + 'Quarter': gp.Duration.quarter, + 'Eighth': gp.Duration.eighth, + '16th': gp.Duration.sixteenth, + '32nd': gp.Duration.thirtySecond, + '64th': gp.Duration.sixtyFourth, + '128th': gp.Duration.hundredTwentyEighth, +} + +# GPIF Dynamic -> velocity +_DYNAMICS = { + 'PPP': gp.Velocities.pianoPianissimo, + 'PP': gp.Velocities.pianissimo, + 'P': gp.Velocities.piano, + 'MP': gp.Velocities.mezzoPiano, + 'MF': gp.Velocities.mezzoForte, + 'F': gp.Velocities.forte, + 'FF': gp.Velocities.fortissimo, + 'FFF': gp.Velocities.forteFortissimo, +} + +# Inverse lookups for writing. +_NOTE_VALUE_NAMES = {value: name for name, value in _NOTE_VALUES.items()} +_DYNAMIC_NAMES = {velocity: name for name, velocity in _DYNAMICS.items()} + +# Note effect encodings, after alphaTab's GpifParser. + +# Slide N: bit -> SlideType. +_SLIDE_FLAGS = { + 1: gp.SlideType.shiftSlideTo, + 2: gp.SlideType.legatoSlideTo, + 4: gp.SlideType.outDownwards, + 8: gp.SlideType.outUpwards, + 16: gp.SlideType.intoFromBelow, + 32: gp.SlideType.intoFromAbove, +} +_SLIDE_BITS = {slide: bit for bit, slide in _SLIDE_FLAGS.items()} + +# Note N: bit -> effect flag. +_ACCENT_STACCATO = 0x01 +_ACCENT_HEAVY = 0x04 +_ACCENT_NORMAL = 0x08 + +# LeftFingering / RightFingering text -> Fingering. +_FINGERING = { + 'P': gp.Fingering.thumb, + 'I': gp.Fingering.index, + 'M': gp.Fingering.middle, + 'A': gp.Fingering.annular, + 'C': gp.Fingering.little, +} +_FINGERING_NAMES = {finger: name for name, finger in _FINGERING.items()} + +# HarmonicType HType -> (HarmonicEffect class, stores a fret value). +_HARMONICS = { + 'natural': (gp.NaturalHarmonic, False), + 'artificial': (gp.ArtificialHarmonic, False), + 'pinch': (gp.PinchHarmonic, False), + 'tap': (gp.TappedHarmonic, True), + 'semi': (gp.SemiHarmonic, False), +} +_HARMONIC_NAMES = {cls: name for name, (cls, _) in _HARMONICS.items()} + + +class GPIFParser: + def __init__(self, data, versionTuple=None): + if isinstance(data, (bytes, bytearray)): + self.root = ET.fromstring(data) + else: + self.root = ET.fromstring(data.encode() if isinstance(data, str) else data) + self.versionTuple = versionTuple + + # -- helpers -------------------------------------------------------- + + def _text(self, path, default=''): + element = self.root.find(path) + if element is not None and element.text is not None: + return element.text.strip() + return default + + @staticmethod + def _index(elements): + """Index a list of elements by their ``id`` attribute.""" + return {e.get('id'): e for e in elements} + + @staticmethod + def _property(element, name): + """Return the ```` child of *element*, or None.""" + if element is None: + return None + for prop in element.findall('./Properties/Property'): + if prop.get('name') == name: + return prop + return None + + # -- entry point ---------------------------------------------------- + + def readSong(self): + root = self.root + self.bars = self._index(root.findall('./Bars/Bar')) + self.voices = self._index(root.findall('./Voices/Voice')) + self.beats = self._index(root.findall('./Beats/Beat')) + self.notes = self._index(root.findall('./Notes/Note')) + self.rhythms = self._index(root.findall('./Rhythms/Rhythm')) + + song = gp.Song(versionTuple=self.versionTuple) + self._readScoreInfo(song) + self._readTempo(song) + + masterBars = root.findall('./MasterBars/MasterBar') + self._readMeasureHeaders(song, masterBars) + self._readTracks(song) + self._readMeasures(song, masterBars) + return song + + # -- score info ----------------------------------------------------- + + def _readScoreInfo(self, song): + song.title = self._text('./Score/Title') + song.subtitle = self._text('./Score/SubTitle') + song.artist = self._text('./Score/Artist') + song.album = self._text('./Score/Album') + song.words = self._text('./Score/Words') + song.music = self._text('./Score/Music') + song.copyright = self._text('./Score/Copyright') + song.tab = self._text('./Score/Tabber') + song.instructions = self._text('./Score/Instructions') + notices = self._text('./Score/Notices') + song.notice = notices.splitlines() if notices else [] + + def _readTempo(self, song): + for automation in self.root.findall('./MasterTrack/Automations/Automation'): + if automation.findtext('Type') == 'Tempo': + value = (automation.findtext('Value') or '').split() + if value: + song.tempo = int(round(float(value[0]))) + break + + # -- measure headers ------------------------------------------------ + + def _readMeasureHeaders(self, song, masterBars): + song.measureHeaders = [] + for number, masterBar in enumerate(masterBars, start=1): + header = gp.MeasureHeader(number=number) + time = masterBar.findtext('Time') + if time and '/' in time: + numerator, denominator = time.split('/') + header.timeSignature = gp.TimeSignature( + numerator=int(numerator), + denominator=gp.Duration(value=int(denominator)), + ) + repeat = masterBar.find('Repeat') + if repeat is not None: + if repeat.get('start') == 'true': + header.isRepeatOpen = True + if repeat.get('end') == 'true': + header.repeatClose = int(repeat.get('count', 0)) + song.addMeasureHeader(header) + if not song.measureHeaders: + song.measureHeaders = [gp.MeasureHeader()] + + # -- tracks --------------------------------------------------------- + + def _readTracks(self, song): + trackElements = self.root.findall('./Tracks/Track') + song.tracks = [] + for number, element in enumerate(trackElements, start=1): + track = gp.Track(song, number=number) + track.name = (element.findtext('Name') or '').strip() or track.name + track.strings = self._readTuning(element) + track.measures = [] + song.tracks.append(track) + if not song.tracks: + song.tracks = [gp.Track(song)] + + def _readTuning(self, trackElement): + tuning = self._property(trackElement, 'Tuning') + pitches = None + if tuning is not None: + text = tuning.findtext('Pitches') + if text: + pitches = [int(p) for p in text.split()] + if not pitches: + # Default standard 6-string tuning, low to high. + pitches = [40, 45, 50, 55, 59, 64] + # GPIF lists pitches low-to-high; GuitarString #1 is the highest. + return [gp.GuitarString(number=i + 1, value=value) + for i, value in enumerate(reversed(pitches))] + + # -- measures / voices / beats / notes ------------------------------ + + def _readMeasures(self, song, masterBars): + for trackIndex, track in enumerate(song.tracks): + for header, masterBar in zip(song.measureHeaders, masterBars): + measure = gp.Measure(track, header) + measure.voices = [] + barIds = (masterBar.findtext('Bars') or '').split() + barId = barIds[trackIndex] if trackIndex < len(barIds) else None + bar = self.bars.get(barId) + self._readVoices(measure, bar) + track.measures.append(measure) + + def _readVoices(self, measure, bar): + voiceIds = [] + if bar is not None: + voiceIds = (bar.findtext('Voices') or '').split() + start = measure.start + for voiceId in voiceIds: + if voiceId == '-1': + continue + voice = gp.Voice(measure) + self._readBeats(voice, self.voices.get(voiceId), start) + measure.voices.append(voice) + # The model expects at least ``maxVoices`` voices per measure. + while len(measure.voices) < gp.Measure.maxVoices: + measure.voices.append(gp.Voice(measure)) + + def _readBeats(self, voice, voiceElement, start): + if voiceElement is None: + return start + for beatId in (voiceElement.findtext('Beats') or '').split(): + beatElement = self.beats.get(beatId) + if beatElement is None: + continue + beat = gp.Beat(voice) + beat.start = start + beat.duration = self._readDuration(beatElement) + beat.text = beatElement.findtext('FreeText') or None + self._readBeatNotes(beat, beatElement) + beat.status = gp.BeatStatus.normal if beat.notes else gp.BeatStatus.rest + voice.beats.append(beat) + start += beat.duration.time + return start + + def _readDuration(self, beatElement): + rhythm = None + ref = beatElement.find('Rhythm') + if ref is not None: + rhythm = self.rhythms.get(ref.get('ref')) + duration = gp.Duration() + if rhythm is None: + return duration + noteValue = rhythm.findtext('NoteValue') + duration.value = _NOTE_VALUES.get(noteValue, gp.Duration.quarter) + dots = rhythm.find('AugmentationDot') + if dots is not None and int(dots.get('count', 0)) > 0: + duration.isDotted = True + tuplet = rhythm.find('PrimaryTuplet') + if tuplet is not None: + duration.tuplet = gp.Tuplet( + enters=int(tuplet.get('num', 1)), + times=int(tuplet.get('den', 1)), + ) + return duration + + def _readBeatNotes(self, beat, beatElement): + dynamic = beatElement.findtext('Dynamic') + velocity = _DYNAMICS.get(dynamic, gp.Velocities.default) + noteIds = beatElement.findtext('Notes') + if not noteIds: + return + for noteId in noteIds.split(): + noteElement = self.notes.get(noteId) + if noteElement is None: + continue + beat.notes.append(self._readNote(beat, noteElement, velocity)) + + def _readNote(self, beat, noteElement, velocity): + note = gp.Note(beat, velocity=velocity, type=gp.NoteType.normal) + fret = self._property(noteElement, 'Fret') + if fret is not None: + note.value = int(fret.findtext('Fret') or 0) + string = self._property(noteElement, 'String') + if string is not None: + # GPIF strings are 0-based from the highest; the model is 1-based. + note.string = int(string.findtext('String') or 0) + 1 + if noteElement.find('Tie') is not None: + note.type = gp.NoteType.tie + self._readNoteEffects(note, noteElement) + return note + + def _readNoteEffects(self, note, noteElement): + effect = note.effect + + accent = noteElement.findtext('Accent') + if accent is not None: + flags = int(accent) + effect.staccato = bool(flags & _ACCENT_STACCATO) + effect.heavyAccentuatedNote = bool(flags & _ACCENT_HEAVY) + effect.accentuatedNote = bool(flags & _ACCENT_NORMAL) + + left = noteElement.findtext('LeftFingering') + if left in _FINGERING: + effect.leftHandFinger = _FINGERING[left] + right = noteElement.findtext('RightFingering') + if right in _FINGERING: + effect.rightHandFinger = _FINGERING[right] + + if self._property(noteElement, 'HopoOrigin') is not None: + effect.hammer = True + + slide = self._property(noteElement, 'Slide') + if slide is not None: + flags = int(slide.findtext('Flags') or 0) + effect.slides = [slideType + for bit, slideType in _SLIDE_FLAGS.items() + if flags & bit] + + if self._property(noteElement, 'Harmonic') is not None: + harmonicType = self._property(noteElement, 'HarmonicType') + name = (harmonicType.findtext('HType') if harmonicType is not None else None) + entry = _HARMONICS.get((name or '').lower()) + if entry is not None: + cls, hasFret = entry + harmonic = cls() + if hasFret: + fret = self._property(noteElement, 'HarmonicFret') + if fret is not None: + harmonic.fret = int(float(fret.findtext('HFret') or 0)) + effect.harmonic = harmonic + + +class GPIFWriter: + """Serializes a :class:`Song` into a ``score.gpif`` XML document. + + The model is a tree (measure -> voice -> beat -> note) while GPIF stores + flat, cross-referenced lists joined by ``id``. This writer hoists beats, + notes and rhythms into global tables, assigns ids and emits the reference + strings the format expects. It writes the same subset of the model that + :class:`GPIFParser` reads back. + """ + + def __init__(self, song): + self.song = song + self.root = ET.Element('GPIF') + # Global id counters and lists for the cross-referenced sections. + self._bars = [] + self._voices = [] + self._beats = [] + self._notes = [] + self._rhythms = [] + # Deduplicate rhythms by (value, dotted, tuplet). + self._rhythmIds = {} + + # -- helpers -------------------------------------------------------- + + @staticmethod + def _sub(parent, tag, text=None, **attrib): + element = ET.SubElement(parent, tag, {k: str(v) for k, v in attrib.items()}) + if text is not None: + element.text = str(text) + return element + + # -- entry point ---------------------------------------------------- + + def write(self): + self._sub(self.root, 'GPVersion', 6) + self._writeScoreInfo() + self._writeMasterTrack() + self._writeTracks() + self._writeMasterBars() + self._writeCollections() + return ET.tostring(self.root, encoding='UTF-8', xml_declaration=True) + + # -- score / master track ------------------------------------------ + + def _writeScoreInfo(self): + song = self.song + score = self._sub(self.root, 'Score') + self._sub(score, 'Title', song.title) + self._sub(score, 'SubTitle', song.subtitle) + self._sub(score, 'Artist', song.artist) + self._sub(score, 'Album', song.album) + self._sub(score, 'Words', song.words) + self._sub(score, 'Music', song.music) + self._sub(score, 'Copyright', song.copyright) + self._sub(score, 'Tabber', song.tab) + self._sub(score, 'Instructions', song.instructions) + self._sub(score, 'Notices', '\n'.join(song.notice)) + + def _writeMasterTrack(self): + master = self._sub(self.root, 'MasterTrack') + trackIds = ' '.join(str(i) for i in range(len(self.song.tracks))) + self._sub(master, 'Tracks', trackIds) + automations = self._sub(master, 'Automations') + automation = self._sub(automations, 'Automation') + self._sub(automation, 'Type', 'Tempo') + self._sub(automation, 'Linear', 'true') + self._sub(automation, 'Bar', 0) + self._sub(automation, 'Position', 0) + self._sub(automation, 'Visible', 'true') + self._sub(automation, 'Value', f'{self.song.tempo} 2') + + # -- tracks --------------------------------------------------------- + + def _writeTracks(self): + tracks = self._sub(self.root, 'Tracks') + for index, track in enumerate(self.song.tracks): + element = self._sub(tracks, 'Track', id=index) + self._sub(element, 'Name', track.name) + properties = self._sub(element, 'Properties') + tuning = self._sub(properties, 'Property', name='Tuning') + # The model orders strings high-to-low; GPIF lists pitches low-to-high. + pitches = ' '.join(str(s.value) for s in reversed(track.strings)) + self._sub(tuning, 'Pitches', pitches) + + # -- master bars / bars / voices / beats / notes / rhythms ---------- + + def _writeMasterBars(self): + masterBars = self._sub(self.root, 'MasterBars') + headers = self.song.measureHeaders + for measureIndex, header in enumerate(headers): + masterBar = self._sub(masterBars, 'MasterBar') + self._sub(masterBar, 'Key') + time = header.timeSignature + self._sub(masterBar, 'Time', f'{time.numerator}/{time.denominator.value}') + if header.isRepeatOpen or header.repeatClose >= 0: + attrib = {} + if header.isRepeatOpen: + attrib['start'] = 'true' + if header.repeatClose >= 0: + attrib['end'] = 'true' + attrib['count'] = str(header.repeatClose) + self._sub(masterBar, 'Repeat', **attrib) + # One bar per track; every track has a measure per header. + barIds = [self._writeBar(track.measures[measureIndex]) + for track in self.song.tracks] + self._sub(masterBar, 'Bars', ' '.join(str(i) for i in barIds)) + + def _writeBar(self, measure): + barId = len(self._bars) + bar = ET.Element('Bar', id=str(barId)) + self._bars.append(bar) + self._sub(bar, 'Clef', 'G2') + # GPIF always reserves four voice slots; -1 marks an unused one. + voiceIds = ['-1', '-1', '-1', '-1'] + slot = 0 + for voice in measure.voices: + if slot >= 4: + break + if voice.beats: + voiceIds[slot] = str(self._writeVoice(voice)) + slot += 1 + self._sub(bar, 'Voices', ' '.join(voiceIds)) + return barId + + def _writeVoice(self, voice): + voiceId = len(self._voices) + element = ET.Element('Voice', id=str(voiceId)) + self._voices.append(element) + beatIds = [self._writeBeat(beat) for beat in voice.beats] + self._sub(element, 'Beats', ' '.join(str(i) for i in beatIds)) + return voiceId + + def _writeBeat(self, beat): + beatId = len(self._beats) + element = ET.Element('Beat', id=str(beatId)) + self._beats.append(element) + if beat.notes: + velocity = beat.notes[0].velocity + self._sub(element, 'Dynamic', _DYNAMIC_NAMES.get(velocity, 'F')) + if beat.text: + self._sub(element, 'FreeText', beat.text) + rhythmId = self._writeRhythm(beat.duration) + self._sub(element, 'Rhythm', ref=rhythmId) + if beat.notes: + noteIds = [self._writeNote(note) for note in beat.notes] + self._sub(element, 'Notes', ' '.join(str(i) for i in noteIds)) + return beatId + + def _writeNote(self, note): + noteId = len(self._notes) + element = ET.Element('Note', id=str(noteId)) + self._notes.append(element) + self._writeNoteEffects(element, note) + properties = self._sub(element, 'Properties') + fret = self._sub(properties, 'Property', name='Fret') + self._sub(fret, 'Fret', note.value) + string = self._sub(properties, 'Property', name='String') + # The model is 1-based from the highest string; GPIF is 0-based. + self._sub(string, 'String', note.string - 1) + self._writeNoteProperties(properties, note) + if note.type is gp.NoteType.tie: + self._sub(element, 'Tie', origin='true') + return noteId + + def _writeNoteEffects(self, element, note): + """Write the note effects that are direct children of .""" + effect = note.effect + if effect.leftHandFinger in _FINGERING_NAMES: + self._sub(element, 'LeftFingering', _FINGERING_NAMES[effect.leftHandFinger]) + if effect.rightHandFinger in _FINGERING_NAMES: + self._sub(element, 'RightFingering', _FINGERING_NAMES[effect.rightHandFinger]) + flags = ((_ACCENT_STACCATO if effect.staccato else 0) + | (_ACCENT_HEAVY if effect.heavyAccentuatedNote else 0) + | (_ACCENT_NORMAL if effect.accentuatedNote else 0)) + if flags: + self._sub(element, 'Accent', flags) + + def _writeNoteProperties(self, properties, note): + """Write the note effects stored under .""" + effect = note.effect + if effect.hammer: + hopo = self._sub(properties, 'Property', name='HopoOrigin') + self._sub(hopo, 'Enable') + if effect.slides: + flags = sum(_SLIDE_BITS[s] for s in effect.slides if s in _SLIDE_BITS) + slide = self._sub(properties, 'Property', name='Slide') + self._sub(slide, 'Flags', flags) + if effect.harmonic is not None: + name = _HARMONIC_NAMES.get(type(effect.harmonic)) + if name is not None: + enable = self._sub(properties, 'Property', name='Harmonic') + self._sub(enable, 'Enable') + htype = self._sub(properties, 'Property', name='HarmonicType') + self._sub(htype, 'HType', name.capitalize()) + fret = getattr(effect.harmonic, 'fret', None) + hfret = self._sub(properties, 'Property', name='HarmonicFret') + self._sub(hfret, 'HFret', f'{float(fret if fret is not None else note.value):.6f}') + + def _writeRhythm(self, duration): + key = (duration.value, duration.isDotted, + duration.tuplet.enters, duration.tuplet.times) + if key in self._rhythmIds: + return self._rhythmIds[key] + rhythmId = len(self._rhythms) + element = ET.Element('Rhythm', id=str(rhythmId)) + self._rhythms.append(element) + self._sub(element, 'NoteValue', _NOTE_VALUE_NAMES.get(duration.value, 'Quarter')) + if duration.isDotted: + self._sub(element, 'AugmentationDot', count=1) + if (duration.tuplet.enters, duration.tuplet.times) != (1, 1): + self._sub(element, 'PrimaryTuplet', + num=duration.tuplet.enters, den=duration.tuplet.times) + self._rhythmIds[key] = str(rhythmId) + return str(rhythmId) + + def _writeCollections(self): + for tag, elements in (('Bars', self._bars), ('Voices', self._voices), + ('Beats', self._beats), ('Notes', self._notes), + ('Rhythms', self._rhythms)): + container = self._sub(self.root, tag) + container.extend(elements) diff --git a/src/guitarpro/gpx.py b/src/guitarpro/gpx.py new file mode 100644 index 0000000..c8f4bd7 --- /dev/null +++ b/src/guitarpro/gpx.py @@ -0,0 +1,292 @@ +"""Reader for Guitar Pro 6 (``.gpx``) and Guitar Pro 7 (``.gp``) files. + +Both formats wrap a ``score.gpif`` XML document inside a container: + +* GP6 ``.gpx`` -- a ``BCFZ``-compressed ``BCFS`` virtual filesystem. The + ``BCFZ`` layer is a custom bitstream LZ scheme; the ``BCFS`` layer is a + sector-based archive. The decompression algorithm is based on code + contributed by J. Jorgen von Bargen. +* GP7 ``.gp`` -- a plain ZIP archive with the score at + ``Content/score.gpif``. + +This module exposes :class:`GPXFile`, which mirrors the ``readSong`` and +``writeSong`` interface of the binary readers and delegates the +XML-to-:class:`Song` mapping to :mod:`guitarpro.gpif`. + +GP6 files can be written as well as read; the container is rebuilt as a +``BCFZ``-compressed ``BCFS`` archive holding a single ``score.gpif``. GP7 +files are written as a ZIP archive. +""" +import io +import struct +import zipfile + +from .gpif import GPIFParser, GPIFWriter +from .models import GPException + +__all__ = ('GPXFile', 'decompress', 'compress', 'extractGPIF') + +_HEADER_BCFS = b'BCFS' +_HEADER_BCFZ = b'BCFZ' +_SECTOR_SIZE = 0x1000 + + +class _EndOfStream(Exception): + """Raised when the BCFZ bitstream is exhausted.""" + + +class _BitReader: + """Reads individual bits from a byte string, most significant first.""" + + def __init__(self, data): + self.data = data + self.byte = 0 + self.bit = 0 + + def readBit(self): + if self.byte >= len(self.data): + raise _EndOfStream + result = (self.data[self.byte] >> (7 - self.bit)) & 1 + self.bit += 1 + if self.bit == 8: + self.bit = 0 + self.byte += 1 + return result + + def readBits(self, count): + """Read *count* bits, most significant first.""" + result = 0 + for _ in range(count): + result = (result << 1) | self.readBit() + return result + + def readBitsReversed(self, count): + """Read *count* bits, least significant first.""" + result = 0 + for i in range(count): + result |= self.readBit() << i + return result + + +def decompress(data): + """Decompress a ``BCFZ`` payload. + + :param data: the bytes following the ``BCFZ`` magic, starting with the + little-endian uncompressed length. + """ + expectedLength, = struct.unpack_from('> i) & 1) + + def writeBitsReversed(self, value, count): + """Write *count* bits of *value*, least significant first.""" + for i in range(count): + self.writeBit((value >> i) & 1) + + def writeBytes(self, data): + for byte in data: + self.writeBits(byte, 8) + + def getvalue(self): + if self.count: + # Flush the partial final byte, padding the low bits with zeros. + self.out.append(self.current << (8 - self.count)) + self.current = 0 + self.count = 0 + return bytes(self.out) + + +def compress(data): + """Compress *data* into a ``BCFZ`` payload (length prefix + bitstream). + + The BCFZ scheme allows back-references, but a stream of plain literal + runs is equally valid and decodes identically. Emitting literals only + keeps the encoder linear and simple; the modest size overhead (a 3-bit + header per three bytes) is acceptable for written files. + """ + writer = _BitWriter() + for offset in range(0, len(data), 3): + chunk = data[offset:offset + 3] + writer.writeBit(0) + writer.writeBitsReversed(len(chunk), 2) + writer.writeBytes(chunk) + return struct.pack(' Song: """Open a GP file and read its contents. @@ -94,9 +99,21 @@ def _open(song, stream, mode='rb', version=None, encoding=None): filename = getattr(fp, 'name', '') if mode == 'rb': + if not (hasattr(fp, 'seekable') and fp.seekable()): + # The magic peek below needs to rewind; buffer non-seekable streams. + fp = io.BytesIO(fp.read()) + magic = fp.read(4) + fp.seek(0) + if magic in _GPX_MAGICS: + gpfile = GPXFile(fp, encoding) + return gpfile, shouldClose gpfilebase = GPFileBase(fp, encoding) versionString = gpfilebase.readVersion() elif mode == 'wb': + gpxFormat = _gpxFormat(version, filename) + if gpxFormat is not None: + gpfile = GPXFile(fp, encoding, version=gpxFormat) + return gpfile, shouldClose isClipboard = song.clipboard is not None if version is None: version = song.versionTuple @@ -116,6 +133,17 @@ def getVersionAndGPFile(versionString): raise GPException(f"unsupported version '{versionString}'") +def _gpxFormat(version, filename): + """Return ``'gpx'``/``'gp'`` if *version* or *filename* targets GP6/GP7.""" + if version is not None: + return {(6, 0, 0): 'gpx', (7, 0, 0): 'gp'}.get(tuple(version)) + __, ext = os.path.splitext(filename) + ext = ext.lstrip('.').lower() + if ext in ('gpx', 'gp'): + return ext + return None + + def guessVersionByExtension(filename): __, ext = os.path.splitext(filename) ext = ext.lstrip('.') diff --git a/tests/A Simple Song.gpx b/tests/A Simple Song.gpx new file mode 100644 index 0000000..1d993ff Binary files /dev/null and b/tests/A Simple Song.gpx differ diff --git a/tests/Dear Song.gpx b/tests/Dear Song.gpx new file mode 100644 index 0000000..73e94d5 Binary files /dev/null and b/tests/Dear Song.gpx differ diff --git a/tests/test_gpx.py b/tests/test_gpx.py new file mode 100644 index 0000000..573e1a1 --- /dev/null +++ b/tests/test_gpx.py @@ -0,0 +1,182 @@ +import io +import zipfile +from pathlib import Path + +import pytest + +import guitarpro as gp +from guitarpro.gpx import decompress, extractGPIF + + +LOCATION = Path(__file__).parent +SAMPLE = LOCATION / 'A Simple Song.gpx' +# Triggers the zero-padded final byte in the BCFZ stream. +DEAR_SONG = LOCATION / 'Dear Song.gpx' + + +def test_extract_gpif_returns_xml(): + data = SAMPLE.read_bytes() + gpif = extractGPIF(data) + assert gpif.lstrip().startswith(b'' in gpif + + +def test_decompress_roundtrips_length(): + data = SAMPLE.read_bytes() + assert data[:4] == b'BCFZ' + decompressed = decompress(data[4:]) + assert decompressed[:4] == b'BCFS' + + +def test_parse_score_info(): + song = gp.parse(str(SAMPLE)) + assert song.title == 'A Simple Song' + assert song.artist == 'Hirokazu Sato (1966-2016)' + assert song.subtitle == 'www.classclef.com' + assert song.tempo == 65 + + +def test_parse_track_and_tuning(): + song = gp.parse(str(SAMPLE)) + assert len(song.tracks) == 1 + track = song.tracks[0] + assert track.name == 'Nylon Guitar' + # Standard tuning, high E to low E. + assert [s.value for s in track.strings] == [64, 59, 55, 50, 45, 40] + + +def test_parse_measures_and_beats(): + song = gp.parse(str(SAMPLE)) + track = song.tracks[0] + assert len(track.measures) == 31 + + # First measure: two eighth notes, time signature 1/4. + measure = track.measures[0] + assert measure.timeSignature.numerator == 1 + assert measure.timeSignature.denominator.value == 4 + beats = measure.voices[0].beats + assert len(beats) == 2 + assert all(b.duration.value == gp.Duration.eighth for b in beats) + assert beats[0].notes[0].value == 5 + assert beats[0].notes[0].string == 6 + + +def test_parse_zero_padded_stream(): + # The BCFZ payload of this file ends mid-byte, exercising the + # zero-padding path in the bit reader. + song = gp.parse(str(DEAR_SONG)) + assert song.title == 'Dear Song' + assert song.tempo == 55 + track = song.tracks[0] + assert len(track.measures) == 22 + # Compound and simple meters both appear. + signatures = {(m.timeSignature.numerator, m.timeSignature.denominator.value) + for m in track.measures} + assert (3, 8) in signatures + assert (6, 8) in signatures + # A dotted duration is present. + assert any(b.duration.isDotted + for m in track.measures + for v in m.voices + for b in v.beats) + + +def test_parse_gp7_zip_container(): + # A GP7 (.gp) file is a ZIP archive with the score at + # Content/score.gpif. Repackage a .gpx score into that layout and + # confirm it produces the same song. + gpif = extractGPIF(SAMPLE.read_bytes()) + buf = io.BytesIO() + with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as archive: + archive.writestr('VERSION', '7.0') + archive.writestr('Content/score.gpif', gpif) + buf.seek(0) + + song = gp.parse(buf) + reference = gp.parse(str(SAMPLE)) + assert song.title == reference.title + assert song == reference + + +def test_parse_non_seekable_stream(): + class NonSeekable: + def __init__(self, data): + self._stream = io.BytesIO(data) + + def read(self, size=-1): + return self._stream.read(size) + + def seekable(self): + return False + + song = gp.parse(NonSeekable(SAMPLE.read_bytes())) + assert song.title == 'A Simple Song' + + +def test_missing_score_raises(): + buf = io.BytesIO() + with zipfile.ZipFile(buf, 'w') as archive: + archive.writestr('VERSION', '7.0') + buf.seek(0) + with pytest.raises(gp.GPException): + gp.parse(buf) + + +def test_compress_decompress_roundtrip(): + from guitarpro.gpx import compress, decompress + gpif = extractGPIF(SAMPLE.read_bytes()) + assert decompress(compress(gpif)) == gpif + for payload in (b'', b'A', b'ABC', b'\x00\x00\x00\x00', bytes(range(256)) * 8): + assert decompress(compress(payload)) == payload + + +def test_decompress_stops_at_end_of_stream(): + # The declared length is only an upper bound; real GP6 streams end before + # reaching it. Decompression must stop at end-of-stream rather than loop + # forever on the zero-padded tail. + import struct + from guitarpro.gpx import compress, decompress + payload = b'the quick brown fox ' * 4 + blob = bytearray(compress(payload)) + struct.pack_into('