simple_ffmpeg_batch_io.AudioIO

Read/write audio frames or batches of audio frames from (compressed) file, including video file with audio stream(s), using FFmpeg backend.

This module defines the main AudioIO class used to open audio streams, read audio frames or batches of frames, and write processed outputs.

Authors

Dominique Vaufreydaz (inspired from original C++ code: https://github.com/Vaufreyd/ReadWriteVideosWithOpenCV)

  1"""
  2Read/write audio frames or batches of audio frames from (compressed) file, including video file with audio stream(s), using FFmpeg backend.
  3
  4This module defines the main `AudioIO` class used to open audio streams,
  5read audio frames or batches of frames, and write processed outputs.
  6
  7Authors
  8-------
  9Dominique Vaufreydaz (inspired from original C++ code: https://github.com/Vaufreyd/ReadWriteVideosWithOpenCV)
 10
 11"""
 12
 13__authors__ = ("Dominique Vaufreydaz")
 14
 15import sys
 16import subprocess as sp
 17import re
 18from enum import Enum
 19from typing import Union
 20
 21import numpy as np
 22
 23from .FrameCounter import FrameCounter
 24from .FrameContainer import FrameContainer
 25from .PipeMode import PipeMode
 26
 27# init static_ffmpeg at import time, first time it will download ffmpeg executables
 28import static_ffmpeg
 29static_ffmpeg.add_paths()
 30
 31class AudioIO:
 32    # "static" variables  to ffmpeg, ffprobe executables
 33    audioProgram, paramProgram = static_ffmpeg.run.get_or_fetch_platform_executables_else_raise()
 34
 35    class AudioIOException(Exception):
 36        """
 37        Dedicated exception class for AudioIO class.
 38        """
 39        def __init__(self, message="Error while reading/writing video occurs"):
 40            self.message = message
 41            super().__init__(self.message)
 42
 43    class AudioFormat(Enum):
 44        """
 45        Enum class for supported input video type: 32-bit float is the only supported type for the moment.
 46        """
 47        PCM32LE = 'pcm_f32le' # default format (unique mode for the moment)
 48
 49    @classmethod
 50    def reader(cls, filename, **kwargs):
 51        """
 52        Create and open an AudioIO object in reader mode
 53
 54        See ``AudioIO.open`` for the full list of accepted parameters.
 55        """
 56        reader = cls()
 57        reader.open(filename, **kwargs)
 58        return reader
 59
 60    @classmethod
 61    def writer(cls, filename, sample_rate, channels, **kwargs):
 62        """
 63        Create and open an AudioIO object in writer mode
 64
 65        See ``AudioIO.create`` for the full list of accepted parameters.
 66        """
 67        writer = cls()
 68        writer.create(filename, sample_rate, channels, **kwargs)
 69        return writer
 70
 71    # To use with context manager "with AudioIO.reader(...) as f:' for instance
 72    def __enter__(self):
 73        """
 74        Method call at initialisation of a context manager like "with AudioIO.reader/writer(...) as f:' for instance
 75        """
 76        # simply return myself
 77        return self
 78
 79    def __exit__(self, exc_type, exc_val, exc_tb):
 80        """
 81        Method call when existing of a context manager like "with AudioIO.reader/writer(...) as f:' for instance
 82        """
 83        # close AudioIO
 84        self.close()
 85        return False
 86
 87    @staticmethod
 88    def get_time_in_sec(filename, *, debug=False, logLevel=16):
 89        """
 90        Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
 91
 92        Parameters
 93        ----------
 94        filename : str or path. 
 95            Raw audio waveform as a 1D array.
 96
 97        debug : bool (default False).
 98            Show debug info.
 99
100        log_level: int (default 16).
101            Log level to pass to the underlying ffmpeg/ffprobe command.
102        
103        Returns
104        ----------
105        float
106            Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
107        """
108        
109        cmd = [AudioIO.paramProgram, # ffprobe
110                    '-hide_banner',
111                    '-loglevel', str(logLevel),
112                    '-show_entries', 'format=duration',
113                    '-of', 'default=noprint_wrappers=1:nokey=1',
114                    filename
115                    ]
116
117        if debug == True:
118            print(' '.join(cmd))
119
120        # call ffprobe and get params in one single line
121        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
122        output = lpipe.stdout.readlines()
123        lpipe.terminate()
124        # transform Bytes output to one single string
125        output = ''.join( [element.decode('utf-8') for element in output])
126
127        try:
128            return float(output)
129        except (ValueError, TypeError):
130            return None
131
132    @staticmethod
133    def get_params(filename, *, debug=False, logLevel=16):
134        """
135        Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
136
137        Parameters
138        ----------
139        filename : str or path.
140            Raw audio waveform as a 1D array.
141
142        debug : bool (default (False).
143            Show debug info.
144
145        log_level: int (default 16).
146            Log level to pass to the underlying ffmpeg/ffprobe command.
147
148        Returns
149        ----------
150        tuple
151            Tuple containing (channels,sample_rate) of the file
152        """
153        cmd = [AudioIO.paramProgram, # ffprobe
154                    '-hide_banner',
155                    '-loglevel', str(logLevel),
156                    '-show_entries', 'stream=channels,sample_rate',
157                    filename
158                    ]
159
160        if debug == True:
161            print(' '.join(cmd))
162
163        # call ffprobe and get params in one single line
164        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
165        output = lpipe.stdout.readlines()
166        lpipe.terminate()
167        # transform Bytes output to one single string
168        output = ''.join( [element.decode('utf-8') for element in output])
169
170        pattern_sample_rate = r'sample_rate=(\d+)'
171        pattern_channels = r'channels=(\d+)'
172
173        # Search for values in the ffprobe output
174        match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
175        match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)
176
177        # Extraction des valeurs
178        if match_sample_rate:
179            sample_rate = int(match_sample_rate.group(1))
180        else:
181            raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
182
183        if match_channels:
184            channels = int(match_channels.group(1))
185        else:
186            raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")
187
188        return (channels,sample_rate)
189
190        # Attributes
191        mode: PipeMode
192        """ Pipemode of the current object (default PipeMode.UNK_MODE)"""
193
194        loglevel: int
195        """ loglevel of the underlying ffmpeg backend for this object (default 16)"""
196
197        debugModel: bool
198        """ debutMode flag for this object (print debut info, default False)"""
199
200        channels: int
201        """ Number of channels of images (default -1) """
202
203        sample_rate: int
204        """ sample_rate of images (default -1) """
205
206        plannar: bool
207        """ Read/write data as plannar, i.e. not interleaved (default True) """
208
209        pipe: sp.Popen
210        """ pipe object to ffmpeg/ffprobe (default None)"""
211
212        frame_size: int
213        """ Weight in bytes of one image (default -1)"""
214
215        filename: str
216        """ Filename of the file (default None)"""
217
218        frame_counter: FrameCounter
219        """ `Framecounter` object to count ellapsed time (default None)"""
220
221    def __init__(self, *, logLevel = 16, debugMode = False):
222        """
223        Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode
224
225        Parameters
226        ----------
227        log_level: int (default 16)
228            Log level to pass to the underlying ffmpeg/ffprobe command.
229
230        debugMode: bool (default (False)
231            Show debug info. while processing video
232        """
233
234        self.mode = PipeMode.UNK_MODE
235        self.logLevel = logLevel
236        self.debugMode = debugMode
237
238        # Call init() method
239        self.init()
240
241    def init(self):
242        """
243        Init or reinit a VideoIO object.
244        """
245        self.channels  = -1
246        self.sample_rate = -1
247        self.plannar = True
248        self.pipe = None
249        self.frame_size = -1
250        self.filename = None
251        self.frame_counter = None
252
253    _repr_exclude = {"pipe"}
254    """ List of excluded attribute for string conversion. """
255
256    # converting the object to a string representation
257    def __repr__(self):
258        """
259        Convert object (excluding attributes in _repr_exclude) to string representation.
260        """
261        attrs = ", ".join(
262            f"{k}={v!r}"
263            for k, v in self.__dict__.items()
264            if k not in self._repr_exclude
265        )
266        return f"{self.__class__.__name__}({attrs})"
267
268    __str__ = __repr__
269    """ String representation """
270
271    def get_elapsed_time_as_str(self) -> str:
272        """
273        Method to get elapsed time (float value represented) as str.
274
275        Returns
276        ----------
277        str or None
278            Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds
279            None if no frame counter are available.
280        """
281        if self.frame_counter is None:
282            return None
283        return self.frame_counter.get_elapsed_time_as_str()
284
285    def get_formated_elapsed_time_as_str(self,show_ms=True) -> str:
286        """
287        Method to get elapsed time (hour format) as str.
288
289        Returns
290        ----------
291        str or None
292            Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds
293            None if no frame counter are available.
294        """
295        if self.frame_counter is None:
296            return None
297        return self.frame_counter.get_formated_elapsed_time_as_str()
298
299    def get_elapsed_time(self) -> float:
300        """
301        Method to get elapsed time as float value rounded to 3 decimals.
302
303        Returns
304        ----------
305        float or None
306            Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds
307            None if no frame counter are available.
308        """
309        if self.frame_counter is None:
310            return None
311        return self.frame_counter.get_elapsed_time()
312
313    def is_opened(self) -> bool:
314        """
315        Method to get status of the underlying pipe to ffmpeg.
316
317        Returns
318        ----------
319        bool
320            True if pipe is opened (reading or writing mode), False if not.
321        """
322        # is the pip opened?
323        if self.pipe is not None and self.pipe.poll() is None:
324            return True
325
326        return False
327
328    def close(self):
329        """
330        Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe  will be terminated. Object can be reused using open or create methods.
331        """
332        if self.pipe is not None:
333            if self.mode == PipeMode.WRITE_MODE:
334                # killing will make ffmpeg not finish properly the job, close the pipe
335                # to let it know that no more data are comming
336                self.pipe.stdin.close()
337            else: # self.mode == PipeMode.READ_MODE
338                # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows
339                self.pipe.kill()
340
341            # wait for subprocess to end
342            self.pipe.wait()
343
344        # reinit object for later use
345        self.init()
346
347    def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False,
348                outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ):
349        """
350        Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create
351        on a AudioIO will close any former open video.
352
353        Parameters
354        ----------
355        filename: str or path
356            filename of path to the file (mp4, avi, ...)
357
358        sample_rate: int
359            If defined as a positive value, sample_rates of the output file will be set to this value.
360
361        channels: int
362            If defined as a positive value, number of channels of output file will be set to this value.
363
364        fps:
365            If defined as a positive value, fps of input video will be set to this value.
366
367        outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
368            Define audio format for samples. Possible value is AudioFormat.PCM32LE.
369
370        encodingParams: str optional (default None)
371            Parameter to pass to ffmpeg to encode video like audio filters.
372
373        plannar : bool optionnal (default True)
374            Input data to write are grouped by channel if True, interleaved instead.
375
376        Returns
377        ----------
378        bool
379            Was the creation successfull
380        """
381
382        # Close if already opened
383        self.close()
384
385        # Set geometry/fps of the video stream from params
386        self.sample_rate = int(sample_rate)
387        self.channels = int(channels)
388        self.plannar = plannar
389
390        # Check params
391        if self.sample_rate <= 0 or self.channels <= 0:
392            raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels))
393
394        # To write audio, we do not need to know in advance frame size, we will write x values of n bytes
395        self.frame_size = None
396
397        # Video params are set, open the video
398        cmd = [self.audioProgram] # ffmpeg
399
400        if writeOverExistingFile == True:
401            cmd.extend(['-y'])
402
403        cmd.extend(['-hide_banner',
404            '-nostats',
405            '-loglevel', str(self.logLevel),
406            '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding
407            '-ar', f"{self.sample_rate}",
408            '-ac', f"{self.channels}",
409            '-i', '-'])
410
411        if encodingParams is not None:
412            cmd.extend(encodingParams.split())
413
414        # remove video
415        cmd.extend( ['-vn', filename ] )
416
417        if self.debugMode == True:
418            print( ' '.join(cmd), file=sys.stderr )
419
420        # store filename and set mode
421        self.filename = filename
422        self.mode = PipeMode.WRITE_MODE
423
424        # call ffmpeg in write mode
425        try:
426            self.pipe = sp.Popen(cmd, stdin=sp.PIPE)
427            self.frame_counter = FrameCounter(self.sample_rate)
428        except Exception as e:
429            # if pipe failed, reinit object and raise exception
430            self.init()
431            raise
432
433        return True
434
435    def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE,
436                    decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ):
437        """
438        Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open
439        on a AudioIO will close any former open file.
440
441        Parameters
442        ----------
443        filename: str or path
444            filename of path to the file (mp4, avi, ...)
445
446        sample_rate: int optional (default -1)
447            If defined as a positive value, sample rate of the input audio will be converted to this value.
448
449        channels: int optional (default -1)
450            If defined as a positive value, number of channels of the input audio will converted to this value.
451
452        inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
453            Define audio format for samples. Possible value is AudioFormat.PCM32LE.
454
455        decodingParams: str optional (default None)
456            Parameter to pass to ffmpeg to decode video like audio filters.
457
458        plannar: bool optionnal (default True)
459            Group audio samples per channel if True. Else, samples are interleaved.
460
461        frame_size: int or float (default 1.0)
462            If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples.
463            if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms.
464            Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate)
465
466        start_time: float optional (default 0.0)
467            Define the reading start time. If not set, reading at beginning of the file.
468
469        Returns
470        ----------
471        bool
472            Was the opening successfull
473        """
474
475        # Close if already opened
476        self.close()
477
478        # Force conversion of parameters
479        channels = int(channels)
480        sample_rate = float(sample_rate)
481
482        self.plannar = plannar
483
484        # get parameters from file if needed:
485        if sample_rate <= 0 or channels <= 0:
486            self.channels, self.sample_rate = self.getAudioParams(filename)
487
488        # check if parameters ask to overide video parameters
489        if channels > 0:
490            self.channels = channels
491        if sample_rate > 0:
492            self.sample_rate = sample_rate
493
494        # check parameters
495
496        if isinstance(frame_size,float):
497            # time in seconds
498            self.frame_size = int(frame_size*self.sample_rate)
499        elif isinstance(frame_size,int):
500            # number of samples
501            self.frame_size = frame_size
502        else:
503            # to do
504            pass
505
506        # Video params are set, open the video
507        cmd = [self.audioProgram, # ffmpeg
508                    '-hide_banner',
509                    '-nostats',
510                    '-loglevel', str(self.logLevel)]
511
512        if decodingParams is not None:
513            cmd.extend([decodingParams.split()])
514
515        if start_time < 0.0:
516            pass
517        elif start_time > 0.0:
518            cmd.extend(["-ss", f"{start_time}"])            
519
520        cmd.extend( ['-i', filename,
521                     '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding
522                     '-ar', f"{self.sample_rate}",
523                     '-ac', f"{self.channels}",
524                     '-' # output to stdout
525                    ]
526                )
527
528        if self.debugMode == True:
529            print( ' '.join(cmd) )
530
531        # store filename and set mode to READ_MODE
532        self.filename = filename
533        self.mode = PipeMode.READ_MODE
534
535        # call ffmpeg in read mode
536        try:
537            self.pipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe
538            self.frame_counter = FrameCounter(self.sample_rate)
539            if start_time > 0.0:
540                self.frame_counter += start_time # adding with float means adding time
541        except Exception as e:
542            # if pipe failed, reinit object and raise exception
543            self.init()
544            raise
545
546        return True
547
548    def read_frame(self, with_timestamps = False):
549        """
550        Read next frame from the audio file
551
552        Parameters
553        ----------
554        with_timestamps: bool optional (default False)
555            If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s)
556
557        Returns
558        ----------
559        nparray or FrameContainer
560            A frame of shape (self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A frame
561            of shape (self.channels*self.frame_size) with interleaved data if self.plannar is False.
562            if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and
563            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element).
564        """
565
566        if self.pipe is None:
567            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram))
568        # - pipe is in write mode
569        if self.mode != PipeMode.READ_MODE:
570            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
571
572        if with_timestamps:
573            # get elapsed time in video, it is time of next frame(s)
574            current_elapsed_time = self.get_elapsed_time()
575
576        # read rgb image from pipe
577        toread = self.frame_size*4
578        buffer = self.pipe.stdout.read(toread)
579        if len(buffer) != toread:
580            # not considered as an error, no more frame, no exception
581            return None
582
583        # get numpy UINT8 array from buffer
584        audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels)
585
586        # make it plannar (or not)
587        if self.plannar:
588            #transpose it
589            audio = audio.T
590
591        # increase frame_counter
592        self.frame_counter.frame_count += (self.frame_size * self.channels)
593
594        # say to gc that this buffer is no longer needed
595        del buffer
596
597        if with_timestamps:
598            return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)
599        
600        return audio
601
602    def read_batch(self, numberOfFrames, with_timestamps = False):
603        """
604        Read next batch of audio from the file
605
606        Parameters
607        ----------
608        number_of_frames: int
609            Number of desired images within the batch. The last batch from the file may have less images.
610            
611        with_timestamps: bool optional (default False)
612            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
613
614        Returns
615        ----------
616        nparray or FrameContainer
617            A batch of shape (n, self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A batch
618            of shape (n, self.channels*self.frame_size) with interleaved data if self.plannar is False.
619            if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and
620            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame).
621        """
622
623        if self.pipe is None:
624            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram))
625        # - pipe is in write mode
626        if self.mode != PipeMode.READ_MODE:
627            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
628
629        if with_timestamps:
630            # get elapsed time in video, it is time of next frame(s)
631            current_elapsed_time = self.get_elapsed_time()
632
633        # try to read complete batch
634        toread = self.frame_size*4*self.channels*numberOfFrames
635        buffer = self.pipe.stdout.read(toread)
636
637        # check if we have at least 1 Frame
638        if len(buffer) < toread:
639            # not considered as an error, no more frame, no exception
640            return None
641
642        # compute actual number of Frames
643        actualNbFrames = len(buffer)//(self.frame_size*4*self.channels)
644
645        # get and reshape batch from buffer
646        batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,))
647
648        if self.plannar:
649            batch = batch.transpose(0, 2, 1)
650
651        # increase frame_counter
652        self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels)
653        
654        # say to gc that this buffer is no longer needed
655        del buffer
656
657        if with_timestamps:
658            return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)
659        
660        return batch
661
662    def write_frame(self, audio) -> bool:
663        """
664        Write an audio frame to the file
665
666        Parameters
667        ----------
668        audio: nparray
669            The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).
670
671        Returns
672        ----------
673        bool
674            Writing was successful or not.
675        """
676        # Check params
677        # - pipe exists
678        if self.pipe is None:
679            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
680        # - pipe is in write mode
681        if self.mode != PipeMode.WRITE_MODE:
682            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
683        # - shape of image is fine, thus we have pixels for a full compatible frame
684        if audio.shape[0] != self.channels:
685            raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size))
686        # - type of data is Float32
687        if audio.dtype != np.float32:
688            raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype))
689
690        # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar
691        if not self.plannar:
692            audio = audio.reshape(-1)
693
694        # print( audio.shape )
695
696        # garantee to have a C continuous array
697        if not audio.flags['C_CONTIGUOUS']:
698            a = np.ascontiguousarray(a) 
699
700        # write frame
701        buffer = audio.tobytes()
702        if self.pipe.stdin.write( buffer ) < len(buffer):
703            print( f"Error writing frame to {self.filename}" )
704            return False
705
706        # increase frame_counter
707        self.frame_counter.frame_count += (self.frame_size * self.channels)
708
709        # say to gc that this buffer is no longer needed 
710        del buffer
711
712        return True
713
714    def write_batch(self, batch):
715        """
716        Write a batch of audio frame to the file
717
718        Parameters
719        ----------
720        batch: nparray
721            The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.
722
723        Returns
724        ----------
725        bool
726            Writing was successful or not.
727        """
728        # Check params
729        # - pipe exists
730        if self.pipe is None:
731            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
732        # - pipe is in write mode
733        if self.mode != PipeMode.WRITE_MODE:
734            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
735        # batch is 3D (n, channels, nb samples)
736        if batch.ndim !=3:
737            raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
738        # - shape of images in batch is fine
739        if batch.shape[2] != self.channels:
740            raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[2], self.channels, batch.shape))
741
742        # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe
743        # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False
744        if not self.plannar:
745            # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel)
746            batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels)
747            batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel)
748
749        # garantee to have a C continuous array
750        if not batch.flags['C_CONTIGUOUS']:
751            batch = np.ascontiguousarray(batch)
752
753        # write frame
754        buffer = batch.tobytes()
755        if self.pipe.stdin.write( buffer ) < len(buffer):
756            # say to gc that this buffer is no longer needed
757            del buffer
758            raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))
759
760        # increase frame_counter
761        self.frame_counter.frame_count += int(batch.shape[0]/self.channels) # int conversion is mandatory to avoid confusion with time as float
762              
763        # say to gc that this buffer is no longer needed
764        del buffer
765
766        return True
767
768    def iter_frames(self, with_timestamps = False):
769        """
770        Method to iterate on audio frames using AudioIO obj.
771        for audio_frame in obj.iter_frames():
772            ....
773
774        Parameters
775        ----------
776        with_timestamps: bool optional (default False)
777            If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames
778
779        Returns
780        ----------
781        nparray or FrameContainer
782            A batch of images of shape ()
783        """
784
785        try:
786            if self.mode == PipeMode.READ_MODE:
787                while self.isOpened():
788                    frame = self.readFrame(with_timestamps)
789                    if frame is not None:
790                        yield frame
791        finally:
792            self.close()
793
794    def iter_batches(self, batch_size : int, with_timestamps = False ):
795        """
796        Method to iterate on batch ofaudio  frames using VideoIO obj.
797        for audio_batch in obj.iter_batches():
798            ....
799
800        Parameters
801        ----------
802        with_timestamps: bool optional (default False)
803            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
804        """
805        try:
806            if self.mode == PipeMode.READ_MODE:
807                while self.isOpened():
808                    batch = self.readBatch(batch_size, with_timestamps)
809                    if batch is not None:
810                        yield batch
811        finally:
812            self.close()
813
    # Function aliases: camelCase names to be compliant with the original C++ version,
    # plus "audio"-prefixed snake_case names. Each alias is bound to the very same
    # function object as the method defined above.
    getAudioTimeInSec = get_time_in_sec
    getAudioParams = get_params
    get_audio_time_in_sec = get_time_in_sec
    get_audio_params = get_params
    isOpened = is_opened
    readFrame = read_frame
    readBatch = read_batch
    writeFrame = write_frame
    writeBatch = write_batch
class AudioIO:
 32class AudioIO:
 33    # "static" variables  to ffmpeg, ffprobe executables
 34    audioProgram, paramProgram = static_ffmpeg.run.get_or_fetch_platform_executables_else_raise()
 35
 36    class AudioIOException(Exception):
 37        """
 38        Dedicated exception class for AudioIO class.
 39        """
 40        def __init__(self, message="Error while reading/writing video occurs"):
 41            self.message = message
 42            super().__init__(self.message)
 43
 44    class AudioFormat(Enum):
 45        """
 46        Enum class for supported input video type: 32-bit float is the only supported type for the moment.
 47        """
 48        PCM32LE = 'pcm_f32le' # default format (unique mode for the moment)
 49
 50    @classmethod
 51    def reader(cls, filename, **kwargs):
 52        """
 53        Create and open an AudioIO object in reader mode
 54
 55        See ``AudioIO.open`` for the full list of accepted parameters.
 56        """
 57        reader = cls()
 58        reader.open(filename, **kwargs)
 59        return reader
 60
 61    @classmethod
 62    def writer(cls, filename, sample_rate, channels, **kwargs):
 63        """
 64        Create and open an AudioIO object in writer mode
 65
 66        See ``AudioIO.create`` for the full list of accepted parameters.
 67        """
 68        writer = cls()
 69        writer.create(filename, sample_rate, channels, **kwargs)
 70        return writer
 71
 72    # To use with context manager "with AudioIO.reader(...) as f:' for instance
 73    def __enter__(self):
 74        """
 75        Method call at initialisation of a context manager like "with AudioIO.reader/writer(...) as f:' for instance
 76        """
 77        # simply return myself
 78        return self
 79
 80    def __exit__(self, exc_type, exc_val, exc_tb):
 81        """
 82        Method call when existing of a context manager like "with AudioIO.reader/writer(...) as f:' for instance
 83        """
 84        # close AudioIO
 85        self.close()
 86        return False
 87
 88    @staticmethod
 89    def get_time_in_sec(filename, *, debug=False, logLevel=16):
 90        """
 91        Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
 92
 93        Parameters
 94        ----------
 95        filename : str or path. 
 96            Raw audio waveform as a 1D array.
 97
 98        debug : bool (default False).
 99            Show debug info.
100
101        log_level: int (default 16).
102            Log level to pass to the underlying ffmpeg/ffprobe command.
103        
104        Returns
105        ----------
106        float
107            Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
108        """
109        
110        cmd = [AudioIO.paramProgram, # ffprobe
111                    '-hide_banner',
112                    '-loglevel', str(logLevel),
113                    '-show_entries', 'format=duration',
114                    '-of', 'default=noprint_wrappers=1:nokey=1',
115                    filename
116                    ]
117
118        if debug == True:
119            print(' '.join(cmd))
120
121        # call ffprobe and get params in one single line
122        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
123        output = lpipe.stdout.readlines()
124        lpipe.terminate()
125        # transform Bytes output to one single string
126        output = ''.join( [element.decode('utf-8') for element in output])
127
128        try:
129            return float(output)
130        except (ValueError, TypeError):
131            return None
132
133    @staticmethod
134    def get_params(filename, *, debug=False, logLevel=16):
135        """
136        Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
137
138        Parameters
139        ----------
140        filename : str or path.
141            Raw audio waveform as a 1D array.
142
143        debug : bool (default (False).
144            Show debug info.
145
146        log_level: int (default 16).
147            Log level to pass to the underlying ffmpeg/ffprobe command.
148
149        Returns
150        ----------
151        tuple
152            Tuple containing (channels,sample_rate) of the file
153        """
154        cmd = [AudioIO.paramProgram, # ffprobe
155                    '-hide_banner',
156                    '-loglevel', str(logLevel),
157                    '-show_entries', 'stream=channels,sample_rate',
158                    filename
159                    ]
160
161        if debug == True:
162            print(' '.join(cmd))
163
164        # call ffprobe and get params in one single line
165        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
166        output = lpipe.stdout.readlines()
167        lpipe.terminate()
168        # transform Bytes output to one single string
169        output = ''.join( [element.decode('utf-8') for element in output])
170
171        pattern_sample_rate = r'sample_rate=(\d+)'
172        pattern_channels = r'channels=(\d+)'
173
174        # Search for values in the ffprobe output
175        match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
176        match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)
177
178        # Extraction des valeurs
179        if match_sample_rate:
180            sample_rate = int(match_sample_rate.group(1))
181        else:
182            raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
183
184        if match_channels:
185            channels = int(match_channels.group(1))
186        else:
187            raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")
188
189        return (channels,sample_rate)
190
191        # Attributes
192        mode: PipeMode
193        """ Pipemode of the current object (default PipeMode.UNK_MODE)"""
194
195        loglevel: int
196        """ loglevel of the underlying ffmpeg backend for this object (default 16)"""
197
198        debugModel: bool
199        """ debutMode flag for this object (print debut info, default False)"""
200
201        channels: int
202        """ Number of channels of images (default -1) """
203
204        sample_rate: int
205        """ sample_rate of images (default -1) """
206
207        plannar: bool
208        """ Read/write data as plannar, i.e. not interleaved (default True) """
209
210        pipe: sp.Popen
211        """ pipe object to ffmpeg/ffprobe (default None)"""
212
213        frame_size: int
214        """ Weight in bytes of one image (default -1)"""
215
216        filename: str
217        """ Filename of the file (default None)"""
218
219        frame_counter: FrameCounter
220        """ `Framecounter` object to count ellapsed time (default None)"""
221
222    def __init__(self, *, logLevel = 16, debugMode = False):
223        """
224        Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode
225
226        Parameters
227        ----------
228        log_level: int (default 16)
229            Log level to pass to the underlying ffmpeg/ffprobe command.
230
231        debugMode: bool (default (False)
232            Show debug info. while processing video
233        """
234
235        self.mode = PipeMode.UNK_MODE
236        self.logLevel = logLevel
237        self.debugMode = debugMode
238
239        # Call init() method
240        self.init()
241
242    def init(self):
243        """
244        Init or reinit a VideoIO object.
245        """
246        self.channels  = -1
247        self.sample_rate = -1
248        self.plannar = True
249        self.pipe = None
250        self.frame_size = -1
251        self.filename = None
252        self.frame_counter = None
253
254    _repr_exclude = {"pipe"}
255    """ List of excluded attribute for string conversion. """
256
257    # converting the object to a string representation
258    def __repr__(self):
259        """
260        Convert object (excluding attributes in _repr_exclude) to string representation.
261        """
262        attrs = ", ".join(
263            f"{k}={v!r}"
264            for k, v in self.__dict__.items()
265            if k not in self._repr_exclude
266        )
267        return f"{self.__class__.__name__}({attrs})"
268
269    __str__ = __repr__
270    """ String representation """
271
272    def get_elapsed_time_as_str(self) -> str:
273        """
274        Method to get elapsed time (float value represented) as str.
275
276        Returns
277        ----------
278        str or None
279            Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds
280            None if no frame counter are available.
281        """
282        if self.frame_counter is None:
283            return None
284        return self.frame_counter.get_elapsed_time_as_str()
285
286    def get_formated_elapsed_time_as_str(self,show_ms=True) -> str:
287        """
288        Method to get elapsed time (hour format) as str.
289
290        Returns
291        ----------
292        str or None
293            Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds
294            None if no frame counter are available.
295        """
296        if self.frame_counter is None:
297            return None
298        return self.frame_counter.get_formated_elapsed_time_as_str()
299
300    def get_elapsed_time(self) -> float:
301        """
302        Method to get elapsed time as float value rounded to 3 decimals.
303
304        Returns
305        ----------
306        float or None
307            Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds
308            None if no frame counter are available.
309        """
310        if self.frame_counter is None:
311            return None
312        return self.frame_counter.get_elapsed_time()
313
314    def is_opened(self) -> bool:
315        """
316        Method to get status of the underlying pipe to ffmpeg.
317
318        Returns
319        ----------
320        bool
321            True if pipe is opened (reading or writing mode), False if not.
322        """
323        # is the pip opened?
324        if self.pipe is not None and self.pipe.poll() is None:
325            return True
326
327        return False
328
329    def close(self):
330        """
331        Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe  will be terminated. Object can be reused using open or create methods.
332        """
333        if self.pipe is not None:
334            if self.mode == PipeMode.WRITE_MODE:
335                # killing will make ffmpeg not finish properly the job, close the pipe
336                # to let it know that no more data are comming
337                self.pipe.stdin.close()
338            else: # self.mode == PipeMode.READ_MODE
339                # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows
340                self.pipe.kill()
341
342            # wait for subprocess to end
343            self.pipe.wait()
344
345        # reinit object for later use
346        self.init()
347
348    def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False,
349                outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ):
350        """
351        Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create
352        on a AudioIO will close any former open video.
353
354        Parameters
355        ----------
356        filename: str or path
357            filename of path to the file (mp4, avi, ...)
358
359        sample_rate: int
360            If defined as a positive value, sample_rates of the output file will be set to this value.
361
362        channels: int
363            If defined as a positive value, number of channels of output file will be set to this value.
364
365        fps:
366            If defined as a positive value, fps of input video will be set to this value.
367
368        outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
369            Define audio format for samples. Possible value is AudioFormat.PCM32LE.
370
371        encodingParams: str optional (default None)
372            Parameter to pass to ffmpeg to encode video like audio filters.
373
374        plannar : bool optionnal (default True)
375            Input data to write are grouped by channel if True, interleaved instead.
376
377        Returns
378        ----------
379        bool
380            Was the creation successfull
381        """
382
383        # Close if already opened
384        self.close()
385
386        # Set geometry/fps of the video stream from params
387        self.sample_rate = int(sample_rate)
388        self.channels = int(channels)
389        self.plannar = plannar
390
391        # Check params
392        if self.sample_rate <= 0 or self.channels <= 0:
393            raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels))
394
395        # To write audio, we do not need to know in advance frame size, we will write x values of n bytes
396        self.frame_size = None
397
398        # Video params are set, open the video
399        cmd = [self.audioProgram] # ffmpeg
400
401        if writeOverExistingFile == True:
402            cmd.extend(['-y'])
403
404        cmd.extend(['-hide_banner',
405            '-nostats',
406            '-loglevel', str(self.logLevel),
407            '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding
408            '-ar', f"{self.sample_rate}",
409            '-ac', f"{self.channels}",
410            '-i', '-'])
411
412        if encodingParams is not None:
413            cmd.extend(encodingParams.split())
414
415        # remove video
416        cmd.extend( ['-vn', filename ] )
417
418        if self.debugMode == True:
419            print( ' '.join(cmd), file=sys.stderr )
420
421        # store filename and set mode
422        self.filename = filename
423        self.mode = PipeMode.WRITE_MODE
424
425        # call ffmpeg in write mode
426        try:
427            self.pipe = sp.Popen(cmd, stdin=sp.PIPE)
428            self.frame_counter = FrameCounter(self.sample_rate)
429        except Exception as e:
430            # if pipe failed, reinit object and raise exception
431            self.init()
432            raise
433
434        return True
435
436    def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE,
437                    decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ):
438        """
439        Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open
440        on a AudioIO will close any former open file.
441
442        Parameters
443        ----------
444        filename: str or path
445            filename of path to the file (mp4, avi, ...)
446
447        sample_rate: int optional (default -1)
448            If defined as a positive value, sample rate of the input audio will be converted to this value.
449
450        channels: int optional (default -1)
451            If defined as a positive value, number of channels of the input audio will converted to this value.
452
453        inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
454            Define audio format for samples. Possible value is AudioFormat.PCM32LE.
455
456        decodingParams: str optional (default None)
457            Parameter to pass to ffmpeg to decode video like audio filters.
458
459        plannar: bool optionnal (default True)
460            Group audio samples per channel if True. Else, samples are interleaved.
461
462        frame_size: int or float (default 1.0)
463            If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples.
464            if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms.
465            Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate)
466
467        start_time: float optional (default 0.0)
468            Define the reading start time. If not set, reading at beginning of the file.
469
470        Returns
471        ----------
472        bool
473            Was the opening successfull
474        """
475
476        # Close if already opened
477        self.close()
478
479        # Force conversion of parameters
480        channels = int(channels)
481        sample_rate = float(sample_rate)
482
483        self.plannar = plannar
484
485        # get parameters from file if needed:
486        if sample_rate <= 0 or channels <= 0:
487            self.channels, self.sample_rate = self.getAudioParams(filename)
488
489        # check if parameters ask to overide video parameters
490        if channels > 0:
491            self.channels = channels
492        if sample_rate > 0:
493            self.sample_rate = sample_rate
494
495        # check parameters
496
497        if isinstance(frame_size,float):
498            # time in seconds
499            self.frame_size = int(frame_size*self.sample_rate)
500        elif isinstance(frame_size,int):
501            # number of samples
502            self.frame_size = frame_size
503        else:
504            # to do
505            pass
506
507        # Video params are set, open the video
508        cmd = [self.audioProgram, # ffmpeg
509                    '-hide_banner',
510                    '-nostats',
511                    '-loglevel', str(self.logLevel)]
512
513        if decodingParams is not None:
514            cmd.extend([decodingParams.split()])
515
516        if start_time < 0.0:
517            pass
518        elif start_time > 0.0:
519            cmd.extend(["-ss", f"{start_time}"])            
520
521        cmd.extend( ['-i', filename,
522                     '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding
523                     '-ar', f"{self.sample_rate}",
524                     '-ac', f"{self.channels}",
525                     '-' # output to stdout
526                    ]
527                )
528
529        if self.debugMode == True:
530            print( ' '.join(cmd) )
531
532        # store filename and set mode to READ_MODE
533        self.filename = filename
534        self.mode = PipeMode.READ_MODE
535
536        # call ffmpeg in read mode
537        try:
538            self.pipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe
539            self.frame_counter = FrameCounter(self.sample_rate)
540            if start_time > 0.0:
541                self.frame_counter += start_time # adding with float means adding time
542        except Exception as e:
543            # if pipe failed, reinit object and raise exception
544            self.init()
545            raise
546
547        return True
548
549    def read_frame(self, with_timestamps = False):
550        """
551        Read next frame from the audio file
552
553        Parameters
554        ----------
555        with_timestamps: bool optional (default False)
556            If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s)
557
558        Returns
559        ----------
560        nparray or FrameContainer
561            A frame of shape (self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A frame
562            of shape (self.channels*self.frame_size) with interleaved data if self.plannar is False.
563            if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and
564            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element).
565        """
566
567        if self.pipe is None:
568            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram))
569        # - pipe is in write mode
570        if self.mode != PipeMode.READ_MODE:
571            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
572
573        if with_timestamps:
574            # get elapsed time in video, it is time of next frame(s)
575            current_elapsed_time = self.get_elapsed_time()
576
577        # read rgb image from pipe
578        toread = self.frame_size*4
579        buffer = self.pipe.stdout.read(toread)
580        if len(buffer) != toread:
581            # not considered as an error, no more frame, no exception
582            return None
583
584        # get numpy UINT8 array from buffer
585        audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels)
586
587        # make it plannar (or not)
588        if self.plannar:
589            #transpose it
590            audio = audio.T
591
592        # increase frame_counter
593        self.frame_counter.frame_count += (self.frame_size * self.channels)
594
595        # say to gc that this buffer is no longer needed
596        del buffer
597
598        if with_timestamps:
599            return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)
600        
601        return audio
602
603    def read_batch(self, numberOfFrames, with_timestamps = False):
604        """
605        Read next batch of audio from the file
606
607        Parameters
608        ----------
609        number_of_frames: int
610            Number of desired images within the batch. The last batch from the file may have less images.
611            
612        with_timestamps: bool optional (default False)
613            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
614
615        Returns
616        ----------
617        nparray or FrameContainer
618            A batch of shape (n, self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A batch
619            of shape (n, self.channels*self.frame_size) with interleaved data if self.plannar is False.
620            if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and
621            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame).
622        """
623
624        if self.pipe is None:
625            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram))
626        # - pipe is in write mode
627        if self.mode != PipeMode.READ_MODE:
628            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
629
630        if with_timestamps:
631            # get elapsed time in video, it is time of next frame(s)
632            current_elapsed_time = self.get_elapsed_time()
633
634        # try to read complete batch
635        toread = self.frame_size*4*self.channels*numberOfFrames
636        buffer = self.pipe.stdout.read(toread)
637
638        # check if we have at least 1 Frame
639        if len(buffer) < toread:
640            # not considered as an error, no more frame, no exception
641            return None
642
643        # compute actual number of Frames
644        actualNbFrames = len(buffer)//(self.frame_size*4*self.channels)
645
646        # get and reshape batch from buffer
647        batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,))
648
649        if self.plannar:
650            batch = batch.transpose(0, 2, 1)
651
652        # increase frame_counter
653        self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels)
654        
655        # say to gc that this buffer is no longer needed
656        del buffer
657
658        if with_timestamps:
659            return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)
660        
661        return batch
662
663    def write_frame(self, audio) -> bool:
664        """
665        Write an audio frame to the file
666
667        Parameters
668        ----------
669        audio: nparray
670            The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).
671
672        Returns
673        ----------
674        bool
675            Writing was successful or not.
676        """
677        # Check params
678        # - pipe exists
679        if self.pipe is None:
680            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
681        # - pipe is in write mode
682        if self.mode != PipeMode.WRITE_MODE:
683            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
684        # - shape of image is fine, thus we have pixels for a full compatible frame
685        if audio.shape[0] != self.channels:
686            raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size))
687        # - type of data is Float32
688        if audio.dtype != np.float32:
689            raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype))
690
691        # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar
692        if not self.plannar:
693            audio = audio.reshape(-1)
694
695        # print( audio.shape )
696
697        # garantee to have a C continuous array
698        if not audio.flags['C_CONTIGUOUS']:
699            a = np.ascontiguousarray(a) 
700
701        # write frame
702        buffer = audio.tobytes()
703        if self.pipe.stdin.write( buffer ) < len(buffer):
704            print( f"Error writing frame to {self.filename}" )
705            return False
706
707        # increase frame_counter
708        self.frame_counter.frame_count += (self.frame_size * self.channels)
709
710        # say to gc that this buffer is no longer needed 
711        del buffer
712
713        return True
714
715    def write_batch(self, batch):
716        """
717        Write a batch of audio frame to the file
718
719        Parameters
720        ----------
721        batch: nparray
722            The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.
723
724        Returns
725        ----------
726        bool
727            Writing was successful or not.
728        """
729        # Check params
730        # - pipe exists
731        if self.pipe is None:
732            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
733        # - pipe is in write mode
734        if self.mode != PipeMode.WRITE_MODE:
735            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
736        # batch is 3D (n, channels, nb samples)
737        if batch.ndim !=3:
738            raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
739        # - shape of images in batch is fine
740        if batch.shape[2] != self.channels:
741            raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[2], self.channels, batch.shape))
742
743        # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe
744        # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False
745        if not self.plannar:
746            # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel)
747            batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels)
748            batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel)
749
750        # garantee to have a C continuous array
751        if not batch.flags['C_CONTIGUOUS']:
752            batch = np.ascontiguousarray(batch)
753
754        # write frame
755        buffer = batch.tobytes()
756        if self.pipe.stdin.write( buffer ) < len(buffer):
757            # say to gc that this buffer is no longer needed
758            del buffer
759            raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))
760
761        # increase frame_counter
762        self.frame_counter.frame_count += int(batch.shape[0]/self.channels) # int conversion is mandatory to avoid confusion with time as float
763              
764        # say to gc that this buffer is no longer needed
765        del buffer
766
767        return True
768
769    def iter_frames(self, with_timestamps = False):
770        """
771        Method to iterate on audio frames using AudioIO obj.
772        for audio_frame in obj.iter_frames():
773            ....
774
775        Parameters
776        ----------
777        with_timestamps: bool optional (default False)
778            If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames
779
780        Returns
781        ----------
782        nparray or FrameContainer
783            A batch of images of shape ()
784        """
785
786        try:
787            if self.mode == PipeMode.READ_MODE:
788                while self.isOpened():
789                    frame = self.readFrame(with_timestamps)
790                    if frame is not None:
791                        yield frame
792        finally:
793            self.close()
794
795    def iter_batches(self, batch_size : int, with_timestamps = False ):
796        """
797        Method to iterate on batch ofaudio  frames using VideoIO obj.
798        for audio_batch in obj.iter_batches():
799            ....
800
801        Parameters
802        ----------
803        with_timestamps: bool optional (default False)
804            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
805        """
806        try:
807            if self.mode == PipeMode.READ_MODE:
808                while self.isOpened():
809                    batch = self.readBatch(batch_size, with_timestamps)
810                    if batch is not None:
811                        yield batch
812        finally:
813            self.close()
814
815    # function aliases to be compliant with original C++ version
816    getAudioTimeInSec = get_time_in_sec
817    getAudioParams = get_params
818    get_audio_time_in_sec = get_time_in_sec
819    get_audio_params = get_params
820    isOpened = is_opened
821    readFrame = read_frame
822    readBatch = read_batch
823    writeFrame = write_frame
824    writeBatch = write_batch
AudioIO(*, logLevel=16, debugMode=False)
    def __init__(self, *, logLevel = 16, debugMode = False):
        """
        Create an AudioIO object giving ffmpeg/ffprobe loglevel and defining debug mode

        Parameters
        ----------
        logLevel: int (default 16)
            Log level to pass to the underlying ffmpeg/ffprobe command.

        debugMode: bool (default False)
            Show debug info while processing audio
        """

        # no pipe yet: the object is neither a reader nor a writer
        self.mode = PipeMode.UNK_MODE
        self.logLevel = logLevel
        self.debugMode = debugMode

        # Call init() method to set all the other attributes to their default values
        self.init()

Create an AudioIO object, giving the ffmpeg/ffprobe log level and defining the debug mode.

Parameters

logLevel: int (default 16) Log level to pass to the underlying ffmpeg/ffprobe command.

debugMode: bool (default False) Show debug info while processing audio.

@classmethod
def reader(cls, filename, **kwargs):
50    @classmethod
51    def reader(cls, filename, **kwargs):
52        """
53        Create and open an AudioIO object in reader mode
54
55        See ``AudioIO.open`` for the full list of accepted parameters.
56        """
57        reader = cls()
58        reader.open(filename, **kwargs)
59        return reader

Create and open an AudioIO object in reader mode

See AudioIO.open for the full list of accepted parameters.

@classmethod
def writer(cls, filename, sample_rate, channels, **kwargs):
61    @classmethod
62    def writer(cls, filename, sample_rate, channels, **kwargs):
63        """
64        Create and open an AudioIO object in writer mode
65
66        See ``AudioIO.create`` for the full list of accepted parameters.
67        """
68        writer = cls()
69        writer.create(filename, sample_rate, channels, **kwargs)
70        return writer

Create and open an AudioIO object in writer mode

See AudioIO.create for the full list of accepted parameters.

@staticmethod
def get_time_in_sec(filename, *, debug=False, logLevel=16):
 88    @staticmethod
 89    def get_time_in_sec(filename, *, debug=False, logLevel=16):
 90        """
 91        Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
 92
 93        Parameters
 94        ----------
 95        filename : str or path. 
 96            Raw audio waveform as a 1D array.
 97
 98        debug : bool (default False).
 99            Show debug info.
100
101        log_level: int (default 16).
102            Log level to pass to the underlying ffmpeg/ffprobe command.
103        
104        Returns
105        ----------
106        float
107            Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
108        """
109        
110        cmd = [AudioIO.paramProgram, # ffprobe
111                    '-hide_banner',
112                    '-loglevel', str(logLevel),
113                    '-show_entries', 'format=duration',
114                    '-of', 'default=noprint_wrappers=1:nokey=1',
115                    filename
116                    ]
117
118        if debug == True:
119            print(' '.join(cmd))
120
121        # call ffprobe and get params in one single line
122        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
123        output = lpipe.stdout.readlines()
124        lpipe.terminate()
125        # transform Bytes output to one single string
126        output = ''.join( [element.decode('utf-8') for element in output])
127
128        try:
129            return float(output)
130        except (ValueError, TypeError):
131            return None

Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).

Parameters

filename : str or path. Path to the audio file (or video file containing audio) to probe.

debug : bool (default False). Show debug info.

logLevel: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.

Returns

float Length in seconds of the audio/video file (the decimal part carries the sub-second precision), or None if the ffprobe output cannot be parsed.

@staticmethod
def get_params(filename, *, debug=False, logLevel=16):
133    @staticmethod
134    def get_params(filename, *, debug=False, logLevel=16):
135        """
136        Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
137
138        Parameters
139        ----------
140        filename : str or path.
141            Raw audio waveform as a 1D array.
142
143        debug : bool (default (False).
144            Show debug info.
145
146        log_level: int (default 16).
147            Log level to pass to the underlying ffmpeg/ffprobe command.
148
149        Returns
150        ----------
151        tuple
152            Tuple containing (channels,sample_rate) of the file
153        """
154        cmd = [AudioIO.paramProgram, # ffprobe
155                    '-hide_banner',
156                    '-loglevel', str(logLevel),
157                    '-show_entries', 'stream=channels,sample_rate',
158                    filename
159                    ]
160
161        if debug == True:
162            print(' '.join(cmd))
163
164        # call ffprobe and get params in one single line
165        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
166        output = lpipe.stdout.readlines()
167        lpipe.terminate()
168        # transform Bytes output to one single string
169        output = ''.join( [element.decode('utf-8') for element in output])
170
171        pattern_sample_rate = r'sample_rate=(\d+)'
172        pattern_channels = r'channels=(\d+)'
173
174        # Search for values in the ffprobe output
175        match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
176        match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)
177
178        # Extraction des valeurs
179        if match_sample_rate:
180            sample_rate = int(match_sample_rate.group(1))
181        else:
182            raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
183
184        if match_channels:
185            channels = int(match_channels.group(1))
186        else:
187            raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")
188
189        return (channels,sample_rate)
190
191        # Attributes
192        mode: PipeMode
193        """ Pipemode of the current object (default PipeMode.UNK_MODE)"""
194
195        loglevel: int
196        """ loglevel of the underlying ffmpeg backend for this object (default 16)"""
197
198        debugModel: bool
199        """ debutMode flag for this object (print debut info, default False)"""
200
201        channels: int
202        """ Number of channels of images (default -1) """
203
204        sample_rate: int
205        """ sample_rate of images (default -1) """
206
207        plannar: bool
208        """ Read/write data as plannar, i.e. not interleaved (default True) """
209
210        pipe: sp.Popen
211        """ pipe object to ffmpeg/ffprobe (default None)"""
212
213        frame_size: int
214        """ Weight in bytes of one image (default -1)"""
215
216        filename: str
217        """ Filename of the file (default None)"""
218
219        frame_counter: FrameCounter
220        """ `Framecounter` object to count ellapsed time (default None)"""

Static method to get params (channels, sample_rate) of an audio file (or of a video file containing audio).

Parameters

filename : str or path. Path to the audio file (or video file containing audio) to probe.

debug : bool (default False). Show debug info.

logLevel: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.

Returns

tuple Tuple containing (channels,sample_rate) of the file

mode
logLevel
debugMode
def init(self):
242    def init(self):
243        """
244        Init or reinit a VideoIO object.
245        """
246        self.channels  = -1
247        self.sample_rate = -1
248        self.plannar = True
249        self.pipe = None
250        self.frame_size = -1
251        self.filename = None
252        self.frame_counter = None

Init or reinit an AudioIO object.

def get_elapsed_time_as_str(self) -> str:
272    def get_elapsed_time_as_str(self) -> str:
273        """
274        Method to get elapsed time (float value represented) as str.
275
276        Returns
277        ----------
278        str or None
279            Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds
280            None if no frame counter are available.
281        """
282        if self.frame_counter is None:
283            return None
284        return self.frame_counter.get_elapsed_time_as_str()

Method to get elapsed time (float value represented) as str.

Returns

str or None Elapsed time (float value) as str, "15.500" for instance for 15 seconds and 500 milliseconds. None if no frame counter is available.

def get_formated_elapsed_time_as_str(self, show_ms=True) -> str:
286    def get_formated_elapsed_time_as_str(self,show_ms=True) -> str:
287        """
288        Method to get elapsed time (hour format) as str.
289
290        Returns
291        ----------
292        str or None
293            Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds
294            None if no frame counter are available.
295        """
296        if self.frame_counter is None:
297            return None
298        return self.frame_counter.get_formated_elapsed_time_as_str()

Method to get elapsed time (hour format) as str.

Returns

str or None Elapsed time in hour format as str, "00:00:15.500" for instance for 15 seconds and 500 milliseconds. None if no frame counter is available.

def get_elapsed_time(self) -> float:
300    def get_elapsed_time(self) -> float:
301        """
302        Method to get elapsed time as float value rounded to 3 decimals.
303
304        Returns
305        ----------
306        float or None
307            Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds
308            None if no frame counter are available.
309        """
310        if self.frame_counter is None:
311            return None
312        return self.frame_counter.get_elapsed_time()

Method to get elapsed time as float value rounded to 3 decimals.

Returns

float or None Elapsed time as a float value, 15.500 for instance for 15 seconds and 500 milliseconds. None if no frame counter is available.

def is_opened(self) -> bool:
314    def is_opened(self) -> bool:
315        """
316        Method to get status of the underlying pipe to ffmpeg.
317
318        Returns
319        ----------
320        bool
321            True if pipe is opened (reading or writing mode), False if not.
322        """
323        # is the pip opened?
324        if self.pipe is not None and self.pipe.poll() is None:
325            return True
326
327        return False

Method to get status of the underlying pipe to ffmpeg.

Returns

bool True if pipe is opened (reading or writing mode), False if not.

def close(self):
329    def close(self):
330        """
331        Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe  will be terminated. Object can be reused using open or create methods.
332        """
333        if self.pipe is not None:
334            if self.mode == PipeMode.WRITE_MODE:
335                # killing will make ffmpeg not finish properly the job, close the pipe
336                # to let it know that no more data are comming
337                self.pipe.stdin.close()
338            else: # self.mode == PipeMode.READ_MODE
339                # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows
340                self.pipe.kill()
341
342            # wait for subprocess to end
343            self.pipe.wait()
344
345        # reinit object for later use
346        self.init()

Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods.

def create( self, filename, sample_rate, channels, *, writeOverExistingFile=False, outputEncoding=<AudioFormat.PCM32LE: 'pcm_f32le'>, encodingParams=None, plannar=True):
348    def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False,
349                outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ):
350        """
351        Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create
352        on a AudioIO will close any former open video.
353
354        Parameters
355        ----------
356        filename: str or path
357            filename of path to the file (mp4, avi, ...)
358
359        sample_rate: int
360            If defined as a positive value, sample_rates of the output file will be set to this value.
361
362        channels: int
363            If defined as a positive value, number of channels of output file will be set to this value.
364
365        fps:
366            If defined as a positive value, fps of input video will be set to this value.
367
368        outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
369            Define audio format for samples. Possible value is AudioFormat.PCM32LE.
370
371        encodingParams: str optional (default None)
372            Parameter to pass to ffmpeg to encode video like audio filters.
373
374        plannar : bool optionnal (default True)
375            Input data to write are grouped by channel if True, interleaved instead.
376
377        Returns
378        ----------
379        bool
380            Was the creation successfull
381        """
382
383        # Close if already opened
384        self.close()
385
386        # Set geometry/fps of the video stream from params
387        self.sample_rate = int(sample_rate)
388        self.channels = int(channels)
389        self.plannar = plannar
390
391        # Check params
392        if self.sample_rate <= 0 or self.channels <= 0:
393            raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels))
394
395        # To write audio, we do not need to know in advance frame size, we will write x values of n bytes
396        self.frame_size = None
397
398        # Video params are set, open the video
399        cmd = [self.audioProgram] # ffmpeg
400
401        if writeOverExistingFile == True:
402            cmd.extend(['-y'])
403
404        cmd.extend(['-hide_banner',
405            '-nostats',
406            '-loglevel', str(self.logLevel),
407            '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding
408            '-ar', f"{self.sample_rate}",
409            '-ac', f"{self.channels}",
410            '-i', '-'])
411
412        if encodingParams is not None:
413            cmd.extend(encodingParams.split())
414
415        # remove video
416        cmd.extend( ['-vn', filename ] )
417
418        if self.debugMode == True:
419            print( ' '.join(cmd), file=sys.stderr )
420
421        # store filename and set mode
422        self.filename = filename
423        self.mode = PipeMode.WRITE_MODE
424
425        # call ffmpeg in write mode
426        try:
427            self.pipe = sp.Popen(cmd, stdin=sp.PIPE)
428            self.frame_counter = FrameCounter(self.sample_rate)
429        except Exception as e:
430            # if pipe failed, reinit object and raise exception
431            self.init()
432            raise
433
434        return True

Method to create an audio file using parametrized access through ffmpeg. Important note: calling create on an AudioIO object will close any formerly opened file.

Parameters

filename: str or path filename of path to the file (mp4, avi, ...)

sample_rate: int If defined as a positive value, sample_rates of the output file will be set to this value.

channels: int If defined as a positive value, number of channels of output file will be set to this value.

fps: If defined as a positive value, fps of input video will be set to this value.

outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) Define audio format for samples. Possible value is AudioFormat.PCM32LE.

encodingParams: str optional (default None) Parameter to pass to ffmpeg to encode video like audio filters.

plannar : bool optional (default True) Input data to write are grouped by channel if True, interleaved otherwise.

Returns

bool Was the creation successful

def open( self, filename, *, sample_rate=-1, channels=-1, inputEncoding=<AudioFormat.PCM32LE: 'pcm_f32le'>, decodingParams=None, frame_size=1.0, plannar=True, start_time=0.0):
436    def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE,
437                    decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ):
438        """
439        Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open
440        on a AudioIO will close any former open file.
441
442        Parameters
443        ----------
444        filename: str or path
445            filename of path to the file (mp4, avi, ...)
446
447        sample_rate: int optional (default -1)
448            If defined as a positive value, sample rate of the input audio will be converted to this value.
449
450        channels: int optional (default -1)
451            If defined as a positive value, number of channels of the input audio will converted to this value.
452
453        inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
454            Define audio format for samples. Possible value is AudioFormat.PCM32LE.
455
456        decodingParams: str optional (default None)
457            Parameter to pass to ffmpeg to decode video like audio filters.
458
459        plannar: bool optionnal (default True)
460            Group audio samples per channel if True. Else, samples are interleaved.
461
462        frame_size: int or float (default 1.0)
463            If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples.
464            if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms.
465            Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate)
466
467        start_time: float optional (default 0.0)
468            Define the reading start time. If not set, reading at beginning of the file.
469
470        Returns
471        ----------
472        bool
473            Was the opening successfull
474        """
475
476        # Close if already opened
477        self.close()
478
479        # Force conversion of parameters
480        channels = int(channels)
481        sample_rate = float(sample_rate)
482
483        self.plannar = plannar
484
485        # get parameters from file if needed:
486        if sample_rate <= 0 or channels <= 0:
487            self.channels, self.sample_rate = self.getAudioParams(filename)
488
489        # check if parameters ask to overide video parameters
490        if channels > 0:
491            self.channels = channels
492        if sample_rate > 0:
493            self.sample_rate = sample_rate
494
495        # check parameters
496
497        if isinstance(frame_size,float):
498            # time in seconds
499            self.frame_size = int(frame_size*self.sample_rate)
500        elif isinstance(frame_size,int):
501            # number of samples
502            self.frame_size = frame_size
503        else:
504            # to do
505            pass
506
507        # Video params are set, open the video
508        cmd = [self.audioProgram, # ffmpeg
509                    '-hide_banner',
510                    '-nostats',
511                    '-loglevel', str(self.logLevel)]
512
513        if decodingParams is not None:
514            cmd.extend([decodingParams.split()])
515
516        if start_time < 0.0:
517            pass
518        elif start_time > 0.0:
519            cmd.extend(["-ss", f"{start_time}"])            
520
521        cmd.extend( ['-i', filename,
522                     '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding
523                     '-ar', f"{self.sample_rate}",
524                     '-ac', f"{self.channels}",
525                     '-' # output to stdout
526                    ]
527                )
528
529        if self.debugMode == True:
530            print( ' '.join(cmd) )
531
532        # store filename and set mode to READ_MODE
533        self.filename = filename
534        self.mode = PipeMode.READ_MODE
535
536        # call ffmpeg in read mode
537        try:
538            self.pipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe
539            self.frame_counter = FrameCounter(self.sample_rate)
540            if start_time > 0.0:
541                self.frame_counter += start_time # adding with float means adding time
542        except Exception as e:
543            # if pipe failed, reinit object and raise exception
544            self.init()
545            raise
546
547        return True

Method to read an audio file (or a video file containing audio) using parametrized access through ffmpeg. Important note: calling open on an AudioIO object will close any formerly opened file.

Parameters

filename: str or path filename of path to the file (mp4, avi, ...)

sample_rate: int optional (default -1) If defined as a positive value, sample rate of the input audio will be converted to this value.

channels: int optional (default -1) If defined as a positive value, number of channels of the input audio will be converted to this value.

inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) Define audio format for samples. Possible value is AudioFormat.PCM32LE.

decodingParams: str optional (default None) Parameter to pass to ffmpeg to decode video like audio filters.

plannar: bool optional (default True) Group audio samples per channel if True. Else, samples are interleaved.

frame_size: int or float (default 1.0) If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate)

start_time: float optional (default 0.0) Define the reading start time. If not set, reading at beginning of the file.

Returns

bool Was the opening successful

def read_frame(self, with_timestamps=False):
549    def read_frame(self, with_timestamps = False):
550        """
551        Read next frame from the audio file
552
553        Parameters
554        ----------
555        with_timestamps: bool optional (default False)
556            If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s)
557
558        Returns
559        ----------
560        nparray or FrameContainer
561            A frame of shape (self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A frame
562            of shape (self.channels*self.frame_size) with interleaved data if self.plannar is False.
563            if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and
564            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element).
565        """
566
567        if self.pipe is None:
568            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram))
569        # - pipe is in write mode
570        if self.mode != PipeMode.READ_MODE:
571            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
572
573        if with_timestamps:
574            # get elapsed time in video, it is time of next frame(s)
575            current_elapsed_time = self.get_elapsed_time()
576
577        # read rgb image from pipe
578        toread = self.frame_size*4
579        buffer = self.pipe.stdout.read(toread)
580        if len(buffer) != toread:
581            # not considered as an error, no more frame, no exception
582            return None
583
584        # get numpy UINT8 array from buffer
585        audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels)
586
587        # make it plannar (or not)
588        if self.plannar:
589            #transpose it
590            audio = audio.T
591
592        # increase frame_counter
593        self.frame_counter.frame_count += (self.frame_size * self.channels)
594
595        # say to gc that this buffer is no longer needed
596        del buffer
597
598        if with_timestamps:
599            return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)
600        
601        return audio

Read next frame from the audio file

Parameters

with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the audio and an array containing the associated timestamp(s)

Returns

nparray or FrameContainer A frame of shape (self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A frame of shape (self.channels*self.frame_size) with interleaved data if self.plannar is False. if with_timestamps is True, the return object is a FrameContainer with the audio data in FrameContainer.data and the associated timestamp in FrameContainer.timestamps as an array (one element).

def read_batch(self, numberOfFrames, with_timestamps=False):
603    def read_batch(self, numberOfFrames, with_timestamps = False):
604        """
605        Read next batch of audio from the file
606
607        Parameters
608        ----------
609        number_of_frames: int
610            Number of desired images within the batch. The last batch from the file may have less images.
611            
612        with_timestamps: bool optional (default False)
613            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
614
615        Returns
616        ----------
617        nparray or FrameContainer
618            A batch of shape (n, self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A batch
619            of shape (n, self.channels*self.frame_size) with interleaved data if self.plannar is False.
620            if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and
621            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame).
622        """
623
624        if self.pipe is None:
625            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram))
626        # - pipe is in write mode
627        if self.mode != PipeMode.READ_MODE:
628            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
629
630        if with_timestamps:
631            # get elapsed time in video, it is time of next frame(s)
632            current_elapsed_time = self.get_elapsed_time()
633
634        # try to read complete batch
635        toread = self.frame_size*4*self.channels*numberOfFrames
636        buffer = self.pipe.stdout.read(toread)
637
638        # check if we have at least 1 Frame
639        if len(buffer) < toread:
640            # not considered as an error, no more frame, no exception
641            return None
642
643        # compute actual number of Frames
644        actualNbFrames = len(buffer)//(self.frame_size*4*self.channels)
645
646        # get and reshape batch from buffer
647        batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,))
648
649        if self.plannar:
650            batch = batch.transpose(0, 2, 1)
651
652        # increase frame_counter
653        self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels)
654        
655        # say to gc that this buffer is no longer needed
656        del buffer
657
658        if with_timestamps:
659            return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)
660        
661        return batch

Read next batch of audio from the file

Parameters

numberOfFrames: int Number of desired audio frames within the batch. The last batch from the file may have fewer frames.

with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated to the frames

Returns

nparray or FrameContainer A batch of shape (n, self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A batch of shape (n, self.channels*self.frame_size) with interleaved data if self.plannar is False. if with_timestamps is True, the return object is a FrameContainer with the audio batch in FrameContainer.data and the associated timestamp in FrameContainer.timestamps as an array (one element for each audio frame).

def write_frame(self, audio) -> bool:
663    def write_frame(self, audio) -> bool:
664        """
665        Write an audio frame to the file
666
667        Parameters
668        ----------
669        audio: nparray
670            The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).
671
672        Returns
673        ----------
674        bool
675            Writing was successful or not.
676        """
677        # Check params
678        # - pipe exists
679        if self.pipe is None:
680            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
681        # - pipe is in write mode
682        if self.mode != PipeMode.WRITE_MODE:
683            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
684        # - shape of image is fine, thus we have pixels for a full compatible frame
685        if audio.shape[0] != self.channels:
686            raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size))
687        # - type of data is Float32
688        if audio.dtype != np.float32:
689            raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype))
690
691        # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar
692        if not self.plannar:
693            audio = audio.reshape(-1)
694
695        # print( audio.shape )
696
697        # garantee to have a C continuous array
698        if not audio.flags['C_CONTIGUOUS']:
699            a = np.ascontiguousarray(a) 
700
701        # write frame
702        buffer = audio.tobytes()
703        if self.pipe.stdin.write( buffer ) < len(buffer):
704            print( f"Error writing frame to {self.filename}" )
705            return False
706
707        # increase frame_counter
708        self.frame_counter.frame_count += (self.frame_size * self.channels)
709
710        # say to gc that this buffer is no longer needed 
711        del buffer
712
713        return True

Write an audio frame to the file

Parameters

audio: nparray The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).

Returns

bool Writing was successful or not.

def write_batch(self, batch):
715    def write_batch(self, batch):
716        """
717        Write a batch of audio frame to the file
718
719        Parameters
720        ----------
721        batch: nparray
722            The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.
723
724        Returns
725        ----------
726        bool
727            Writing was successful or not.
728        """
729        # Check params
730        # - pipe exists
731        if self.pipe is None:
732            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
733        # - pipe is in write mode
734        if self.mode != PipeMode.WRITE_MODE:
735            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
736        # batch is 3D (n, channels, nb samples)
737        if batch.ndim !=3:
738            raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
739        # - shape of images in batch is fine
740        if batch.shape[2] != self.channels:
741            raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[2], self.channels, batch.shape))
742
743        # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe
744        # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False
745        if not self.plannar:
746            # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel)
747            batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels)
748            batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel)
749
750        # garantee to have a C continuous array
751        if not batch.flags['C_CONTIGUOUS']:
752            batch = np.ascontiguousarray(batch)
753
754        # write frame
755        buffer = batch.tobytes()
756        if self.pipe.stdin.write( buffer ) < len(buffer):
757            # say to gc that this buffer is no longer needed
758            del buffer
759            raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))
760
761        # increase frame_counter
762        self.frame_counter.frame_count += int(batch.shape[0]/self.channels) # int conversion is mandatory to avoid confusion with time as float
763              
764        # say to gc that this buffer is no longer needed
765        del buffer
766
767        return True

Write a batch of audio frame to the file

Parameters

batch: nparray The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.

Returns

bool Writing was successful or not.

def iter_frames(self, with_timestamps=False):
769    def iter_frames(self, with_timestamps = False):
770        """
771        Method to iterate on audio frames using AudioIO obj.
772        for audio_frame in obj.iter_frames():
773            ....
774
775        Parameters
776        ----------
777        with_timestamps: bool optional (default False)
778            If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames
779
780        Returns
781        ----------
782        nparray or FrameContainer
783            A batch of images of shape ()
784        """
785
786        try:
787            if self.mode == PipeMode.READ_MODE:
788                while self.isOpened():
789                    frame = self.readFrame(with_timestamps)
790                    if frame is not None:
791                        yield frame
792        finally:
793            self.close()

Method to iterate on audio frames using AudioIO obj. for audio_frame in obj.iter_frames(): ....

Parameters

with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames

Returns

nparray or FrameContainer The next audio frame, or a FrameContainer holding the frame and its timestamp if with_timestamps is True.

def iter_batches(self, batch_size: int, with_timestamps=False):
795    def iter_batches(self, batch_size : int, with_timestamps = False ):
796        """
797        Method to iterate on batch ofaudio  frames using VideoIO obj.
798        for audio_batch in obj.iter_batches():
799            ....
800
801        Parameters
802        ----------
803        with_timestamps: bool optional (default False)
804            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
805        """
806        try:
807            if self.mode == PipeMode.READ_MODE:
808                while self.isOpened():
809                    batch = self.readBatch(batch_size, with_timestamps)
810                    if batch is not None:
811                        yield batch
812        finally:
813            self.close()

Method to iterate on batches of audio frames using an AudioIO obj. for audio_batch in obj.iter_batches(batch_size): ....

Parameters

with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated with the frames

@staticmethod
def getAudioTimeInSec(filename, *, debug=False, logLevel=16):
 88    @staticmethod
 89    def get_time_in_sec(filename, *, debug=False, logLevel=16):
 90        """
 91        Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
 92
 93        Parameters
 94        ----------
 95        filename : str or path. 
 96            Raw audio waveform as a 1D array.
 97
 98        debug : bool (default False).
 99            Show debug info.
100
101        log_level: int (default 16).
102            Log level to pass to the underlying ffmpeg/ffprobe command.
103        
104        Returns
105        ----------
106        float
107            Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
108        """
109        
110        cmd = [AudioIO.paramProgram, # ffprobe
111                    '-hide_banner',
112                    '-loglevel', str(logLevel),
113                    '-show_entries', 'format=duration',
114                    '-of', 'default=noprint_wrappers=1:nokey=1',
115                    filename
116                    ]
117
118        if debug == True:
119            print(' '.join(cmd))
120
121        # call ffprobe and get params in one single line
122        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
123        output = lpipe.stdout.readlines()
124        lpipe.terminate()
125        # transform Bytes output to one single string
126        output = ''.join( [element.decode('utf-8') for element in output])
127
128        try:
129            return float(output)
130        except (ValueError, TypeError):
131            return None

Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).

Parameters

filename : str or path. Path of the audio file (or video file containing audio) to probe.

debug : bool (default False). Show debug info.

logLevel: int (default 16). Log level to pass to the underlying ffprobe command.

Returns

float Length in seconds of video file (including milliseconds as decimal part with 3 decimals)

@staticmethod
def getAudioParams(filename, *, debug=False, logLevel=16):
133    @staticmethod
134    def get_params(filename, *, debug=False, logLevel=16):
135        """
136        Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
137
138        Parameters
139        ----------
140        filename : str or path.
141            Raw audio waveform as a 1D array.
142
143        debug : bool (default (False).
144            Show debug info.
145
146        log_level: int (default 16).
147            Log level to pass to the underlying ffmpeg/ffprobe command.
148
149        Returns
150        ----------
151        tuple
152            Tuple containing (channels,sample_rate) of the file
153        """
154        cmd = [AudioIO.paramProgram, # ffprobe
155                    '-hide_banner',
156                    '-loglevel', str(logLevel),
157                    '-show_entries', 'stream=channels,sample_rate',
158                    filename
159                    ]
160
161        if debug == True:
162            print(' '.join(cmd))
163
164        # call ffprobe and get params in one single line
165        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
166        output = lpipe.stdout.readlines()
167        lpipe.terminate()
168        # transform Bytes output to one single string
169        output = ''.join( [element.decode('utf-8') for element in output])
170
171        pattern_sample_rate = r'sample_rate=(\d+)'
172        pattern_channels = r'channels=(\d+)'
173
174        # Search for values in the ffprobe output
175        match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
176        match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)
177
178        # Extraction des valeurs
179        if match_sample_rate:
180            sample_rate = int(match_sample_rate.group(1))
181        else:
182            raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
183
184        if match_channels:
185            channels = int(match_channels.group(1))
186        else:
187            raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")
188
189        return (channels,sample_rate)
190
191        # Attributes
192        mode: PipeMode
193        """ Pipemode of the current object (default PipeMode.UNK_MODE)"""
194
195        loglevel: int
196        """ loglevel of the underlying ffmpeg backend for this object (default 16)"""
197
198        debugModel: bool
199        """ debutMode flag for this object (print debut info, default False)"""
200
201        channels: int
202        """ Number of channels of images (default -1) """
203
204        sample_rate: int
205        """ sample_rate of images (default -1) """
206
207        plannar: bool
208        """ Read/write data as plannar, i.e. not interleaved (default True) """
209
210        pipe: sp.Popen
211        """ pipe object to ffmpeg/ffprobe (default None)"""
212
213        frame_size: int
214        """ Weight in bytes of one image (default -1)"""
215
216        filename: str
217        """ Filename of the file (default None)"""
218
219        frame_counter: FrameCounter
220        """ `Framecounter` object to count ellapsed time (default None)"""

Static method to get the params (channels, sample_rate) of an audio file (or of a video file containing audio).

Parameters

filename : str or path. Path of the audio file (or video file containing audio) to probe.

debug : bool (default False). Show debug info.

logLevel: int (default 16). Log level to pass to the underlying ffprobe command.

Returns

tuple Tuple containing (channels,sample_rate) of the file

@staticmethod
def get_audio_time_in_sec(filename, *, debug=False, logLevel=16):
 88    @staticmethod
 89    def get_time_in_sec(filename, *, debug=False, logLevel=16):
 90        """
 91        Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
 92
 93        Parameters
 94        ----------
 95        filename : str or path. 
 96            Raw audio waveform as a 1D array.
 97
 98        debug : bool (default False).
 99            Show debug info.
100
101        log_level: int (default 16).
102            Log level to pass to the underlying ffmpeg/ffprobe command.
103        
104        Returns
105        ----------
106        float
107            Length in seconds of video file (including milliseconds as decimal part with 3 decimals)
108        """
109        
110        cmd = [AudioIO.paramProgram, # ffprobe
111                    '-hide_banner',
112                    '-loglevel', str(logLevel),
113                    '-show_entries', 'format=duration',
114                    '-of', 'default=noprint_wrappers=1:nokey=1',
115                    filename
116                    ]
117
118        if debug == True:
119            print(' '.join(cmd))
120
121        # call ffprobe and get params in one single line
122        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
123        output = lpipe.stdout.readlines()
124        lpipe.terminate()
125        # transform Bytes output to one single string
126        output = ''.join( [element.decode('utf-8') for element in output])
127
128        try:
129            return float(output)
130        except (ValueError, TypeError):
131            return None

Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).

Parameters

filename : str or path. Path of the audio file (or video file containing audio) to probe.

debug : bool (default False). Show debug info.

logLevel: int (default 16). Log level to pass to the underlying ffprobe command.

Returns

float Length in seconds of video file (including milliseconds as decimal part with 3 decimals)

@staticmethod
def get_audio_params(filename, *, debug=False, logLevel=16):
133    @staticmethod
134    def get_params(filename, *, debug=False, logLevel=16):
135        """
136        Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds.
137
138        Parameters
139        ----------
140        filename : str or path.
141            Raw audio waveform as a 1D array.
142
143        debug : bool (default (False).
144            Show debug info.
145
146        log_level: int (default 16).
147            Log level to pass to the underlying ffmpeg/ffprobe command.
148
149        Returns
150        ----------
151        tuple
152            Tuple containing (channels,sample_rate) of the file
153        """
154        cmd = [AudioIO.paramProgram, # ffprobe
155                    '-hide_banner',
156                    '-loglevel', str(logLevel),
157                    '-show_entries', 'stream=channels,sample_rate',
158                    filename
159                    ]
160
161        if debug == True:
162            print(' '.join(cmd))
163
164        # call ffprobe and get params in one single line
165        lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
166        output = lpipe.stdout.readlines()
167        lpipe.terminate()
168        # transform Bytes output to one single string
169        output = ''.join( [element.decode('utf-8') for element in output])
170
171        pattern_sample_rate = r'sample_rate=(\d+)'
172        pattern_channels = r'channels=(\d+)'
173
174        # Search for values in the ffprobe output
175        match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
176        match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)
177
178        # Extraction des valeurs
179        if match_sample_rate:
180            sample_rate = int(match_sample_rate.group(1))
181        else:
182            raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
183
184        if match_channels:
185            channels = int(match_channels.group(1))
186        else:
187            raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")
188
189        return (channels,sample_rate)
190
191        # Attributes
192        mode: PipeMode
193        """ Pipemode of the current object (default PipeMode.UNK_MODE)"""
194
195        loglevel: int
196        """ loglevel of the underlying ffmpeg backend for this object (default 16)"""
197
198        debugModel: bool
199        """ debutMode flag for this object (print debut info, default False)"""
200
201        channels: int
202        """ Number of channels of images (default -1) """
203
204        sample_rate: int
205        """ sample_rate of images (default -1) """
206
207        plannar: bool
208        """ Read/write data as plannar, i.e. not interleaved (default True) """
209
210        pipe: sp.Popen
211        """ pipe object to ffmpeg/ffprobe (default None)"""
212
213        frame_size: int
214        """ Weight in bytes of one image (default -1)"""
215
216        filename: str
217        """ Filename of the file (default None)"""
218
219        frame_counter: FrameCounter
220        """ `Framecounter` object to count ellapsed time (default None)"""

Static method to get the params (channels, sample_rate) of an audio file (or of a video file containing audio).

Parameters

filename : str or path. Path of the audio file (or video file containing audio) to probe.

debug : bool (default False). Show debug info.

logLevel: int (default 16). Log level to pass to the underlying ffprobe command.

Returns

tuple Tuple containing (channels,sample_rate) of the file

def isOpened(self) -> bool:
314    def is_opened(self) -> bool:
315        """
316        Method to get status of the underlying pipe to ffmpeg.
317
318        Returns
319        ----------
320        bool
321            True if pipe is opened (reading or writing mode), False if not.
322        """
323        # is the pip opened?
324        if self.pipe is not None and self.pipe.poll() is None:
325            return True
326
327        return False

Method to get status of the underlying pipe to ffmpeg.

Returns

bool True if pipe is opened (reading or writing mode), False if not.

def readFrame(self, with_timestamps=False):
549    def read_frame(self, with_timestamps = False):
550        """
551        Read next frame from the audio file
552
553        Parameters
554        ----------
555        with_timestamps: bool optional (default False)
556            If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s)
557
558        Returns
559        ----------
560        nparray or FrameContainer
561            A frame of shape (self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A frame
562            of shape (self.channels*self.frame_size) with interleaved data if self.plannar is False.
563            if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and
564            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element).
565        """
566
567        if self.pipe is None:
568            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram))
569        # - pipe is in write mode
570        if self.mode != PipeMode.READ_MODE:
571            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
572
573        if with_timestamps:
574            # get elapsed time in video, it is time of next frame(s)
575            current_elapsed_time = self.get_elapsed_time()
576
577        # read rgb image from pipe
578        toread = self.frame_size*4
579        buffer = self.pipe.stdout.read(toread)
580        if len(buffer) != toread:
581            # not considered as an error, no more frame, no exception
582            return None
583
584        # get numpy UINT8 array from buffer
585        audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels)
586
587        # make it plannar (or not)
588        if self.plannar:
589            #transpose it
590            audio = audio.T
591
592        # increase frame_counter
593        self.frame_counter.frame_count += (self.frame_size * self.channels)
594
595        # say to gc that this buffer is no longer needed
596        del buffer
597
598        if with_timestamps:
599            return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)
600        
601        return audio

Read next frame from the audio file

Parameters

with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the audio and an array containing the associated timestamp(s)

Returns

nparray or FrameContainer A frame of shape (self.channels, self.frame_size) as defined in the reader/open call if self.plannar is True. A frame of shape (self.frame_size, self.channels) with interleaved data if self.plannar is False. If with_timestamps is True, the return object is a FrameContainer with the audio data in FrameContainer.data and the associated timestamp in FrameContainer.timestamps as an array (one element). None is returned when a full frame cannot be read.

def readBatch(self, numberOfFrames, with_timestamps=False):
603    def read_batch(self, numberOfFrames, with_timestamps = False):
604        """
605        Read next batch of audio from the file
606
607        Parameters
608        ----------
609        number_of_frames: int
610            Number of desired images within the batch. The last batch from the file may have less images.
611            
612        with_timestamps: bool optional (default False)
613            If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames
614
615        Returns
616        ----------
617        nparray or FrameContainer
618            A batch of shape (n, self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A batch
619            of shape (n, self.channels*self.frame_size) with interleaved data if self.plannar is False.
620            if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and
621            the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame).
622        """
623
624        if self.pipe is None:
625            raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram))
626        # - pipe is in write mode
627        if self.mode != PipeMode.READ_MODE:
628            raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))
629
630        if with_timestamps:
631            # get elapsed time in video, it is time of next frame(s)
632            current_elapsed_time = self.get_elapsed_time()
633
634        # try to read complete batch
635        toread = self.frame_size*4*self.channels*numberOfFrames
636        buffer = self.pipe.stdout.read(toread)
637
638        # check if we have at least 1 Frame
639        if len(buffer) < toread:
640            # not considered as an error, no more frame, no exception
641            return None
642
643        # compute actual number of Frames
644        actualNbFrames = len(buffer)//(self.frame_size*4*self.channels)
645
646        # get and reshape batch from buffer
647        batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,))
648
649        if self.plannar:
650            batch = batch.transpose(0, 2, 1)
651
652        # increase frame_counter
653        self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels)
654        
655        # say to gc that this buffer is no longer needed
656        del buffer
657
658        if with_timestamps:
659            return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)
660        
661        return batch

Read next batch of audio from the file

Parameters

numberOfFrames: int Number of desired audio frames within the batch. The last batch from the file may have fewer frames.

with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated with the frames

Returns

nparray or FrameContainer A batch of shape (n, self.channels, self.frame_size) as defined in the reader/open call if self.plannar is True. A batch of shape (n, self.frame_size, self.channels) with interleaved data if self.plannar is False. If with_timestamps is True, the return object is a FrameContainer with the audio batch in FrameContainer.data and the associated timestamps in FrameContainer.timestamps as an array (one element for each audio frame).

def writeFrame(self, audio) -> bool:
663    def write_frame(self, audio) -> bool:
664        """
665        Write an audio frame to the file
666
667        Parameters
668        ----------
669        audio: nparray
670            The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).
671
672        Returns
673        ----------
674        bool
675            Writing was successful or not.
676        """
677        # Check params
678        # - pipe exists
679        if self.pipe is None:
680            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
681        # - pipe is in write mode
682        if self.mode != PipeMode.WRITE_MODE:
683            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
684        # - shape of image is fine, thus we have pixels for a full compatible frame
685        if audio.shape[0] != self.channels:
686            raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size))
687        # - type of data is Float32
688        if audio.dtype != np.float32:
689            raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype))
690
691        # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar
692        if not self.plannar:
693            audio = audio.reshape(-1)
694
695        # print( audio.shape )
696
697        # garantee to have a C continuous array
698        if not audio.flags['C_CONTIGUOUS']:
699            a = np.ascontiguousarray(a) 
700
701        # write frame
702        buffer = audio.tobytes()
703        if self.pipe.stdin.write( buffer ) < len(buffer):
704            print( f"Error writing frame to {self.filename}" )
705            return False
706
707        # increase frame_counter
708        self.frame_counter.frame_count += (self.frame_size * self.channels)
709
710        # say to gc that this buffer is no longer needed 
711        del buffer
712
713        return True

Write an audio frame to the file

Parameters

audio: nparray The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).

Returns

bool Writing was successful or not.

def writeBatch(self, batch):
715    def write_batch(self, batch):
716        """
717        Write a batch of audio frame to the file
718
719        Parameters
720        ----------
721        batch: nparray
722            The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.
723
724        Returns
725        ----------
726        bool
727            Writing was successful or not.
728        """
729        # Check params
730        # - pipe exists
731        if self.pipe is None:
732            raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
733        # - pipe is in write mode
734        if self.mode != PipeMode.WRITE_MODE:
735            raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
736        # batch is 3D (n, channels, nb samples)
737        if batch.ndim !=3:
738            raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
739        # - shape of images in batch is fine
740        if batch.shape[2] != self.channels:
741            raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[2], self.channels, batch.shape))
742
743        # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe
744        # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False
745        if not self.plannar:
746            # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel)
747            batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels)
748            batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel)
749
750        # garantee to have a C continuous array
751        if not batch.flags['C_CONTIGUOUS']:
752            batch = np.ascontiguousarray(batch)
753
754        # write frame
755        buffer = batch.tobytes()
756        if self.pipe.stdin.write( buffer ) < len(buffer):
757            # say to gc that this buffer is no longer needed
758            del buffer
759            raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))
760
761        # increase frame_counter
762        self.frame_counter.frame_count += int(batch.shape[0]/self.channels) # int conversion is mandatory to avoid confusion with time as float
763              
764        # say to gc that this buffer is no longer needed
765        del buffer
766
767        return True

Write a batch of audio frame to the file

Parameters

batch: nparray The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.

Returns

bool Writing was successful or not.

audioProgram = '/usr/local/lib/python3.12/site-packages/static_ffmpeg/bin/linux/ffmpeg'
paramProgram = '/usr/local/lib/python3.12/site-packages/static_ffmpeg/bin/linux/ffprobe'
class AudioIO.AudioIOException(builtins.Exception):
36    class AudioIOException(Exception):
37        """
38        Dedicated exception class for AudioIO class.
39        """
40        def __init__(self, message="Error while reading/writing video occurs"):
41            self.message = message
42            super().__init__(self.message)

Dedicated exception class for AudioIO class.

AudioIO.AudioIOException(message='Error while reading/writing video occurs')
40        def __init__(self, message="Error while reading/writing video occurs"):
41            self.message = message
42            super().__init__(self.message)
message
class AudioIO.AudioFormat(enum.Enum):
44    class AudioFormat(Enum):
45        """
46        Enum class for supported input video type: 32-bit float is the only supported type for the moment.
47        """
48        PCM32LE = 'pcm_f32le' # default format (unique mode for the moment)

Enum class for supported audio sample formats: 32-bit float is the only supported format for the moment.

PCM32LE = <AudioFormat.PCM32LE: 'pcm_f32le'>