simple_ffmpeg_batch_io.AudioIO
Read/write audio frames or batches of audio frames from (compressed) files, including video files with audio stream(s), using the FFmpeg backend.
This module defines the main AudioIO class used to open audio streams,
read audio frames or batches of frames, and write processed outputs.
Authors
Dominique Vaufreydaz (inspired from original C++ code: https://github.com/Vaufreyd/ReadWriteVideosWithOpenCV)
"""
Read/write audio frames or batches of audio frames from (compressed) file, including video file with audio stream(s), using FFmpeg backend.

This module defines the main `AudioIO` class used to open audio streams,
read audio frames or batches of frames, and write processed outputs.

Authors
-------
Dominique Vaufreydaz (inspired from original C++ code: https://github.com/Vaufreyd/ReadWriteVideosWithOpenCV)

"""

__authors__ = ("Dominique Vaufreydaz")

import sys
import subprocess as sp
import re
from enum import Enum
from typing import Union

import numpy as np

from .FrameCounter import FrameCounter
from .FrameContainer import FrameContainer
from .PipeMode import PipeMode

# init static_ffmpeg at import time, first time it will download ffmpeg executables
import static_ffmpeg
static_ffmpeg.add_paths()


class AudioIO:
    """
    Reader/writer of raw 32-bit float audio frames through an ffmpeg subprocess.

    In read mode, ffmpeg decodes ``filename`` to raw ``f32le`` samples on its stdout and
    frames (or batches of frames) are returned as numpy arrays. In write mode, numpy arrays
    are sent as raw ``f32le`` samples to ffmpeg's stdin, which encodes them to ``filename``.
    """

    # "static" variables to ffmpeg, ffprobe executables
    audioProgram, paramProgram = static_ffmpeg.run.get_or_fetch_platform_executables_else_raise()

    class AudioIOException(Exception):
        """
        Dedicated exception class for AudioIO class.
        """
        def __init__(self, message="Error while reading/writing video occurs"):
            self.message = message
            super().__init__(self.message)

    class AudioFormat(Enum):
        """
        Enum class for supported sample formats: 32-bit float is the only supported type for the moment.
        """
        PCM32LE = 'pcm_f32le'  # default format (unique mode for the moment)

    @classmethod
    def reader(cls, filename, **kwargs):
        """
        Create and open an AudioIO object in reader mode.

        See ``AudioIO.open`` for the full list of accepted parameters.
        """
        reader = cls()
        reader.open(filename, **kwargs)
        return reader

    @classmethod
    def writer(cls, filename, sample_rate, channels, **kwargs):
        """
        Create and open an AudioIO object in writer mode.

        See ``AudioIO.create`` for the full list of accepted parameters.
        """
        writer = cls()
        writer.create(filename, sample_rate, channels, **kwargs)
        return writer

    # To use with context manager "with AudioIO.reader(...) as f:" for instance
    def __enter__(self):
        """
        Method called when entering a context manager like "with AudioIO.reader/writer(...) as f:".
        """
        # simply return myself
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Method called when exiting a context manager like "with AudioIO.reader/writer(...) as f:".
        """
        # close AudioIO, never swallow the exception raised in the with-block (if any)
        self.close()
        return False

    @staticmethod
    def _run_probe(cmd, debug=False):
        """
        Run an ffprobe command line and return everything it wrote on stdout as one string.
        """
        if debug:
            # str() each element: filename may be a path object
            print(' '.join(str(arg) for arg in cmd))

        # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe.
        # communicate() reads until EOF and waits for the process, so no zombie process
        # nor unclosed pipe is left behind.
        with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as probe:
            raw_output, _ = probe.communicate()

        return raw_output.decode('utf-8', errors='replace')

    @staticmethod
    def get_time_in_sec(filename, *, debug=False, logLevel=16):
        """
        Static method to get length of an audio file (or video file containing audio) in seconds.

        Parameters
        ----------
        filename : str or path.
            File to probe.

        debug : bool (default False).
            Show debug info (print the ffprobe command line).

        logLevel : int (default 16).
            Log level to pass to the underlying ffprobe command.

        Returns
        -------
        float or None
            Duration in seconds as reported by ffprobe, None if ffprobe output
            cannot be converted to a float.
        """
        cmd = [AudioIO.paramProgram,  # ffprobe
               '-hide_banner',
               '-loglevel', str(logLevel),
               '-show_entries', 'format=duration',
               '-of', 'default=noprint_wrappers=1:nokey=1',
               filename
               ]

        output = AudioIO._run_probe(cmd, debug)

        try:
            return float(output)
        except (ValueError, TypeError):
            return None

    @staticmethod
    def get_params(filename, *, debug=False, logLevel=16):
        """
        Static method to get params (channels, sample_rate) of an audio file (or video file containing audio).

        Parameters
        ----------
        filename : str or path.
            File to probe.

        debug : bool (default False).
            Show debug info (print the ffprobe command line).

        logLevel : int (default 16).
            Log level to pass to the underlying ffprobe command.

        Returns
        -------
        tuple
            Tuple containing (channels, sample_rate) of the file.

        Raises
        ------
        AudioIO.AudioIOException
            If sample rate or number of channels cannot be found in ffprobe output.
        """
        cmd = [AudioIO.paramProgram,  # ffprobe
               '-hide_banner',
               '-loglevel', str(logLevel),
               '-show_entries', 'stream=channels,sample_rate',
               filename
               ]

        output = AudioIO._run_probe(cmd, debug)

        # Search for values in the ffprobe output
        match_sample_rate = re.search(r'sample_rate=(\d+)', output, flags=re.MULTILINE)
        match_channels = re.search(r'channels=(\d+)', output, flags=re.MULTILINE)

        # Extract values
        if not match_sample_rate:
            raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
        if not match_channels:
            raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")

        return (int(match_channels.group(1)), int(match_sample_rate.group(1)))

    # Attributes
    mode: PipeMode
    """ Pipemode of the current object (default PipeMode.UNK_MODE)"""

    logLevel: int
    """ Log level of the underlying ffmpeg backend for this object (default 16)"""

    debugMode: bool
    """ Debug mode flag for this object (print debug info, default False)"""

    channels: int
    """ Number of audio channels (default -1) """

    sample_rate: int
    """ Sample rate of the audio (default -1) """

    plannar: bool
    """ Read/write data as planar, i.e. not interleaved (default True) """

    pipe: sp.Popen
    """ pipe object to ffmpeg/ffprobe (default None)"""

    frame_size: int
    """ Number of samples per channel in one audio frame in read mode (default -1, None in write mode)"""

    filename: str
    """ Filename of the file (default None)"""

    frame_counter: FrameCounter
    """ `FrameCounter` object to count elapsed time (default None)"""

    def __init__(self, *, logLevel=16, debugMode=False):
        """
        Create an AudioIO object giving ffmpeg/ffprobe log level and defining debug mode.

        Parameters
        ----------
        logLevel : int (default 16)
            Log level to pass to the underlying ffmpeg/ffprobe command.

        debugMode : bool (default False)
            Show debug info while processing audio.
        """
        self.mode = PipeMode.UNK_MODE
        self.logLevel = logLevel
        self.debugMode = debugMode

        # Call init() method
        self.init()

    def init(self):
        """
        Init or reinit an AudioIO object (everything except log level and debug mode).
        """
        # no pipe means no mode: avoid keeping a stale READ/WRITE mode after close()
        self.mode = PipeMode.UNK_MODE
        self.channels = -1
        self.sample_rate = -1
        self.plannar = True
        self.pipe = None
        self.frame_size = -1
        self.filename = None
        self.frame_counter = None

    _repr_exclude = {"pipe"}
    """ Set of attributes excluded from string conversion. """

    # converting the object to a string representation
    def __repr__(self):
        """
        Convert object (excluding attributes in _repr_exclude) to string representation.
        """
        attrs = ", ".join(
            f"{k}={v!r}"
            for k, v in self.__dict__.items()
            if k not in self._repr_exclude
        )
        return f"{self.__class__.__name__}({attrs})"

    __str__ = __repr__
    """ String representation """

    def get_elapsed_time_as_str(self) -> Union[str, None]:
        """
        Method to get elapsed time (float value represented) as str.

        Returns
        -------
        str or None
            Elapsed time as str ("15.500" for instance for 15 seconds and 500 milliseconds),
            None if no frame counter is available.
        """
        if self.frame_counter is None:
            return None
        return self.frame_counter.get_elapsed_time_as_str()

    def get_formated_elapsed_time_as_str(self, show_ms=True) -> Union[str, None]:
        """
        Method to get elapsed time (hour format) as str.

        Returns
        -------
        str or None
            Elapsed time as str ("00:00:15.500" for instance for 15 seconds and 500 milliseconds),
            None if no frame counter is available.
        """
        if self.frame_counter is None:
            return None
        # NOTE(review): show_ms is accepted but not forwarded to the frame counter; its
        # signature is not visible from this module -- confirm before forwarding.
        return self.frame_counter.get_formated_elapsed_time_as_str()

    def get_elapsed_time(self) -> Union[float, None]:
        """
        Method to get elapsed time as a float value.

        Returns
        -------
        float or None
            Elapsed time in seconds (15.500 for instance for 15 seconds and 500 milliseconds),
            None if no frame counter is available.
        """
        if self.frame_counter is None:
            return None
        return self.frame_counter.get_elapsed_time()

    def is_opened(self) -> bool:
        """
        Method to get status of the underlying pipe to ffmpeg.

        Returns
        -------
        bool
            True if the ffmpeg process exists and is still running (reading or writing mode), False if not.
        """
        return self.pipe is not None and self.pipe.poll() is None

    def close(self):
        """
        Method to close current pipe to ffmpeg (if any). The ffmpeg process is ended and
        the object can be reused using open or create methods.
        """
        if self.pipe is None:
            return

        process = self.pipe
        try:
            if self.mode == PipeMode.WRITE_MODE:
                # killing would prevent ffmpeg from finishing the file properly: close its
                # stdin to let it know that no more data are coming
                process.stdin.close()
            else:
                # in read mode, no need to be nice
                process.kill()
        finally:
            # wait for subprocess to end
            process.wait()

            # release remaining pipe ends (stdout and, in read mode, the unused stdin)
            for stream in (process.stdin, process.stdout):
                if stream is not None and not stream.closed:
                    try:
                        stream.close()
                    except OSError:
                        # process is gone, nothing left to flush
                        pass

            # reinit object for later use, even if closing stdin raised
            self.init()

    def create(self, filename, sample_rate, channels, *, writeOverExistingFile=False,
               outputEncoding=AudioFormat.PCM32LE, encodingParams=None, plannar=True):
        """
        Method to create an audio file using parametrized access through ffmpeg. Important note: calling create
        on an AudioIO will close any formerly opened file.

        Parameters
        ----------
        filename : str or path
            Filename or path of the file to create.

        sample_rate : int
            Sample rate of the audio data that will be written (must be positive).

        channels : int
            Number of channels of the audio data that will be written (must be positive).

        writeOverExistingFile : bool optional (default False)
            If True, pass ``-y`` to ffmpeg to overwrite an existing file.

        outputEncoding : AudioFormat optional (default AudioFormat.PCM32LE)
            Define audio format for samples sent to ffmpeg. Possible value is AudioFormat.PCM32LE.

        encodingParams : str optional (default None)
            Extra parameters to pass to ffmpeg to encode audio (audio filters for instance),
            split on whitespace.

        plannar : bool optional (default True)
            Data to write are grouped by channel if True, interleaved otherwise.

        Returns
        -------
        bool
            True if the creation was successful.

        Raises
        ------
        AudioIO.AudioIOException
            If sample_rate or channels is not positive.
        """
        # Close if already opened
        self.close()

        # Set audio parameters from params
        self.sample_rate = int(sample_rate)
        self.channels = int(channels)
        self.plannar = plannar

        # Check params
        if self.sample_rate <= 0 or self.channels <= 0:
            raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate, self.channels))

        # To write audio, we do not need to know in advance frame size, we will write x values of n bytes
        self.frame_size = None

        # Audio params are set, build the ffmpeg command line
        cmd = [self.audioProgram]  # ffmpeg

        if writeOverExistingFile:
            cmd.append('-y')

        cmd.extend(['-hide_banner',
                    '-nostats',
                    '-loglevel', str(self.logLevel),
                    '-f', 'f32le', '-acodec', outputEncoding.value,  # input expected coding
                    '-ar', f"{self.sample_rate}",
                    '-ac', f"{self.channels}",
                    '-i', '-'])

        if encodingParams is not None:
            cmd.extend(encodingParams.split())

        # remove video
        cmd.extend(['-vn', filename])

        if self.debugMode:
            print(' '.join(str(arg) for arg in cmd), file=sys.stderr)

        # store filename and set mode
        self.filename = filename
        self.mode = PipeMode.WRITE_MODE

        # call ffmpeg in write mode
        try:
            self.pipe = sp.Popen(cmd, stdin=sp.PIPE)
            self.frame_counter = FrameCounter(self.sample_rate)
        except Exception:
            # if pipe failed, reinit object and raise exception
            self.init()
            raise

        return True

    def open(self, filename, *, sample_rate=-1, channels=-1, inputEncoding=AudioFormat.PCM32LE,
             decodingParams=None, frame_size=1.0, plannar=True, start_time=0.0):
        """
        Method to read (video file containing) audio using parametrized access through ffmpeg. Important note: calling open
        on an AudioIO will close any formerly opened file.

        Parameters
        ----------
        filename : str or path
            Filename or path of the file (mp4, avi, ...).

        sample_rate : int optional (default -1)
            If defined as a positive value, sample rate of the input audio will be converted to this value.

        channels : int optional (default -1)
            If defined as a positive value, number of channels of the input audio will be converted to this value.

        inputEncoding : AudioFormat optional (default AudioFormat.PCM32LE)
            Define audio format for samples. Possible value is AudioFormat.PCM32LE.

        decodingParams : str optional (default None)
            Extra parameters to pass to ffmpeg before the input, split on whitespace.

        frame_size : int or float (default 1.0)
            If frame_size is an int, it is the number of expected samples per channel in each frame, for instance 8000 for 8000 samples.
            If frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms.
            Number of samples in this case is computed as int(frame_size * sample_rate).

        plannar : bool optional (default True)
            Group audio samples per channel if True. Else, samples are interleaved.

        start_time : float optional (default 0.0)
            Define the reading start time in seconds. If not positive, reading starts at the beginning of the file.

        Returns
        -------
        bool
            True if the opening was successful.

        Raises
        ------
        AudioIO.AudioIOException
            If audio parameters cannot be read from the file or if frame_size is invalid.
        """
        # Close if already opened
        self.close()

        # Force conversion of parameters (int for both, consistent with create())
        channels = int(channels)
        sample_rate = int(sample_rate)

        self.plannar = plannar

        # get parameters from file if needed
        if sample_rate <= 0 or channels <= 0:
            self.channels, self.sample_rate = self.get_params(filename, debug=self.debugMode, logLevel=self.logLevel)

        # check if parameters ask to override file parameters
        if channels > 0:
            self.channels = channels
        if sample_rate > 0:
            self.sample_rate = sample_rate

        # compute number of samples per channel of one frame
        if isinstance(frame_size, (int, np.integer)):
            # number of samples
            samples_per_frame = int(frame_size)
        elif isinstance(frame_size, (float, np.floating)):
            # time in seconds
            samples_per_frame = int(frame_size * self.sample_rate)
        else:
            self.init()
            raise self.AudioIOException("Bad parameters: frame_size must be an int (samples) or a float (seconds), got {}".format(type(frame_size).__name__))

        if samples_per_frame <= 0:
            self.init()
            raise self.AudioIOException("Bad parameters: frame_size={} gives no sample to read".format(frame_size))

        self.frame_size = samples_per_frame

        # Audio params are set, build the ffmpeg command line
        cmd = [self.audioProgram,  # ffmpeg
               '-hide_banner',
               '-nostats',
               '-loglevel', str(self.logLevel)]

        if decodingParams is not None:
            # each token must be its own argv element (not a nested list)
            cmd.extend(decodingParams.split())

        # negative or null start time: read from the beginning
        if start_time > 0.0:
            cmd.extend(["-ss", f"{start_time}"])

        cmd.extend(['-i', filename,
                    '-f', 'f32le', '-acodec', inputEncoding.value,  # output expected coding
                    '-ar', f"{self.sample_rate}",
                    '-ac', f"{self.channels}",
                    '-'  # output to stdout
                    ])

        if self.debugMode:
            print(' '.join(str(arg) for arg in cmd))

        # store filename and set mode to READ_MODE
        self.filename = filename
        self.mode = PipeMode.READ_MODE

        # call ffmpeg in read mode
        try:
            # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe
            self.pipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE)
            self.frame_counter = FrameCounter(self.sample_rate)
            if start_time > 0.0:
                self.frame_counter += start_time  # adding with float means adding time
        except Exception:
            # if pipe failed, reinit object and raise exception
            self.init()
            raise

        return True

    def _require_pipe(self, expected_mode, opener, action):
        """
        Raise AudioIOException unless a pipe is opened in ``expected_mode``.
        """
        # - pipe exists
        if self.pipe is None:
            raise self.AudioIOException("No pipe opened to {}. Call {}(...) before {}.".format(self.audioProgram, opener, action))
        # - pipe is in the expected mode
        if self.mode != expected_mode:
            mode_name = "read" if expected_mode == PipeMode.READ_MODE else "write"
            raise self.AudioIOException("Pipe to {} for '{}' not opened in {} mode.".format(self.audioProgram, self.filename, mode_name))

    def read_frame(self, with_timestamps=False):
        """
        Read next frame from the audio file.

        Parameters
        ----------
        with_timestamps : bool optional (default False)
            If set to True, the method returns a ``FrameContainer`` with the audio and the associated timestamp.

        Returns
        -------
        nparray, FrameContainer or None
            A float32 frame of shape (self.channels, self.frame_size) if self.plannar is True, of shape
            (self.frame_size, self.channels), i.e. interleaved samples, if self.plannar is False.
            If with_timestamps is True, the frame is wrapped in a ``FrameContainer``.
            None if a complete frame is no longer available (end of stream).

        Raises
        ------
        AudioIO.AudioIOException
            If no pipe is opened or if the pipe is not in read mode.
        """
        self._require_pipe(PipeMode.READ_MODE, "open", "reading a frame")

        if with_timestamps:
            # get elapsed time in file, it is time of next frame
            current_elapsed_time = self.get_elapsed_time()

        # one frame is frame_size samples for EACH channel, 4 bytes per float32 sample
        toread = self.frame_size * self.channels * 4
        buffer = self.pipe.stdout.read(toread)
        if len(buffer) != toread:
            # not considered as an error, no more frame, no exception
            return None

        # ffmpeg outputs interleaved samples: (sample, channel)
        audio = np.frombuffer(buffer, dtype=np.float32).reshape(self.frame_size, self.channels)

        # make it planar (or not)
        if self.plannar:
            audio = audio.T

        # increase frame_counter by the number of samples per channel, consistent with
        # the frame duration (frame_size / sample_rate) given to FrameContainer.
        # NOTE(review): assumes FrameCounter elapsed time is frame_count / sample_rate -- confirm.
        self.frame_counter.frame_count += self.frame_size

        if with_timestamps:
            return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)

        return audio

    def read_batch(self, numberOfFrames, with_timestamps=False):
        """
        Read next batch of audio frames from the file.

        Parameters
        ----------
        numberOfFrames : int
            Number of desired frames within the batch. The last batch from the file may have less frames.

        with_timestamps : bool optional (default False)
            If set to True, the method returns a ``FrameContainer`` with the batch and the associated timestamps.

        Returns
        -------
        nparray, FrameContainer or None
            A float32 batch of shape (n, self.channels, self.frame_size) if self.plannar is True, of shape
            (n, self.frame_size, self.channels), i.e. interleaved samples, if self.plannar is False.
            If with_timestamps is True, the batch is wrapped in a ``FrameContainer``.
            None if not even one complete frame is available (end of stream).

        Raises
        ------
        AudioIO.AudioIOException
            If no pipe is opened or if the pipe is not in read mode.
        """
        self._require_pipe(PipeMode.READ_MODE, "open", "reading frames")

        if with_timestamps:
            # get elapsed time in file, it is time of next frame(s)
            current_elapsed_time = self.get_elapsed_time()

        # try to read complete batch
        frame_bytes = self.frame_size * self.channels * 4
        buffer = self.pipe.stdout.read(frame_bytes * numberOfFrames)

        # compute actual number of complete frames (last batch may be shorter)
        actualNbFrames = len(buffer) // frame_bytes
        if actualNbFrames < 1:
            # not considered as an error, no more frame, no exception
            return None

        # drop a trailing incomplete frame (if any), then reshape to (frame, sample, channel)
        batch = np.frombuffer(buffer[:actualNbFrames * frame_bytes], dtype=np.float32)
        batch = batch.reshape((actualNbFrames, self.frame_size, self.channels))

        if self.plannar:
            batch = batch.transpose(0, 2, 1)

        # increase frame_counter by the number of samples per channel (see read_frame)
        self.frame_counter.frame_count += actualNbFrames * self.frame_size

        if with_timestamps:
            return FrameContainer(actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)

        return batch

    def write_frame(self, audio) -> bool:
        """
        Write an audio frame to the file.

        Parameters
        ----------
        audio : nparray
            The float32 audio frame to write. If plannar is True, shape must be
            (self.channels, nb_samples_per_channel). If plannar is False, data are interleaved, either of shape
            (nb_samples_per_channel, self.channels) or flattened as (self.channels*nb_samples_per_channel).

        Returns
        -------
        bool
            Writing was successful or not.

        Raises
        ------
        AudioIO.AudioIOException
            If no pipe is opened in write mode, or if shape or dtype of ``audio`` is wrong.
        """
        # Check params
        self._require_pipe(PipeMode.WRITE_MODE, "create", "writing frames")

        # - shape of audio is fine
        if self.plannar:
            if audio.ndim != 2 or audio.shape[0] != self.channels:
                raise self.AudioIOException("Wrong audio shape: {} expected ({},n_samples_per_channel).".format(audio.shape, self.channels))
        else:
            interleaved_2d = audio.ndim == 2 and audio.shape[1] == self.channels
            interleaved_1d = audio.ndim == 1 and audio.size % self.channels == 0
            if not (interleaved_2d or interleaved_1d):
                raise self.AudioIOException("Wrong audio shape: {} expected (n_samples_per_channel,{}) or (n_samples_per_channel*{}).".format(audio.shape, self.channels, self.channels))

        # - type of data is Float32
        if audio.dtype != np.float32:
            raise self.AudioIOException("Wrong audio type: {} expected np.float32.".format(audio.dtype))

        nb_samples = audio.size // self.channels

        # ffmpeg expects interleaved samples: planar (channels, samples) goes to (samples, channels)
        if self.plannar:
            audio = audio.T

        # guarantee to have a C contiguous array
        if not audio.flags['C_CONTIGUOUS']:
            audio = np.ascontiguousarray(audio)

        # write frame
        buffer = audio.tobytes()
        if self.pipe.stdin.write(buffer) < len(buffer):
            print(f"Error writing frame to {self.filename}")
            return False

        # increase frame_counter by the number of samples per channel actually written
        # (self.frame_size is not defined in write mode)
        self.frame_counter.frame_count += nb_samples

        return True

    def write_batch(self, batch):
        """
        Write a batch of audio frames to the file.

        Parameters
        ----------
        batch : nparray
            The float32 batch of audio frames to write. If plannar is True, shape must be
            (n, self.channels, nb_samples_per_channel). If plannar is False, data are interleaved, either of shape
            (n, nb_samples_per_channel, self.channels) or (n, self.channels*nb_samples_per_channel).

        Returns
        -------
        bool
            True if writing was successful.

        Raises
        ------
        AudioIO.AudioIOException
            If no pipe is opened in write mode, if shape of ``batch`` is wrong or if writing failed.
        """
        # Check params
        self._require_pipe(PipeMode.WRITE_MODE, "create", "writing frames")

        if self.plannar:
            # batch is 3D (n, channels, nb samples)
            if batch.ndim != 3:
                raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
            if batch.shape[1] == self.channels:
                # ffmpeg expects interleaved samples: go to (n, n_samples_per_channel, n_channels)
                to_write = batch.transpose(0, 2, 1)
            elif batch.shape[2] == self.channels:
                # backward compatibility: previous versions only accepted this already
                # interleaved (n, n_samples_per_channel, n_channels) layout and wrote it as is
                to_write = batch
            else:
                raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[1], self.channels, batch.shape))
        else:
            interleaved_3d = batch.ndim == 3 and batch.shape[2] == self.channels
            interleaved_2d = batch.ndim == 2 and batch.shape[1] % self.channels == 0
            if not (interleaved_3d or interleaved_2d):
                raise self.AudioIOException("Wrong batch shape: {} expected (n, n_samples_per_channel, {}) or (n, n_samples_per_channel*{}).".format(batch.shape, self.channels, self.channels))
            # already interleaved, written as is
            to_write = batch

        nb_samples = batch.size // self.channels

        # guarantee to have a C contiguous array
        if not to_write.flags['C_CONTIGUOUS']:
            to_write = np.ascontiguousarray(to_write)

        # write batch
        buffer = to_write.tobytes()
        if self.pipe.stdin.write(buffer) < len(buffer):
            raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))

        # increase frame_counter by the number of samples per channel actually written
        # (must be an int: adding a float to the counter means adding time)
        self.frame_counter.frame_count += int(nb_samples)

        return True

    def iter_frames(self, with_timestamps=False):
        """
        Method to iterate on audio frames using AudioIO obj.

            for audio_frame in obj.iter_frames():
                ....

        The object is closed when the iteration ends.

        Parameters
        ----------
        with_timestamps : bool optional (default False)
            If set to True, each item is a ``FrameContainer`` with the frame and its timestamp.

        Yields
        ------
        nparray or FrameContainer
            Same objects as returned by ``read_frame``.
        """
        try:
            if self.mode == PipeMode.READ_MODE:
                # Do not test whether ffmpeg is still running: it may have exited while
                # decoded data are still waiting in the pipe. Read until end of stream.
                while self.pipe is not None:
                    frame = self.read_frame(with_timestamps)
                    if frame is None:
                        break
                    yield frame
        finally:
            self.close()

    def iter_batches(self, batch_size: int, with_timestamps=False):
        """
        Method to iterate on batches of audio frames using AudioIO obj.

            for audio_batch in obj.iter_batches(16):
                ....

        The object is closed when the iteration ends.

        Parameters
        ----------
        batch_size : int
            Number of frames in each batch (the last one may be shorter).

        with_timestamps : bool optional (default False)
            If set to True, each item is a ``FrameContainer`` with the batch and its timestamps.

        Yields
        ------
        nparray or FrameContainer
            Same objects as returned by ``read_batch``.
        """
        try:
            if self.mode == PipeMode.READ_MODE:
                # read until end of stream (see iter_frames)
                while self.pipe is not None:
                    batch = self.read_batch(batch_size, with_timestamps)
                    if batch is None:
                        break
                    yield batch
        finally:
            self.close()

    # function aliases to be compliant with original C++ version
    getAudioTimeInSec = get_time_in_sec
    getAudioParams = get_params
    get_audio_time_in_sec = get_time_in_sec
    get_audio_params = get_params
    isOpened = is_opened
    readFrame = read_frame
    readBatch = read_batch
    writeFrame = write_frame
    writeBatch = write_batch
32class AudioIO: 33 # "static" variables to ffmpeg, ffprobe executables 34 audioProgram, paramProgram = static_ffmpeg.run.get_or_fetch_platform_executables_else_raise() 35 36 class AudioIOException(Exception): 37 """ 38 Dedicated exception class for AudioIO class. 39 """ 40 def __init__(self, message="Error while reading/writing video occurs"): 41 self.message = message 42 super().__init__(self.message) 43 44 class AudioFormat(Enum): 45 """ 46 Enum class for supported input video type: 32-bit float is the only supported type for the moment. 47 """ 48 PCM32LE = 'pcm_f32le' # default format (unique mode for the moment) 49 50 @classmethod 51 def reader(cls, filename, **kwargs): 52 """ 53 Create and open an AudioIO object in reader mode 54 55 See ``AudioIO.open`` for the full list of accepted parameters. 56 """ 57 reader = cls() 58 reader.open(filename, **kwargs) 59 return reader 60 61 @classmethod 62 def writer(cls, filename, sample_rate, channels, **kwargs): 63 """ 64 Create and open an AudioIO object in writer mode 65 66 See ``AudioIO.create`` for the full list of accepted parameters. 67 """ 68 writer = cls() 69 writer.create(filename, sample_rate, channels, **kwargs) 70 return writer 71 72 # To use with context manager "with AudioIO.reader(...) as f:' for instance 73 def __enter__(self): 74 """ 75 Method call at initialisation of a context manager like "with AudioIO.reader/writer(...) as f:' for instance 76 """ 77 # simply return myself 78 return self 79 80 def __exit__(self, exc_type, exc_val, exc_tb): 81 """ 82 Method call when existing of a context manager like "with AudioIO.reader/writer(...) as f:' for instance 83 """ 84 # close AudioIO 85 self.close() 86 return False 87 88 @staticmethod 89 def get_time_in_sec(filename, *, debug=False, logLevel=16): 90 """ 91 Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals). 92 93 Parameters 94 ---------- 95 filename : str or path. 
96 Raw audio waveform as a 1D array. 97 98 debug : bool (default False). 99 Show debug info. 100 101 log_level: int (default 16). 102 Log level to pass to the underlying ffmpeg/ffprobe command. 103 104 Returns 105 ---------- 106 float 107 Length in seconds of video file (including milliseconds as decimal part with 3 decimals) 108 """ 109 110 cmd = [AudioIO.paramProgram, # ffprobe 111 '-hide_banner', 112 '-loglevel', str(logLevel), 113 '-show_entries', 'format=duration', 114 '-of', 'default=noprint_wrappers=1:nokey=1', 115 filename 116 ] 117 118 if debug == True: 119 print(' '.join(cmd)) 120 121 # call ffprobe and get params in one single line 122 lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg 123 output = lpipe.stdout.readlines() 124 lpipe.terminate() 125 # transform Bytes output to one single string 126 output = ''.join( [element.decode('utf-8') for element in output]) 127 128 try: 129 return float(output) 130 except (ValueError, TypeError): 131 return None 132 133 @staticmethod 134 def get_params(filename, *, debug=False, logLevel=16): 135 """ 136 Static method to get params (channels,sample_rate) of a (video containing) audio file in seconds. 137 138 Parameters 139 ---------- 140 filename : str or path. 141 Raw audio waveform as a 1D array. 142 143 debug : bool (default (False). 144 Show debug info. 145 146 log_level: int (default 16). 147 Log level to pass to the underlying ffmpeg/ffprobe command. 
148 149 Returns 150 ---------- 151 tuple 152 Tuple containing (channels,sample_rate) of the file 153 """ 154 cmd = [AudioIO.paramProgram, # ffprobe 155 '-hide_banner', 156 '-loglevel', str(logLevel), 157 '-show_entries', 'stream=channels,sample_rate', 158 filename 159 ] 160 161 if debug == True: 162 print(' '.join(cmd)) 163 164 # call ffprobe and get params in one single line 165 lpipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg 166 output = lpipe.stdout.readlines() 167 lpipe.terminate() 168 # transform Bytes output to one single string 169 output = ''.join( [element.decode('utf-8') for element in output]) 170 171 pattern_sample_rate = r'sample_rate=(\d+)' 172 pattern_channels = r'channels=(\d+)' 173 174 # Search for values in the ffprobe output 175 match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE) 176 match_channels = re.search(pattern_channels, output, flags=re.MULTILINE) 177 178 # Extraction des valeurs 179 if match_sample_rate: 180 sample_rate = int(match_sample_rate.group(1)) 181 else: 182 raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'") 183 184 if match_channels: 185 channels = int(match_channels.group(1)) 186 else: 187 raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'") 188 189 return (channels,sample_rate) 190 191 # Attributes 192 mode: PipeMode 193 """ Pipemode of the current object (default PipeMode.UNK_MODE)""" 194 195 loglevel: int 196 """ loglevel of the underlying ffmpeg backend for this object (default 16)""" 197 198 debugModel: bool 199 """ debutMode flag for this object (print debut info, default False)""" 200 201 channels: int 202 """ Number of channels of images (default -1) """ 203 204 sample_rate: int 205 """ sample_rate of images (default -1) """ 206 207 plannar: bool 208 """ Read/write data as plannar, i.e. 
not interleaved (default True) """ 209 210 pipe: sp.Popen 211 """ pipe object to ffmpeg/ffprobe (default None)""" 212 213 frame_size: int 214 """ Weight in bytes of one image (default -1)""" 215 216 filename: str 217 """ Filename of the file (default None)""" 218 219 frame_counter: FrameCounter 220 """ `Framecounter` object to count ellapsed time (default None)""" 221 222 def __init__(self, *, logLevel = 16, debugMode = False): 223 """ 224 Create a VideoIO object giving ffmpeg/ffrobe loglevel and defining debug mode 225 226 Parameters 227 ---------- 228 log_level: int (default 16) 229 Log level to pass to the underlying ffmpeg/ffprobe command. 230 231 debugMode: bool (default (False) 232 Show debug info. while processing video 233 """ 234 235 self.mode = PipeMode.UNK_MODE 236 self.logLevel = logLevel 237 self.debugMode = debugMode 238 239 # Call init() method 240 self.init() 241 242 def init(self): 243 """ 244 Init or reinit a VideoIO object. 245 """ 246 self.channels = -1 247 self.sample_rate = -1 248 self.plannar = True 249 self.pipe = None 250 self.frame_size = -1 251 self.filename = None 252 self.frame_counter = None 253 254 _repr_exclude = {"pipe"} 255 """ List of excluded attribute for string conversion. """ 256 257 # converting the object to a string representation 258 def __repr__(self): 259 """ 260 Convert object (excluding attributes in _repr_exclude) to string representation. 261 """ 262 attrs = ", ".join( 263 f"{k}={v!r}" 264 for k, v in self.__dict__.items() 265 if k not in self._repr_exclude 266 ) 267 return f"{self.__class__.__name__}({attrs})" 268 269 __str__ = __repr__ 270 """ String representation """ 271 272 def get_elapsed_time_as_str(self) -> str: 273 """ 274 Method to get elapsed time (float value represented) as str. 275 276 Returns 277 ---------- 278 str or None 279 Elapsed time (float value) as str, "15.500" for instance for 15 secondes and 500 milliseconds 280 None if no frame counter are available. 
281 """ 282 if self.frame_counter is None: 283 return None 284 return self.frame_counter.get_elapsed_time_as_str() 285 286 def get_formated_elapsed_time_as_str(self,show_ms=True) -> str: 287 """ 288 Method to get elapsed time (hour format) as str. 289 290 Returns 291 ---------- 292 str or None 293 Elapsed time (float value) as str, "00:00:15.500" for instance for 15 secondes and 500 milliseconds 294 None if no frame counter are available. 295 """ 296 if self.frame_counter is None: 297 return None 298 return self.frame_counter.get_formated_elapsed_time_as_str() 299 300 def get_elapsed_time(self) -> float: 301 """ 302 Method to get elapsed time as float value rounded to 3 decimals. 303 304 Returns 305 ---------- 306 float or None 307 Elapsed time (float value) as str, 15.500 for instance for 15 secondes and 500 milliseconds 308 None if no frame counter are available. 309 """ 310 if self.frame_counter is None: 311 return None 312 return self.frame_counter.get_elapsed_time() 313 314 def is_opened(self) -> bool: 315 """ 316 Method to get status of the underlying pipe to ffmpeg. 317 318 Returns 319 ---------- 320 bool 321 True if pipe is opened (reading or writing mode), False if not. 322 """ 323 # is the pip opened? 324 if self.pipe is not None and self.pipe.poll() is None: 325 return True 326 327 return False 328 329 def close(self): 330 """ 331 Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods. 
332 """ 333 if self.pipe is not None: 334 if self.mode == PipeMode.WRITE_MODE: 335 # killing will make ffmpeg not finish properly the job, close the pipe 336 # to let it know that no more data are comming 337 self.pipe.stdin.close() 338 else: # self.mode == PipeMode.READ_MODE 339 # in read mode, no need to be nice, send SIGTERM on Linux,/Kill it on windows 340 self.pipe.kill() 341 342 # wait for subprocess to end 343 self.pipe.wait() 344 345 # reinit object for later use 346 self.init() 347 348 def create( self, filename, sample_rate, channels, *, writeOverExistingFile = False, 349 outputEncoding = AudioFormat.PCM32LE, encodingParams = None, plannar = True ): 350 """ 351 Method to create a audio file using parametrized access through ffmpeg. Importante note: calling create 352 on a AudioIO will close any former open video. 353 354 Parameters 355 ---------- 356 filename: str or path 357 filename of path to the file (mp4, avi, ...) 358 359 sample_rate: int 360 If defined as a positive value, sample_rates of the output file will be set to this value. 361 362 channels: int 363 If defined as a positive value, number of channels of output file will be set to this value. 364 365 fps: 366 If defined as a positive value, fps of input video will be set to this value. 367 368 outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 369 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 370 371 encodingParams: str optional (default None) 372 Parameter to pass to ffmpeg to encode video like audio filters. 373 374 plannar : bool optionnal (default True) 375 Input data to write are grouped by channel if True, interleaved instead. 
376 377 Returns 378 ---------- 379 bool 380 Was the creation successfull 381 """ 382 383 # Close if already opened 384 self.close() 385 386 # Set geometry/fps of the video stream from params 387 self.sample_rate = int(sample_rate) 388 self.channels = int(channels) 389 self.plannar = plannar 390 391 # Check params 392 if self.sample_rate <= 0 or self.channels <= 0: 393 raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels)) 394 395 # To write audio, we do not need to know in advance frame size, we will write x values of n bytes 396 self.frame_size = None 397 398 # Video params are set, open the video 399 cmd = [self.audioProgram] # ffmpeg 400 401 if writeOverExistingFile == True: 402 cmd.extend(['-y']) 403 404 cmd.extend(['-hide_banner', 405 '-nostats', 406 '-loglevel', str(self.logLevel), 407 '-f', 'f32le', '-acodec', outputEncoding.value, # input expected coding 408 '-ar', f"{self.sample_rate}", 409 '-ac', f"{self.channels}", 410 '-i', '-']) 411 412 if encodingParams is not None: 413 cmd.extend(encodingParams.split()) 414 415 # remove video 416 cmd.extend( ['-vn', filename ] ) 417 418 if self.debugMode == True: 419 print( ' '.join(cmd), file=sys.stderr ) 420 421 # store filename and set mode 422 self.filename = filename 423 self.mode = PipeMode.WRITE_MODE 424 425 # call ffmpeg in write mode 426 try: 427 self.pipe = sp.Popen(cmd, stdin=sp.PIPE) 428 self.frame_counter = FrameCounter(self.sample_rate) 429 except Exception as e: 430 # if pipe failed, reinit object and raise exception 431 self.init() 432 raise 433 434 return True 435 436 def open( self, filename, *, sample_rate = -1, channels = -1, inputEncoding = AudioFormat.PCM32LE, 437 decodingParams = None, frame_size = 1.0, plannar = True, start_time = 0.0 ): 438 """ 439 Method to read (video file containing) audio using parametrized access through ffmpeg. Importante note: calling open 440 on a AudioIO will close any former open file. 
441 442 Parameters 443 ---------- 444 filename: str or path 445 filename of path to the file (mp4, avi, ...) 446 447 sample_rate: int optional (default -1) 448 If defined as a positive value, sample rate of the input audio will be converted to this value. 449 450 channels: int optional (default -1) 451 If defined as a positive value, number of channels of the input audio will converted to this value. 452 453 inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) 454 Define audio format for samples. Possible value is AudioFormat.PCM32LE. 455 456 decodingParams: str optional (default None) 457 Parameter to pass to ffmpeg to decode video like audio filters. 458 459 plannar: bool optionnal (default True) 460 Group audio samples per channel if True. Else, samples are interleaved. 461 462 frame_size: int or float (default 1.0) 463 If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. 464 if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. 465 Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate) 466 467 start_time: float optional (default 0.0) 468 Define the reading start time. If not set, reading at beginning of the file. 
469 470 Returns 471 ---------- 472 bool 473 Was the opening successfull 474 """ 475 476 # Close if already opened 477 self.close() 478 479 # Force conversion of parameters 480 channels = int(channels) 481 sample_rate = float(sample_rate) 482 483 self.plannar = plannar 484 485 # get parameters from file if needed: 486 if sample_rate <= 0 or channels <= 0: 487 self.channels, self.sample_rate = self.getAudioParams(filename) 488 489 # check if parameters ask to overide video parameters 490 if channels > 0: 491 self.channels = channels 492 if sample_rate > 0: 493 self.sample_rate = sample_rate 494 495 # check parameters 496 497 if isinstance(frame_size,float): 498 # time in seconds 499 self.frame_size = int(frame_size*self.sample_rate) 500 elif isinstance(frame_size,int): 501 # number of samples 502 self.frame_size = frame_size 503 else: 504 # to do 505 pass 506 507 # Video params are set, open the video 508 cmd = [self.audioProgram, # ffmpeg 509 '-hide_banner', 510 '-nostats', 511 '-loglevel', str(self.logLevel)] 512 513 if decodingParams is not None: 514 cmd.extend([decodingParams.split()]) 515 516 if start_time < 0.0: 517 pass 518 elif start_time > 0.0: 519 cmd.extend(["-ss", f"{start_time}"]) 520 521 cmd.extend( ['-i', filename, 522 '-f', 'f32le', '-acodec', inputEncoding.value, # input expected coding 523 '-ar', f"{self.sample_rate}", 524 '-ac', f"{self.channels}", 525 '-' # output to stdout 526 ] 527 ) 528 529 if self.debugMode == True: 530 print( ' '.join(cmd) ) 531 532 # store filename and set mode to READ_MODE 533 self.filename = filename 534 self.mode = PipeMode.READ_MODE 535 536 # call ffmpeg in read mode 537 try: 538 self.pipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe 539 self.frame_counter = FrameCounter(self.sample_rate) 540 if start_time > 0.0: 541 self.frame_counter += start_time # adding with float means adding time 542 except Exception as e: 543 # if pipe failed, reinit 
object and raise exception 544 self.init() 545 raise 546 547 return True 548 549 def read_frame(self, with_timestamps = False): 550 """ 551 Read next frame from the audio file 552 553 Parameters 554 ---------- 555 with_timestamps: bool optional (default False) 556 If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s) 557 558 Returns 559 ---------- 560 nparray or FrameContainer 561 A frame of shape (self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A frame 562 of shape (self.channels*self.frame_size) with interleaved data if self.plannar is False. 563 if with_timestamps is True, the return object is a FrameContainer with the audio data in ``FrameContainer.data`` and 564 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element). 565 """ 566 567 if self.pipe is None: 568 raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram)) 569 # - pipe is in write mode 570 if self.mode != PipeMode.READ_MODE: 571 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 572 573 if with_timestamps: 574 # get elapsed time in video, it is time of next frame(s) 575 current_elapsed_time = self.get_elapsed_time() 576 577 # read rgb image from pipe 578 toread = self.frame_size*4 579 buffer = self.pipe.stdout.read(toread) 580 if len(buffer) != toread: 581 # not considered as an error, no more frame, no exception 582 return None 583 584 # get numpy UINT8 array from buffer 585 audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels) 586 587 # make it plannar (or not) 588 if self.plannar: 589 #transpose it 590 audio = audio.T 591 592 # increase frame_counter 593 self.frame_counter.frame_count += (self.frame_size * self.channels) 594 595 # say to gc that this buffer is no longer needed 596 del buffer 597 598 
if with_timestamps: 599 return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time) 600 601 return audio 602 603 def read_batch(self, numberOfFrames, with_timestamps = False): 604 """ 605 Read next batch of audio from the file 606 607 Parameters 608 ---------- 609 number_of_frames: int 610 Number of desired images within the batch. The last batch from the file may have less images. 611 612 with_timestamps: bool optional (default False) 613 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 614 615 Returns 616 ---------- 617 nparray or FrameContainer 618 A batch of shape (n, self.channels,self.frame_size) as defined in the reader/open call if self.plannar is True. A batch 619 of shape (n, self.channels*self.frame_size) with interleaved data if self.plannar is False. 620 if with_timestamps is True, the return object is a FrameContainer with the audio batch in ``FrameContainer.data`` and 621 the associated timestamp in ``FrameContainer.timestamps`` as an array (one element for each audio frame). 622 """ 623 624 if self.pipe is None: 625 raise self.AudioIOException("No pipe opened to {}. Call open(...) 
before reading frames.".format(self.audioProgram)) 626 # - pipe is in write mode 627 if self.mode != PipeMode.READ_MODE: 628 raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename)) 629 630 if with_timestamps: 631 # get elapsed time in video, it is time of next frame(s) 632 current_elapsed_time = self.get_elapsed_time() 633 634 # try to read complete batch 635 toread = self.frame_size*4*self.channels*numberOfFrames 636 buffer = self.pipe.stdout.read(toread) 637 638 # check if we have at least 1 Frame 639 if len(buffer) < toread: 640 # not considered as an error, no more frame, no exception 641 return None 642 643 # compute actual number of Frames 644 actualNbFrames = len(buffer)//(self.frame_size*4*self.channels) 645 646 # get and reshape batch from buffer 647 batch = np.frombuffer(buffer, dtype = np.float32).reshape((actualNbFrames, self.frame_size, self.channels,)) 648 649 if self.plannar: 650 batch = batch.transpose(0, 2, 1) 651 652 # increase frame_counter 653 self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels) 654 655 # say to gc that this buffer is no longer needed 656 del buffer 657 658 if with_timestamps: 659 return FrameContainer( actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time) 660 661 return batch 662 663 def write_frame(self, audio) -> bool: 664 """ 665 Write an audio frame to the file 666 667 Parameters 668 ---------- 669 audio: nparray 670 The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel). 671 672 Returns 673 ---------- 674 bool 675 Writing was successful or not. 676 """ 677 # Check params 678 # - pipe exists 679 if self.pipe is None: 680 raise self.AudioIOException("No pipe opened to {}. Call create(...) 
before writing frames.".format(self.audioProgram)) 681 # - pipe is in write mode 682 if self.mode != PipeMode.WRITE_MODE: 683 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 684 # - shape of image is fine, thus we have pixels for a full compatible frame 685 if audio.shape[0] != self.channels: 686 raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size)) 687 # - type of data is Float32 688 if audio.dtype != np.float32: 689 raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype)) 690 691 # array must have a shape (channels, samples), reshape it it to (samples, channels) if plannar 692 if not self.plannar: 693 audio = audio.reshape(-1) 694 695 # print( audio.shape ) 696 697 # garantee to have a C continuous array 698 if not audio.flags['C_CONTIGUOUS']: 699 a = np.ascontiguousarray(a) 700 701 # write frame 702 buffer = audio.tobytes() 703 if self.pipe.stdin.write( buffer ) < len(buffer): 704 print( f"Error writing frame to {self.filename}" ) 705 return False 706 707 # increase frame_counter 708 self.frame_counter.frame_count += (self.frame_size * self.channels) 709 710 # say to gc that this buffer is no longer needed 711 del buffer 712 713 return True 714 715 def write_batch(self, batch): 716 """ 717 Write a batch of audio frame to the file 718 719 Parameters 720 ---------- 721 batch: nparray 722 The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data. 723 724 Returns 725 ---------- 726 bool 727 Writing was successful or not. 728 """ 729 # Check params 730 # - pipe exists 731 if self.pipe is None: 732 raise self.AudioIOException("No pipe opened to {}. Call create(...) 
before writing frames.".format(self.audioProgram)) 733 # - pipe is in write mode 734 if self.mode != PipeMode.WRITE_MODE: 735 raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename)) 736 # batch is 3D (n, channels, nb samples) 737 if batch.ndim !=3: 738 raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape)) 739 # - shape of images in batch is fine 740 if batch.shape[2] != self.channels: 741 raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[2], self.channels, batch.shape)) 742 743 # array must have a shape (n * n_channels * n_samples_per_channel) before writing them to pipe 744 # reshape it it to (n * n_channels * n_samples_per_channel) if plannar is False 745 if not self.plannar: 746 # goes from (n, n_channels, n_samples_per_channel) to (n * n_channels * n_samples_per_channel) 747 batch = batch.transpose(0, 2, 1) # first go to (n, n_samples_per_channel, n_channels) 748 batch = batch.reshape(-1) # then to 1D array (n * n_channels * n_samples_per_channel) 749 750 # garantee to have a C continuous array 751 if not batch.flags['C_CONTIGUOUS']: 752 batch = np.ascontiguousarray(batch) 753 754 # write frame 755 buffer = batch.tobytes() 756 if self.pipe.stdin.write( buffer ) < len(buffer): 757 # say to gc that this buffer is no longer needed 758 del buffer 759 raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename)) 760 761 # increase frame_counter 762 self.frame_counter.frame_count += int(batch.shape[0]/self.channels) # int conversion is mandatory to avoid confusion with time as float 763 764 # say to gc that this buffer is no longer needed 765 del buffer 766 767 return True 768 769 def iter_frames(self, with_timestamps = False): 770 """ 771 Method to iterate on audio frames using AudioIO obj. 772 for audio_frame in obj.iter_frames(): 773 .... 
774 775 Parameters 776 ---------- 777 with_timestamps: bool optional (default False) 778 If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames 779 780 Returns 781 ---------- 782 nparray or FrameContainer 783 A batch of images of shape () 784 """ 785 786 try: 787 if self.mode == PipeMode.READ_MODE: 788 while self.isOpened(): 789 frame = self.readFrame(with_timestamps) 790 if frame is not None: 791 yield frame 792 finally: 793 self.close() 794 795 def iter_batches(self, batch_size : int, with_timestamps = False ): 796 """ 797 Method to iterate on batch ofaudio frames using VideoIO obj. 798 for audio_batch in obj.iter_batches(): 799 .... 800 801 Parameters 802 ---------- 803 with_timestamps: bool optional (default False) 804 If set to True, the method returns a FrameContainer with the batch and the an array containing the associated timestamps to frames 805 """ 806 try: 807 if self.mode == PipeMode.READ_MODE: 808 while self.isOpened(): 809 batch = self.readBatch(batch_size, with_timestamps) 810 if batch is not None: 811 yield batch 812 finally: 813 self.close() 814 815 # function aliases to be compliant with original C++ version 816 getAudioTimeInSec = get_time_in_sec 817 getAudioParams = get_params 818 get_audio_time_in_sec = get_time_in_sec 819 get_audio_params = get_params 820 isOpened = is_opened 821 readFrame = read_frame 822 readBatch = read_batch 823 writeFrame = write_frame 824 writeBatch = write_batch
def __init__(self, *, logLevel = 16, debugMode = False):
    """
    Create an AudioIO object, setting the ffmpeg/ffprobe log level and the debug mode.

    Parameters
    ----------
    logLevel: int (default 16)
        Log level to pass to the underlying ffmpeg/ffprobe command.

    debugMode: bool (default False)
        Show debug info while processing audio.
    """

    # No pipe has been opened yet, so the mode is unknown
    self.mode = PipeMode.UNK_MODE
    self.logLevel = logLevel
    self.debugMode = debugMode

    # Set all stream-related attributes to their default values
    self.init()
Create an AudioIO object, setting the ffmpeg/ffprobe log level and the debug mode.
Parameters
logLevel: int (default 16) Log level to pass to the underlying ffmpeg/ffprobe command.
debugMode: bool (default False) Show debug info while processing audio.
@classmethod
def reader(cls, filename, **kwargs):
    """
    Build a new AudioIO object and open ``filename`` with it in reader mode.

    All keyword arguments are forwarded unchanged to ``AudioIO.open``;
    see that method for the full list of accepted parameters.
    """
    instance = cls()
    # open() is called on the new object before it is handed back to the caller
    instance.open(filename, **kwargs)
    return instance
Create and open an AudioIO object in reader mode
See AudioIO.open for the full list of accepted parameters.
@classmethod
def writer(cls, filename, sample_rate, channels, **kwargs):
    """
    Build a new AudioIO object and create ``filename`` with it in writer mode.

    ``sample_rate``, ``channels`` and all keyword arguments are forwarded unchanged
    to ``AudioIO.create``; see that method for the full list of accepted parameters.
    """
    instance = cls()
    # create() is called on the new object before it is handed back to the caller
    instance.create(filename, sample_rate, channels, **kwargs)
    return instance
Create and open an AudioIO object in writer mode
See AudioIO.create for the full list of accepted parameters.
@staticmethod
def get_time_in_sec(filename, *, debug=False, logLevel=16):
    """
    Static method to get the length in seconds of an audio file (or of a video file containing audio).

    Parameters
    ----------
    filename : str or path.
        Path of the file to probe.

    debug : bool (default False).
        If True, print the ffprobe command line before running it.

    logLevel : int (default 16).
        Log level to pass to the underlying ffprobe command.

    Returns
    ----------
    float or None
        Duration in seconds as printed by ffprobe (``format=duration``),
        None if the ffprobe output cannot be converted to a float.
    """

    cmd = [AudioIO.paramProgram, # ffprobe
           '-hide_banner',
           '-loglevel', str(logLevel),
           '-show_entries', 'format=duration',
           '-of', 'default=noprint_wrappers=1:nokey=1',
           str(filename) # str() so that path objects can also be joined for the debug print
          ]

    if debug:
        print(' '.join(cmd))

    # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffprobe.
    # communicate() reads the whole output and waits for the process, and the context
    # manager closes the pipes: no zombie process or open file descriptor is left behind.
    with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as lpipe:
        raw_output, _ = lpipe.communicate()

    # transform bytes output to one single string
    output = raw_output.decode('utf-8', errors='replace')

    try:
        return float(output)
    except (ValueError, TypeError):
        # empty or non numeric output (for instance when ffprobe failed)
        return None
Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
Parameters
filename : str or path. Path of the audio file (or video file containing audio) to probe.
debug : bool (default False). Show debug info.
logLevel: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
float or None Length in seconds of the file as reported by ffprobe; None if the duration cannot be parsed.
@staticmethod
def get_params(filename, *, debug=False, logLevel=16):
    """
    Static method to get params (channels,sample_rate) of an audio file (or of a video file containing audio).

    Parameters
    ----------
    filename : str or path.
        Path of the file to probe.

    debug : bool (default False).
        If True, print the ffprobe command line before running it.

    logLevel : int (default 16).
        Log level to pass to the underlying ffprobe command.

    Returns
    ----------
    tuple
        Tuple containing (channels,sample_rate) of the file, both as int.

    Raises
    ----------
    AudioIO.AudioIOException
        If sample_rate or channels cannot be found in the ffprobe output.
    """
    cmd = [AudioIO.paramProgram, # ffprobe
           '-hide_banner',
           '-loglevel', str(logLevel),
           '-show_entries', 'stream=channels,sample_rate',
           str(filename) # str() so that path objects can also be joined for the debug print
          ]

    if debug:
        print(' '.join(cmd))

    # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffprobe.
    # communicate() reads the whole output and waits for the process, and the context
    # manager closes the pipes: no zombie process or open file descriptor is left behind.
    with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as lpipe:
        raw_output, _ = lpipe.communicate()

    # transform bytes output to one single string
    output = raw_output.decode('utf-8', errors='replace')

    # Search for the first occurrence of each value in the ffprobe output
    match_sample_rate = re.search(r'sample_rate=(\d+)', output)
    match_channels = re.search(r'channels=(\d+)', output)

    # Extract values, sample_rate is checked first
    if match_sample_rate is None:
        raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
    sample_rate = int(match_sample_rate.group(1))

    if match_channels is None:
        raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")
    channels = int(match_channels.group(1))

    return (channels,sample_rate)

# Attributes
mode: PipeMode
""" Pipe mode of the current object (default PipeMode.UNK_MODE)"""

logLevel: int
""" Log level of the underlying ffmpeg backend for this object (default 16)"""

debugMode: bool
""" Debug mode flag for this object (print debug info, default False)"""

channels: int
""" Number of audio channels (default -1) """

sample_rate: int
""" Sample rate of the audio (default -1) """

plannar: bool
""" Read/write data as plannar, i.e. not interleaved (default True) """

pipe: sp.Popen
""" Pipe object to ffmpeg (default None)"""

frame_size: int
""" Number of samples in one audio frame, as set when opening a file (default -1)"""

filename: str
""" Filename of the file (default None)"""

frame_counter: FrameCounter
""" `FrameCounter` object to count elapsed time (default None)"""
Static method to get params (channels,sample_rate) of an audio file (or of a video file containing audio).
Parameters
filename : str or path. Path of the audio file (or video file containing audio) to probe.
debug : bool (default False). Show debug info.
logLevel: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
tuple Tuple containing (channels,sample_rate) of the file
def init(self):
    """
    Init or reinit the stream-related attributes of the object to their default values.
    """
    # Assignment order is kept as is: it is the order in which attributes show up in __dict__
    defaults = (
        ("channels", -1),
        ("sample_rate", -1),
        ("plannar", True),
        ("pipe", None),
        ("frame_size", -1),
        ("filename", None),
        ("frame_counter", None),
    )
    for attribute, value in defaults:
        setattr(self, attribute, value)
Init or reinit an AudioIO object.
def get_elapsed_time_as_str(self) -> str:
    """
    Method to get the elapsed time as a str, as provided by the frame counter.

    Returns
    ----------
    str or None
        Result of ``frame_counter.get_elapsed_time_as_str()``, for instance "15.500"
        for 15 seconds and 500 milliseconds.
        None if no frame counter is available.
    """
    counter = self.frame_counter
    return None if counter is None else counter.get_elapsed_time_as_str()
Method to get elapsed time (float value represented) as str.
Returns
str or None Elapsed time (float value) as str, for instance "15.500" for 15 seconds and 500 milliseconds. None if no frame counter is available.
def get_formated_elapsed_time_as_str(self,show_ms=True) -> str:
    """
    Method to get elapsed time (hour format) as str.

    Parameters
    ----------
    show_ms: bool optional (default True)
        When False, the request is forwarded to the frame counter with ``show_ms=False``.
        With the default value, the frame counter is called without argument.

    Returns
    ----------
    str or None
        Elapsed time as str, "00:00:15.500" for instance for 15 seconds and 500 milliseconds.
        None if no frame counter is available.
    """
    if self.frame_counter is None:
        return None

    if show_ms:
        # default path, same call as before
        return self.frame_counter.get_formated_elapsed_time_as_str()

    # show_ms used to be silently ignored, forward it to the frame counter.
    # NOTE(review): assumes FrameCounter.get_formated_elapsed_time_as_str accepts a
    # ``show_ms`` keyword - confirm against FrameCounter.
    return self.frame_counter.get_formated_elapsed_time_as_str(show_ms=False)
Method to get elapsed time (hour format) as str.
Returns
str or None Elapsed time in hour format as str, for instance "00:00:15.500" for 15 seconds and 500 milliseconds. None if no frame counter is available.
def get_elapsed_time(self) -> float:
    """
    Method to get the elapsed time as provided by the frame counter.

    Returns
    ----------
    float or None
        Result of ``frame_counter.get_elapsed_time()``, for instance 15.500
        for 15 seconds and 500 milliseconds.
        None if no frame counter is available.
    """
    counter = self.frame_counter
    return None if counter is None else counter.get_elapsed_time()
Method to get elapsed time as float value rounded to 3 decimals.
Returns
float or None Elapsed time as a float, for instance 15.500 for 15 seconds and 500 milliseconds. None if no frame counter is available.
def is_opened(self) -> bool:
    """
    Method to get status of the underlying pipe to ffmpeg.

    Returns
    ----------
    bool
        True if a pipe exists and its process has not exited yet (``poll()`` returns None),
        False otherwise.
    """
    process = self.pipe
    # poll() is only called when a pipe exists
    return process is not None and process.poll() is None
Method to get status of the underlying pipe to ffmpeg.
Returns
bool True if pipe is opened (reading or writing mode), False if not.
def close(self):
    """
    Close the current pipe to ffmpeg (if any) and reset the object.

    In write mode the pipe's stdin is closed so ffmpeg can finish its job; in any other
    mode the process is killed. The method then waits for the process to end and calls
    ``self.init()`` so the object can be reused with open or create.
    """
    proc = self.pipe
    if proc is not None:
        if self.mode != PipeMode.WRITE_MODE:
            # reading: no need to be nice, just kill the process
            proc.kill()
        else:
            # writing: killing would prevent ffmpeg from finishing the file properly,
            # closing stdin tells it that no more data is coming
            proc.stdin.close()

        # wait for subprocess to end
        proc.wait()

    # reinit object for later use
    self.init()
Method to close current pipe to ffmpeg (if any). Ffmpeg/ffprobe will be terminated. Object can be reused using open or create methods.
def create(self, filename, sample_rate, channels, *, writeOverExistingFile=False,
           outputEncoding=AudioFormat.PCM32LE, encodingParams=None, plannar=True):
    """
    Create an audio file written through an ffmpeg subprocess. Important note: calling create
    on an AudioIO closes any formerly opened file.

    Parameters
    ----------
    filename: str or path
        Filename or path of the output file.

    sample_rate: int
        Sample rate of the data that will be written (must be positive).

    channels: int
        Number of channels of the data that will be written (must be positive).

    writeOverExistingFile: bool optional (default False)
        If True, '-y' is passed to ffmpeg.

    outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
        Audio format of the samples sent to ffmpeg. Possible value is AudioFormat.PCM32LE.

    encodingParams: str optional (default None)
        Extra ffmpeg parameters (split on whitespace) inserted before the output file name.

    plannar: bool optional (default True)
        Input data to write are grouped by channel if True, interleaved otherwise.

    Returns
    ----------
    bool
        True once the ffmpeg process has been started.

    Raises
    ----------
    AudioIOException
        If sample_rate or channels is not strictly positive.
    """

    # Close if already opened
    self.close()

    # Store stream parameters
    self.sample_rate = int(sample_rate)
    self.channels = int(channels)
    self.plannar = plannar

    # Check params
    if self.sample_rate <= 0 or self.channels <= 0:
        raise self.AudioIOException("Bad parameters: sample_rate={}, channels={}".format(self.sample_rate,self.channels))

    # To write audio, the frame size does not need to be known in advance
    self.frame_size = None

    cmd = [self.audioProgram]  # ffmpeg

    if writeOverExistingFile:
        cmd.append('-y')

    cmd.extend(['-hide_banner',
                '-nostats',
                '-loglevel', str(self.logLevel),
                '-f', 'f32le', '-acodec', outputEncoding.value,  # format of the data sent on stdin
                '-ar', f"{self.sample_rate}",
                '-ac', f"{self.channels}",
                '-i', '-'])

    if encodingParams is not None:
        cmd.extend(encodingParams.split())

    # remove video; str() keeps the command a list of str even when filename is a Path,
    # otherwise the debug join below raises TypeError
    cmd.extend(['-vn', str(filename)])

    if self.debugMode:
        print(' '.join(cmd), file=sys.stderr)

    # store filename and set mode
    self.filename = filename
    self.mode = PipeMode.WRITE_MODE

    # call ffmpeg in write mode
    try:
        self.pipe = sp.Popen(cmd, stdin=sp.PIPE)
        self.frame_counter = FrameCounter(self.sample_rate)
    except Exception:
        # if pipe failed, reinit object and re-raise
        self.init()
        raise

    return True
Method to create an audio file using parametrized access through ffmpeg. Important note: calling create on an AudioIO will close any formerly opened file.
Parameters
filename: str or path filename of path to the file (mp4, avi, ...)
sample_rate: int Sample rate of the output file. Must be a positive value.
channels: int If defined as a positive value, number of channels of output file will be set to this value.
fps: Not a parameter of create (entry apparently left over from the video API); the audio rate is set with sample_rate.
outputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) Define audio format for samples. Possible value is AudioFormat.PCM32LE.
encodingParams: str optional (default None) Extra parameters to pass to ffmpeg when encoding the output, audio filters for instance.
plannar : bool optional (default True) Input data to write are grouped by channel if True, interleaved otherwise.
Returns
bool Whether the creation was successful.
def open(self, filename, *, sample_rate=-1, channels=-1, inputEncoding=AudioFormat.PCM32LE,
         decodingParams=None, frame_size=1.0, plannar=True, start_time=0.0):
    """
    Open an audio file (or a video file containing audio) for reading through an ffmpeg subprocess.
    Important note: calling open on an AudioIO closes any formerly opened file.

    Parameters
    ----------
    filename: str or path
        Filename or path of the file (mp4, avi, ...)

    sample_rate: int optional (default -1)
        If positive, ffmpeg is asked to output audio at this sample rate; otherwise the rate is probed from the file.

    channels: int optional (default -1)
        If positive, ffmpeg is asked to output this number of channels; otherwise it is probed from the file.

    inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE)
        Audio format of the samples produced by ffmpeg. Possible value is AudioFormat.PCM32LE.

    decodingParams: str optional (default None)
        Extra ffmpeg parameters (split on whitespace) inserted before the input file.

    frame_size: int or float (default 1.0)
        If an int, number of samples per channel in each frame, for instance 8000 for 8000 samples.
        If a float, duration in seconds of each frame, for instance 1.0 for 1 second, 0.010 for 10 ms;
        the number of samples is then int(frame_size * sample_rate).

    plannar: bool optional (default True)
        Group audio samples per channel if True. Otherwise samples are interleaved.

    start_time: float optional (default 0.0)
        Reading start time in seconds. Values <= 0 read from the beginning of the file.

    Returns
    ----------
    bool
        True once the ffmpeg process has been started.

    Raises
    ----------
    AudioIOException
        If frame_size is neither an int nor a float, or leads to an empty frame.
    """

    # Close if already opened
    self.close()

    # Force conversion of parameters (int, like create() and get_params())
    channels = int(channels)
    sample_rate = int(sample_rate)

    self.plannar = plannar

    # get parameters from file if needed
    if sample_rate <= 0 or channels <= 0:
        self.channels, self.sample_rate = self.get_params(filename)

    # explicit parameters override the ones of the file
    if channels > 0:
        self.channels = channels
    if sample_rate > 0:
        self.sample_rate = sample_rate

    # frame size: float means a duration in seconds, int a number of samples
    if isinstance(frame_size, (float, np.floating)):
        self.frame_size = int(frame_size * self.sample_rate)
    elif isinstance(frame_size, (int, np.integer)):
        self.frame_size = int(frame_size)
    else:
        raise self.AudioIOException("Bad parameters: frame_size={!r} must be an int (samples) or a float (seconds)".format(frame_size))
    if self.frame_size <= 0:
        raise self.AudioIOException("Bad parameters: frame_size={!r} leads to an empty audio frame".format(frame_size))

    cmd = [self.audioProgram,  # ffmpeg
           '-hide_banner',
           '-nostats',
           '-loglevel', str(self.logLevel)]

    if decodingParams is not None:
        # extend with the tokens themselves: a nested list is not a valid argv element
        cmd.extend(decodingParams.split())

    if start_time > 0.0:
        cmd.extend(["-ss", f"{start_time}"])

    # str() keeps the command a list of str even when filename is a Path
    cmd.extend(['-i', str(filename),
                '-f', 'f32le', '-acodec', inputEncoding.value,  # format of the data read from stdout
                '-ar', f"{self.sample_rate}",
                '-ac', f"{self.channels}",
                '-'  # output to stdout
                ])

    if self.debugMode:
        print(' '.join(cmd))

    # store filename and set mode to READ_MODE
    self.filename = filename
    self.mode = PipeMode.READ_MODE

    # call ffmpeg in read mode
    try:
        # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg/ffprobe
        self.pipe = sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE)
        self.frame_counter = FrameCounter(self.sample_rate)
        if start_time > 0.0:
            self.frame_counter += start_time  # adding with float means adding time
    except Exception:
        # if pipe failed, reinit object and re-raise
        self.init()
        raise

    return True
Method to read audio (from an audio file or a video file containing audio) using parametrized access through ffmpeg. Important note: calling open on an AudioIO will close any formerly opened file.
Parameters
filename: str or path filename of path to the file (mp4, avi, ...)
sample_rate: int optional (default -1) If defined as a positive value, sample rate of the input audio will be converted to this value.
channels: int optional (default -1) If defined as a positive value, the number of channels of the input audio will be converted to this value.
inputEncoding: AudioFormat optional (default AudioFormat.PCM32LE) Define audio format for samples. Possible value is AudioFormat.PCM32LE.
decodingParams: str optional (default None) Extra parameters to pass to ffmpeg when decoding the input, audio filters for instance.
plannar: bool optional (default True) Group audio samples per channel if True. Otherwise, samples are interleaved.
frame_size: int or float (default 1.0) If frame_size is an int, it is the number of expected samples in each frame, for instance 8000 for 8000 samples. if frame_size is a float, it is considered as a time in seconds for each audio frame, for instance 1.0 for 1 second, 0.010 for 10 ms. Number of samples in this case is computed using frame_size and sample_rate as int(frame_size * sample_rate)
start_time: float optional (default 0.0) Define the reading start time. If not set, reading at beginning of the file.
Returns
bool Whether the opening was successful.
def read_frame(self, with_timestamps=False):
    """
    Read the next frame from the audio stream.

    Parameters
    ----------
    with_timestamps: bool optional (default False)
        If set to True, the method returns a ``FrameContainer`` built from the audio, the frame duration
        and the elapsed time read before the frame.

    Returns
    ----------
    nparray, FrameContainer or None
        A float32 frame of shape (self.channels, self.frame_size) if self.plannar is True, else of shape
        (self.frame_size, self.channels) with interleaved data.
        If with_timestamps is True, the frame is wrapped in a ``FrameContainer``.
        None when a complete frame can no longer be read (end of stream).

    Raises
    ----------
    AudioIOException
        If no pipe is opened or if the pipe is not in read mode.
    """

    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram))
    # - pipe is in write mode
    if self.mode != PipeMode.READ_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))

    if with_timestamps:
        # elapsed time before reading is the timestamp of the frame about to be read
        current_elapsed_time = self.get_elapsed_time()

    # one frame is frame_size samples for EACH channel, 4 bytes (float32) per value;
    # forgetting the channel factor made the reshape below fail for multi-channel audio
    toread = self.frame_size * self.channels * 4
    buffer = self.pipe.stdout.read(toread)
    if len(buffer) != toread:
        # not considered as an error, no more frame, no exception
        return None

    # ffmpeg outputs interleaved samples: one row per sample, one column per channel
    audio = np.frombuffer(buffer, dtype=np.float32).reshape(self.frame_size, self.channels)

    # make it plannar (or not)
    if self.plannar:
        audio = audio.T

    # increase frame_counter
    # NOTE(review): this counts values over all channels while the writer side counts
    # per-channel samples -- confirm which unit FrameCounter expects.
    self.frame_counter.frame_count += (self.frame_size * self.channels)

    if with_timestamps:
        return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)

    return audio
Read next frame from the audio file
Parameters
with_timestamps: bool optional (default False)
If set to True, the method returns a FrameContainer with the audio and an array containing the associated timestamp(s)
Returns
nparray or FrameContainer
A frame of shape (self.channels, self.frame_size) as defined in the reader/open call if self.plannar is True. A frame
of shape (self.frame_size, self.channels) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio data in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element).
def read_batch(self, numberOfFrames, with_timestamps=False):
    """
    Read the next batch of audio frames from the stream.

    Parameters
    ----------
    numberOfFrames: int
        Number of desired audio frames within the batch. The last batch from the file may contain fewer frames.

    with_timestamps: bool optional (default False)
        If set to True, the method returns a ``FrameContainer`` built from the batch, the frame duration
        and the elapsed time read before the batch.

    Returns
    ----------
    nparray, FrameContainer or None
        A float32 batch of shape (n, self.channels, self.frame_size) if self.plannar is True, else of shape
        (n, self.frame_size, self.channels) with interleaved data.
        If with_timestamps is True, the batch is wrapped in a ``FrameContainer``.
        None when not even one complete frame can be read (end of stream).

    Raises
    ----------
    AudioIOException
        If no pipe is opened or if the pipe is not in read mode.
    """

    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram))
    # - pipe is in write mode
    if self.mode != PipeMode.READ_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))

    if with_timestamps:
        # elapsed time before reading is the timestamp of the first frame of the batch
        current_elapsed_time = self.get_elapsed_time()

    # try to read a complete batch (4 bytes per float32 value)
    frame_bytes = self.frame_size * 4 * self.channels
    buffer = self.pipe.stdout.read(frame_bytes * numberOfFrames)

    # keep every complete frame: the last batch of a file is usually shorter than requested,
    # requiring a full batch here silently dropped the end of the stream
    actualNbFrames = len(buffer) // frame_bytes
    if actualNbFrames < 1:
        # not considered as an error, no more frame, no exception
        return None

    # get and reshape batch from buffer; count= ignores a trailing incomplete frame
    batch = np.frombuffer(
        buffer, dtype=np.float32, count=actualNbFrames * self.frame_size * self.channels
    ).reshape((actualNbFrames, self.frame_size, self.channels))

    if self.plannar:
        batch = batch.transpose(0, 2, 1)

    # increase frame_counter
    # NOTE(review): this counts values over all channels while the writer side counts
    # per-channel samples -- confirm which unit FrameCounter expects.
    self.frame_counter.frame_count += (actualNbFrames * self.frame_size * self.channels)

    if with_timestamps:
        return FrameContainer(actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)

    return batch
Read next batch of audio from the file
Parameters
numberOfFrames: int Number of desired audio frames within the batch. The last batch from the file may contain fewer frames.
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated with the frames
Returns
nparray or FrameContainer
A batch of shape (n, self.channels, self.frame_size) as defined in the reader/open call if self.plannar is True. A batch
of shape (n, self.frame_size, self.channels) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio batch in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element for each audio frame).
def write_frame(self, audio) -> bool:
    """
    Write an audio frame to the file.

    Parameters
    ----------
    audio: nparray
        float32 audio frame. If plannar is True, shape (self.channels, nb_samples_per_channel).
        Otherwise interleaved data, either of shape (nb_samples_per_channel, self.channels) or a 1D array
        of self.channels*nb_samples_per_channel values.

    Returns
    ----------
    bool
        True if the frame was written, False if the pipe accepted fewer bytes than expected.

    Raises
    ----------
    AudioIOException
        If no pipe is opened, the pipe is not in write mode, or the shape/dtype of audio is wrong.
    """
    # Check params
    # - pipe exists
    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
    # - pipe is in write mode
    if self.mode != PipeMode.WRITE_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
    # - shape matches the declared layout
    if self.plannar:
        shape_ok = audio.ndim == 2 and audio.shape[0] == self.channels
        expected = "({}, n_samples)".format(self.channels)
    else:
        shape_ok = (audio.ndim == 2 and audio.shape[1] == self.channels) or \
                   (audio.ndim == 1 and audio.size % self.channels == 0)
        expected = "(n_samples, {}) or ({} * n_samples,)".format(self.channels, self.channels)
    if not shape_ok:
        raise self.AudioIOException("Wrong audio shape: {} expected {}.".format(audio.shape, expected))
    # - type of data is Float32
    if audio.dtype != np.float32:
        raise self.AudioIOException("Wrong audio type: {} expected np.float32.".format(audio.dtype))

    # raw f32le is interleaved: plannar (channels, samples) data must be sent as (samples, channels)
    if self.plannar:
        audio = audio.T

    # write frame; tobytes() serialises in C order whatever the memory layout of the view
    buffer = audio.tobytes()
    if self.pipe.stdin.write(buffer) < len(buffer):
        print(f"Error writing frame to {self.filename}")
        return False

    # increase frame_counter by the number of samples per channel actually written
    # (self.frame_size is None in write mode, so it cannot be used here)
    # NOTE(review): the read side adds frame_size * channels instead -- confirm which
    # unit FrameCounter expects.
    self.frame_counter.frame_count += audio.size // self.channels

    return True
Write an audio frame to the file
Parameters
audio: nparray The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).
Returns
bool Writing was successful or not.
def write_batch(self, batch):
    """
    Write a batch of audio frames to the file.

    Parameters
    ----------
    batch: nparray
        float32 3D batch of audio frames. If plannar is True, shape (n, self.channels, nb_samples_per_channel).
        Otherwise interleaved data of shape (n, nb_samples_per_channel, self.channels).

    Returns
    ----------
    bool
        True when the batch was written.

    Raises
    ----------
    AudioIOException
        If no pipe is opened, the pipe is not in write mode, the shape/dtype of the batch is wrong,
        or the pipe accepted fewer bytes than expected.
    """
    # Check params
    # - pipe exists
    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
    # - pipe is in write mode
    if self.mode != PipeMode.WRITE_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
    # - batch is 3D
    if batch.ndim != 3:
        raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
    # - channel axis depends on the layout: axis 1 when plannar, axis 2 when interleaved
    channel_axis = 1 if self.plannar else 2
    if batch.shape[channel_axis] != self.channels:
        raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[channel_axis], self.channels, batch.shape))
    # - type of data is Float32 (same requirement as write_frame)
    if batch.dtype != np.float32:
        raise self.AudioIOException("Wrong audio type: {} expected np.float32.".format(batch.dtype))

    # raw f32le is interleaved: plannar (n, channels, samples) data must be sent as
    # (n, samples, channels); interleaved data is already in the right order
    if self.plannar:
        batch = batch.transpose(0, 2, 1)

    # write batch; tobytes() serialises in C order whatever the memory layout of the view
    buffer = batch.tobytes()
    if self.pipe.stdin.write(buffer) < len(buffer):
        raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))

    # increase frame_counter by the number of samples per channel written
    # (an int, to avoid confusion with time given as float)
    self.frame_counter.frame_count += batch.size // self.channels

    return True
Write a batch of audio frame to the file
Parameters
batch: nparray The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.
Returns
bool Writing was successful or not.
def iter_frames(self, with_timestamps=False):
    """
    Iterate over the audio frames of an AudioIO object opened in read mode.
        for audio_frame in obj.iter_frames():
            ....

    The object is closed when the iteration ends, whether it completed or was interrupted.
    Nothing is yielded if the object is not in read mode.

    Parameters
    ----------
    with_timestamps: bool optional (default False)
        Forwarded to ``read_frame``: if True, frames are yielded as ``FrameContainer`` objects.

    Yields
    ----------
    nparray or FrameContainer
        Frames as returned by ``read_frame``.
    """

    try:
        if self.mode == PipeMode.READ_MODE:
            # Loop until read_frame reports the end of the stream. Testing whether the ffmpeg
            # process is still alive is not reliable: it may exit while frames are still
            # buffered in the pipe, or stay alive after the last frame was read.
            while self.pipe is not None:
                frame = self.read_frame(with_timestamps)
                if frame is None:
                    break
                yield frame
    finally:
        self.close()
Method to iterate on audio frames using AudioIO obj. for audio_frame in obj.iter_frames(): ....
Parameters
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer object with the batch and an array containing the associated timestamps to frames
Returns
nparray or FrameContainer An audio frame as returned by read_frame
def iter_batches(self, batch_size : int, with_timestamps = False ):
    """
    Iterate over batches of audio frames of an AudioIO object opened in read mode.
        for audio_batch in obj.iter_batches(batch_size):
            ....

    The object is closed when the iteration ends, whether it completed or was interrupted.
    Nothing is yielded if the object is not in read mode.

    Parameters
    ----------
    batch_size: int
        Number of frames requested for each batch, forwarded to ``read_batch``.

    with_timestamps: bool optional (default False)
        Forwarded to ``read_batch``: if True, batches are yielded as ``FrameContainer`` objects.

    Yields
    ----------
    nparray or FrameContainer
        Batches as returned by ``read_batch``.
    """
    try:
        if self.mode == PipeMode.READ_MODE:
            # Loop until read_batch reports the end of the stream. Testing whether the ffmpeg
            # process is still alive is not reliable: it may exit while data is still
            # buffered in the pipe, or stay alive after the last batch was read.
            while self.pipe is not None:
                batch = self.read_batch(batch_size, with_timestamps)
                if batch is None:
                    break
                yield batch
    finally:
        self.close()
Method to iterate on batches of audio frames using AudioIO obj. for audio_batch in obj.iter_batches(batch_size): ....
Parameters
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated with the frames
@staticmethod
def get_time_in_sec(filename, *, debug=False, logLevel=16):
    """
    Static method to get the length in seconds of an audio file (or video file containing audio),
    as reported by ffprobe's ``format=duration`` entry.

    Parameters
    ----------
    filename : str or path.
        File to probe.

    debug : bool (default False).
        Print the ffprobe command line.

    logLevel: int (default 16).
        Log level to pass to the underlying ffprobe command.

    Returns
    ----------
    float or None
        Length in seconds of the file, None if the ffprobe output cannot be parsed as a float.
    """

    cmd = [AudioIO.paramProgram,  # ffprobe
           '-hide_banner',
           '-loglevel', str(logLevel),
           '-show_entries', 'format=duration',
           '-of', 'default=noprint_wrappers=1:nokey=1',
           str(filename)  # str() so that Path objects can be joined in the debug print
           ]

    if debug:
        print(' '.join(cmd))

    # call ffprobe; the context manager + communicate() close the pipes and wait for the
    # process, so no zombie process or open file descriptor is left behind
    # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
    with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as probe:
        raw_output, _ = probe.communicate()

    # transform bytes output to one single string
    output = raw_output.decode('utf-8')

    try:
        return float(output)
    except (ValueError, TypeError):
        return None
Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
Parameters
filename : str or path. Audio file (or video file containing audio) to probe.
debug : bool (default False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
float Length in seconds of the file (including milliseconds as decimal part), or None if it cannot be determined
@staticmethod
def get_params(filename, *, debug=False, logLevel=16):
    """
    Static method to get params (channels, sample_rate) of an audio file (or video file containing audio).

    Parameters
    ----------
    filename : str or path.
        File to probe.

    debug : bool (default False).
        Print the ffprobe command line.

    logLevel: int (default 16).
        Log level to pass to the underlying ffprobe command.

    Returns
    ----------
    tuple
        Tuple containing (channels, sample_rate) of the file, taken from the first
        ``channels=`` and ``sample_rate=`` entries of the ffprobe output.

    Raises
    ----------
    AudioIOException
        If the sample rate or the number of channels cannot be found in the ffprobe output.
    """
    cmd = [AudioIO.paramProgram,  # ffprobe
           '-hide_banner',
           '-loglevel', str(logLevel),
           '-show_entries', 'stream=channels,sample_rate',
           str(filename)  # str() so that Path objects can be joined in the debug print
           ]

    if debug:
        print(' '.join(cmd))

    # call ffprobe; the context manager + communicate() close the pipes and wait for the
    # process, so no zombie process or open file descriptor is left behind
    # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
    with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as probe:
        raw_output, _ = probe.communicate()

    # transform bytes output to one single string
    output = raw_output.decode('utf-8')

    pattern_sample_rate = r'sample_rate=(\d+)'
    pattern_channels = r'channels=(\d+)'

    # Search for values in the ffprobe output
    match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
    match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)

    # Extract the values
    if match_sample_rate:
        sample_rate = int(match_sample_rate.group(1))
    else:
        raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")

    if match_channels:
        channels = int(match_channels.group(1))
    else:
        raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")

    return (channels,sample_rate)

# Attributes
# NOTE(review): the methods of this class read self.logLevel and self.debugMode, while the
# declarations below are spelled loglevel and debugModel -- confirm which names are intended.
mode: PipeMode
""" Pipemode of the current object (default PipeMode.UNK_MODE)"""

loglevel: int
""" Log level of the underlying ffmpeg backend for this object (default 16)"""

debugModel: bool
""" Debug mode flag for this object (print debug info, default False)"""

channels: int
""" Number of audio channels (default -1) """

sample_rate: int
""" Sample rate of the audio (default -1) """

plannar: bool
""" Read/write data as plannar, i.e. not interleaved (default True) """

pipe: sp.Popen
""" pipe object to ffmpeg/ffprobe (default None)"""

frame_size: int
""" Number of samples per channel in one audio frame when reading (default -1)"""

filename: str
""" Filename of the file (default None)"""

frame_counter: FrameCounter
""" `FrameCounter` object to count elapsed time (default None)"""
Static method to get params (channels, sample_rate) of an audio file (or video file containing audio).
Parameters
filename : str or path. Audio file (or video file containing audio) to probe.
debug : bool (default False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
tuple Tuple containing (channels,sample_rate) of the file
@staticmethod
def get_time_in_sec(filename, *, debug=False, logLevel=16):
    """
    Static method to get the length in seconds of an audio file (or video file containing audio),
    as reported by ffprobe's ``format=duration`` entry.

    Parameters
    ----------
    filename : str or path.
        File to probe.

    debug : bool (default False).
        Print the ffprobe command line.

    logLevel: int (default 16).
        Log level to pass to the underlying ffprobe command.

    Returns
    ----------
    float or None
        Length in seconds of the file, None if the ffprobe output cannot be parsed as a float.
    """

    cmd = [AudioIO.paramProgram,  # ffprobe
           '-hide_banner',
           '-loglevel', str(logLevel),
           '-show_entries', 'format=duration',
           '-of', 'default=noprint_wrappers=1:nokey=1',
           str(filename)  # str() so that Path objects can be joined in the debug print
           ]

    if debug:
        print(' '.join(cmd))

    # call ffprobe; the context manager + communicate() close the pipes and wait for the
    # process, so no zombie process or open file descriptor is left behind
    # stdin=sp.PIPE to prevent manipulation of shell echo mode by ffmpeg
    with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as probe:
        raw_output, _ = probe.communicate()

    # transform bytes output to one single string
    output = raw_output.decode('utf-8')

    try:
        return float(output)
    except (ValueError, TypeError):
        return None
Static method to get length of an audio file (or video file containing audio) in seconds including milliseconds as decimal part (3 decimals).
Parameters
filename : str or path. Audio file (or video file containing audio) to probe.
debug : bool (default False). Show debug info.
log_level: int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
float Length in seconds of the file (including milliseconds as decimal part with 3 decimals), or None if the ffprobe output cannot be parsed as a number
@staticmethod
def get_params(filename, *, debug=False, logLevel=16):
    """
    Static method to get the parameters (channels, sample_rate) of an audio file (or of a video file containing audio).

    The values are parsed from the output of ffprobe called with
    ``-show_entries stream=channels,sample_rate``; the first occurrence of each
    entry in that output is used.

    Parameters
    ----------
    filename : str or path.
        Path of the audio file (or video file containing audio) to probe.

    debug : bool (default False).
        Show debug info (prints the ffprobe command line).

    logLevel : int (default 16).
        Log level to pass to the underlying ffprobe command.

    Returns
    ----------
    tuple
        Tuple containing (channels, sample_rate) of the file

    Raises
    ----------
    AudioIO.AudioIOException
        If the sample rate or the number of channels cannot be found in the ffprobe output.
    """
    cmd = [AudioIO.paramProgram,  # ffprobe
           '-hide_banner',
           '-loglevel', str(logLevel),
           '-show_entries', 'stream=channels,sample_rate',
           str(filename)  # str() so that pathlib.Path objects are accepted
           ]

    if debug:
        print(' '.join(cmd))

    # stdin=sp.PIPE prevents ffprobe from manipulating the shell echo mode.
    # The context manager + communicate() close both pipes and reap the child
    # process, so no file descriptor or zombie process is left behind.
    with sp.Popen(cmd, stdout=sp.PIPE, stdin=sp.PIPE) as probe:
        raw_output, _ = probe.communicate()

    # transform bytes output to one single string
    output = raw_output.decode('utf-8')

    pattern_sample_rate = r'sample_rate=(\d+)'
    pattern_channels = r'channels=(\d+)'

    # Search for values in the ffprobe output
    match_sample_rate = re.search(pattern_sample_rate, output, flags=re.MULTILINE)
    match_channels = re.search(pattern_channels, output, flags=re.MULTILINE)

    # Extract the values, failing loudly when one of them is missing
    if not match_sample_rate:
        raise AudioIO.AudioIOException("Unable to get audio sample_rate of '" + str(filename) + "'")
    if not match_channels:
        raise AudioIO.AudioIOException("Unable to get audio channels of '" + str(filename) + "'")

    return (int(match_channels.group(1)), int(match_sample_rate.group(1)))

# Attributes
mode: PipeMode
""" Pipe mode of the current object (default PipeMode.UNK_MODE)"""

loglevel: int
""" Log level of the underlying ffmpeg backend for this object (default 16)"""

# NOTE(review): the declared name is `debugModel`; presumably `debugMode` was meant - confirm against __init__.
debugModel: bool
""" Debug mode flag for this object (print debug info, default False)"""

channels: int
""" Number of audio channels (default -1) """

sample_rate: int
""" Sample rate of the audio stream (default -1) """

plannar: bool
""" Read/write data as planar, i.e. not interleaved (default True) """

pipe: sp.Popen
""" Pipe object to ffmpeg/ffprobe (default None)"""

frame_size: int
""" Number of samples per channel in one audio frame, as used to size and reshape frames when reading (default -1)"""

filename: str
""" Filename of the file (default None)"""

frame_counter: FrameCounter
""" `FrameCounter` object to count elapsed time (default None)"""
Static method to get the parameters (channels, sample_rate) of an audio file (or of a video file containing audio).
Parameters
filename : str or path. Path of the audio file (or video file containing audio) to probe.
debug : bool (default False). Show debug info.
logLevel : int (default 16). Log level to pass to the underlying ffmpeg/ffprobe command.
Returns
tuple Tuple containing (channels,sample_rate) of the file
def is_opened(self) -> bool:
    """
    Tell whether the underlying pipe to ffmpeg is currently usable.

    Returns
    ----------
    bool
        True when a pipe exists and its process has not terminated yet
        (``poll()`` returns None), False otherwise.
    """
    current_pipe = self.pipe
    if current_pipe is None:
        # no pipe was ever opened (or it was released)
        return False

    # poll() yields None as long as the child process is still running
    return current_pipe.poll() is None
Method to get status of the underlying pipe to ffmpeg.
Returns
bool True if pipe is opened (reading or writing mode), False if not.
def read_frame(self, with_timestamps = False):
    """
    Read next frame from the audio file

    Parameters
    ----------
    with_timestamps: bool optional (default False)
        If set to True, the method returns a ``FrameContainer`` with the audio and an array containing the associated timestamp(s)

    Returns
    ----------
    nparray or FrameContainer or None
        A float32 frame of shape (self.channels, self.frame_size) if self.plannar is True, or of shape
        (self.frame_size, self.channels), i.e. interleaved samples, if self.plannar is False.
        If with_timestamps is True, the frame is wrapped in a ``FrameContainer``.
        None is returned when a complete frame can no longer be read (end of stream).

    Raises
    ----------
    AudioIOException
        If no pipe is opened or if the pipe is not in read mode.
    """
    # Check params
    # - pipe exists
    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading a frame.".format(self.audioProgram))
    # - pipe is in read mode
    if self.mode != PipeMode.READ_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))

    if with_timestamps:
        # get elapsed time before reading: it is the time of the frame about to be read
        current_elapsed_time = self.get_elapsed_time()

    # one frame holds frame_size samples for EACH channel, 4 bytes per float32 sample
    # (the channel factor is required, else the reshape below fails for multi-channel audio)
    toread = self.frame_size * self.channels * 4
    buffer = self.pipe.stdout.read(toread)
    if len(buffer) != toread:
        # not considered as an error, no more frame, no exception
        return None

    # raw data from the pipe are interleaved: (samples, channels)
    audio = np.frombuffer(buffer, dtype = np.float32).reshape(self.frame_size, self.channels)

    # make it planar (channels, samples) if requested
    if self.plannar:
        audio = audio.T

    # increase frame_counter (counted in samples over all channels)
    self.frame_counter.frame_count += (self.frame_size * self.channels)

    if with_timestamps:
        return FrameContainer(1, audio, self.frame_size/self.sample_rate, current_elapsed_time)

    return audio
Read next frame from the audio file
Parameters
with_timestamps: bool optional (default False)
If set to True, the method returns a FrameContainer with the audio and an array containing the associated timestamp(s)
Returns
nparray or FrameContainer
A frame of shape (self.channels, self.frame_size) as defined in the reader/open call if self.plannar is True. A frame
of shape (self.frame_size, self.channels) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio data in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element).
def read_batch(self, numberOfFrames, with_timestamps = False):
    """
    Read next batch of audio frames from the file

    Parameters
    ----------
    numberOfFrames: int
        Number of desired audio frames within the batch. The last batch from the file may have fewer frames.

    with_timestamps: bool optional (default False)
        If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated with the frames

    Returns
    ----------
    nparray or FrameContainer or None
        A float32 batch of shape (n, self.channels, self.frame_size) if self.plannar is True, or of shape
        (n, self.frame_size, self.channels), i.e. interleaved samples, if self.plannar is False.
        If with_timestamps is True, the batch is wrapped in a ``FrameContainer``.
        None is returned when not even one complete frame can be read (end of stream).

    Raises
    ----------
    AudioIOException
        If no pipe is opened or if the pipe is not in read mode.
    """
    # Check params
    # - pipe exists
    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call open(...) before reading frames.".format(self.audioProgram))
    # - pipe is in read mode
    if self.mode != PipeMode.READ_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in read mode.".format(self.audioProgram, self.filename))

    if with_timestamps:
        # get elapsed time before reading: it is the time of the first frame of the batch
        current_elapsed_time = self.get_elapsed_time()

    # size of one frame: frame_size samples for each channel, 4 bytes per float32 sample
    samples_per_frame = self.frame_size * self.channels
    bytes_per_frame = samples_per_frame * 4

    # try to read complete batch
    buffer = self.pipe.stdout.read(bytes_per_frame * numberOfFrames)

    # check if we have at least 1 frame
    if len(buffer) < bytes_per_frame:
        # not considered as an error, no more frame, no exception
        return None

    # compute actual number of complete frames (the last batch may be shorter)
    actualNbFrames = len(buffer) // bytes_per_frame

    # decode only the complete frames: a trailing incomplete frame is ignored,
    # otherwise the reshape below would fail on a partial last batch
    batch = np.frombuffer(buffer, dtype = np.float32, count = actualNbFrames * samples_per_frame)
    batch = batch.reshape((actualNbFrames, self.frame_size, self.channels))

    # raw data are interleaved (n, samples, channels), make them planar if requested
    if self.plannar:
        batch = batch.transpose(0, 2, 1)

    # increase frame_counter (counted in samples over all channels)
    self.frame_counter.frame_count += (actualNbFrames * samples_per_frame)

    if with_timestamps:
        return FrameContainer(actualNbFrames, batch, self.frame_size/self.sample_rate, current_elapsed_time)

    return batch
Read next batch of audio from the file
Parameters
numberOfFrames: int Number of desired audio frames within the batch. The last batch from the file may have fewer frames.
with_timestamps: bool optional (default False) If set to True, the method returns a FrameContainer with the batch and an array containing the timestamps associated with the frames
Returns
nparray or FrameContainer
A batch of shape (n, self.channels, self.frame_size) as defined in the reader/open call if self.plannar is True. A batch
of shape (n, self.frame_size, self.channels) with interleaved data if self.plannar is False.
if with_timestamps is True, the return object is a FrameContainer with the audio batch in FrameContainer.data and
the associated timestamp in FrameContainer.timestamps as an array (one element for each audio frame).
def write_frame(self, audio) -> bool:
    """
    Write an audio frame to the file

    Parameters
    ----------
    audio: nparray
        The float32 audio frame to write. If self.plannar is True, shape must be
        (self.channels, nb_samples_per_channel). If self.plannar is False, data are interleaved:
        either 1D (self.channels*nb_samples_per_channel) or 2D (nb_samples_per_channel, self.channels).

    Returns
    ----------
    bool
        True if the whole frame was written to the pipe, False otherwise.

    Raises
    ----------
    AudioIOException
        If no pipe is opened, if the pipe is not in write mode, or if the shape or dtype of ``audio`` is wrong.
    """
    # Check params
    # - pipe exists
    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
    # - pipe is in write mode
    if self.mode != PipeMode.WRITE_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
    # - shape of audio is fine: channels first when planar, a whole number of
    #   samples per channel when interleaved
    if self.plannar:
        wrong_shape = audio.shape[0] != self.channels
    else:
        wrong_shape = audio.size % self.channels != 0
    if wrong_shape:
        raise self.AudioIOException("Wong audio shape: {} expected ({},{}).".format(audio.shape,self.channels,self.frame_size))
    # - type of data is Float32
    if audio.dtype != np.float32:
        raise self.AudioIOException("Wong audio type: {} expected np.float32.".format(audio.dtype))

    # Raw float32 PCM is interleaved, the same layout read_frame gets from the pipe
    # as (samples, channels): a planar (channels, samples) frame is transposed first.
    # NOTE(review): assumes the writer pipe expects interleaved samples - confirm against create().
    if self.plannar:
        audio = audio.T

    # guarantee a C contiguous array, serialized in row-major order
    buffer = np.ascontiguousarray(audio).tobytes()

    # write frame
    if self.pipe.stdin.write( buffer ) < len(buffer):
        print( f"Error writing frame to {self.filename}" )
        return False

    # increase frame_counter (counted in samples over all channels)
    self.frame_counter.frame_count += (self.frame_size * self.channels)

    return True
Write an audio frame to the file
Parameters
audio: nparray The audio frame to write to the video file of shape (self.channels,nb_samples_per_channel) if plannar is True else (self.channels*nb_samples_per_channel).
Returns
bool Writing was successful or not.
def write_batch(self, batch):
    """
    Write a batch of audio frames to the file

    Parameters
    ----------
    batch: nparray
        The float32 batch of audio frames to write, always 3D: shape (n, self.channels, nb_samples_per_channel)
        if self.plannar is True, else (n, nb_samples_per_channel, self.channels) of interleaved audio data.
        These are the shapes returned by ``read_batch``.

    Returns
    ----------
    bool
        True when the batch was written.

    Raises
    ----------
    AudioIOException
        If no pipe is opened, if the pipe is not in write mode, if the shape or dtype of ``batch``
        is wrong, or if the batch could not be entirely written to the pipe.
    """
    # Check params
    # - pipe exists
    if self.pipe is None:
        raise self.AudioIOException("No pipe opened to {}. Call create(...) before writing frames.".format(self.audioProgram))
    # - pipe is in write mode
    if self.mode != PipeMode.WRITE_MODE:
        raise self.AudioIOException("Pipe to {} for '{}' not opened in write mode.".format(self.audioProgram, self.filename))
    # - batch is 3D
    if batch.ndim !=3:
        raise self.AudioIOException("Wrong batch shape: {} expected 3 dimensions (n, n_channels, n_samples_per_channel).".format(batch.shape))
    # - channel axis matches: axis 1 for planar data, axis 2 for interleaved data
    channel_axis = 1 if self.plannar else 2
    if batch.shape[channel_axis] != self.channels:
        raise self.AudioIOException("Wrong audio channels in batch: {} expected {} {}.".format(batch.shape[channel_axis], self.channels, batch.shape))
    # - type of data is Float32, as checked in write_frame (other types would be written with a wrong sample width)
    if batch.dtype != np.float32:
        raise self.AudioIOException("Wrong batch type: {} expected np.float32.".format(batch.dtype))

    # Raw float32 PCM is interleaved, the same layout read_batch gets from the pipe
    # as (n, samples, channels): a planar (n, channels, samples) batch is transposed first.
    # NOTE(review): assumes the writer pipe expects interleaved samples - confirm against create().
    if self.plannar:
        batch = batch.transpose(0, 2, 1)

    # guarantee a C contiguous array, serialized in row-major order
    buffer = np.ascontiguousarray(batch).tobytes()

    # write batch
    if self.pipe.stdin.write( buffer ) < len(buffer):
        raise self.AudioIOException("Error writing batch to '{}'.".format(self.filename))

    # increase frame_counter, counted in samples over all channels like read_frame,
    # read_batch and write_frame do; int conversion avoids confusion with time as float
    self.frame_counter.frame_count += int(batch.size)

    return True
Write a batch of audio frame to the file
Parameters
batch: nparray The batch of audio frames to write to the video file of shape (n,self.channels,nb_samples_per_channel) if plannar is True else (n,self.channels*nb_samples_per_channel) of interleaved audio data.
Returns
bool Writing was successful or not.
class AudioIOException(Exception):
    """
    Exception type raised by the AudioIO class.

    The text given at construction is kept in the ``message`` attribute and is
    also forwarded to ``Exception`` so that ``str(exc)`` returns it.
    """
    def __init__(self, message="Error while reading/writing video occurs"):
        # initialise the base class first, then keep a direct handle on the text
        super().__init__(message)
        self.message = message
Dedicated exception class for AudioIO class.
class AudioFormat(Enum):
    """
    Enumeration of the supported audio sample formats.

    For the moment a single format is available: 32-bit float PCM, little endian.
    """
    # default (and currently only) format
    PCM32LE = "pcm_f32le"
Enum class for supported input video type: 32-bit float is the only supported type for the moment.