def extract_audio(video_path: str, audio_output_path: str) -> str:
    """
    Pull the audio track out of a video file and write it to disk as MP3.

    :param video_path: Path to the video file.
    :param audio_output_path: Path to save the extracted audio file.
    :return: Confirmation message with the path to the saved audio file.
    """
    # Build the ffmpeg pipeline step by step: input -> mp3 output -> run.
    source_stream = ffmpeg.input(video_path)  # type: ignore
    mp3_stream = source_stream.output(audio_output_path, format="mp3")  # type: ignore
    mp3_stream.run(quiet=True, overwrite_output=True)  # type: ignore
    return f"Audio extracted and saved to {audio_output_path}."
def transcribe_audio_with_timestamps(audio_path: str) -> str:
    """
    Transcribes the audio file with timestamps using the Whisper model.

    Each segment in the model's result is rendered on its own line as
    ``[start - end] text`` with both times formatted to two decimals.

    :param audio_path: Path to the audio file.
    :return: Transcription with timestamps, one newline-terminated line per
        segment (an empty string when the result has no segments).
    """
    model = whisper.load_model("base")  # type: ignore
    result: Dict[str, Any] = model.transcribe(  # type: ignore
        audio_path, task="transcribe", language="en", verbose=False
    )
    segments: List[Dict[str, Any]] = result["segments"]

    # Collect the lines and join once rather than growing a string with `+=`
    # on every iteration.
    lines = [
        f"[{segment['start']:.2f} - {segment['end']:.2f}] {segment['text']}\n"
        for segment in segments
    ]
    return "".join(lines)
def get_video_length(video_path: str) -> str:
    """
    Returns the length of the video in seconds.

    The duration is computed as frame count divided by frames per second,
    both as reported by OpenCV for the file.

    :param video_path: Path to the video file.
    :return: Human-readable message stating the duration of the video in seconds.
    :raises IOError: If the video cannot be opened, or if no usable frame rate
        is reported for it (so the duration cannot be computed).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video file {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        # A zero frame rate would otherwise surface as a bare
        # ZeroDivisionError; `not fps > 0` also rejects NaN.
        if not fps > 0:
            raise IOError(f"Cannot determine frame rate of video file {video_path}")

        duration = frame_count / fps
    finally:
        # Always give the capture handle back, including on the error paths.
        cap.release()

    return f"The video is {duration:.2f} seconds long."
def save_screenshot(video_path: str, timestamp: float, output_path: str) -> None:
    """
    Captures a screenshot at the specified timestamp and saves it to the output path.

    :param video_path: Path to the video file.
    :param timestamp: Timestamp in seconds.
    :param output_path: Path to save the screenshot. The file format is determined by the extension in the path.
    :raises IOError: If the video cannot be opened, the frame at ``timestamp``
        cannot be read, or the image cannot be written to ``output_path``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video file {video_path}")

        # Convert the timestamp to a frame index and seek to it.
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_number = int(timestamp * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

        ret, frame = cap.read()
        if not ret:
            raise IOError(f"Failed to capture frame at {timestamp:.2f}s")

        # imwrite reports failure through its return value; surface it rather
        # than returning as if the file had been saved.
        if not cv2.imwrite(output_path, frame):
            raise IOError(f"Failed to write screenshot to {output_path}")
    finally:
        # Previously the release was skipped whenever an error was raised.
        cap.release()
async def transcribe_video_screenshot(video_path: str, timestamp: float, model_client: ChatCompletionClient) -> str:
    """
    Describes the content of a video screenshot captured at the specified timestamp.

    The frame is JPEG-encoded, embedded as a base64 ``data:`` URI, and sent with
    a text prompt to the supplied chat-completion client.

    :param video_path: Path to the video file.
    :param timestamp: Timestamp in seconds.
    :param model_client: ChatCompletionClient instance used to describe the image.
    :return: Description of the screenshot content, or a short failure message
        if no frame was captured or the frame could not be encoded.
    """
    screenshots = get_screenshot_at(video_path, [timestamp])
    if not screenshots:
        return "Failed to capture screenshot."

    _, frame = screenshots[0]

    # Convert the frame to JPEG bytes, then to base64 for the data URI.
    encoded_ok, buffer = cv2.imencode(".jpg", frame)
    if not encoded_ok:
        return "Failed to encode screenshot."
    frame_base64 = base64.b64encode(buffer.tobytes()).decode("utf-8")
    screenshot_uri = f"data:image/jpeg;base64,{frame_base64}"

    # The prompt previously contained a literal, never-filled "{}" placeholder;
    # interpolate the timestamp so the model is told where the frame comes from.
    prompt = f"Following is a screenshot from the video at {timestamp} seconds. Describe what you see here."
    messages = [
        UserMessage(
            content=[prompt, AGImage.from_uri(screenshot_uri)],
            source="tool",
        )
    ]

    result = await model_client.create(messages=messages)
    return str(result.content)
def get_screenshot_at(video_path: str, timestamps: List[float]) -> List[Tuple[float, np.ndarray[Any, Any]]]:
    """
    Captures screenshots at the specified timestamps and returns them as Python objects.

    :param video_path: Path to the video file.
    :param timestamps: List of timestamps in seconds.
    :return: List of tuples containing timestamp and the corresponding frame (image).
             Each frame is a NumPy array (height x width x channels).
    :raises IOError: If the video cannot be opened, no usable frame rate is
        reported for it, or a frame cannot be read at a requested timestamp.
    :raises ValueError: If a timestamp lies outside ``[0, duration]``.
    """
    screenshots: List[Tuple[float, np.ndarray[Any, Any]]] = []

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video file {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        # A zero frame rate would otherwise surface as a bare
        # ZeroDivisionError; `not fps > 0` also rejects NaN.
        if not fps > 0:
            raise IOError(f"Cannot determine frame rate of video file {video_path}")
        duration = total_frames / fps

        for timestamp in timestamps:
            if not 0 <= timestamp <= duration:
                raise ValueError(f"Timestamp {timestamp:.2f}s is out of range [0s, {duration:.2f}s]")

            # Seek to the frame index that corresponds to the timestamp.
            frame_number = int(timestamp * fps)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()
            if not ret:
                raise IOError(f"Failed to capture frame at {timestamp:.2f}s")

            screenshots.append((timestamp, frame))
    finally:
        # Release the capture on every exit path; the raises above used to
        # skip it.
        cap.release()

    return screenshots