Choosing 3GP signals the creator’s intent: to reach viewers on any phone, not just the high‑end smartphone crowd.
# Extract the audio track for Whisper as a 16 kHz, mono, 16-bit PCM WAV file.
audio_tmp = pathlib.Path("audio.wav")
subprocess.run(
    [
        "ffmpeg",
        # Global options go before the input so ffmpeg does not see them as
        # trailing options after the output file.
        "-hide_banner",
        "-loglevel", "error",
        # The output path is fixed, so overwrite a file left by an earlier run
        # instead of letting ffmpeg prompt on stdin or abort.
        "-y",
        "-i", str(video_path),
        "-vn",                   # skip the video stream
        "-acodec", "pcm_s16le",  # 16-bit little-endian PCM
        "-ar", "16000",          # 16 kHz sample rate
        "-ac", "1",              # single (mono) channel
        str(audio_tmp),
    ],
    check=True,  # raise CalledProcessError when ffmpeg exits non-zero
)
transcript, segments = transcribe(audio_tmp)
def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* with the "base" model.

    The model is obtained from ``load_model("base")`` (presumably Whisper's
    loader -- confirm against the file's imports) and is loaded anew on every
    call. The path is converted with ``str()`` before it is handed to the
    model's ``transcribe`` method.

    Returns:
        A ``(text, segments)`` tuple taken from the ``"text"`` and
        ``"segments"`` keys of the model's result.
    """
    output = load_model("base").transcribe(str(audio_path))
    return output["text"], output["segments"]
If you notice any of these elements, point them out in the blog post – it shows you’ve actually watched the video and not just guessed.