当前位置：首页 > news >正文

FFmpeg 自定义IO CONTEXT实现音频解码，以及seek函数

news 2025/7/2 22:36:41

在需要从内存中的音频流 buffer 解码的场景下，我们需要实现自己的 IO context（AVIOContext），让 FFmpeg 从 buffer 中读取数据并解码，可参考 FFmpeg 官方示例：doc/examples/avio_reading.c

是否需要实现 AVIOContext 中的 seek 回调，取决于要解码的格式：大部分格式不需要 seek，但有些格式需要。比如 Apple 开发的 ALAC 格式，它通常封装在 MP4/M4A 容器中，而容器的 moov box（保存索引等元数据）有时位于文件的结尾——这就很坑，因为一般都是在开头。所以在解析这类音频时，需要先 seek 到文件的结尾获取 moov 的信息，然后再 seek 回来继续解析格式并解码。

关于moov格式的坑：【开发笔记】终于，我们解决了iOS播放器的一个Bug... - 哔哩哔哩

如果你不想实现seek，有没有办法直接把音频文件的moov信息从结尾提到开头呢？也是有的

ffmpeg -i ./old.mp4 -movflags faststart -c copy new.mp4

通过这个命令转换后再去解码，文件信息就在开头，就可以不用seek了。

可以通过以下命令查看文件头（各个 box/atom）的信息：

ffprobe -v trace filename

直接贴代码：

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/file.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>#define BUF_SIZE 20480FILE *in_file = NULL;struct buffer_data {uint8_t *ptr;uint8_t *ori_ptr;  // for seek file streamsize_t size;       ///< size left in the buffersize_t file_size;  ///< size of the file to decode
};static char *av_get_err(int errnum) {static char err_buf[128] = {0};av_strerror(errnum, err_buf, 128);return err_buf;
}static void print_sample_format(const AVFrame *frame) {printf("ar-samplerate: %uHz\n", frame->sample_rate);printf("ac-channel: %u\n", frame->channels);printf("f-format: %u\n",frame->format);  // 格式需要注意，实际存储到本地文件时已经改成交错模式
}
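/*
 * Alternative read callback that reads straight from the input FILE instead
 * of the in-memory buffer (kept for reference only, not compiled):
 *
 * int read_size;
 *
 * static int read_packet(void *opaque, uint8_t *buf, int buf_size) {
 *   //    FILE *in_file = (FILE *)opaque;
 *   read_size = fread(buf, 1, buf_size, in_file);
 *   printf("read_packet read_size:%d, buf_size:%d\n", read_size, buf_size);
 *   if (read_size <= 0) {
 *     return AVERROR_EOF;  // all data has been read
 *   }
 *   return read_size;
 * }
 */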
/*
 * AVIOContext read callback: copy up to buf_size bytes from the in-memory
 * input into buf and advance the cursor.
 *
 * opaque   - the struct buffer_data describing the in-memory input
 * buf      - destination supplied by libavformat
 * buf_size - capacity of buf in bytes
 *
 * Returns the number of bytes copied, or AVERROR_EOF when nothing is left
 * (or when buf_size is not positive).
 */
static int read_packet(void *opaque, uint8_t *buf, int buf_size) {
  struct buffer_data *bd = (struct buffer_data *)opaque;

  /* Compare in size_t so a non-positive buf_size cannot turn into a huge
   * unsigned length. */
  size_t wanted = buf_size > 0 ? (size_t)buf_size : 0;
  size_t count = wanted < bd->size ? wanted : bd->size;
  if (!count) return AVERROR_EOF;

  /* copy internal buffer data to buf */
  memcpy(buf, bd->ptr, count);
  bd->ptr += count;
  bd->size -= count;
  return (int)count; /* count <= buf_size, so it fits in int */
}

// Some inputs, e.g. ALAC (Apple Lossless) audio in an MP4/M4A container, may
// keep the moov box at the end of the file. The demuxer then has to seek to
// the end of the file to parse the moov info and seek back to the front
// afterwards, so the custom I/O needs a seek callback as well.
static int64_t seek_packet(void *opaque, int64_t offset, int whence) {//    FILE *in_file = (FILE *)opaque;struct buffer_data *bd = (struct buffer_data *)opaque;int64_t ret = -1;printf("whence=%d , offset=%lld \n", whence, offset);switch (whence) {case AVSEEK_SIZE:printf("AVSEEK_SIZE \n");ret = bd->file_size;break;case SEEK_SET:printf("SEEK_SET \n");bd->ptr = bd->ori_ptr + offset;bd->size = bd->file_size - offset;ret = bd->ptr;break;case SEEK_CUR:printf("SEEK_cur \n");break;case SEEK_END:printf("SEEK_end \n");break;}return ret;
}static void decode(AVCodecContext *dec_ctx, AVPacket *packet, AVFrame *frame,FILE *outfile) {int ret = 0;ret = avcodec_send_packet(dec_ctx, packet);if (ret == AVERROR(EAGAIN)) {printf("Receive_frame and send_packet both returned EAGAIN, which is an API ""violation.\n");} else if (ret < 0) {printf("Error submitting the packet to the decoder, err:%s\n",av_get_err(ret));return;}while (ret >= 0) {ret = avcodec_receive_frame(dec_ctx, frame);if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {return;} else if (ret < 0) {printf("Error during decoding\n");exit(1);}if (!packet) {printf("get flush frame\n");}int out_sample_bytes = av_get_bytes_per_sample(dec_ctx->sample_fmt);int out_sample_is_plannar = av_sample_fmt_is_planar(dec_ctx->sample_fmt);// printf("debug %d is out_sample_is_plannar : %d \n", __LINE__,//       out_sample_is_plannar);// print_sample_format(frame);if (out_sample_bytes < 0) {/* This should not occur, checking just for paranoia */fprintf(stderr, "Failed to calculate data size\n");exit(1);}//    printf("debug %d out_sample_bytes: %d samples: %d ch:%d\n", __LINE__,//           out_sample_bytes, frame->nb_samples,//           dec_ctx->ch_layout.nb_channels);if (out_sample_is_plannar) {  // plannar frames/**P表示Planar（平面），其数据格式排列方式为 :LLLLLLRRRRRRLLLLLLRRRRRRLLLLLLRRRRRRL...（每个LLLLLLRRRRRR为一个音频帧）而不带P的数据格式（即交错排列）排列方式为：LRLRLRLRLRLRLRLRLRLRLRLRLRLRLRLRLRLRL...（每个LR为一个音频样本）播放范例：   ffplay -ar 48000 -ac 2 -f f32le believe.pcm并不是每一种都是这样的格式*/// 这里的写法不是通用，通用要调用重采样的函数去实现// 这里只是针对解码出来是planar格式的转换for (int i = 0; i < frame->nb_samples; i++) {for (int ch = 0; ch < frame->channels; ch++) {// for(int ch = 0; ch < 1; ch++) {fwrite(frame->data[ch] + out_sample_bytes * i, 1, out_sample_bytes,outfile);}}} else  // packed framefwrite(frame->data[0],frame->nb_samples * out_sample_bytes * frame->channels, 1,outfile);}
}int main(int argc, char **argv) {if (argc != 3) {printf("usage: %s <intput file> <out file>\n", argv[0]);return -1;}av_log_set_level(AV_LOG_TRACE);const char *in_file_name = argv[1];const char *out_file_name = argv[2];//    FILE *in_file = NULL;FILE *out_file = NULL;// 1. 打开参数文件in_file = fopen(in_file_name, "rb");if (!in_file) {printf("open file %s failed\n", in_file_name);return -1;}out_file = fopen(out_file_name, "wb+");if (!out_file) {printf("open file %s failed\n", out_file_name);return -1;}struct buffer_data bd = {0};uint8_t *buffer = NULL;size_t buffer_size;int ret = av_file_map(in_file_name, &buffer, &buffer_size, 0, NULL);printf("file size: %d\n", buffer_size);bd.ptr = buffer;bd.ori_ptr = buffer;bd.file_size = buffer_size;bd.size = buffer_size;//    AVInputFormat* in_fmt = av_find_input_format("flac");// 2自定义 iouint8_t *io_buffer = av_malloc(BUF_SIZE);// AVIOContext *avio_ctx = avio_alloc_context(io_buffer, BUF_SIZE, 0, (void// *)in_file,AVIOContext *avio_ctx = avio_alloc_context(io_buffer, BUF_SIZE, 0, &bd,read_packet, NULL, seek_packet);// avio_alloc_context(io_buffer, BUF_SIZE, 0, &bd, read_packet, NULL, NULL);AVFormatContext *format_ctx = avformat_alloc_context();format_ctx->pb = avio_ctx;format_ctx->flags = AVFMT_FLAG_CUSTOM_IO;// int ret = avformat_open_input(&format_ctx, NULL, in_fmt, NULL);// 从输入源读取封装格式文件头ret = avformat_open_input(&format_ctx, NULL, NULL, NULL);if (ret < 0) {printf("avformat_open_input failed:%s\n", av_err2str(ret));return -1;}// 从输入源读取一段数据，尝试解码，以获取流信息if ((ret = avformat_find_stream_info(format_ctx, NULL)) < 0) {av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");return ret;}av_dump_format(format_ctx, 0, NULL, 0);int audioStreamIndex =av_find_best_stream(format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);AVStream *st = format_ctx->streams[audioStreamIndex];// 编码器查找// AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_ALAC);// set codec id from famat paramsAVCodec *codec = avcodec_find_decoder(st->codecpar->codec_id);if 
(!codec) {printf("avcodec_find_decoder failed\n");return -1;}AVCodecContext *codec_ctx = avcodec_alloc_context3(codec);if (!codec_ctx) {printf("avcodec_alloc_context3 failed\n");return -1;}// copy params from format to codecret = avcodec_parameters_to_context(codec_ctx, format_ctx->streams[audioStreamIndex]->codecpar);if (ret < 0) {printf("Failed to copy in_stream codecpar to codec context\n");}ret = avcodec_open2(codec_ctx, codec, NULL);if (ret < 0) {printf("avcodec_open2 failed:%s\n", av_err2str(ret));return -1;}printf("%d debug codec_ctx->sample_rate: %d\n", __LINE__,codec_ctx->sample_rate);AVPacket *packet = av_packet_alloc();AVFrame *frame = av_frame_alloc();while (1) {ret = av_read_frame(format_ctx, packet);if (ret < 0) {printf("av_read_frame failed:%s\n", av_err2str(ret));break;}decode(codec_ctx, packet, frame, out_file);}printf("read file finish\n");decode(codec_ctx, NULL, frame, out_file);fclose(in_file);fclose(out_file);av_frame_free(frame);av_packet_free(packet);avformat_close_input(&format_ctx);avcodec_free_context(&codec_ctx);printf("main finish\n");return 0;
}

m4a moov 格式解析：MP4格式解析---M4A是MP4中的音频部分_m4a格式解析_一个专研技术的小蜜蜂的博客-CSDN博客
mp4文件格式解析 - 知乎
参考：ffmpeg 利用AVIOContext自定义IO 输出结果写buffer - 知乎

Creating Custom FFmpeg IO-Context - CodeProject

ffmpeg AVIOContext 自定义 IO 及 seek

FFmpeg 自定义IO CONTEXT实现音频解码，以及seek函数

相关文章：

FFmpeg 自定义IO CONTEXT实现音频解码，以及seek函数

技能升级（2023寒假每日一题 13）

低频量化之可转债配债数据及策略 - 全网独家

Code area 和Data area的区别

Oracle LiveLabs DB Security （数据库安全）实验汇总

PAT A1012 The Best Rank

“我和AI抠图网站的秘密情缘“

最多能打多少场比赛呢

鸿蒙Hi3861学习二-程序烧录与日志输出

typescript Awaited＜Type＞教程用法

AES硬件运算单元

mulesoft MCIA 破釜沉舟备考 2023.04.28.26 (易错题)

k210单片机定时器的应用

linux0.12-7-1

设置文本框自动填充背景颜色为白色

Bitmap引起的OOM问题

【JavaEE初阶】认识线程（Thread）

自动化运维工具一Ansible Roles实战

json 中有递归parentId节点转 c#实体类时如何处理

给大家介绍几个手机冷门但好用的小技巧

Xshell远程连接Kali（默认 | 私钥）Note版

【WiFi帧结构】

SciencePlots——绘制论文中的图片

2.Vue编写一个app

Qwen3-Embedding-0.6B深度解析：多语言语义检索的轻量级利器

Python如何给视频添加音频和字幕

数据库分批入库

【C++从零实现Json-Rpc框架】第六弹 —— 服务端模块划分

初学 pytest 记录

Java毕业设计：WML信息查询与后端信息发布系统开发