From 96b9f927f164b7393d0099f9e5ef5f8c12347a8d Mon Sep 17 00:00:00 2001 From: ljs-darryl Date: Mon, 3 Nov 2025 17:02:15 +0800 Subject: [PATCH 1/2] update ffmepg spatial enc --- scripts/ffmpeg_enc.patch | 270 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 scripts/ffmpeg_enc.patch diff --git a/scripts/ffmpeg_enc.patch b/scripts/ffmpeg_enc.patch new file mode 100644 index 0000000..f825ed7 --- /dev/null +++ b/scripts/ffmpeg_enc.patch @@ -0,0 +1,270 @@ +diff --git a/libavcodec/packet.h b/libavcodec/packet.h +index 71bc2e0575..352d11f665 100644 +--- a/libavcodec/packet.h ++++ b/libavcodec/packet.h +@@ -353,7 +353,10 @@ enum AVPacketSideDataType { + * If its value becomes huge, some code using it + * needs to be updated as it assumes it to be smaller than other limits. + */ +- AV_PKT_DATA_NB ++ AV_PKT_DATA_NB, ++ AV_PKT_DATA_ECAM, ++ AV_PKT_DATA_ICAM, ++ AV_PKT_DATA_DISTORTION_COEFFICIENTS + }; + + /** +diff --git a/libavformat/movenc.c b/libavformat/movenc.c +index 4bc8bd1b2a..deace44642 100644 +--- a/libavformat/movenc.c ++++ b/libavformat/movenc.c +@@ -2057,7 +2057,13 @@ static unsigned int mov_find_codec_tag(AVFormatContext *s, MOVTrack *track) + !av_match_ext(s->url, "m4b")) + av_log(s, AV_LOG_WARNING, "Warning, extension is not .m4a nor .m4v " + "Quicktime/Ipod might not play the file\n"); +- ++ if (track->par->codec_tag == MKTAG('r', 'a', 'w', '1')){ ++ //track->tag = track->par->codec_tag; ++ return track->par->codec_tag; ++ } ++ else if (track->par->codec_tag == MKTAG('m', 'e', 't', 't')){ ++ return track->par->codec_tag; ++ } + if (track->mode == MODE_MOV) { + return mov_get_codec_tag(s, track); + } else +@@ -2630,6 +2636,8 @@ static int mov_write_ccst_tag(AVIOContext *pb) + return update_size(pb, pos); + } + ++ ++ + static int mov_write_aux_tag(AVIOContext *pb, const char *aux_type) + { + int64_t pos = avio_tell(pb); +@@ -2640,6 +2648,177 @@ static int mov_write_aux_tag(AVIOContext *pb, const char *aux_type) + return update_size(pb, pos); + } + ++static void mov_write_dfmt_tag(AVIOContext *pb, MOVTrack *track){ ++ //int64_t pos = avio_tell(pb); ++ avio_wb32(pb, 20); // size ++ ffio_wfourcc(pb, "dfmt"); ++ AVDictionaryEntry *t1 = av_dict_get(track->st->metadata, "data_accuracy", NULL, 0); ++ AVDictionaryEntry *t2 = av_dict_get(track->st->metadata, "depth_legal_range", NULL, 0); ++ AVDictionaryEntry *t3 = av_dict_get(track->st->metadata, "depth_data_precision", NULL, 0); ++ avio_wb32(pb, t1 ? atoi(t1->value) : 0); ++ avio_wb32(pb, t2 ? atoi(t2->value) : 0); ++ ffio_wfourcc(pb, t3 ? t3->value : "none"); ++} ++ ++static void mov_write_raw_tag(AVIOContext *pb, MOVTrack *track){ ++ //int64_t pos = avio_tell(pb); ++ avio_wb32(pb, 28); // size ++ ffio_wfourcc(pb, "rawC"); ++ mov_write_dfmt_tag(pb, track); ++} ++ ++static void mov_write_hfov_tag_ext(AVIOContext *pb, MOVTrack *track){ ++ AVDictionaryEntry *t = av_dict_get(track->st->metadata, "hfov", NULL, 0); ++ if (t) { ++ avio_wb32(pb, 12); // size ++ ffio_wfourcc(pb, "hfov"); ++ avio_wb32(pb, atoi(t->value)); ++ } ++} ++ ++static void mov_write_vfov_tag(AVIOContext *pb, MOVTrack *track){ ++ AVDictionaryEntry *t = av_dict_get(track->st->metadata, "vfov", NULL, 0); ++ if (t) { ++ avio_wb32(pb, 12); // size ++ ffio_wfourcc(pb, "vfov"); ++ avio_wb32(pb, atoi(t->value)); ++ } ++} ++ ++static void mov_write_ecam_tag(AVIOContext *pb, MOVTrack *track){ ++ AVPacketSideData *sd = av_packet_side_data_get(track->st->codecpar->coded_side_data, ++ track->st->codecpar->nb_coded_side_data, AV_PKT_DATA_ECAM); ++ AVDictionaryEntry *t2 = av_dict_get(track->st->metadata, "cam_count", NULL, 0); ++ av_log(NULL, AV_LOG_INFO, "ecam length: %d, cam_count: %d", sd->size, atoi(t2->value)); ++ int length ; ++ double mantissa, dvalue; ++ int exponent; ++ if (sd && t2) { ++ length = sd->size; ++ int single_data_size = length / atoi(t2->value); ++ for (int i = 0; i < atoi(t2->value); i++) { ++ int64_t pos = avio_tell(pb); ++ int16_t view_id = i & 0x3FF; ++ avio_wb32(pb, 0); // size ++ ffio_wfourcc(pb, "ecam"); ++ avio_wb32(pb, 0); ++ avio_wb16(pb, view_id); ++ avio_w8(pb, 31); ++ avio_w8(pb, 31); ++ for (int j = 0 ; j < single_data_size/sizeof(double); j++) { ++ memcpy(&dvalue, sd->data + i * single_data_size + j * sizeof(double), sizeof(double)); ++ mantissa = frexp(dvalue, &exponent); ++ avio_w8(pb, exponent); ++ avio_write(pb, (unsigned char*)&mantissa, sizeof(double)); ++ } ++ update_size(pb, pos); ++ } ++ } ++} ++static void mov_write_icam_tag(AVIOContext *pb, MOVTrack *track) { ++ AVPacketSideData *sd = av_packet_side_data_get(track->st->codecpar->coded_side_data, ++ track->st->codecpar->nb_coded_side_data, AV_PKT_DATA_ICAM); ++ AVDictionaryEntry *t2 = av_dict_get(track->st->metadata, "cam_count", NULL, 0); ++ av_log(NULL, AV_LOG_INFO, "icam length: %d, cam_count: %d", sd->size, atoi(t2->value)); ++ int length ; ++ double mantissa, dvalue; ++ int exponent; ++ if (sd && t2) { ++ length = sd->size; ++ int single_data_size = length / atoi(t2->value); ++ for (int i = 0; i < atoi(t2->value); i++) { ++ int64_t pos = avio_tell(pb); ++ int16_t view_id = i & 0x3FF; ++ avio_wb32(pb, 0); // size ++ ffio_wfourcc(pb, "icam"); ++ avio_wb32(pb, 0); ++ avio_wb16(pb, view_id); ++ avio_wb32(pb, 31); ++ avio_wb32(pb, 31); ++ avio_wb32(pb, 31); ++ for (int j = 0 ; j < single_data_size/sizeof(double); j++) { ++ memcpy(&dvalue, sd->data + i * single_data_size + j * sizeof(double), sizeof(double)); ++ mantissa = frexp(dvalue, &exponent); ++ avio_w8(pb, exponent); ++ avio_write(pb, (unsigned char*)&mantissa, sizeof(double)); ++ } ++ update_size(pb, pos); ++ } ++ } ++} ++static void mov_write_dstr_tag(AVIOContext *pb, MOVTrack *track){ ++ uint8_t end[1] = {0x00}; ++ AVDictionaryEntry *t1 = av_dict_get(track->st->metadata, "distortion_model", NULL, 0); ++ AVDictionaryEntry *t2 = av_dict_get(track->st->metadata, "camera_model", NULL, 0); ++ AVPacketSideData *sd = av_packet_side_data_get(track->st->codecpar->coded_side_data, ++ track->st->codecpar->nb_coded_side_data, AV_PKT_DATA_DISTORTION_COEFFICIENTS); ++ av_log(NULL, AV_LOG_INFO, "distortion_coefficients length: %d", sd->size); ++ if (t1 && t2 && sd) { ++ int64_t pos = avio_tell(pb); ++ avio_wb32(pb, 0); // size ++ ffio_wfourcc(pb, "dstr"); ++ avio_wb32(pb, 0); ++ avio_write(pb, t1->value, strlen(t1->value)); ++ avio_write(pb, end, 1); ++ avio_write(pb, t2->value, strlen(t2->value)); ++ avio_write(pb, end, 1); ++ avio_write(pb, sd->data, sd->size); ++ update_size(pb, pos); ++ } ++} ++ ++static void mov_write_tbtm_tag(AVIOContext *pb, MOVTrack *track){ ++ AVDictionaryEntry *t = av_dict_get(track->st->metadata, "track_base_time", NULL, 0); ++ if (t) { ++ avio_wb32(pb, 16); // size ++ ffio_wfourcc(pb, "tbtm"); ++ avio_wb64(pb, strtoll(t->value, NULL, 10)); ++ } ++} ++ ++static int mov_write_mett_tag(AVIOContext *pb, MOVTrack *track){ ++ int64_t pos = avio_tell(pb); ++ AVDictionaryEntry *t = av_dict_get(track->st->metadata, "mime_type", NULL, 0); ++ if (t) { ++ avio_wb32(pb, 0); // size ++ ffio_wfourcc(pb, "mett"); ++ uint8_t end[1] = {0x00}; ++ av_log(NULL, AV_LOG_DEBUG, "mime_type: %s length: %d \n", t->value, strlen(t->value)); ++ avio_write(pb, t->value, strlen(t->value)); ++ avio_write(pb, end, 1); ++ avio_write(pb, t->value, strlen(t->value)); ++ avio_write(pb, end, 1); ++ ++ AVDictionaryEntry *t1 = av_dict_get(track->st->metadata, "pose_coordinate", NULL, 0); ++ AVDictionaryEntry *t2 = av_dict_get(track->st->metadata, "data_accuracy", NULL, 0); ++ AVDictionaryEntry *t3 = av_dict_get(track->st->metadata, "pose_position", NULL, 0); ++ ++ if (t1) { ++ avio_wb32(pb, 16); ++ ffio_wfourcc(pb, "pcrd"); ++ avio_wb32(pb, 0); ++ avio_wb32(pb, atoi(t1->value)); ++ } ++ ++ if (t2) { ++ avio_wb32(pb, 16); ++ ffio_wfourcc(pb, "dtac"); ++ avio_wb32(pb, 0); ++ avio_wb32(pb, atoi(t2->value)); ++ } ++ ++ if (t3) { ++ avio_wb32(pb, 16); ++ ffio_wfourcc(pb, "ppos"); ++ avio_wb32(pb, 0); ++ ffio_wfourcc(pb, t3->value); ++ } ++ mov_write_tbtm_tag(pb, track); ++ } ++ return update_size(pb, pos); ++} ++ ++ + static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track) + { + int ret = AVERROR_BUG; +@@ -2699,6 +2878,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex + avio_w8(pb, strlen(compressor_name)); + avio_write(pb, compressor_name, 31); + ++ ++ + if (track->mode == MODE_MOV && + (track->par->codec_id == AV_CODEC_ID_V410 || track->par->codec_id == AV_CODEC_ID_V210)) + avio_wb16(pb, 0x18); +@@ -2730,6 +2911,16 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex + } else + avio_wb16(pb, 0xffff); /* Reserved */ + ++ if (track->tag == MKTAG('r','a','w','1')){ ++ mov_write_raw_tag(pb, track); ++ mov_write_hfov_tag_ext(pb, track); ++ mov_write_vfov_tag(pb, track); ++ mov_write_ecam_tag(pb, track); ++ mov_write_icam_tag(pb, track); ++ mov_write_dstr_tag(pb, track); ++ mov_write_tbtm_tag(pb, track); ++ } ++ + if (track->tag == MKTAG('m','p','4','v')) + mov_write_esds_tag(pb, track); + else if (track->par->codec_id == AV_CODEC_ID_H263) +@@ -2748,6 +2939,11 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex + if (ret < 0) + av_log(mov->fc, AV_LOG_WARNING, "Not writing 'lhvC' atom for multilayer stream.\n"); + } ++ mov_write_ecam_tag(pb, track); ++ mov_write_icam_tag(pb, track); ++ mov_write_dstr_tag(pb, track); ++ mov_write_tbtm_tag(pb, track); ++ + } else if (track->par->codec_id == AV_CODEC_ID_VVC) + mov_write_vvcc_tag(pb, track); + else if (track->par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(track->tag)) { +@@ -3022,7 +3218,8 @@ static int mov_write_stsd_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext + ret = mov_write_tmcd_tag(pb, track); + else if (track->par->codec_tag == MKTAG('g','p','m','d')) + ret = mov_write_gpmd_tag(pb, track); +- ++ else if (track->par->codec_tag == MKTAG('m','e','t','t')) ++ ret = mov_write_mett_tag(pb, track); + if (ret < 0) + return ret; + From d21aff39e9c827d1b2ca6b8a54389edf7da60dfd Mon Sep 17 00:00:00 2001 From: ljs-darryl Date: Mon, 3 Nov 2025 17:14:16 +0800 Subject: [PATCH 2/2] add spatial enc sample --- CMakeLists.txt | 103 +++++++++++++----------- src/mp4writer/main.cc | 11 +++ src/mp4writer/write.cc | 175 +++++++++++++++++++++++++++++++++++++++++ src/mp4writer/write.h | 27 +++++++ 4 files changed, 272 insertions(+), 44 deletions(-) create mode 100644 src/mp4writer/main.cc create mode 100644 src/mp4writer/write.cc create mode 100644 src/mp4writer/write.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1bcf9fc..afe42d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.24.1 FATAL_ERROR) -project(spatialmp4 VERSION 0.1.0) +project(mp4writer VERSION 0.1.0) # Configure version header configure_file( @@ -28,21 +28,22 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden -fPIC -DEIGEN_MPL2_O set(WITH_OPENGL OFF CACHE BOOL "disable opengl" FORCE) include(cmake/ffmpeg_local.cmake) -include(cmake/opencv_host.cmake) -include(cmake/spdlog.cmake) -include(cmake/fmt.cmake) -include(cmake/eigen.cmake) -include(cmake/sophus.cmake) +# include(cmake/opencv_host.cmake) +# include(cmake/spdlog.cmake) +# include(cmake/fmt.cmake) +# include(cmake/eigen.cmake) +# include(cmake/sophus.cmake) if(BUILD_PYTHON) include(cmake/pybind11.cmake) endif() set(SRC - ./src/spatialmp4/utilities/RgbdUtils.cc - ./src/spatialmp4/utilities/OpencvUtils.cc - ./src/spatialmp4/utils.cc - ./src/spatialmp4/reader.cc + # ./src/spatialmp4/utilities/RgbdUtils.cc + # ./src/spatialmp4/utilities/OpencvUtils.cc + # ./src/spatialmp4/utils.cc + # ./src/spatialmp4/reader.cc + ./src/mp4writer/write.cc ) add_library(${CMAKE_PROJECT_NAME}_lib STATIC ${SRC}) @@ -59,11 +60,11 @@ endif() target_link_libraries(${CMAKE_PROJECT_NAME}_lib PUBLIC PkgConfig::LIBAV - PkgConfig::OpenCV - spdlog - fmt::fmt - Eigen3::Eigen - sophus + # PkgConfig::OpenCV + # spdlog + # fmt::fmt + # Eigen3::Eigen + # sophus ) # On newer versions of macOS (>=10.15) and with newer compilers, # filesystem is part of the standard library and doesn't need explicit linking @@ -74,6 +75,7 @@ endif() # Add macOS framework linking if(APPLE) + message(STATUS "build mac os framework linking.") set_target_properties(${CMAKE_PROJECT_NAME}_lib PROPERTIES INSTALL_RPATH "@loader_path" BUILD_WITH_INSTALL_RPATH TRUE @@ -98,40 +100,53 @@ if(APPLE) find_library(COREIMAGE_FRAMEWORK CoreImage) find_library(CORESERVICES_FRAMEWORK CoreServices) find_library(SECURITY_FRAMEWORK Security) - - target_link_libraries(${CMAKE_PROJECT_NAME}_lib PUBLIC - ${FOUNDATION_FRAMEWORK} - ${AUDIOTOOLBOX_FRAMEWORK} - ${COREAUDIO_FRAMEWORK} - ${AVFOUNDATION_FRAMEWORK} - ${COREVIDEO_FRAMEWORK} - ${COREMEDIA_FRAMEWORK} - ${COREGRAPHICS_FRAMEWORK} - ${OPENGL_FRAMEWORK} - ${APPLICATIONSERVICES_FRAMEWORK} - ${COREFOUNDATION_FRAMEWORK} - ${APPKIT_FRAMEWORK} - ${CARBON_FRAMEWORK} - ${METAL_FRAMEWORK} - ${VIDEOTOOLBOX_FRAMEWORK} - ${COREIMAGE_FRAMEWORK} - ${CORESERVICES_FRAMEWORK} - ${SECURITY_FRAMEWORK} - ) + + if(NOT COREAUDIO_FRAMEWORK) + message(FATAL_ERROR "Foundation framework not found") + endif() + + # target_link_libraries(${CMAKE_PROJECT_NAME}_lib PUBLIC + # "-framework Foundation" + # "-framework AudioToolbox" + # "-framework CoreAudio" + # "-framework AVFoundation" + # "-framework CoreVideo" + # "-framework CoreMedia" + # "-framework CoreGraphics" + # "-framework OpenGL" + # "-framework ApplicationServices" + # "-framework CoreFoundation" + # "-framework AppKit" + # "-framework Carbon" + # "-framework Metal" + # "-framework VideoToolbox" + # "-framework CoreImage" + # "-framework CoreServices" + # "-framework Security" + # ) endif() if(BUILD_TESTING) - include(cmake/gtest.cmake) - add_executable(test_reader - ${SRC} - ./src/spatialmp4/utilities/RgbdUtils.cc - ./src/spatialmp4/utilities/PointcloudUtils.cc - ./src/spatialmp4/reader_test.cc + # include(cmake/gtest.cmake) + # add_executable(test_reader + # ${SRC} + # ./src/spatialmp4/utilities/RgbdUtils.cc + # ./src/spatialmp4/utilities/PointcloudUtils.cc + # ./src/spatialmp4/reader_test.cc + # ) + + # target_link_libraries(test_reader PUBLIC + # gtest + # gtest_main + # ${CMAKE_PROJECT_NAME}_lib + # ) + + add_executable(test_writer + ./src/mp4writer/main.cc ) - target_link_libraries(test_reader PRIVATE + + target_link_libraries(test_writer PUBLIC ${CMAKE_PROJECT_NAME}_lib - gtest - gtest_main ) # On newer versions of macOS (>=10.15) and with newer compilers, diff --git a/src/mp4writer/main.cc b/src/mp4writer/main.cc new file mode 100644 index 0000000..14e5b87 --- /dev/null +++ b/src/mp4writer/main.cc @@ -0,0 +1,11 @@ +#include +#include "mp4writer/write.h" + +int main() { + std::cout << "Hello, World!" << std::endl; + MP4Writer *writer = new MP4Writer(); + writer->addTrack("video/raw"); + writer->writeSample(); + delete writer; + return 0; +} \ No newline at end of file diff --git a/src/mp4writer/write.cc b/src/mp4writer/write.cc new file mode 100644 index 0000000..7d92fae --- /dev/null +++ b/src/mp4writer/write.cc @@ -0,0 +1,175 @@ +#include "mp4writer/write.h" + + +using namespace std; + +string output_file = "output.mp4"; +uint8_t tmp_data1[10] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}; +uint8_t tmp_data2[10] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}; + + + +uint8_t distortion[40] = {0x52, 0x83, 0x9A, 0x90, 0x14, 0x39, 0x6E, + 0x3F, 0xAD, 0x56, 0x07, 0x0A, 0xBF, 0xE0, 0xAA, 0x3F, 0xF2, 0xEF, + 0x6C, 0xB3, 0x70, 0xA7, 0x36, 0x3F, 0xA1, 0x4F, 0x5E, 0xFF, 0xAB, + 0x18, 0x3A, 0xBF, 0x7C, 0xE7, 0xE4, 0x76, 0xF2, 0xA5, 0xAB, 0xBF}; + + +uint8_t ecam[] = { + 0xE4, 0xD3, 0x60, 0x20, 0xE7, 0x63, 0x79, 0xBF, 0x46, 0x56, 0x53, 0x11, 0xCD, 0xFF, 0xEF, + 0xBF, 0xA6, 0xD2, 0x65, 0x3B, 0x6D, 0x18, 0x6A, 0x3F, 0x88, 0x80, 0x25, 0x45, 0x60, 0xA6, + 0x95, 0xBF, 0x76, 0xE7, 0xA7, 0xB1, 0xC8, 0xFF, 0xEF, 0xBF, 0x1A, 0x8A, 0xCA, 0x86, 0x76, + 0x70, 0x79, 0x3F, 0x69, 0x31, 0x63, 0xCE, 0x35, 0xD6, 0x6E, 0x3F, 0x3B, 0xCA, 0xBF, 0x04, + 0x5D, 0x46, 0xB5, 0x3F, 0x30, 0xF7, 0xA8, 0x50, 0x82, 0xFF, 0x6E, 0xBF, 0x40, 0x3B, 0x94, + 0xE2, 0x50, 0xE7, 0x69, 0xBF, 0xF6, 0x5A, 0x74, 0x80, 0xE6, 0xFF, 0xEF, 0xBF, 0x18, 0xC1, + 0x5C, 0x53, 0x72, 0x21, 0x91, 0xBF, 0xE5, 0xD3, 0x60, 0x20, 0xE7, 0x63, 0x79, 0xBF, 0x43, + 0x56, 0x53, 0x11, 0xCD, 0xFF, 0xEF, 0xBF, 0xA6, 0xD2, 0x65, 0x3B, 0x6D, 0x18, 0x6A, 0x3F, + 0x9C, 0x78, 0x64, 0x2D, 0x79, 0x0E, 0x96, 0xBF, 0x74, 0xE7, 0xA7, 0xB1, 0xC8, 0xFF, 0xEF, + 0xBF, 0x17, 0x8A, 0xCA, 0x86, 0x76, 0x70, 0x79, 0x3F, 0x67, 0x31, 0x63, 0xCE, 0x35, 0xD6, + 0x6E, 0x3F, 0xC4, 0xA9, 0xD6, 0xDF, 0xBB, 0x80, 0x93, 0x3F, 0x2D, 0xF7, 0xA8, 0x50, 0x82, + 0xFF, 0x6E, 0xBF, 0x3F, 0x3B, 0x94, 0xE2, 0x50, 0xE7, 0x69, 0xBF, 0xF1, 0x5A, 0x74, 0x80, + 0xE6, 0xFF, 0xEF, 0xBF, 0x7E, 0x70, 0xFD, 0xB1, 0xFD, 0x60, 0x91, 0xBF }; +uint8_t icam[] = { + 0xD6, 0x8D, 0x3A, 0x03, 0x2E, 0x23, 0x70, 0x40, 0x01, 0x91, 0x3D, 0x13, 0xDF, 0x52, 0x70, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, + 0x63, 0x40, 0xD6, 0x8D, 0x3A, 0x03, 0x2E, 0x23, 0x70, 0x40, 0x01, 0x91, 0x3D, 0x13, 0xDF, + 0x52, 0x70, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x30, 0x63, 0x40 }; + +static void log_callback_test2(void *ptr, int level, const char *fmt, va_list vl) { + va_list vl2; + char *line = (char *) malloc(128 * sizeof(char)); + static int print_prefix = 1; + va_copy(vl2, vl); + av_log_format_line(ptr, level, fmt, vl2, line, 128, &print_prefix); + va_end(vl2); + line[127] = '\0'; + cout << line << endl; + free(line); +} +MP4Writer::MP4Writer() { + + + av_log_set_callback(log_callback_test2); + + AVOutputFormat *ofmt = NULL; + if(avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, output_file.c_str())) { + cout << "Could not create output context" << endl; + + } + cout << "create output context success" << endl; +} + +void MP4Writer::writeSample(){ + AVPacket *packet = av_packet_alloc(); + packet->stream_index = 0; + packet->data = tmp_data1; + packet->size = 10; + packet->pts = 1000; + packet->dts = 1000; + packet->flags = AV_PKT_FLAG_KEY; + av_interleaved_write_frame(fmt_ctx, packet); + av_packet_free(&packet); + + AVPacket *packet2 = av_packet_alloc(); + packet2->stream_index = 1; + packet2->data = tmp_data2; + packet2->size = 10; + packet2->pts = 1000; + packet2->dts = 1000; + packet2->flags = AV_PKT_FLAG_KEY; + av_interleaved_write_frame(fmt_ctx, packet2); + av_packet_free(&packet2); + +} + +void MP4Writer::addTrack(char* mime_type){ + if (fmt_ctx) { + // raw1 track + AVStream *stream1 = avformat_new_stream(fmt_ctx, NULL); + if (!stream1) { + cout << "Failed to create new stream" << endl; + } + AVCodecParameters *codecpar = stream1->codecpar; + codecpar->codec_type = AVMEDIA_TYPE_VIDEO; + //codecpar->codec_id = AV_CODEC_ID_H264; + //codecpar->format = AV_PIX_FMT_YUV420P; + codecpar->codec_tag = MKTAG('r', 'a', 'w', '1'); + codecpar->width = 1920; + codecpar->height = 1080; + + av_dict_set_int(&stream1->metadata, "data_accuracy", 2, 0); + av_dict_set_int(&stream1->metadata, "depth_legal_range", 1000, 0); + av_dict_set(&stream1->metadata, "depth_data_precision", "dtmm", 0); + av_dict_set_int(&stream1->metadata, "hfov", 87, 0); + av_dict_set_int(&stream1->metadata, "vfov", 71, 0); + + + //av_dict_set(&stream1->metadata, "distortion_coefficients", (char*)distortion, 0); + av_dict_set(&stream1->metadata, "distortion_model", "brown", 0); + av_dict_set(&stream1->metadata, "camera_model", "pinhole", 0); + //av_dict_set(&stream1->metadata, "ecam", (char*)ecam, 0); + //av_dict_set(&stream1->metadata, "icam", (char*)icam, 0); + av_dict_set(&stream1->metadata, "cam_count", "2", 0); + + AVPacketSideData *side_data_ecam = + av_packet_side_data_new(&stream1->codecpar->coded_side_data, &stream1->codecpar->nb_coded_side_data, + AV_PKT_DATA_ECAM, sizeof(ecam), 0); + memcpy(side_data_ecam->data, ecam, sizeof(ecam)); + AVPacketSideData *side_data_icam = + av_packet_side_data_new(&stream1->codecpar->coded_side_data, &stream1->codecpar->nb_coded_side_data, + AV_PKT_DATA_ICAM, sizeof(icam), 0); + memcpy(side_data_icam->data, icam, sizeof(icam)); + AVPacketSideData *side_data_distortion = + av_packet_side_data_new(&stream1->codecpar->coded_side_data, &stream1->codecpar->nb_coded_side_data, + AV_PKT_DATA_DISTORTION_COEFFICIENTS, sizeof(distortion), 0); + memcpy(side_data_distortion->data, distortion, sizeof(distortion)); + + + + // pose track + + AVStream *stream2 = avformat_new_stream(fmt_ctx, NULL); + if (!stream2) { + cout << "Failed to create new stream" << endl; + } + AVCodecParameters *codecpar2 = stream2->codecpar; + codecpar2->codec_type = AVMEDIA_TYPE_DATA; + codecpar2->codec_tag = MKTAG('m', 'e', 't', 't'); + //codecpar2->codec_id = AV_CODEC_ID_METT_DISPARITY; + + + av_dict_set_int(&stream2->metadata, "pose_coordinate", 1, 0); + av_dict_set_int(&stream2->metadata, "data_accuracy", 2, 0); + av_dict_set(&stream2->metadata, "pose_position", "head", 0); + av_dict_set(&stream2->metadata, "mime_type", "application/pose", 0); + av_dict_set_int(&stream2->metadata, "track_base_time", 1748504166622934, 0); + + + if(avio_open(&fmt_ctx->pb, output_file.c_str(), AVIO_FLAG_WRITE) < 0) { + cout << "Could not open output file" << endl; + return; + } + if(avformat_write_header(fmt_ctx, NULL) < 0) { + cout << "write header failed" << endl; + return; + } + } +} + +MP4Writer::~MP4Writer() { + if(fmt_ctx) { + cout << "write trailer" << endl; + av_write_trailer(fmt_ctx); + cout << "write trailer success" << endl; + avio_close(fmt_ctx->pb); + cout << "close io success" << endl; + avformat_free_context(fmt_ctx); + cout << "free context success" << endl; + } +} + + using namespace std; + + + diff --git a/src/mp4writer/write.h b/src/mp4writer/write.h new file mode 100644 index 0000000..3875d86 --- /dev/null +++ b/src/mp4writer/write.h @@ -0,0 +1,27 @@ +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +#include +} + +class MP4Writer { +public: + MP4Writer(); + ~MP4Writer(); + int Write(const std::string& input_file, const std::string& output_file); + void addTrack(char* mime_type); + void closeFile(); + void writeSample(); +private: + AVFormatContext *fmt_ctx; +}; \ No newline at end of file