Try to fix timestamps

chrisballinger · chrisballinger · commit 07b52667f74b · 2013-09-26T19:22:04.000-07:00
diff --git a/FFmpegWrapper/FFmpegWrapper.m b/FFmpegWrapper/FFmpegWrapper.m
@@ -41,6 +41,7 @@
 #define VSYNC_DROP        0xff
 
 @interface FFStream : NSObject
+@property (nonatomic) AVRational frameRate; ///< when num is non-zero: used to step the video DTS and to derive the output codec time_base
 @property (nonatomic) AVStream *stream;
 - (id) initWithStream:(AVStream*)newStream;
 @end
@@ -55,12 +56,48 @@ - (id) initWithStream:(AVStream *)newStream {
 }
 @end
 
+@interface FFInputStream : FFStream
+/* Predicted DTS of the next packet read from this stream, or of the next frame inside the
+ * current packet when one packet carries several frames (AV_TIME_BASE units). */
+@property (nonatomic) int64_t nextDTS;
+@property (nonatomic) int64_t DTS;      ///< DTS of the last packet read from this stream (AV_TIME_BASE units)
+
+@property (nonatomic) int64_t nextPTS;  ///< synthetic PTS for the next decoded frame (AV_TIME_BASE units)
+@property (nonatomic) int64_t PTS;      ///< current PTS of the decoded frame (AV_TIME_BASE units)
+@property (nonatomic) int64_t filterInRescaleDeltaLast; ///< value stored after av_rescale_delta() in the audio copy path
+
+@property (nonatomic) BOOL sawFirstTS;  ///< set to YES once the initial DTS/PTS have been seeded
+@end
+
+@implementation FFInputStream
+@synthesize DTS, nextDTS, PTS, nextPTS;
+- (id) initWithStream:(AVStream *)newStream {
+    self = [super initWithStream:newStream];
+    if (!self) return nil;
+    self.sawFirstTS = NO;   // no timestamp has been observed yet
+    self.DTS = AV_NOPTS_VALUE;
+    self.nextDTS = AV_NOPTS_VALUE;
+    self.PTS = AV_NOPTS_VALUE;
+    self.nextPTS = AV_NOPTS_VALUE;
+    self.filterInRescaleDeltaLast = AV_NOPTS_VALUE;
+    return self;
+}
+@end
+
 @interface FFOutputStream : FFStream
 @property (nonatomic) int64_t lastMuxDTS;
+@property (nonatomic) int frameNumber; ///< starts at 0; presumably a count of written frames (no visible code updates it yet)
 @end
 
 @implementation FFOutputStream
-@synthesize lastMuxDTS;
+@synthesize lastMuxDTS, frameNumber;
+- (id) initWithStream:(AVStream *)newStream {
+    self = [super initWithStream:newStream];
+    if (!self) return nil;
+    self.frameNumber = 0;
+    self.lastMuxDTS = AV_NOPTS_VALUE;   // nothing has been muxed yet
+    return self;
+}
 @end
 
 @implementation FFmpegWrapper
@@ -103,6 +140,136 @@ + (NSError*) errorWithCode:(int)errorCode localizedDescription:(NSString*)descri
     return [NSError errorWithDomain:kFFmpegErrorDomain code:errorCode userInfo:userInfo];
 }
 
++ (void) copyInputStream:(FFInputStream*)inputStream outputStream:(FFOutputStream*)outputStream packet:(AVPacket*)packet outputFormatContext:(AVFormatContext*)outputFormatContext
+{
+    /* Builds a stream-copy packet from `packet` (must not be NULL): timestamps are rescaled from the input to the output stream time base. */
+    int64_t ost_tb_start_time = av_rescale_q(0, AV_TIME_BASE_Q, outputStream.stream->time_base); // start offset is hard-coded to 0, so the subtractions below are no-ops
+    AVPicture picture;
+    AVPacket outputPacket;
+    av_init_packet(&outputPacket);
+    
+    if (packet->pts != AV_NOPTS_VALUE)
+        outputPacket.pts = av_rescale_q(packet->pts, inputStream.stream->time_base, outputStream.stream->time_base) - ost_tb_start_time;
+    else
+        outputPacket.pts = AV_NOPTS_VALUE;
+    
+    if (packet->dts == AV_NOPTS_VALUE) // no dts on the packet: fall back to the DTS tracked on the input stream
+        outputPacket.dts = av_rescale_q(inputStream.DTS, AV_TIME_BASE_Q, outputStream.stream->time_base);
+    else
+        outputPacket.dts = av_rescale_q(packet->dts, inputStream.stream->time_base, outputStream.stream->time_base);
+    outputPacket.dts -= ost_tb_start_time;
+    /* Audio with a known dts: derive pts/dts with av_rescale_delta() from the packet's sample duration. */
+    if (outputStream.stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && packet->dts != AV_NOPTS_VALUE) {
+        int duration = av_get_audio_frame_duration(inputStream.stream->codec, packet->size);
+        if(!duration)
+            duration = inputStream.stream->codec->frame_size;
+        int64_t filter_in_rescale_delta_last = inputStream.filterInRescaleDeltaLast; // av_rescale_delta() reads *last as state, so seed it from the stream instead of leaving it uninitialized
+        outputPacket.dts = outputPacket.pts = av_rescale_delta(inputStream.stream->time_base, packet->dts,
+                                               (AVRational){1, inputStream.stream->codec->sample_rate}, duration, &filter_in_rescale_delta_last,
+                                               outputStream.stream->time_base) - ost_tb_start_time;
+        inputStream.filterInRescaleDeltaLast = filter_in_rescale_delta_last; // persist the updated state for the next packet
+    }
+    
+    outputPacket.duration = av_rescale_q(packet->duration, inputStream.stream->time_base, outputStream.stream->time_base);
+    outputPacket.flags    = packet->flags;
+    
+    // FIXME remove the following 2 lines they shall be replaced by the bitstream filters
+    if (  outputStream.stream->codec->codec_id != AV_CODEC_ID_H264
+        && outputStream.stream->codec->codec_id != AV_CODEC_ID_MPEG1VIDEO
+        && outputStream.stream->codec->codec_id != AV_CODEC_ID_MPEG2VIDEO
+        && outputStream.stream->codec->codec_id != AV_CODEC_ID_VC1
+        ) {
+        if (av_parser_change(inputStream.stream->parser, outputStream.stream->codec, &outputPacket.data, &outputPacket.size, packet->data, packet->size, packet->flags & AV_PKT_FLAG_KEY)) {
+            outputPacket.buf = av_buffer_create(outputPacket.data, outputPacket.size, av_buffer_default_free, NULL, 0); // wrap the data returned by the parser in a ref-counted buffer
+            if (!outputPacket.buf) {
+                NSLog(@"couldnt allocate packet buffer");
+            }
+        }
+    } else {
+        outputPacket.data = packet->data;
+        outputPacket.size = packet->size;
+    }
+    
+    if (outputStream.stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && (outputFormatContext->oformat->flags & AVFMT_RAWPICTURE)) {
+        /* store AVPicture in AVPacket, as expected by the output format */
+        avpicture_fill(&picture, outputPacket.data, outputStream.stream->codec->pix_fmt, outputStream.stream->codec->width, outputStream.stream->codec->height);
+        outputPacket.data = (uint8_t *)&picture;
+        outputPacket.size = sizeof(AVPicture);
+        outputPacket.flags |= AV_PKT_FLAG_KEY;
+    }
+    
+    //write_frame(of->ctx, &outputPacket, ost); // NOTE(review): outputPacket is never written or freed here, so a buffer created above appears to leak - confirm
+    outputStream.stream->codec->frame_number++;
+}
+
+
+/* Advances the stream's DTS/PTS bookkeeping for one packet, then stream-copies it. packet == NULL means EOF: timestamps still advance, nothing is copied. Always returns 0. */
++ (int) processInputStream:(FFInputStream*)inputStream outputStream:(FFOutputStream*)outputStream packet:(AVPacket*)packet outputFormatContext:(AVFormatContext*)outputFormatContext
+{
+    AVPacket avpkt; // NULL-safe local view of `packet`; every read below goes through it
+    if (!inputStream.sawFirstTS) { // first call for this stream: seed DTS/PTS
+        inputStream.DTS = inputStream.stream->avg_frame_rate.num ? - inputStream.stream->codec->has_b_frames * AV_TIME_BASE / av_q2d(inputStream.stream->avg_frame_rate) : 0;
+        inputStream.PTS = 0;
+        if (packet != NULL && packet->pts != AV_NOPTS_VALUE) {
+            inputStream.DTS += av_rescale_q(packet->pts, inputStream.stream->time_base, AV_TIME_BASE_Q);
+            inputStream.PTS = inputStream.DTS; //unused but better to set it to a value thats not totally wrong
+        }
+        inputStream.sawFirstTS = YES;
+    }
+    
+    if (inputStream.nextDTS == AV_NOPTS_VALUE)
+        inputStream.nextDTS = inputStream.DTS;
+    if (inputStream.nextPTS == AV_NOPTS_VALUE)
+        inputStream.nextPTS = inputStream.PTS;
+    
+    if (packet == NULL) {
+        /* EOF handling */
+        av_init_packet(&avpkt);
+        avpkt.data = NULL;
+        avpkt.size = 0;
+        // av_init_packet() leaves dts = AV_NOPTS_VALUE and duration = 0, so the dts/duration checks below are skipped at EOF
+    } else {
+        avpkt = *packet;
+    }
+    
+    if (avpkt.dts != AV_NOPTS_VALUE) { // was packet->dts, which dereferenced NULL at EOF
+        inputStream.nextDTS = inputStream.DTS = av_rescale_q(avpkt.dts, inputStream.stream->time_base, AV_TIME_BASE_Q);
+        inputStream.nextPTS = inputStream.PTS = inputStream.DTS;
+    }
+    
+    /* handle stream copy */
+    inputStream.DTS = inputStream.nextDTS;
+    switch (inputStream.stream->codec->codec_type) {
+        case AVMEDIA_TYPE_AUDIO:
+            inputStream.nextDTS += ((int64_t)AV_TIME_BASE * inputStream.stream->codec->frame_size) /
+            inputStream.stream->codec->sample_rate;
+            break;
+        case AVMEDIA_TYPE_VIDEO:
+            if (inputStream.frameRate.num) {
+                // TODO: Remove work-around for c99-to-c89 issue 7
+                AVRational time_base_q = AV_TIME_BASE_Q;
+                int64_t next_dts = av_rescale_q(inputStream.nextDTS, time_base_q, av_inv_q(inputStream.frameRate));
+                inputStream.nextDTS = av_rescale_q(next_dts + 1, av_inv_q(inputStream.frameRate), time_base_q);
+            } else if (avpkt.duration) {
+                inputStream.nextDTS += av_rescale_q(avpkt.duration, inputStream.stream->time_base, AV_TIME_BASE_Q);
+            } else if(inputStream.stream->codec->time_base.num != 0) {
+                int ticks= inputStream.stream->parser ? inputStream.stream->parser->repeat_pict + 1 : inputStream.stream->codec->ticks_per_frame;
+                inputStream.nextDTS += ((int64_t)AV_TIME_BASE *
+                                  inputStream.stream->codec->time_base.num * ticks) /
+                inputStream.stream->codec->time_base.den;
+            }
+            break;
+        default:
+            break;
+    }
+    inputStream.PTS = inputStream.DTS;
+    inputStream.nextPTS = inputStream.nextDTS;
+    if (packet != NULL) { // copyInputStream dereferences the packet, so skip it at EOF
+        [[self class] copyInputStream:inputStream outputStream:outputStream packet:packet outputFormatContext:outputFormatContext];
+    }
+    return 0;
+}
+
 + (NSError*) errorForAVErrorNumber:(int)errorNumber {
     NSString *description = [self stringForAVErrorNumber:errorNumber];
     return [self errorWithCode:errorNumber localizedDescription:description];
@@ -122,6 +289,7 @@ - (void) convertInputPath:(NSString*)inputPath outputPath:(NSString*)outputPath
         BOOL success = NO;
         int video_sync_method = VSYNC_PASSTHROUGH;
         int audio_sync_method = 0;
+        int64_t videoDTS = 0;
         NSError *error = nil;
         NSFileManager *fileManager = [NSFileManager defaultManager];
         NSDictionary *inputFileAttributes = [fileManager attributesOfItemAtPath:inputPath error:&error];
@@ -180,11 +348,13 @@ - (void) convertInputPath:(NSString*)inputPath outputPath:(NSString*)outputPath
         int copy_tb = -1;
         for (int i = 0; i < inputStreamCount; i++) {
             AVStream *inputStream = inputFormatContext->streams[i];
-            [inputStreams addObject:[[FFStream alloc] initWithStream:inputStream]];
+            FFInputStream *ffInputStream = [[FFInputStream alloc] initWithStream:inputStream];
+            [inputStreams addObject:ffInputStream];
             AVCodecContext *inputCodecContext = inputStream->codec;
             AVCodec *outputCodec = avcodec_find_encoder(inputCodecContext->codec_id);
             AVStream *outputStream = avformat_new_stream(outputFormatContext, outputCodec);
-            [outputStreams addObject:[[FFOutputStream alloc] initWithStream:outputStream]];
+            FFOutputStream *ffOutputStream = [[FFOutputStream alloc] initWithStream:outputStream];
+            [outputStreams addObject:ffOutputStream];
 
             AVCodecContext *outputCodecContext = outputStream->codec;
             
@@ -252,6 +422,11 @@ - (void) convertInputPath:(NSString*)inputPath outputPath:(NSString*)outputPath
                 outputCodecContext->time_base = inputCodecContext->time_base;
             }
             
+            if (ffInputStream && !ffOutputStream.frameRate.num)
+                ffOutputStream.frameRate = ffInputStream.frameRate;
+            if(ffOutputStream.frameRate.num)
+                outputCodecContext->time_base = av_inv_q(ffOutputStream.frameRate);
+            
             av_reduce(&outputCodecContext->time_base.num, &outputCodecContext->time_base.den,
                       outputCodecContext->time_base.num, outputCodecContext->time_base.den, INT_MAX);
             
@@ -273,8 +448,12 @@ - (void) convertInputPath:(NSString*)inputPath outputPath:(NSString*)outputPath
                     outputCodecContext->width              = inputCodecContext->width;
                     outputCodecContext->height             = inputCodecContext->height;
                     outputCodecContext->has_b_frames       = inputCodecContext->has_b_frames;
-                    sar = inputCodecContext->sample_aspect_ratio;
-                    break;
+                    if (inputStream->sample_aspect_ratio.num)
+                        sar = inputStream->sample_aspect_ratio;
+                    else
+                        sar = inputCodecContext->sample_aspect_ratio;
+                    outputStream->sample_aspect_ratio = inputCodecContext->sample_aspect_ratio = sar;
+                    outputStream->avg_frame_rate = inputStream->avg_frame_rate;                    break;
                 case AVMEDIA_TYPE_SUBTITLE:
                     outputCodecContext->width  = inputCodecContext->width;
                     outputCodecContext->height = inputCodecContext->height;
@@ -329,6 +508,9 @@ - (void) convertInputPath:(NSString*)inputPath outputPath:(NSString*)outputPath
             }
             
             FFOutputStream *ffOutputStream = [outputStreams objectAtIndex:packet->stream_index];
+            FFInputStream *ffInputStream = [inputStreams objectAtIndex:packet->stream_index];
+            
+            [[self class] processInputStream:ffInputStream outputStream:ffOutputStream packet:packet outputFormatContext:outputFormatContext];
             
             AVStream *outputStream = ffOutputStream.stream;