Subtitle Resynchronizer (I)
I found that for some of the movies I downloaded, no perfectly matching subtitles can easily be found online, so I have to use the closest ones available. But using the resync function of VobSub or similar tools is obviously not a real workaround. With this tool, and without too much tuning, I guess I can even use subtitles that deviate considerably. The complete code of the latest version, with some rather sparse commentary, is provided here:
#include <cstdio>
#include <cstring>
#include <cstdlib>
// Time value in milliseconds; used for timestamps, durations and offsets alike.
typedef unsigned long movtime_t;
/*
 * Reads one line from f into buf and removes the trailing line terminator.
 *
 * buf  - destination buffer; left NUL-terminated whenever size > 0
 * size - capacity of buf in bytes (at most size-1 characters are stored)
 * f    - stream to read from
 *
 * On end-of-file or on a read error buf becomes the empty string, so the
 * caller never sees stale or uninitialised data.
 */
static void fgetline (char *buf, int size, FILE *f)
{
    if (buf == NULL || size <= 0)
    {
        return;
    }
    if (f == NULL || fgets(buf, size, f) == NULL)
    {
        buf[0] = 0;
        return;
    }
    size_t len = strlen(buf);
    // only strip a real terminator: the last line of a file may lack one
    if (len > 0 && buf[len - 1] == '\n')
    {
        buf[--len] = 0;
    }
    // CRLF files read on a platform that does not translate line ends
    if (len > 0 && buf[len - 1] == '\r')
    {
        buf[--len] = 0;
    }
}
/*
 * Tells whether p is a non-empty string made of decimal digits only.
 *
 * p - NUL-terminated string to test (NULL is treated as "not a number")
 *
 * Returns true for strings such as "1" or "042"; false for the empty
 * string and for anything containing a non-digit (sign, blank, letter).
 */
static bool isnumber (const char *p)
{
    if (p == NULL || *p == '\0')
    {
        return false;
    }
    // the terminator must be '\0'; a mangled '/0' never matches a char
    for ( ; *p != '\0'; ++p)
    {
        if (*p < '0' || *p > '9')
        {
            return false;
        }
    }
    return true;
}
static bool parse_timestamp (char *p, movtime_t &t)
{
#define ISDIGIT(c) ((c)>='0'&&(c)<='9')
#define ASSERTC(c, ca) if (c!=ca) return false;
#define DIGIT2(v,p) /
if (!ISDIGIT((p)[0]) || !ISDIGIT((p)[1])) return false; /
v = ((p)[0]-'0')*10+((p)[1]-'0');
#define DIGIT3(v,p) /
if (!ISDIGIT((p)[0]) || !ISDIGIT((p)[1]) || !ISDIGIT((p)[2])) return false; /
v = ((p)[0]-'0')*100+((p)[1]-'0')*10+((p)[2]-'0');
int h, m, s, ms;
DIGIT2(h,p);
ASSERTC(p[2],':');
DIGIT2(m,p+3);
ASSERTC(p[5],':');
DIGIT2(s,p+6);
ASSERTC(p[8],',');
DIGIT3(ms,p+9);
t = (h*3600+m*60+s)*1000+ms;
return true;
}
/*
 * Recognises a subtitle timing line: a timestamp at offset 0 and a second
 * one at offset 17, i.e. the layout "00:00:46,430 --> 00:00:48,000".
 *
 * p     - NUL-terminated line to examine
 * begin - receives the first timestamp when it parses
 * end   - receives the second timestamp when it parses
 *
 * Returns true only if the line is at least 29 characters long and both
 * timestamps parse. The arrow between them is deliberately not verified.
 */
static bool istimestamp (char *p, movtime_t &begin, movtime_t &end)
{
    const size_t shortest_line = 29; // 12 + 5 + 12 characters
    const int second_stamp_at = 17;  // first stamp plus the 5-character arrow

    return strlen(p) >= shortest_line
        && parse_timestamp(p, begin)
        && parse_timestamp(p + second_stamp_at, end);
}
/*
 * Breaks a millisecond count down into its clock components.
 *
 * t  - value to split, in milliseconds
 * h  - receives the whole hours (not wrapped at 24)
 * m  - receives the minutes, 0..59
 * s  - receives the seconds, 0..59
 * ms - receives the milliseconds, 0..999
 */
static void analyse_time (movtime_t t, int &h, int &m, int &s, int &ms)
{
    const movtime_t whole_seconds = t / 1000;
    const movtime_t whole_minutes = whole_seconds / 60;

    ms = t % 1000;
    s = whole_seconds % 60;
    m = whole_minutes % 60;
    h = whole_minutes / 60;
}
/* Capacity of the line buffers. It should be big enough that no input line
   has to be split across several reads. */
#define sizebuf 2048
/*
 * Re-times the entries of a sequence of subtitle inputs so that they fit a
 * sequence of subtitle outputs whose parts have different lengths.
 *
 * ssfpins  - ssfnum input streams, read in order
 * ssfpouts - movnum output streams, written in order
 * ssfnum   - number of inputs
 * movnum   - number of outputs
 * ssfdurs  - duration of each input in msec; ssfdurs[ssfnum] is the
 *            initial offset of the first input
 * movdurs  - duration of each output in msec; movdurs[movnum] is the
 *            initial offset of the first output
 *
 * Every line of the current input is classified as an index line (digits
 * only), a timing line, or text. An entry is collected in outbuf and only
 * written once the next index line shows that it is complete. An entry
 * that straddles the end of an output part is split: the first half is
 * kept in oldbuf and goes to the previous output, the second half goes to
 * the new one. A timing line with both stamps equal to zero ends the
 * current input early.
 *
 * Processing stops when the inputs or the outputs are exhausted. If the
 * outputs run out in the middle of an entry, that entry is dropped because
 * its text has not been read yet.
 */
static void rectify (
    FILE **ssfpins, // ssfnum
    FILE **ssfpouts, // movnum
    int ssfnum,
    int movnum,
    movtime_t *ssfdurs, // in msec, 120min --> 7200000
    movtime_t *movdurs // in msec
    )
{
    if (ssfnum <= 0 || movnum <= 0)
    {
        return; // nothing to read from or nothing to write to
    }
    // the initial offsets of both sequences are provided at the tail;
    // the following are the absolute start times of the current parts
    movtime_t ssfabs = ssfdurs[ssfnum];
    movtime_t movabs = movdurs[movnum];
    char buf[sizebuf];
    char outbuf[sizebuf*2] = {0, };
    char oldbuf[sizebuf*2] = {0, };
    char tempbuf[sizebuf + 2]; // room for a full line plus "\n" and the terminator
    const size_t outcap = sizeof(outbuf);
    const size_t oldcap = sizeof(oldbuf);
    int ssfi = 0;
    int movi = 0;
    FILE *ssfpin = ssfpins[ssfi];
    FILE *ssfpout = ssfpouts[movi];
    int outcount = 0;
    int oldcount = 0;
    bool someinold = false;
    while (1)
    {
        if (feof(ssfpin))
        {
file_end:
            ssfabs += ssfdurs[ssfi];
            ssfi++;
            if (ssfi >= ssfnum)
            {
                break;
            }
            ssfpin = ssfpins[ssfi];
        }
        fgetline(buf, sizebuf, ssfpin);
        movtime_t begin, end;
        if (isnumber(buf))
        {
            // a new entry starts, so the previous one is complete: print it
            if (someinold)
            {
                // someinold is only set right before movi is advanced,
                // hence movi - 1 is the output the first half belongs to
                fprintf(ssfpouts[movi - 1], "%d\n", oldcount);
                fprintf(ssfpouts[movi - 1], "%s", oldbuf);
                someinold = false;
            }
            if (outcount > 0)
            {
                fprintf(ssfpout, "%d\n", outcount);
                fprintf(ssfpout, "%s", outbuf);
            }
            outbuf[0] = 0;
            outcount++;
        }
        else if (istimestamp(buf, begin, end))
        {
            if (begin == 0 && end == 0)
            {
                // sign of end
                goto file_end;
            }
            movtime_t absbegin, absend;
            movtime_t corrbegin, corrend;
            absbegin = ssfabs + begin;
            absend = ssfabs + end;
            // check if it's the next output file's turn
            if (absend >= movabs + movdurs[movi])
            {
                if (absbegin < movabs + movdurs[movi])
                {
                    // absbegin to movabs + movdurs[movi] -> current output
                    corrbegin = absbegin - movabs;
                    corrend = movdurs[movi];
                    int h, m, s, ms;
                    analyse_time(corrbegin, h, m, s, ms);
                    sprintf(tempbuf, "%02d:%02d:%02d,%03d", h, m, s, ms);
                    strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
                    analyse_time(corrend, h, m, s, ms);
                    sprintf(tempbuf, " --> %02d:%02d:%02d,%03d\n", h, m, s, ms);
                    strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
                    strcpy(oldbuf, outbuf);
                    outbuf[0] = 0;
                    oldcount = outcount;
                    someinold = true;
                    // movabs + movdurs[movi] to absend -> next output
                    absbegin = movabs + movdurs[movi];
                }
                // otherwise the entry lies entirely in the next output
                movabs += movdurs[movi];
                movi++;
                if (movi >= movnum)
                {
                    // no output left; the half-built entry has no text yet
                    someinold = false;
                    break;
                }
                ssfpout = ssfpouts[movi];
                outcount = 1;
            }
            corrbegin = absbegin - movabs;
            corrend = absend - movabs;
            int h, m, s, ms;
            analyse_time(corrbegin, h, m, s, ms);
            sprintf(tempbuf, "%02d:%02d:%02d,%03d", h, m, s, ms);
            strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
            analyse_time(corrend, h, m, s, ms);
            sprintf(tempbuf, " --> %02d:%02d:%02d,%03d\n", h, m, s, ms);
            strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
        }
        else
        {
            // text (or blank) line: belongs to the entry being collected
            sprintf(tempbuf, "%s\n", buf);
            strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
            if (someinold)
            {
                strncat(oldbuf, tempbuf, oldcap - strlen(oldbuf) - 1);
            }
        }
    }
    // entries are only printed when the next index line arrives, so the
    // very last one is still pending here
    if (someinold)
    {
        fprintf(ssfpouts[movi - 1], "%d\n", oldcount);
        fprintf(ssfpouts[movi - 1], "%s", oldbuf);
    }
    if (movi < movnum && outcount > 0 && outbuf[0] != 0)
    {
        fprintf(ssfpout, "%d\n", outcount);
        fprintf(ssfpout, "%s", outbuf);
    }
}
/* Capacity of every per-file table in CorrStruct. */
#define MAXFNUM 16
/*
 * Everything one resynchronisation run needs: the opened subtitle streams
 * and the timing tables that go with them.
 */
struct CorrStruct
{
    /* input subtitle streams; the first ssfnum slots are used */
    FILE *ssfpins[MAXFNUM];
    /* output subtitle streams; the first movnum slots are used */
    FILE *ssfpouts[MAXFNUM];
    /* number of input subtitle files */
    int ssfnum;
    /* number of output subtitle files */
    int movnum;
    /* in msec (120min --> 7200000); holds ssfnum + 1 values: one duration
       per input, then the initial offset at index ssfnum */
    movtime_t ssfdurs[MAXFNUM];
    /* in msec; holds movnum + 1 values: one duration per output, then the
       initial offset at index movnum */
    movtime_t movdurs[MAXFNUM];
};
/*
 * Fills corr from the configuration stream and opens every listed file.
 *
 * The configuration consists of two sections of the same shape, first the
 * input subtitles, then the outputs:
 *     <count>
 *     <initial offset, HH:MM:SS,mmm>
 *     <file name>          \ repeated <count> times
 *     <duration>           /
 * Inputs are opened for reading, outputs for writing.
 *
 * corr    - structure to fill
 * fConfig - opened configuration stream
 *
 * Returns 0 on success, otherwise
 *    -1  a file count is missing, below 1 or too large for the tables
 *    -2  a timestamp is malformed
 *    -3  a file could not be opened (or an argument is NULL)
 * On failure every file opened so far is closed again and both counts in
 * corr are reset to 0.
 */
static int ParseConfig (CorrStruct *corr, FILE *fConfig)
{
    int i;
    int rc = 0;
    long count;
    char *endp;
    char buf[sizebuf];

    if (corr == NULL || fConfig == NULL)
    {
        return -3;
    }
    // start from a known state so that the failure path can tell which
    // streams were actually opened
    corr->ssfnum = 0;
    corr->movnum = 0;
    for (i = 0; i < MAXFNUM; i++)
    {
        corr->ssfpins[i] = NULL;
        corr->ssfpouts[i] = NULL;
    }

    /* input subtitle files */
    fgetline(buf, sizebuf, fConfig);
    count = strtol(buf, &endp, 10);
    // one slot of the duration table is taken by the initial offset
    if (endp == buf || count < 1 || count > MAXFNUM - 1)
    {
        rc = -1; /* bad number of files */
        goto fail;
    }
    corr->ssfnum = (int)count;
    fgetline(buf, sizebuf, fConfig);
    if (!parse_timestamp(buf, corr->ssfdurs[corr->ssfnum]))
    {
        rc = -2; /* timestamp error */
        goto fail;
    }
    for (i = 0; i < corr->ssfnum; i++)
    {
        fgetline(buf, sizebuf, fConfig);
        corr->ssfpins[i] = fopen(buf, "r");
        if (corr->ssfpins[i] == NULL)
        {
            rc = -3; /* cannot open input */
            goto fail;
        }
        fgetline(buf, sizebuf, fConfig);
        if (!parse_timestamp(buf, corr->ssfdurs[i]))
        {
            rc = -2;
            goto fail;
        }
    }

    /* output subtitle files */
    fgetline(buf, sizebuf, fConfig);
    count = strtol(buf, &endp, 10);
    if (endp == buf || count < 1 || count > MAXFNUM - 1)
    {
        rc = -1; /* bad number of files */
        goto fail;
    }
    corr->movnum = (int)count;
    fgetline(buf, sizebuf, fConfig);
    if (!parse_timestamp(buf, corr->movdurs[corr->movnum]))
    {
        rc = -2;
        goto fail;
    }
    for (i = 0; i < corr->movnum; i++)
    {
        fgetline(buf, sizebuf, fConfig);
        corr->ssfpouts[i] = fopen(buf, "w");
        if (corr->ssfpouts[i] == NULL)
        {
            rc = -3; /* cannot open output */
            goto fail;
        }
        fgetline(buf, sizebuf, fConfig);
        if (!parse_timestamp(buf, corr->movdurs[i]))
        {
            rc = -2;
            goto fail;
        }
    }
    return 0;

fail:
    for (i = 0; i < MAXFNUM; i++)
    {
        if (corr->ssfpins[i] != NULL)
        {
            fclose(corr->ssfpins[i]);
            corr->ssfpins[i] = NULL;
        }
        if (corr->ssfpouts[i] != NULL)
        {
            fclose(corr->ssfpouts[i]);
            corr->ssfpouts[i] = NULL;
        }
    }
    corr->ssfnum = 0;
    corr->movnum = 0;
    return rc;
}
/*
 * Closes every stream held by corr.
 *
 * corr - structure whose first ssfnum input and first movnum output
 *        streams are closed; NULL slots are skipped, and closed slots are
 *        reset to NULL so that a second call is harmless
 *
 * A failure to close an output is reported with perror, because buffered
 * subtitle data is flushed at that point and may have been lost.
 */
static void Finalize (CorrStruct *corr)
{
    if (corr == NULL)
    {
        return;
    }
    for (int i = 0; i < corr->ssfnum; i++)
    {
        if (corr->ssfpins[i] != NULL)
        {
            fclose(corr->ssfpins[i]);
            corr->ssfpins[i] = NULL;
        }
    }
    for (int i = 0; i < corr->movnum; i++)
    {
        if (corr->ssfpouts[i] != NULL)
        {
            if (fclose(corr->ssfpouts[i]) != 0)
            {
                perror("fclose");
            }
            corr->ssfpouts[i] = NULL;
        }
    }
}
/*
 * Entry point: reads "ssresync_config.txt" from the current directory,
 * re-times the subtitles it describes and closes all files.
 *
 * Returns 0 on success and 1 when the configuration cannot be opened or
 * is rejected by ParseConfig.
 */
int main (void)
{
    FILE *fConfig = fopen("ssresync_config.txt", "r");
    if (fConfig == NULL)
    {
        fprintf(stderr, "cannot open ssresync_config.txt\n");
        return 1;
    }
    CorrStruct corr;
    int rc = ParseConfig(&corr, fConfig);
    // the configuration is fully consumed at this point
    fclose(fConfig);
    if (rc != 0)
    {
        // corr is not in a usable state, so neither rectify nor Finalize
        // may touch it
        fprintf(stderr, "invalid configuration (error %d)\n", rc);
        return 1;
    }
    rectify(corr.ssfpins, corr.ssfpouts, corr.ssfnum, corr.movnum, corr.ssfdurs, corr.movdurs);
    Finalize(&corr);
    return 0;
}
// An example config file.
// It is applied to the subtitles downloaded from shooter.cn for the movie Amadeus, where both the
// movie and the subtitle script are divided into 3 parts with some time deviation between them.
/*
== BEGIN ==
3
00:00:00,000
inNewMov-Amadeus-CD1.eng.srt
00:58:03,160
inNewMov-Amadeus-CD2.eng.srt
00:58:12,500
inNewMov-Amadeus-CD3.eng.srt
00:58:23,240
3
00:00:00,000
NewMov-Amadeus-CD1.eng.srt
00:57:09,000
NewMov-Amadeus-CD2.eng.srt
00:57:40,000
NewMov-Amadeus-CD3.eng.srt
01:05:32,000
== END ==
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
// Time value in milliseconds; used for timestamps, durations and offsets alike.
typedef unsigned long movtime_t;
/*
 * Reads one line from f into buf and removes the trailing line terminator.
 *
 * buf  - destination buffer; left NUL-terminated whenever size > 0
 * size - capacity of buf in bytes (at most size-1 characters are stored)
 * f    - stream to read from
 *
 * On end-of-file or on a read error buf becomes the empty string, so the
 * caller never sees stale or uninitialised data.
 */
static void fgetline (char *buf, int size, FILE *f)
{
    if (buf == NULL || size <= 0)
    {
        return;
    }
    if (f == NULL || fgets(buf, size, f) == NULL)
    {
        buf[0] = 0;
        return;
    }
    size_t len = strlen(buf);
    // only strip a real terminator: the last line of a file may lack one
    if (len > 0 && buf[len - 1] == '\n')
    {
        buf[--len] = 0;
    }
    // CRLF files read on a platform that does not translate line ends
    if (len > 0 && buf[len - 1] == '\r')
    {
        buf[--len] = 0;
    }
}
/*
 * Tells whether p is a non-empty string made of decimal digits only.
 *
 * p - NUL-terminated string to test (NULL is treated as "not a number")
 *
 * Returns true for strings such as "1" or "042"; false for the empty
 * string and for anything containing a non-digit (sign, blank, letter).
 */
static bool isnumber (const char *p)
{
    if (p == NULL || *p == '\0')
    {
        return false;
    }
    // the terminator must be '\0'; a mangled '/0' never matches a char
    for ( ; *p != '\0'; ++p)
    {
        if (*p < '0' || *p > '9')
        {
            return false;
        }
    }
    return true;
}
static bool parse_timestamp (char *p, movtime_t &t)
{
#define ISDIGIT(c) ((c)>='0'&&(c)<='9')
#define ASSERTC(c, ca) if (c!=ca) return false;
#define DIGIT2(v,p) /
if (!ISDIGIT((p)[0]) || !ISDIGIT((p)[1])) return false; /
v = ((p)[0]-'0')*10+((p)[1]-'0');
#define DIGIT3(v,p) /
if (!ISDIGIT((p)[0]) || !ISDIGIT((p)[1]) || !ISDIGIT((p)[2])) return false; /
v = ((p)[0]-'0')*100+((p)[1]-'0')*10+((p)[2]-'0');
int h, m, s, ms;
DIGIT2(h,p);
ASSERTC(p[2],':');
DIGIT2(m,p+3);
ASSERTC(p[5],':');
DIGIT2(s,p+6);
ASSERTC(p[8],',');
DIGIT3(ms,p+9);
t = (h*3600+m*60+s)*1000+ms;
return true;
}
/*
 * Recognises a subtitle timing line: a timestamp at offset 0 and a second
 * one at offset 17, i.e. the layout "00:00:46,430 --> 00:00:48,000".
 *
 * p     - NUL-terminated line to examine
 * begin - receives the first timestamp when it parses
 * end   - receives the second timestamp when it parses
 *
 * Returns true only if the line is at least 29 characters long and both
 * timestamps parse. The arrow between them is deliberately not verified.
 */
static bool istimestamp (char *p, movtime_t &begin, movtime_t &end)
{
    const size_t shortest_line = 29; // 12 + 5 + 12 characters
    const int second_stamp_at = 17;  // first stamp plus the 5-character arrow

    return strlen(p) >= shortest_line
        && parse_timestamp(p, begin)
        && parse_timestamp(p + second_stamp_at, end);
}
/*
 * Breaks a millisecond count down into its clock components.
 *
 * t  - value to split, in milliseconds
 * h  - receives the whole hours (not wrapped at 24)
 * m  - receives the minutes, 0..59
 * s  - receives the seconds, 0..59
 * ms - receives the milliseconds, 0..999
 */
static void analyse_time (movtime_t t, int &h, int &m, int &s, int &ms)
{
    const movtime_t whole_seconds = t / 1000;
    const movtime_t whole_minutes = whole_seconds / 60;

    ms = t % 1000;
    s = whole_seconds % 60;
    m = whole_minutes % 60;
    h = whole_minutes / 60;
}
/* Capacity of the line buffers. It should be big enough that no input line
   has to be split across several reads. */
#define sizebuf 2048
/*
 * Re-times the entries of a sequence of subtitle inputs so that they fit a
 * sequence of subtitle outputs whose parts have different lengths.
 *
 * ssfpins  - ssfnum input streams, read in order
 * ssfpouts - movnum output streams, written in order
 * ssfnum   - number of inputs
 * movnum   - number of outputs
 * ssfdurs  - duration of each input in msec; ssfdurs[ssfnum] is the
 *            initial offset of the first input
 * movdurs  - duration of each output in msec; movdurs[movnum] is the
 *            initial offset of the first output
 *
 * Every line of the current input is classified as an index line (digits
 * only), a timing line, or text. An entry is collected in outbuf and only
 * written once the next index line shows that it is complete. An entry
 * that straddles the end of an output part is split: the first half is
 * kept in oldbuf and goes to the previous output, the second half goes to
 * the new one. A timing line with both stamps equal to zero ends the
 * current input early.
 *
 * Processing stops when the inputs or the outputs are exhausted. If the
 * outputs run out in the middle of an entry, that entry is dropped because
 * its text has not been read yet.
 */
static void rectify (
    FILE **ssfpins, // ssfnum
    FILE **ssfpouts, // movnum
    int ssfnum,
    int movnum,
    movtime_t *ssfdurs, // in msec, 120min --> 7200000
    movtime_t *movdurs // in msec
    )
{
    if (ssfnum <= 0 || movnum <= 0)
    {
        return; // nothing to read from or nothing to write to
    }
    // the initial offsets of both sequences are provided at the tail;
    // the following are the absolute start times of the current parts
    movtime_t ssfabs = ssfdurs[ssfnum];
    movtime_t movabs = movdurs[movnum];
    char buf[sizebuf];
    char outbuf[sizebuf*2] = {0, };
    char oldbuf[sizebuf*2] = {0, };
    char tempbuf[sizebuf + 2]; // room for a full line plus "\n" and the terminator
    const size_t outcap = sizeof(outbuf);
    const size_t oldcap = sizeof(oldbuf);
    int ssfi = 0;
    int movi = 0;
    FILE *ssfpin = ssfpins[ssfi];
    FILE *ssfpout = ssfpouts[movi];
    int outcount = 0;
    int oldcount = 0;
    bool someinold = false;
    while (1)
    {
        if (feof(ssfpin))
        {
file_end:
            ssfabs += ssfdurs[ssfi];
            ssfi++;
            if (ssfi >= ssfnum)
            {
                break;
            }
            ssfpin = ssfpins[ssfi];
        }
        fgetline(buf, sizebuf, ssfpin);
        movtime_t begin, end;
        if (isnumber(buf))
        {
            // a new entry starts, so the previous one is complete: print it
            if (someinold)
            {
                // someinold is only set right before movi is advanced,
                // hence movi - 1 is the output the first half belongs to
                fprintf(ssfpouts[movi - 1], "%d\n", oldcount);
                fprintf(ssfpouts[movi - 1], "%s", oldbuf);
                someinold = false;
            }
            if (outcount > 0)
            {
                fprintf(ssfpout, "%d\n", outcount);
                fprintf(ssfpout, "%s", outbuf);
            }
            outbuf[0] = 0;
            outcount++;
        }
        else if (istimestamp(buf, begin, end))
        {
            if (begin == 0 && end == 0)
            {
                // sign of end
                goto file_end;
            }
            movtime_t absbegin, absend;
            movtime_t corrbegin, corrend;
            absbegin = ssfabs + begin;
            absend = ssfabs + end;
            // check if it's the next output file's turn
            if (absend >= movabs + movdurs[movi])
            {
                if (absbegin < movabs + movdurs[movi])
                {
                    // absbegin to movabs + movdurs[movi] -> current output
                    corrbegin = absbegin - movabs;
                    corrend = movdurs[movi];
                    int h, m, s, ms;
                    analyse_time(corrbegin, h, m, s, ms);
                    sprintf(tempbuf, "%02d:%02d:%02d,%03d", h, m, s, ms);
                    strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
                    analyse_time(corrend, h, m, s, ms);
                    sprintf(tempbuf, " --> %02d:%02d:%02d,%03d\n", h, m, s, ms);
                    strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
                    strcpy(oldbuf, outbuf);
                    outbuf[0] = 0;
                    oldcount = outcount;
                    someinold = true;
                    // movabs + movdurs[movi] to absend -> next output
                    absbegin = movabs + movdurs[movi];
                }
                // otherwise the entry lies entirely in the next output
                movabs += movdurs[movi];
                movi++;
                if (movi >= movnum)
                {
                    // no output left; the half-built entry has no text yet
                    someinold = false;
                    break;
                }
                ssfpout = ssfpouts[movi];
                outcount = 1;
            }
            corrbegin = absbegin - movabs;
            corrend = absend - movabs;
            int h, m, s, ms;
            analyse_time(corrbegin, h, m, s, ms);
            sprintf(tempbuf, "%02d:%02d:%02d,%03d", h, m, s, ms);
            strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
            analyse_time(corrend, h, m, s, ms);
            sprintf(tempbuf, " --> %02d:%02d:%02d,%03d\n", h, m, s, ms);
            strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
        }
        else
        {
            // text (or blank) line: belongs to the entry being collected
            sprintf(tempbuf, "%s\n", buf);
            strncat(outbuf, tempbuf, outcap - strlen(outbuf) - 1);
            if (someinold)
            {
                strncat(oldbuf, tempbuf, oldcap - strlen(oldbuf) - 1);
            }
        }
    }
    // entries are only printed when the next index line arrives, so the
    // very last one is still pending here
    if (someinold)
    {
        fprintf(ssfpouts[movi - 1], "%d\n", oldcount);
        fprintf(ssfpouts[movi - 1], "%s", oldbuf);
    }
    if (movi < movnum && outcount > 0 && outbuf[0] != 0)
    {
        fprintf(ssfpout, "%d\n", outcount);
        fprintf(ssfpout, "%s", outbuf);
    }
}
/* Capacity of every per-file table in CorrStruct. */
#define MAXFNUM 16
/*
 * Everything one resynchronisation run needs: the opened subtitle streams
 * and the timing tables that go with them.
 */
struct CorrStruct
{
    /* input subtitle streams; the first ssfnum slots are used */
    FILE *ssfpins[MAXFNUM];
    /* output subtitle streams; the first movnum slots are used */
    FILE *ssfpouts[MAXFNUM];
    /* number of input subtitle files */
    int ssfnum;
    /* number of output subtitle files */
    int movnum;
    /* in msec (120min --> 7200000); holds ssfnum + 1 values: one duration
       per input, then the initial offset at index ssfnum */
    movtime_t ssfdurs[MAXFNUM];
    /* in msec; holds movnum + 1 values: one duration per output, then the
       initial offset at index movnum */
    movtime_t movdurs[MAXFNUM];
};
/*
 * Fills corr from the configuration stream and opens every listed file.
 *
 * The configuration consists of two sections of the same shape, first the
 * input subtitles, then the outputs:
 *     <count>
 *     <initial offset, HH:MM:SS,mmm>
 *     <file name>          \ repeated <count> times
 *     <duration>           /
 * Inputs are opened for reading, outputs for writing.
 *
 * corr    - structure to fill
 * fConfig - opened configuration stream
 *
 * Returns 0 on success, otherwise
 *    -1  a file count is missing, below 1 or too large for the tables
 *    -2  a timestamp is malformed
 *    -3  a file could not be opened (or an argument is NULL)
 * On failure every file opened so far is closed again and both counts in
 * corr are reset to 0.
 */
static int ParseConfig (CorrStruct *corr, FILE *fConfig)
{
    int i;
    int rc = 0;
    long count;
    char *endp;
    char buf[sizebuf];

    if (corr == NULL || fConfig == NULL)
    {
        return -3;
    }
    // start from a known state so that the failure path can tell which
    // streams were actually opened
    corr->ssfnum = 0;
    corr->movnum = 0;
    for (i = 0; i < MAXFNUM; i++)
    {
        corr->ssfpins[i] = NULL;
        corr->ssfpouts[i] = NULL;
    }

    /* input subtitle files */
    fgetline(buf, sizebuf, fConfig);
    count = strtol(buf, &endp, 10);
    // one slot of the duration table is taken by the initial offset
    if (endp == buf || count < 1 || count > MAXFNUM - 1)
    {
        rc = -1; /* bad number of files */
        goto fail;
    }
    corr->ssfnum = (int)count;
    fgetline(buf, sizebuf, fConfig);
    if (!parse_timestamp(buf, corr->ssfdurs[corr->ssfnum]))
    {
        rc = -2; /* timestamp error */
        goto fail;
    }
    for (i = 0; i < corr->ssfnum; i++)
    {
        fgetline(buf, sizebuf, fConfig);
        corr->ssfpins[i] = fopen(buf, "r");
        if (corr->ssfpins[i] == NULL)
        {
            rc = -3; /* cannot open input */
            goto fail;
        }
        fgetline(buf, sizebuf, fConfig);
        if (!parse_timestamp(buf, corr->ssfdurs[i]))
        {
            rc = -2;
            goto fail;
        }
    }

    /* output subtitle files */
    fgetline(buf, sizebuf, fConfig);
    count = strtol(buf, &endp, 10);
    if (endp == buf || count < 1 || count > MAXFNUM - 1)
    {
        rc = -1; /* bad number of files */
        goto fail;
    }
    corr->movnum = (int)count;
    fgetline(buf, sizebuf, fConfig);
    if (!parse_timestamp(buf, corr->movdurs[corr->movnum]))
    {
        rc = -2;
        goto fail;
    }
    for (i = 0; i < corr->movnum; i++)
    {
        fgetline(buf, sizebuf, fConfig);
        corr->ssfpouts[i] = fopen(buf, "w");
        if (corr->ssfpouts[i] == NULL)
        {
            rc = -3; /* cannot open output */
            goto fail;
        }
        fgetline(buf, sizebuf, fConfig);
        if (!parse_timestamp(buf, corr->movdurs[i]))
        {
            rc = -2;
            goto fail;
        }
    }
    return 0;

fail:
    for (i = 0; i < MAXFNUM; i++)
    {
        if (corr->ssfpins[i] != NULL)
        {
            fclose(corr->ssfpins[i]);
            corr->ssfpins[i] = NULL;
        }
        if (corr->ssfpouts[i] != NULL)
        {
            fclose(corr->ssfpouts[i]);
            corr->ssfpouts[i] = NULL;
        }
    }
    corr->ssfnum = 0;
    corr->movnum = 0;
    return rc;
}
/*
 * Closes every stream held by corr.
 *
 * corr - structure whose first ssfnum input and first movnum output
 *        streams are closed; NULL slots are skipped, and closed slots are
 *        reset to NULL so that a second call is harmless
 *
 * A failure to close an output is reported with perror, because buffered
 * subtitle data is flushed at that point and may have been lost.
 */
static void Finalize (CorrStruct *corr)
{
    if (corr == NULL)
    {
        return;
    }
    for (int i = 0; i < corr->ssfnum; i++)
    {
        if (corr->ssfpins[i] != NULL)
        {
            fclose(corr->ssfpins[i]);
            corr->ssfpins[i] = NULL;
        }
    }
    for (int i = 0; i < corr->movnum; i++)
    {
        if (corr->ssfpouts[i] != NULL)
        {
            if (fclose(corr->ssfpouts[i]) != 0)
            {
                perror("fclose");
            }
            corr->ssfpouts[i] = NULL;
        }
    }
}
/*
 * Entry point: reads "ssresync_config.txt" from the current directory,
 * re-times the subtitles it describes and closes all files.
 *
 * Returns 0 on success and 1 when the configuration cannot be opened or
 * is rejected by ParseConfig.
 */
int main (void)
{
    FILE *fConfig = fopen("ssresync_config.txt", "r");
    if (fConfig == NULL)
    {
        fprintf(stderr, "cannot open ssresync_config.txt\n");
        return 1;
    }
    CorrStruct corr;
    int rc = ParseConfig(&corr, fConfig);
    // the configuration is fully consumed at this point
    fclose(fConfig);
    if (rc != 0)
    {
        // corr is not in a usable state, so neither rectify nor Finalize
        // may touch it
        fprintf(stderr, "invalid configuration (error %d)\n", rc);
        return 1;
    }
    rectify(corr.ssfpins, corr.ssfpouts, corr.ssfnum, corr.movnum, corr.ssfdurs, corr.movdurs);
    Finalize(&corr);
    return 0;
}
// An example config file.
// It is applied to the subtitles downloaded from shooter.cn for the movie Amadeus, where both the
// movie and the subtitle script are divided into 3 parts with some time deviation between them.
/*
== BEGIN ==
3
00:00:00,000
inNewMov-Amadeus-CD1.eng.srt
00:58:03,160
inNewMov-Amadeus-CD2.eng.srt
00:58:12,500
inNewMov-Amadeus-CD3.eng.srt
00:58:23,240
3
00:00:00,000
NewMov-Amadeus-CD1.eng.srt
00:57:09,000
NewMov-Amadeus-CD2.eng.srt
00:57:40,000
NewMov-Amadeus-CD3.eng.srt
01:05:32,000
== END ==
*/
enjoy every minute of an appless, googless and oracless life