// detectads/webif/plugin/detectads/silence.cpp
//
// 472 lines
// 17 KiB
// C++

// Based on mausc.c by Tinsel Phipps.
// v1.0 Roger Siddons
// v2.0 Roger Siddons: Flag clusters asap, fix segfaults, optional headers
// v3.0 Roger Siddons: Remove lib dependencies & commfree
// v4.0 Kill process argv[1] when idle for 30 seconds.
// v4.1 Fix averaging overflow
// v4.2 Unblock the alarm signal so the job actually finishes.
// v5.0 Add heartbeat messages for Humax detectads and crop processing June 2015 rfb
// v5.1 Set metadata.format=0, recompile with large file support Sep 2015 rfb
// Public domain. Requires libsndfile
// Source:https://www.mythtv.org/wiki/Commercial_detection_with_silence_for_UK_freeviewHD#silence.cpp
// Detects commercial breaks using clusters of audio silences
#include <cstdlib>
#include <cmath>
#include <cerrno>
#include <climits>
#include <deque>
#include <sndfile.h>
#include <unistd.h>
#include <signal.h>
// Frame positions and frame quantities are both plain unsigned integers.
typedef unsigned int frameNumber_t; // position of a video frame (main counts from 1)
typedef unsigned int frameCount_t;  // a number of video frames

// Every output line starts with a level tag followed by DELIMITER so that the
// python wrapper can route it. The buffers are writable because main() blanks
// them when stdout is a terminal.
#define DELIMITER "@" // must correlate with python wrapper
char prefixdebug[]  = "debug" DELIMITER;
char prefixinfo[]   = "info" DELIMITER;
char prefixerr[]    = "err" DELIMITER;
char prefixcut[]    = "cut" DELIMITER;
char prefixstatus[] = "status" DELIMITER; // rfb
void error(const char* mesg, bool die = true)
// Writes mesg to stdout as an error-level line (prefixed with prefixerr).
// Unless die is false the process then terminates with exit status 1.
{
    printf("%s%s\n", prefixerr, mesg);
    if (!die)
    {
        return;
    }
    exit(1);
}
// Process to terminate when the idle alarm fires; 0 means "no process".
pid_t tail_pid = 0;
void watchdog(int sig)
// Signal handler: sends SIGTERM to tail_pid if one has been set.
// The signal number itself is not used.
{
    (void)sig;
    if (tail_pid == 0)
    {
        return;
    }
    kill(tail_pid, SIGTERM);
}
namespace Arg
// Program argument management: parses the command line once and exposes the
// detection parameters, converted to frame units, to the rest of the program.
{
    const float kvideoRate = 25.0; // sample rate in fps (maps time to frame count)
    const frameCount_t krateInMins = kvideoRate * 60; // frames per min
    // frames per heartbeat message; 4096 frames is about 2.7 minutes at 25 fps.
    // The original note says this equals the nts block size. rfb
    const frameCount_t khBeatIntvl = 4096;
    unsigned useThreshold; // Audio level of silence
    frameCount_t useMinQuiet; // Minimum length of a silence to register
    unsigned useMinDetect; // Minimum number of silences that constitute an advert
    frameCount_t useMinLength; // adverts must be at least this long
    frameCount_t useMaxSep; // silences must be closer than this to be in the same cluster
    frameCount_t usePad; // padding for each cut

    void usage()
    // Prints the usage text as error-level lines; the final call to error()
    // uses the default die=true and therefore exits the process.
    {
        error("Usage: silence <tail_pid> <threshold> <minquiet> <mindetect> <minlength> <maxsep> <pad>", false);
        error("<tail_pid> : (int) Process ID to be killed after idle timeout.", false);
        error("<threshold>: (float) silence threshold in dB.", false);
        error("<minquiet> : (float) minimum time for silence detection in seconds.", false);
        error("<mindetect>: (float) minimum number of silences to constitute an advert.", false);
        error("<minlength>: (float) minimum length of advert break in seconds.", false);
        error("<maxsep> : (float) maximum time between silences in an advert break in seconds.", false);
        error("<pad> : (float) padding for each cut point in seconds.", false);
        error("AU format audio is expected on stdin.", false);
        error("Example: silence 4567 -75 0.1 5 60 90 1 < audio.au");
    }

    void parse(int argc, char **argv)
    // Parse args and convert to useable values (frames).
    // Exits via usage()/error() when the argument count is wrong or a value
    // cannot be parsed. On success the use* variables and tail_pid are set and
    // the effective settings plus the report column headers are printed.
    {
        if (8 != argc)
            usage();

        float argThreshold; // db
        float argMinQuiet; // secs
        float argMinDetect;
        float argMinLength; // secs
        float argMaxSep; // secs
        float argPad; // secs

        /* Load options. */
        // Scan the pid into a real int: "%d" requires an int*, and pid_t is
        // not guaranteed to be int on every platform.
        int argTailPid = 0;
        if (1 != sscanf(argv[1], "%d", &argTailPid))
            error("Could not parse tail_pid option into a number");
        tail_pid = argTailPid;
        if (1 != sscanf(argv[2], "%f", &argThreshold))
            error("Could not parse threshold option into a number");
        if (1 != sscanf(argv[3], "%f", &argMinQuiet))
            error("Could not parse minquiet option into a number");
        if (1 != sscanf(argv[4], "%f", &argMinDetect))
            error("Could not parse mindetect option into a number");
        if (1 != sscanf(argv[5], "%f", &argMinLength))
            error("Could not parse minlength option into a number");
        if (1 != sscanf(argv[6], "%f", &argMaxSep))
            error("Could not parse maxsep option into a number");
        if (1 != sscanf(argv[7], "%f", &argPad))
            error("Could not parse pad option into a number");

        /* Scale threshold to integer range that libsndfile will use. */
        useThreshold = rint(INT_MAX * pow(10, argThreshold / 20));

        /* Scale times to frames. */
        useMinQuiet = ceil(argMinQuiet * kvideoRate);
        useMinDetect = (int)argMinDetect;
        useMinLength = ceil(argMinLength * kvideoRate);
        useMaxSep = rint(argMaxSep * kvideoRate + 0.5);
        usePad = rint(argPad * kvideoRate + 0.5);

        /* Just use constants */
        //useThreshold = 679094;
        //useMinQuiet = 5;
        //useMinDetect = 4;
        //useMinLength = 1500;
        //useMaxSep = 2250;
        //usePad = 25;

        // The use* values are unsigned, so they are printed with %u.
        printf("%sThreshold=%.1f, MinQuiet=%.2f, MinDetect=%.1f, MinLength=%.1f, MaxSep=%.1f, Pad=%.2f\n",
               prefixdebug, argThreshold, argMinQuiet, argMinDetect, argMinLength, argMaxSep, argPad);
        printf("%sFrame rate is %.2f, Detecting silences below %u that last for at least %u frames\n",
               prefixdebug, kvideoRate, useThreshold, useMinQuiet);
        printf("%sClusters are composed of a minimum of %u silences closer than %u frames and must be\n",
               prefixdebug, useMinDetect, useMaxSep);
        printf("%slonger than %u frames in total. Cuts will be padded by %u frames\n",
               prefixdebug, useMinLength, usePad);
        printf("%s< preroll, > postroll, - advert, ? too few silences, # too short, = comm flagged\n", prefixdebug);
        printf("%s Start - End Start - End Duration Interval Level/Count\n", prefixinfo);
        printf("%s frame - frame (mmm:ss-mmm:ss) frame (mm:ss.s) frame (mmm:ss)\n", prefixinfo);
    }
}
class Silence
// One run of consecutive quiet frames, or a synthetic marker placed at the
// start/end of the programme.
{
public:
    // progStart/progEnd mark the synthetic silences; detection is a real one
    enum state_t {progStart, detection, progEnd};
    static const char state_log[3]; // log symbol for each state_t value

    const state_t state;       // type of silence
    const frameNumber_t start; // first frame of the silence
    frameNumber_t end;         // last frame of the silence
    frameCount_t length;       // number of frames covered (end - start + 1)
    frameCount_t interval;     // gap to the previous silence; set when added to the list
    double power;              // running average power level

    // A new silence covers exactly one frame: _start.
    Silence(frameNumber_t _start, double _power = 0, state_t _state = detection)
        : state(_state),
          start(_start),
          end(_start),
          length(1),
          interval(0),
          power(_power)
    {
    }

    void extend(frameNumber_t frame, double _power)
    // Moves the end of the silence to frame and folds _power into the average
    {
        end = frame;
        length = end - start + 1;
        // incremental mean: new = old + (sample - old) / count
        const double drift = _power - power;
        power += drift / length;
    }
};
// c++0x doesn't allow initialisation within class
const char Silence::state_log[3] = {'<', ' ', '>'};
class Cluster
// A cluster of silences: a first and last silence plus the derived
// classification and the padded cut points that get reported.
{
private:
    void setState()
    // Re-classifies the cluster from its current extent. Order matters:
    // preroll and postroll take precedence over the length/count tests.
    {
        if (this->start->start == 1)
            state = preroll;                    // begins at the first frame
        else if (this->end->state == Silence::progEnd)
            state = postroll;                   // runs to the end of the programme
        else if (length < Arg::useMinLength)
            state = tooshort;
        else if (silenceCount < Arg::useMinDetect)
            state = toofew;
        else
            state = advert;
    }

public:
    // tooshort..unset are transient states - they may be updated, preroll..postroll are final
    enum state_t {tooshort, toofew, unset, preroll, advert, postroll};
    static const char state_log[6];    // log symbol for each state_t value
    static frameNumber_t completesAt;  // frame where the most recent cluster will complete

    state_t state;                     // type of cluster
    const Silence* start;              // first silence
    Silence* end;                      // last silence
    frameNumber_t padStart, padEnd;    // padded cluster start/end frames
    unsigned silenceCount;             // number of silences
    frameCount_t length;               // number of frames
    frameCount_t interval;             // frames between end of last cluster and start of this one

    Cluster(Silence* s) : state(unset), start(s), end(s), silenceCount(1), length(s->length), interval(0)
    // Starts a cluster consisting of the single silence s
    {
        completesAt = end->end + Arg::useMaxSep; // finish cluster <maxsep> beyond silence end
        setState();
        // pad everything except pre-rolls
        padStart = (state == preroll ? 1 : start->start + Arg::usePad);
        // A cluster that is never extended can still reach a final state and be
        // reported as a cut, so padEnd must be defined here as well rather than
        // only in extend().
        padEnd = end->end - (state == postroll ? 0 : Arg::usePad);
    }

    void extend(Silence* _end)
    // Define end of a cluster: _end becomes the last silence and the derived
    // fields (count, length, completion frame, state, padEnd) are refreshed.
    {
        end = _end;
        silenceCount++;
        length = end->end - start->start + 1;
        completesAt = end->end + Arg::useMaxSep; // finish cluster <maxsep> beyond silence end
        setState();
        // pad everything except post-rolls
        padEnd = end->end - (state == postroll ? 0 : Arg::usePad);
    }
};
// c++0x doesn't allow initialisation within class
const char Cluster::state_log[6] = {'#', '?', '.', '<', '-', '>'};
frameNumber_t Cluster::completesAt = 0;
class ClusterList
// Manages a list of detected silences and a list of assigned clusters
{
protected:
// list of detected silences
std::deque<Silence*> silence;
// list of deduced clusters of the silences
std::deque<Cluster*> cluster;
public:
Silence* insertStartSilence()
// Inserts a fake silence at the front of the silence list
{
// create a single frame silence at frame 1 and insert it at front
Silence* ref = new Silence(1, 0, Silence::progStart);
silence.push_front(ref);
return ref;
}
void addSilence(Silence* newSilence)
// Adds a silence detection to the end of the silence list
{
// set interval between this & previous silence/prog start
newSilence->interval = newSilence->start
- (silence.empty() ? 1 : silence.back()->end - 1);
// store silence
silence.push_back(newSilence);
}
void addCluster(Cluster* newCluster)
// Adds a cluster to end of the cluster list
{
// set interval between new cluster & previous one/prog start
newCluster->interval = newCluster->start->start
- (cluster.empty() ? 1 : cluster.back()->end->end - 1);
// store cluster
cluster.push_back(newCluster);
}
};
// Detection state shared by main() and the process*() helpers.
Silence* currentSilence; // the silence currently being detected/built; NULL when none is open
Cluster* currentCluster; // the cluster currently being built; NULL when none is open
ClusterList* clist; // List of completed silences & clusters; allocated in main()
frameNumber_t lastHeatbeat; // Frame passed to the last HeartBeat message (0 before the first) rfb
void report(const char* err,
const char type,
const char* msg1,
const frameNumber_t start,
const frameNumber_t end,
const frameNumber_t interval,
const int power)
// Logs silences/clusters/cuts in a standard format
{
frameCount_t duration = end - start + 1;
printf("%s%c %7s %6d-%6d (%3d:%02ld-%3d:%02ld), %4d (%2d:%04.1f), %5d (%3d:%02ld), [%7d]\n",
err, type, msg1, start, end,
(start+13) / Arg::krateInMins, lrint(start / Arg::kvideoRate) % 60,
(end+13) / Arg::krateInMins, lrint(end / Arg::kvideoRate) % 60,
duration, (duration+1) / Arg::krateInMins, fmod(duration / Arg::kvideoRate, 60),
interval, (interval+13) / Arg::krateInMins, lrint(interval / Arg::kvideoRate) % 60, power);
}
void processHeartbeat(frameNumber_t frame) // rfb
// Generate a periodic progress message when not processing a cluster.
// Lets the user see that something is happening between adverts and tells
// automatic cropping how far it is safe to process.
// The message spans from the previous heartbeat frame to frame-1; frame then
// becomes the new heartbeat position.
{
    const frameNumber_t since = lastHeatbeat;
    lastHeatbeat = frame;
    report(prefixstatus, '=', "HtBeat", since, frame - 1, frame - since, 0);
} // rfb
void processSilence()
// Process a silence detection
{
// ignore detections that are too short
if (currentSilence->state == Silence::detection && currentSilence->length < Arg::useMinQuiet)
{
// throw it away
delete currentSilence;
currentSilence = NULL;
}
else
{
// record new silence
clist->addSilence(currentSilence);
// assign it to a cluster
if (currentCluster)
{
// add to existing cluster
currentCluster->extend(currentSilence);
}
else if (currentSilence->interval <= Arg::useMaxSep) // only possible for very first silence
{
// First silence is close to prog start so extend cluster to the start
// by inserting a fake silence at prog start and starting the cluster there
currentCluster = new Cluster(clist->insertStartSilence());
currentCluster->extend(currentSilence);
}
else
{
// this silence is the start of a new cluster
currentCluster = new Cluster(currentSilence);
// Generate Heartbeat at prior frame - Last safe point to copy file to // rfb
processHeartbeat(currentCluster->start->start -1);
}
report(prefixdebug, currentSilence->state_log[currentSilence->state], "Silence",
currentSilence->start, currentSilence->end,
currentSilence->interval, currentSilence->power);
// silence is now owned by the list, start looking for next
currentSilence = NULL;
}
}
void processCluster()
// Process a completed cluster
{
// record new cluster
clist->addCluster(currentCluster);
report(prefixinfo, currentCluster->state_log[currentCluster->state], "Cluster",
currentCluster->start->start, currentCluster->end->end,
currentCluster->interval, currentCluster->silenceCount);
// only flag clusters at final state
if (currentCluster->state > Cluster::unset)
report(prefixcut, '=', "Cut", currentCluster->padStart, currentCluster->padEnd, 0, 0);
// cluster is now owned by the list, start looking for next
currentCluster = NULL;
}
int main(int argc, char **argv)
// Detect silences and allocate to clusters
{
// Remove logging prefixes if writing to terminal
if (isatty(1))
prefixcut[0] = prefixinfo[0] = prefixdebug[0] = prefixerr[0] = prefixstatus[0] = '\0'; //rfb
// flush output buffer after every line
setvbuf(stdout, NULL, _IOLBF, 0);
Arg::parse(argc, argv);
/* Check the input is an audiofile. */
SF_INFO metadata;
metadata.format=0;
SNDFILE* input = sf_open_fd(STDIN_FILENO, SFM_READ, &metadata, SF_FALSE);
if (NULL == input) {
error("libsndfile error:", false);
error(sf_strerror(NULL));
}
/* Allocate data buffer to contain audio data from one video frame. */
const size_t frameSamples = metadata.channels * metadata.samplerate / Arg::kvideoRate;
int* samples = (int*)malloc(frameSamples * sizeof(int));
if (NULL == samples)
error("Couldn't allocate memory");
// create silence/cluster list
clist = new ClusterList();
// Kill head of pipeline if timeout happens.
signal(SIGALRM, watchdog);
sigset_t intmask;
sigemptyset(&intmask);
sigaddset(&intmask, SIGALRM);
sigprocmask(SIG_UNBLOCK, &intmask, NULL);
alarm(30);
// Process the input one frame at a time and process cuts along the way.
frameNumber_t frames = 0;
lastHeatbeat = 0; // rfb
while (frameSamples == static_cast<size_t>(sf_read_int(input, samples, frameSamples)))
{
alarm(30);
frames++;
// determine average audio level in this frame
unsigned long long avgabs = 0;
unsigned maxabs = 0;
unsigned minabs = INT_MAX;
for (unsigned i = 0; i < frameSamples; i++)
{
if (maxabs < abs(samples[i]))
maxabs = abs(samples[i]);
// if (minabs > abs(samples[i]))
// minabs = abs(samples[i]);
avgabs += abs(samples[i]);
}
avgabs = avgabs / frameSamples;
// check for a silence
if (maxabs < Arg::useThreshold) // Use max level njm/mymsman
{
if (currentSilence)
{
// extend current silence
currentSilence->extend(frames, avgabs);
}
else // transition to silence
{
// start a new silence
currentSilence = new Silence(frames, avgabs);
}
}
else if (currentSilence) // transition out of silence
{
processSilence();
}
// in noise: check for cluster completion
else if (currentCluster && frames > currentCluster->completesAt)
{
processCluster();
}
else if (!currentCluster && frames >= lastHeatbeat+Arg::khBeatIntvl) // rfb
{
// issue periodic progress message for use by crop stage of pipeline
processHeartbeat(frames);
} // rfb
}
// Complete any current silence (prog may have finished in silence)
if (currentSilence)
{
processSilence();
}
// extend any cluster close to prog end
if (currentCluster && frames <= currentCluster->completesAt)
{
// generate a silence at prog end and extend cluster to it
currentSilence = new Silence(frames, 0, Silence::progEnd);
processSilence();
}
// Complete any final cluster
if (currentCluster)
{
processCluster();
}
report(prefixstatus, '=', "End",
lastHeatbeat, frames,
frames-lastHeatbeat, 0);
}